From 172d0a4c759804b07f5592c270ae3c36253274d5 Mon Sep 17 00:00:00 2001
From: Xiaotian Wu
Date: Sun, 6 Mar 2022 12:09:24 +0800
Subject: [PATCH 01/23] Add support for LoongArch

---
 0ad/0ad-fix-build.patch | 18 +
 0ad/0ad-la64.patch | 97 +
 0ad/PKGBUILD | 10 +
 a2ps/PKGBUILD | 2 +
 aardvark-dns/PKGBUILD | 2 +-
 abseil-cpp/PKGBUILD | 7 +-
 abseil-cpp/abseil-cpp-la64.patch | 13 +
 acme-redirect/PKGBUILD | 11 +-
 acpi_call-lts/PKGBUILD | 2 +-
 acpi_call/PKGBUILD | 2 +-
 acpica/PKGBUILD | 10 +-
 acpica/acpica-fix-build.patch | 13 +
 acpica/acpica-unix-la64.patch | 39 +
 aida-x/PKGBUILD | 2 +-
 aiksaurus/PKGBUILD | 9 +-
 aiksaurus/aiksaurus-fix-build.patch | 400 +
 alacritty/PKGBUILD | 2 +-
 alsa-tools/PKGBUILD | 2 +-
 android-tools/PKGBUILD | 11 +-
 android-tools/android-tools-la64.patch | 13 +
 anewer/PKGBUILD | 2 +-
 angle-grinder/PKGBUILD | 2 +-
 apache-orc/PKGBUILD | 6 +-
 apptainer/PKGBUILD | 3 +
 apr-util/PKGBUILD | 2 +-
 apr/PKGBUILD | 14 +-
 apr/apr-1.7-fix-build.patch | 13 +
 arch-rebuild-order/PKGBUILD | 6 +-
 arch-repro-status/PKGBUILD | 2 +-
 archinstall/PKGBUILD | 8 +-
 archinstall/archinstall-la64-2.6.3.patch | 139 +
 arrow/PKGBUILD | 5 +
 arti/PKGBUILD | 2 +-
 aspell-ru/PKGBUILD | 2 +-
 at51/PKGBUILD | 2 +-
 auth-tarball-from-git/PKGBUILD | 2 +-
 autogen/PKGBUILD | 1 +
 autotiling-rs/PKGBUILD | 2 +-
 avisynthplus/PKGBUILD | 10 +-
 avisynthplus/avisynthplus-la64.patch | 19 +
 b3sum/PKGBUILD | 2 +-
 babl/PKGBUILD | 7 +-
 babl/babl-fix-gir-pkgname.patch | 13 +
 bacon/PKGBUILD | 2 +-
 baidupcs-go/PKGBUILD | 2 +
 bandwhich/PKGBUILD | 2 +-
 bcprov/PKGBUILD | 2 +-
 bees/PKGBUILD | 11 +-
 bigloo/PKGBUILD | 8 +-
 bigloo/bigloo-la64.patch | 11 +
 bingrep/PKGBUILD | 2 +-
 binocle/PKGBUILD | 2 +-
 blender/PKGBUILD | 13 +-
 bonnie++/PKGBUILD | 2 +-
 boost/PKGBUILD | 7 +-
 boost/boost-1.79.0-la64.patch | 55 +
 booster/PKGBUILD | 6 +-
 bore/PKGBUILD | 2 +-
 borg/PKGBUILD | 2 +-
 bottom/PKGBUILD | 2 +-
 box2d/PKGBUILD | 11 +-
 box2d/box2d-fix-build.patch | 13 +
 boxxy/PKGBUILD | 2 +-
 breezy/PKGBUILD | 11 +-
 breezy/breezy-fix-install.patch | 15 +
 brltty/PKGBUILD | 1 -
 broadcom-wl/PKGBUILD | 2 +-
 broot/PKGBUILD | 2 +-
 buildkit/0001-add-loongarch64-support.patch | 130 +
 buildkit/PKGBUILD | 11 +-
 bupstash/PKGBUILD | 2 +-
 caddy/PKGBUILD | 3 +
 calf/PKGBUILD | 2 +-
 capnet-assist/PKGBUILD | 8 +
 cargo-audit/PKGBUILD | 2 +-
 cargo-auditable/PKGBUILD | 2 +-
 cargo-bloat/PKGBUILD | 2 +-
 cargo-c/PKGBUILD | 2 +-
 cargo-cyclonedx/PKGBUILD | 2 +-
 cargo-depgraph/PKGBUILD | 2 +-
 cargo-edit/PKGBUILD | 2 +-
 cargo-expand/PKGBUILD | 2 +-
 cargo-geiger/PKGBUILD | 2 +-
 cargo-generate/PKGBUILD | 2 +-
 cargo-insta/PKGBUILD | 2 +-
 cargo-machete/PKGBUILD | 2 +-
 cargo-msrv/PKGBUILD | 2 +-
 cargo-ndk/PKGBUILD | 2 +-
 cargo-outdated/PKGBUILD | 2 +-
 cargo-pgrx/PKGBUILD | 2 +-
 cargo-sort/PKGBUILD | 2 +-
 cargo-spellcheck/PKGBUILD | 2 +-
 cargo-supply-chain/PKGBUILD | 2 +-
 cargo-tarpaulin/PKGBUILD | 2 +-
 cargo-tauri/PKGBUILD | 2 +-
 cargo-udeps/PKGBUILD | 4 +-
 cargo-update/PKGBUILD | 2 +-
 cargo-watch/PKGBUILD | 2 +-
 cargo2junit/PKGBUILD | 2 +-
 cbindgen/PKGBUILD | 2 +-
 cdparanoia/PKGBUILD | 2 +
 cdrtools/PKGBUILD | 11 +-
 cdrtools/cdrtools-la64.patch | 16 +
 chezmoi/PKGBUILD | 5 +
 chmlib/PKGBUILD | 11 +-
 chmlib/chmlib-fix-gcc13.patch | 25 +
 choose/PKGBUILD | 2 +-
 clamav/PKGBUILD | 2 +-
 clang14/0001-add-loong64-support.patch | 16205 ++
 clang14/0002-add-loong64-support.patch | 9243 ++
 clang14/PKGBUILD | 12 +-
 clash/PKGBUILD | 4 +
 cln/PKGBUILD | 11 +-
 cln/cln-la64.patch | 53 +
 clucene/PKGBUILD | 3 +
 cni-plugins/PKGBUILD | 3 +
 cobalt/PKGBUILD | 2 +-
 cocogitto/PKGBUILD | 2 +-
 code/PKGBUILD | 3 +
coin-or-cbc/PKGBUILD | 6 +- committed/PKGBUILD | 2 +- compiler-rt/PKGBUILD | 3 + ...C8F98282B944E3B0D5C2530FC3042E345AD05D.asc | 75 + conky/PKGBUILD | 2 +- containerd/PKGBUILD | 4 + cpputest/PKGBUILD | 2 +- cri-o/PKGBUILD | 1 + criu/2183.patch | 2237 + criu/PKGBUILD | 5 +- cups-pdf/PKGBUILD | 2 +- cups-pk-helper/PKGBUILD | 2 +- cxxbridge/PKGBUILD | 2 +- cypari2/PKGBUILD | 2 +- daktilo/PKGBUILD | 2 +- dbeaver/PKGBUILD | 2 +- dbus-c++/PKGBUILD | 3 + dconf-editor/dconf-editor-fix-meson.patch | 20 + deepin-anything/PKGBUILD | 2 +- deepin-desktop-base/PKGBUILD | 12 +- .../deepin-deskto-base-la64.patch | 27 + deepin-grand-search/62.patch | 126 + deepin-grand-search/PKGBUILD | 4 +- devtools/PKGBUILD | 16 +- devtools/devtools-loong64-1.0.4.patch | 898 + dfrs/PKGBUILD | 19 +- dfrs/dfrs-la64.patch | 13 + dhcp/PKGBUILD | 10 + diesel-cli/PKGBUILD | 2 +- discover/PKGBUILD | 2 +- diskonaut/PKGBUILD | 2 +- diskus/PKGBUILD | 2 +- distcc/PKGBUILD | 5 +- dns-over-https/PKGBUILD | 6 +- docker-machine/PKGBUILD | 11 +- docker-machine/docker-machine-la64.patch | 16 + docker/PKGBUILD | 11 +- docker/moby-la64.patch | 12 + dog/PKGBUILD | 2 +- dra/PKGBUILD | 2 +- dragonfly-reverb/PKGBUILD | 2 +- dtc/PKGBUILD | 2 +- dua-cli/PKGBUILD | 8 +- dump_syms/PKGBUILD | 10 +- duplicity/PKGBUILD | 10 +- dysk/PKGBUILD | 2 +- easyloggingpp/PKGBUILD | 2 +- edk2/60-edk2-loongarch64.json | 31 + edk2/PKGBUILD | 60 +- edk2/edk2-use-env-toolchains.patch | 62 + edk2/relax_edk2_gcc14.diff | 44 + efitools/PKGBUILD | 9 +- efitools/efitools-la64.patch | 13 + electron/PKGBUILD | 6 +- electron25/PKGBUILD | 10 +- emacs/PKGBUILD | 2 +- emacs/emacs-la64.patch | 10 + erdtree/PKGBUILD | 2 +- espeakup/PKGBUILD | 2 +- espflash/PKGBUILD | 2 +- espup/PKGBUILD | 2 +- eva/PKGBUILD | 2 +- evcxr_repl/PKGBUILD | 2 +- farstream/PKGBUILD | 2 +- fbterm/0001-Fix-build-with-gcc-6.patch | 104 + fbterm/PKGBUILD | 60 + fbterm/color_palette.patch | 102 + fbterm/fbconfig.patch | 78 + fbterm/fbterm.patch | 9 + fbterm/fbtermrc | 66 + fbterm/fix_ftbfs_crosscompile.patch | 28 + fbterm/fix_ftbfs_epoll.patch | 15 + fcitx/PKGBUILD | 8 +- fcitx5-chinese-addons/PKGBUILD | 4 +- fcitx5-m17n/PKGBUILD | 2 +- ffcall/PKGBUILD | 12 +- ffcall/libffcall-la64-2.4.patch | 3304 + ffmpeg/PKGBUILD | 8 +- ffmpeg4.4/PKGBUILD | 10 - ffmpegthumbnailer/PKGBUILD | 2 +- fftw/PKGBUILD | 26 +- firecracker/PKGBUILD | 2 +- .../0001-Add-support-for-LoongArch64.patch | 80 + ...2-Enable-VA-API-support-for-AMD-GPUs.patch | 31 + ...rchitectural-limit-on-VA-API-support.patch | 40 + .../0004-Enable-WebRTC-for-LoongArch.patch | 152 + .../0005-Fix-libyuv-build-with-LSX-LASX.patch | 398 + firefox-developer-edition/PKGBUILD | 59 +- .../0001-Add-support-for-LoongArch64.patch | 80 + ...2-Enable-VA-API-support-for-AMD-GPUs.patch | 31 + ...rchitectural-limit-on-VA-API-support.patch | 40 + .../0004-Enable-WebRTC-for-LoongArch.patch | 152 + .../0005-Fix-libyuv-build-with-LSX-LASX.patch | 398 + firefox/PKGBUILD | 68 +- flac/PKGBUILD | 2 +- flashrom/0001-Loongson-3-SPI-tmp.patch | 353 + flashrom/0002-Speed-up.patch | 37 + .../0003-Add-support-for-loongarch64.patch | 220 + flashrom/PKGBUILD | 28 +- flatpak/PKGBUILD | 2 +- fluidd/PKGBUILD | 1 + fmt/PKGBUILD | 2 +- foomatic-db-engine/PKGBUILD | 2 +- foomatic-db/PKGBUILD | 2 +- foot/PKGBUILD | 2 +- fossil/PKGBUILD | 4 + fuse2/PKGBUILD | 14 +- fuse2/fuse-closefrom.patch | 22 + fuse2/fuse-loongarch.patch | 28 + .../0001-add-support-for-loongarch64.patch | 315 + fwupd-efi/PKGBUILD | 13 +- fwupd/PKGBUILD | 21 +- fwupd/fwupd-1.9.5-loong64.patch | 39 + gcc12/PKGBUILD | 24 +- 
gcr/PKGBUILD | 4 - gendesk/PKGBUILD | 5 +- geos/PKGBUILD | 10 +- geos/geos-3.11.1-gcc13.patch | 90 + gfold/PKGBUILD | 2 +- ghc/PKGBUILD | 14 +- git-branchless/PKGBUILD | 2 +- git-bug/PKGBUILD | 2 + git-cliff/PKGBUILD | 2 +- git-delta/PKGBUILD | 2 +- git-grab/PKGBUILD | 2 +- gitlab-exporter/PKGBUILD | 2 +- gitlab-gitaly/PKGBUILD | 7 + gitlab-runner/PKGBUILD | 2 + gitlab-shell/PKGBUILD | 6 +- gitoxide/PKGBUILD | 2 +- gitui/PKGBUILD | 2 +- gloox/PKGBUILD | 11 +- gloox/gloox-fix-build.patch | 188 + glusterfs/PKGBUILD | 3 +- gn/PKGBUILD | 16 +- gn/gn-fix-build.patch | 10 + gnome-control-center/PKGBUILD | 2 +- .../gnome-dictionary-fix-meson.patch | 34 + .../gnome-font-viewer-meson.patch | 25 + gnome-mplayer/PKGBUILD | 2 +- gnome-remote-desktop/PKGBUILD | 2 +- gnome-tetravex/PKGBUILD | 6 +- gnome-tetravex/gnome-tetravex-fix-meson.patch | 22 + gnome-tour/PKGBUILD | 6 + gnu-efi/PKGBUILD | 10 +- gnu-efi/gnu-efi-3.0.17-la64.patch | 89 + gnugo/PKGBUILD | 2 + go-md2man/PKGBUILD | 3 +- go/PKGBUILD | 19 +- godot/PKGBUILD | 4 +- gpg-tui/PKGBUILD | 2 +- gping/PKGBUILD | 2 +- gptfdisk/PKGBUILD | 2 + graphviz/PKGBUILD | 4 +- grcov/PKGBUILD | 2 +- greetd-regreet/PKGBUILD | 2 +- greetd-tuigreet/PKGBUILD | 2 +- greetd/PKGBUILD | 2 +- grex/PKGBUILD | 2 +- grpc/PKGBUILD | 2 +- gssdp/PKGBUILD | 1 + gstreamer/PKGBUILD | 18 +- gtk3/PKGBUILD | 2 + gtk4/gtk-objcopy.patch | 36 + gunicorn/PKGBUILD | 1 + halp/PKGBUILD | 2 +- handlr/PKGBUILD | 2 +- haskell-doctest-parallel/PKGBUILD | 2 +- hck/PKGBUILD | 2 +- hdf5/PKGBUILD | 6 +- heh/PKGBUILD | 2 +- helix/PKGBUILD | 2 +- hexyl/PKGBUILD | 2 +- hidapi/hidapi-fix-build.patch | 12 + himalaya/PKGBUILD | 2 +- hitori/PKGBUILD | 7 +- hitori/hitori-fix-meson.patch | 21 + hotdoc/PKGBUILD | 2 +- hplip/PKGBUILD | 2 +- hspell/PKGBUILD | 12 +- htmlcxx/PKGBUILD | 11 +- htmlcxx/htmlcxx-la64.patch | 26 + htmlq/PKGBUILD | 2 +- httplz/PKGBUILD | 2 +- hub/PKGBUILD | 14 +- hugo/PKGBUILD | 2 +- hypercorn/PKGBUILD | 1 + hyperfine/PKGBUILD | 2 +- i3status-rust/PKGBUILD | 2 +- iempluginsuite/PKGBUILD | 2 +- igrep/PKGBUILD | 2 +- imagemagick/PKGBUILD | 4 +- imlib2/PKGBUILD | 3 +- inkscape/PKGBUILD | 1 + ipmitool/PKGBUILD | 3 +- ipp-usb/PKGBUILD | 3 +- ipxe/PKGBUILD | 52 +- ipxe/arch.ipxe | 45 +- ipxe/general.h | 2 +- ipxe/ipxe-la64.patch | 15 + ispc/PKGBUILD | 7 +- jack2/PKGBUILD | 2 +- java-openjdk/freedesktop-java.desktop | 12 - java-openjdk/freedesktop-jconsole.desktop | 11 - java-openjdk/freedesktop-jshell.desktop | 9 - java-openjdk/install_jdk-openjdk.sh | 50 - java-openjdk/install_jre-openjdk-headless.sh | 48 - java-openjdk/install_jre-openjdk.sh | 35 - java11-openjdk/PKGBUILD | 21 +- java11-openjdk/jdk11-11.0.20.1-la64.patch | 116875 ++++++++++++++ java17-openjdk/PKGBUILD | 22 +- java17-openjdk/jdk17-17.0.9.8-la64.patch | 121820 +++++++++++++++ java8-openjdk/PKGBUILD | 13 +- java8-openjdk/jdk8u382-la64.patch | 116949 ++++++++++++++ jemalloc/PKGBUILD | 15 +- jemalloc/add-loongarch64.patch | 14 + jless/PKGBUILD | 2 +- js102/PKGBUILD | 75 +- js102/js102-loong64-jit.patch | 72 + js115/PKGBUILD | 74 +- js91/mozjs-la64.patch | 98 + js91/spidermonkey-91-add-loongarch.patch | 686 + just/PKGBUILD | 8 +- jwt-cli/PKGBUILD | 2 +- k3b/PKGBUILD | 2 +- ...c-support-for-LoongArch-architecture.patch | 123 + kcov/PKGBUILD | 11 +- kdeplasma-addons/PKGBUILD | 2 +- kernel-headers-musl/PKGBUILD | 19 +- kmon/PKGBUILD | 2 +- kondo/PKGBUILD | 2 +- kooha/PKGBUILD | 2 +- kubie/PKGBUILD | 2 +- ladspa/PKGBUILD | 4 + lapce/PKGBUILD | 2 +- latex2rtf/PKGBUILD | 4 +- ldproxy/PKGBUILD | 2 +- leafpad/PKGBUILD | 2 + 
lgi/PKGBUILD | 2 +- libavif/PKGBUILD | 2 +- libb2/PKGBUILD | 14 +- libb2/libb2-fix-build.patch | 14 + libcdio/PKGBUILD | 2 + libclc/PKGBUILD | 1 - libdaemon/PKGBUILD | 2 +- libdrm/PKGBUILD | 2 +- libdsme/PKGBUILD | 2 +- libetebase/PKGBUILD | 2 +- libfbclient/PKGBUILD | 3 + libfbclient/fbclient-la64-4.0.0.patch | 27 + libfbclient/fbclient-la64.patch | 27 + libfido2/libfido2-no-ssp.patch | 27 + libfilezilla/PKGBUILD | 5 + libgda/PKGBUILD | 24 +- libgda/libgda-la64.patch | 22 + libgexiv2/PKGBUILD | 2 +- libglvnd/PKGBUILD | 1 + libgme/PKGBUILD | 2 +- libgoom2/PKGBUILD | 2 + libgpod/PKGBUILD | 2 +- libimagequant/PKGBUILD | 6 + libinput/PKGBUILD | 2 + libjpeg-turbo/PKGBUILD | 3 +- libjxl/PKGBUILD | 2 +- libksysguard/PKGBUILD | 2 +- libopenraw/PKGBUILD | 7 +- libopenraw/libopenraw-fix-build.patch | 12 + liborcus/PKGBUILD | 11 +- liborcus/liborcus-cstdint.patch | 20 + libotr/PKGBUILD | 7 +- libotr/libotr-fix-build.patch | 12 + libraw/PKGBUILD | 1 + libredefender/PKGBUILD | 2 +- libreoffice-fresh/PKGBUILD | 4 +- libretro-genesis-plus-gx/PKGBUILD | 5 + librustls/PKGBUILD | 2 +- libserialport/PKGBUILD | 2 +- .../0001-add-support-for-loongarch64.patch | 672 + libsmbios/PKGBUILD | 6 +- liburcu/PKGBUILD | 9 +- liburcu/userspace-rcu-loongarch64.patch | 186 + libusbsio/PKGBUILD | 6 +- libvirt/PKGBUILD | 13 +- libvirt/libvirt-loongarch.patch | 407 + libvisual/PKGBUILD | 2 + libvpx/PKGBUILD | 2 + libyuv/0001-fix-build-error.patch | 113 + libyuv/PKGBUILD | 7 +- link-grammar/PKGBUILD | 5 +- linux-hardened/PKGBUILD | 54 +- linux-hardened/config.la64 | 8539 + linux-hardened/remove_shm_align_mask.diff | 67 + linux-tools/PKGBUILD | 25 +- liteide/PKGBUILD | 7 +- liteide/liteide-fix-build.patch | 11 + lld/PKGBUILD | 9 +- lld/lld-la64.patch | 2569 + llvm/PKGBUILD | 10 +- ...timeDyld-MCJIT-Add-LoongArch-support.patch | 328 + llvm/llvm-newreloc-la64.patch | 132 + llvm14/PKGBUILD | 9 +- llvm14/llvm-loong64.patch | 47164 ++++++ lm_sensors/PKGBUILD | 2 +- lsd/PKGBUILD | 2 +- lua-compat53/PKGBUILD | 2 +- lua-system/PKGBUILD | 2 +- lua-term/PKGBUILD | 2 +- luaexpat/PKGBUILD | 2 +- luajit/PKGBUILD | 14 +- luarocks/PKGBUILD | 11 +- luarocks/luarocks-la64.patch | 24 + luasocket/PKGBUILD | 2 +- lucky-commit/PKGBUILD | 2 +- lurk/PKGBUILD | 2 +- lxc/4363.patch | 235 + lxc/PKGBUILD | 9 +- malcontent/PKGBUILD | 1 + mandown/PKGBUILD | 6 + mariadb/PKGBUILD | 8 +- mariadb/mariadb-fix-build.patch | 50 + marisa/PKGBUILD | 2 +- marked-man/PKGBUILD | 2 +- mastodon-twitter-sync/PKGBUILD | 2 +- mate-applets/PKGBUILD | 4 +- mate-terminal/PKGBUILD | 11 +- mate-terminal/theme-colors-false.patch | 11 + materialx/PKGBUILD | 7 +- materialx/materialx-fix-build.patch | 11 + matrix-synapse/PKGBUILD | 4 +- maturin/PKGBUILD | 2 +- maven/PKGBUILD | 4 +- mcfly/PKGBUILD | 2 +- mdbook-linkcheck/PKGBUILD | 10 +- mdbook/PKGBUILD | 2 +- mdcat/PKGBUILD | 2 +- menyoki/PKGBUILD | 2 +- mesa/PKGBUILD | 4 +- meson/PKGBUILD | 12 +- mididings/PKGBUILD | 4 +- mingw-w64-gcc/PKGBUILD | 4 +- mirro-rs/PKGBUILD | 2 +- mkinitcpio-archiso/PKGBUILD | 11 +- .../mkinitcpio-archiso-loong64.patch | 12 + mkosi/PKGBUILD | 8 +- mold/PKGBUILD | 2 +- mpg123/PKGBUILD | 2 +- mplayer/PKGBUILD | 10 +- mplayer/mplayer-la64.patch | 31 + musl/0001-musl-add-loongarch64-support.patch | 1722 + musl/PKGBUILD | 11 +- mutter/PKGBUILD | 2 +- nautilus-sendto/PKGBUILD | 7 +- .../nautilus-sendto-fix-meson.patch | 13 + navi/PKGBUILD | 2 +- ncspot/PKGBUILD | 2 +- neofetch/PKGBUILD | 11 +- neofetch/neofetch-la64.patch | 15 + netavark/PKGBUILD | 2 +- netpbm/PKGBUILD | 3 + netplan/PKGBUILD | 12 +- 
netplan/netplan-disable-pandoc.patch | 27 + newsboat/PKGBUILD | 2 +- nextcloud-app-deck/PKGBUILD | 2 +- nextcloud-app-notify_push/PKGBUILD | 6 +- nginx-mod-ndk-set-misc/PKGBUILD | 2 +- nickel/PKGBUILD | 2 +- ninja/PKGBUILD | 6 +- ...o-not-use-PTHREAD_STACK_MIN-on-glibc.patch | 32 + ntp/ntp-ssp-la.patch | 10 + nushell/PKGBUILD | 2 +- nuspell/PKGBUILD | 2 +- nvidia-cg-toolkit/PKGBUILD | 1 + nvidia-lts/PKGBUILD | 2 +- nvidia/PKGBUILD | 2 +- ocaml/PKGBUILD | 15 +- ocaml/ocaml-5.0.0-la64.patch | 2389 + onefetch/PKGBUILD | 2 +- open-iscsi/PKGBUILD | 3 +- open-iscsi/open-iscsi-fix-build.patch | 25 + open-isns/PKGBUILD | 2 +- openal/PKGBUILD | 2 +- openblas/PKGBUILD | 11 +- openblas/fix-loong.patch | 47 + opencv/PKGBUILD | 7 +- openh264/PKGBUILD | 1 + openimagedenoise/PKGBUILD | 2 +- openjade/PKGBUILD | 12 +- openjade/openjade-nola.patch | 12 + openmp/PKGBUILD | 2 +- openmp/openmp-loong64.patch | 514 + openpgp-ca/PKGBUILD | 2 +- openpgp-card-tools/PKGBUILD | 2 +- opus/PKGBUILD | 2 + ouch/PKGBUILD | 2 +- paccat/PKGBUILD | 2 +- pacman-bintrans/PKGBUILD | 2 +- pacman-contrib/PKGBUILD | 3 + pacman-contrib/pkgbuild-vim-la64.patch | 13 + pari/PKGBUILD | 4 +- pastel/PKGBUILD | 2 +- perl-image-sane/PKGBUILD | 2 +- phonon/PKGBUILD | 2 +- pipe-rename/PKGBUILD | 2 +- pixman/83.patch | 11078 ++ pixman/PKGBUILD | 18 +- pkgfile/PKGBUILD | 13 +- pkgfile/pkgfile-use-loong64.patch | 17 + plasma-desktop/PKGBUILD | 3 +- pngquant/PKGBUILD | 2 +- polkit-qt/PKGBUILD | 2 +- polkit/PKGBUILD | 1 + portmidi/PKGBUILD | 2 + postgresql/PKGBUILD | 4 +- postgresql/add-loongarch-support.patch | 13 + ppsspp/PKGBUILD | 19 + primecount/PKGBUILD | 2 +- procs/PKGBUILD | 2 +- progpick/PKGBUILD | 2 +- prometheus-memcached-exporter/PKGBUILD | 3 + prometheus-mysqld-exporter/PKGBUILD | 3 + prometheus-wireguard-exporter/PKGBUILD | 2 +- protobuf-c/PKGBUILD | 1 + psiconv/PKGBUILD | 4 +- psiconv/psiconv-fix-build.patch | 74 + pueue/PKGBUILD | 2 +- pyalpm/PKGBUILD | 2 +- pycups/PKGBUILD | 2 +- pyflow/PKGBUILD | 2 +- pygobject/PKGBUILD | 2 +- pyqt5/PKGBUILD | 2 +- pyqt6-3d/PKGBUILD | 2 +- pyqt6-charts/PKGBUILD | 2 +- pyqt6-datavisualization/PKGBUILD | 2 +- pyqt6-networkauth/PKGBUILD | 2 +- python-aiohttp/PKGBUILD | 2 +- python-ansiwrap/PKGBUILD | 1 + python-appdirs/PKGBUILD | 4 +- python-apsw/PKGBUILD | 2 +- python-binaryornot/PKGBUILD | 16 +- python-black/PKGBUILD | 1 + python-cachy/PKGBUILD | 1 + python-cryptography/PKGBUILD | 6 +- python-debugpy/PKGBUILD | 2 +- python-et-xmlfile/PKGBUILD | 2 +- python-greenlet/PKGBUILD | 11 +- python-greenlet/python-greenlet-la64.patch | 61 + python-libcst/PKGBUILD | 1 + python-mss/PKGBUILD | 1 + python-nodeenv/PKGBUILD | 13 +- python-nodeenv/nodeenv-loong64.patch | 12 + python-numpy/PKGBUILD | 11 +- python-numpy/add-loongarch-support.patch | 33 + python-parso/PKGBUILD | 2 +- python-poetry/PKGBUILD | 2 +- python-pyelftools/PKGBUILD | 13 +- python-pyelftools/pyelftools-0.29.patch | 452 + python-pylint/PKGBUILD | 1 + python-pyopenssl/PKGBUILD | 2 +- python-pypandoc/PKGBUILD | 2 +- python-rpds-py/PKGBUILD | 2 +- python-simple-term-menu/PKGBUILD | 8 + python-simple-term-menu/cjk-preview.patch | 11 + python-stone/PKGBUILD | 1 + python-urllib3/PKGBUILD | 2 +- python-virtualenv/PKGBUILD | 10 +- python-wstools/PKGBUILD | 2 +- qd/PKGBUILD | 2 +- qemu/PKGBUILD | 15 +- qemu/qemu-4k-pagesize.patch | 58 + qemu/qemu-kvm-la64.patch | 1680 + qt5-base/PKGBUILD | 1 + qt5-doc/PKGBUILD | 7 +- qt5-doc/qt5-base-la64.patch | 12 + qt5-script/PKGBUILD | 7 +- qt5-script/loongarch_ports.patch | 60 + qt5-script/qt5-base-la64.patch | 12 
+ qt5-webengine/PKGBUILD | 4 +- qt5-webengine/qtwebengine-5.15.2-la64.patch | 63993 ++++++++ qt6-quick3dphysics/PKGBUILD | 10 +- qt6-quick3dphysics/qt3d-la64.patch | 31 + qt6-tools/qt6-tools-fix-build.patch | 11 + qtcreator/qtcreator-la64.patch | 179 + quazip/PKGBUILD | 5 +- rathole/PKGBUILD | 2 +- rbw/PKGBUILD | 2 +- reapack/PKGBUILD | 2 +- rebuilderd/PKGBUILD | 2 +- refind/PKGBUILD | 16 +- refind/refind-la64-0.14.0.patch | 729 + repod/0001-add-loong64-support.patch | 33 + repod/PKGBUILD | 14 +- repro-env/PKGBUILD | 2 +- rhit/PKGBUILD | 2 +- rhythmbox/PKGBUILD | 2 +- riff/PKGBUILD | 2 +- ripgrep-all/PKGBUILD | 2 +- riscv64-linux-gnu-glibc/PKGBUILD | 2 +- roc-toolkit/PKGBUILD | 10 +- rosenpass/PKGBUILD | 2 +- rpg-cli/PKGBUILD | 2 +- rpm-tools/PKGBUILD | 7 +- rpm-tools/rpm-add-loongarch.patch | 63 + rq/PKGBUILD | 2 +- ruby-base64/PKGBUILD | 2 +- ruby-bigdecimal/PKGBUILD | 2 +- ruby-cri/PKGBUILD | 2 +- ruby-ffi/PKGBUILD | 2 +- ruby-iconv/PKGBUILD | 2 +- ruby-rake/PKGBUILD | 1 + ruby/PKGBUILD | 18 +- ruff/PKGBUILD | 2 +- runc/PKGBUILD | 17 +- runc/runc-la64.patch | 11 + runst/PKGBUILD | 2 +- rust-bindgen/PKGBUILD | 2 +- rust-script/PKGBUILD | 2 +- rust/PKGBUILD | 69 +- rustscan/PKGBUILD | 2 +- rustypaste-cli/PKGBUILD | 2 +- rustypaste/PKGBUILD | 2 +- sad/PKGBUILD | 2 +- sbsigntools/PKGBUILD | 5 +- sbsigntools/sbsigntools-la64.patch | 24 + scaleway-cli/PKGBUILD | 4 +- sccache/PKGBUILD | 14 +- sdl2_gfx/PKGBUILD | 2 +- seabios/PKGBUILD | 9 +- selene/PKGBUILD | 2 +- sentry-cli/PKGBUILD | 8 +- sequoia-chameleon-gnupg/PKGBUILD | 2 +- sequoia-sop/PKGBUILD | 4 +- sequoia-sq/PKGBUILD | 4 +- sequoia-wot/PKGBUILD | 2 +- sh4d0wup/PKGBUILD | 2 +- shaderc/PKGBUILD | 1 + sharutils/PKGBUILD | 2 + sheldon/PKGBUILD | 10 +- shotgun/PKGBUILD | 4 +- signon-plugin-oauth2/PKGBUILD | 10 +- signon-ui/PKGBUILD | 10 +- signon-ui/signon-ui-loong64-bad-fix.patch | 31 + singularity/PKGBUILD | 2 +- skim/PKGBUILD | 2 +- sn0int/PKGBUILD | 2 +- sniffglue/PKGBUILD | 2 +- sniffnet/PKGBUILD | 2 +- sonic/PKGBUILD | 2 +- spicy-launcher/PKGBUILD | 4 +- spirv-tools/PKGBUILD | 2 +- spotify-launcher/PKGBUILD | 2 +- spotifyd/PKGBUILD | 2 +- spytrap-adb/PKGBUILD | 2 +- sshx/PKGBUILD | 2 +- stalonetray/PKGBUILD | 2 +- stardict/PKGBUILD | 3 + starship/PKGBUILD | 2 +- stochas/PKGBUILD | 4 +- suitesparse/PKGBUILD | 6 + supermin/PKGBUILD | 12 +- sws/PKGBUILD | 2 +- syslog-ng/PKGBUILD | 4 +- systeroid/PKGBUILD | 2 +- taplo-cli/PKGBUILD | 4 +- taskwarrior-tui/PKGBUILD | 2 +- tealdeer/PKGBUILD | 2 +- tectonic/PKGBUILD | 2 +- tere/PKGBUILD | 2 +- texlab/PKGBUILD | 2 +- texlive-bin/PKGBUILD | 7 +- texlive-bin/texlive-bin-la64.patch | 11150 ++ thunderbird/PKGBUILD | 10 +- thunderbird/firefox-115-loong.patch | 619 + thunderbird/mozconfig.cfg | 7 +- tickrs/PKGBUILD | 2 +- toastify/PKGBUILD | 2 +- tokei/PKGBUILD | 8 +- torchvision/PKGBUILD | 4 +- tracker3-miners/PKGBUILD | 4 +- .../tracker-miners-fix-build.patch | 13 + trippy/PKGBUILD | 2 +- typst/PKGBUILD | 2 +- ublock-origin/PKGBUILD | 1 + unrar/PKGBUILD | 1 + unzip/PKGBUILD | 1 + updlockfiles/PKGBUILD | 2 +- upx/0001-just-for-la64-build.patch | 35 + upx/PKGBUILD | 3 + uucp/PKGBUILD | 2 + v2ray-domain-list-community/PKGBUILD | 3 + v2ray/PKGBUILD | 4 + valgrind/PKGBUILD | 18 +- valgrind/valgrind-3.21-la64.patch | 93230 +++++++++++ vaultwarden/PKGBUILD | 2 +- virt-manager/600.patch | 186 + virt-manager/PKGBUILD | 13 +- virtiofsd/PKGBUILD | 2 +- virtualbox-host-modules-arch/PKGBUILD | 2 +- viu/PKGBUILD | 2 +- vivid/PKGBUILD | 2 +- vtk/PKGBUILD | 21 +- vtk/vtk-loong64.patch | 15 + wasm-pack/PKGBUILD | 
2 +- wasmtime/PKGBUILD | 2 +- wayland-protocols/PKGBUILD | 2 +- wayland/PKGBUILD | 2 +- webkit2gtk-4.1/PKGBUILD | 19 +- webkit2gtk-4.1/webkit2-gtk-fix-build.patch | 10 + .../webkit2gtk-fix-cmake-build.patch | 11 + webkit2gtk/PKGBUILD | 21 +- webkit2gtk/webkit2-gtk-fix-build.patch | 10 + webkit2gtk/webkit2gtk-fix-cmake-build.patch | 11 + webkitgtk-6.0/PKGBUILD | 15 +- .../webkit2gtk-fix-cmake-build.patch | 11 + webrtc-audio-processing-1/PKGBUILD | 7 +- .../webrtc-audio-processing-la64.patch | 12 + webrtc-audio-processing/PKGBUILD | 4 +- .../webrtc-audio-processing-la64.patch | 14 + whipper/PKGBUILD | 2 +- wiki-tui/PKGBUILD | 2 +- wldash/PKGBUILD | 4 +- wolf-shaper/PKGBUILD | 2 +- woodpecker/PKGBUILD | 4 + wpewebkit/PKGBUILD | 16 +- wpewebkit/webkit2gtk-fix-cmake-build.patch | 11 + x11vnc/PKGBUILD | 4 +- x264/PKGBUILD | 9 +- x86_64-linux-gnu-binutils/PKGBUILD | 66 + ...24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc | 51 + x86_64-linux-gnu-gcc/PKGBUILD | 96 + ...975A70E63C361C73AE69EF6EEB81F8981C74C7.asc | 53 + ...C235A34C46AA3FFB293709A328C3A2C3C45C06.asc | 16 + ...A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc | 122 + x86_64-linux-gnu-gdb/PKGBUILD | 49 + x86_64-linux-gnu-glibc/PKGBUILD | 126 + ...73542B39962DF7B299931416792B4EA25340F8.asc | 54 + ...7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc | 68 + x86_64-linux-gnu-glibc/reenable_DT_HASH.patch | 28 + x86_64-linux-gnu-glibc/sdt-config.h | 6 + x86_64-linux-gnu-glibc/sdt.h | 430 + x86_64-linux-gnu-linux-api-headers/PKGBUILD | 29 + xdg-desktop-portal-wlr/PKGBUILD | 2 +- xf86-video-loongson/PKGBUILD | 46 + ...h-against-Multimedia-Video-Controlle.patch | 32 + xorg-server/10-modeset.conf | 6 + xorg-server/PKGBUILD | 9 + xsd/0120-g++10.patch | 19 + xsd/xsd-c++17.patch | 48 + xsv/PKGBUILD | 2 +- yaegi/PKGBUILD | 2 +- yazi/PKGBUILD | 2 +- zbus_xmlgen/PKGBUILD | 2 +- zellij/PKGBUILD | 2 +- zenith/PKGBUILD | 2 +- zip/PKGBUILD | 2 + zola/PKGBUILD | 2 +- zoxide/PKGBUILD | 2 +- zram-generator/PKGBUILD | 6 + 776 files changed, 646613 insertions(+), 1231 deletions(-) create mode 100644 0ad/0ad-fix-build.patch create mode 100644 0ad/0ad-la64.patch create mode 100644 abseil-cpp/abseil-cpp-la64.patch create mode 100644 acpica/acpica-fix-build.patch create mode 100644 acpica/acpica-unix-la64.patch create mode 100644 aiksaurus/aiksaurus-fix-build.patch create mode 100644 android-tools/android-tools-la64.patch create mode 100644 apr/apr-1.7-fix-build.patch create mode 100644 archinstall/archinstall-la64-2.6.3.patch create mode 100644 avisynthplus/avisynthplus-la64.patch create mode 100644 babl/babl-fix-gir-pkgname.patch create mode 100644 bigloo/bigloo-la64.patch create mode 100644 boost/boost-1.79.0-la64.patch create mode 100644 box2d/box2d-fix-build.patch create mode 100644 breezy/breezy-fix-install.patch create mode 100644 buildkit/0001-add-loongarch64-support.patch create mode 100644 cdrtools/cdrtools-la64.patch create mode 100644 chmlib/chmlib-fix-gcc13.patch create mode 100644 clang14/0001-add-loong64-support.patch create mode 100644 clang14/0002-add-loong64-support.patch create mode 100644 cln/cln-la64.patch create mode 100644 compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc create mode 100644 criu/2183.patch create mode 100644 dconf-editor/dconf-editor-fix-meson.patch create mode 100644 deepin-desktop-base/deepin-deskto-base-la64.patch create mode 100644 deepin-grand-search/62.patch create mode 100644 devtools/devtools-loong64-1.0.4.patch create mode 100644 dfrs/dfrs-la64.patch create mode 100644 docker-machine/docker-machine-la64.patch create 
mode 100644 docker/moby-la64.patch create mode 100644 edk2/60-edk2-loongarch64.json create mode 100644 edk2/edk2-use-env-toolchains.patch create mode 100644 edk2/relax_edk2_gcc14.diff create mode 100644 efitools/efitools-la64.patch create mode 100644 emacs/emacs-la64.patch create mode 100644 fbterm/0001-Fix-build-with-gcc-6.patch create mode 100644 fbterm/PKGBUILD create mode 100644 fbterm/color_palette.patch create mode 100644 fbterm/fbconfig.patch create mode 100644 fbterm/fbterm.patch create mode 100644 fbterm/fbtermrc create mode 100644 fbterm/fix_ftbfs_crosscompile.patch create mode 100644 fbterm/fix_ftbfs_epoll.patch create mode 100644 ffcall/libffcall-la64-2.4.patch create mode 100644 firefox-developer-edition/0001-Add-support-for-LoongArch64.patch create mode 100644 firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch create mode 100644 firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch create mode 100644 firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch create mode 100644 firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch create mode 100644 firefox/0001-Add-support-for-LoongArch64.patch create mode 100644 firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch create mode 100644 firefox/0003-Remove-architectural-limit-on-VA-API-support.patch create mode 100644 firefox/0004-Enable-WebRTC-for-LoongArch.patch create mode 100644 firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch create mode 100644 flashrom/0001-Loongson-3-SPI-tmp.patch create mode 100644 flashrom/0002-Speed-up.patch create mode 100644 flashrom/0003-Add-support-for-loongarch64.patch create mode 100644 fuse2/fuse-closefrom.patch create mode 100644 fuse2/fuse-loongarch.patch create mode 100644 fwupd-efi/0001-add-support-for-loongarch64.patch create mode 100644 fwupd/fwupd-1.9.5-loong64.patch create mode 100644 geos/geos-3.11.1-gcc13.patch create mode 100644 gloox/gloox-fix-build.patch create mode 100644 gn/gn-fix-build.patch create mode 100644 gnome-dictionary/gnome-dictionary-fix-meson.patch create mode 100644 gnome-font-viewer/gnome-font-viewer-meson.patch create mode 100644 gnome-tetravex/gnome-tetravex-fix-meson.patch create mode 100644 gnu-efi/gnu-efi-3.0.17-la64.patch create mode 100644 gtk4/gtk-objcopy.patch create mode 100644 hidapi/hidapi-fix-build.patch create mode 100644 hitori/hitori-fix-meson.patch create mode 100644 htmlcxx/htmlcxx-la64.patch create mode 100644 ipxe/ipxe-la64.patch delete mode 100644 java-openjdk/freedesktop-java.desktop delete mode 100644 java-openjdk/freedesktop-jconsole.desktop delete mode 100644 java-openjdk/freedesktop-jshell.desktop delete mode 100644 java-openjdk/install_jdk-openjdk.sh delete mode 100644 java-openjdk/install_jre-openjdk-headless.sh delete mode 100644 java-openjdk/install_jre-openjdk.sh create mode 100644 java11-openjdk/jdk11-11.0.20.1-la64.patch create mode 100644 java17-openjdk/jdk17-17.0.9.8-la64.patch create mode 100644 java8-openjdk/jdk8u382-la64.patch create mode 100644 jemalloc/add-loongarch64.patch create mode 100644 js102/js102-loong64-jit.patch create mode 100644 js91/mozjs-la64.patch create mode 100644 js91/spidermonkey-91-add-loongarch.patch create mode 100644 kcov/0001-Add-basic-support-for-LoongArch-architecture.patch create mode 100644 libb2/libb2-fix-build.patch create mode 100644 libfbclient/fbclient-la64-4.0.0.patch create mode 100644 libfbclient/fbclient-la64.patch create mode 100644 libfido2/libfido2-no-ssp.patch create mode 100644 libgda/libgda-la64.patch create mode 
100644 libopenraw/libopenraw-fix-build.patch create mode 100644 liborcus/liborcus-cstdint.patch create mode 100644 libotr/libotr-fix-build.patch create mode 100644 libsmbios/0001-add-support-for-loongarch64.patch create mode 100644 liburcu/userspace-rcu-loongarch64.patch create mode 100644 libvirt/libvirt-loongarch.patch create mode 100644 libyuv/0001-fix-build-error.patch create mode 100644 linux-hardened/config.la64 create mode 100644 linux-hardened/remove_shm_align_mask.diff create mode 100644 liteide/liteide-fix-build.patch create mode 100644 lld/lld-la64.patch create mode 100644 llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch create mode 100644 llvm/llvm-newreloc-la64.patch create mode 100644 llvm14/llvm-loong64.patch create mode 100644 luarocks/luarocks-la64.patch create mode 100644 lxc/4363.patch create mode 100644 mariadb/mariadb-fix-build.patch create mode 100644 mate-terminal/theme-colors-false.patch create mode 100644 materialx/materialx-fix-build.patch create mode 100644 mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch create mode 100644 mplayer/mplayer-la64.patch create mode 100644 musl/0001-musl-add-loongarch64-support.patch create mode 100644 nautilus-sendto/nautilus-sendto-fix-meson.patch create mode 100644 neofetch/neofetch-la64.patch create mode 100644 netplan/netplan-disable-pandoc.patch create mode 100644 ntp/0001-libntp-Do-not-use-PTHREAD_STACK_MIN-on-glibc.patch create mode 100644 ntp/ntp-ssp-la.patch create mode 100644 ocaml/ocaml-5.0.0-la64.patch create mode 100644 open-iscsi/open-iscsi-fix-build.patch create mode 100644 openblas/fix-loong.patch create mode 100644 openjade/openjade-nola.patch create mode 100644 openmp/openmp-loong64.patch create mode 100644 pacman-contrib/pkgbuild-vim-la64.patch create mode 100644 pixman/83.patch create mode 100644 pkgfile/pkgfile-use-loong64.patch create mode 100644 postgresql/add-loongarch-support.patch create mode 100644 psiconv/psiconv-fix-build.patch create mode 100644 python-greenlet/python-greenlet-la64.patch create mode 100644 python-nodeenv/nodeenv-loong64.patch create mode 100644 python-numpy/add-loongarch-support.patch create mode 100644 python-pyelftools/pyelftools-0.29.patch create mode 100644 python-simple-term-menu/cjk-preview.patch create mode 100644 qemu/qemu-4k-pagesize.patch create mode 100644 qemu/qemu-kvm-la64.patch create mode 100644 qt5-doc/qt5-base-la64.patch create mode 100644 qt5-script/loongarch_ports.patch create mode 100644 qt5-script/qt5-base-la64.patch create mode 100644 qt5-webengine/qtwebengine-5.15.2-la64.patch create mode 100644 qt6-quick3dphysics/qt3d-la64.patch create mode 100644 qt6-tools/qt6-tools-fix-build.patch create mode 100644 qtcreator/qtcreator-la64.patch create mode 100644 refind/refind-la64-0.14.0.patch create mode 100644 repod/0001-add-loong64-support.patch create mode 100644 rpm-tools/rpm-add-loongarch.patch create mode 100644 runc/runc-la64.patch create mode 100644 sbsigntools/sbsigntools-la64.patch create mode 100644 signon-ui/signon-ui-loong64-bad-fix.patch create mode 100644 texlive-bin/texlive-bin-la64.patch create mode 100644 thunderbird/firefox-115-loong.patch create mode 100644 tracker3-miners/tracker-miners-fix-build.patch create mode 100644 upx/0001-just-for-la64-build.patch create mode 100644 valgrind/valgrind-3.21-la64.patch create mode 100644 virt-manager/600.patch create mode 100644 vtk/vtk-loong64.patch create mode 100644 webkit2gtk-4.1/webkit2-gtk-fix-build.patch create mode 100644 webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch create mode 100644 
webkit2gtk/webkit2-gtk-fix-build.patch create mode 100644 webkit2gtk/webkit2gtk-fix-cmake-build.patch create mode 100644 webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch create mode 100644 webrtc-audio-processing-1/webrtc-audio-processing-la64.patch create mode 100644 webrtc-audio-processing/webrtc-audio-processing-la64.patch create mode 100644 wpewebkit/webkit2gtk-fix-cmake-build.patch create mode 100644 x86_64-linux-gnu-binutils/PKGBUILD create mode 100644 x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc create mode 100644 x86_64-linux-gnu-gcc/PKGBUILD create mode 100644 x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc create mode 100644 x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc create mode 100644 x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc create mode 100644 x86_64-linux-gnu-gdb/PKGBUILD create mode 100644 x86_64-linux-gnu-glibc/PKGBUILD create mode 100644 x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc create mode 100644 x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc create mode 100644 x86_64-linux-gnu-glibc/reenable_DT_HASH.patch create mode 100644 x86_64-linux-gnu-glibc/sdt-config.h create mode 100644 x86_64-linux-gnu-glibc/sdt.h create mode 100644 x86_64-linux-gnu-linux-api-headers/PKGBUILD create mode 100644 xf86-video-loongson/PKGBUILD create mode 100644 xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch create mode 100644 xorg-server/10-modeset.conf create mode 100644 xsd/0120-g++10.patch create mode 100644 xsd/xsd-c++17.patch diff --git a/0ad/0ad-fix-build.patch b/0ad/0ad-fix-build.patch new file mode 100644 index 0000000000..d3ffd43165 --- /dev/null +++ b/0ad/0ad-fix-build.patch @@ -0,0 +1,18 @@ +--- a/build.sh 2023-04-20 17:19:57.567640306 +0800 ++++ b/build.sh 2023-04-20 17:20:56.920551226 +0800 +@@ -140,6 +140,7 @@ + ${CONF_OPTS} \ + --enable-debug \ + --disable-optimize \ ++ --disable-new-pass-manager \ + --enable-gczeal + ${MAKE} ${MAKE_OPTS} + cd .. +@@ -151,6 +152,7 @@ + LLVM_OBJDUMP="${LLVM_OBJDUMP}" \ + ${CONF_OPTS} \ ++ --disable-new-pass-manager \ + --enable-optimize + ${MAKE} ${MAKE_OPTS} + cd .. 
+ diff --git a/0ad/0ad-la64.patch b/0ad/0ad-la64.patch new file mode 100644 index 0000000000..2d0d6a3f72 --- /dev/null +++ b/0ad/0ad-la64.patch @@ -0,0 +1,97 @@ +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild b/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild +index 9344cc4e7..6c3f64524 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild +@@ -32,6 +32,7 @@ arches = { + 'x86_64': 'x64', + 'x86': 'ia32', + 'aarch64': 'arm64', ++ 'loongarch64': 'loongarch64', + } + + gyp_vars['host_arch'] = arches.get(CONFIG['HOST_CPU_ARCH'], CONFIG['HOST_CPU_ARCH']) +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure b/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure +index 2fdeb5497..3a906aef8 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure +@@ -738,6 +738,9 @@ def split_triplet(triplet, allow_msvc=False): + elif cpu.startswith('aarch64'): + canonical_cpu = 'aarch64' + endianness = 'little' ++ elif cpu.startswith('loongarch64'): ++ canonical_cpu = 'loongarch64' ++ endianness = 'little' + elif cpu == 'sh4': + canonical_cpu = 'sh4' + endianness = 'little' +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h b/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h +index 8c6a0e16e..afb63176d 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h +@@ -123,6 +123,7 @@ int main(int argc, char** argv) { + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ ++ defined(__loongarch64) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ + defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ + defined(__riscv) || defined(__e2k__) || \ +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h b/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h +index 0486cbad1..0a95e11e7 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h +@@ -391,7 +391,7 @@ inline bool AtomicOperations::isLockfreeJS(int32_t size) { + #elif defined(__ppc__) || defined(__PPC__) || defined(__sparc__) || \ + defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ + defined(__PPC64LE__) || defined(__alpha__) || defined(__hppa__) || \ +- defined(__sh__) || defined(__s390__) || defined(__s390x__) ++ defined(__loongarch64) || defined(__sh__) || defined(__s390__) || defined(__s390x__) + # include "jit/shared/AtomicOperations-feeling-lucky.h" + #else + # error "No AtomicOperations support provided for this platform" +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h +index f031495b7..55a40ab39 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h ++++ 
b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h +@@ -93,6 +93,7 @@ int main(int argc, char** argv) { + #if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ ++ defined(__loongarch__) || \ + defined(__mips__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp +index fbd336471..1de67549e 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp +@@ -141,6 +141,9 @@ + #elif defined __hppa + # define RETURN_INSTR 0xe840c002 /* bv,n r0(rp) */ + ++#elif defined __loongarch64 ++# define RETURN_INSTR 0x4c000020 /* jirl */ ++ + #elif defined __mips + # define RETURN_INSTR 0x03e00008 /* jr ra */ + +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py b/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py +index 7542dcdc6..87e1461be 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py +@@ -45,6 +45,7 @@ CPU_bitness = { + 'arm': 32, + 'hppa': 32, + 'ia64': 64, ++ 'loongarch64': 64, + 'mips32': 32, + 'mips64': 64, + 'ppc': 32, +@@ -83,6 +84,7 @@ CPU_preprocessor_checks = OrderedDict(( + ('ppc', '__powerpc__'), + ('Alpha', '__alpha__'), + ('hppa', '__hppa__'), ++ ('loongarch64', '__loongarch64'), + ('sparc64', '__sparc__ && __arch64__'), + ('sparc', '__sparc__'), + ('mips64', '__mips64'), diff --git a/0ad/PKGBUILD b/0ad/PKGBUILD index 7286e409a0..453c704d8a 100644 --- a/0ad/PKGBUILD +++ b/0ad/PKGBUILD @@ -20,12 +20,16 @@ source=("https://releases.wildfiregames.com/$pkgname-$_pkgver-unix-build.tar.xz" https://github.com/0ad/0ad/commit/839edc3a.patch https://github.com/0ad/0ad/commit/093e1eb2.patch https://github.com/0ad/0ad/commit/d2426312.patch) + 0ad-fix-build.patch + 0ad-la64.patch) sha512sums=('aaf647d5d8454c244015d2a198beeaaebc571a5bc96446f3acff8dbd05f9843029c500bf4162651a5e1fcdb42bd5fb5b4f5c512c78372479fbd8565dd093f272' '3a0f935ab05e1c0d6a926ba02a5ed72afbb94b6910acaad77661b927680f192a06c7614287fad2ff8a54e3e1ee814614c9abfc9497a27e86b9e58ae1f6eebbfb' '748a75420541947e2a215b3a8789a0e137179e4981d0977e1c4b20cd7b86af2d96b9976e04d60ace8d5ee465d542cadc42ee9bceedaaa97d2b320f533e3e3892' '1dfc8a0c6ac29040f72d9bbf6b631a74cbdec444b9078a015345139228666354d9b5059f85b640ce3afc0f590bcbe8afd5e158509a0c95751e1cd69fece46876' 'a7fd1454385f56b7c8cb0fc6ac001761d4419df4aeec570ba846c7df4eb327d25b9ff1a7946cb334315109fa90ca2c1820583619f4e1ec5d53805afa08e10093' '5f32d47f01d845e07b2f919c9b04ac5e50dc9977fa97f981eba4a53677a29d797d0d76bc385ac047dd7c7d24af7d95cd8256d433bd43ce1a6606763c0ea736cb') + '956effc37bbad8ca44a5e82a8750ca82c1c2347152dd684ebc2921953d4fa81ef9291b5bb5de05559b2b4ef79c336b837216892f0bcf806e50aac8c4ea42edde' + 'c01e52a4241736eda82f6002c3627d9c4b5b505109969fc608d95dd71db8681df8f3de6a372bca8fe977bee14f5180f4c27681e40d26b0a06ddc556122886d04') prepare() { cd "$pkgname-$_pkgver" @@ -36,6 +40,12 @@ prepare() { patch -p1 -i ../839edc3a.patch # Fix build with 
fmt 10 patch -p1 -i ../093e1eb2.patch # Fix build with GCC 13 patch -p1 -i ../d2426312.patch # Fix build with libxml2 2.12 + + patch -d libraries/source/spidermonkey -p1 -i $srcdir/0ad-fix-build.patch + cp $srcdir/0ad-la64.patch libraries/source/spidermonkey/0ad-la64.patch + echo "patch -p5 < ../0ad-la64.patch" >> libraries/source/spidermonkey/patch.sh + + echo "cp /usr/share/automake-1.16/config.* build/autoconf/" >> libraries/source/spidermonkey/patch.sh } build() { diff --git a/a2ps/PKGBUILD b/a2ps/PKGBUILD index 05f2552547..cf306591f0 100644 --- a/a2ps/PKGBUILD +++ b/a2ps/PKGBUILD @@ -28,6 +28,8 @@ prepare() { build() { cd ${pkgname}-${pkgver} + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} libtoolize --force --copy autoreconf --force --install -I m4 LIBS+="-lm" ./configure --prefix=/usr --sysconfdir=/etc/a2ps \ diff --git a/aardvark-dns/PKGBUILD b/aardvark-dns/PKGBUILD index 4a219235cf..7e1798efc3 100644 --- a/aardvark-dns/PKGBUILD +++ b/aardvark-dns/PKGBUILD @@ -31,7 +31,7 @@ pkgver() { prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/abseil-cpp/PKGBUILD b/abseil-cpp/PKGBUILD index 9600745050..fbf442e730 100644 --- a/abseil-cpp/PKGBUILD +++ b/abseil-cpp/PKGBUILD @@ -11,13 +11,16 @@ license=('Apache') depends=('gcc-libs') makedepends=('cmake' 'gtest') source=("https://github.com/abseil/abseil-cpp/archive/$pkgver/$pkgname-$pkgver.tar.gz" - scoped-mock-log.patch) + scoped-mock-log.patch + abseil-cpp-la64.patch) sha256sums=('987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed' - 'a6cbc612a2b96fcbd52d081e03e8581107ceb4827edb19d96510a31c568e1396') + 'a6cbc612a2b96fcbd52d081e03e8581107ceb4827edb19d96510a31c568e1396' + '8817cf256a94dd9059bccd540a4d6bbe0d9606c600e8543a1b1011226c350b23') prepare() { cd "$srcdir/$pkgname-$pkgver" patch -p1 -i ../scoped-mock-log.patch # Install target needed by protobuf + patch -p1 -i $srcdir/abseil-cpp-la64.patch } build() { diff --git a/abseil-cpp/abseil-cpp-la64.patch b/abseil-cpp/abseil-cpp-la64.patch new file mode 100644 index 0000000000..6b538ea737 --- /dev/null +++ b/abseil-cpp/abseil-cpp-la64.patch @@ -0,0 +1,13 @@ +Index: abseil-cpp-20211102.0/absl/debugging/internal/examine_stack.cc +=================================================================== +--- abseil-cpp-20211102.0.orig/absl/debugging/internal/examine_stack.cc ++++ abseil-cpp-20211102.0/absl/debugging/internal/examine_stack.cc +@@ -57,6 +57,8 @@ void* GetProgramCounter(void* vuc) { + return reinterpret_cast(context->uc_mcontext.gregs[14]); + #elif defined(__ia64__) + return reinterpret_cast(context->uc_mcontext.sc_ip); ++#elif defined(__loongarch64) ++ return reinterpret_cast(context->uc_mcontext.__pc); + #elif defined(__m68k__) + return reinterpret_cast(context->uc_mcontext.gregs[16]); + #elif defined(__mips__) diff --git a/acme-redirect/PKGBUILD b/acme-redirect/PKGBUILD index 46bb1a042e..d41035a003 100644 --- a/acme-redirect/PKGBUILD +++ b/acme-redirect/PKGBUILD @@ -22,13 +22,20 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + mkdir .cargo + cat > .cargo/config.toml < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + #define USE_MS_ABI 1 + #endif +@@ -189,11 +189,11 @@ + + #define VOID void + +-#if defined(__ia64__) || defined(__x86_64__) ++#if defined(__ia64__) || defined(__x86_64__) || 
defined(__loongarch64) + + #define ACPI_MACHINE_WIDTH 64 + +-#if defined(__x86_64__) ++#if defined(__x86_64__) || defined(__loongarch64) + + /* for x86_64, EFI_FUNCTION_WRAPPER must be defined */ + +Index: acpica-unix-20210331/source/include/platform/aclinux.h +=================================================================== +--- acpica-unix-20210331.orig/source/include/platform/aclinux.h ++++ acpica-unix-20210331/source/include/platform/aclinux.h +@@ -325,6 +325,7 @@ + #if defined(__ia64__) || (defined(__x86_64__) && !defined(__ILP32__)) ||\ + defined(__aarch64__) || defined(__PPC64__) ||\ + defined(__s390x__) ||\ ++ defined(__loongarch64) ||\ + (defined(__riscv) && (defined(__LP64__) || defined(_LP64))) + #define ACPI_MACHINE_WIDTH 64 + #define COMPILER_DEPENDENT_INT64 long diff --git a/aida-x/PKGBUILD b/aida-x/PKGBUILD index 8e66ad45b7..c10a644983 100644 --- a/aida-x/PKGBUILD +++ b/aida-x/PKGBUILD @@ -172,5 +172,5 @@ package_aida-x-vst3() { ) # mv -v $pkgname/* "$pkgdir" - install -vDm 755 build/bin/$_name.vst3/Contents/$CARCH-linux/*.so -t "$pkgdir/usr/lib/vst3/$_name.vst3/Contents/$CARCH-linux/" + install -vDm 755 build/bin/$_name.vst3/Contents/`uname -m`-linux/*.so -t "$pkgdir/usr/lib/vst3/$_name.vst3/Contents/`uname -m`-linux/" } diff --git a/aiksaurus/PKGBUILD b/aiksaurus/PKGBUILD index 86a5b37543..9a4acc1985 100644 --- a/aiksaurus/PKGBUILD +++ b/aiksaurus/PKGBUILD @@ -11,19 +11,24 @@ arch=('loong64' 'x86_64') depends=('gcc-libs') source=(https://downloads.sourceforge.net/${pkgname}/${pkgname}-${pkgver}.tar.gz aiksaurus-gcc44.patch - format-security.patch) + format-security.patch + aiksaurus-fix-build.patch) sha512sums=('48591850f28f1a8f4b4986df14090ef7bd57cbfbad739cb0013db021f6f5bcb3c592b38e36774735499e27b9e99330504f8d9c6022158e25469cbc81d13f7463' '66db53f7499425eb1ff572df4a674f378ed681edeb48ea5926b21d39c8a399c36740e03de101e23a47e043fcce29f66a03c64dc813dc5beda1043d0057711fa5' - '72ebdc63cbb2c56bd8274f92501cbbae8c66e7d81b11b8fdeda38149da4bf44ab88699d248093eedc9813f6cd60e2a1f360fb39d778b4b7827777b1d003ab1dc') + '72ebdc63cbb2c56bd8274f92501cbbae8c66e7d81b11b8fdeda38149da4bf44ab88699d248093eedc9813f6cd60e2a1f360fb39d778b4b7827777b1d003ab1dc' + 'bcbffca8b6632b708550da7c15c46457f6e9b74e4bd5705c0310a51f0bf37ba215e328a88e7f4d870276414d646aa64c88933ab22dd64d6b69215030958b1a4e') prepare() { cd $pkgname-$pkgver patch -p1 < ../format-security.patch patch -p0 < ../aiksaurus-gcc44.patch + patch -p1 -i $srcdir/aiksaurus-fix-build.patch } build() { cd ${pkgname}-${pkgver} + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} export CXXFLAGS+=' -std=c++14' ./configure --prefix=/usr diff --git a/aiksaurus/aiksaurus-fix-build.patch b/aiksaurus/aiksaurus-fix-build.patch new file mode 100644 index 0000000000..05a717398a --- /dev/null +++ b/aiksaurus/aiksaurus-fix-build.patch @@ -0,0 +1,400 @@ +diff -uNr aiksaurus-1.2.1/base/Aiksaurus.cpp aiksaurus-1.2.1.ok/base/Aiksaurus.cpp +--- aiksaurus-1.2.1/base/Aiksaurus.cpp 2003-08-09 00:18:43.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/Aiksaurus.cpp 2022-06-23 12:28:39.298196569 +0800 +@@ -98,21 +98,21 @@ + public: + + ThesaurusImpl(const char* mfile, const char* wfile) +- throw(AiksaurusException); ++ noexcept(false); + + ~ThesaurusImpl() throw(); + + const char* word() const throw(); + +- bool find(const char* word) throw(AiksaurusException); +- const char* next(int& id) throw(AiksaurusException); +- const char* similar() throw(AiksaurusException); ++ bool find(const char* word) noexcept(false); ++ 
const char* next(int& id) noexcept(false); ++ const char* similar() noexcept(false); + }; + + + + ThesaurusImpl::ThesaurusImpl(const char* mfile, const char* wfile) +-throw(AiksaurusException) ++noexcept(false) + : d_meanings(mfile), + d_words(wfile), + d_links(NULL), +@@ -152,7 +152,7 @@ + + + bool +-ThesaurusImpl::find(const char* word) throw(AiksaurusException) ++ThesaurusImpl::find(const char* word) noexcept(false) + { + try + { +@@ -197,7 +197,7 @@ + + + +-const char* ThesaurusImpl::next(int& id) throw(AiksaurusException) ++const char* ThesaurusImpl::next(int& id) noexcept(false) + { + if (d_currentStream >= d_meaningStreams.size()) + return ""; +@@ -220,7 +220,7 @@ + } + + +-const char* ThesaurusImpl::similar() throw(AiksaurusException) ++const char* ThesaurusImpl::similar() noexcept(false) + { + if (d_similarID < d_similarStop) + { +diff -uNr aiksaurus-1.2.1/base/MeaningsFile.cpp aiksaurus-1.2.1.ok/base/MeaningsFile.cpp +--- aiksaurus-1.2.1/base/MeaningsFile.cpp 2003-06-15 18:17:56.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/MeaningsFile.cpp 2022-06-23 12:29:38.830290837 +0800 +@@ -74,7 +74,7 @@ + // MeaningStream::read + // Put the next integer in the stream into x. + // Return true normally, false if EOF. +- bool read(int& x) throw(AiksaurusException); ++ bool read(int& x) noexcept(false); + }; + + +@@ -104,7 +104,7 @@ + // a link. (EOF or problem reading file). + // + inline bool +-MeaningStream::read(int& x) throw(AiksaurusException) ++MeaningStream::read(int& x) noexcept(false) + { + bool ret = true; + +@@ -166,7 +166,7 @@ + // All we need to do is get a handle to the file. + // We'll also check to make sure our meanings file opens ok. + // +-MeaningsFile::MeaningsFile(const char* fname) throw(AiksaurusException) ++MeaningsFile::MeaningsFile(const char* fname) noexcept(false) + { + d_file_ptr = fopen(fname, "rb"); + if (!d_file_ptr) +@@ -195,7 +195,7 @@ + // line, plus one slot for end-of-links (-1). + // + int +-MeaningsFile::_readline(MeaningStream& s, int* buffer) throw(AiksaurusException) ++MeaningsFile::_readline(MeaningStream& s, int* buffer) noexcept(false) + { + int i = 0; + +@@ -225,7 +225,7 @@ + // the function is called. + // + int* +-MeaningsFile::getWords(int id) throw(AiksaurusException) ++MeaningsFile::getWords(int id) noexcept(false) + { + // First we need to create our buffer to return. + // We know that there are at most s_dataMaxLineLength +diff -uNr aiksaurus-1.2.1/base/MeaningsFile.h aiksaurus-1.2.1.ok/base/MeaningsFile.h +--- aiksaurus-1.2.1/base/MeaningsFile.h 2003-06-15 18:17:56.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/MeaningsFile.h 2022-06-23 12:22:49.261982237 +0800 +@@ -46,16 +46,16 @@ + + // Utility function + int _readline(MeaningStream& s, int* buffer) +- throw(AiksaurusException); ++ noexcept(false); + + public: + + // Creation and Destruction +- MeaningsFile(const char* fname) throw(AiksaurusException); ++ MeaningsFile(const char* fname) noexcept(false); + ~MeaningsFile() throw(); + + // Word Lookup +- int* getWords(int id) throw(AiksaurusException); ++ int* getWords(int id) noexcept(false); + }; + } + +diff -uNr aiksaurus-1.2.1/base/WordsFile.cpp aiksaurus-1.2.1.ok/base/WordsFile.cpp +--- aiksaurus-1.2.1/base/WordsFile.cpp 2003-06-15 18:17:58.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/WordsFile.cpp 2022-06-23 12:30:29.725543860 +0800 +@@ -112,7 +112,7 @@ + // Attempt to safely initialize the Words file. Might have problems + // with running out of memory or file not found. 
+ // +-WordsFile::WordsFile(const char* fname) throw(AiksaurusException) ++WordsFile::WordsFile(const char* fname) noexcept(false) + { + + try +@@ -171,7 +171,7 @@ + // Returns the index of the word on success, or -1 on + // failure. + // +-bool WordsFile::findWord(const char* str, int& index) throw(AiksaurusException) ++bool WordsFile::findWord(const char* str, int& index) noexcept(false) + { + // Create copy of str, so that we can turn spaces into colons. + // We only need to copy the first s_wordlen + 1 bytes to ensure +@@ -280,7 +280,7 @@ + // loadWord() + // Read a particular word from the Words file. + // +-void WordsFile::loadWord(int id) throw(AiksaurusException) ++void WordsFile::loadWord(int id) noexcept(false) + { + assert(id >= 0); + assert(id < getSize()); +diff -uNr aiksaurus-1.2.1/base/WordsFile.h aiksaurus-1.2.1.ok/base/WordsFile.h +--- aiksaurus-1.2.1/base/WordsFile.h 2003-06-15 18:17:59.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/WordsFile.h 2022-06-23 12:23:41.711944968 +0800 +@@ -55,12 +55,12 @@ + public: + + // Creation and Destruction +- WordsFile(const char* fname) throw(AiksaurusException); ++ WordsFile(const char* fname) noexcept(false); + ~WordsFile() throw(); + + // Word Lookup +- void loadWord(int id) throw(AiksaurusException); +- bool findWord(const char* str, int& index) throw(AiksaurusException); ++ void loadWord(int id) noexcept(false); ++ bool findWord(const char* str, int& index) noexcept(false); + + // Inspection + int getSize() const throw(); +diff -uNr aiksaurus-1.2.1/base/WordStream.h aiksaurus-1.2.1.ok/base/WordStream.h +--- aiksaurus-1.2.1/base/WordStream.h 2003-06-15 18:17:58.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/WordStream.h 2022-06-23 12:24:57.220828383 +0800 +@@ -38,7 +38,7 @@ + + public: + +- WordStream(int* words) throw(std::bad_alloc) ++ WordStream(int* words) noexcept(false) + { + for(int i = 0;words[i] != -1;++i) + d_words.push(words[i]); +diff -uNr aiksaurus-1.2.1/gtk/src/AiksaurusGTK.cpp aiksaurus-1.2.1.ok/gtk/src/AiksaurusGTK.cpp +--- aiksaurus-1.2.1/gtk/src/AiksaurusGTK.cpp 2003-05-28 07:46:37.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/AiksaurusGTK.cpp 2022-06-23 12:41:19.848044967 +0800 +@@ -54,7 +54,7 @@ + const char* runThesaurus(const char* word) throw(); + void setTitle(const char* title) throw(); + void setReplacebar(bool replacebar) throw(); +- void setInitialMessage(const char* message) throw(std::bad_alloc); ++ void setInitialMessage(const char* message) noexcept(false); + + void eventCancel() throw(); + void eventReplace(const char* replacement) throw(); +@@ -84,7 +84,7 @@ + } + + +- void DialogImpl::setInitialMessage(const char* message) throw(std::bad_alloc) ++ void DialogImpl::setInitialMessage(const char* message) noexcept(false) + { + d_initialMessage = message; + } +diff -uNr aiksaurus-1.2.1/gtk/src/Display.cpp aiksaurus-1.2.1.ok/gtk/src/Display.cpp +--- aiksaurus-1.2.1/gtk/src/Display.cpp 2022-06-23 12:46:37.102548374 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Display.cpp 2022-06-23 12:43:10.935559296 +0800 +@@ -73,7 +73,7 @@ + + + void Display::_createMeaning(const string& title, vector& words) +- throw(std::bad_alloc) ++ noexcept(false) + { + Meaning *mean = new Meaning(title, words, *this); + d_meanings.push_back(mean); +@@ -98,7 +98,7 @@ + d_meanings.clear(); + } + +- void Display::_displayResults(const char* word) throw(Exception, std::bad_alloc) ++ void Display::_displayResults(const char* word) noexcept(false) + { + _checkThesaurus(); + +@@ -137,7 +137,7 @@ + + + +- void Display::_checkThesaurus() 
throw(Exception) ++ void Display::_checkThesaurus() noexcept(false) + { + if (d_thesaurus.error()[0]) + { +@@ -153,7 +153,7 @@ + } + + void Display::_displayAlternatives() +- throw(Exception, std::bad_alloc) ++ noexcept(false) + { + _checkThesaurus(); + vector words; +@@ -176,7 +176,7 @@ + gtk_widget_show_all(d_layout); + } + +- void Display::search(const char* word) throw(std::bad_alloc) ++ void Display::search(const char* word) noexcept(false) + { + try + { +@@ -197,7 +197,7 @@ + } + + +- void Display::_handleClick(bool isDoubleClick, const char* text) throw(std::bad_alloc) ++ void Display::_handleClick(bool isDoubleClick, const char* text) noexcept(false) + { + string str(text); // might throw + +diff -uNr aiksaurus-1.2.1/gtk/src/Display.h aiksaurus-1.2.1.ok/gtk/src/Display.h +--- aiksaurus-1.2.1/gtk/src/Display.h 2002-07-11 18:09:37.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Display.h 2022-06-23 12:40:58.468835523 +0800 +@@ -29,17 +29,17 @@ + std::vector d_meanings; + + void _handleSelection(GtkWidget* list) throw(); +- void _handleClick(bool isDoubleClick, const char* text) throw(std::bad_alloc); ++ void _handleClick(bool isDoubleClick, const char* text) noexcept(false); + + void _resetDisplay() throw(); + + void _createMeaning(const std::string& title, std::vector& words) +- throw(std::bad_alloc); ++ noexcept(false); + +- void _displayResults(const char* word) throw(Exception, std::bad_alloc); +- void _displayAlternatives() throw(Exception, std::bad_alloc); ++ void _displayResults(const char* word) noexcept(false); ++ void _displayAlternatives() noexcept(false); + +- void _checkThesaurus() throw(Exception); ++ void _checkThesaurus() noexcept(false); + + static void _initResources() throw(); + +@@ -50,7 +50,7 @@ + const Aiksaurus& getThesaurus() const throw(); + GtkWidget* getDisplay() throw(); + +- void search(const char* word) throw(std::bad_alloc); ++ void search(const char* word) noexcept(false); + void showMessage(const char* message) throw(); + }; + +diff -uNr aiksaurus-1.2.1/gtk/src/Meaning.cpp aiksaurus-1.2.1.ok/gtk/src/Meaning.cpp +--- aiksaurus-1.2.1/gtk/src/Meaning.cpp 2003-05-28 07:56:55.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Meaning.cpp 2022-06-23 12:43:59.713595459 +0800 +@@ -26,7 +26,7 @@ + } + + Meaning::Meaning(const string& title, vector& words, Display& display) +- throw(bad_alloc) ++ noexcept(false) + : d_title(title), d_words(words), d_display(display), d_lists(4) + { + d_masterLayout = gtk_event_box_new(); +@@ -142,7 +142,7 @@ + + gint Meaning::_wordclick + (GtkWidget* list, gint row, gint col, GdkEventButton *e, gpointer data) +- throw(std::bad_alloc) ++ noexcept(false) + { + Meaning *m = static_cast(data); + m->d_display._handleSelection(GTK_WIDGET(list)); +diff -uNr aiksaurus-1.2.1/gtk/src/Meaning.h aiksaurus-1.2.1.ok/gtk/src/Meaning.h +--- aiksaurus-1.2.1/gtk/src/Meaning.h 2003-05-28 07:56:55.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Meaning.h 2022-06-23 12:42:22.403620376 +0800 +@@ -24,12 +24,12 @@ + GtkWidget* d_label; + + static gint _wordclick(GtkWidget* l, gint row, gint col, +- GdkEventButton *e, gpointer data) throw(std::bad_alloc); ++ GdkEventButton *e, gpointer data) noexcept(false); + + public: + + Meaning(const string& title, vector& words, Display& display) +- throw(std::bad_alloc); ++ noexcept(false); + + ~Meaning() throw(); + +diff -uNr aiksaurus-1.2.1/gtk/src/Toolbar.cpp aiksaurus-1.2.1.ok/gtk/src/Toolbar.cpp +--- aiksaurus-1.2.1/gtk/src/Toolbar.cpp 2004-06-12 12:12:57.000000000 +0800 ++++ 
aiksaurus-1.2.1.ok/gtk/src/Toolbar.cpp 2022-06-23 12:45:07.159865371 +0800 +@@ -26,7 +26,7 @@ + namespace AiksaurusGTK_impl + { + +- Toolbar::Toolbar(DialogMediator& mediator, GtkWidget* window) throw(std::bad_alloc) ++ Toolbar::Toolbar(DialogMediator& mediator, GtkWidget* window) noexcept(false) + : d_mediator(mediator), + d_searchbar_words(12), + d_ishistorymove(false), +@@ -96,7 +96,7 @@ + + } + +- void Toolbar::_updateNavigation() throw(std::bad_alloc) ++ void Toolbar::_updateNavigation() noexcept(false) + { + if (d_history.size_back()) + d_backbutton_ptr->enable(); +@@ -115,7 +115,7 @@ + d_forwardbutton_ptr->updateMenuOptions(); + } + +- void Toolbar::search(const char* str) throw(std::bad_alloc) ++ void Toolbar::search(const char* str) noexcept(false) + { + if (!d_ishistorymove) + d_history.search(str); +diff -uNr aiksaurus-1.2.1/gtk/src/Toolbar.h aiksaurus-1.2.1.ok/gtk/src/Toolbar.h +--- aiksaurus-1.2.1/gtk/src/Toolbar.h 2003-05-28 07:46:42.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Toolbar.h 2022-06-23 12:38:42.279404124 +0800 +@@ -56,7 +56,7 @@ + GtkWidget* d_searchbar_ptr; + GtkWidget* d_searchbar_label_ptr; + +- void _updateNavigation() throw(std::bad_alloc); ++ void _updateNavigation() noexcept(false); + + void _setTooltip(GtkWidget* w, const char* str) throw(); + +@@ -75,14 +75,14 @@ + + public: + +- Toolbar(DialogMediator& mediator, GtkWidget* window) throw(std::bad_alloc); ++ Toolbar(DialogMediator& mediator, GtkWidget* window) noexcept(false); + ~Toolbar() throw(); + + GtkWidget* getToolbar() throw(); + const char* getText() const throw(); + void focus() throw(); + +- void search(const char* str) throw(std::bad_alloc); ++ void search(const char* str) noexcept(false); + }; + + } diff --git a/alacritty/PKGBUILD b/alacritty/PKGBUILD index b8adf73210..27dca355eb 100644 --- a/alacritty/PKGBUILD +++ b/alacritty/PKGBUILD @@ -23,7 +23,7 @@ sha256sums=('SKIP') prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build(){ diff --git a/alsa-tools/PKGBUILD b/alsa-tools/PKGBUILD index d382cd111b..be16ce2bc2 100644 --- a/alsa-tools/PKGBUILD +++ b/alsa-tools/PKGBUILD @@ -6,7 +6,7 @@ pkgname=alsa-tools pkgver=1.2.5 -pkgrel=2 +pkgrel=4 pkgdesc="Advanced tools for certain sound cards" arch=(loong64 x86_64) url="https://alsa-project.org/" diff --git a/android-tools/PKGBUILD b/android-tools/PKGBUILD index 0418f8116e..63f6c3ac51 100644 --- a/android-tools/PKGBUILD +++ b/android-tools/PKGBUILD @@ -12,8 +12,15 @@ url='http://tools.android.com/' license=(Apache MIT) depends=(libusb protobuf brotli zstd android-udev pcre2) makedepends=(gtest cmake go ninja git) -source=(https://github.com/nmeum/android-tools/releases/download/$_tag/android-tools-$_tag.tar.xz) -sha256sums=('7a22ff9cea81ff4f38f560687858e8f8fb733624412597e3cc1ab0262f8da3a1') +source=(https://github.com/nmeum/android-tools/releases/download/$_tag/android-tools-$_tag.tar.xz + android-tools-la64.patch) +sha256sums=('7a22ff9cea81ff4f38f560687858e8f8fb733624412597e3cc1ab0262f8da3a1' + '1a9c66a0c00eba62ad4a7babd26047f8f48dac4095f2849f75f7648c5f366d01') + +prepare() { + cd android-tools-$_tag + patch -p1 -i ../android-tools-la64.patch +} build() { cd android-tools-$_tag diff --git a/android-tools/android-tools-la64.patch b/android-tools/android-tools-la64.patch new file mode 100644 index 0000000000..e192cd01d8 --- /dev/null +++ b/android-tools/android-tools-la64.patch @@ -0,0 +1,13 @@ +Index: 
android-tools-34.0.1/vendor/boringssl/include/openssl/base.h +=================================================================== +--- android-tools-34.0.1.orig/vendor/boringssl/include/openssl/base.h ++++ android-tools-34.0.1/vendor/boringssl/include/openssl/base.h +@@ -118,6 +118,8 @@ extern "C" { + #define OPENSSL_32_BIT + #elif defined(__myriad2__) + #define OPENSSL_32_BIT ++#elif defined(__loongarch_lp64) ++#define OPENSSL_64_BIT + #else + // Note BoringSSL only supports standard 32-bit and 64-bit two's-complement, + // little-endian architectures. Functions will not produce the correct answer diff --git a/anewer/PKGBUILD b/anewer/PKGBUILD index d95b896baa..674fad0604 100644 --- a/anewer/PKGBUILD +++ b/anewer/PKGBUILD @@ -13,7 +13,7 @@ b2sums=('b6a65f94b5d57ecd7947c75dda57c8c1166c94b2677a6fe25bf1fede8af49e4546429f3 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/angle-grinder/PKGBUILD b/angle-grinder/PKGBUILD index 818d5ba8ae..6bae5b9643 100644 --- a/angle-grinder/PKGBUILD +++ b/angle-grinder/PKGBUILD @@ -23,7 +23,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/apache-orc/PKGBUILD b/apache-orc/PKGBUILD index a50af9a7be..b80163550b 100644 --- a/apache-orc/PKGBUILD +++ b/apache-orc/PKGBUILD @@ -11,7 +11,7 @@ url="https://orc.apache.org" license=(Apache) depends=(lz4 protobuf snappy zlib zstd) makedepends=(cmake) -checkdepends=(gtest) +makedepends+=(gtest) source=(https://dlcdn.apache.org/${_pkg}/${_pkg}-${pkgver}/${_pkg}-${pkgver}.tar.gz{,.asc}) sha256sums=('0dca8bbccdb2ee87e59ba964933436beebd02ea78c4134424828a8127fbc4faa' 'SKIP') @@ -40,7 +40,9 @@ build(){ -DORC_PREFER_STATIC_ZLIB=OFF \ -DBUILD_LIBHDFSPP=OFF \ -DBUILD_JAVA=OFF \ - -DINSTALL_VENDORED_LIBS=OFF + -DSTOP_BUILD_ON_WARNING=OFF \ + -DINSTALL_VENDORED_LIBS=OFF \ + -DBUILD_CPP_TESTS=OFF make -C build } diff --git a/apptainer/PKGBUILD b/apptainer/PKGBUILD index 8a7892543d..28bc401586 100644 --- a/apptainer/PKGBUILD +++ b/apptainer/PKGBUILD @@ -72,6 +72,9 @@ build() { # provide version to build script echo "$pkgver" > VERSION + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.8 + go mod edit -replace=github.com/cilium/ebpf=github.com/cilium/ebpf@v0.12.3 + go mod tidy # set Go flags export CGO_CPPFLAGS="${CPPFLAGS}" diff --git a/apr-util/PKGBUILD b/apr-util/PKGBUILD index 71a94784df..4f01001b24 100644 --- a/apr-util/PKGBUILD +++ b/apr-util/PKGBUILD @@ -3,7 +3,7 @@ pkgname=apr-util pkgver=1.6.3 -pkgrel=1 +pkgrel=2 pkgdesc="The Apache Portable Runtime" arch=('loong64' 'x86_64') url="https://apr.apache.org/" diff --git a/apr/PKGBUILD b/apr/PKGBUILD index d85b8e5220..c33ea681cc 100644 --- a/apr/PKGBUILD +++ b/apr/PKGBUILD @@ -4,7 +4,7 @@ pkgname=apr pkgver=1.7.4 -pkgrel=1 +pkgrel=3 pkgdesc='The Apache Portable Runtime' arch=('loong64' 'x86_64') url='https://apr.apache.org/' @@ -16,14 +16,16 @@ source=(https://archive.apache.org/dist/apr/apr-$pkgver.tar.bz2{,.asc} fix-apr.pc.patch ship_find_apr.m4.patch omit_extra_libs.patch - dont_override_external_buildflags) + dont_override_external_buildflags + apr-1.7-fix-build.patch) sha256sums=('fc648de983f3a2a6c9e78dea1f180639bd2fad6c06d556d4367a701fe5c35577' 'SKIP' '572efb102d02bb3e85ff08eca6b2ea8ff7936ce5228da7a45c1e639faca36a5c' '12595d331b48be9e44bd843635eb4f0f500bd213e197a551a9d383a28a24641f' 
'315932ef6536fc0644c1efe770ceb3bb675c3c7103a7cbb2f02efd8be03eb752' '3d491d3af8fb5a75db4e085a17e5d8dcbe058bd256ef893ee779dc97fc9f8ad6' - '5ac0bdc532479f6082d29115ac9d3ca24524fd8b97a556568755b88e5a68e3df') + '5ac0bdc532479f6082d29115ac9d3ca24524fd8b97a556568755b88e5a68e3df' + 'e543e08a3517b5a6143c1b6efeb9fffec091953e689d2d4d98526407be8f7d9c') validpgpkeys=('5B5181C2C0AB13E59DA3F7A3EC582EB639FF092C' # Jeff Trawick 'B1B96F45DFBDCCF974019235193F180AB55D9977' # Nick Kew '65B2D44FE74BD5E3DE3AC3F082781DE46D5954FA' # "Eric Covener " @@ -36,10 +38,14 @@ prepare() { patch -Np1 -i ../fix-apr.pc.patch patch -Np1 -i ../omit_extra_libs.patch patch -Np1 -i ../dont_override_external_buildflags - #./buildconf + patch -Np1 -i ../apr-1.7-fix-build.patch + ./buildconf } build() { +# CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} +# CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} +# unset CFLAGS CXXFLAGS cd apr-$pkgver ./configure --prefix=/usr --includedir=/usr/include/apr-1 \ --with-installbuilddir=/usr/share/apr-1/build \ diff --git a/apr/apr-1.7-fix-build.patch b/apr/apr-1.7-fix-build.patch new file mode 100644 index 0000000000..aaad10e7ae --- /dev/null +++ b/apr/apr-1.7-fix-build.patch @@ -0,0 +1,13 @@ +Index: apr-1.7.0/build/apr_common.m4 +=================================================================== +--- apr-1.7.0.orig/build/apr_common.m4 ++++ apr-1.7.0/build/apr_common.m4 +@@ -501,7 +501,7 @@ AC_DEFUN([APR_TRY_COMPILE_NO_WARNING], + [apr_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $CFLAGS_WARN" + if test "$ac_cv_prog_gcc" = "yes"; then +- CFLAGS="$CFLAGS -Werror" ++ CFLAGS="$CFLAGS" + fi + AC_COMPILE_IFELSE( + [AC_LANG_SOURCE( diff --git a/arch-rebuild-order/PKGBUILD b/arch-rebuild-order/PKGBUILD index 9f1a1b95e0..35ce9a2d50 100644 --- a/arch-rebuild-order/PKGBUILD +++ b/arch-rebuild-order/PKGBUILD @@ -8,7 +8,7 @@ url='https://gitlab.archlinux.org/archlinux/arch-rebuild-order' arch=('loong64' 'x86_64') license=('MIT') depends=('glibc' 'libalpm.so') -makedepends=('cargo' 'mandown' 'git') +makedepends=('rust' 'mandown' 'git') groups=('archlinux-tools') source=(git+https://gitlab.archlinux.org/archlinux/arch-rebuild-order.git#tag=v$pkgver?signed) sha512sums=('SKIP') @@ -16,12 +16,12 @@ validpgpkeys=("E499C79F53C96A54E572FEE1C06086337C50773E") prepare() { cd ${pkgname} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd ${pkgname} - cargo build --frozen --release --all-features + cargo build --release --all-features } check() { diff --git a/arch-repro-status/PKGBUILD b/arch-repro-status/PKGBUILD index 6349ef31e6..0aa6cc331d 100644 --- a/arch-repro-status/PKGBUILD +++ b/arch-repro-status/PKGBUILD @@ -16,7 +16,7 @@ sha512sums=('16fa85c6bd1990363f7129d7c7b2229fa682e0032cef176f22f5cdc1dd03fd60894 prepare() { cd "$pkgname-v$pkgver" mkdir completions/ - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/archinstall/PKGBUILD b/archinstall/PKGBUILD index b7adbeef6b..1f371acf4c 100644 --- a/archinstall/PKGBUILD +++ b/archinstall/PKGBUILD @@ -42,15 +42,19 @@ provides=(python-archinstall) source=( $pkgname-v$pkgver.tar.gz::$url/archive/refs/tags/v$pkgver.tar.gz $pkgname-v$pkgver.tar.gz.sig::$url/releases/download/v$pkgver/$pkgname-$pkgver.tar.gz.sig + archinstall-la64-2.6.3.patch ) sha512sums=('b88a301ff747f10f5b2e23b8c9217f28b54a5846123580eba06d1960a64e56357ec85414f3c0ce8b641cc2038fd5244608bdbac3ee00bef0a55928a51add0c05' - 'SKIP') 
+ 'SKIP' + 'bba2c25733ab266f2b848f2e569dc6b03a0dd9e844cdb976a16fe0190145c6d7858b6530fd0ac9785579968f43fba0e9037802b62d160b98832076e09f5376df') b2sums=('105b297c649e08edb67e36f1675a65c4b9a930cee154e7bdf9cde8d0a5af25ca8045f31fe2cdcccc076dc278dd9f952fbc03fe57f7436118e623c96cddcc8a4a' - 'SKIP') + 'SKIP' + 'ed1717d1f5649383bc15db231636181a135857da3ec4f51d2e559b7af402c4013ba6e0daa0d5af154ca00f3fea15a88c11e97b8c9060a867345cb361b2b8151b') validpgpkeys=('8AA2213C8464C82D879C8127D4B58E897A929F2E') # torxed@archlinux.org prepare() { cd $pkgname-$pkgver + patch -p1 -i $srcdir/archinstall-la64-2.6.3.patch } build() { diff --git a/archinstall/archinstall-la64-2.6.3.patch b/archinstall/archinstall-la64-2.6.3.patch new file mode 100644 index 0000000000..84b5638eb3 --- /dev/null +++ b/archinstall/archinstall-la64-2.6.3.patch @@ -0,0 +1,139 @@ +diff --git a/archinstall/lib/global_menu.py b/archinstall/lib/global_menu.py +index b38dac0b..ca69a322 100644 +--- a/archinstall/lib/global_menu.py ++++ b/archinstall/lib/global_menu.py +@@ -46,7 +46,9 @@ class GlobalMenu(AbstractMenu): + _('Archinstall language'), + lambda x: self._select_archinstall_language(x), + display_func=lambda x: x.display_name, +- default=self.translation_handler.get_language_by_abbr('en')) ++ default=self.translation_handler.get_language_by_abbr('zh-CN')) ++ self.translation_handler.activate(self.translation_handler.get_language_by_abbr('zh-CN')) ++ + self._menu_options['locale_config'] = \ + Selector( + _('Locales'), +diff --git a/archinstall/lib/hardware.py b/archinstall/lib/hardware.py +index 56d3bc7b..737cbe06 100644 +--- a/archinstall/lib/hardware.py ++++ b/archinstall/lib/hardware.py +@@ -47,6 +47,7 @@ class GfxPackage(Enum): + VulkanRadeon = 'vulkan-radeon' + Xf86VideoAmdgpu = "xf86-video-amdgpu" + Xf86VideoAti = "xf86-video-ati" ++ Xf86VideoLoongson = 'xf86-video-loongson' + Xf86VideoNouveau = 'xf86-video-nouveau' + Xf86VideoVmware = 'xf86-video-vmware' + +@@ -76,6 +77,7 @@ class GfxDriver(Enum): + GfxPackage.Mesa, + GfxPackage.Xf86VideoAmdgpu, + GfxPackage.Xf86VideoAti, ++ GfxPackage.Xf86VideoLoongson, + GfxPackage.Xf86VideoNouveau, + GfxPackage.Xf86VideoVmware, + GfxPackage.LibvaMesaDriver, +@@ -89,6 +91,7 @@ class GfxDriver(Enum): + GfxPackage.Mesa, + GfxPackage.Xf86VideoAmdgpu, + GfxPackage.Xf86VideoAti, ++ GfxPackage.Xf86VideoLoongson, + GfxPackage.LibvaMesaDriver, + GfxPackage.VulkanRadeon + ] +@@ -272,6 +275,7 @@ class SysInfo: + 'snd_gina20', + 'snd_gina24', + 'snd_hda_codec_ca0132', ++ 'snd_hda_loongson', + 'snd_hdsp', + 'snd_indigo', + 'snd_indigodj', +diff --git a/archinstall/lib/installer.py b/archinstall/lib/installer.py +index 585389ed..4e066896 100644 +--- a/archinstall/lib/installer.py ++++ b/archinstall/lib/installer.py +@@ -916,7 +916,7 @@ class Installer: + boot_dir_arg.append(f'--boot-directory={boot_dir}') + + add_options = [ +- '--target=x86_64-efi', ++ '--target=loongarch64-efi', + f'--efi-directory={efi_partition.mountpoint}', + *boot_dir_arg, + '--bootloader-id=GRUB', +diff --git a/archinstall/lib/locale/locale_menu.py b/archinstall/lib/locale/locale_menu.py +index 2e254315..fbcbddd8 100644 +--- a/archinstall/lib/locale/locale_menu.py ++++ b/archinstall/lib/locale/locale_menu.py +@@ -16,7 +16,7 @@ class LocaleConfiguration: + + @staticmethod + def default() -> 'LocaleConfiguration': +- return LocaleConfiguration('us', 'en_US', 'UTF-8') ++ return LocaleConfiguration('us', 'zh_CN', 'UTF-8') + + def json(self) -> Dict[str, str]: + return { +@@ -68,7 +68,7 @@ class LocaleMenu(AbstractSubMenu): + Selector( + 
_('Locale language'), + lambda preset: select_locale_lang(preset), +- default=self._preset.sys_lang, ++ default='zh_CN', + enabled=True) + self._menu_options['sys-encoding'] = \ + Selector( +diff --git a/archinstall/lib/mirrors.py b/archinstall/lib/mirrors.py +index 74cdd0aa..70b3794e 100644 +--- a/archinstall/lib/mirrors.py ++++ b/archinstall/lib/mirrors.py +@@ -323,7 +323,7 @@ def list_mirrors() -> Dict[str, List[str]]: + with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp: + mirrorlist = fp.read() + else: +- url = "https://archlinux.org/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on" ++ url = "https://archapi.zhcn.cc/api/v1/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on" + try: + mirrorlist = fetch_data_from_url(url) + except ValueError as err: +diff --git a/archinstall/lib/translationhandler.py b/archinstall/lib/translationhandler.py +index 33230562..5caa191f 100644 +--- a/archinstall/lib/translationhandler.py ++++ b/archinstall/lib/translationhandler.py +@@ -24,7 +24,10 @@ class Language: + + @property + def display_name(self) -> str: +- name = self.name_en ++ if self.translated_lang: ++ name = self.translated_lang ++ else: ++ name = self.name_en + return f'{name} ({self.translation_percent}%)' + + def is_match(self, lang_or_translated_lang: str) -> bool: +diff --git a/archinstall/scripts/guided.py b/archinstall/scripts/guided.py +index d7cf16cd..1d746c21 100644 +--- a/archinstall/scripts/guided.py ++++ b/archinstall/scripts/guided.py +@@ -181,6 +181,19 @@ def perform_installation(mountpoint: Path): + if profile_config := archinstall.arguments.get('profile_config', None): + profile_handler.install_profile_config(installation, profile_config) + ++ # Add Chinese input method and fonts ++ if locale_config.sys_lang in ["zh_CN", "zh_TW"]: ++ installation.add_additional_packages(['wqy-bitmapfont', 'wqy-microhei', 'wqy-microhei-lite', 'wqy-zenhei']) ++ # Install Chinese Input Method ++ if profile_config.profile.is_desktop_type_profile(): ++ installation.add_additional_packages(['fcitx5', 'fcitx5-chinese-addons', 'fcitx5-configtool', 'fcitx5-gtk', 'fcitx5-qt']) ++ with open(f"{archinstall.storage['installation_session'].target}/etc/X11/xinit/xinitrc.d/50-input.sh", 'w') as finput: ++ finput.write(f'export XIM=fcitx\n') ++ finput.write(f'export GTK_IM_MODULE=fcitx\n') ++ finput.write(f'export QT_IM_MODULE=fcitx\n') ++ finput.write(f'export XMODIFIERS="@im=fcitx"\n') ++ installation.arch_chroot('chmod +x /etc/X11/xinit/xinitrc.d/50-input.sh') ++ + if timezone := archinstall.arguments.get('timezone', None): + installation.set_timezone(timezone) + diff --git a/arrow/PKGBUILD b/arrow/PKGBUILD index ad226f1b50..bf021063d8 100644 --- a/arrow/PKGBUILD +++ b/arrow/PKGBUILD @@ -33,6 +33,11 @@ prepare() { build(){ CC=clang \ CXX=clang++ \ +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} cmake \ -B build -S apache-${pkgname}-${pkgver}/cpp \ -DCMAKE_INSTALL_PREFIX="/usr" \ diff --git a/arti/PKGBUILD b/arti/PKGBUILD index cc59f298ff..aa67e66e36 100644 --- a/arti/PKGBUILD +++ b/arti/PKGBUILD @@ -23,7 +23,7 @@ b2sums=('f1bec1c26a147372f19a66022dda7a2f8989d40338399f926f84c8ad17d6cd9d92ad565 prepare() { mv "$pkgname-$pkgname-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname 
-m`-unknown-linux-gnu" } build() { diff --git a/aspell-ru/PKGBUILD b/aspell-ru/PKGBUILD index fa07c6f2dc..c33beac8a2 100644 --- a/aspell-ru/PKGBUILD +++ b/aspell-ru/PKGBUILD @@ -7,7 +7,7 @@ _pkgver=0.99f7-1 pkgver=${_pkgver//-/.} pkgrel=1 pkgdesc="Russian dictionary for aspell" -arch=(loong64' 'x86_64) # We cannot use 'any' see FS#22443 +arch=('loong64' 'x86_64') # We cannot use 'any' see FS#22443 url='http://aspell.net' license=(custom) depends=(aspell) diff --git a/at51/PKGBUILD b/at51/PKGBUILD index 2a8c9b070d..e685079485 100644 --- a/at51/PKGBUILD +++ b/at51/PKGBUILD @@ -21,7 +21,7 @@ b2sums=('3bb3793c2082fa4ce2973bd4c58ff684ebe6afdcf1507d112a0a9c89e8410bda8493d13 prepare() { cd $pkgname-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd $pkgname-$pkgver diff --git a/auth-tarball-from-git/PKGBUILD b/auth-tarball-from-git/PKGBUILD index d790d5e0b5..a1efaa435f 100644 --- a/auth-tarball-from-git/PKGBUILD +++ b/auth-tarball-from-git/PKGBUILD @@ -19,7 +19,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/autogen/PKGBUILD b/autogen/PKGBUILD index 02a2cc2082..96f869bfb9 100644 --- a/autogen/PKGBUILD +++ b/autogen/PKGBUILD @@ -32,6 +32,7 @@ prepare() { build() { cd "${srcdir}/${pkgname}-${pkgver}" + unset CFLAGS CXXFLAGS ./configure --prefix=/usr sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool make diff --git a/autotiling-rs/PKGBUILD b/autotiling-rs/PKGBUILD index 77f07302be..49f174065a 100644 --- a/autotiling-rs/PKGBUILD +++ b/autotiling-rs/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('3bffa4f9beef917c1ac731507e61ac716164829b1ce038e20708ca8d0511dd9a4382716 prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/avisynthplus/PKGBUILD b/avisynthplus/PKGBUILD index 6093ec84b1..e2464a4da6 100644 --- a/avisynthplus/PKGBUILD +++ b/avisynthplus/PKGBUILD @@ -12,9 +12,15 @@ optdepends=('devil: for ImageSeq plugin') makedepends=('cmake' 'devil') provides=('libavisynth.so') source=("https://github.com/AviSynth/AviSynthPlus/archive/v${pkgver}/${pkgname}-${pkgver}.tar.gz" - 'avisynthplus.xml') + 'avisynthplus.xml' + 'avisynthplus-la64.patch') sha256sums=('b847705af6f16fa26664d06e0fea2bda14a7f6aac8249a9c37e4106ecb8fd44c' - 'c4b270a3df7fbe1c153400215169c4ae4cae3b7a8710c843393e3a6ed0fd8a3e') + 'c4b270a3df7fbe1c153400215169c4ae4cae3b7a8710c843393e3a6ed0fd8a3e' + '02a013f60b849eda8fe5edc9a8e451e790d2a7b3ebc2dc034730718be62475d7') + +prepare() { + patch -d "AviSynthPlus-${pkgver}" -p1 -i "$srcdir/avisynthplus-la64.patch" +} build() { cmake -B build -S "AviSynthPlus-${pkgver}" \ diff --git a/avisynthplus/avisynthplus-la64.patch b/avisynthplus/avisynthplus-la64.patch new file mode 100644 index 0000000000..234b2697b8 --- /dev/null +++ b/avisynthplus/avisynthplus-la64.patch @@ -0,0 +1,19 @@ +commit 9609a9565b6cb754ce7787fbd032f7d7f7a151dd +Author: Xiaotian Wu +Date: Sun Jun 5 08:44:22 2022 +0800 + + add support for LoongArch + +diff --git a/avs_core/include/avs/config.h b/avs_core/include/avs/config.h +index bdabf17f..1d0b4eef 100644 +--- a/avs_core/include/avs/config.h ++++ b/avs_core/include/avs/config.h +@@ -59,6 +59,8 @@ + # define PPC32 + #elif defined(__riscv) + # define RISCV ++#elif 
defined(__loongarch__) ++# define LOONGARCH + #elif defined(__sparc_v9__) + # define SPARC + #elif defined(__mips__) diff --git a/b3sum/PKGBUILD b/b3sum/PKGBUILD index 8507d3116a..8404995ca7 100644 --- a/b3sum/PKGBUILD +++ b/b3sum/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('SKIP') prepare() { cd $_name/$pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/babl/PKGBUILD b/babl/PKGBUILD index 0f897b3859..904f15b96f 100644 --- a/babl/PKGBUILD +++ b/babl/PKGBUILD @@ -10,8 +10,10 @@ url='https://gegl.org/babl/' license=('LGPL3') depends=('glibc' 'lcms2') makedepends=('git' 'meson' 'gobject-introspection' 'vala') -source=("git+https://gitlab.gnome.org/GNOME/babl.git#tag=$_tag") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/GNOME/babl.git#tag=$_tag" + babl-fix-gir-pkgname.patch) +sha256sums=('SKIP' + 'bceba2643d5baef3d5add6f715d8bf982cbaf25701e589d489b1a8d819879e6a') pkgver() { cd "${pkgname}" @@ -21,6 +23,7 @@ pkgver() { prepare() { # https://gitlab.gnome.org/GNOME/babl/-/merge_requests/45 sed -i s/Description/description/ "${pkgname}"/meson.build + patch -d babl -p1 -i $srcdir/babl-fix-gir-pkgname.patch } build() { diff --git a/babl/babl-fix-gir-pkgname.patch b/babl/babl-fix-gir-pkgname.patch new file mode 100644 index 0000000000..ea0bb8d0ba --- /dev/null +++ b/babl/babl-fix-gir-pkgname.patch @@ -0,0 +1,13 @@ +Index: babl/babl/meson.build +=================================================================== +--- babl.orig/babl/meson.build ++++ babl/babl/meson.build +@@ -156,7 +156,7 @@ if build_gir + namespace: 'Babl', + nsversion: api_version, + header: 'babl.h', +- export_packages: 'babl-0.1', ++ export_packages: 'babl', + install: true, + ) + diff --git a/bacon/PKGBUILD b/bacon/PKGBUILD index 111624c894..0f9267a520 100644 --- a/bacon/PKGBUILD +++ b/bacon/PKGBUILD @@ -24,7 +24,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/baidupcs-go/PKGBUILD b/baidupcs-go/PKGBUILD index 7a6803c9bf..07b6881513 100644 --- a/baidupcs-go/PKGBUILD +++ b/baidupcs-go/PKGBUILD @@ -21,6 +21,8 @@ build() { export CGO_LDFLAGS="${LDFLAGS}" export CGO_CFLAGS="${CFLAGS}" export CGO_CPPFLAGS="${CPPFLAGS}" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod tidy go build -o baidupcs-go } diff --git a/bandwhich/PKGBUILD b/bandwhich/PKGBUILD index a2348391d9..76bc317249 100644 --- a/bandwhich/PKGBUILD +++ b/bandwhich/PKGBUILD @@ -19,7 +19,7 @@ b2sums=('faa9bc5620e9e2a7d5ddd8c715934b2eefc6f4f069348fc14d983ac9c7b22e43b1d167c prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/bcprov/PKGBUILD b/bcprov/PKGBUILD index eaa60f4e96..5705f83b95 100644 --- a/bcprov/PKGBUILD +++ b/bcprov/PKGBUILD @@ -8,7 +8,7 @@ arch=('any') url='https://www.bouncycastle.org/java.html' license=('MIT') depends=('java-runtime-headless') -makedepends=('ant' 'strip-nondeterminism') +makedepends=('git' 'ant' 'strip-nondeterminism') source=("$pkgname-$pkgver.tar.gz::https://github.com/bcgit/bc-java/archive/refs/tags/r${pkgver/./rv}.tar.gz") sha512sums=('7d2abab42a7e29159ae063244a4296708d1269e5a8250f0e2f62b095916d509e6e4213f4b32d45f375e1aabea572860d9b81df9ee5efcdff9b9e569864c9f8db') diff --git a/bees/PKGBUILD b/bees/PKGBUILD index 
95b0763398..36588d71d8 100644 --- a/bees/PKGBUILD +++ b/bees/PKGBUILD @@ -11,8 +11,15 @@ url="https://github.com/Zygo/bees" license=('GPL3') depends=('util-linux-libs' 'bash') makedepends=('btrfs-progs' 'systemd') -source=("${pkgname}-${pkgver}.tar.gz"::"https://github.com/Zygo/bees/archive/v${pkgver}.tar.gz") -sha256sums=('d100efbc6084f494400892ef53fa476fd6f201dba3b2fddee11ef90dd9d6111d') +source=("${pkgname}-${pkgver}.tar.gz"::"https://github.com/Zygo/bees/archive/v${pkgver}.tar.gz" + "bees-fix-build.patch::https://github.com/Zygo/bees/commit/d6732c58e29b6f969e8b53c16541d1572a31c485.patch") +sha256sums=('d100efbc6084f494400892ef53fa476fd6f201dba3b2fddee11ef90dd9d6111d' + 'd08111d97ee1b8c1d3b7abcdc25872de965e472f318383e9121917667748d3c7') + +prepare() { + cd "${srcdir}/${pkgname}-${pkgver}" + patch -p1 -i $srcdir/bees-fix-build.patch +} build() { cd "${srcdir}/${pkgname}-${pkgver}" diff --git a/bigloo/PKGBUILD b/bigloo/PKGBUILD index 3830533ecd..fc32cbe94b 100644 --- a/bigloo/PKGBUILD +++ b/bigloo/PKGBUILD @@ -14,13 +14,17 @@ depends=('gmp' 'openssl' 'libunistring' 'libnsl' 'gc' 'libuv') makedepends=('emacs' 'zip' 'sqlite' 'alsa-lib' 'flac' 'avahi' 'libpulse') optdepends=('emacs' 'zip' 'sqlite' 'alsa-lib' 'flac' 'avahi') options=('!makeflags' '!lto') -source=("ftp://ftp-sop.inria.fr/indes/fp/Bigloo/${pkgname}-${_src_ver}.tar.gz") -sha256sums=('d8f04e889936187dc519719b749ad03fe574165a0b6d318e561f1b3bce0d5808') +source=("ftp://ftp-sop.inria.fr/indes/fp/Bigloo/${pkgname}-${_src_ver}.tar.gz" +bigloo-la64.patch) +sha256sums=('d8f04e889936187dc519719b749ad03fe574165a0b6d318e561f1b3bce0d5808' + '7ccb954d6116379c38f0405b3ed4160ebed68a1134225365170c865d2be8920f') elisp_dir=/usr/share/emacs/site-lisp/bigloo prepare() { sed -i 's/$(GCLIB)/c/' "${srcdir}/${pkgname}-${_src_ver}/configure" + cd "${srcdir}/${pkgname}-${_src_ver}" + patch -p1 -i $srcdir/bigloo-la64.patch } build() { diff --git a/bigloo/bigloo-la64.patch b/bigloo/bigloo-la64.patch new file mode 100644 index 0000000000..50b3f9b86a --- /dev/null +++ b/bigloo/bigloo-la64.patch @@ -0,0 +1,11 @@ +Index: bigloo-4.5a-1/libbacktrace/install-libbacktrace +=================================================================== +--- bigloo-4.5a-1.orig/libbacktrace/install-libbacktrace ++++ bigloo-4.5a-1/libbacktrace/install-libbacktrace +@@ -11,4 +11,5 @@ + #*=====================================================================*/ + + tar xfz $LIBBACKTRACESRC || (echo "tar xfz $LIBBACKTRACESRC failed"; exit 1) +- ++for c_s in $(find -type f -name config.sub -o -name configure.sub); do cp -f /usr/share/automake-1.16/config.sub "$c_s"; done ++for c_g in $(find -type f -name config.guess -o -name configure.guess); do cp -f /usr/share/automake-1.16/config.guess "$c_g"; done diff --git a/bingrep/PKGBUILD b/bingrep/PKGBUILD index 3bd3343e2c..7a523edf78 100644 --- a/bingrep/PKGBUILD +++ b/bingrep/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('9a50aecffdd613f3241d12802ad49dc5d98219c8a99455418dc741eebc0a7c2a261 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/binocle/PKGBUILD b/binocle/PKGBUILD index fc9dd6bdc2..257fb62c90 100644 --- a/binocle/PKGBUILD +++ b/binocle/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('d4d2e225723e72d991eac9dc91c0056c902eeabbe046161447c4a8a4e3200515b5d prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { 
diff --git a/blender/PKGBUILD b/blender/PKGBUILD index 04e39c99ae..41c4cff075 100644 --- a/blender/PKGBUILD +++ b/blender/PKGBUILD @@ -18,13 +18,12 @@ url="https://www.blender.org" depends=('libpng' 'libtiff' 'openexr' 'python' 'desktop-file-utils' 'python-requests' 'potrace' 'shared-mime-info' 'hicolor-icon-theme' 'xdg-utils' 'glew' 'openjpeg2' 'python-numpy' 'freetype2' 'openal' 'ffmpeg' 'fftw' 'boost-libs' 'opencollada' 'alembic' 'openxr' - 'openimageio' 'libsndfile' 'jack' 'opencolorio' 'openimagedenoise' 'materialx' - 'jemalloc' 'libspnav' 'ptex' 'opensubdiv' 'openvdb' 'sdl2' 'embree' 'libharu' - 'draco' 'openpgl' 'level-zero-loader' 'libxkbcommon' 'libepoxy' 'usd' 'openshadinglanguage' + 'openimageio' 'libsndfile' 'jack' 'opencolorio' + 'jemalloc' 'libspnav' 'ptex' 'opensubdiv' 'openvdb' 'sdl2' 'libharu' + 'draco' 'level-zero-loader' 'libxkbcommon' 'libepoxy' 'openshadinglanguage' 'intel-oneapi-compiler-shared-runtime-libs' 'intel-oneapi-compiler-dpcpp-cpp-runtime-libs') makedepends=('cmake' 'boost' 'mesa' 'git' 'subversion' 'llvm' 'cuda' 'ninja' 'wayland-protocols' 'libxkbcommon' - 'libdecor' 'hip-runtime-amd' 'level-zero-headers' 'intel-oneapi-dpcpp-cpp' - 'intel-oneapi-compiler-shared-runtime' 'intel-compute-runtime') + 'libdecor' 'level-zero-headers') optdepends=('cuda: Cycles renderer CUDA support' 'intel-compute-runtime: Cycles renderer Intel OneAPI support' 'libdecor: wayland support') @@ -117,6 +116,8 @@ build() { -DWITH_PYTHON_INSTALL=OFF \ -DOCLOC_INSTALL_DIR=/usr \ -DUSD_ROOT_DIR=/usr \ + -DWITH_MATERIALX=OFF \ + -DWITH_CYCLES=OFF \ -DSYCL_OFFLINE_COMPILER_PARALLEL_JOBS=8 cmake --build build # For debug: @@ -142,5 +143,5 @@ package() { rm -r "${pkgdir}"/usr/share/blender/4*/python # Move OneAPI AOT lib to proper place - mv "${pkgdir}"/usr/share/blender/lib/libcycles_kernel_oneapi_aot.so "${pkgdir}"/usr/lib/ +# mv "${pkgdir}"/usr/share/blender/lib/libcycles_kernel_oneapi_aot.so "${pkgdir}"/usr/lib/ } diff --git a/bonnie++/PKGBUILD b/bonnie++/PKGBUILD index 2cc4531276..7ff31db6f5 100644 --- a/bonnie++/PKGBUILD +++ b/bonnie++/PKGBUILD @@ -4,7 +4,7 @@ pkgname=bonnie++ pkgver=2.00a -pkgrel=2 +pkgrel=3 pkgdesc="Based on the Bonnie hard drive benchmark by Tim Bray" arch=('loong64' 'x86_64') url="https://www.coker.com.au/bonnie++/" diff --git a/boost/PKGBUILD b/boost/PKGBUILD index 5aae060550..8cc5cad354 100644 --- a/boost/PKGBUILD +++ b/boost/PKGBUILD @@ -21,11 +21,13 @@ makedepends=('icu' 'python' 'python-numpy' 'bzip2' 'zlib' 'openmpi' 'zstd') source=(https://boostorg.jfrog.io/artifactory/main/release/$pkgver/source/$_srcname.tar.bz2 boost-1.81.0-phoenix-multiple-definitions.patch $pkgname-support-fn.contains-f-where-f-is-a-function.patch::https://github.com/boostorg/function/commit/7ca2310b15e3.patch - $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch) + $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch + boost-1.79.0-la64.patch) sha256sums=('6478edfe2f3305127cffe8caf73ea0176c53769f4bf1585be237eb30798c3b8e' '3ebf428ef6be090a7b56a233330375539ac429333b83708e28fe5db049cfecdb' '1b5998ee8fb389dd6df55a3684d29ffa37246bc007e8e6712bf2be6c7f745036' - 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee') + 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee' + '0fb9188bf211deff0d48dfb7cef614bbdebcd7dccea6e8c015da5d691eda5d94') prepare() { cd $_srcname @@ -40,6 +42,7 @@ prepare() { # https://github.com/boostorg/ublas/pull/97 patch -Np2 -i 
../$pkgname-ublas-c++20-iterator.patch + patch -Np1 -i $srcdir/boost-1.79.0-la64.patch } build() { diff --git a/boost/boost-1.79.0-la64.patch b/boost/boost-1.79.0-la64.patch new file mode 100644 index 0000000000..2b9602a900 --- /dev/null +++ b/boost/boost-1.79.0-la64.patch @@ -0,0 +1,55 @@ +diff --git a/boostcpp.jam b/boostcpp.jam +index 082536e2a5..7565dae80d 100644 +--- a/boostcpp.jam ++++ b/boostcpp.jam +@@ -634,7 +634,7 @@ rule address-model ( ) + return @boostcpp.deduce-address-model ; + } + +-local deducable-architectures = arm mips1 power riscv s390x sparc x86 combined ; ++local deducable-architectures = arm loongarch mips1 power riscv s390x sparc x86 combined ; + feature.feature deduced-architecture : $(deducable-architectures) : propagated optional composite hidden ; + for a in $(deducable-architectures) + { +@@ -645,9 +645,10 @@ rule deduce-architecture ( properties * ) + { + local result ; + local filtered = [ toolset-properties $(properties) ] ; +- local names = arm mips1 power riscv s390x sparc x86 combined ; ++ local names = arm loongarch mips1 power riscv s390x sparc x86 combined ; + local idx = [ configure.find-builds "default architecture" : $(filtered) + : /boost/architecture//arm ++ : /boost/architecture//loongarch + : /boost/architecture//mips1 + : /boost/architecture//power + : /boost/architecture//riscv +Submodule libs/config 08dced51e9..5c177b2269: +diff --git a/libs/config/checks/architecture/Jamfile.jam b/libs/config/checks/architecture/Jamfile.jam +index 2ba54f9a..e8838b41 100644 +--- a/libs/config/checks/architecture/Jamfile.jam ++++ b/libs/config/checks/architecture/Jamfile.jam +@@ -18,6 +18,7 @@ obj 64 : 64.cpp ; + + obj arm : arm.cpp ; + obj combined : combined.cpp ; ++obj loongarch : loongarch.cpp ; + obj mips : mips.cpp ; + alias mips1 : mips ; # Backwards compatibility + obj power : power.cpp ; +diff --git a/libs/config/checks/architecture/loongarch.cpp b/libs/config/checks/architecture/loongarch.cpp +new file mode 100644 +index 00000000..5be8cb09 +--- /dev/null ++++ b/libs/config/checks/architecture/loongarch.cpp +@@ -0,0 +1,11 @@ ++// loongarch.cpp ++// ++// Copyright (c) 2012 Steven Watanabe ++// ++// Distributed under the Boost Software License Version 1.0. 
(See ++// accompanying file LICENSE_1_0.txt or copy at ++// http://www.boost.org/LICENSE_1_0.txt) ++ ++#if !defined(__loongarch__) ++#error "Not LoongArch" ++#endif diff --git a/booster/PKGBUILD b/booster/PKGBUILD index 610a91a7b1..85e627597e 100644 --- a/booster/PKGBUILD +++ b/booster/PKGBUILD @@ -23,10 +23,14 @@ sha512sums=('66443568c504d563d5a774dd25d47d72ec745cad2b77fea3cbf881b51ed1ecfa093 build() { cd booster-$pkgver + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=github.com/u-root/uio=github.com/loongarch64/uio@dev-main + go mod tidy cd generator CGO_CPPFLAGS="${CPPFLAGS}" CGO_CFLAGS="${CFLAGS}" CGO_CXXFLAGS="${CXXFLAGS}" CGO_LDFLAGS="${LDFLAGS}" \ go build -trimpath \ - -buildmode=pie \ -mod=readonly \ -modcacherw \ -ldflags "-linkmode external -extldflags \"${LDFLAGS}\"" diff --git a/bore/PKGBUILD b/bore/PKGBUILD index c661321155..019564a944 100644 --- a/bore/PKGBUILD +++ b/bore/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('55d783a46e25393fc003d9c90760c141692af88fe88fecfc27c632bdc6d33523096 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/borg/PKGBUILD b/borg/PKGBUILD index b7754cd501..e318eec732 100644 --- a/borg/PKGBUILD +++ b/borg/PKGBUILD @@ -30,7 +30,7 @@ build() { } check() { - cd "$_pkgname-$pkgver/build/lib.linux-$CARCH-cpython-"*/ + cd "$_pkgname-$pkgver/build/lib.linux-`uname -m`-cpython-"*/ PYTHONPATH=$PWD PYTHONDONTWRITEBYTECODE=1 pytest -k 'not benchmark' } diff --git a/bottom/PKGBUILD b/bottom/PKGBUILD index 12732a5662..4fc5eed06c 100644 --- a/bottom/PKGBUILD +++ b/bottom/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('201484c33cb9978776fe089a04b0b231cfaf719c9210f678ba3909f50cd2a078295e3fc prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/box2d/PKGBUILD b/box2d/PKGBUILD index 31a0e8510f..6ebe55f6a8 100644 --- a/box2d/PKGBUILD +++ b/box2d/PKGBUILD @@ -11,8 +11,15 @@ depends=('gcc-libs') makedepends=('cmake' 'doctest' 'doxygen' 'ninja') # We're going to this alternate fork until the patches are upstreamed. 
# See https://github.com/erincatto/box2d/issues/621 -source=("$pkgname-$pkgver.tar.gz::https://github.com/erincatto/Box2D/archive/v${pkgver}.tar.gz") -sha512sums=('d900f925b77906777719c91488bdc5e2df1ad1f4a8ca39a574229f5e57070e3a843bdd7530e817112605fde6d82145c872d8afdfc65b84531a73199098c81162') +source=("$pkgname-$pkgver.tar.gz::https://github.com/erincatto/Box2D/archive/v${pkgver}.tar.gz" + box2d-fix-build.patch) +sha512sums=('d900f925b77906777719c91488bdc5e2df1ad1f4a8ca39a574229f5e57070e3a843bdd7530e817112605fde6d82145c872d8afdfc65b84531a73199098c81162' + '74055d49f0b9f601c2e68576aa3e0ef43c061beb428cda3de847d5b9fb8bf6adb74b69521264f9aea048d268e5104be6bafdcfb0cfb09aec1de1662d263235e9') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/box2d-fix-build.patch +} prepare() { # Use system doctest diff --git a/box2d/box2d-fix-build.patch b/box2d/box2d-fix-build.patch new file mode 100644 index 0000000000..0f89e2c772 --- /dev/null +++ b/box2d/box2d-fix-build.patch @@ -0,0 +1,13 @@ +Index: box2d-2.4.1/unit-test/doctest.h +=================================================================== +--- box2d-2.4.1.orig/unit-test/doctest.h ++++ box2d-2.4.1/unit-test/doctest.h +@@ -4018,7 +4018,7 @@ namespace { + static bool isSet; + static struct sigaction oldSigActions[DOCTEST_COUNTOF(signalDefs)]; + static stack_t oldSigStack; +- static char altStackMem[4 * SIGSTKSZ]; ++ static char altStackMem[65536]; + + static void handleSignal(int sig) { + const char* name = ""; diff --git a/boxxy/PKGBUILD b/boxxy/PKGBUILD index 8577810451..97e8be02d7 100644 --- a/boxxy/PKGBUILD +++ b/boxxy/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/breezy/PKGBUILD b/breezy/PKGBUILD index a893cbe2f7..6ac69bacbb 100644 --- a/breezy/PKGBUILD +++ b/breezy/PKGBUILD @@ -40,14 +40,21 @@ provides=(bzr) conflicts=(bzr) replaces=(bzr) _tag=d206a54af4040025561cc9159fa5559c14a7ef46 -source=(git+https://github.com/breezy-team/breezy.git#tag=${_tag}) -sha256sums=(SKIP) +source=(git+https://github.com/breezy-team/breezy.git#tag=${_tag} +breezy-fix-install.patch) +sha256sums=('SKIP' + '1e778eae61605bd8a99d57ce97da4ac9ca74fe1b86c371fd55805e96aeb347e8') pkgver() { cd breezy git describe --tags | sed 's/brz-//; s/-/./g' } +prepare() { + cd breezy + patch -p1 -i "$srcdir/breezy-fix-install.patch" +} + build() { cd breezy python -m build --wheel --no-isolation diff --git a/breezy/breezy-fix-install.patch b/breezy/breezy-fix-install.patch new file mode 100644 index 0000000000..94918fc9eb --- /dev/null +++ b/breezy/breezy-fix-install.patch @@ -0,0 +1,15 @@ +--- aaa/setup.py 2023-11-03 11:00:28.748584492 +0800 ++++ /tmp/setup.py 2023-11-03 10:56:36.364171431 +0800 +@@ -193,12 +193,6 @@ + + # ad-hoc for easy_install + DATA_FILES = [] +-if ('bdist_egg' not in sys.argv and 'bdist_wheel' not in sys.argv +- and 'editable_wheel' not in sys.argv): +- # generate and install brz.1 only with plain install, not the +- # easy_install one +- build.sub_commands.append(('build_man', lambda _: True)) +- DATA_FILES = [('man/man1', ['brz.1', 'breezy/git/git-remote-bzr.1'])] + + import site + diff --git a/brltty/PKGBUILD b/brltty/PKGBUILD index 8fe68730ab..16960e56fc 100644 --- a/brltty/PKGBUILD +++ b/brltty/PKGBUILD @@ -117,7 +117,6 @@ package_brltty() { 'libx11: for xbrlapi' 'libxfixes: for xbrlapi' 'libxtst: for xbrlapi' - 'ocaml: OCaml support' 'python: Python support' 
'speech-dispatcher: speech-dispatcher driver' 'tcl: tcl support' diff --git a/broadcom-wl/PKGBUILD b/broadcom-wl/PKGBUILD index 3058b83539..84bfb3269c 100644 --- a/broadcom-wl/PKGBUILD +++ b/broadcom-wl/PKGBUILD @@ -25,7 +25,7 @@ package() { _extramodules="/usr/lib/modules/${_kernver}/extramodules" install -Dm644 -t "${pkgdir}${_extramodules}" \ - ${_module}/${pkgver}/${_kernver}/${CARCH}/module/* + ${_module}/${pkgver}/${_kernver}/`uname -m`/module/* # compress kernel modules find "$pkgdir" -name "*.ko" -exec xz {} + diff --git a/broot/PKGBUILD b/broot/PKGBUILD index b56c320fae..4d9a46ea6c 100644 --- a/broot/PKGBUILD +++ b/broot/PKGBUILD @@ -18,7 +18,7 @@ sha256sums=('0b9bf4a0dfa8a9cdcefcf18222dba4025379a8fa19190075835a99a507ae3d73') prepare() { cd $pkgname-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/buildkit/0001-add-loongarch64-support.patch b/buildkit/0001-add-loongarch64-support.patch new file mode 100644 index 0000000000..2833a2b6bd --- /dev/null +++ b/buildkit/0001-add-loongarch64-support.patch @@ -0,0 +1,130 @@ +From e94d2e706531af3efd0235f1d8c7c6fdf31ab5eb Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 13 Nov 2023 10:14:01 +0800 +Subject: [PATCH] add loongarch64 support + +--- + util/archutil/Dockerfile | 6 ++++-- + util/archutil/detect.go | 10 ++++++++++ + util/archutil/fixtures/exit.loongarch64.s | 6 ++++++ + util/archutil/loong64_binary.go | 9 +++++++++ + util/archutil/loong64_check.go | 8 ++++++++ + util/archutil/loong64_check_loong64.go | 8 ++++++++ + 6 files changed, 45 insertions(+), 2 deletions(-) + create mode 100644 util/archutil/fixtures/exit.loongarch64.s + create mode 100644 util/archutil/loong64_binary.go + create mode 100644 util/archutil/loong64_check.go + create mode 100644 util/archutil/loong64_check_loong64.go + +diff --git a/util/archutil/Dockerfile b/util/archutil/Dockerfile +index 2b24b230b..df161291b 100644 +--- a/util/archutil/Dockerfile ++++ b/util/archutil/Dockerfile +@@ -8,7 +8,8 @@ RUN apt-get update && apt-get --no-install-recommends install -y \ + binutils-s390x-linux-gnu \ + binutils-powerpc64le-linux-gnu \ + binutils-mips64el-linux-gnuabi64 \ +- binutils-mips64-linux-gnuabi64 ++ binutils-mips64-linux-gnuabi64 \ ++ binutils-loongarch64-linux-gnu + WORKDIR /src + + +@@ -64,9 +65,10 @@ COPY --from=exit-ppc64 /src/exit ppc64 + COPY --from=exit-ppc64le /src/exit ppc64le + COPY --from=exit-mips64le /src/exit mips64le + COPY --from=exit-mips64 /src/exit mips64 ++COPY --from=exit-loong64 /src/exit loong64 + COPY generate.go . 
+ +-RUN go run generate.go amd64 386 arm64 arm riscv64 s390x ppc64 ppc64le mips64le mips64 && ls -l ++RUN go run generate.go amd64 386 arm64 arm riscv64 s390x ppc64 ppc64le mips64le mips64 loong64 && ls -l + + + FROM scratch +diff --git a/util/archutil/detect.go b/util/archutil/detect.go +index 782644127..b36726c92 100644 +--- a/util/archutil/detect.go ++++ b/util/archutil/detect.go +@@ -78,6 +78,11 @@ func SupportedPlatforms(noCache bool) []ocispecs.Platform { + arr = append(arr, linux(p)) + } + } ++ if p := "loong64"; def.Architecture != p { ++ if _, err := loong64Supported(); err == nil { ++ arr = append(arr, linux(p)) ++ } ++ } + if p := "arm"; def.Architecture != p { + if _, err := armSupported(); err == nil { + p := linux("arm") +@@ -144,6 +149,11 @@ func WarnIfUnsupported(pfs []ocispecs.Platform) { + printPlatformWarning(p, err) + } + } ++ if p.Architecture == "loong64" { ++ if _, err := loong64Supported(); err != nil { ++ printPlatformWarning(p, err) ++ } ++ } + if p.Architecture == "arm" { + if _, err := armSupported(); err != nil { + printPlatformWarning(p, err) +diff --git a/util/archutil/fixtures/exit.loongarch64.s b/util/archutil/fixtures/exit.loongarch64.s +new file mode 100644 +index 000000000..478cd622c +--- /dev/null ++++ b/util/archutil/fixtures/exit.loongarch64.s +@@ -0,0 +1,6 @@ ++ .global _start ++ .text ++_start: ++ li.w $a0,0 ++ li.w $a7,93 ++ syscall 0 +diff --git a/util/archutil/loong64_binary.go b/util/archutil/loong64_binary.go +new file mode 100644 +index 000000000..fa85a4553 +--- /dev/null ++++ b/util/archutil/loong64_binary.go +@@ -0,0 +1,9 @@ ++//go:build !loong64 ++// +build !loong64 ++ ++package archutil ++ ++// This file is generated by running make inside the archutil package. ++// Do not edit manually. ++ ++const Binaryloong64 = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xaa\x77\xf5\x71\x63\x62\x64\x64\x80\x01\x26\x06\x08\x6f\x03\x03\x83\x02\x88\x76\x80\x8a\x5f\x80\xd2\xcc\x60\x31\x0b\x06\x26\x06\x07\x06\x66\x06\x26\x06\x90\x1a\x56\x06\x14\xa0\xc0\x88\x44\xef\x81\x0a\xc2\x68\x98\x81\x81\x4f\x4b\x52\xd8\x18\x88\x07\x02\x50\x9a\x85\x41\x94\x81\xbb\xa4\x91\x99\x81\x41\x9b\x81\x41\xaf\x38\xa3\xb8\xa4\xa8\x24\x31\x89\x41\xaf\x24\xb5\xa2\x84\x81\x0a\x80\x9b\x81\x01\xec\x27\x98\xdb\x60\xe1\xb0\x01\xca\xe7\x41\x53\xcf\x88\x85\xcf\x8c\xc5\x5c\x98\xff\x05\x09\xe8\x07\x04\x00\x00\xff\xff\x31\xd2\xf1\xb5\x90\x01\x00\x00" +diff --git a/util/archutil/loong64_check.go b/util/archutil/loong64_check.go +new file mode 100644 +index 000000000..9bc966ce2 +--- /dev/null ++++ b/util/archutil/loong64_check.go +@@ -0,0 +1,8 @@ ++//go:build !loong64 ++// +build !loong64 ++ ++package archutil ++ ++func loong64Supported() (string, error) { ++ return check("loong64", Binaryloong64) ++} +diff --git a/util/archutil/loong64_check_loong64.go b/util/archutil/loong64_check_loong64.go +new file mode 100644 +index 000000000..b801c5938 +--- /dev/null ++++ b/util/archutil/loong64_check_loong64.go +@@ -0,0 +1,8 @@ ++//go:build loong64 ++// +build loong64 ++ ++package archutil ++ ++func loong64Supported() (string, error) { ++ return "", nil ++} +-- +2.42.0 + diff --git a/buildkit/PKGBUILD b/buildkit/PKGBUILD index 5f874d19c7..3a0143ad43 100644 --- a/buildkit/PKGBUILD +++ b/buildkit/PKGBUILD @@ -12,8 +12,10 @@ depends=('runc' 'containerd') makedepends=('git' 'go') options=('!lto') _commit='567a99433ca23402d5e9b9f9124005d2e59b8861' -source=("$pkgname::git+$url.git#commit=$_commit") -b2sums=('SKIP') +source=("$pkgname::git+$url.git#commit=$_commit" +0001-add-loongarch64-support.patch) 
+b2sums=('SKIP' + '633f1e8e9e66c38f3a21a6c3af14721efd6a2cc6c0201e8492d333a86461a5783eec2c144edfe1ef17ee8385fa29588f30e88d26cf49ce8b477480be3e5459be') pkgver() { cd "$pkgname" @@ -28,6 +30,7 @@ prepare() { # fix paths in systemd unit files sed -i 's:/usr/local:/usr:' \ examples/systemd/{system,user}/*.service + patch -p1 -i $srcdir/0001-add-loongarch64-support.patch # create directory for build output mkdir build @@ -46,6 +49,10 @@ build() { export CGO_CXXFLAGS="${CXXFLAGS}" export GOPATH="${srcdir}" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.13.0 + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod tidy + local package='github.com/moby/buildkit' go build -v \ diff --git a/bupstash/PKGBUILD b/bupstash/PKGBUILD index 9385d3e396..3f1f5b4201 100644 --- a/bupstash/PKGBUILD +++ b/bupstash/PKGBUILD @@ -25,7 +25,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd "$pkgname" diff --git a/caddy/PKGBUILD b/caddy/PKGBUILD index f8ad1ddd82..fe15df98b9 100644 --- a/caddy/PKGBUILD +++ b/caddy/PKGBUILD @@ -64,6 +64,9 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export GOFLAGS="-buildmode=pie -trimpath -ldflags=-linkmode=external -mod=readonly -modcacherw" + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@master + go mod tidy + go build . for i in zsh bash fish; do diff --git a/calf/PKGBUILD b/calf/PKGBUILD index c9a83c4070..0334824c4f 100644 --- a/calf/PKGBUILD +++ b/calf/PKGBUILD @@ -38,7 +38,7 @@ prepare(){ build() { local configure_options=( --enable-experimental - --enable-sse + --disable-sse --prefix=/usr --with-lv2 ) diff --git a/capnet-assist/PKGBUILD b/capnet-assist/PKGBUILD index b70ae7028b..00cb2c6344 100644 --- a/capnet-assist/PKGBUILD +++ b/capnet-assist/PKGBUILD @@ -13,6 +13,8 @@ depends=( glib2 gtk3 libgranite.so + + libhandy-1.so libsoup networkmanager @@ -33,6 +35,12 @@ pkgver() { git describe --tags } +prepare() { + cd capnet-assist + sed -i '7d' data/meson.build + sed -i '16d' data/meson.build +} + build() { arch-meson capnet-assist build \ -D b_pie=false diff --git a/cargo-audit/PKGBUILD b/cargo-audit/PKGBUILD index ce9e916bee..8b8192409b 100644 --- a/cargo-audit/PKGBUILD +++ b/cargo-audit/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('a3fd2dd5d2382fb5dc4733af86a3a9535154e4f8f846f8c8f013f270bd4ac3932f8070e prepare() { cd rustsec-${pkgname}-v${pkgver}/${pkgname} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-auditable/PKGBUILD b/cargo-auditable/PKGBUILD index 6f9508be55..7db46c6956 100644 --- a/cargo-auditable/PKGBUILD +++ b/cargo-auditable/PKGBUILD @@ -17,7 +17,7 @@ sha512sums=('191b6ef15436bd3c6a9b4666e80de5a085afe00f8ee3793040fc5e5f78eecc25d45 prepare() { cd "$srcdir/$pkgname-$pkgver" patch -Np1 -i "../$pkgname-$pkgver-cargo-lock.patch" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-bloat/PKGBUILD b/cargo-bloat/PKGBUILD index 8f750ea676..7630c8e653 100644 --- a/cargo-bloat/PKGBUILD +++ b/cargo-bloat/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('4f338c1a7f7ee6bcac150f7856ed1f32cf8d9009cfd513ca6c1aac1e6685c35f') prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname 
-m`-unknown-linux-gnu" } build() { diff --git a/cargo-c/PKGBUILD b/cargo-c/PKGBUILD index 60223ec3ad..ac384b4564 100644 --- a/cargo-c/PKGBUILD +++ b/cargo-c/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('a52bb78cf6db00aa1caf06c679cfece27357c84367d8ac167d715e05e5f5a778' prepare() { ln -sf "../${pkgname}-${pkgver}.Cargo.lock" "${pkgname}-${pkgver}/Cargo.lock" - cargo fetch --locked --target "${CARCH}-unknown-linux-gnu" --manifest-path="${pkgname}-${pkgver}/Cargo.toml" + cargo fetch --locked --manifest-path="${pkgname}-${pkgver}/Cargo.toml" } build() { diff --git a/cargo-cyclonedx/PKGBUILD b/cargo-cyclonedx/PKGBUILD index de6036a83e..c51f0130d9 100644 --- a/cargo-cyclonedx/PKGBUILD +++ b/cargo-cyclonedx/PKGBUILD @@ -21,7 +21,7 @@ b2sums=('c1907710867b3c1342cc5c9661a095c5c4f62c52a8284eccba8ff71398d933667924df6 prepare() { cd $_upstream_name-$pkgname-$pkgver export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-depgraph/PKGBUILD b/cargo-depgraph/PKGBUILD index 089a877bb1..a7539be79e 100644 --- a/cargo-depgraph/PKGBUILD +++ b/cargo-depgraph/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('d447316253217e0157af027c50bca10e84eba9f27b4f7c9642bcf38ad36d4766' prepare() { cd "$pkgname-$pkgver" patch -Np1 -i "$srcdir/$pkgname-$pkgver-lockfile.patch" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" } build() { diff --git a/cargo-edit/PKGBUILD b/cargo-edit/PKGBUILD index bf4c5bed03..d095bbdfc7 100644 --- a/cargo-edit/PKGBUILD +++ b/cargo-edit/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('37e91b5eb41fd56e2be382ee77bd6a6c859d1e1d7c99d45c2597e1a24194ea79ad1c563 prepare() { cd "${pkgname}-${pkgver}" sed -i '/\"vendored-libgit2\"/d' Cargo.toml - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-expand/PKGBUILD b/cargo-expand/PKGBUILD index 33eb14c8b5..35bca5f34d 100644 --- a/cargo-expand/PKGBUILD +++ b/cargo-expand/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-geiger/PKGBUILD b/cargo-geiger/PKGBUILD index eb5f8ae45f..39a7bdc300 100644 --- a/cargo-geiger/PKGBUILD +++ b/cargo-geiger/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { mv "$pkgname-$pkgname-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-generate/PKGBUILD b/cargo-generate/PKGBUILD index a1eba51572..5e97546f6a 100644 --- a/cargo-generate/PKGBUILD +++ b/cargo-generate/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('520e7a98bf82f368e911c14e774f8ef16a4c8ffd785d492c9d518ee563dc3864') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-insta/PKGBUILD b/cargo-insta/PKGBUILD index 1088ebd7c0..5049b5775c 100644 --- a/cargo-insta/PKGBUILD +++ b/cargo-insta/PKGBUILD @@ -17,7 +17,7 @@ sha512sums=('0ee791792e5324f8e53efb645ae862965b973498059cd32e66f379a971d5ffadac2 prepare() { cd "$pkgname-$pkgver" export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target 
"`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-machete/PKGBUILD b/cargo-machete/PKGBUILD index 608c786fba..88725e4382 100644 --- a/cargo-machete/PKGBUILD +++ b/cargo-machete/PKGBUILD @@ -13,7 +13,7 @@ sha256sums=('a13fab0c5ff64907e6b39dee054e5e9c4278fbe06065ff5bfcb160a5c1d204ea') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-msrv/PKGBUILD b/cargo-msrv/PKGBUILD index 56b9f2cc8b..86de6152b0 100644 --- a/cargo-msrv/PKGBUILD +++ b/cargo-msrv/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-ndk/PKGBUILD b/cargo-ndk/PKGBUILD index 758c926256..7d32b53f30 100644 --- a/cargo-ndk/PKGBUILD +++ b/cargo-ndk/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('7756f00ff040030c64e6590ec6ffe59245165b9c78350462d960e5ff6fe12dcd') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-outdated/PKGBUILD b/cargo-outdated/PKGBUILD index e6dffe7a0b..09a4c8e5e7 100644 --- a/cargo-outdated/PKGBUILD +++ b/cargo-outdated/PKGBUILD @@ -20,7 +20,7 @@ options=('!lto') prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-pgrx/PKGBUILD b/cargo-pgrx/PKGBUILD index b6f2a3bd13..d7435d1672 100644 --- a/cargo-pgrx/PKGBUILD +++ b/cargo-pgrx/PKGBUILD @@ -26,7 +26,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-sort/PKGBUILD b/cargo-sort/PKGBUILD index bbe3b5b55b..4944d3abe3 100644 --- a/cargo-sort/PKGBUILD +++ b/cargo-sort/PKGBUILD @@ -19,7 +19,7 @@ prepare() { cd "$pkgname-$pkgver" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-spellcheck/PKGBUILD b/cargo-spellcheck/PKGBUILD index 356c499b53..b3eceb53a7 100644 --- a/cargo-spellcheck/PKGBUILD +++ b/cargo-spellcheck/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/cargo-supply-chain/PKGBUILD b/cargo-supply-chain/PKGBUILD index 8d1806c144..74530895db 100644 --- a/cargo-supply-chain/PKGBUILD +++ b/cargo-supply-chain/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('c196e9b8bd6882c6fa4360f27d623ff91275a0209612a1b74043a9869ead3e21557052d prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-tarpaulin/PKGBUILD b/cargo-tarpaulin/PKGBUILD index bd03da7725..2569570aa0 100644 --- a/cargo-tarpaulin/PKGBUILD +++ b/cargo-tarpaulin/PKGBUILD @@ -28,7 +28,7 @@ b2sums=('SKIP') prepare() { cargo fetch \ --locked \ - --target $CARCH-unknown-linux-gnu \ + --target `uname -m`-unknown-linux-gnu \ --manifest-path tarpaulin/Cargo.toml } diff --git a/cargo-tauri/PKGBUILD b/cargo-tauri/PKGBUILD index 
f3bd33b2af..9d5dfc08eb 100644 --- a/cargo-tauri/PKGBUILD +++ b/cargo-tauri/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { mv "$_pkgname-tauri-cli-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver/tooling/cli" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/cargo-udeps/PKGBUILD b/cargo-udeps/PKGBUILD index 05aa82f5a1..217f1ec32f 100644 --- a/cargo-udeps/PKGBUILD +++ b/cargo-udeps/PKGBUILD @@ -15,14 +15,14 @@ sha256sums=('e5839d74071c44efb44ae33859ff438ff5823c007960889f567b2c2c33cff4d1') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { cd "$pkgname-$pkgver" export LIBSSH2_SYS_USE_PKG_CONFIG=1 CFLAGS+=" -ffat-lto-objects" - cargo build --release --frozen + cargo build --release } # Tests require rustup nightly diff --git a/cargo-update/PKGBUILD b/cargo-update/PKGBUILD index 4d11ce49ca..98ddb27abd 100644 --- a/cargo-update/PKGBUILD +++ b/cargo-update/PKGBUILD @@ -19,7 +19,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" cp "$srcdir/Cargo.lock" . - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-watch/PKGBUILD b/cargo-watch/PKGBUILD index 8126017898..5ff224d1a6 100644 --- a/cargo-watch/PKGBUILD +++ b/cargo-watch/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('f6b1a250b0ba4a79d525f7d2038f1a4ddae1495261c38cff1c32f6f73dae500a689dfcd prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo2junit/PKGBUILD b/cargo2junit/PKGBUILD index b483e643a2..6eeacc6413 100644 --- a/cargo2junit/PKGBUILD +++ b/cargo2junit/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('647c41ce7416421f41dee298a6fb99ad8be7c584c7c16c1a7926720eb9777376') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cbindgen/PKGBUILD b/cbindgen/PKGBUILD index cf1977d861..1ceb6614a7 100644 --- a/cbindgen/PKGBUILD +++ b/cbindgen/PKGBUILD @@ -34,7 +34,7 @@ pkgver() { prepare() { cd cbindgen - cargo fetch --locked --target x86_64-unknown-linux-gnu + cargo fetch --locked --target `uname -m`-unknown-linux-gnu } build() { diff --git a/cdparanoia/PKGBUILD b/cdparanoia/PKGBUILD index e69b576a4b..17143d000c 100644 --- a/cdparanoia/PKGBUILD +++ b/cdparanoia/PKGBUILD @@ -31,6 +31,8 @@ prepare() { build() { cd cdparanoia-III-$pkgver + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr --mandir=/usr/share/man make } diff --git a/cdrtools/PKGBUILD b/cdrtools/PKGBUILD index 535b5f58e4..2018940579 100644 --- a/cdrtools/PKGBUILD +++ b/cdrtools/PKGBUILD @@ -12,10 +12,17 @@ provides=('cdrkit') replaces=('cdrkit') options=(!makeflags) backup=('etc/default/cdrecord' 'etc/default/rscsi') -source=(https://downloads.sourceforge.net/cdrtools/cdrtools-$pkgver.tar.bz2) -sha256sums=('aa28438f458ef3f314b79f2029db27679dae1d5ffe1569b6de57742511915e81') +source=(https://downloads.sourceforge.net/cdrtools/cdrtools-$pkgver.tar.bz2 + cdrtools-la64.patch) +sha256sums=('aa28438f458ef3f314b79f2029db27679dae1d5ffe1569b6de57742511915e81' + '2a3925ae1293a84277179497d7f0f17789447f8fa72c90f012ac36d36b3edc30') install=cdrtools.install +prepare() { + cd 
"$srcdir"/cdrtools-${pkgver%%a*} + patch -p1 -i "$srcdir/cdrtools-la64.patch" +} + build() { cd "$srcdir"/cdrtools-${pkgver%%a*} sed -i 's|/opt/schily|/usr|g' DEFAULTS/Defaults.linux diff --git a/cdrtools/cdrtools-la64.patch b/cdrtools/cdrtools-la64.patch new file mode 100644 index 0000000000..db1227b2ac --- /dev/null +++ b/cdrtools/cdrtools-la64.patch @@ -0,0 +1,16 @@ +Index: cdrtools-3.02/RULES/MKLINKS +=================================================================== +--- cdrtools-3.02.orig/RULES/MKLINKS ++++ cdrtools-3.02/RULES/MKLINKS +@@ -262,6 +262,11 @@ $symlink i586-linux-clang64.rul aarch64- + $symlink i586-linux-gcc.rul aarch64-linux-gcc.rul + $symlink i586-linux-gcc32.rul aarch64-linux-gcc32.rul + $symlink i586-linux-gcc64.rul aarch64-linux-gcc64.rul ++$symlink i586-linux-cc.rul loongarch64-linux-cc.rul ++$symlink i586-linux-clang.rul loongarch64-linux-clang.rul ++$symlink i586-linux-clang64.rul loongarch64-linux-clang64.rul ++$symlink i586-linux-gcc.rul loongarch64-linux-gcc.rul ++$symlink i586-linux-gcc64.rul loongarch64-linux-gcc64.rul + $symlink i586-linux-cc.rul sh3-linux-cc.rul + $symlink i586-linux-gcc.rul sh3-linux-gcc.rul + $symlink i586-linux-cc.rul sh4-linux-cc.rul diff --git a/chezmoi/PKGBUILD b/chezmoi/PKGBUILD index ebee5ffa5b..b5dcf9e302 100644 --- a/chezmoi/PKGBUILD +++ b/chezmoi/PKGBUILD @@ -25,6 +25,11 @@ prepare() { build() { cd "$pkgname-$pkgver" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@master + go mod tidy export CGO_LDFLAGS="${LDFLAGS}" export CGO_CFLAGS="${CFLAGS}" diff --git a/chmlib/PKGBUILD b/chmlib/PKGBUILD index 617a5cf5cb..df56185a35 100644 --- a/chmlib/PKGBUILD +++ b/chmlib/PKGBUILD @@ -10,8 +10,15 @@ arch=('loong64' 'x86_64') url="http://www.jedrea.com/chmlib/" license=('LGPL') depends=('glibc') -source=("http://www.jedrea.com/chmlib/chmlib-0.40.tar.bz2") -sha256sums=('3449d64b0cf71578b2c7e3ddc048d4af3661f44a83941ea074a7813f3a59ffa3') +source=("http://www.jedrea.com/chmlib/chmlib-0.40.tar.bz2" + chmlib-fix-gcc13.patch) +sha256sums=('3449d64b0cf71578b2c7e3ddc048d4af3661f44a83941ea074a7813f3a59ffa3' + 'b5f792c16b01c9ad0a08e4f2c55134c0bb5d93096f1d5634fd65a0da8ac5ab9e') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/chmlib-fix-gcc13.patch" +} build() { cd "${srcdir}"/${pkgname}-${pkgver} diff --git a/chmlib/chmlib-fix-gcc13.patch b/chmlib/chmlib-fix-gcc13.patch new file mode 100644 index 0000000000..1c32dc614b --- /dev/null +++ b/chmlib/chmlib-fix-gcc13.patch @@ -0,0 +1,25 @@ +Index: chmlib-0.40/src/chm_http.c +=================================================================== +--- chmlib-0.40.orig/src/chm_http.c ++++ chmlib-0.40/src/chm_http.c +@@ -42,6 +42,7 @@ + /* includes for networking */ + #include + #include ++#include + #include + + /* threading includes */ +Index: chmlib-0.40/src/chm_lib.c +=================================================================== +--- chmlib-0.40.orig/src/chm_lib.c ++++ chmlib-0.40/src/chm_lib.c +@@ -164,7 +164,7 @@ typedef unsigned long long UInt64; + + /* x86-64 */ + /* Note that these may be appropriate for other 64-bit machines. 
*/ +-#elif __x86_64__ || __ia64__ ++#elif __x86_64__ || __ia64__ || __loongarch_lp64 + typedef unsigned char UChar; + typedef short Int16; + typedef unsigned short UInt16; diff --git a/choose/PKGBUILD b/choose/PKGBUILD index 50d6b223d0..ec05a1c13a 100644 --- a/choose/PKGBUILD +++ b/choose/PKGBUILD @@ -25,7 +25,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/clamav/PKGBUILD b/clamav/PKGBUILD index 15409b5f97..7d0052acef 100644 --- a/clamav/PKGBUILD +++ b/clamav/PKGBUILD @@ -14,7 +14,7 @@ arch=('loong64' 'x86_64') depends=('bzip2' 'libltdl' 'libxml2' 'curl' 'systemd-libs' 'pcre2' 'json-c' 'libmspack' 'ncurses' libsystemd.so libncursesw.so libcurl.so libjson-c.so libbz2.so) makedepends=('libmilter' 'systemd' 'cmake' 'ninja' 'python' 'cargo') -checkdepends=('check') +makedepends+=('check') backup=('etc/clamav/clamd.conf' 'etc/clamav/freshclam.conf' 'etc/clamav/clamav-milter.conf' diff --git a/clang14/0001-add-loong64-support.patch b/clang14/0001-add-loong64-support.patch new file mode 100644 index 0000000000..4c6ae8d62f --- /dev/null +++ b/clang14/0001-add-loong64-support.patch @@ -0,0 +1,16205 @@ +From e5f62e4b6e97e38353668baeac0ef7219992aa63 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Tue, 20 Dec 2022 18:53:42 +0800 +Subject: [PATCH 1/2] add loong64 support + +--- + bindings/python/tests/CMakeLists.txt | 2 +- + include/clang/Basic/BuiltinsLoongArch.def | 1974 ++++++ + include/clang/Basic/DiagnosticDriverKinds.td | 2 + + include/clang/Basic/TargetBuiltins.h | 13 +- + include/clang/Basic/TargetCXXABI.def | 6 + + include/clang/Basic/TargetCXXABI.h | 6 + + include/clang/Driver/Options.td | 21 +- + include/clang/Sema/Sema.h | 3 + + include/clang/module.modulemap | 1 + + lib/AST/ASTContext.cpp | 2 + + lib/Basic/CMakeLists.txt | 1 + + lib/Basic/Targets.cpp | 20 + + lib/Basic/Targets/LoongArch.cpp | 149 + + lib/Basic/Targets/LoongArch.h | 352 ++ + lib/CodeGen/CodeGenFunction.cpp | 36 +- + lib/CodeGen/CodeGenFunction.h | 4 + + lib/CodeGen/CodeGenModule.cpp | 17 +- + lib/CodeGen/CodeGenModule.h | 5 - + lib/CodeGen/ItaniumCXXABI.cpp | 3 + + lib/CodeGen/TargetInfo.cpp | 555 ++ + lib/Driver/CMakeLists.txt | 1 + + lib/Driver/Driver.cpp | 16 + + lib/Driver/SanitizerArgs.cpp | 13 - + lib/Driver/ToolChains/Arch/LoongArch.cpp | 179 + + lib/Driver/ToolChains/Arch/LoongArch.h | 41 + + lib/Driver/ToolChains/Clang.cpp | 55 + + lib/Driver/ToolChains/Clang.h | 4 + + lib/Driver/ToolChains/CommonArgs.cpp | 21 + + lib/Driver/ToolChains/Gnu.cpp | 65 + + lib/Driver/ToolChains/Linux.cpp | 24 +- + lib/Driver/ToolChains/Linux.h | 5 - + lib/Driver/XRayArgs.cpp | 2 + + lib/Headers/CMakeLists.txt | 3 + + lib/Headers/larchintrin.h | 319 + + lib/Headers/lasxintrin.h | 5349 +++++++++++++++++ + lib/Headers/lsxintrin.h | 5165 ++++++++++++++++ + lib/Sema/SemaChecking.cpp | 544 ++ + lib/Sema/SemaTemplateInstantiateDecl.cpp | 5 +- + test/CodeGen/sanitize-coverage-old-pm.c | 4 +- + test/CodeGen/ubsan-function.cpp | 5 +- + test/CodeGenCXX/catch-undef-behavior.cpp | 37 +- + test/CodeGenCXX/ubsan-function-noexcept.cpp | 6 +- + test/Driver/baremetal-sysroot.cpp | 2 +- + test/Driver/baremetal.cpp | 2 +- + test/Driver/fsanitize.c | 9 +- + test/Driver/hexagon-toolchain-linux.c | 4 +- + test/Driver/mips-cs.cpp | 48 +- + test/Driver/stack-protector.c | 4 +- + test/Preprocessor/init.c | 30 + + .../InterpreterExceptionTest.cpp | 5 + + 50 files changed, 15017 insertions(+), 122 deletions(-) + create 
mode 100644 include/clang/Basic/BuiltinsLoongArch.def + create mode 100644 lib/Basic/Targets/LoongArch.cpp + create mode 100644 lib/Basic/Targets/LoongArch.h + create mode 100644 lib/Driver/ToolChains/Arch/LoongArch.cpp + create mode 100644 lib/Driver/ToolChains/Arch/LoongArch.h + create mode 100644 lib/Headers/larchintrin.h + create mode 100644 lib/Headers/lasxintrin.h + create mode 100644 lib/Headers/lsxintrin.h + +diff --git a/bindings/python/tests/CMakeLists.txt b/bindings/python/tests/CMakeLists.txt +index 280da9d0..9d9cb911 100644 +--- a/bindings/python/tests/CMakeLists.txt ++++ b/bindings/python/tests/CMakeLists.txt +@@ -40,7 +40,7 @@ endif() + # addressed. + # SystemZ has broken Python/FFI interface: + # https://reviews.llvm.org/D52840#1265716 +-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$") ++if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$") + set(RUN_PYTHON_TESTS FALSE) + endif() + +diff --git a/include/clang/Basic/BuiltinsLoongArch.def b/include/clang/Basic/BuiltinsLoongArch.def +new file mode 100644 +index 00000000..5606e62d +--- /dev/null ++++ b/include/clang/Basic/BuiltinsLoongArch.def +@@ -0,0 +1,1974 @@ ++//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --------*- C++ -*-==// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific builtin function database. Users of ++// this file must define the BUILTIN macro to make use of this information. ++// ++//===----------------------------------------------------------------------===// ++ ++// The format of this database matches clang/Basic/Builtins.def. 
++ ++// LoongArch LSX ++ ++BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sueq_s, 
"V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vldrepl_b, "V16cv*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_h, "V8sv*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_w, "V4iv*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_d, "V2LLiv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldx, "V16Scv*LLi", "nc") ++BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu_h, 
"V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcvt_s_d, 
"V4fV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc") ++BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc") ++BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc") ++BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc") ++BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc") ++BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc") ++BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc") 
++BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc") 
++BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc") ++BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc") 
++BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc") 
++BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc") ++ 
++BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc") ++ ++BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc") 
++BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc") ++BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc") ++BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc") 
++BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vld, "V16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc") ++ ++//LoongArch LASX ++ ++BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc") ++ ++ ++BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc") 
++BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", 
"nc") ++BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc") 
++BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc") 
++BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc") 
++BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc") ++ 
++BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc") ++BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc") ++ ++BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc") ++ 
++BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvld, "V32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvldx, "V32Scv*LLi", "nc") ++BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc") ++ ++BUILTIN(__builtin_lasx_xvldrepl_b, "V32cv*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_h, "V16sv*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_w, "V8iv*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLiv*Ii", "nc") ++ 
++BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") 
++BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") ++BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") ++BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_du_q, 
"V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") ++ ++ ++// LoongArch BASE ++ ++BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd, "UiIUi", "nc") ++BUILTIN(__builtin_loongarch_dcsrrd, "ULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrwr, "UiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_dcsrwr, "ULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg, "UiUiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_dcsrxchg, "ULiULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") ++BUILTIN(__builtin_loongarch_cacop, "viUii", "nc") ++BUILTIN(__builtin_loongarch_dcacop, "viULiLi", "nc") ++BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") ++BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_break, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") ++#undef BUILTIN +diff --git a/include/clang/Basic/DiagnosticDriverKinds.td b/include/clang/Basic/DiagnosticDriverKinds.td +index 3efedbe0..abcdec74 100644 +--- a/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/include/clang/Basic/DiagnosticDriverKinds.td +@@ -193,6 +193,8 @@ def err_drv_force_crash : Error< + "failing because %select{environment variable 'FORCE_CLANG_DIAGNOSTICS_CRASH' is set|'-gen-reproducer' is used}0">; + def err_drv_invalid_mfloat_abi : Error< + "invalid float ABI '%0'">; ++def err_drv_invalid_loongarch_mfpu : Error< ++ "invalid loongarch FPU value '%0'. 
Please specify FPU = 64,32 or none">; + def err_drv_invalid_mtp : Error< + "invalid thread pointer reading mode '%0'">; + def err_drv_missing_arg_mtp : Error< +diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h +index d4ea8e98..5f3851af 100644 +--- a/include/clang/Basic/TargetBuiltins.h ++++ b/include/clang/Basic/TargetBuiltins.h +@@ -145,6 +145,16 @@ namespace clang { + }; + } // namespace RISCV + ++ /// LoongArch builtins ++ namespace LoongArch { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsLoongArch.def" ++ LastTSBuiltin ++ }; ++ } // namespace LoongArch ++ + /// Flags to identify the types for overloaded Neon builtins. + /// + /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. +@@ -336,7 +346,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ LoongArch::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/include/clang/Basic/TargetCXXABI.def b/include/clang/Basic/TargetCXXABI.def +index 9501cca7..8ea4bece 100644 +--- a/include/clang/Basic/TargetCXXABI.def ++++ b/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic LoongArch ABI is a modified version of the Itanium ABI. ++/// ++/// At the moment, only change from the generic ABI in this case is: ++/// - representation of member function pointers adjusted as in ARM. ++ITANIUM_CXXABI(GenericLoongArch, "loongarch") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. 
+ /// + /// The changes from the Itanium ABI are: +diff --git a/include/clang/Basic/TargetCXXABI.h b/include/clang/Basic/TargetCXXABI.h +index e727f85e..507cf580 100644 +--- a/include/clang/Basic/TargetCXXABI.h ++++ b/include/clang/Basic/TargetCXXABI.h +@@ -102,6 +102,9 @@ public: + case GenericAArch64: + return T.isAArch64(); + ++ case GenericLoongArch: ++ return T.isLoongArch(); ++ + case GenericMIPS: + return T.isMIPS(); + +@@ -166,6 +169,7 @@ public: + case Fuchsia: + case GenericARM: + case GenericAArch64: ++ case GenericLoongArch: + case GenericMIPS: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any +@@ -250,6 +254,7 @@ public: + case GenericItanium: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return true; +@@ -288,6 +293,7 @@ public: + case GenericAArch64: + case GenericARM: + case iOS: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return UseTailPaddingUnlessPOD03; +diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td +index e0d21584..bbca2ae5 100644 +--- a/include/clang/Driver/Options.td ++++ b/include/clang/Driver/Options.td +@@ -176,6 +176,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISCV">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -3315,12 +3317,15 @@ def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, Group, + HelpText<"Enable use of experimental RISC-V extensions.">; + +-def munaligned_access : Flag<["-"], "munaligned-access">, Group, +- HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; +-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, +- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; ++def munaligned_access : Flag<["-"], "munaligned-access">, Group, ++ HelpText<"Allow memory accesses to be unaligned">; ++def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, ++ HelpText<"Force all memory accesses to be aligned">; + def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, + HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; ++def mno_strict_align : Flag<["-"], "mno-strict-align">, Group, ++ Flags<[CC1Option,HelpHidden]>, Alias, ++ HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; + def mno_thumb : Flag<["-"], "mno-thumb">, Group; + def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, + HelpText<"Disallow generation of deprecated IT blocks for ARMv8. 
It is on by default for ARMv8 Thumb mode.">; +@@ -3616,6 +3621,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Use LARCH Loongson LSX instructions.">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable LARCH Loongson LSX instructions.">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable LARCH Loongson LASX instructions.">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable LARCH Loongson LASX instructions.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h +index 4b609f4b..c6ee1053 100644 +--- a/include/clang/Sema/Sema.h ++++ b/include/clang/Sema/Sema.h +@@ -12749,6 +12749,9 @@ private: + bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); + bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); ++ bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall); + + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); +diff --git a/include/clang/module.modulemap b/include/clang/module.modulemap +index 2b73cd54..efc6aa21 100644 +--- a/include/clang/module.modulemap ++++ b/include/clang/module.modulemap +@@ -42,6 +42,7 @@ module Clang_Basic { + textual header "Basic/BuiltinsHexagon.def" + textual header "Basic/BuiltinsHexagonDep.def" + textual header "Basic/BuiltinsHexagonMapCustomDep.def" ++ textual header "Basic/BuiltinsLoongArch.def" + textual header "Basic/BuiltinsMips.def" + textual header "Basic/BuiltinsNEON.def" + textual header "Basic/BuiltinsNVPTX.def" +diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp +index e4b3827b..e56cd4ce 100644 +--- a/lib/AST/ASTContext.cpp ++++ b/lib/AST/ASTContext.cpp +@@ -901,6 +901,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -11651,6 +11652,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericARM: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::iOS: + case TargetCXXABI::WebAssembly: +diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt +index 40de9433..ac6bc570 100644 +--- a/lib/Basic/CMakeLists.txt ++++ b/lib/Basic/CMakeLists.txt +@@ -78,6 +78,7 @@ add_clang_library(clangBasic + Targets/Hexagon.cpp + Targets/Lanai.cpp + Targets/Le64.cpp ++ Targets/LoongArch.cpp + Targets/M68k.cpp + Targets/MSP430.cpp + Targets/Mips.cpp +diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp +index 994a491c..35f577f4 100644 +--- a/lib/Basic/Targets.cpp ++++ b/lib/Basic/Targets.cpp +@@ -22,6 +22,7 @@ + #include "Targets/Hexagon.h" + #include "Targets/Lanai.h" + #include "Targets/Le64.h" ++#include "Targets/LoongArch.h" + #include "Targets/M68k.h" + #include 
"Targets/MSP430.h" + #include "Targets/Mips.h" +@@ -325,6 +326,25 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, + case llvm::Triple::le64: + return new Le64TargetInfo(Triple, Opts); + ++#if 0 ++ //TODO: support it in future ++ case llvm::Triple::loongarch32: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++#endif ++ ++ case llvm::Triple::loongarch64: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++ + case llvm::Triple::ppc: + if (Triple.isOSDarwin()) + return new DarwinPPC32TargetInfo(Triple, Opts); +diff --git a/lib/Basic/Targets/LoongArch.cpp b/lib/Basic/Targets/LoongArch.cpp +new file mode 100644 +index 00000000..f94d9f09 +--- /dev/null ++++ b/lib/Basic/Targets/LoongArch.cpp +@@ -0,0 +1,149 @@ ++//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements LoongArch TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "Targets.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/MacroBuilder.h" ++#include "clang/Basic/TargetBuiltins.h" ++#include "llvm/ADT/StringSwitch.h" ++ ++using namespace clang; ++using namespace clang::targets; ++ ++const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = { ++#define BUILTIN(ID, TYPE, ATTRS) \ ++ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, ++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ ++ {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, ++#include "clang/Basic/BuiltinsLoongArch.def" ++}; ++ ++bool LoongArchTargetInfo::processorSupportsGPR64() const { ++ return llvm::StringSwitch(CPU) ++ .Cases("la464", "generic-la64", true) ++ .Default(false); ++ return false; ++} ++ ++static constexpr llvm::StringLiteral ValidCPUNames[] = { ++ {"la464"}, {"generic-la64"}, {"generic-la32"}}; ++ ++bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { ++ return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); ++} ++ ++void LoongArchTargetInfo::fillValidCPUList( ++ SmallVectorImpl &Values) const { ++ Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); ++} ++ ++void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const { ++ Builder.defineMacro("__loongarch__"); ++ ++ if (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") { ++ Builder.defineMacro("__loongarch_lp64"); ++ Builder.defineMacro("__loongarch64"); ++ Builder.defineMacro("_ABILP64", "3"); ++ Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64"); ++ } else ++ llvm_unreachable("Invalid ABI."); ++ ++ Builder.defineMacro("__REGISTER_PREFIX__", ""); ++ ++ if (HasLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ ++ if (HasLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ ++ Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(0))); ++ Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth())); ++ Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth())); ++ ++ Builder.defineMacro("_LOONGARCH_TUNE", "\"" + 
CPU + "\""); ++ Builder.defineMacro("_LOONGARCH_TUNE_" + StringRef(CPU).upper()); ++ ++ Builder.defineMacro("_LOONGARCH_ARCH", "\"" + getTriple().getArchName() + "\""); ++ Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(getTriple().getArchName()).upper()); ++ ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); ++ ++ // 32-bit loongarch processors don't have the necessary ll.d/sc.d instructions ++ // found in 64-bit processors. ++ if (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); ++ ++ // Bit-width of general purpose registers. ++ Builder.defineMacro("__loongarch_grlen", Twine(getRegisterWidth())); ++ ++ // Bit-width of floating-point registers. The possible values for ++ // this macro are 0, 32 and 64. 0 if there is no FPU. ++ if (HasBasicD || HasBasicF) ++ Builder.defineMacro("__loongarch_frlen", HasBasicD ? "64" : "32"); ++ else ++ Builder.defineMacro("__loongarch_frlen", "0"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is single or double. ++ if (ABI == "lp64d" || ABI == "lp64f") ++ Builder.defineMacro("__loongarch_hard_float"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is double. ++ if (ABI == "lp64d") ++ Builder.defineMacro("__loongarch_double_float"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is single. ++ if (ABI == "lp64f") ++ Builder.defineMacro("__loongarch_single_float"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is soft. ++ if (ABI == "lp64s") ++ Builder.defineMacro("__loongarch_soft_float"); ++} ++ ++bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { ++ return llvm::StringSwitch(Feature) ++ .Case("lsx", HasLSX) ++ .Case("lasx", HasLASX) ++ .Case("d", HasBasicD) ++ .Case("f", HasBasicF) ++ .Default(false); ++} ++ ++ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { ++ return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); ++} ++ ++bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const { ++ // 64-bit ABI's require 64-bit CPU's. ++ if (!processorSupportsGPR64() && ++ (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f")) { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // FIXME: It's valid to use lp64d/lp64s/lp64f on a loongarch32 triple ++ // but the backend can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch32() && ++ (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f")) { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ return true; ++} +diff --git a/lib/Basic/Targets/LoongArch.h b/lib/Basic/Targets/LoongArch.h +new file mode 100644 +index 00000000..6e854fd7 +--- /dev/null ++++ b/lib/Basic/Targets/LoongArch.h +@@ -0,0 +1,352 @@ ++//===--- LoongArch.h - Declare LoongArch target feature support -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares LoongArch TargetInfo objects. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++ ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace clang { ++namespace targets { ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { ++ void setDataLayout() { ++ StringRef Layout; ++ ++ if (ABI == "ilp32d" || ABI == "ilp32f" || ABI == "ilp32s") ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ else if (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") ++ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else ++ llvm_unreachable("Invalid ABI"); ++ ++ resetDataLayout(("e-" + Layout).str()); ++ } ++ ++ static const Builtin::Info BuiltinInfo[]; ++ std::string CPU; ++ bool HasLSX; ++ bool HasLASX; ++ bool HasBasicF; ++ bool HasBasicD; ++ ++protected: ++ std::string ABI; ++ ++public: ++ LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple), HasLSX(false), HasLASX(false), HasBasicF(false), ++ HasBasicD(false) { ++ TheCXXABI.set(TargetCXXABI::GenericLoongArch); ++ ++ if (Triple.isLoongArch32()) ++ // TODO ++ llvm_unreachable("Unimplemented triple"); ++ else ++ setABI("lp64d"); ++ ++ // Currently, CPU only supports 'la464' in LA. ++ if ( ABI == "lp64d") ++ CPU = "la464"; ++ } ++ ++ bool processorSupportsGPR64() const; ++ ++ StringRef getABI() const override { return ABI; } ++ ++ bool setABI(const std::string &Name) override { ++ if (Name == "ilp32d" || Name == "ilp32f" || Name == "ilp32s") { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ if (Name == "lp64d" || Name == "lp64s" || Name == "lp64f") { ++ setLP64ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ return false; ++ } ++ ++ void setLP64ABITypes() { ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ SuitableAlign = 128; ++ Int64Type = SignedLong; ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ PtrDiffType = SignedLong; ++ SizeType = UnsignedLong; ++ } ++ ++ bool isValidCPUName(StringRef Name) const override; ++ void fillValidCPUList(SmallVectorImpl &Values) const override; ++ ++ bool setCPU(const std::string &Name) override { ++ CPU = Name; ++ return isValidCPUName(Name); ++ } ++ ++ const std::string &getCPU() const { return CPU; } ++ bool ++ initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, ++ StringRef CPU, ++ const std::vector &FeaturesVec) const override { ++#if 0 ++ if (CPU.empty()) ++ CPU = getCPU(); ++ Features[CPU] = true; ++#else ++// if (CPU == "la464") ++// Features["loongarch64"] = true; ++ ++//FIXME: we need this? 
++// if (CPU == "la464") ++// Features["64bit"] = true; ++#endif ++ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; ++ ++ bool hasFeature(StringRef Feature) const override; ++ ++ bool hasBitIntType() const override { return true; } ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; ++ } ++ ++ ArrayRef getGCCRegNames() const override { ++ static const char *const GCCRegNames[] = { ++ // CPU register names ++ // Must match second column of GCCRegAliases ++ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", ++ "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", ++ "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", ++ "$r28", "$r29", "$r30", "$r31", ++ // Floating point register names ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", ++ "$f28", "$f29", "$f30", "$f31", ++ // condition register names ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", ++ // LSX register names ++ "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", ++ "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", ++ "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", ++ "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", ++ // LASX register names ++ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", ++ "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", ++ "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", ++ "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" ++ ++ }; ++ return llvm::makeArrayRef(GCCRegNames); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ case 'f': // floating-point registers. ++ Info.setAllowsRegister(); ++ return true; ++ case 'l': // Signed 16-bit constant ++ case 'I': // Signed 12-bit constant ++ case 'K': // Unsigned 12-bit constant ++ case 'J': // Integer 0 ++ case 'G': // Floating-point 0 ++ return true; ++ case 'm': // Memory address with 12-bit offset ++ case 'R': // An address that can be used in a non-macro load or store ++ Info.setAllowsMemory(); ++ return true; ++ case 'Z': ++ if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset ++ || Name[1] == 'B' ) { // Memory address with 0 offset ++ Info.setAllowsMemory(); ++ Name++; // Skip over 'Z'. ++ return true; ++ } ++ return false; ++ } ++ } ++ ++ std::string convertConstraint(const char *&Constraint) const override { ++ std::string R; ++ switch (*Constraint) { ++ case 'Z': // Two-character constraint; add "^" hint for later parsing. 
++ if (Constraint[1] == 'C' || Constraint[1] == 'B') { ++ R = std::string("^") + std::string(Constraint, 2); ++ Constraint++; ++ return R; ++ } ++ break; ++ } ++ return TargetInfo::convertConstraint(Constraint); ++ } ++ ++ const char *getClobbers() const override { ++#if 0 ++ // In GCC, $1 is not widely used in generated code (it's used only in a few ++ // specific situations), so there is no real need for users to add it to ++ // the clobbers list if they want to use it in their inline assembly code. ++ // ++ // In LLVM, $1 is treated as a normal GPR and is always allocatable during ++ // code generation, so using it in inline assembly without adding it to the ++ // clobbers list can cause conflicts between the inline assembly code and ++ // the surrounding generated code. ++ // ++ // Another problem is that LLVM is allowed to choose $1 for inline assembly ++ // operands, which will conflict with the ".set at" assembler option (which ++ // we use only for inline assembly, in order to maintain compatibility with ++ // GCC) and will also conflict with the user's usage of $1. ++ // ++ // The easiest way to avoid these conflicts and keep $1 as an allocatable ++ // register for generated code is to automatically clobber $1 for all inline ++ // assembly code. ++ // ++ // FIXME: We should automatically clobber $1 only for inline assembly code ++ // which actually uses it. This would allow LLVM to use $1 for inline ++ // assembly operands if the user's assembly code doesn't use it. ++ return "~{$1}"; ++#endif ++ return ""; ++ } ++ ++ bool handleTargetFeatures(std::vector &Features, ++ DiagnosticsEngine &Diags) override { ++ HasBasicF = false; ++ HasBasicD = false; ++ ++ for (const auto &Feature : Features) { ++ if (Feature == "+lsx") ++ HasLSX = true; ++ else if (Feature == "+lasx") { ++ HasLASX = true; ++ HasLSX = true; ++ } else if (Feature == "+f") ++ HasBasicF = true; ++ else if (Feature == "+d") ++ HasBasicD = true; ++ } ++ ++ setDataLayout(); ++ ++ return true; ++ } ++ ++ int getEHDataRegisterNumber(unsigned RegNo) const override { ++ if (RegNo == 0) ++ return 4; ++ if (RegNo == 1) ++ return 5; ++ return -1; ++ } ++ ++ bool isCLZForZeroUndef() const override { return false; } ++ ++ ArrayRef getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias GCCRegAliases[] = { ++ {{"zero", "$zero", "r0", "$0"}, "$r0"}, ++ {{"ra", "$ra", "r1", "$1"}, "$r1"}, ++ {{"tp", "$tp", "r2", "$2"}, "$r2"}, ++ {{"sp", "$sp", "r3", "$3"}, "$r3"}, ++ {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, ++ {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, ++ {{"a2", "$a2", "r6", "$6"}, "$r6"}, ++ {{"a3", "$a3", "r7", "$7"}, "$r7"}, ++ {{"a4", "$a4", "r8", "$8"}, "$r8"}, ++ {{"a5", "$a5", "r9", "$9"}, "$r9"}, ++ {{"a6", "$a6", "r10", "$10"}, "$r10"}, ++ {{"a7", "$a7", "r11", "$11"}, "$r11"}, ++ {{"t0", "$t0", "r12", "$12"}, "$r12"}, ++ {{"t1", "$t1", "r13", "$13"}, "$r13"}, ++ {{"t2", "$t2", "r14", "$14"}, "$r14"}, ++ {{"t3", "$t3", "r15", "$15"}, "$r15"}, ++ {{"t4", "$t4", "r16", "$16"}, "$r16"}, ++ {{"t5", "$t5", "r17", "$17"}, "$r17"}, ++ {{"t6", "$t6", "r18", "$18"}, "$r18"}, ++ {{"t7", "$t7", "r19", "$19"}, "$r19"}, ++ {{"t8", "$t8", "r20", "$20"}, "$r20"}, ++ //{{"x", "$x", "r21", "$21"}, "$r21"}, ++ {{"fp", "$fp", "r22", "$22"}, "$r22"}, ++ {{"s0", "$s0", "r23", "$23"}, "$r23"}, ++ {{"s1", "$s1", "r24", "$24"}, "$r24"}, ++ {{"s2", "$s2", "r25", "$25"}, "$r25"}, ++ {{"s3", "$s3", "r26", "$26"}, "$r26"}, ++ {{"s4", "$s4", "r27", "$27"}, "$r27"}, ++ {{"s5", "$s5", "r28", "$28"}, "$r28"}, ++ {{"s6", "$s6", "r29", 
"$29"}, "$r29"}, ++ {{"s7", "$s7", "r30", "$30"}, "$r30"}, ++ {{"s8", "$s8", "r31", "$31"}, "$r31"}, ++ {{"fa0", "$fa0", "f0"}, "$f0"}, ++ {{"fa1", "$fa1", "f1"}, "$f1"}, ++ {{"fa2", "$fa2", "f2"}, "$f2"}, ++ {{"fa3", "$fa3", "f3"}, "$f3"}, ++ {{"fa4", "$fa4", "f4"}, "$f4"}, ++ {{"fa5", "$fa5", "f5"}, "$f5"}, ++ {{"fa6", "$fa6", "f6"}, "$f6"}, ++ {{"fa7", "$fa7", "f7"}, "$f7"}, ++ {{"ft0", "$ft0", "f8"}, "$f8"}, ++ {{"ft1", "$ft1", "f9"}, "$f9"}, ++ {{"ft2", "$ft2", "f10"}, "$f10"}, ++ {{"ft3", "$ft3", "f11"}, "$f11"}, ++ {{"ft4", "$ft4", "f12"}, "$f12"}, ++ {{"ft5", "$ft5", "f13"}, "$f13"}, ++ {{"ft6", "$ft6", "f14"}, "$f14"}, ++ {{"ft7", "$ft7", "f15"}, "$f15"}, ++ {{"ft8", "$ft8", "f16"}, "$f16"}, ++ {{"ft9", "$ft9", "f17"}, "$f17"}, ++ {{"ft10", "$ft10", "f18"}, "$f18"}, ++ {{"ft11", "$ft11", "f19"}, "$f19"}, ++ {{"ft12", "$ft12", "f20"}, "$f20"}, ++ {{"ft13", "$ft13", "f21"}, "$f21"}, ++ {{"ft14", "$ft14", "f22"}, "$f22"}, ++ {{"ft15", "$ft15", "f23"}, "$f23"}, ++ {{"fs0", "$fs0", "f24"}, "$f24"}, ++ {{"fs1", "$fs1", "f25"}, "$f25"}, ++ {{"fs2", "$fs2", "f26"}, "$f26"}, ++ {{"fs3", "$fs3", "f27"}, "$f27"}, ++ {{"fs4", "$fs4", "f28"}, "$f28"}, ++ {{"fs5", "$fs5", "f29"}, "$f29"}, ++ {{"fs6", "$fs6", "f30"}, "$f30"}, ++ {{"fs7", "$fs7", "f31"}, "$f31"}, ++ }; ++ return llvm::makeArrayRef(GCCRegAliases); ++ } ++ ++ bool hasInt128Type() const override { ++ return (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") || ++ getTargetOpts().ForceEnableInt128; ++ } ++ ++ bool validateTarget(DiagnosticsEngine &Diags) const override; ++}; ++} // namespace targets ++} // namespace clang ++ ++#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp +index d7393526..50e16389 100644 +--- a/lib/CodeGen/CodeGenFunction.cpp ++++ b/lib/CodeGen/CodeGenFunction.cpp +@@ -560,6 +560,29 @@ bool CodeGenFunction::AlwaysEmitXRayTypedEvents() const { + XRayInstrKind::Typed); + } + ++llvm::Constant * ++CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F, ++ llvm::Constant *Addr) { ++ // Addresses stored in prologue data can't require run-time fixups and must ++ // be PC-relative. Run-time fixups are undesirable because they necessitate ++ // writable text segments, which are unsafe. And absolute addresses are ++ // undesirable because they break PIE mode. ++ ++ // Add a layer of indirection through a private global. Taking its address ++ // won't result in a run-time fixup, even if Addr has linkonce_odr linkage. ++ auto *GV = new llvm::GlobalVariable(CGM.getModule(), Addr->getType(), ++ /*isConstant=*/true, ++ llvm::GlobalValue::PrivateLinkage, Addr); ++ ++ // Create a PC-relative address. ++ auto *GOTAsInt = llvm::ConstantExpr::getPtrToInt(GV, IntPtrTy); ++ auto *FuncAsInt = llvm::ConstantExpr::getPtrToInt(F, IntPtrTy); ++ auto *PCRelAsInt = llvm::ConstantExpr::getSub(GOTAsInt, FuncAsInt); ++ return (IntPtrTy == Int32Ty) ++ ? 
PCRelAsInt ++ : llvm::ConstantExpr::getTrunc(PCRelAsInt, Int32Ty); ++} ++ + llvm::Value * + CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F, + llvm::Value *EncodedAddr) { +@@ -903,13 +926,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, + FD->getType(), EST_None); + llvm::Constant *FTRTTIConst = + CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true); +- llvm::GlobalVariable *FTRTTIProxy = +- CGM.GetOrCreateRTTIProxyGlobalVariable(FTRTTIConst); +- llvm::LLVMContext &Ctx = Fn->getContext(); +- llvm::MDBuilder MDB(Ctx); +- Fn->setMetadata(llvm::LLVMContext::MD_func_sanitize, +- MDB.createRTTIPointerPrologue(PrologueSig, FTRTTIProxy)); +- CGM.addCompilerUsedGlobal(FTRTTIProxy); ++ llvm::Constant *FTRTTIConstEncoded = ++ EncodeAddrForUseInPrologue(Fn, FTRTTIConst); ++ llvm::Constant *PrologueStructElems[] = {PrologueSig, FTRTTIConstEncoded}; ++ llvm::Constant *PrologueStructConst = ++ llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true); ++ Fn->setPrologueData(PrologueStructConst); + } + } + +diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h +index 046b249b..df99cd9a 100644 +--- a/lib/CodeGen/CodeGenFunction.h ++++ b/lib/CodeGen/CodeGenFunction.h +@@ -2351,6 +2351,10 @@ public: + /// XRay typed event handling calls. + bool AlwaysEmitXRayTypedEvents() const; + ++ /// Encode an address into a form suitable for use in a function prologue. ++ llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F, ++ llvm::Constant *Addr); ++ + /// Decode an address used in a function prologue, encoded by \c + /// EncodeAddrForUseInPrologue. + llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F, +diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp +index 58eef1b0..6c95dd61 100644 +--- a/lib/CodeGen/CodeGenModule.cpp ++++ b/lib/CodeGen/CodeGenModule.cpp +@@ -82,6 +82,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -1826,22 +1827,6 @@ CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { + return MostBases.takeVector(); + } + +-llvm::GlobalVariable * +-CodeGenModule::GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr) { +- auto It = RTTIProxyMap.find(Addr); +- if (It != RTTIProxyMap.end()) +- return It->second; +- +- auto *FTRTTIProxy = new llvm::GlobalVariable( +- TheModule, Addr->getType(), +- /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Addr, +- "__llvm_rtti_proxy"); +- FTRTTIProxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); +- +- RTTIProxyMap[Addr] = FTRTTIProxy; +- return FTRTTIProxy; +-} +- + void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, + llvm::Function *F) { + llvm::AttrBuilder B(F->getContext()); +diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h +index 3a9d542e..a8a63c8d 100644 +--- a/lib/CodeGen/CodeGenModule.h ++++ b/lib/CodeGen/CodeGenModule.h +@@ -561,8 +561,6 @@ private: + MetadataTypeMap VirtualMetadataIdMap; + MetadataTypeMap GeneralizedMetadataIdMap; + +- llvm::DenseMap RTTIProxyMap; +- + public: + CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts, + const PreprocessorOptions &ppopts, +@@ -1413,9 +1411,6 @@ public: + std::vector + getMostBaseClasses(const CXXRecordDecl *RD); + +- llvm::GlobalVariable * +- 
GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr); +- + /// Get the declaration of std::terminate for the platform. + llvm::FunctionCallee getTerminateFn(); + +diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp +index 2979d92c..5ef50a16 100644 +--- a/lib/CodeGen/ItaniumCXXABI.cpp ++++ b/lib/CodeGen/ItaniumCXXABI.cpp +@@ -533,6 +533,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); + ++ case TargetCXXABI::GenericLoongArch: ++ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); ++ + case TargetCXXABI::GenericMIPS: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + +diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp +index d83bc9e5..2ce2573a 100644 +--- a/lib/CodeGen/TargetInfo.cpp ++++ b/lib/CodeGen/TargetInfo.cpp +@@ -11242,6 +11242,557 @@ public: + }; + } // namespace + ++//===----------------------------------------------------------------------===// ++// LoongArch ABI Implementation ++//===----------------------------------------------------------------------===// ++ ++namespace { ++class LoongArchABIInfo : public DefaultABIInfo { ++private: ++ // Size of the integer ('r') registers in bits. ++ unsigned GRLen; ++ // Size of the floating point ('f') registers in bits. Note that the target ++ // ISA might have a wider FRLen than the selected ABI. ++ unsigned FRLen; ++ static const int NumArgGPRs = 8; ++ static const int NumArgFPRs = 8; ++ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const; ++ ++public: ++ LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) ++ : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} ++ ++ // DefaultABIInfo's classifyReturnType and classifyArgumentType are ++ // non-virtual, but computeInfo is virtual, so we overload it. ++ void computeInfo(CGFunctionInfo &FI) const override; ++ ++ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const; ++ ABIArgInfo classifyReturnType(QualType RetTy) const; ++ ++ uint64_t MinABIStackAlignInBytes = 8; ++ uint64_t StackAlignInBytes = 16; ++ llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; ++ llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; ++ void CoerceToIntArgs(uint64_t TySize, ++ SmallVectorImpl &ArgList) const; ++ ++ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const override; ++ ++ ABIArgInfo extendType(QualType Ty) const; ++ ++ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, int &NeededArgGPRs, ++ int &NeededArgFPRs) const; ++ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, ++ CharUnits Field1Off, ++ llvm::Type *Field2Ty, ++ CharUnits Field2Off) const; ++}; ++} // end anonymous namespace ++ ++void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { ++ QualType RetTy = FI.getReturnType(); ++ if (!getCXXABI().classifyReturnType(FI)) ++ FI.getReturnInfo() = classifyReturnType(RetTy); ++ ++ // IsRetIndirect is true if classifyArgumentType indicated the value should ++ // be passed indirect or if the type size is greater than 2*grlen. 
++ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || ++ getContext().getTypeSize(RetTy) > (2 * GRLen); ++ ++ // We must track the number of GPRs used in order to conform to the LoongArch ++ // ABI, as integer scalars passed in registers should have signext/zeroext ++ // when promoted, but are anyext if passed on the stack. As GPR usage is ++ // different for variadic arguments, we must also track whether we are ++ // examining a vararg or not. ++ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; ++ int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; ++ int NumFixedArgs = FI.getNumRequiredArgs(); ++ ++ int ArgNum = 0; ++ for (auto &ArgInfo : FI.arguments()) { ++ bool IsFixed = ArgNum < NumFixedArgs; ++ ArgInfo.info = ++ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ++ ArgNum++; ++ } ++} ++ ++// Returns true if the struct is a potential candidate for the floating point ++// calling convention. If this function returns true, the caller is ++// responsible for checking that if there is only a single field then that ++// field is a float. ++bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const { ++ bool IsInt = Ty->isIntegralOrEnumerationType(); ++ bool IsFloat = Ty->isRealFloatingType(); ++ ++ if (IsInt || IsFloat) { ++ uint64_t Size = getContext().getTypeSize(Ty); ++ if (IsInt && Size > GRLen) ++ return false; ++ // Can't be eligible if larger than the FP registers. Half precision isn't ++ // currently supported on LoongArch and the ABI hasn't been confirmed, so ++ // default to the integer ABI in that case. ++ if (IsFloat && (Size > FRLen || Size < 32)) ++ return false; ++ // Can't be eligible if an integer type was already found (int+int pairs ++ // are not eligible). ++ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) ++ return false; ++ if (!Field1Ty) { ++ Field1Ty = CGT.ConvertType(Ty); ++ Field1Off = CurOff; ++ return true; ++ } ++ if (!Field2Ty) { ++ Field2Ty = CGT.ConvertType(Ty); ++ Field2Off = CurOff; ++ return true; ++ } ++ return false; ++ } ++ ++ if (auto CTy = Ty->getAs()) { ++ if (Field1Ty) ++ return false; ++ QualType EltTy = CTy->getElementType(); ++ if (getContext().getTypeSize(EltTy) > FRLen) ++ return false; ++ Field1Ty = CGT.ConvertType(EltTy); ++ Field1Off = CurOff; ++ assert(CurOff.isZero() && "Unexpected offset for first field"); ++ Field2Ty = Field1Ty; ++ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); ++ return true; ++ } ++ ++ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { ++ uint64_t ArraySize = ATy->getSize().getZExtValue(); ++ QualType EltTy = ATy->getElementType(); ++ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); ++ for (uint64_t i = 0; i < ArraySize; ++i) { ++ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, ++ Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ CurOff += EltSize; ++ } ++ return true; ++ } ++ ++ if (const auto *RTy = Ty->getAs()) { ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are not eligible for the FP calling convention. ++ if (getRecordArgABI(Ty, CGT.getCXXABI())) ++ return false; ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return true; ++ const RecordDecl *RD = RTy->getDecl(); ++ // Unions aren't eligible unless they're empty (which is caught above). 
++ if (RD->isUnion()) ++ return false; ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ // If this is a C++ record, check the bases first. ++ if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { ++ for (const CXXBaseSpecifier &B : CXXRD->bases()) { ++ const auto *BDecl = ++ cast(B.getType()->castAs()->getDecl()); ++ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); ++ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, ++ Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ if (!Ret) ++ return false; ++ } ++ } ++ int ZeroWidthBitFieldCount = 0; ++ for (const FieldDecl *FD : RD->fields()) { ++ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); ++ QualType QTy = FD->getType(); ++ if (FD->isBitField()) { ++ unsigned BitWidth = FD->getBitWidthValue(getContext()); ++ // Allow a bitfield with a type greater than GRLen as long as the ++ // bitwidth is GRLen or less. ++ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) ++ QTy = getContext().getIntTypeForBitwidth(GRLen, false); ++ if (BitWidth == 0) { ++ ZeroWidthBitFieldCount++; ++ continue; ++ } ++ } ++ ++ bool Ret = detectFPCCEligibleStructHelper( ++ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), ++ Field1Ty, Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ ++ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp ++ // or int+fp structs, but are ignored for a struct with an fp field and ++ // any number of zero-width bitfields. ++ if (Field2Ty && ZeroWidthBitFieldCount > 0) ++ return false; ++ } ++ return Field1Ty != nullptr; ++ } ++ ++ return false; ++} ++ ++// Determine if a struct is eligible for passing according to the floating ++// point calling convention (i.e., when flattened it contains a single fp ++// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and ++// NeededArgGPRs are incremented appropriately. ++bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, ++ int &NeededArgGPRs, ++ int &NeededArgFPRs) const { ++ Field1Ty = nullptr; ++ Field2Ty = nullptr; ++ NeededArgGPRs = 0; ++ NeededArgFPRs = 0; ++ bool IsCandidate = detectFPCCEligibleStructHelper( ++ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); ++ // Not really a candidate if we have a single int but no float. ++ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) ++ return IsCandidate = false; ++ if (!IsCandidate) ++ return false; ++ if (Field1Ty && Field1Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field1Ty) ++ NeededArgGPRs++; ++ if (Field2Ty && Field2Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field2Ty) ++ NeededArgGPRs++; ++ return IsCandidate; ++} ++ ++// Call getCoerceAndExpand for the two-element flattened struct described by ++// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an ++// appropriate coerceToType and unpaddedCoerceToType. 
++ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( ++ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, ++ CharUnits Field2Off) const { ++ SmallVector CoerceElts; ++ SmallVector UnpaddedCoerceElts; ++ if (!Field1Off.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); ++ ++ CoerceElts.push_back(Field1Ty); ++ UnpaddedCoerceElts.push_back(Field1Ty); ++ ++ if (!Field2Ty) { ++ return ABIArgInfo::getCoerceAndExpand( ++ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), ++ UnpaddedCoerceElts[0]); ++ } ++ ++ CharUnits Field2Align = ++ CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); ++ CharUnits Field1Size = ++ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); ++ CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); ++ ++ CharUnits Padding = CharUnits::Zero(); ++ if (Field2Off > Field2OffNoPadNoPack) ++ Padding = Field2Off - Field2OffNoPadNoPack; ++ else if (Field2Off != Field2Align && Field2Off > Field1Size) ++ Padding = Field2Off - Field1Size; ++ ++ bool IsPacked = !Field2Off.isMultipleOf(Field2Align); ++ ++ if (!Padding.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); ++ ++ CoerceElts.push_back(Field2Ty); ++ UnpaddedCoerceElts.push_back(Field2Ty); ++ ++ auto CoerceToType = ++ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); ++ auto UnpaddedCoerceToType = ++ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); ++ ++ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); ++} ++ ++void LoongArchABIInfo::CoerceToIntArgs( ++ uint64_t TySize, SmallVectorImpl &ArgList) const { ++ llvm::IntegerType *IntTy = ++ llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); ++ ++ // Add (TySize / MinABIStackAlignInBytes) args of IntTy. ++ for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ++ ArgList.push_back(IntTy); ++ ++ // If necessary, add one more integer type to ArgList. ++ unsigned R = TySize % (MinABIStackAlignInBytes * 8); ++ ++ if (R) ++ ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); ++} ++ ++llvm::Type* LoongArchABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { ++ SmallVector ArgList, IntArgList; ++ ++ if (Ty->isComplexType()) ++ return CGT.ConvertType(Ty); ++ ++ const RecordType *RT = Ty->getAs(); ++ ++ // Unions/vectors are passed in integer registers. ++ if (!RT || !RT->isStructureOrClassType()) { ++ CoerceToIntArgs(TySize, ArgList); ++ return llvm::StructType::get(getVMContext(), ArgList); ++ } ++ ++ const RecordDecl *RD = RT->getDecl(); ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ assert(!(TySize % 8) && "Size of structure must be multiple of 8."); ++ ++ uint64_t LastOffset = 0; ++ unsigned idx = 0; ++ llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); ++ ++ // Iterate over fields in the struct/class and check if there are any aligned ++ // double fields. ++ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); ++ i != e; ++i, ++idx) { ++ const QualType Ty = i->getType(); ++ const BuiltinType *BT = Ty->getAs(); ++ ++ if (!BT || BT->getKind() != BuiltinType::Double) ++ continue; ++ ++ uint64_t Offset = Layout.getFieldOffset(idx); ++ if (Offset % 64) // Ignore doubles that are not aligned. ++ continue; ++ ++ // Add ((Offset - LastOffset) / 64) args of type i64. 
++ for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ++ ArgList.push_back(I64); ++ ++ // Add double type. ++ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); ++ LastOffset = Offset + 64; ++ } ++ ++ CoerceToIntArgs(TySize - LastOffset, IntArgList); ++ ArgList.append(IntArgList.begin(), IntArgList.end()); ++ ++ return llvm::StructType::get(getVMContext(), ArgList); ++} ++ ++llvm::Type * LoongArchABIInfo::getPaddingType(uint64_t OrigOffset, ++ uint64_t Offset) const { ++ if (OrigOffset + MinABIStackAlignInBytes > Offset) ++ return nullptr; ++ ++ return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, ++ int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const { ++ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); ++ Ty = useFirstFieldIfTransparentUnion(Ty); ++ ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are always passed indirectly. ++ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { ++ if (ArgGPRsLeft) ++ ArgGPRsLeft -= 1; ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == ++ CGCXXABI::RAA_DirectInMemory); ++ } ++ ++ // Ignore empty structs/unions. ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ uint64_t Size = getContext().getTypeSize(Ty); ++ ++ // Pass floating point values via FPRs if possible. ++ if (IsFixed && Ty->isFloatingType() && FRLen >= Size && ArgFPRsLeft) { ++ ArgFPRsLeft--; ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Complex types for the hard float ABI must be passed direct rather than ++ // using CoerceAndExpand. ++ if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) { ++ QualType EltTy = Ty->getAs()->getElementType(); ++ if (getContext().getTypeSize(EltTy) <= FRLen) { ++ ArgFPRsLeft -= 2; ++ return ABIArgInfo::getDirect(); ++ } ++ } ++ ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ ++ if (IsFixed && FRLen && Ty->isStructureOrClassType()) { ++ llvm::Type *Field1Ty = nullptr; ++ llvm::Type *Field2Ty = nullptr; ++ CharUnits Field1Off = CharUnits::Zero(); ++ CharUnits Field2Off = CharUnits::Zero(); ++ int NeededArgGPRs; ++ int NeededArgFPRs; ++ bool IsCandidate = ++ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, ++ NeededArgGPRs, NeededArgFPRs); ++ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && ++ NeededArgFPRs <= ArgFPRsLeft) { ++ ArgGPRsLeft -= NeededArgGPRs; ++ ArgFPRsLeft -= NeededArgFPRs; ++ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ } ++ } else if (Ty->isStructureOrClassType() && Size == 128 && ++ isAggregateTypeForABI(Ty)) { ++ uint64_t Offset = 8; ++ uint64_t OrigOffset = Offset; ++ uint64_t TySize = getContext().getTypeSize(Ty); ++ uint64_t Align = getContext().getTypeAlign(Ty) / 8; ++ ++ Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), ++ (uint64_t)StackAlignInBytes); ++ unsigned CurrOffset = llvm::alignTo(Offset, Align); ++ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; ++ ++ ABIArgInfo ArgInfo = ++ ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, ++ getPaddingType(OrigOffset, CurrOffset)); ++ ArgInfo.setInReg(true); ++ return ArgInfo; ++ } ++ ++ uint64_t NeededAlign = getContext().getTypeAlign(Ty); ++ // Determine the 
number of GPRs needed to pass the current argument ++ // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" ++ // register pairs, so may consume 3 registers. ++ int NeededArgGPRs = 1; ++ if (!IsFixed && NeededAlign == 2 * GRLen) ++ NeededArgGPRs = 2 + (ArgGPRsLeft % 2); ++ else if (Size > GRLen && Size <= 2 * GRLen) ++ NeededArgGPRs = 2; ++ ++ if (NeededArgGPRs > ArgGPRsLeft) { ++ NeededArgGPRs = ArgGPRsLeft; ++ } ++ ++ ArgGPRsLeft -= NeededArgGPRs; ++ ++ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = Ty->getAs()) ++ Ty = EnumTy->getDecl()->getIntegerType(); ++ ++ // All integral types are promoted to GRLen width, unless passed on the ++ // stack. ++ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) { ++ return extendType(Ty); ++ } ++ ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Aggregates which are <= 2*GRLen will be passed in registers if possible, ++ // so coerce to integers. ++ if (Size <= 2 * GRLen) { ++ unsigned Alignment = getContext().getTypeAlign(Ty); ++ ++ // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is ++ // required, and a 2-element GRLen array if only GRLen alignment is required. ++ if (Size <= GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), GRLen)); ++ } else if (Alignment == 2 * GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), 2 * GRLen)); ++ } else { ++ return ABIArgInfo::getDirect(llvm::ArrayType::get( ++ llvm::IntegerType::get(getVMContext(), GRLen), 2)); ++ } ++ } ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/false); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { ++ if (RetTy->isVoidType()) ++ return ABIArgInfo::getIgnore(); ++ ++ int ArgGPRsLeft = 2; ++ int ArgFPRsLeft = FRLen ? 2 : 0; ++ ++ // The rules for return and argument types are the same, so defer to ++ // classifyArgumentType. ++ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ++ ArgFPRsLeft); ++} ++ ++Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const { ++ CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); ++ ++ // Empty records are ignored for parameter passing purposes. ++ if (isEmptyRecord(getContext(), Ty, true)) { ++ Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); ++ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); ++ return Addr; ++ } ++ ++ auto TInfo = getContext().getTypeInfoInChars(Ty); ++ ++ // Arguments bigger than 2*GRlen bytes are passed indirectly. ++ bool IsIndirect = TInfo.Width > 2 * SlotSize; ++ ++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, ++ SlotSize, /*AllowHigherAlign=*/true); ++} ++ ++ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { ++ int TySize = getContext().getTypeSize(Ty); ++ // LP64 ABI requires unsigned 32 bit integers to be sign extended. 
++ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) ++ return ABIArgInfo::getSignExtend(Ty); ++ return ABIArgInfo::getExtend(Ty); ++} ++ ++namespace { ++class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { ++public: ++ LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, ++ unsigned FRLen) ++ : TargetCodeGenInfo(std::make_unique( ++ CGT, GRLen, FRLen)) {} ++ ++ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, ++ CodeGen::CodeGenModule &CGM) const override { ++ return; ++ } ++}; ++} // namespace ++ + //===----------------------------------------------------------------------===// + // VE ABI Implementation. + // +@@ -11320,6 +11871,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + + case llvm::Triple::le32: + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); ++ + case llvm::Triple::m68k: + return SetCGInfo(new M68kTargetCodeGenInfo(Types)); + case llvm::Triple::mips: +@@ -11437,6 +11989,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + case llvm::Triple::msp430: + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + ++ case llvm::Triple::loongarch64: ++ return SetCGInfo(new LoongArchTargetCodeGenInfo(Types, 64, 64)); ++ + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); +diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt +index 78e8fd18..90454cb1 100644 +--- a/lib/Driver/CMakeLists.txt ++++ b/lib/Driver/CMakeLists.txt +@@ -26,6 +26,7 @@ add_clang_library(clangDriver + ToolChain.cpp + ToolChains/Arch/AArch64.cpp + ToolChains/Arch/ARM.cpp ++ ToolChains/Arch/LoongArch.cpp + ToolChains/Arch/M68k.cpp + ToolChains/Arch/Mips.cpp + ToolChains/Arch/PPC.cpp +diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp +index 3bfddeef..0e13c31c 100644 +--- a/lib/Driver/Driver.cpp ++++ b/lib/Driver/Driver.cpp +@@ -617,6 +617,22 @@ static llvm::Triple computeTargetTriple(const Driver &D, + Target.setVendorName("intel"); + } + ++ // If target is LoongArch adjust the target triple ++ // accordingly to provided ABI name. ++ A = Args.getLastArg(options::OPT_mabi_EQ); ++ if (A && Target.isLoongArch()) { ++ StringRef ABIName = A->getValue(); ++ if (ABIName == "ilp32d" || ABIName == "ilp32f" || ABIName == "ilp32s") { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } else if (ABIName == "lp64d") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNUABI64); ++ } ++ } ++ + // If target is MIPS adjust the target triple + // accordingly to provided ABI name. + A = Args.getLastArg(options::OPT_mabi_EQ); +diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp +index 96cef9eb..403fac76 100644 +--- a/lib/Driver/SanitizerArgs.cpp ++++ b/lib/Driver/SanitizerArgs.cpp +@@ -367,19 +367,6 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, + Add &= ~NotAllowedWithMinimalRuntime; + } + +- if (llvm::opt::Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { +- StringRef CM = A->getValue(); +- if (CM != "small" && +- (Add & SanitizerKind::Function & ~DiagnosedKinds)) { +- if (DiagnoseErrors) +- D.Diag(diag::err_drv_argument_only_allowed_with) +- << "-fsanitize=function" +- << "-mcmodel=small"; +- Add &= ~SanitizerKind::Function; +- DiagnosedKinds |= SanitizerKind::Function; +- } +- } +- + // FIXME: Make CFI on member function calls compatible with cross-DSO CFI. 
+ // There are currently two problems: + // - Virtual function call checks need to pass a pointer to the function +diff --git a/lib/Driver/ToolChains/Arch/LoongArch.cpp b/lib/Driver/ToolChains/Arch/LoongArch.cpp +new file mode 100644 +index 00000000..0dcec221 +--- /dev/null ++++ b/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -0,0 +1,179 @@ ++//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "ToolChains/CommonArgs.h" ++#include "clang/Driver/Driver.h" ++#include "clang/Driver/DriverDiagnostic.h" ++#include "clang/Driver/Options.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/Option/ArgList.h" ++ ++using namespace clang::driver; ++using namespace clang::driver::tools; ++using namespace clang; ++using namespace llvm::opt; ++ ++// Get CPU and ABI names. They are not independent ++// so we have to calculate them together. ++void loongarch::getLoongArchCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, ++ StringRef &CPUName, StringRef &ABIName) { ++ const char *DefLoongArch32CPU = "generic-la32"; ++ const char *DefLoongArch64CPU = "la464"; ++ ++ if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ, ++ options::OPT_mcpu_EQ)) ++ CPUName = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) ++ ABIName = A->getValue(); ++ ++ // Setup default CPU and ABI names. ++ if (CPUName.empty() && ABIName.empty()) { ++ switch (Triple.getArch()) { ++ default: ++ llvm_unreachable("Unexpected triple arch name"); ++ case llvm::Triple::loongarch32: ++ CPUName = DefLoongArch32CPU; ++ break; ++ case llvm::Triple::loongarch64: ++ CPUName = DefLoongArch64CPU; ++ break; ++ } ++ } ++ ++ if (ABIName.empty() && (Triple.getEnvironment() == llvm::Triple::GNUABILPX32)) ++ ABIName = "lpx32"; ++ ++ if (ABIName.empty()) { ++ ABIName = llvm::StringSwitch(CPUName) ++ .Case("generic-la32", "ilp32d") ++ .Cases("la464", "generic-la64", "lp64d") ++ .Default(Triple.isLoongArch32() ? "ilp32d" : "lp64d"); ++ } ++ ++ if (CPUName.empty()) { ++ // Deduce CPU name from ABI name. ++ CPUName = llvm::StringSwitch(ABIName) ++ .Cases("lp64d", "lp64f", "lp64s", DefLoongArch64CPU) ++ .Default(""); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_msingle_float, ++ options::OPT_mdouble_float, ++ options::OPT_msoft_float)) { ++ if (A->getOption().matches(options::OPT_msingle_float)) ++ ABIName = "lp64f"; ++ else if (A->getOption().matches(options::OPT_mdouble_float)) ++ ABIName = "lp64d"; ++ else ++ ABIName = "lp64s"; ++ } ++ ++ // FIXME: Warn on inconsistent use of -march and -mabi. 
++} ++ ++std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args, ++ const llvm::Triple &Triple) { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ return llvm::StringSwitch(ABIName) ++ .Cases("ilp32d", "ilp32f", "ilp32s", "32") ++ .Cases("lp64d", "lp64f", "lp64s", "64"); ++} ++ ++void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const ArgList &Args, ++ std::vector &Features) { ++ StringRef CPUName; ++ StringRef ABIName; ++ StringRef FPUValue; ++ getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ bool NonPIC = false; ++ ++ Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, ++ options::OPT_fpic, options::OPT_fno_pic, ++ options::OPT_fPIE, options::OPT_fno_PIE, ++ options::OPT_fpie, options::OPT_fno_pie); ++ if (LastPICArg) { ++ Option O = LastPICArg->getOption(); ++ NonPIC = ++ (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) || ++ O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie)); ++ } ++ ++ if (NonPIC) { ++ NonPIC = false; ++ } ++ ++ AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx, ++ "lsx"); ++ AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx, ++ "lasx"); ++ ++ AddTargetFeature(Args, Features, options::OPT_munaligned_access, ++ options::OPT_mno_unaligned_access, "unaligned-access"); ++ if (Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) ++ FPUValue = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_msingle_float, ++ options::OPT_mdouble_float, ++ options::OPT_msoft_float)) { ++ if (A->getOption().matches(options::OPT_msingle_float)) ++ FPUValue = "32"; ++ else if (A->getOption().matches(options::OPT_mdouble_float)) ++ FPUValue = "64"; ++ else ++ FPUValue = "none"; ++ } ++ ++ // Setup feature. ++ if (FPUValue.empty()) ++ Features.push_back("+d"); ++ else { ++ if (FPUValue == "64") ++ Features.push_back("+d"); ++ else if (FPUValue == "32") ++ Features.push_back("+f"); ++ else if (FPUValue == "none") { ++ Features.push_back("-f"); ++ Features.push_back("-d"); ++ } else ++ D.Diag(clang::diag::err_drv_invalid_loongarch_mfpu) ++ << FPUValue; ++ } ++ ++ // lp64f ABI and -mfpu=none are incompatible. ++ if (hasLoongArchAbiArg(Args, "lp64f") && hasLoongArchFpuArg(Args, "none")) { ++ D.Diag(clang::diag::err_opt_not_valid_with_opt) << "lp64f" ++ << "-mfpu=none"; ++ } ++ ++ // Also lp64d ABI is only compatible with -mfpu=64. 
++ if ((hasLoongArchAbiArg(Args, "lp64d") || ABIName == "lp64d") && ++ (hasLoongArchFpuArg(Args, "none") || hasLoongArchFpuArg(Args, "32"))) { ++ D.Diag(clang::diag::err_opt_not_valid_without_opt) << "lp64d" ++ << "-mfpu=64"; ++ } ++} ++ ++bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mabi_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} ++ ++bool loongarch::isUCLibc(const ArgList &Args) { ++ Arg *A = Args.getLastArg(options::OPT_m_libc_Group); ++ return A && A->getOption().matches(options::OPT_muclibc); ++} ++ ++bool loongarch::hasLoongArchFpuArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mfpu_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} +diff --git a/lib/Driver/ToolChains/Arch/LoongArch.h b/lib/Driver/ToolChains/Arch/LoongArch.h +new file mode 100644 +index 00000000..5c581ff6 +--- /dev/null ++++ b/lib/Driver/ToolChains/Arch/LoongArch.h +@@ -0,0 +1,41 @@ ++//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++ ++#include "clang/Driver/Driver.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Option/Option.h" ++#include <string> ++#include <vector> ++ ++namespace clang { ++namespace driver { ++namespace tools { ++ ++namespace loongarch { ++void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple, StringRef &CPUName, ++ StringRef &ABIName); ++void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ std::vector<StringRef> &Features); ++std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple); ++bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value); ++bool hasLoongArchFpuArg(const llvm::opt::ArgList &Args, const char *Value); ++bool isUCLibc(const llvm::opt::ArgList &Args); ++ ++} // end namespace loongarch ++} // end namespace tools ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H +diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp +index f2f18e90..fdfde359 100644 +--- a/lib/Driver/ToolChains/Clang.cpp ++++ b/lib/Driver/ToolChains/Clang.cpp +@@ -10,6 +10,7 @@ + #include "AMDGPU.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -331,6 +332,11 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + arm::getARMTargetFeatures(D, Triple, Args, CmdArgs, Features, ForAS); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); ++ break; ++ + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -535,6 +541,8 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, + // XCore never wants frame pointers, regardless of OS. + // WebAssembly never wants frame pointers.
+ return false; ++ case llvm::Triple::loongarch64: ++ case llvm::Triple::loongarch32: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -1751,6 +1759,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, + CmdArgs.push_back("-fallow-half-arguments-and-returns"); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -1895,6 +1908,32 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, + } + } + ++void Clang::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ const Driver &D = getToolChain().getDriver(); ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, ++ options::OPT_mno_check_zero_division)) { ++ if (A->getOption().matches(options::OPT_mno_check_zero_division)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-mnocheck-zero-division"); ++ } ++ } ++ ++ llvm::Reloc::Model RelocationModel; ++ unsigned PICLevel; ++ bool IsPIE; ++ std::tie(RelocationModel, PICLevel, IsPIE) = ++ ParsePICArgs(getToolChain(), Args); ++} ++ + void Clang::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); +@@ -7627,6 +7666,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, + + // Begin ClangAs + ++void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++} ++ + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; +@@ -7816,6 +7866,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + default: + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/lib/Driver/ToolChains/Clang.h b/lib/Driver/ToolChains/Clang.h +index 79407c98..ba59f751 100644 +--- a/lib/Driver/ToolChains/Clang.h ++++ b/lib/Driver/ToolChains/Clang.h +@@ -57,6 +57,8 @@ private: + bool KernelOrKext) const; + void AddARM64TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddPPCTargetArgs(const llvm::opt::ArgList &Args, +@@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { + public: + ClangAs(const ToolChain &TC) + : Tool("clang::as", "clang integrated assembler", TC) {} ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddX86TargetArgs(const llvm::opt::ArgList &Args, +diff 
--git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp +index 8f9244ca..a8bcf851 100644 +--- a/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/lib/Driver/ToolChains/CommonArgs.cpp +@@ -9,6 +9,7 @@ + #include "CommonArgs.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -383,6 +384,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + return A->getValue(); + return ""; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); ++ return std::string(CPUName); ++ } ++ + case llvm::Triple::m68k: + return m68k::getM68kTargetCPU(Args); + +@@ -1321,6 +1330,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { + if ((ROPI || RWPI) && (PIC || PIE)) + ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + ++ if (Triple.isLoongArch()) { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ // When targeting the LP64D ABI, PIC is the default. ++ if (ABIName == "lp64d") ++ PIC = true; ++ // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, ++ // does not use PIC level 2 for historical reasons. ++ IsPICLevelTwo = false; ++ } ++ + if (Triple.isMIPS()) { + StringRef CPUName; + StringRef ABIName; +diff --git a/lib/Driver/ToolChains/Gnu.cpp b/lib/Driver/ToolChains/Gnu.cpp +index 7a9570a6..c0aa8fef 100644 +--- a/lib/Driver/ToolChains/Gnu.cpp ++++ b/lib/Driver/ToolChains/Gnu.cpp +@@ -8,6 +8,7 @@ + + #include "Gnu.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -254,6 +255,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + case llvm::Triple::armeb: + case llvm::Triple::thumbeb: + return isArmBigEndian(T, Args) ? "armelfb_linux_eabi" : "armelf_linux_eabi"; ++ case llvm::Triple::loongarch32: ++ return "elf32loongarch"; ++ case llvm::Triple::loongarch64: ++ return "elf64loongarch"; + case llvm::Triple::m68k: + return "m68kelf"; + case llvm::Triple::ppc: +@@ -822,6 +827,55 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ++ ++ //FIXME: Currently gnu as doesn't support -march ++ //CmdArgs.push_back("-march=loongarch"); ++ //CmdArgs.push_back(CPUName.data()); ++ ++ CmdArgs.push_back("-mabi=lp64d"); ++ ++ // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, ++ // or -mshared (not implemented) is in effect. ++ if (RelocationModel == llvm::Reloc::Static) ++ CmdArgs.push_back("-mno-shared"); ++ ++ break; ++ ++ // Add the last -mfp32/-mfp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ A->claim(); ++ A->render(Args, CmdArgs); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlsx / -mno-lsx options. 
++ if (A->getOption().matches(options::OPT_mlsx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlsx")); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlasx / -mno-lasx options. ++ if (A->getOption().matches(options::OPT_mlasx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlasx")); ++ } ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mhard_float, ++ options::OPT_msoft_float); ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, ++ options::OPT_msingle_float); ++ ++ AddAssemblerKPIC(getToolChain(), Args, CmdArgs); ++ break; ++ } + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -2185,6 +2239,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; ++ static const char *const LoongArch64Triples[] = { ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-loongson-linux-gnu"}; + + using std::begin; + using std::end; +@@ -2353,6 +2411,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); + } + break; ++ case llvm::Triple::loongarch64: ++ LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); ++ TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); ++ break; + case llvm::Triple::m68k: + LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); + TripleAliases.append(begin(M68kTriples), end(M68kTriples)); +@@ -2708,6 +2770,7 @@ bool Generic_GCC::isPICDefault() const { + switch (getArch()) { + case llvm::Triple::x86_64: + return getTriple().isOSWindows(); ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; +@@ -2750,6 +2813,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::m68k: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + return true; + case llvm::Triple::sparc: + case llvm::Triple::sparcel: +diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp +index 83cb4115..d001dcd0 100644 +--- a/lib/Driver/ToolChains/Linux.cpp ++++ b/lib/Driver/ToolChains/Linux.cpp +@@ -8,6 +8,7 @@ + + #include "Linux.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -85,6 +86,11 @@ std::string Linux::getMultiarchTriple(const Driver &D, + case llvm::Triple::aarch64_be: + return "aarch64_be-linux-gnu"; + ++ case llvm::Triple::loongarch32: ++ return "loongarch32-linux-gnu"; ++ case llvm::Triple::loongarch64: ++ return "loongarch64-linux-gnu"; ++ + case llvm::Triple::m68k: + return "m68k-linux-gnu"; + +@@ -452,6 +458,14 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + Loader = HF ? 
"ld-linux-armhf.so.3" : "ld-linux.so.3"; + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); ++ Loader = ("ld-linux-loongarch-" + ABIName + ".so.1").str(); ++ break; ++ } + case llvm::Triple::m68k: + LibDir = "lib"; + Loader = "ld.so.1"; +@@ -702,6 +716,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { + const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; + const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; + const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; ++ const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; + SanitizerMask Res = ToolChain::getSupportedSanitizers(); + Res |= SanitizerKind::Address; + Res |= SanitizerKind::PointerCompare; +@@ -712,19 +727,20 @@ SanitizerMask Linux::getSupportedSanitizers() const { + Res |= SanitizerKind::Memory; + Res |= SanitizerKind::Vptr; + Res |= SanitizerKind::SafeStack; +- if (IsX86_64 || IsMIPS64 || IsAArch64) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) + Res |= SanitizerKind::DataFlow; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || +- IsRISCV64 || IsSystemZ || IsHexagon) ++ IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Leak; +- if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || ++ IsLoongArch64) + Res |= SanitizerKind::Thread; + if (IsX86_64) + Res |= SanitizerKind::KernelMemory; + if (IsX86 || IsX86_64) + Res |= SanitizerKind::Function; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || +- IsPowerPC64 || IsHexagon) ++ IsPowerPC64 || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Scudo; + if (IsX86_64 || IsAArch64) { + Res |= SanitizerKind::HWAddress; +diff --git a/lib/Driver/ToolChains/Linux.h b/lib/Driver/ToolChains/Linux.h +index 3c4546cb..a5648d79 100644 +--- a/lib/Driver/ToolChains/Linux.h ++++ b/lib/Driver/ToolChains/Linux.h +@@ -10,7 +10,6 @@ + #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_LINUX_H + + #include "Gnu.h" +-#include "clang/Basic/LangOptions.h" + #include "clang/Driver/ToolChain.h" + + namespace clang { +@@ -47,10 +46,6 @@ public: + IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList &Args) const override; + bool isPIEDefault(const llvm::opt::ArgList &Args) const override; + bool IsMathErrnoDefault() const override; +- LangOptions::StackProtectorMode +- GetDefaultStackProtectorLevel(bool KernelOrKext) const override { +- return LangOptions::SSPStrong; +- } + SanitizerMask getSupportedSanitizers() const override; + void addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; +diff --git a/lib/Driver/XRayArgs.cpp b/lib/Driver/XRayArgs.cpp +index 63b57517..4e3ae3f2 100644 +--- a/lib/Driver/XRayArgs.cpp ++++ b/lib/Driver/XRayArgs.cpp +@@ -42,6 +42,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { + case llvm::Triple::aarch64: + case llvm::Triple::hexagon: + case llvm::Triple::ppc64le: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt +index 07898898..5ae7dbab 100644 +--- 
a/lib/Headers/CMakeLists.txt ++++ b/lib/Headers/CMakeLists.txt +@@ -85,6 +85,7 @@ set(files + invpcidintrin.h + iso646.h + keylockerintrin.h ++ larchintrin.h + limits.h + lwpintrin.h + lzcntintrin.h +@@ -94,6 +95,8 @@ set(files + module.modulemap + movdirintrin.h + msa.h ++ lsxintrin.h ++ lasxintrin.h + mwaitxintrin.h + nmmintrin.h + opencl-c.h +diff --git a/lib/Headers/larchintrin.h b/lib/Headers/larchintrin.h +new file mode 100644 +index 00000000..7e99f19a +--- /dev/null ++++ b/lib/Headers/larchintrin.h +@@ -0,0 +1,319 @@ ++//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch Base intrinsics ++// ++//===----------------------------------------------------------------------===// ++#ifndef __LOONGARCH_BASE_H ++#define __LOONGARCH_BASE_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; ++} __rdtime_t; ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrrd(/*uimm14_32*/ _1) ((unsigned int)__builtin_loongarch_csrrd(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrwr(/*unsigned int*/ _1, /*uimm14_32*/ _2) ((unsigned int)__builtin_loongarch_csrwr((unsigned int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ ++#define __csrxchg(/*unsigned int*/ _1, /*unsigned int*/ _2, /*uimm14_32*/ _3) ((unsigned int)__builtin_loongarch_csrxchg((unsigned int)(_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __dcsrrd(/*uimm14*/ _1) ((unsigned long int)__builtin_loongarch_dcsrrd(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __dcsrwr(/*unsigned long int*/ _1, /*uimm14*/ _2) ((unsigned long int)__builtin_loongarch_dcsrwr((unsigned long int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ ++#define __dcsrxchg(/*unsigned long int*/ _1, /*unsigned long int*/ _2, /*uimm14*/ _3) ((unsigned long int)__builtin_loongarch_dcsrxchg((unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned 
short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned int, simm12 */ ++#define __cacop(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) ((void)__builtin_loongarch_cacop((_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ ++#define __dcacop(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) ((void)__builtin_loongarch_dcacop((_1), (unsigned long int)(_2), (_3))) ++ ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return drdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbclr() ++{ ++ return (void)__builtin_loongarch_tlbclr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbflush() ++{ ++ return (void)__builtin_loongarch_tlbflush(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbfill() ++{ ++ return (void)__builtin_loongarch_tlbfill(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbrd() ++{ ++ return (void)__builtin_loongarch_tlbrd(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbwr() ++{ ++ return (void)__builtin_loongarch_tlbwr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) ++void __tlbsrch() ++{ ++ return (void)__builtin_loongarch_tlbsrch(); ++} ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __ibar(/*uimm15*/ _1) ((void)__builtin_loongarch_ibar(_1)) ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} ++ ++#define __movfcsr2gr(uimm5) \ ++({ \ ++ unsigned int rd; \ ++ __asm__ volatile ( \ ++ "movfcsr2gr %0, $fcsr" #uimm5 \ ++ : "=&r"(rd) \ ++ : \ ++ ); rd; \ ++}) ++ ++#define __movgr2fcsr(uimm5, rj) \ ++{ \ ++ __asm__ volatile ( \ ++ "movgr2fcsr $fcsr" #uimm5 ", %0" \ ++ : \ ++ : "r" (rj) \ ++ ); \ ++} ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* __LOONGARCH_BASE_H */ +diff --git a/lib/Headers/lasxintrin.h b/lib/Headers/lasxintrin.h +new file mode 100644 +index 00000000..48a0a176 +--- /dev/null ++++ b/lib/Headers/lasxintrin.h +@@ -0,0 +1,5349 @@ ++//===----------- lasxintrin.h - LoongArch LASX intrinsics ++//------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LASX intrinsics. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_ASXINTRIN_H ++#define _GCC_LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ 
++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data 
types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, 
ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction 
format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ 
return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction 
format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in 
instruction templates: V32QI, UV32QI, UQI */ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI */ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI */ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI */ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI */ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI */ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI */ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI */ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ 
++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in 
instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i 
++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: 
V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2 */ ++/* 
Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui1 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: 
xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V32QI, V32QI, USI */ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V16HI, V16HI, USI */ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V8SI, V8SI, USI */ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj */ ++/* Data types in instruction templates: V32QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj */ ++/* Data types in instruction templates: V16HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj */ ++/* Data types in instruction templates: V8SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction 
format: xd, rj */ ++/* Data types in instruction templates: V4DI, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction 
templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly 
instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { 
++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, 
xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly 
instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in 
instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V16HI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in 
instruction templates: V8SF, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V32QI, V32QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V16HI, V16HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V8SI, V8SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V4DI, V4DI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return 
(__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V16HI, V32QI, UQI */ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V8SI, V16HI, UQI */ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, V8SI, UQI */ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV16HI, UV32QI, UQI */ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV8SI, UV16HI, UQI */ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV4DI, UV8SI, UQI */ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* 
Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction 
templates: V32QI, V32QI, V32QI, UQI */ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI */ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI */ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI */ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI */ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI */ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return 
(__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V4DI, V4DI */ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, si12 */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI */ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si12 */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI */ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI */ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui2 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI */ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, i13 */ ++/* Data types in instruction templates: V4DI, HI */ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++/* Assembly instruction format: xd, rj, rk */ ++/* Data types in instruction templates: V32QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: xd, rj, rk */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, ui3 */ ++/* Data types in instruction templates: V8SI, V8SI, SI, UQI */ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, ui2 */ ++/* Data types in instruction templates: V4DI, V4DI, DI, UQI */ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V16HI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V4DI, V4DI, USI */ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, rj, si12 */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si11 */ ++/* Data types in instruction templates: V16HI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvldrepl_h((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si10 */ ++/* Data types in instruction templates: V8SI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si9 */ ++/* Data types in instruction templates: V4DI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void *)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3 */ ++/* Data types in instruction templates: SI, V8SI, UQI */ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3 */ ++/* Data types in instruction templates: USI, V8SI, UQI */ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2 */ ++/* Data types in instruction templates: DI, V4DI, UQI */ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2 */ ++/* Data types in instruction templates: UDI, V4DI, UQI */ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types 
in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) 
{ ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, 
(v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, 
(v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, 
(v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV16HI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), 
(v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), 
(v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), 
(_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), 
(v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_v(__m256i _1) { ++ return __builtin_lasx_xbnz_v((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_v(__m256i _1) { ++ return __builtin_lasx_xbz_v((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_b(__m256i _1) { ++ return __builtin_lasx_xbnz_b((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_h(__m256i _1) { ++ return __builtin_lasx_xbnz_h((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_w(__m256i _1) { ++ return __builtin_lasx_xbnz_w((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_d(__m256i _1) { ++ return __builtin_lasx_xbnz_d((v4u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_b(__m256i _1) { ++ return __builtin_lasx_xbz_b((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_h(__m256i _1) { ++ return __builtin_lasx_xbz_h((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_w(__m256i _1) { ++ return __builtin_lasx_xbz_w((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_d(__m256i _1) { ++ return __builtin_lasx_xbz_d((v4u64)_1); ++} ++ ++#if 0 ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V32QI, i10 */ ++#define __lasx_xvrepli_b(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b(_1)) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V16HI, i10 */ ++#define __lasx_xvrepli_h(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h(_1)) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V8SI, i10 */ ++#define __lasx_xvrepli_w(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w(_1)) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V4DI, i10 */ ++#define __lasx_xvrepli_d(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d(_1)) ++#endif ++ ++#endif /* defined(__loongarch_asx) */ ++#endif /* _GCC_LOONGSON_ASXINTRIN_H */ +diff --git a/lib/Headers/lsxintrin.h b/lib/Headers/lsxintrin.h +new file mode 100644 +index 00000000..bd5f15a0 +--- /dev/null ++++ b/lib/Headers/lsxintrin.h +@@ -0,0 +1,5165 @@ ++//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LSX intrinsics.
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_SXINTRIN_H ++#define _GCC_LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define 
__lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ 
++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI 
*/ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: 
V4SI, V4SI, UQI */ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in 
instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction 
templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ 
return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI */ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI */ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI */ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI */ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), 
(_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: 
V16QI, UV16QI, UQI */ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI */ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI */ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI */ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ 
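Purely as an illustration (not part of the header itself): the element-wise min/max intrinsics declared above are the usual way to clamp packed values. A minimal sketch, assuming a LoongArch64 compiler with LSX enabled (-mlsx); clamp_i8 is a hypothetical helper name, not something this patch defines:

  #include <lsxintrin.h>

  /* Clamp every signed byte of v into [lo, hi] using the LSX
     element-wise max/min intrinsics from this header. */
  static inline __m128i clamp_i8(__m128i v, __m128i lo, __m128i hi) {
    v = __lsx_vmax_b(v, lo);  /* raise elements that fall below lo */
    v = __lsx_vmin_b(v, hi);  /* cap elements that exceed hi      */
    return v;
  }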
++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* 
Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* 
Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline 
++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV8HI, UV8HI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in 
instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V16QI, V16QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V8HI, V8HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V4SI, V4SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V2DI, V2DI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui2 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui1 */ ++/* 
Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UQI */ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V16QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V8HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V4SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V2DI, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return 
(__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: rd, vj, ui4 */ ++/* Data types in instruction templates: SI, V16QI, UQI */ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3 */ ++/* Data types in instruction templates: SI, V8HI, UQI */ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2 */ ++/* Data types in instruction templates: SI, V4SI, UQI */ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ 
((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1 */ ++/* Data types in instruction templates: DI, V2DI, UQI */ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui4 */ ++/* Data types in instruction templates: USI, V16QI, UQI */ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3 */ ++/* Data types in instruction templates: USI, V8HI, UQI */ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2 */ ++/* Data types in instruction templates: USI, V4SI, UQI */ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1 */ ++/* Data types in instruction templates: UDI, V2DI, UQI */ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, SI, UQI */ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui3 */ ++/* Data types in instruction templates: V8HI, V8HI, SI, UQI */ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui2 */ ++/* Data types in instruction templates: V4SI, V4SI, SI, UQI */ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui1 */ ++/* Data types in instruction templates: V2DI, V2DI, SI, UQI */ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return 
(__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction 
templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data 
types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, 
V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in 
instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V8HI, V16QI, UQI */ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V4SI, V8HI, UQI */ ++#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, V4SI, UQI */ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV8HI, UV16QI, UQI */ ++#define 
__lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV4SI, UV8HI, UQI */ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV2DI, UV4SI, UQI */ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return 
(__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI */ ++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ 
((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI */ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI */ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI */ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, UQI */ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, UQI */ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ 
++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return 
(__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrm_d(__m128d _1) { ++ return 
(__m128i)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk 
*/ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { 
++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in 
instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, 
(v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return 
(__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, rj, si12 */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI */ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si11 */ ++/* Data types in instruction templates: V8HI, CVPOINTER, SI */ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si10 */ ++/* Data types in instruction templates: V4SI, CVPOINTER, SI */ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si9 */ ++/* Data types in instruction templates: V2DI, CVPOINTER, SI */ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV8HI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ 
++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, 
V8HI, V8HI, USI */ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ 
((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, si12 */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI */ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si12 */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI */ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); 
++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, i13 */ ++/* Data types in instruction templates: V2DI, HI */ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, rj, rk */ ++/* Data types in instruction templates: V16QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: vd, rj, rk */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ 
__lsx_bnz_v(__m128i _1) { ++ return __builtin_lsx_bnz_v((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_v(__m128i _1) { ++ return __builtin_lsx_bz_v((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_b(__m128i _1) { ++ return __builtin_lsx_bnz_b((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_h(__m128i _1) { ++ return __builtin_lsx_bnz_h((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_w(__m128i _1) { ++ return __builtin_lsx_bnz_w((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_d(__m128i _1) { ++ return __builtin_lsx_bnz_d((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_b(__m128i _1) { ++ return __builtin_lsx_bz_b((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_h(__m128i _1) { ++ return __builtin_lsx_bz_h((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_w(__m128i _1) { ++ return __builtin_lsx_bz_w((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_d(__m128i _1) { ++ return __builtin_lsx_bz_d((v2u64)_1); ++} ++ ++#if 0 ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V16QI, i10 */ ++#define __lsx_vrepli_b(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_b(_1) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V8HI, i10 */ ++#define __lsx_vrepli_h(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_h(_1) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V4SI, i10 */ ++#define __lsx_vrepli_w(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_w(_1) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V2DI, i10 */ ++#define __lsx_vrepli_d(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_d(_1) ++ ++#endif ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _GCC_LOONGSON_SXINTRIN_H */ +diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp +index 69dcc3aa..d5721b52 100644 +--- a/lib/Sema/SemaChecking.cpp ++++ b/lib/Sema/SemaChecking.cpp +@@ -1658,6 +1658,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + } + } + +@@ -4032,6 +4035,547 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, + return false; + } + ++// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the ++// intrinsic is correct. ++// ++// FIXME: The size tests here should instead be tablegen'd along with the ++// definitions from include/clang/Basic/BuiltinsLoongArch.def. ++// FIXME: GCC is strict on signedness for some of these intrinsics, we should ++// be too. 
++bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall) { ++ unsigned i = 0, l = 0, u = 0, m = 0; ++ switch (BuiltinID) { ++ default: return false; ++ // LSX/LASX intrinsics. ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case 
LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate. ++ // The first block of intrinsics actually have an unsigned 5 bit field, ++ // not a df/n field. ++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ 
case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: ++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ i = 1; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ i = 2; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ // These intrinsics take an unsigned 6 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ i = 1; ++ l = 0; ++ u = 63; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ i = 2; ++ l = 0; ++ u = 63; ++ break; ++ // These intrinsics take an unsigned 7 bit immediate. ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ i = 2; ++ l = 0; ++ u = 127; ++ break; ++ // These intrinsics take a signed 5 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ i = 1; ++ l = -16; ++ u = 15; ++ break; ++ // These intrinsics take a signed 9 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ i = 1; ++ l = -256; ++ u = 255; ++ break; ++ // These intrinsics take an unsigned 8 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ i = 1; ++ l = 0; ++ u = 255; ++ break; ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ i = 2; ++ l = 0; ++ u = 255; ++ break; ++ // df/n format ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ i = 2; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ // These intrinsics take an unsigned 2 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ i = 1; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ // These intrinsics take an unsigned 1 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ i = 1; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ // Memory offsets and immediate loads. ++ // These intrinsics take a signed 10 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ i = 1; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ i = 1; ++ l = -1024; ++ u = 1023; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvld: ++ case LoongArch::BI__builtin_lsx_vld: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lsx_vst: ++ case LoongArch::BI__builtin_lasx_xvst: ++ i = 2; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldi: ++ case LoongArch::BI__builtin_lsx_vldi: ++ i = 0; ++ l = -4096; ++ u = 4095; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. ++ case LoongArch::BI__builtin_loongarch_cacop: ++ case LoongArch::BI__builtin_loongarch_dcacop: ++ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || ++ SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ // These intrinsics take an unsigned 14 bit immediate. ++ case LoongArch::BI__builtin_loongarch_csrrd: ++ case LoongArch::BI__builtin_loongarch_dcsrrd: i = 0; l = 0; u = 16383; break; ++ case LoongArch::BI__builtin_loongarch_csrwr: ++ case LoongArch::BI__builtin_loongarch_dcsrwr: i = 1; l = 0; u = 16383; break; ++ case LoongArch::BI__builtin_loongarch_csrxchg: ++ case LoongArch::BI__builtin_loongarch_dcsrxchg: i = 2; l = 0; u = 16383; break; ++ // These intrinsics take an unsigned 15 bit immediate. 
++ case LoongArch::BI__builtin_loongarch_dbar: ++ case LoongArch::BI__builtin_loongarch_ibar: ++ case LoongArch::BI__builtin_loongarch_syscall: ++ case LoongArch::BI__builtin_loongarch_break: i = 0; l = 0; u = 32767; break; ++ } ++ ++ if (!m) ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u); ++ ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u) || ++ SemaBuiltinConstantArgMultiple(TheCall, i, m); ++} ++ + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + if (BuiltinID == SystemZ::BI__builtin_tabort) { +diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp +index 29378282..467372c7 100644 +--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp ++++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp +@@ -4826,8 +4826,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, + /*Complain*/DefinitionRequired)) { + if (DefinitionRequired) + Function->setInvalidDecl(); +- else if (TSK == TSK_ExplicitInstantiationDefinition || +- (Function->isConstexpr() && !Recursive)) { ++ else if (TSK == TSK_ExplicitInstantiationDefinition) { + // Try again at the end of the translation unit (at which point a + // definition will be required). + assert(!Recursive); +@@ -4842,7 +4841,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, + Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); + if (getLangOpts().CPlusPlus11) + Diag(PointOfInstantiation, diag::note_inst_declaration_hint) +- << Function; ++ << Function; + } + } + +diff --git a/test/CodeGen/sanitize-coverage-old-pm.c b/test/CodeGen/sanitize-coverage-old-pm.c +index 9b4f8991..18123a53 100644 +--- a/test/CodeGen/sanitize-coverage-old-pm.c ++++ b/test/CodeGen/sanitize-coverage-old-pm.c +@@ -7,8 +7,8 @@ + // + // Host armv7 is currently unsupported: https://bugs.llvm.org/show_bug.cgi?id=46117 + // UNSUPPORTED: armv7, armv7l, thumbv7, armv8l +-// The same issue also occurs on a riscv32 host. +-// XFAIL: riscv32 ++// The same issue also occurs on riscv32 and loongarch64 hosts. 
++// XFAIL: riscv32, loongarch64 + + int x[10]; + +diff --git a/test/CodeGen/ubsan-function.cpp b/test/CodeGen/ubsan-function.cpp +index 8a16dfdf..2466d8a2 100644 +--- a/test/CodeGen/ubsan-function.cpp ++++ b/test/CodeGen/ubsan-function.cpp +@@ -1,7 +1,6 @@ + // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s + +-// CHECK: @[[PROXY:.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) +-// CHECK: define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] { ++// CHECK-LABEL: define{{.*}} void @_Z3funv() #0 prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @_Z3funv to i64)) to i32) }> { + void fun() {} + + // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(void ()* noundef %f) +@@ -21,5 +20,3 @@ void fun() {} + // CHECK: [[LABEL3]]: + // CHECK: br label %[[LABEL4]], !nosanitize + void caller(void (*f)()) { f(); } +- +-// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** @[[PROXY]]} +diff --git a/test/CodeGenCXX/catch-undef-behavior.cpp b/test/CodeGenCXX/catch-undef-behavior.cpp +index ade29797..d6b094cb 100644 +--- a/test/CodeGenCXX/catch-undef-behavior.cpp ++++ b/test/CodeGenCXX/catch-undef-behavior.cpp +@@ -1,8 +1,8 @@ +-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefixes=CHECK,CHECK-FUNCSAN ++// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s + // RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=vptr,address -fsanitize-recover=vptr,address -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-ASAN + // RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=vptr -fsanitize-recover=vptr -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=DOWNCAST-NULL +-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-FUNCSAN +-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-FUNCSAN ++// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-X32 ++// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-X86 + + struct S { + double d; +@@ -16,7 +16,9 @@ 
struct S { + // Check that type mismatch handler is not modified by ASan. + // CHECK-ASAN: private unnamed_addr global { { [{{.*}} x i8]*, i32, i32 }, { i16, i16, [4 x i8] }*, i8*, i8 } { {{.*}}, { i16, i16, [4 x i8] }* [[TYPE_DESCR]], {{.*}} } + +-// CHECK-FUNCSAN: [[PROXY:@.+]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) ++// CHECK: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) ++// CHECK-X86: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) ++// CHECK-X32: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) + + struct T : S {}; + +@@ -397,7 +399,10 @@ void downcast_reference(B &b) { + // CHECK-NEXT: br i1 [[AND]] + } + +-// CHECK-FUNCSAN: @_Z22indirect_function_callPFviE({{.*}} !func_sanitize ![[FUNCSAN:.*]] { ++// ++// CHECK-LABEL: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** {{.*}} to i64), i64 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i64)) to i32) }> ++// CHECK-X32: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }> ++// CHECK-X86: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }> + void indirect_function_call(void (*p)(int)) { + // CHECK: [[PTR:%.+]] = bitcast void (i32)* {{.*}} to <{ i32, i32 }>* + +@@ -478,34 +483,34 @@ void force_irgen() { + } + + // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls1B1fEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define{{.*}} void @_ZTv0_n24_N29FunctionSanitizerVirtualCalls1B1fEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls11force_irgenEv() +-// CHECK: !func_sanitize ++// CHECK: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1AC1Ev +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1gEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1hEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1BC1Ev +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1bEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1gEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1qEv +-// CHECK: !func_sanitize ++// CHECK: prologue + + } + +@@ -749,5 +754,3 @@ void ThisAlign::this_align_lambda_2() { + } + + // CHECK: attributes [[NR_NUW]] = { noreturn nounwind } +- +-// CHECK-FUNCSAN: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]} +diff --git a/test/CodeGenCXX/ubsan-function-noexcept.cpp 
b/test/CodeGenCXX/ubsan-function-noexcept.cpp +index 9d5eb1ed..3c0c0e8b 100644 +--- a/test/CodeGenCXX/ubsan-function-noexcept.cpp ++++ b/test/CodeGenCXX/ubsan-function-noexcept.cpp +@@ -2,8 +2,8 @@ + + // Check that typeinfo recorded in function prolog doesn't have "Do" noexcept + // qualifier in its mangled name. +-// CHECK: [[PROXY:@.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) +-// CHECK: define{{.*}} void @_Z1fv() #{{.*}} !func_sanitize ![[FUNCSAN:.*]] { ++// CHECK: @[[RTTI:[0-9]+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) ++// CHECK: define{{.*}} void @_Z1fv() #{{.*}} prologue <{ i32, i32 }> <{ i32 {{.*}}, i32 trunc (i64 sub (i64 ptrtoint (i8** @[[RTTI]] to i64), i64 ptrtoint (void ()* @_Z1fv to i64)) to i32) }> + void f() noexcept {} + + // CHECK: define{{.*}} void @_Z1gPDoFvvE +@@ -13,5 +13,3 @@ void g(void (*p)() noexcept) { + // CHECK: icmp eq i8* %{{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize + p(); + } +- +-// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]} +diff --git a/test/Driver/baremetal-sysroot.cpp b/test/Driver/baremetal-sysroot.cpp +index ae174e01..fc660207 100644 +--- a/test/Driver/baremetal-sysroot.cpp ++++ b/test/Driver/baremetal-sysroot.cpp +@@ -10,7 +10,7 @@ + // RUN: ln -s %clang %T/baremetal_default_sysroot/bin/clang + + // RUN: %T/baremetal_default_sysroot/bin/clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target armv6m-none-eabi \ ++// RUN: -target armv6m-none-eabi --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-V6M-C %s + // CHECK-V6M-C: "{{.*}}clang{{.*}}" "-cc1" "-triple" "thumbv6m-none-unknown-eabi" + // CHECK-V6M-C-SAME: "-internal-isystem" "{{.*}}/baremetal_default_sysroot{{[/\\]+}}bin{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+}}armv6m-none-eabi{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1" +diff --git a/test/Driver/baremetal.cpp b/test/Driver/baremetal.cpp +index 7c11fe67..56eb5b70 100644 +--- a/test/Driver/baremetal.cpp ++++ b/test/Driver/baremetal.cpp +@@ -105,7 +105,7 @@ + // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include" + + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target aarch64-none-elf \ ++// RUN: -target aarch64-none-elf --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-AARCH64-NO-HOST-INC %s + // Verify that the bare metal driver does not include any host system paths: + // CHECK-AARCH64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +diff --git a/test/Driver/fsanitize.c b/test/Driver/fsanitize.c +index a98fc2ee..17fce198 100644 +--- a/test/Driver/fsanitize.c ++++ b/test/Driver/fsanitize.c +@@ -666,12 +666,12 @@ + // RUN: %clang -fno-sanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NOSP + // NOSP-NOT: "-fsanitize=safe-stack" + +-// RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP ++// RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP + // RUN: %clang -target x86_64-linux-gnu -fsanitize=address,safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP-ASAN + // RUN: %clang -target x86_64-linux-gnu -fstack-protector -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP + // RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -fstack-protector-all -### %s 2>&1 | FileCheck %s -check-prefix=SP +-// RUN: %clang -target arm-linux-androideabi -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP +-// RUN: %clang -target aarch64-linux-android 
-fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP ++// RUN: %clang -target arm-linux-androideabi -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP ++// RUN: %clang -target aarch64-linux-android -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP + // RUN: %clang -target i386-contiki-unknown -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP + // NO-SP-NOT: stack-protector + // NO-SP: "-fsanitize=safe-stack" +@@ -915,6 +915,3 @@ + + // RUN: %clang -fsanitize=undefined,float-divide-by-zero %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DIVBYZERO-UBSAN + // CHECK-DIVBYZERO-UBSAN: "-fsanitize={{.*}},float-divide-by-zero,{{.*}}" +- +-// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined,function -mcmodel=large %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-CODE-MODEL +-// CHECK-UBSAN-FUNCTION-CODE-MODEL: error: invalid argument '-fsanitize=function' only allowed with '-mcmodel=small' +diff --git a/test/Driver/hexagon-toolchain-linux.c b/test/Driver/hexagon-toolchain-linux.c +index da595903..1ef0561f 100644 +--- a/test/Driver/hexagon-toolchain-linux.c ++++ b/test/Driver/hexagon-toolchain-linux.c +@@ -100,7 +100,7 @@ + // ----------------------------------------------------------------------------- + // internal-isystem for linux with and without musl + // ----------------------------------------------------------------------------- +-// RUN: %clang -### -target hexagon-unknown-linux-musl \ ++// RUN: %clang -### -target hexagon-unknown-linux-musl --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +@@ -110,7 +110,7 @@ + // CHECK008-SAME: {{^}} "-internal-isystem" "[[RESOURCE]]/include" + // CHECK008-SAME: {{^}} "-internal-externc-isystem" "[[INSTALLED_DIR]]/../target/hexagon/include" + +-// RUN: %clang -### -target hexagon-unknown-linux \ ++// RUN: %clang -### -target hexagon-unknown-linux --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +diff --git a/test/Driver/mips-cs.cpp b/test/Driver/mips-cs.cpp +index 39f87d8f..6ef4c5d4 100644 +--- a/test/Driver/mips-cs.cpp ++++ b/test/Driver/mips-cs.cpp +@@ -4,7 +4,7 @@ + // + // = Big-endian, hard float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -no-pie \ ++// RUN: --target=mips-linux-gnu \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-32 %s + // CHECK-BE-HF-32: "-internal-isystem" +@@ -32,7 +32,7 @@ + // + // = Big-endian, hard float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -muclibc -no-pie \ ++// RUN: --target=mips-linux-gnu -muclibc \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-UC-HF-32 %s + // CHECK-BE-UC-HF-32: "-internal-isystem" +@@ -61,7 +61,7 @@ + // + // = Big-endian, hard float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -mips16 -no-pie \ ++// RUN: --target=mips-linux-gnu -mips16 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-16 %s + // CHECK-BE-HF-16: "-internal-isystem" +@@ -90,7 +90,7 @@ + // + // = Big-endian, hard float, mmicromips + // RUN: %clang -no-canonical-prefixes 
%s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -mmicromips -no-pie \ ++// RUN: --target=mips-linux-gnu -mmicromips \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-MICRO %s + // CHECK-BE-HF-MICRO: "-internal-isystem" +@@ -119,7 +119,7 @@ + // + // = Big-endian, hard float, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -mnan=2008 -no-pie \ ++// RUN: --target=mips-linux-gnu -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-NAN %s + // CHECK-BE-HF-NAN: "-internal-isystem" +@@ -148,7 +148,7 @@ + // + // = Big-endian, hard float, uclibc, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -muclibc -mnan=2008 -no-pie \ ++// RUN: --target=mips-linux-gnu -muclibc -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-UC-HF-NAN %s + // CHECK-BE-UC-HF-NAN: "-internal-isystem" +@@ -177,7 +177,7 @@ + // + // = Big-endian, soft float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -msoft-float -no-pie \ ++// RUN: --target=mips-linux-gnu -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-32 %s + // CHECK-BE-SF-32: "-internal-isystem" +@@ -206,7 +206,7 @@ + // + // = Big-endian, soft float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -muclibc -msoft-float -no-pie \ ++// RUN: --target=mips-linux-gnu -muclibc -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-UC-SF-32 %s + // CHECK-BE-UC-SF-32: "-internal-isystem" +@@ -235,7 +235,7 @@ + // + // = Big-endian, soft float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -msoft-float -mips16 -no-pie \ ++// RUN: --target=mips-linux-gnu -msoft-float -mips16 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-16 %s + // CHECK-BE-SF-16: "-internal-isystem" +@@ -264,7 +264,7 @@ + // + // = Big-endian, soft float, micromips + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -msoft-float -mmicromips -no-pie \ ++// RUN: --target=mips-linux-gnu -msoft-float -mmicromips \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-MICRO %s + // CHECK-BE-SF-MICRO: "-internal-isystem" +@@ -293,7 +293,7 @@ + // + // = Big-endian, hard float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64-linux-gnu -no-pie \ ++// RUN: --target=mips64-linux-gnu \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-64 %s + // CHECK-BE-HF-64: "-internal-isystem" +@@ -322,7 +322,7 @@ + // + // = Big-endian, soft float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64-linux-gnu -msoft-float -no-pie \ ++// RUN: --target=mips64-linux-gnu -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-64 %s + // CHECK-BE-SF-64: 
"-internal-isystem" +@@ -351,7 +351,7 @@ + // + // = Little-endian, hard float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mhard-float -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mhard-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-32 %s + // CHECK-EL-HF-32: "-internal-isystem" +@@ -380,7 +380,7 @@ + // + // = Little-endian, hard float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mhard-float -muclibc -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mhard-float -muclibc \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-UC-HF-32 %s + // CHECK-EL-UC-HF-32: "-internal-isystem" +@@ -409,7 +409,7 @@ + // + // = Little-endian, hard float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mips16 -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mips16 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-16 %s + // CHECK-EL-HF-16: "-internal-isystem" +@@ -438,7 +438,7 @@ + // + // = Little-endian, hard float, micromips + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mmicromips -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mmicromips \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-MICRO %s + // CHECK-EL-HF-MICRO: "-internal-isystem" +@@ -467,7 +467,7 @@ + // + // = Little-endian, hard float, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mnan=2008 -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-NAN %s + // CHECK-EL-HF-NAN: "-internal-isystem" +@@ -496,7 +496,7 @@ + // + // = Little-endian, hard float, uclibc, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -muclibc -mnan=2008 -no-pie \ ++// RUN: --target=mipsel-linux-gnu -muclibc -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-UC-HF-NAN %s + // CHECK-EL-UC-HF-NAN: "-internal-isystem" +@@ -525,7 +525,7 @@ + // + // = Little-endian, soft float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-32 %s + // CHECK-EL-SF-32: "-internal-isystem" +@@ -554,7 +554,7 @@ + // + // = Little-endian, soft float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft -muclibc -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft -muclibc \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-UC-SF-32 %s + // CHECK-EL-UC-SF-32: "-internal-isystem" +@@ -583,7 +583,7 @@ + // + // = Little-endian, soft float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mips16 -msoft-float -no-pie \ ++// RUN: 
--target=mipsel-linux-gnu -mips16 -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-16 %s + // CHECK-EL-SF-16: "-internal-isystem" +@@ -612,7 +612,7 @@ + // + // = Little-endian, soft float, micromips + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mmicromips -msoft-float -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mmicromips -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-MICRO %s + // CHECK-EL-SF-MICRO: "-internal-isystem" +@@ -641,7 +641,7 @@ + // + // = Little-endian, hard float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64el-linux-gnu -no-pie \ ++// RUN: --target=mips64el-linux-gnu \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-64 %s + // CHECK-EL-HF-64: "-internal-isystem" +@@ -670,7 +670,7 @@ + // + // = Little-endian, soft float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64el-linux-gnu -msoft-float -no-pie \ ++// RUN: --target=mips64el-linux-gnu -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-64 %s + // CHECK-EL-SF-64: "-internal-isystem" +diff --git a/test/Driver/stack-protector.c b/test/Driver/stack-protector.c +index dfffe0d6..a3e40b50 100644 +--- a/test/Driver/stack-protector.c ++++ b/test/Driver/stack-protector.c +@@ -3,11 +3,11 @@ + // NOSSP-NOT: "-stack-protector-buffer-size" + + // RUN: %clang -target i386-unknown-linux -fstack-protector -### %s 2>&1 | FileCheck %s -check-prefix=SSP +-// SSP: "-stack-protector" "2" ++// SSP: "-stack-protector" "1" + // SSP-NOT: "-stack-protector-buffer-size" + + // RUN: %clang -target i386-unknown-linux -fstack-protector --param ssp-buffer-size=16 -### %s 2>&1 | FileCheck %s -check-prefix=SSP-BUF +-// SSP-BUF: "-stack-protector" "2" ++// SSP-BUF: "-stack-protector" "1" + // SSP-BUF: "-stack-protector-buffer-size" "16" + + // RUN: %clang -target i386-pc-openbsd -### %s 2>&1 | FileCheck %s -check-prefix=OPENBSD +diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c +index 46cfcd6d..c83c82d7 100644 +--- a/test/Preprocessor/init.c ++++ b/test/Preprocessor/init.c +@@ -2603,3 +2603,33 @@ + // RISCV64-LINUX: #define __unix__ 1 + // RISCV64-LINUX: #define linux 1 + // RISCV64-LINUX: #define unix 1 ++ ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-feature +d /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-HASBASICD %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-feature +f /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-HASBASICF %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-feature -d -target-feature -f /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-SOFT %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-abi lp64s /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-LP64S %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-abi lp64f /dev/null \ 
++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-LP64F,LOONGARCH64-HARD %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-abi lp64d /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-LP64D,LOONGARCH64-HARD %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefix=LOONGARCH64 %s ++// LOONGARCH64: #define _LOONGARCH_ARCH "loongarch64" ++// LOONGARCH64: #define _LOONGARCH_SZINT 32 ++// LOONGARCH64: #define _LOONGARCH_SZLONG 64 ++// LOONGARCH64: #define _LOONGARCH_SZPTR 64 ++// LOONGARCH64: #define _LOONGARCH_TUNE "la464" ++// LOONGARCH64: #define __loongarch__ 1 ++// LOONGARCH64-LP64D: #define __loongarch_double_float 1 ++// LOONGARCH64-HASBASICD: #define __loongarch_frlen 64 ++// LOONGARCH64-HASBASICF: #define __loongarch_frlen 32 ++// LOONGARCH64-SOFT: #define __loongarch_frlen 0 ++// LOONGARCH64: #define __loongarch_grlen 64 ++// LOONGARCH64-HARD: #define __loongarch_hard_float 1 ++// LOONGARCH64: #define __loongarch_lp64 1 ++// LOONGARCH64-LP64F: #define __loongarch_single_float 1 ++// LOONGARCH64-LP64S: #define __loongarch_soft_float 1 +diff --git a/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +index 75928d91..3350ee3f 100644 +--- a/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp ++++ b/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +@@ -104,6 +104,11 @@ extern "C" int throw_exception() { + if (Triple.isPPC()) + return; + ++ // FIXME: LoongArch64 fails due to `Symbols not found: ++ // [DW.ref.__gxx_personality_v0]` ++ if (Triple.isLoongArch64()) ++ return; ++ + // FIXME: ARM fails due to `Not implemented relocation type!` + if (Triple.isARM()) + return; +-- +2.38.1 + diff --git a/clang14/0002-add-loong64-support.patch b/clang14/0002-add-loong64-support.patch new file mode 100644 index 0000000000..298561e608 --- /dev/null +++ b/clang14/0002-add-loong64-support.patch @@ -0,0 +1,9243 @@ +From 7f62cae72a49ab95af602ef5103ba0aeee68b604 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Tue, 20 Dec 2022 18:54:24 +0800 +Subject: [PATCH 2/2] add loong64 support + +--- + test/CodeGen/LoongArch/abi-lp64d.c | 474 +++ + .../LoongArch/inlineasm-float-double-in-gpr.c | 49 + + test/CodeGen/builtins-loongarch-base.c | 417 ++ + test/CodeGen/builtins-loongarch-lasx-error.c | 266 ++ + test/CodeGen/builtins-loongarch-lasx.c | 3761 +++++++++++++++++ + test/CodeGen/builtins-loongarch-lsx-error.c | 250 ++ + test/CodeGen/builtins-loongarch-lsx.c | 3630 ++++++++++++++++ + test/CodeGen/loongarch-inline-asm-modifiers.c | 50 + + test/CodeGen/loongarch-inline-asm.c | 31 + + .../LoongArch/abi-lp64d-struct-inherit.cpp | 95 + + test/Driver/loongarch-abi-fpu.c | 26 + + test/Driver/loongarch-alignment-feature.c | 8 + + test/Driver/loongarch-double-single-soft.c | 12 + + test/Driver/loongarch-mabi.c | 22 + + test/Driver/loongarch-mfpu.c | 21 + + 15 files changed, 9112 insertions(+) + create mode 100644 test/CodeGen/LoongArch/abi-lp64d.c + create mode 100644 test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c + create mode 100644 test/CodeGen/builtins-loongarch-base.c + create mode 100644 test/CodeGen/builtins-loongarch-lasx-error.c + create mode 100644 test/CodeGen/builtins-loongarch-lasx.c + create mode 100644 test/CodeGen/builtins-loongarch-lsx-error.c + create mode 100644 
test/CodeGen/builtins-loongarch-lsx.c + create mode 100644 test/CodeGen/loongarch-inline-asm-modifiers.c + create mode 100644 test/CodeGen/loongarch-inline-asm.c + create mode 100644 test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp + create mode 100644 test/Driver/loongarch-abi-fpu.c + create mode 100644 test/Driver/loongarch-alignment-feature.c + create mode 100644 test/Driver/loongarch-double-single-soft.c + create mode 100644 test/Driver/loongarch-mabi.c + create mode 100644 test/Driver/loongarch-mfpu.c + +diff --git a/test/CodeGen/LoongArch/abi-lp64d.c b/test/CodeGen/LoongArch/abi-lp64d.c +new file mode 100644 +index 00000000..80435701 +--- /dev/null ++++ b/test/CodeGen/LoongArch/abi-lp64d.c +@@ -0,0 +1,474 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ ++// RUN: -emit-llvm %s -o - | FileCheck %s ++ ++/// This test checks the calling convention of the lp64d ABI. ++ ++#include ++#include ++ ++/// Part 0: C Data Types and Alignment. ++ ++/// `char` datatype is signed by default. ++/// In most cases, the unsigned integer data types are zero-extended when stored ++/// in general-purpose register, and the signed integer data types are ++/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as ++/// unsigned int, are stored in general-purpose registers as proper sign ++/// extensions of their 32-bit values. ++ ++// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() ++_Bool check_bool() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i8 @check_char() ++char check_char() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i16 @check_short() ++short check_short() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_int() ++int check_int() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_long() ++long check_long() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_longlong() ++long long check_longlong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() ++unsigned char check_uchar() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() ++unsigned short check_ushort() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_uint() ++unsigned int check_uint() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulong() ++unsigned long check_ulong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() ++unsigned long long check_ulonglong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} float @check_float() ++float check_float() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} double @check_double() ++double check_double() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() ++long double check_longdouble() { return 0; } ++ ++/// Part 1: Scalar arguments and return value. ++ ++/// The lp64d abi says: ++/// 1. 1 < WOA <= GRLEN ++/// a. Argument is passed in a single argument register, or on the stack by ++/// value if none is available. ++/// i. If the argument is floating-point type, the argument is passed in FAR. if ++/// no FAR is available, it’s passed in GAR. If no GAR is available, it’s ++/// passed on the stack. When passed in registers or on the stack, ++/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, ++/// with the upper bits undefined. ++/// ii. If the argument is integer or pointer type, the argument is passed in ++/// GAR. If no GAR is available, it’s passed on the stack. 
When passed in ++/// registers or on the stack, the unsigned integer scalars narrower than GRLEN ++/// bits are zero-extended to GRLEN bits, and the signed integer scalars are ++/// sign-extended. ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in ++/// the lower-numbered register and the high-order GRLEN bits in the ++/// higher-numbered register. If exactly one register is available, the ++/// low-order GRLEN bits are passed in the register and the high-order GRLEN ++/// bits are passed on the stack. If no GAR is available, it’s passed on the ++/// stack. ++ ++/// Note that most of these conventions are handled at the llvm side, so here we ++/// only check the correctness of argument (or return value)'s sign/zero ++/// extension attribute. ++ ++// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) ++int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, ++ uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, ++ int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, ++ uint32_t o, int64_t p) { ++ return 0; ++} ++ ++/// Part 2: Structure arguments and return value. ++ ++/// The lp64d abi says: ++/// Empty structures are ignored by C compilers which support them as a ++/// non-standard extension(same as union arguments and return values). Bits ++/// unused due to padding, and bits past the end of a structure whose size in ++/// bits is not divisible by GRLEN, are undefined. And the layout of the ++/// structure on the stack is consistent with that in memory. ++ ++/// Check empty structs are ignored. ++ ++struct empty_s {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_s() ++struct empty_s f_empty_s(struct empty_s x) { ++ return x; ++} ++ ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The structure has only fixed-point members. If there is an available GAR, ++/// the structure is passed through the GAR by value passing; If no GAR is ++/// available, it’s passed on the stack. ++ ++struct i16x4_s { ++ int16_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) ++struct i16x4_s f_i16x4_s(struct i16x4_s x) { ++ return x; ++} ++ ++/// b. The structure has only floating-point members: ++/// i. One floating-point member. The argument is passed in a FAR; If no FAR is ++/// available, the value is passed in a GAR; if no GAR is available, the value ++/// is passed on the stack. ++ ++struct f32x1_s { ++ float a; ++}; ++ ++struct f64x1_s { ++ double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) ++struct f32x1_s f_f32x1_s(struct f32x1_s x) { ++ return x; ++} ++ ++// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) ++struct f64x1_s f_f64x1_s(struct f64x1_s x) { ++ return x; ++} ++ ++/// ii. Two floating-point members. The argument is passed in a pair of ++/// available FAR, with the low-order float member bits in the lower-numbered ++/// FAR and the high-order float member bits in the higher-numbered FAR. If the ++/// number of available FAR is less than 2, it’s passed in a GAR, and passed on ++/// the stack if no GAR is available. 
++ ++struct f32x2_s { ++ float a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) ++struct f32x2_s f_f32x2_s(struct f32x2_s x) { ++ return x; ++} ++ ++/// c. The structure has both fixed-point and floating-point members, i.e. the ++/// structure has one float member and... ++/// i. Multiple fixed-point members. If there are available GAR, the structure ++/// is passed in a GAR, and passed on the stack if no GAR is available. ++ ++struct f32x1_i16x2_s { ++ float a; ++ int16_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) ++struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { ++ return x; ++} ++ ++/// ii. Only one fixed-point member. If one FAR and one GAR are available, the ++/// floating-point member of the structure is passed in the FAR, and the integer ++/// member of the structure is passed in the GAR; If no floating-point register ++/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s ++/// passed on the stack. ++ ++struct f32x1_i32x1_s { ++ float a; ++ int32_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1) ++struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. Only fixed-point members. ++/// i. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct i64x2_s { ++ int64_t a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce) ++struct i64x2_s f_i64x2_s(struct i64x2_s x) { ++ return x; ++} ++ ++/// b. Only floating-point members. ++/// i. The structure has one long double member or one double member and two ++/// adjacent float members or 3-4 float members. The argument is passed in a ++/// pair of available GAR, with the low-order bits in the lower-numbered GAR and ++/// the high-order bits in the higher-numbered GAR. If only one GAR is ++/// available, the low-order bits are in the GAR and the high-order bits are on ++/// the stack, and passed on the stack if no GAR is available. ++ ++struct f128x1_s { ++ long double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce) ++struct f128x1_s f_f128x1_s(struct f128x1_s x) { ++ return x; ++} ++ ++struct f64x1_f32x2_s { ++ double a; ++ float b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce) ++struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) { ++ return x; ++} ++ ++struct f32x3_s { ++ float a, b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce) ++struct f32x3_s f_f32x3_s(struct f32x3_s x) { ++ return x; ++} ++ ++struct f32x4_s { ++ float a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce) ++struct f32x4_s f_f32x4_s(struct f32x4_s x) { ++ return x; ++} ++ ++/// ii. The structure with two double members is passed in a pair of available ++/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with ++/// one double member and one float member is same. 
++ ++struct f64x2_s { ++ double a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1) ++struct f64x2_s f_f64x2_s(struct f64x2_s x) { ++ return x; ++} ++ ++/// c. Both fixed-point and floating-point members. ++/// i. The structure has one double member and only one fixed-point member. ++/// A. If one FAR and one GAR are available, the floating-point member of the ++/// structure is passed in the FAR, and the integer member of the structure is ++/// passed in the GAR; If no floating-point registers but two GARs are ++/// available, it’s passed in the two GARs; If only one GAR is available, the ++/// low-order bits are in the GAR and the high-order bits are on the stack; And ++/// it’s passed on the stack if no GAR is available. ++ ++struct f64x1_i64x1_s { ++ double a; ++ int64_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1) ++struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) { ++ return x; ++} ++ ++/// ii. Others ++/// A. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct f64x1_i32x2_s { ++ double a; ++ int32_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce) ++struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) { ++ return x; ++} ++ ++struct f32x2_i32x2_s { ++ float a, b; ++ int32_t c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce) ++struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and are replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++struct i64x4_s { ++ int64_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64x4_s(%struct.i64x4_s*{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, %struct.i64x4_s*{{.*}} %x) ++struct i64x4_s f_i64x4_s(struct i64x4_s x) { ++ return x; ++} ++ ++struct f64x4_s { ++ double a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_f64x4_s(%struct.f64x4_s*{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, %struct.f64x4_s*{{.*}} %x) ++struct f64x4_s f_f64x4_s(struct f64x4_s x) { ++ return x; ++} ++ ++/// Part 3: Union arguments and return value. ++ ++/// Check empty unions are ignored. ++ ++union empty_u {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_u() ++union empty_u f_empty_u(union empty_u x) { ++ return x; ++} ++ ++/// Union is passed in GAR or stack. ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The argument is passed in a GAR, or on the stack by value if no GAR is ++/// available. ++ ++union i32_f32_u { ++ int32_t a; ++ float b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce) ++union i32_f32_u f_i32_f32_u(union i32_f32_u x) { ++ return x; ++} ++ ++union i64_f64_u { ++ int64_t a; ++ double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce) ++union i64_f64_u f_i64_f64_u(union i64_f64_u x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. 
The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack. The arguments are passed ++/// on the stack when no GAR is available. ++ ++union i128_f128_u { ++ __int128_t a; ++ long double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce) ++union i128_f128_u f_i128_f128_u(union i128_f128_u x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and are replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++union i64_arr3_u { ++ int64_t a[3]; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(%union.i64_arr3_u*{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, %union.i64_arr3_u*{{.*}} %x) ++union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { ++ return x; ++} ++ ++/// Part 4: Complex number arguments and return value. ++ ++/// A complex floating-point number, or a structure containing just one complex ++/// floating-point number, is passed as though it were a structure containing ++/// two floating-point reals. ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1) ++float __complex__ f_floatcomplex(float __complex__ x) { return x; } ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1) ++double __complex__ f_doublecomplex(double __complex__ x) { return x; } ++ ++struct floatcomplex_s { ++ float __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1) ++struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) { ++ return x; ++} ++ ++struct doublecomplex_s { ++ double __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1) ++struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { ++ return x; ++} ++ ++/// Part 5: Variadic arguments. ++ ++/// Variadic arguments are passed in GARs in the same manner as named arguments. ++ ++int f_va_callee(int, ...); ++ ++// CHECK-LABEL: define{{.*}} void @f_va_caller() ++// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) ++void f_va_caller(void) { ++ f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9}, ++ (struct i64x2_s){10, 11}); ++} ++ ++// CHECK-LABE: define signext i32 @f_va_int(i8* %fmt, ...) 
++// CHECK: entry: ++// CHECK: %fmt.addr = alloca i8*, align 8 ++// CHECK: %va = alloca i8*, align 8 ++// CHECK: %v = alloca i32, align 4 ++// CHECK: store i8* %fmt, i8** %fmt.addr, align 8 ++// CHECK: %va1 = bitcast i8** %va to i8* ++// CHECK: call void @llvm.va_start(i8* %va1) ++// CHECK: %argp.cur = load i8*, i8** %va, align 8 ++// CHECK: %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 8 ++// CHECK: store i8* %argp.next, i8** %va, align 8 ++// CHECK: %0 = bitcast i8* %argp.cur to i32* ++// CHECK: %1 = load i32, i32* %0, align 8 ++// CHECK: store i32 %1, i32* %v, align 4 ++// CHECK: %va2 = bitcast i8** %va to i8* ++// CHECK: call void @llvm.va_end(i8* %va2) ++// CHECK: %2 = load i32, i32* %v, align 4 ++// CHECK: ret i32 %2 ++// CHECK: } ++int f_va_int(char *fmt, ...) { ++ __builtin_va_list va; ++ __builtin_va_start(va, fmt); ++ int v = __builtin_va_arg(va, int); ++ __builtin_va_end(va); ++ return v; ++} +diff --git a/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +new file mode 100644 +index 00000000..bc9c616b +--- /dev/null ++++ b/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +@@ -0,0 +1,49 @@ ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++ ++float f; ++double d; ++ ++// CHECK-LABEL: @reg_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, float* @f ++// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) ++// CHECK: ret void ++void reg_float() { ++ float a = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, float* @f ++// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) ++// CHECK: ret void ++void r4_float() { ++ register float a asm("$r4") = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @reg_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, double* @d ++// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) ++// CHECK: ret void ++void reg_double() { ++ double a = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, double* @d ++// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) ++// CHECK: ret void ++void r4_double() { ++ register double a asm("$r4") = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} +diff --git a/test/CodeGen/builtins-loongarch-base.c b/test/CodeGen/builtins-loongarch-base.c +new file mode 100644 +index 00000000..d7221359 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-base.c +@@ -0,0 +1,417 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++typedef char i8; ++typedef unsigned char u8; ++typedef short i16; ++typedef unsigned short u16; ++typedef int i32; ++typedef unsigned int u32; ++ ++#if __LONG_MAX__ == __LONG_LONG_MAX__ ++typedef long int i64; ++typedef unsigned long int u64; ++#else ++typedef long long i64; ++typedef unsigned long long u64; ++#endif ++ ++__drdtime_t drdtime; ++__rdtime_t rdtime; ++ ++void cpucfg(){ ++ ++ u32 u32_r, u32_a; ++ // __cpucfg ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg ++ ++} ++ ++void csrrd(){ ++ ++ u32 u32_r; ++ // __csrrd ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r=__builtin_loongarch_csrrd(1); // CHECK: call i32 @llvm.loongarch.csrrd ++ ++} ++ ++void dcsrrd(){ ++ ++ u64 u64_r; ++ // __dcsrrd ++ // rd, 
csr_num ++ // unsigned long int, uimm14 ++ u64_r=__builtin_loongarch_dcsrrd(1); // CHECK: call i64 @llvm.loongarch.dcsrrd ++ ++} ++ ++void csrwr(){ ++ ++ u32 u32_r, u32_a; ++ // __csrwr ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r=__builtin_loongarch_csrwr(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr ++ ++} ++ ++void dcsrwr(){ ++ ++ u64 u64_r, u64_a; ++ // __dcsrwr ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r=__builtin_loongarch_dcsrwr(u64_a, 1); // CHECK: call i64 @llvm.loongarch.dcsrwr ++ ++} ++ ++void csrxchg(){ ++ ++ u32 u32_r, u32_a, u32_b; ++ // __csrxchg ++ // rd, rj, csr_num ++ // unsigned int, unsigned int, uimm14_32 ++ u32_r=__builtin_loongarch_csrxchg(u32_a, u32_b, 1); // CHECK: call i32 @llvm.loongarch.csrxchg ++ ++} ++ ++void dcsrxchg(){ ++ ++ u64 u64_r, u64_a, u64_b; ++ // __dcsrxchg ++ // rd, rj, csr_num ++ // unsigned long int, unsigned long int, uimm14 ++ u64_r=__builtin_loongarch_dcsrxchg(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.dcsrxchg ++ ++} ++ ++void iocsrrd_b(){ ++ ++ u32 u32_a; ++ u8 u8_r; ++ // __iocsrrd_b ++ // rd, rj ++ // unsigned char, unsigned int ++ u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b ++ ++} ++ ++void iocsrrd_h(){ ++ ++ u32 u32_a; ++ u16 u16_r; ++ // __iocsrrd_h ++ // rd, rj ++ // unsigned short, unsigned int ++ u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h ++ ++} ++ ++void iocsrrd_w(){ ++ ++ u32 u32_r, u32_a; ++ // __iocsrrd_w ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w ++ ++} ++ ++void iocsrrd_d(){ ++ ++ u32 u32_a; ++ u64 u64_r; ++ // __iocsrrd_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d ++ ++} ++ ++void iocsrwr_b(){ ++ ++ u32 u32_a; ++ u8 u8_a; ++ // __iocsrwr_b ++ // rd, rj ++ // unsigned char, unsigned int ++ __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b ++ ++} ++ ++void iocsrwr_h(){ ++ ++ u32 u32_a; ++ u16 u16_a; ++ // __iocsrwr_h ++ // rd, rj ++ // unsigned short, unsigned int ++ __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h ++ ++} ++ ++void iocsrwr_w(){ ++ ++ u32 u32_a, u32_b; ++ // __iocsrwr_w ++ // rd, rj ++ // unsigned int, unsigned int ++ __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w ++ ++} ++ ++void iocsrwr_d(){ ++ ++ u32 u32_a; ++ u64 u64_a; ++ // __iocsrwr_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d ++ ++} ++ ++void cacop(){ ++ ++ i32 i32_a; ++ // __cacop ++ // op, rj, si12 ++ // uimm5, unsigned int, simm12 ++ __builtin_loongarch_cacop(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop ++ ++} ++ ++void dcacop(){ ++ ++ i64 i64_a; ++ // __dcacop ++ // op, rj, si12 ++ // uimm5, unsigned long int, simm12 ++ __builtin_loongarch_dcacop(1, i64_a, 2); // CHECK: void @llvm.loongarch.dcacop ++ ++} ++ ++void rdtime_d(){ ++ ++ drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimeh_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimel_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm 
sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void crc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w ++ ++} ++ ++void crc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w ++ ++} ++ ++void crc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w ++ ++} ++ ++void crc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w ++ ++} ++ ++void crcc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crcc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w ++ ++} ++ ++void crcc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crcc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w ++ ++} ++ ++void crcc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crcc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w ++ ++} ++ ++void crcc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crcc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w ++ ++} ++ ++void tlbclr(){ ++ ++ // __tlbclr ++ __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr ++ ++} ++ ++void tlbflush(){ ++ ++ // __tlbflush ++ __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush ++ ++} ++ ++void tlbfill(){ ++ ++ // __tlbfill ++ __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill ++ ++} ++ ++void tlbrd(){ ++ ++ // __tlbrd ++ __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd ++ ++} ++ ++void tlbwr(){ ++ ++ // __tlbwr ++ __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr ++ ++} ++ ++void tlbsrch(){ ++ ++ // __tlbsrch ++ __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch ++ ++} ++ ++void syscall(){ ++ ++ // __syscall ++ // Code ++ // uimm15 ++ __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall ++ ++} ++ ++void break_builtin(){ ++ ++ // __break ++ // Code ++ // uimm15 ++ __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break ++ ++} ++ ++void asrtle_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtle_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d ++ ++} ++ ++void asrtgt_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtgt_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d ++ ++} ++ ++void dbar(){ ++ ++ // __dbar ++ // hint ++ // uimm15 ++ __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar ++ ++} ++ ++void ibar(){ ++ ++ // __ibar ++ // hint ++ // uimm15 ++ __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar ++ ++} 
++ ++void movfcsr2gr(){ ++ ++ u32 u32_r; ++ // __movfcsr2gr ++ u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ ++} ++ ++ ++void movgr2fcsr() { ++ ++ u32 u32_a; ++ // __movgr2fcsr ++ __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ++} +diff --git a/test/CodeGen/builtins-loongarch-lasx-error.c b/test/CodeGen/builtins-loongarch-lasx-error.c +new file mode 100644 +index 00000000..99f2687e +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lasx-error.c +@@ -0,0 +1,266 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -verify -o - 2>&1 ++ ++#include ++ ++void test() { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long 
long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // 
expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the 
valid range [-16, 15]}} ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v32u8_r = __lasx_xvandi_b(v32u8_a, 256); 
// expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 
8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = 
__lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++} +diff --git a/test/CodeGen/builtins-loongarch-lasx.c b/test/CodeGen/builtins-loongarch-lasx.c +new file mode 100644 +index 00000000..b9ec3a3c +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lasx.c +@@ -0,0 +1,3761 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -target-feature +d \ ++// RUN: -o - | FileCheck %s ++ ++#include ++ ++#define ui1_b 1 ++#define ui2 1 ++#define ui2_b ui2 ++#define ui3 4 ++#define ui3_b ui3 ++#define ui4 7 ++#define ui4_b ui4 ++#define ui5 25 ++#define ui5_b ui5 ++#define ui6 44 ++#define ui6_b ui6 ++#define ui7 100 ++#define ui7_b ui7 ++#define ui8 127 //200 ++#define ui8_b ui8 ++#define si5_b -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 0 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lasx_xvsll_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( ++ ++ // __lasx_xvsll_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( ++ ++ // __lasx_xvsll_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( ++ ++ // __lasx_xvsll_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( ++ ++ // __lasx_xvslli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( ++ ++ // __lasx_xvslli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslli.h( ++ ++ // __lasx_xvslli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( ++ ++ // __lasx_xvslli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ 
v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( ++ ++ // __lasx_xvsra_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( ++ ++ // __lasx_xvsra_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( ++ ++ // __lasx_xvsra_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( ++ ++ // __lasx_xvsra_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( ++ ++ // __lasx_xvsrai_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( ++ ++ // __lasx_xvsrai_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( ++ ++ // __lasx_xvsrai_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( ++ ++ // __lasx_xvsrai_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( ++ ++ // __lasx_xvsrar_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( ++ ++ // __lasx_xvsrar_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( ++ ++ // __lasx_xvsrar_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( ++ ++ // __lasx_xvsrar_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( ++ ++ // __lasx_xvsrari_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( ++ ++ // __lasx_xvsrari_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( ++ ++ // __lasx_xvsrari_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( ++ ++ // __lasx_xvsrari_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( ++ ++ // __lasx_xvsrl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( ++ ++ // __lasx_xvsrl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( ++ ++ // __lasx_xvsrl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrl.w( ++ ++ // __lasx_xvsrl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d( ++ ++ // __lasx_xvsrli_b ++ // xd, xj, ui3 ++ // V32QI, 
V32QI, UQI ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b( ++ ++ // __lasx_xvsrli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h( ++ ++ // __lasx_xvsrli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w( ++ ++ // __lasx_xvsrli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d( ++ ++ // __lasx_xvsrlr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b( ++ ++ // __lasx_xvsrlr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h( ++ ++ // __lasx_xvsrlr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w( ++ ++ // __lasx_xvsrlr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d( ++ ++ // __lasx_xvsrlri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b( ++ ++ // __lasx_xvsrlri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h( ++ ++ // __lasx_xvsrlri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w( ++ ++ // __lasx_xvsrlri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d( ++ ++ // __lasx_xvbitclr_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b( ++ ++ // __lasx_xvbitclr_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h( ++ ++ // __lasx_xvbitclr_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w( ++ ++ // __lasx_xvbitclr_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d( ++ ++ // __lasx_xvbitclri_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b( ++ ++ // __lasx_xvbitclri_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h( ++ ++ // __lasx_xvbitclri_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w( ++ ++ // __lasx_xvbitclri_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d( ++ ++ // __lasx_xvbitset_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitset_b(v32u8_a, 
v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b( ++ ++ // __lasx_xvbitset_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h( ++ ++ // __lasx_xvbitset_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w( ++ ++ // __lasx_xvbitset_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d( ++ ++ // __lasx_xvbitseti_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b( ++ ++ // __lasx_xvbitseti_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h( ++ ++ // __lasx_xvbitseti_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w( ++ ++ // __lasx_xvbitseti_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d( ++ ++ // __lasx_xvbitrev_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b( ++ ++ // __lasx_xvbitrev_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h( ++ ++ // __lasx_xvbitrev_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w( ++ ++ // __lasx_xvbitrev_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d( ++ ++ // __lasx_xvbitrevi_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b( ++ ++ // __lasx_xvbitrevi_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h( ++ ++ // __lasx_xvbitrevi_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w( ++ ++ // __lasx_xvbitrevi_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d( ++ ++ // __lasx_xvadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b( ++ ++ // __lasx_xvadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h( ++ ++ // __lasx_xvadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w( ++ ++ // __lasx_xvadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( ++ ++ // __lasx_xvaddi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 
ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( ++ ++ // __lasx_xvaddi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( ++ ++ // __lasx_xvaddi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( ++ ++ // __lasx_xvaddi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( ++ ++ // __lasx_xvsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( ++ ++ // __lasx_xvsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( ++ ++ // __lasx_xvsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( ++ ++ // __lasx_xvsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( ++ ++ // __lasx_xvsubi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( ++ ++ // __lasx_xvsubi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( ++ ++ // __lasx_xvsubi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( ++ ++ // __lasx_xvsubi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( ++ ++ // __lasx_xvmax_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( ++ ++ // __lasx_xvmax_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( ++ ++ // __lasx_xvmax_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( ++ ++ // __lasx_xvmax_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( ++ ++ // __lasx_xvmaxi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( ++ ++ // __lasx_xvmaxi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( ++ ++ // __lasx_xvmaxi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( ++ ++ // __lasx_xvmaxi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( ++ ++ // __lasx_xvmax_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( ++ ++ // __lasx_xvmax_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = 
__lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( ++ ++ // __lasx_xvmax_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( ++ ++ // __lasx_xvmax_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( ++ ++ // __lasx_xvmaxi_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( ++ ++ // __lasx_xvmaxi_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu( ++ ++ // __lasx_xvmaxi_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( ++ ++ // __lasx_xvmaxi_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( ++ ++ // __lasx_xvmin_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( ++ ++ // __lasx_xvmin_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( ++ ++ // __lasx_xvmin_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( ++ ++ // __lasx_xvmin_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( ++ ++ // __lasx_xvmini_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( ++ ++ // __lasx_xvmini_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( ++ ++ // __lasx_xvmini_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( ++ ++ // __lasx_xvmini_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( ++ ++ // __lasx_xvmin_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( ++ ++ // __lasx_xvmin_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( ++ ++ // __lasx_xvmin_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( ++ ++ // __lasx_xvmin_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( ++ ++ // __lasx_xvmini_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( ++ ++ // __lasx_xvmini_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( ++ ++ // __lasx_xvmini_wu 
++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( ++ ++ // __lasx_xvmini_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( ++ ++ // __lasx_xvseq_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( ++ ++ // __lasx_xvseq_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( ++ ++ // __lasx_xvseq_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( ++ ++ // __lasx_xvseq_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( ++ ++ // __lasx_xvseqi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( ++ ++ // __lasx_xvseqi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( ++ ++ // __lasx_xvseqi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseqi.w( ++ ++ // __lasx_xvseqi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d( ++ ++ // __lasx_xvslt_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b( ++ ++ // __lasx_xvslt_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h( ++ ++ // __lasx_xvslt_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w( ++ ++ // __lasx_xvslt_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d( ++ ++ // __lasx_xvslti_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b( ++ ++ // __lasx_xvslti_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h( ++ ++ // __lasx_xvslti_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w( ++ ++ // __lasx_xvslti_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d( ++ ++ // __lasx_xvslt_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu( ++ ++ // __lasx_xvslt_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu( ++ ++ // __lasx_xvslt_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu( ++ ++ // 
__lasx_xvslt_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du( ++ ++ // __lasx_xvslti_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu( ++ ++ // __lasx_xvslti_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu( ++ ++ // __lasx_xvslti_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu( ++ ++ // __lasx_xvslti_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du( ++ ++ // __lasx_xvsle_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b( ++ ++ // __lasx_xvsle_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h( ++ ++ // __lasx_xvsle_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w( ++ ++ // __lasx_xvsle_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d( ++ ++ // __lasx_xvslei_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b( ++ ++ // __lasx_xvslei_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h( ++ ++ // __lasx_xvslei_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w( ++ ++ // __lasx_xvslei_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d( ++ ++ // __lasx_xvsle_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu( ++ ++ // __lasx_xvsle_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu( ++ ++ // __lasx_xvsle_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu( ++ ++ // __lasx_xvsle_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du( ++ ++ // __lasx_xvslei_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu( ++ ++ // __lasx_xvslei_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu( ++ ++ // __lasx_xvslei_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu( ++ ++ // __lasx_xvslei_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvslei.du( ++ ++ // __lasx_xvsat_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b( ++ ++ // __lasx_xvsat_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h( ++ ++ // __lasx_xvsat_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w( ++ ++ // __lasx_xvsat_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d( ++ ++ // __lasx_xvsat_bu ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu( ++ ++ // __lasx_xvsat_hu ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu( ++ ++ // __lasx_xvsat_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu( ++ ++ // __lasx_xvsat_du ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du( ++ ++ // __lasx_xvadda_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b( ++ ++ // __lasx_xvadda_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h( ++ ++ // __lasx_xvadda_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w( ++ ++ // __lasx_xvadda_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d( ++ ++ // __lasx_xvsadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b( ++ ++ // __lasx_xvsadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h( ++ ++ // __lasx_xvsadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w( ++ ++ // __lasx_xvsadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d( ++ ++ // __lasx_xvsadd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu( ++ ++ // __lasx_xvsadd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu( ++ ++ // __lasx_xvsadd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu( ++ ++ // __lasx_xvsadd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du( ++ ++ // __lasx_xvavg_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = 
__lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b( ++ ++ // __lasx_xvavg_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h( ++ ++ // __lasx_xvavg_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w( ++ ++ // __lasx_xvavg_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d( ++ ++ // __lasx_xvavg_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.bu( ++ ++ // __lasx_xvavg_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu( ++ ++ // __lasx_xvavg_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu( ++ ++ // __lasx_xvavg_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du( ++ ++ // __lasx_xvavgr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b( ++ ++ // __lasx_xvavgr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h( ++ ++ // __lasx_xvavgr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w( ++ ++ // __lasx_xvavgr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d( ++ ++ // __lasx_xvavgr_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu( ++ ++ // __lasx_xvavgr_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu( ++ ++ // __lasx_xvavgr_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu( ++ ++ // __lasx_xvavgr_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du( ++ ++ // __lasx_xvssub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b( ++ ++ // __lasx_xvssub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h( ++ ++ // __lasx_xvssub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w( ++ ++ // __lasx_xvssub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.d( ++ ++ // __lasx_xvssub_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu( ++ 
++ // __lasx_xvssub_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu( ++ ++ // __lasx_xvssub_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu( ++ ++ // __lasx_xvssub_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du( ++ ++ // __lasx_xvabsd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b( ++ ++ // __lasx_xvabsd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h( ++ ++ // __lasx_xvabsd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w( ++ ++ // __lasx_xvabsd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d( ++ ++ // __lasx_xvabsd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu( ++ ++ // __lasx_xvabsd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu( ++ ++ // __lasx_xvabsd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu( ++ ++ // __lasx_xvabsd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du( ++ ++ // __lasx_xvmul_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b( ++ ++ // __lasx_xvmul_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h( ++ ++ // __lasx_xvmul_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w( ++ ++ // __lasx_xvmul_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d( ++ ++ // __lasx_xvmadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b( ++ ++ // __lasx_xvmadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h( ++ ++ // __lasx_xvmadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w( ++ ++ // __lasx_xvmadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmadd.d( ++ ++ // __lasx_xvmsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b( ++ ++ // 
__lasx_xvmsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h( ++ ++ // __lasx_xvmsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w( ++ ++ // __lasx_xvmsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d( ++ ++ // __lasx_xvdiv_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.b( ++ ++ // __lasx_xvdiv_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h( ++ ++ // __lasx_xvdiv_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w( ++ ++ // __lasx_xvdiv_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d( ++ ++ // __lasx_xvdiv_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu( ++ ++ // __lasx_xvdiv_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu( ++ ++ // __lasx_xvdiv_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu( ++ ++ // __lasx_xvdiv_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du( ++ ++ // __lasx_xvhaddw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b( ++ ++ // __lasx_xvhaddw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h( ++ ++ // __lasx_xvhaddw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w( ++ ++ // __lasx_xvhaddw_hu_bu ++ // xd, xj, xk ++ // UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu( ++ ++ // __lasx_xvhaddw_wu_hu ++ // xd, xj, xk ++ // UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu( ++ ++ // __lasx_xvhaddw_du_wu ++ // xd, xj, xk ++ // UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu( ++ ++ // __lasx_xvhsubw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b( ++ ++ // __lasx_xvhsubw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h( ++ ++ // __lasx_xvhsubw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvhsubw.d.w( ++ ++ // __lasx_xvhsubw_hu_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu( ++ ++ // __lasx_xvhsubw_wu_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu( ++ ++ // __lasx_xvhsubw_du_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu( ++ ++ // __lasx_xvmod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.b( ++ ++ // __lasx_xvmod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h( ++ ++ // __lasx_xvmod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w( ++ ++ // __lasx_xvmod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d( ++ ++ // __lasx_xvmod_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu( ++ ++ // __lasx_xvmod_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu( ++ ++ // __lasx_xvmod_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu( ++ ++ // __lasx_xvmod_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du( ++ ++ // __lasx_xvrepl128vei_b ++ // xd, xj, ui4 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b( ++ ++ // __lasx_xvrepl128vei_h ++ // xd, xj, ui3 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h( ++ ++ // __lasx_xvrepl128vei_w ++ // xd, xj, ui2 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w( ++ ++ // __lasx_xvrepl128vei_d ++ // xd, xj, ui1 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d( ++ ++ // __lasx_xvpickev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b( ++ ++ // __lasx_xvpickev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h( ++ ++ // __lasx_xvpickev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w( ++ ++ // __lasx_xvpickev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d( ++ ++ // __lasx_xvpickod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // 
CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b( ++ ++ // __lasx_xvpickod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h( ++ ++ // __lasx_xvpickod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w( ++ ++ // __lasx_xvpickod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d( ++ ++ // __lasx_xvilvh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvh.b( ++ ++ // __lasx_xvilvh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h( ++ ++ // __lasx_xvilvh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w( ++ ++ // __lasx_xvilvh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d( ++ ++ // __lasx_xvilvl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b( ++ ++ // __lasx_xvilvl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h( ++ ++ // __lasx_xvilvl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w( ++ ++ // __lasx_xvilvl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d( ++ ++ // __lasx_xvpackev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b( ++ ++ // __lasx_xvpackev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h( ++ ++ // __lasx_xvpackev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w( ++ ++ // __lasx_xvpackev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d( ++ ++ // __lasx_xvpackod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b( ++ ++ // __lasx_xvpackod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h( ++ ++ // __lasx_xvpackod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w( ++ ++ // __lasx_xvpackod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d( ++ ++ // __lasx_xvshuf_b ++ // xd, xj, xk, xa ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvshuf.b( ++ ++ // __lasx_xvshuf_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h( ++ ++ // __lasx_xvshuf_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w( ++ ++ // __lasx_xvshuf_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d( ++ ++ // __lasx_xvand_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvand.v( ++ ++ // __lasx_xvandi_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b( ++ ++ // __lasx_xvor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v( ++ ++ // __lasx_xvori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b( ++ ++ // __lasx_xvnor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v( ++ ++ // __lasx_xvnori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b( ++ ++ // __lasx_xvxor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v( ++ ++ // __lasx_xvxori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b( ++ ++ // __lasx_xvbitsel_v ++ // xd, xj, xk, xa ++ // UV32QI, UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v( ++ ++ // __lasx_xvbitseli_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b( ++ ++ // __lasx_xvshuf4i_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, USI ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b( ++ ++ // __lasx_xvshuf4i_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, USI ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h( ++ ++ // __lasx_xvshuf4i_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, USI ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w( ++ ++ // __lasx_xvreplgr2vr_b ++ // xd, rj ++ // V32QI, SI ++ v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b( ++ ++ // __lasx_xvreplgr2vr_h ++ // xd, rj ++ // V16HI, SI ++ v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h( ++ ++ // __lasx_xvreplgr2vr_w ++ // xd, rj ++ // V8SI, SI ++ v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w( ++ ++ // __lasx_xvreplgr2vr_d ++ // xd, rj ++ // V4DI, DI ++ v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d( ++ ++ // __lasx_xvpcnt_b 
++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b( ++ ++ // __lasx_xvpcnt_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h( ++ ++ // __lasx_xvpcnt_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w( ++ ++ // __lasx_xvpcnt_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d( ++ ++ // __lasx_xvclo_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclo_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b( ++ ++ // __lasx_xvclo_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h( ++ ++ // __lasx_xvclo_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w( ++ ++ // __lasx_xvclo_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d( ++ ++ // __lasx_xvclz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b( ++ ++ // __lasx_xvclz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h( ++ ++ // __lasx_xvclz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w( ++ ++ // __lasx_xvclz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d( ++ ++ // __lasx_xvfcmp_caf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s( ++ ++ // __lasx_xvfcmp_caf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d( ++ ++ // __lasx_xvfcmp_cor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s( ++ ++ // __lasx_xvfcmp_cor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d( ++ ++ // __lasx_xvfcmp_cun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s( ++ ++ // __lasx_xvfcmp_cun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d( ++ ++ // __lasx_xvfcmp_cune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s( ++ ++ // __lasx_xvfcmp_cune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d( ++ ++ // __lasx_xvfcmp_cueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s( ++ ++ // __lasx_xvfcmp_cueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x 
i64> @llvm.loongarch.lasx.xvfcmp.cueq.d( ++ ++ // __lasx_xvfcmp_ceq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s( ++ ++ // __lasx_xvfcmp_ceq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d( ++ ++ // __lasx_xvfcmp_cne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s( ++ ++ // __lasx_xvfcmp_cne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d( ++ ++ // __lasx_xvfcmp_clt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s( ++ ++ // __lasx_xvfcmp_clt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d( ++ ++ // __lasx_xvfcmp_cult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s( ++ ++ // __lasx_xvfcmp_cult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d( ++ ++ // __lasx_xvfcmp_cle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s( ++ ++ // __lasx_xvfcmp_cle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d( ++ ++ // __lasx_xvfcmp_cule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s( ++ ++ // __lasx_xvfcmp_cule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d( ++ ++ // __lasx_xvfcmp_saf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s( ++ ++ // __lasx_xvfcmp_saf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d( ++ ++ // __lasx_xvfcmp_sor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s( ++ ++ // __lasx_xvfcmp_sor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d( ++ ++ // __lasx_xvfcmp_sun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s( ++ ++ // __lasx_xvfcmp_sun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d( ++ ++ // __lasx_xvfcmp_sune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s( ++ ++ // __lasx_xvfcmp_sune_d ++ // xd, xj, xk ++ // V4DI, 
V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d( ++ ++ // __lasx_xvfcmp_sueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s( ++ ++ // __lasx_xvfcmp_sueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d( ++ ++ // __lasx_xvfcmp_seq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s( ++ ++ // __lasx_xvfcmp_seq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d( ++ ++ // __lasx_xvfcmp_sne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s( ++ ++ // __lasx_xvfcmp_sne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d( ++ ++ // __lasx_xvfcmp_slt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s( ++ ++ // __lasx_xvfcmp_slt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d( ++ ++ // __lasx_xvfcmp_sult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s( ++ ++ // __lasx_xvfcmp_sult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d( ++ ++ // __lasx_xvfcmp_sle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s( ++ ++ // __lasx_xvfcmp_sle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d( ++ ++ // __lasx_xvfcmp_sule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s( ++ ++ // __lasx_xvfcmp_sule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d( ++ ++ // __lasx_xvfadd_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s( ++ ++ // __lasx_xvfadd_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d( ++ ++ // __lasx_xvfsub_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s( ++ ++ // __lasx_xvfsub_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d( ++ ++ // __lasx_xvfmul_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s( ++ ++ // 
__lasx_xvfmul_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d( ++ ++ // __lasx_xvfdiv_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s( ++ ++ // __lasx_xvfdiv_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d( ++ ++ // __lasx_xvfcvt_h_s ++ // xd, xj, xk ++ // V16HI, V8SF, V8SF ++ v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s( ++ ++ // __lasx_xvfcvt_s_d ++ // xd, xj, xk ++ // V8SF, V4DF, V4DF ++ v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d( ++ ++ // __lasx_xvfmin_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s( ++ ++ // __lasx_xvfmin_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d( ++ ++ // __lasx_xvfmina_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s( ++ ++ // __lasx_xvfmina_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d( ++ ++ // __lasx_xvfmax_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s( ++ ++ // __lasx_xvfmax_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d( ++ ++ // __lasx_xvfmaxa_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s( ++ ++ // __lasx_xvfmaxa_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d( ++ ++ // __lasx_xvfclass_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s( ++ ++ // __lasx_xvfclass_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d( ++ ++ // __lasx_xvfsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s( ++ ++ // __lasx_xvfsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d( ++ ++ // __lasx_xvfrecip_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s( ++ ++ // __lasx_xvfrecip_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d( ++ ++ // __lasx_xvfrint_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s( ++ ++ // __lasx_xvfrint_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d( ++ ++ // __lasx_xvfrsqrt_s ++ // xd, xj ++ // V8SF, V8SF 
++ v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s( ++ ++ // __lasx_xvfrsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d( ++ ++ // __lasx_xvflogb_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s( ++ ++ // __lasx_xvflogb_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d( ++ ++ // __lasx_xvfcvth_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h( ++ ++ // __lasx_xvfcvth_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s( ++ ++ // __lasx_xvfcvtl_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h( ++ ++ // __lasx_xvfcvtl_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s( ++ ++ // __lasx_xvftint_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s( ++ ++ // __lasx_xvftint_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d( ++ ++ // __lasx_xvftint_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s( ++ ++ // __lasx_xvftint_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d( ++ ++ // __lasx_xvftintrz_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s( ++ ++ // __lasx_xvftintrz_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d( ++ ++ // __lasx_xvftintrz_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s( ++ ++ // __lasx_xvftintrz_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d( ++ ++ // __lasx_xvffint_s_w ++ // xd, xj ++ // V8SF, V8SI ++ v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.w( ++ ++ // __lasx_xvffint_d_l ++ // xd, xj ++ // V4DF, V4DI ++ v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l( ++ ++ // __lasx_xvffint_s_wu ++ // xd, xj ++ // V8SF, UV8SI ++ v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu( ++ ++ // __lasx_xvffint_d_lu ++ // xd, xj ++ // V4DF, UV4DI ++ v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu( ++ ++ // __lasx_xvreplve_b ++ // xd, xj, rk ++ // V32QI, V32QI, SI ++ v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b( ++ ++ // __lasx_xvreplve_h ++ // xd, xj, rk ++ // V16HI, V16HI, SI ++ v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h( ++ ++ // 
__lasx_xvreplve_w ++ // xd, xj, rk ++ // V8SI, V8SI, SI ++ v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w( ++ ++ // __lasx_xvreplve_d ++ // xd, xj, rk ++ // V4DI, V4DI, SI ++ v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d( ++ ++ // __lasx_xvpermi_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w( ++ ++ // __lasx_xvandn_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandn.v( ++ ++ // __lasx_xvneg_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b( ++ ++ // __lasx_xvneg_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h( ++ ++ // __lasx_xvneg_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w( ++ ++ // __lasx_xvneg_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d( ++ ++ // __lasx_xvmuh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b( ++ ++ // __lasx_xvmuh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h( ++ ++ // __lasx_xvmuh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w( ++ ++ // __lasx_xvmuh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d( ++ ++ // __lasx_xvmuh_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu( ++ ++ // __lasx_xvmuh_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu( ++ ++ // __lasx_xvmuh_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu( ++ ++ // __lasx_xvmuh_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du( ++ ++ // __lasx_xvsllwil_h_b ++ // xd, xj, ui3 ++ // V16HI, V32QI, UQI ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b( ++ ++ // __lasx_xvsllwil_w_h ++ // xd, xj, ui4 ++ // V8SI, V16HI, UQI ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h( ++ ++ // __lasx_xvsllwil_d_w ++ // xd, xj, ui5 ++ // V4DI, V8SI, UQI ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w( ++ ++ // __lasx_xvsllwil_hu_bu ++ // xd, xj, ui3 ++ // UV16HI, UV32QI, UQI ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu( ++ ++ // __lasx_xvsllwil_wu_hu ++ // xd, xj, ui4 ++ // UV8SI, UV16HI, UQI ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvsllwil.wu.hu( ++ ++ // __lasx_xvsllwil_du_wu ++ // xd, xj, ui5 ++ // UV4DI, UV8SI, UQI ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu( ++ ++ // __lasx_xvsran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h( ++ ++ // __lasx_xvsran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w( ++ ++ // __lasx_xvsran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d( ++ ++ // __lasx_xvssran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h( ++ ++ // __lasx_xvssran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w( ++ ++ // __lasx_xvssran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d( ++ ++ // __lasx_xvssran_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h( ++ ++ // __lasx_xvssran_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w( ++ ++ // __lasx_xvssran_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d( ++ ++ // __lasx_xvsrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h( ++ ++ // __lasx_xvsrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w( ++ ++ // __lasx_xvsrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d( ++ ++ // __lasx_xvssrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h( ++ ++ // __lasx_xvssrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w( ++ ++ // __lasx_xvssrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d( ++ ++ // __lasx_xvssrarn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h( ++ ++ // __lasx_xvssrarn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w( ++ ++ // __lasx_xvssrarn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d( ++ ++ // __lasx_xvsrln_b_h ++ // 
xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h( ++ ++ // __lasx_xvsrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w( ++ ++ // __lasx_xvsrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d( ++ ++ // __lasx_xvssrln_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h( ++ ++ // __lasx_xvssrln_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w( ++ ++ // __lasx_xvssrln_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d( ++ ++ // __lasx_xvsrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h( ++ ++ // __lasx_xvsrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w( ++ ++ // __lasx_xvsrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d( ++ ++ // __lasx_xvssrlrn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h( ++ ++ // __lasx_xvssrlrn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w( ++ ++ // __lasx_xvssrlrn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d( ++ ++ // __lasx_xvfrstpi_b ++ // xd, xj, ui5 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b( ++ ++ // __lasx_xvfrstpi_h ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h( ++ ++ // __lasx_xvfrstp_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvfrstp_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b( ++ ++ // __lasx_xvfrstp_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h( ++ ++ // __lasx_xvshuf4i_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d( ++ ++ // __lasx_xvbsrl_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v( ++ ++ // __lasx_xvbsll_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v( ++ ++ // __lasx_xvextrins_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = 
__lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b( ++ ++ // __lasx_xvextrins_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h( ++ ++ // __lasx_xvextrins_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w( ++ ++ // __lasx_xvextrins_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextrins.d( ++ ++ // __lasx_xvmskltz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b( ++ ++ // __lasx_xvmskltz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h( ++ ++ // __lasx_xvmskltz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w( ++ ++ // __lasx_xvmskltz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d( ++ ++ // __lasx_xvsigncov_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b( ++ ++ // __lasx_xvsigncov_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h( ++ ++ // __lasx_xvsigncov_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w( ++ ++ // __lasx_xvsigncov_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d( ++ ++ // __lasx_xvfmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s( ++ ++ // __lasx_xvfmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d( ++ ++ // __lasx_xvfmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s( ++ ++ // __lasx_xvfmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d( ++ ++ // __lasx_xvfnmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s( ++ ++ // __lasx_xvfnmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d( ++ ++ // __lasx_xvfnmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( ++ ++ // __lasx_xvfnmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x 
double> @llvm.loongarch.lasx.xvfnmsub.d( ++ ++ // __lasx_xvftintrne_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( ++ ++ // __lasx_xvftintrne_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( ++ ++ // __lasx_xvftintrp_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( ++ ++ // __lasx_xvftintrp_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( ++ ++ // __lasx_xvftintrm_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( ++ ++ // __lasx_xvftintrm_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( ++ ++ // __lasx_xvftint_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( ++ ++ // __lasx_xvffint_s_l ++ // xd, xj, xk ++ // V8SF, V4DI, V4DI ++ v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( ++ ++ // __lasx_xvftintrz_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( ++ ++ // __lasx_xvftintrp_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( ++ ++ // __lasx_xvftintrm_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( ++ ++ // __lasx_xvftintrne_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( ++ ++ // __lasx_xvftinth_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( ++ ++ // __lasx_xvftintl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( ++ ++ // __lasx_xvffinth_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( ++ ++ // __lasx_xvffintl_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( ++ ++ // __lasx_xvftintrzh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( ++ ++ // __lasx_xvftintrzl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( ++ ++ // __lasx_xvftintrph_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( ++ ++ // __lasx_xvftintrpl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( ++ ++ // __lasx_xvftintrmh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = 
__lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( ++ ++ // __lasx_xvftintrml_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( ++ ++ // __lasx_xvftintrneh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( ++ ++ // __lasx_xvftintrnel_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( ++ ++ // __lasx_xvfrintrne_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrne.s( ++ ++ // __lasx_xvfrintrne_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrne.d( ++ ++ // __lasx_xvfrintrz_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrz.s( ++ ++ // __lasx_xvfrintrz_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrz.d( ++ ++ // __lasx_xvfrintrp_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrp.s( ++ ++ // __lasx_xvfrintrp_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrp.d( ++ ++ // __lasx_xvfrintrm_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrm.s( ++ ++ // __lasx_xvfrintrm_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrm.d( ++ ++ // __lasx_xvld ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( ++ ++ // __lasx_xvst ++ // xd, rj, si12 ++ // VOID, V32QI, CVPOINTER, SI ++ __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( ++ ++ // __lasx_xvstelm_b ++ // xd, rj, si8, idx ++ // VOID, V32QI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( ++ ++ // __lasx_xvstelm_h ++ // xd, rj, si8, idx ++ // VOID, V16HI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( ++ ++ // __lasx_xvstelm_w ++ // xd, rj, si8, idx ++ // VOID, V8SI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( ++ ++ // __lasx_xvstelm_d ++ // xd, rj, si8, idx ++ // VOID, V4DI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( ++ ++ // __lasx_xvinsve0_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( ++ ++ // __lasx_xvinsve0_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( ++ ++ // __lasx_xvpickve_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( ++ ++ // __lasx_xvpickve_d ++ // xd, xj, ui2 ++ // V4DI, 
V4DI, UQI ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( ++ ++ // __lasx_xvssrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( ++ ++ // __lasx_xvssrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( ++ ++ // __lasx_xvssrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d( ++ ++ // __lasx_xvssrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( ++ ++ // __lasx_xvssrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( ++ ++ // __lasx_xvssrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( ++ ++ // __lasx_xvorn_v ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( ++ ++ // __lasx_xvldi ++ // xd, i13 ++ // V4DI, HI ++ v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( ++ ++ // __lasx_xvldx ++ // xd, rj, rk ++ // V32QI, CVPOINTER, DI ++ v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( ++ ++ // __lasx_xvstx ++ // xd, rj, rk ++ // VOID, V32QI, CVPOINTER, DI ++ __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( ++ ++ // __lasx_xvinsgr2vr_w ++ // xd, rj, ui3 ++ // V8SI, V8SI, SI, UQI ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( ++ ++ // __lasx_xvinsgr2vr_d ++ // xd, rj, ui2 ++ // V4DI, V4DI, DI, UQI ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( ++ ++ // __lasx_xvreplve0_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( ++ ++ // __lasx_xvreplve0_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( ++ ++ // __lasx_xvreplve0_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w( ++ ++ // __lasx_xvreplve0_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( ++ ++ // __lasx_xvreplve0_q ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( ++ ++ // __lasx_vext2xv_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( ++ ++ // __lasx_vext2xv_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( ++ ++ // __lasx_vext2xv_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( ++ ++ // __lasx_vext2xv_w_b ++ // xd, xj ++ // V8SI, 
V32QI ++ v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( ++ ++ //gcc build fail ++ // __lasx_vext2xv_d_h ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( ++ ++ // __lasx_vext2xv_d_b ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( ++ ++ // __lasx_vext2xv_hu_bu ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( ++ ++ // __lasx_vext2xv_wu_hu ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( ++ ++ // __lasx_vext2xv_du_wu ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( ++ ++ // __lasx_vext2xv_wu_bu ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( ++ ++ //gcc build fail ++ // __lasx_vext2xv_du_hu ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( ++ ++ // __lasx_vext2xv_du_bu ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( ++ ++ // __lasx_xvpermi_q ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( ++ ++ // __lasx_xvpermi_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, USI ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( ++ ++ // __lasx_xvperm_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( ++ ++ // __lasx_xvldrepl_b ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( ++ ++ // __lasx_xvldrepl_h ++ // xd, rj, si11 ++ // V16HI, CVPOINTER, SI ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( ++ ++ // __lasx_xvldrepl_w ++ // xd, rj, si10 ++ // V8SI, CVPOINTER, SI ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( ++ ++ // __lasx_xvldrepl_d ++ // xd, rj, si9 ++ // V4DI, CVPOINTER, SI ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d( ++ ++ // __lasx_xvpickve2gr_w ++ // rd, xj, ui3 ++ // SI, V8SI, UQI ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( ++ ++ // __lasx_xvpickve2gr_wu ++ // rd, xj, ui3 ++ // USI, V8SI, UQI ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( ++ ++ // __lasx_xvpickve2gr_d ++ // rd, xj, ui2 ++ // DI, V4DI, UQI ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( ++ ++ // __lasx_xvpickve2gr_du ++ // rd, xj, ui2 ++ // UDI, V4DI, UQI ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( ++ ++ // __lasx_xvaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvaddwev.q.d( ++ ++ // __lasx_xvaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( ++ ++ // __lasx_xvaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( ++ ++ // __lasx_xvaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( ++ ++ // __lasx_xvaddwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( ++ ++ // __lasx_xvaddwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( ++ ++ // __lasx_xvaddwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( ++ ++ // __lasx_xvaddwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( ++ ++ // __lasx_xvsubwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( ++ ++ // __lasx_xvsubwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( ++ ++ // __lasx_xvsubwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( ++ ++ // __lasx_xvsubwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( ++ ++ // __lasx_xvsubwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( ++ ++ // __lasx_xvsubwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( ++ ++ // __lasx_xvsubwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( ++ ++ // __lasx_xvsubwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( ++ ++ // __lasx_xvmulwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( ++ ++ // __lasx_xvmulwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( ++ ++ // __lasx_xvmulwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( ++ ++ // __lasx_xvmulwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvmulwev.h.b( ++ ++ // __lasx_xvmulwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( ++ ++ // __lasx_xvmulwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( ++ ++ // __lasx_xvmulwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( ++ ++ // __lasx_xvmulwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( ++ ++ // __lasx_xvaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( ++ ++ // __lasx_xvaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( ++ ++ // __lasx_xvaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( ++ ++ // __lasx_xvaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( ++ ++ // __lasx_xvaddwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( ++ ++ // __lasx_xvaddwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( ++ ++ // __lasx_xvaddwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( ++ ++ // __lasx_xvaddwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( ++ ++ // __lasx_xvsubwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( ++ ++ // __lasx_xvsubwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( ++ ++ // __lasx_xvsubwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( ++ ++ // __lasx_xvsubwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( ++ ++ // __lasx_xvsubwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( ++ ++ // __lasx_xvsubwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( ++ ++ // __lasx_xvsubwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvsubwod.w.hu( ++ ++ // __lasx_xvsubwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( ++ ++ // __lasx_xvmulwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( ++ ++ // __lasx_xvmulwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( ++ ++ // __lasx_xvmulwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( ++ ++ // __lasx_xvmulwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( ++ ++ // __lasx_xvmulwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( ++ ++ // __lasx_xvmulwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( ++ ++ // __lasx_xvmulwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( ++ ++ // __lasx_xvmulwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( ++ ++ // __lasx_xvaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( ++ ++ // __lasx_xvaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( ++ ++ // __lasx_xvaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( ++ ++ // __lasx_xvmulwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( ++ ++ // __lasx_xvmulwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( ++ ++ // __lasx_xvmulwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( ++ ++ // __lasx_xvaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( ++ ++ // __lasx_xvaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( ++ ++ // __lasx_xvaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( ++ ++ // __lasx_xvmulwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = 
__lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( ++ ++ // __lasx_xvmulwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( ++ ++ // __lasx_xvmulwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( ++ ++ // __lasx_xvhaddw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( ++ ++ // __lasx_xvhaddw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( ++ ++ // __lasx_xvhsubw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( ++ ++ // __lasx_xvhsubw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( ++ ++ // __lasx_xvmaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( ++ ++ // __lasx_xvmaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( ++ ++ // __lasx_xvmaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( ++ ++ // __lasx_xvmaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( ++ ++ // __lasx_xvmaddwev_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( ++ ++ // __lasx_xvmaddwev_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( ++ ++ // __lasx_xvmaddwev_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( ++ ++ // __lasx_xvmaddwev_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( ++ ++ // __lasx_xvmaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( ++ ++ // __lasx_xvmaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( ++ ++ // __lasx_xvmaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( ++ ++ // __lasx_xvmaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ 
v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( ++ ++ // __lasx_xvmaddwod_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( ++ ++ // __lasx_xvmaddwod_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( ++ ++ // __lasx_xvmaddwod_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu( ++ ++ // __lasx_xvmaddwod_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( ++ ++ // __lasx_xvmaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( ++ ++ // __lasx_xvmaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( ++ ++ // __lasx_xvmaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( ++ ++ // __lasx_xvmaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( ++ ++ // __lasx_xvmaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( ++ ++ // __lasx_xvmaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( ++ ++ // __lasx_xvmaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( ++ ++ // __lasx_xvmaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( ++ ++ // __lasx_xvrotr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( ++ ++ // __lasx_xvrotr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( ++ ++ // __lasx_xvrotr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( ++ ++ // __lasx_xvrotr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( ++ ++ // __lasx_xvadd_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( ++ ++ // __lasx_xvsub_q ++ // xd, xj, xk ++ // 
V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( ++ ++ // __lasx_xvaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( ++ ++ // __lasx_xvaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( ++ ++ // __lasx_xvmulwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d( ++ ++ // __lasx_xvmulwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( ++ ++ // __lasx_xvmskgez_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( ++ ++ // __lasx_xvmsknz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( ++ ++ // __lasx_xvexth_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( ++ ++ // __lasx_xvexth_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( ++ ++ // __lasx_xvexth_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( ++ ++ // __lasx_xvexth_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( ++ ++ // __lasx_xvexth_hu_bu ++ // xd, xj ++ // UV16HI, UV32QI ++ v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( ++ ++ // __lasx_xvexth_wu_hu ++ // xd, xj ++ // UV8SI, UV16HI ++ v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( ++ ++ // __lasx_xvexth_du_wu ++ // xd, xj ++ // UV4DI, UV8SI ++ v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( ++ ++ // __lasx_xvexth_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( ++ ++ // __lasx_xvextl_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( ++ ++ // __lasx_xvextl_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( ++ ++ // __lasx_xvrotri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( ++ ++ // __lasx_xvrotri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( ++ ++ // __lasx_xvrotri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( ++ ++ // __lasx_xvrotri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( ++ ++ // __lasx_xvsrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, 
V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( ++ ++ // __lasx_xvsrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( ++ ++ // __lasx_xvsrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( ++ ++ // __lasx_xvsrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q( ++ ++ // __lasx_xvsrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( ++ ++ // __lasx_xvsrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( ++ ++ // __lasx_xvsrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( ++ ++ // __lasx_xvsrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( ++ ++ // __lasx_xvssrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( ++ ++ // __lasx_xvssrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( ++ ++ // __lasx_xvssrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( ++ ++ // __lasx_xvssrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( ++ ++ // __lasx_xvssrlni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( ++ ++ // __lasx_xvssrlni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( ++ ++ // __lasx_xvssrlni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( ++ ++ // __lasx_xvssrlni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( ++ ++ // __lasx_xvssrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( ++ ++ // __lasx_xvssrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( ++ ++ // __lasx_xvssrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = 
__lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( ++ ++ // __lasx_xvssrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( ++ ++ // __lasx_xvssrlrni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( ++ ++ // __lasx_xvssrlrni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w( ++ ++ // __lasx_xvssrlrni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( ++ ++ // __lasx_xvssrlrni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( ++ ++ // __lasx_xvsrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( ++ ++ // __lasx_xvsrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( ++ ++ // __lasx_xvsrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( ++ ++ // __lasx_xvsrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( ++ ++ // __lasx_xvsrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( ++ ++ // __lasx_xvsrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( ++ ++ // __lasx_xvsrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( ++ ++ // __lasx_xvsrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( ++ ++ // __lasx_xvssrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( ++ ++ // __lasx_xvssrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( ++ ++ // __lasx_xvssrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( ++ ++ // __lasx_xvssrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( ++ ++ // __lasx_xvssrani_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = 
__lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( ++ ++ // __lasx_xvssrani_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( ++ ++ // __lasx_xvssrani_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( ++ ++ // __lasx_xvssrani_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q( ++ ++ // __lasx_xvssrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( ++ ++ // __lasx_xvssrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( ++ ++ // __lasx_xvssrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( ++ ++ // __lasx_xvssrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( ++ ++ // __lasx_xvssrarni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( ++ ++ // __lasx_xvssrarni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( ++ ++ // __lasx_xvssrarni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( ++ ++ // __lasx_xvssrarni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( ++ ++ // __lasx_xbnz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( ++ ++ // __lasx_xbz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( ++ ++ // __lasx_xbnz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( ++ ++ // __lasx_xbnz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( ++ ++ // __lasx_xbnz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( ++ ++ // __lasx_xbnz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( ++ ++ // __lasx_xbz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( ++ ++ // __lasx_xbz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( ++ ++ // __lasx_xbz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( ++ ++ // __lasx_xbz_d ++ // rd, xj ++ 
// SI, UV4DI ++ i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( ++} +diff --git a/test/CodeGen/builtins-loongarch-lsx-error.c b/test/CodeGen/builtins-loongarch-lsx-error.c +new file mode 100644 +index 00000000..f566a736 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lsx-error.c +@@ -0,0 +1,250 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -verify -o - 2>&1 ++ ++#include <lsxintrin.h> ++ ++void test() { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrari_b(v16i8_a, 8); //
expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value 
-17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the 
valid range [0, 3]}} ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range 
[-2048, 2047]}} ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument 
value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++} +diff --git 
a/test/CodeGen/builtins-loongarch-lsx.c b/test/CodeGen/builtins-loongarch-lsx.c +new file mode 100644 +index 00000000..0cfc2105 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lsx.c +@@ -0,0 +1,3630 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -target-feature +d \ ++// RUN: -o - | FileCheck %s ++ ++#include <lsxintrin.h> ++ ++#define ui1 0 ++#define ui2 1 ++#define ui3 4 ++#define ui4 7 ++#define ui5 25 ++#define ui6 44 ++#define ui7 100 ++#define ui8 127 //200 ++#define si5 -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 11 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lsx_vsll_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b( ++ ++ // __lsx_vsll_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h( ++ ++ // __lsx_vsll_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w( ++ ++ // __lsx_vsll_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d( ++ ++ // __lsx_vslli_b ++ // vd, vj, 
ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b( ++ ++ // __lsx_vslli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h( ++ ++ // __lsx_vslli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w( ++ ++ // __lsx_vslli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d( ++ ++ // __lsx_vsra_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b( ++ ++ // __lsx_vsra_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h( ++ ++ // __lsx_vsra_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w( ++ ++ // __lsx_vsra_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d( ++ ++ // __lsx_vsrai_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b( ++ ++ // __lsx_vsrai_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h( ++ ++ // __lsx_vsrai_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w( ++ ++ // __lsx_vsrai_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d( ++ ++ // __lsx_vsrar_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b( ++ ++ // __lsx_vsrar_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h( ++ ++ // __lsx_vsrar_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w( ++ ++ // __lsx_vsrar_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d( ++ ++ // __lsx_vsrari_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b( ++ ++ // __lsx_vsrari_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h( ++ ++ // __lsx_vsrari_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w( ++ ++ // __lsx_vsrari_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d( ++ ++ // __lsx_vsrl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b( ++ ++ // __lsx_vsrl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h( ++ ++ // __lsx_vsrl_w ++ // vd, vj, vk ++ // 
V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w( ++ ++ // __lsx_vsrl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d( ++ ++ // __lsx_vsrli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b( ++ ++ // __lsx_vsrli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h( ++ ++ // __lsx_vsrli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w( ++ ++ // __lsx_vsrli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d( ++ ++ // __lsx_vsrlr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b( ++ ++ // __lsx_vsrlr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h( ++ ++ // __lsx_vsrlr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w( ++ ++ // __lsx_vsrlr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d( ++ ++ // __lsx_vsrlri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b( ++ ++ // __lsx_vsrlri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h( ++ ++ // __lsx_vsrlri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w( ++ ++ // __lsx_vsrlri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d( ++ ++ // __lsx_vbitclr_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b( ++ ++ // __lsx_vbitclr_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h( ++ ++ // __lsx_vbitclr_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w( ++ ++ // __lsx_vbitclr_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d( ++ ++ // __lsx_vbitclri_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b( ++ ++ // __lsx_vbitclri_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h( ++ ++ // __lsx_vbitclri_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w( ++ ++ // __lsx_vbitclri_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x 
i64> @llvm.loongarch.lsx.vbitclri.d( ++ ++ // __lsx_vbitset_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b( ++ ++ // __lsx_vbitset_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h( ++ ++ // __lsx_vbitset_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w( ++ ++ // __lsx_vbitset_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d( ++ ++ // __lsx_vbitseti_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b( ++ ++ // __lsx_vbitseti_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h( ++ ++ // __lsx_vbitseti_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w( ++ ++ // __lsx_vbitseti_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d( ++ ++ // __lsx_vbitrev_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b( ++ ++ // __lsx_vbitrev_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h( ++ ++ // __lsx_vbitrev_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w( ++ ++ // __lsx_vbitrev_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d( ++ ++ // __lsx_vbitrevi_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b( ++ ++ // __lsx_vbitrevi_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h( ++ ++ // __lsx_vbitrevi_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w( ++ ++ // __lsx_vbitrevi_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d( ++ ++ // __lsx_vadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b( ++ ++ // __lsx_vadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h( ++ ++ // __lsx_vadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w( ++ ++ // __lsx_vadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d( ++ ++ // __lsx_vaddi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 
x i8> @llvm.loongarch.lsx.vaddi.bu( ++ ++ // __lsx_vaddi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu( ++ ++ // __lsx_vaddi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu( ++ ++ // __lsx_vaddi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du( ++ ++ // __lsx_vsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b( ++ ++ // __lsx_vsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h( ++ ++ // __lsx_vsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w( ++ ++ // __lsx_vsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d( ++ ++ // __lsx_vsubi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu( ++ ++ // __lsx_vsubi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu( ++ ++ // __lsx_vsubi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu( ++ ++ // __lsx_vsubi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du( ++ ++ // __lsx_vmax_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b( ++ ++ // __lsx_vmax_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.h( ++ ++ // __lsx_vmax_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w( ++ ++ // __lsx_vmax_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d( ++ ++ // __lsx_vmaxi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b( ++ ++ // __lsx_vmaxi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h( ++ ++ // __lsx_vmaxi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w( ++ ++ // __lsx_vmaxi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d( ++ ++ // __lsx_vmax_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu( ++ ++ // __lsx_vmax_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu( ++ ++ // __lsx_vmax_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call 
<4 x i32> @llvm.loongarch.lsx.vmax.wu( ++ ++ // __lsx_vmax_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du( ++ ++ // __lsx_vmaxi_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu( ++ ++ // __lsx_vmaxi_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu( ++ ++ // __lsx_vmaxi_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu( ++ ++ // __lsx_vmaxi_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du( ++ ++ // __lsx_vmin_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b( ++ ++ // __lsx_vmin_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h( ++ ++ // __lsx_vmin_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w( ++ ++ // __lsx_vmin_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d( ++ ++ // __lsx_vmini_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b( ++ ++ // __lsx_vmini_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h( ++ ++ // __lsx_vmini_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w( ++ ++ // __lsx_vmini_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.d( ++ ++ // __lsx_vmin_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu( ++ ++ // __lsx_vmin_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu( ++ ++ // __lsx_vmin_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu( ++ ++ // __lsx_vmin_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du( ++ ++ // __lsx_vmini_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu( ++ ++ // __lsx_vmini_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu( ++ ++ // __lsx_vmini_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu( ++ ++ // __lsx_vmini_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du( ++ ++ // __lsx_vseq_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b( ++ ++ // __lsx_vseq_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h( ++ ++ // __lsx_vseq_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w( ++ ++ // __lsx_vseq_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d( ++ ++ // __lsx_vseqi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b( ++ ++ // __lsx_vseqi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h( ++ ++ // __lsx_vseqi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w( ++ ++ // __lsx_vseqi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d( ++ ++ // __lsx_vslti_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b( ++ ++ // __lsx_vslt_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b( ++ ++ // __lsx_vslt_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h( ++ ++ // __lsx_vslt_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w( ++ ++ // __lsx_vslt_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d( ++ ++ // __lsx_vslti_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.h( ++ ++ // __lsx_vslti_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w( ++ ++ // __lsx_vslti_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d( ++ ++ // __lsx_vslt_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu( ++ ++ // __lsx_vslt_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu( ++ ++ // __lsx_vslt_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu( ++ ++ // __lsx_vslt_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du( ++ ++ // __lsx_vslti_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu( ++ ++ // __lsx_vslti_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu( ++ ++ // __lsx_vslti_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = 
__lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu( ++ ++ // __lsx_vslti_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du( ++ ++ // __lsx_vsle_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b( ++ ++ // __lsx_vsle_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h( ++ ++ // __lsx_vsle_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w( ++ ++ // __lsx_vsle_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d( ++ ++ // __lsx_vslei_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b( ++ ++ // __lsx_vslei_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h( ++ ++ // __lsx_vslei_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w( ++ ++ // __lsx_vslei_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d( ++ ++ // __lsx_vsle_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu( ++ ++ // __lsx_vsle_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu( ++ ++ // __lsx_vsle_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu( ++ ++ // __lsx_vsle_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsle_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du( ++ ++ // __lsx_vslei_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu( ++ ++ // __lsx_vslei_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu( ++ ++ // __lsx_vslei_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu( ++ ++ // __lsx_vslei_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du( ++ ++ // __lsx_vsat_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b( ++ ++ // __lsx_vsat_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h( ++ ++ // __lsx_vsat_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w( ++ ++ // __lsx_vsat_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d( ++ ++ // __lsx_vsat_bu ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = 
__lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu( ++ ++ // __lsx_vsat_hu ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu( ++ ++ // __lsx_vsat_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu( ++ ++ // __lsx_vsat_du ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du( ++ ++ // __lsx_vadda_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b( ++ ++ // __lsx_vadda_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h( ++ ++ // __lsx_vadda_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w( ++ ++ // __lsx_vadda_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d( ++ ++ // __lsx_vsadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b( ++ ++ // __lsx_vsadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h( ++ ++ // __lsx_vsadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w( ++ ++ // __lsx_vsadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d( ++ ++ // __lsx_vsadd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu( ++ ++ // __lsx_vsadd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu( ++ ++ // __lsx_vsadd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu( ++ ++ // __lsx_vsadd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du( ++ ++ // __lsx_vavg_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b( ++ ++ // __lsx_vavg_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h( ++ ++ // __lsx_vavg_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w( ++ ++ // __lsx_vavg_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d( ++ ++ // __lsx_vavg_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu( ++ ++ // __lsx_vavg_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu( ++ ++ // 
__lsx_vavg_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu( ++ ++ // __lsx_vavg_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du( ++ ++ // __lsx_vavgr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b( ++ ++ // __lsx_vavgr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h( ++ ++ // __lsx_vavgr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w( ++ ++ // __lsx_vavgr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d( ++ ++ // __lsx_vavgr_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu( ++ ++ // __lsx_vavgr_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu( ++ ++ // __lsx_vavgr_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu( ++ ++ // __lsx_vavgr_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du( ++ ++ // __lsx_vssub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b( ++ ++ // __lsx_vssub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h( ++ ++ // __lsx_vssub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.w( ++ ++ // __lsx_vssub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d( ++ ++ // __lsx_vssub_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu( ++ ++ // __lsx_vssub_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu( ++ ++ // __lsx_vssub_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu( ++ ++ // __lsx_vssub_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du( ++ ++ // __lsx_vabsd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b( ++ ++ // __lsx_vabsd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h( ++ ++ // __lsx_vabsd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w( ++ ++ // __lsx_vabsd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ 
v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d( ++ ++ // __lsx_vabsd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu( ++ ++ // __lsx_vabsd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu( ++ ++ // __lsx_vabsd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu( ++ ++ // __lsx_vabsd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du( ++ ++ // __lsx_vmul_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b( ++ ++ // __lsx_vmul_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h( ++ ++ // __lsx_vmul_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w( ++ ++ // __lsx_vmul_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d( ++ ++ // __lsx_vmadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b( ++ ++ // __lsx_vmadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h( ++ ++ // __lsx_vmadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w( ++ ++ // __lsx_vmadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmadd.d( ++ ++ // __lsx_vmsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b( ++ ++ // __lsx_vmsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h( ++ ++ // __lsx_vmsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w( ++ ++ // __lsx_vmsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d( ++ ++ // __lsx_vdiv_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b( ++ ++ // __lsx_vdiv_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h( ++ ++ // __lsx_vdiv_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w( ++ ++ // __lsx_vdiv_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d( ++ ++ // __lsx_vdiv_bu ++ // vd, vj, vk ++ // UV16QI, 
UV16QI, UV16QI ++ v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu( ++ ++ // __lsx_vdiv_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu( ++ ++ // __lsx_vdiv_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu( ++ ++ // __lsx_vdiv_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du( ++ ++ // __lsx_vhaddw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b( ++ ++ // __lsx_vhaddw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h( ++ ++ // __lsx_vhaddw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w( ++ ++ // __lsx_vhaddw_hu_bu ++ // vd, vj, vk ++ // UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu( ++ ++ // __lsx_vhaddw_wu_hu ++ // vd, vj, vk ++ // UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu( ++ ++ // __lsx_vhaddw_du_wu ++ // vd, vj, vk ++ // UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu( ++ ++ // __lsx_vhsubw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b( ++ ++ // __lsx_vhsubw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h( ++ ++ // __lsx_vhsubw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w( ++ ++ // __lsx_vhsubw_hu_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu( ++ ++ // __lsx_vhsubw_wu_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu( ++ ++ // __lsx_vhsubw_du_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu( ++ ++ // __lsx_vmod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b( ++ ++ // __lsx_vmod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h( ++ ++ // __lsx_vmod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w( ++ ++ // __lsx_vmod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d( ++ ++ // __lsx_vmod_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu( ++ ++ 
// __lsx_vmod_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu( ++ ++ // __lsx_vmod_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu( ++ ++ // __lsx_vmod_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du( ++ ++ // __lsx_vreplve_b ++ // vd, vj, rk ++ // V16QI, V16QI, SI ++ v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b( ++ ++ // __lsx_vreplve_h ++ // vd, vj, rk ++ // V8HI, V8HI, SI ++ v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h( ++ ++ // __lsx_vreplve_w ++ // vd, vj, rk ++ // V4SI, V4SI, SI ++ v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w( ++ ++ // __lsx_vreplve_d ++ // vd, vj, rk ++ // V2DI, V2DI, SI ++ v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d( ++ ++ // __lsx_vreplvei_b ++ // vd, vj, ui4 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b( ++ ++ // __lsx_vreplvei_h ++ // vd, vj, ui3 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h( ++ ++ // __lsx_vreplvei_w ++ // vd, vj, ui2 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w( ++ ++ // __lsx_vreplvei_d ++ // vd, vj, ui1 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d( ++ ++ // __lsx_vpickev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b( ++ ++ // __lsx_vpickev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h( ++ ++ // __lsx_vpickev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w( ++ ++ // __lsx_vpickev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d( ++ ++ // __lsx_vpickod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b( ++ ++ // __lsx_vpickod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h( ++ ++ // __lsx_vpickod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w( ++ ++ // __lsx_vpickod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d( ++ ++ // __lsx_vilvh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b( ++ ++ // __lsx_vilvh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h( ++ ++ // __lsx_vilvh_w ++ // vd, vj, vk 
++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w( ++ ++ // __lsx_vilvh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d( ++ ++ // __lsx_vilvl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b( ++ ++ // __lsx_vilvl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h( ++ ++ // __lsx_vilvl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w( ++ ++ // __lsx_vilvl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d( ++ ++ // __lsx_vpackev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b( ++ ++ // __lsx_vpackev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h( ++ ++ // __lsx_vpackev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w( ++ ++ // __lsx_vpackev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d( ++ ++ // __lsx_vpackod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b( ++ ++ // __lsx_vpackod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h( ++ ++ // __lsx_vpackod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w( ++ ++ // __lsx_vpackod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d( ++ ++ // __lsx_vshuf_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h( ++ ++ // __lsx_vshuf_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w( ++ ++ // __lsx_vshuf_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d( ++ ++ // __lsx_vand_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vand_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vand.v( ++ ++ // __lsx_vandi_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vandi_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandi.b( ++ ++ // __lsx_vor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vor.v( ++ ++ // __lsx_vori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vori.b( ++ ++ // __lsx_vnor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI 
++ v16u8_r = __lsx_vnor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnor.v( ++ ++ // __lsx_vnori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vnori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnori.b( ++ ++ // __lsx_vxor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vxor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxor.v( ++ ++ // __lsx_vxori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vxori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxori.b( ++ ++ // __lsx_vbitsel_v ++ // vd, vj, vk, va ++ // UV16QI, UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitsel_v(v16u8_a, v16u8_b, v16u8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitsel.v( ++ ++ // __lsx_vbitseli_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseli.b( ++ ++ // __lsx_vshuf4i_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b( ++ ++ // __lsx_vshuf4i_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h( ++ ++ // __lsx_vshuf4i_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w( ++ ++ // __lsx_vreplgr2vr_b ++ // vd, rj ++ // V16QI, SI ++ v16i8_r = __lsx_vreplgr2vr_b(i32_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b( ++ ++ // __lsx_vreplgr2vr_h ++ // vd, rj ++ // V8HI, SI ++ v8i16_r = __lsx_vreplgr2vr_h(i32_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h( ++ ++ // __lsx_vreplgr2vr_w ++ // vd, rj ++ // V4SI, SI ++ v4i32_r = __lsx_vreplgr2vr_w(i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w( ++ ++ // __lsx_vreplgr2vr_d ++ // vd, rj ++ // V2DI, DI ++ v2i64_r = __lsx_vreplgr2vr_d(i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d( ++ ++ // __lsx_vpcnt_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vpcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpcnt.b( ++ ++ // __lsx_vpcnt_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h( ++ ++ // __lsx_vpcnt_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w( ++ ++ // __lsx_vpcnt_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d( ++ ++ // __lsx_vclo_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b( ++ ++ // __lsx_vclo_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h( ++ ++ // __lsx_vclo_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w( ++ ++ // __lsx_vclo_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d( ++ ++ // __lsx_vclz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b( ++ ++ // __lsx_vclz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h( ++ ++ // __lsx_vclz_w ++ // vd, 
vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w( ++ ++ // __lsx_vclz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d( ++ ++ // __lsx_vpickve2gr_b ++ // rd, vj, ui4 ++ // SI, V16QI, UQI ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b( ++ ++ // __lsx_vpickve2gr_h ++ // rd, vj, ui3 ++ // SI, V8HI, UQI ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h( ++ ++ // __lsx_vpickve2gr_w ++ // rd, vj, ui2 ++ // SI, V4SI, UQI ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w( ++ ++ // __lsx_vpickve2gr_d ++ // rd, vj, ui1 ++ // DI, V2DI, UQI ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d( ++ ++ // __lsx_vpickve2gr_bu ++ // rd, vj, ui4 ++ // USI, V16QI, UQI ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu( ++ ++ // __lsx_vpickve2gr_hu ++ // rd, vj, ui3 ++ // USI, V8HI, UQI ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu( ++ ++ // __lsx_vpickve2gr_wu ++ // rd, vj, ui2 ++ // USI, V4SI, UQI ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu( ++ ++ // __lsx_vpickve2gr_du ++ // rd, vj, ui1 ++ // UDI, V2DI, UQI ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du( ++ ++ // __lsx_vinsgr2vr_b ++ // vd, rj, ui4 ++ // V16QI, V16QI, SI, UQI ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b( ++ ++ // __lsx_vinsgr2vr_h ++ // vd, rj, ui3 ++ // V8HI, V8HI, SI, UQI ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h( ++ ++ // __lsx_vinsgr2vr_w ++ // vd, rj, ui2 ++ // V4SI, V4SI, SI, UQI ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w( ++ ++ // __lsx_vinsgr2vr_d ++ // vd, rj, ui1 ++ // V2DI, V2DI, SI, UQI ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d( ++ ++ // __lsx_vfcmp_caf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s( ++ ++ // __lsx_vfcmp_caf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d( ++ ++ // __lsx_vfcmp_cor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s( ++ ++ // __lsx_vfcmp_cor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d( ++ ++ // __lsx_vfcmp_cun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s( ++ ++ // __lsx_vfcmp_cun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d( ++ ++ // __lsx_vfcmp_cune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s( ++ ++ // 
__lsx_vfcmp_cune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d( ++ ++ // __lsx_vfcmp_cueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s( ++ ++ // __lsx_vfcmp_cueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d( ++ ++ // __lsx_vfcmp_ceq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s( ++ ++ // __lsx_vfcmp_ceq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d( ++ ++ // __lsx_vfcmp_cne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s( ++ ++ // __lsx_vfcmp_cne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d( ++ ++ // __lsx_vfcmp_clt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s( ++ ++ // __lsx_vfcmp_clt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d( ++ ++ // __lsx_vfcmp_cult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s( ++ ++ // __lsx_vfcmp_cult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d( ++ ++ // __lsx_vfcmp_cle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s( ++ ++ // __lsx_vfcmp_cle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d( ++ ++ // __lsx_vfcmp_cule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s( ++ ++ // __lsx_vfcmp_cule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d( ++ ++ // __lsx_vfcmp_saf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s( ++ ++ // __lsx_vfcmp_saf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d( ++ ++ // __lsx_vfcmp_sor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s( ++ ++ // __lsx_vfcmp_sor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d( ++ ++ // __lsx_vfcmp_sun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s( ++ ++ // __lsx_vfcmp_sun_d ++ // vd, vj, 
vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d( ++ ++ // __lsx_vfcmp_sune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s( ++ ++ // __lsx_vfcmp_sune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d( ++ ++ // __lsx_vfcmp_sueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s( ++ ++ // __lsx_vfcmp_sueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d( ++ ++ // __lsx_vfcmp_seq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s( ++ ++ // __lsx_vfcmp_seq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d( ++ ++ // __lsx_vfcmp_sne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s( ++ ++ // __lsx_vfcmp_sne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d( ++ ++ // __lsx_vfcmp_slt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s( ++ ++ // __lsx_vfcmp_slt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d( ++ ++ // __lsx_vfcmp_sult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s( ++ ++ // __lsx_vfcmp_sult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d( ++ ++ // __lsx_vfcmp_sle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s( ++ ++ // __lsx_vfcmp_sle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d( ++ ++ // __lsx_vfcmp_sule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s( ++ ++ // __lsx_vfcmp_sule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d( ++ ++ // __lsx_vfadd_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s( ++ // __lsx_vfadd_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d( ++ ++ // __lsx_vfsub_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s( ++ ++ // __lsx_vfsub_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfsub_d(v2f64_a, 
v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d( ++ ++ // __lsx_vfmul_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s( ++ ++ // __lsx_vfmul_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d( ++ ++ // __lsx_vfdiv_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s( ++ ++ // __lsx_vfdiv_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d( ++ ++ // __lsx_vfcvt_h_s ++ // vd, vj, vk ++ // V8HI, V4SF, V4SF ++ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s( ++ ++ // __lsx_vfcvt_s_d ++ // vd, vj, vk ++ // V4SF, V2DF, V2DF ++ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d( ++ ++ // __lsx_vfmin_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s( ++ ++ // __lsx_vfmin_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d( ++ ++ // __lsx_vfmina_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s( ++ ++ // __lsx_vfmina_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d( ++ ++ // __lsx_vfmax_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s( ++ ++ // __lsx_vfmax_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmax.d( ++ ++ // __lsx_vfmaxa_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s( ++ ++ // __lsx_vfmaxa_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d( ++ ++ // __lsx_vfclass_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s( ++ ++ // __lsx_vfclass_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d( ++ ++ // __lsx_vfsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s( ++ ++ // __lsx_vfsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d( ++ ++ // __lsx_vfrecip_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s( ++ ++ // __lsx_vfrecip_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d( ++ ++ // __lsx_vfrint_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s( ++ ++ // __lsx_vfrint_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrint_d(v2f64_a); // 
CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d( ++ ++ // __lsx_vfrsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s( ++ ++ // __lsx_vfrsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d( ++ ++ // __lsx_vflogb_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s( ++ ++ // __lsx_vflogb_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d( ++ ++ // __lsx_vfcvth_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h( ++ ++ // __lsx_vfcvth_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s( ++ ++ //gcc build fail ++ ++ // __lsx_vfcvtl_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h( ++ ++ // __lsx_vfcvtl_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s( ++ ++ // __lsx_vftint_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.s( ++ ++ // __lsx_vftint_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d( ++ ++ // __lsx_vftint_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s( ++ ++ // __lsx_vftint_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d( ++ ++ // __lsx_vftintrz_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s( ++ ++ // __lsx_vftintrz_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d( ++ ++ // __lsx_vftintrz_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s( ++ ++ // __lsx_vftintrz_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d( ++ ++ // __lsx_vffint_s_w ++ // vd, vj ++ // V4SF, V4SI ++ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w( ++ ++ // __lsx_vffint_d_l ++ // vd, vj ++ // V2DF, V2DI ++ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l( ++ ++ // __lsx_vffint_s_wu ++ // vd, vj ++ // V4SF, UV4SI ++ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu( ++ ++ // __lsx_vffint_d_lu ++ // vd, vj ++ // V2DF, UV2DI ++ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu( ++ ++ // __lsx_vandn_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v( ++ ++ // __lsx_vneg_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b( ++ ++ // __lsx_vneg_h ++ // vd, vj ++ // 
V8HI, V8HI ++ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h( ++ ++ // __lsx_vneg_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w( ++ ++ // __lsx_vneg_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d( ++ ++ // __lsx_vmuh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b( ++ ++ // __lsx_vmuh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h( ++ ++ // __lsx_vmuh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w( ++ ++ // __lsx_vmuh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d( ++ ++ // __lsx_vmuh_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu( ++ ++ // __lsx_vmuh_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu( ++ ++ // __lsx_vmuh_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu( ++ ++ // __lsx_vmuh_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du( ++ ++ // __lsx_vsllwil_h_b ++ // vd, vj, ui3 ++ // V8HI, V16QI, UQI ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b( ++ ++ // __lsx_vsllwil_w_h ++ // vd, vj, ui4 ++ // V4SI, V8HI, UQI ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h( ++ ++ // __lsx_vsllwil_d_w ++ // vd, vj, ui5 ++ // V2DI, V4SI, UQI ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w( ++ ++ // __lsx_vsllwil_hu_bu ++ // vd, vj, ui3 ++ // UV8HI, UV16QI, UQI ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu( ++ ++ // __lsx_vsllwil_wu_hu ++ // vd, vj, ui4 ++ // UV4SI, UV8HI, UQI ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu( ++ ++ // __lsx_vsllwil_du_wu ++ // vd, vj, ui5 ++ // UV2DI, UV4SI, UQI ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu( ++ ++ // __lsx_vsran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h( ++ ++ // __lsx_vsran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w( ++ ++ // __lsx_vsran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d( ++ ++ // __lsx_vssran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h( ++ ++ // __lsx_vssran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vssran.h.w( ++ ++ // __lsx_vssran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d( ++ ++ // __lsx_vssran_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h( ++ ++ // __lsx_vssran_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w( ++ ++ // __lsx_vssran_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d( ++ ++ // __lsx_vsrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h( ++ ++ // __lsx_vsrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w( ++ ++ // __lsx_vsrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d( ++ ++ // __lsx_vssrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h( ++ ++ // __lsx_vssrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w( ++ ++ // __lsx_vssrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d( ++ ++ // __lsx_vssrarn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h( ++ ++ // __lsx_vssrarn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w( ++ ++ // __lsx_vssrarn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d( ++ ++ // __lsx_vsrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h( ++ ++ // __lsx_vsrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w( ++ ++ // __lsx_vsrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d( ++ ++ // __lsx_vssrln_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h( ++ ++ // __lsx_vssrln_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w( ++ ++ // __lsx_vssrln_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d( ++ ++ // __lsx_vsrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h( ++ 
++ // __lsx_vsrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w( ++ ++ // __lsx_vsrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d( ++ ++ // __lsx_vssrlrn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h( ++ ++ // __lsx_vssrlrn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w( ++ ++ // __lsx_vssrlrn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d( ++ ++ // __lsx_vfrstpi_b ++ // vd, vj, ui5 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b( ++ ++ // __lsx_vfrstpi_h ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h( ++ ++ // __lsx_vfrstp_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstp.b( ++ ++ // __lsx_vfrstp_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h( ++ ++ // __lsx_vshuf4i_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d( ++ ++ // __lsx_vbsrl_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v( ++ ++ // __lsx_vbsll_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v( ++ ++ // __lsx_vextrins_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b( ++ ++ // __lsx_vextrins_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h( ++ ++ // __lsx_vextrins_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, UQI ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w( ++ ++ // __lsx_vextrins_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, UQI ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d( ++ ++ // __lsx_vmskltz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b( ++ ++ // __lsx_vmskltz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h( ++ ++ // __lsx_vmskltz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w( ++ ++ // __lsx_vmskltz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d( ++ ++ // __lsx_vsigncov_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b( ++ ++ // __lsx_vsigncov_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h( ++ ++ // __lsx_vsigncov_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w( ++ ++ // __lsx_vsigncov_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d( ++ ++ // __lsx_vfmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s( ++ ++ // __lsx_vfmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d( ++ ++ // __lsx_vfmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s( ++ ++ // __lsx_vfmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d( ++ ++ // __lsx_vfnmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s( ++ ++ // __lsx_vfnmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d( ++ ++ // __lsx_vfnmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s( ++ ++ // __lsx_vfnmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d( ++ ++ // __lsx_vftintrne_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s( ++ ++ // __lsx_vftintrne_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d( ++ ++ // __lsx_vftintrp_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s( ++ ++ // __lsx_vftintrp_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d( ++ ++ // __lsx_vftintrm_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s( ++ ++ // __lsx_vftintrm_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d( ++ ++ // __lsx_vftint_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d( ++ ++ // __lsx_vffint_s_l ++ // vd, vj, vk ++ // V4SF, V2DI, V2DI ++ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l( ++ ++ // __lsx_vftintrz_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // 
CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d( ++ ++ // __lsx_vftintrp_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d( ++ ++ // __lsx_vftintrm_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d( ++ ++ // __lsx_vftintrne_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d( ++ ++ // __lsx_vftintl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s( ++ ++ // __lsx_vftinth_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s( ++ ++ // __lsx_vffinth_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w( ++ ++ // __lsx_vffintl_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w( ++ ++ // __lsx_vftintrzl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s( ++ ++ // __lsx_vftintrzh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s( ++ ++ // __lsx_vftintrpl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s( ++ ++ // __lsx_vftintrph_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s( ++ ++ // __lsx_vftintrml_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s( ++ ++ // __lsx_vftintrmh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s( ++ ++ // __lsx_vftintrnel_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s( ++ ++ // __lsx_vftintrneh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s( ++ ++ // __lsx_vfrintrne_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfrintrne.s( ++ ++ // __lsx_vfrintrne_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrne.d( ++ ++ // __lsx_vfrintrz_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfrintrz.s( ++ ++ // __lsx_vfrintrz_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrz.d( ++ ++ // __lsx_vfrintrp_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfrintrp.s( ++ ++ // __lsx_vfrintrp_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrp.d( ++ ++ // __lsx_vfrintrm_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrm_s(v4f32_a); // CHECK: 
call <4 x i32> @llvm.loongarch.lsx.vfrintrm.s( ++ ++ // __lsx_vfrintrm_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrm.d( ++ ++ // __lsx_vstelm_b ++ // vd, rj, si8, idx ++ // VOID, V16QI, CVPOINTER, SI, UQI ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b( ++ // __lsx_vstelm_h ++ // vd, rj, si8, idx ++ // VOID, V8HI, CVPOINTER, SI, UQI ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h( ++ ++ // __lsx_vstelm_w ++ // vd, rj, si8, idx ++ // VOID, V4SI, CVPOINTER, SI, UQI ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w( ++ ++ // __lsx_vstelm_d ++ // vd, rj, si8, idx ++ // VOID, V2DI, CVPOINTER, SI, UQI ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d( ++ ++ // __lsx_vaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w( ++ ++ // __lsx_vaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h( ++ ++ // __lsx_vaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b( ++ ++ // __lsx_vaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w( ++ ++ // __lsx_vaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h( ++ ++ // __lsx_vaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b( ++ ++ // __lsx_vaddwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu( ++ ++ // __lsx_vaddwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu( ++ ++ // __lsx_vaddwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu( ++ ++ // __lsx_vaddwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu( ++ ++ // __lsx_vaddwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu( ++ ++ // __lsx_vaddwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu( ++ ++ // __lsx_vaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w( ++ ++ // __lsx_vaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h( ++ ++ // __lsx_vaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, 
v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b( ++ ++ // __lsx_vaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w( ++ ++ // __lsx_vaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h( ++ ++ // __lsx_vaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b( ++ ++ // __lsx_vsubwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w( ++ ++ // __lsx_vsubwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h( ++ ++ // __lsx_vsubwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b( ++ ++ // __lsx_vsubwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w( ++ ++ // __lsx_vsubwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h( ++ ++ // __lsx_vsubwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b( ++ ++ // __lsx_vsubwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu( ++ ++ // __lsx_vsubwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu( ++ ++ // __lsx_vsubwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu( ++ ++ // __lsx_vsubwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu( ++ ++ // __lsx_vsubwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu( ++ ++ // __lsx_vsubwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu( ++ ++ // __lsx_vaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d( ++ ++ // __lsx_vaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d( ++ ++ // __lsx_vaddwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du( ++ ++ // __lsx_vaddwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du( ++ ++ // __lsx_vsubwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r 
= __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d( ++ ++ // __lsx_vsubwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d( ++ ++ // __lsx_vsubwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du( ++ ++ // __lsx_vsubwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du( ++ ++ // __lsx_vaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d( ++ ++ // __lsx_vaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d( ++ ++ // __lsx_vmulwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w( ++ ++ // __lsx_vmulwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h( ++ ++ // __lsx_vmulwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b( ++ ++ // __lsx_vmulwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w( ++ ++ // __lsx_vmulwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h( ++ ++ // __lsx_vmulwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b( ++ ++ // __lsx_vmulwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu( ++ ++ // __lsx_vmulwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu( ++ ++ // __lsx_vmulwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu( ++ ++ // __lsx_vmulwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu( ++ ++ // __lsx_vmulwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu( ++ ++ // __lsx_vmulwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu( ++ ++ // __lsx_vmulwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w( ++ ++ // __lsx_vmulwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h( ++ ++ // __lsx_vmulwev_h_bu_b ++ // vd, vj, 
vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b( ++ ++ // __lsx_vmulwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w( ++ ++ // __lsx_vmulwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h( ++ ++ // __lsx_vmulwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b( ++ ++ // __lsx_vmulwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d( ++ ++ // __lsx_vmulwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d( ++ ++ // __lsx_vmulwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du( ++ ++ // __lsx_vmulwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du( ++ ++ // __lsx_vmulwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d( ++ ++ // __lsx_vmulwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d( ++ ++ // __lsx_vhaddw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d( ++ ++ // __lsx_vhaddw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du( ++ ++ // __lsx_vhsubw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d( ++ ++ // __lsx_vhsubw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du( ++ ++ // __lsx_vmaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w( ++ ++ // __lsx_vmaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h( ++ ++ // __lsx_vmaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b( ++ ++ // __lsx_vmaddwev_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu( ++ ++ // __lsx_vmaddwev_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu( ++ ++ // __lsx_vmaddwev_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ 
v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu( ++ ++ // __lsx_vmaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w( ++ ++ // __lsx_vmaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h( ++ ++ // __lsx_vmaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b( ++ ++ // __lsx_vmaddwod_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu( ++ ++ // __lsx_vmaddwod_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu( ++ ++ // __lsx_vmaddwod_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu( ++ ++ // __lsx_vmaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w( ++ ++ // __lsx_vmaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h( ++ ++ // __lsx_vmaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b( ++ ++ // __lsx_vmaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w( ++ ++ // __lsx_vmaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h( ++ ++ // __lsx_vmaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b( ++ ++ // __lsx_vmaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d( ++ ++ // __lsx_vmaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d( ++ ++ // __lsx_vmaddwev_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du( ++ ++ // __lsx_vmaddwod_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du( ++ ++ // __lsx_vmaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d( ++ ++ // __lsx_vmaddwod_q_du_d ++ // vd, 
vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d( ++ ++ // __lsx_vrotr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b( ++ ++ // __lsx_vrotr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h( ++ ++ // __lsx_vrotr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w( ++ ++ // __lsx_vrotr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d( ++ ++ // __lsx_vadd_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q( ++ ++ // __lsx_vsub_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q( ++ ++ // __lsx_vldrepl_b ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b( ++ ++ // __lsx_vldrepl_h ++ // vd, rj, si11 ++ // V8HI, CVPOINTER, SI ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h( ++ ++ // __lsx_vldrepl_w ++ // vd, rj, si10 ++ // V4SI, CVPOINTER, SI ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w( ++ ++ // __lsx_vldrepl_d ++ // vd, rj, si9 ++ // V2DI, CVPOINTER, SI ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d( ++ ++ // __lsx_vmskgez_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b( ++ ++ // __lsx_vmsknz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b( ++ ++ // __lsx_vexth_h_b ++ // vd, vj ++ // V8HI, V16QI ++ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b( ++ ++ // __lsx_vexth_w_h ++ // vd, vj ++ // V4SI, V8HI ++ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h( ++ ++ // __lsx_vexth_d_w ++ // vd, vj ++ // V2DI, V4SI ++ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w( ++ ++ // __lsx_vexth_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d( ++ ++ // __lsx_vexth_hu_bu ++ // vd, vj ++ // UV8HI, UV16QI ++ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu( ++ ++ // __lsx_vexth_wu_hu ++ // vd, vj ++ // UV4SI, UV8HI ++ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu( ++ ++ // __lsx_vexth_du_wu ++ // vd, vj ++ // UV2DI, UV4SI ++ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu( ++ ++ // __lsx_vexth_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du( ++ ++ // __lsx_vrotri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotri.b( ++ ++ // __lsx_vrotri_h ++ // 
vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h( ++ ++ // __lsx_vrotri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w( ++ ++ // __lsx_vrotri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d( ++ ++ // __lsx_vextl_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d( ++ ++ // __lsx_vsrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h( ++ ++ // __lsx_vsrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w( ++ ++ // __lsx_vsrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d( ++ ++ // __lsx_vsrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q( ++ ++ // __lsx_vssrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h( ++ ++ // __lsx_vssrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w( ++ ++ // __lsx_vssrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d( ++ ++ // __lsx_vssrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q( ++ ++ // __lsx_vssrlni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h( ++ ++ // __lsx_vssrlni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w( ++ ++ // __lsx_vssrlni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d( ++ ++ // __lsx_vssrlni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q( ++ ++ // __lsx_vssrlrni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h( ++ ++ // __lsx_vssrlrni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w( ++ ++ // __lsx_vssrlrni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d( ++ ++ // __lsx_vssrlrni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); 
// CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q( ++ ++ // __lsx_vssrlrni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h( ++ ++ // __lsx_vssrlrni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w( ++ ++ // __lsx_vssrlrni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( ++ ++ // __lsx_vssrlrni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( ++ ++ // __lsx_vsrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( ++ ++ // __lsx_vsrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( ++ ++ // __lsx_vsrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d( ++ ++ // __lsx_vsrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( ++ ++ // __lsx_vsrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( ++ ++ // __lsx_vsrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( ++ ++ // __lsx_vsrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( ++ ++ // __lsx_vsrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( ++ ++ // __lsx_vssrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( ++ ++ // __lsx_vssrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( ++ ++ // __lsx_vssrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( ++ ++ // __lsx_vssrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( ++ ++ // __lsx_vssrani_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( ++ ++ // __lsx_vssrani_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( ++ ++ // __lsx_vssrani_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = 
__lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( ++ ++ // __lsx_vssrani_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( ++ ++ // __lsx_vssrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( ++ ++ // __lsx_vssrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( ++ ++ // __lsx_vssrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( ++ ++ // __lsx_vssrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( ++ ++ // __lsx_vssrarni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( ++ ++ // __lsx_vssrarni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( ++ ++ // __lsx_vssrarni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( ++ ++ // __lsx_vssrarni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( ++ ++ // __lsx_vpermi_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( ++ ++ // __lsx_vld ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( ++ ++ // __lsx_vst ++ // vd, rj, si12 ++ // VOID, V16QI, CVPOINTER, SI ++ __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( ++ ++ // __lsx_vssrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( ++ ++ // __lsx_vssrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( ++ ++ // __lsx_vssrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( ++ ++ // __lsx_vssrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( ++ ++ // __lsx_vssrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( ++ ++ // __lsx_vssrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( ++ ++ // __lsx_vorn_v ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vorn.v( ++ ++ // __lsx_vldi ++ // vd, i13 
++ // V2DI, HI ++ v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( ++ ++ // __lsx_vshuf_b ++ // vd, vj, vk, va ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( ++ ++ // __lsx_vldx ++ // vd, rj, rk ++ // V16QI, CVPOINTER, DI ++ v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( ++ ++ // __lsx_vstx ++ // vd, rj, rk ++ // VOID, V16QI, CVPOINTER, DI ++ __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( ++ ++ // __lsx_vextl_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( ++ ++ // __lsx_bnz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( ++ ++ // __lsx_bz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( ++ ++ // __lsx_bnz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( ++ ++ // __lsx_bnz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( ++ ++ // __lsx_bnz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.w( ++ ++ // __lsx_bnz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( ++ ++ // __lsx_bz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( ++ ++ // __lsx_bz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( ++ ++ // __lsx_bz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( ++ ++ // __lsx_bz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( ++} +diff --git a/test/CodeGen/loongarch-inline-asm-modifiers.c b/test/CodeGen/loongarch-inline-asm-modifiers.c +new file mode 100644 +index 00000000..08822e64 +--- /dev/null ++++ b/test/CodeGen/loongarch-inline-asm-modifiers.c +@@ -0,0 +1,50 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ ++// RUN: | FileCheck %s ++ ++// This checks that the frontend will accept inline asm operand modifiers ++ ++int printf(const char*, ...); ++ ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); ++ ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,$1;\0A", "=r,*m"(i32* elementtype(i32) getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 {{[0-9]+}}, i64 {{[0-9]+}})) #2, ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(i32* elementtype(i32) getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 {{[0-9]+}}, i64 {{[0-9]+}})) #2, ++// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f" ++// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f" ++int b[8] = {0,1,2,3,4,5,6,7}; ++int main() ++{ ++ int i; ++ v2i64 v2i64_r; ++ v4i64 v4i64_r; ++ ++ // The first word. 
Notice, no 'D' ++ {asm ( ++ "ld.w %0,%1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)));} ++ ++ printf("%d\n",i); ++ ++ // The second word ++ {asm ( ++ "ld.w %0,%D1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)) ++ );} ++ ++ // LSX registers ++ { asm("vldi %w0,1" ++ : "=f"(v2i64_r)); } ++ ++ printf("%d\n", i); ++ ++ // LASX registers ++ { asm("xldi %u0,1" ++ : "=f"(v4i64_r)); } ++ ++ printf("%d\n",i); ++ ++ return 1; ++} +diff --git a/test/CodeGen/loongarch-inline-asm.c b/test/CodeGen/loongarch-inline-asm.c +new file mode 100644 +index 00000000..dadb7e3f +--- /dev/null ++++ b/test/CodeGen/loongarch-inline-asm.c +@@ -0,0 +1,31 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s ++ ++int data; ++ ++void m () { ++ asm("ld.w $r1, %0" :: "m"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(i32* elementtype(i32) @data) ++} ++ ++void ZC () { ++ asm("ll.w $r1, %0" :: "ZC"(data)); ++ // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(i32* elementtype(i32) @data) ++} ++ ++void ZB () { ++ asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data)); ++ // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(i32* elementtype(i32) @data) ++} ++ ++void R () { ++ asm("ld.w $r1, %0" :: "R"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(i32* elementtype(i32) @data) ++} ++ ++int *p; ++void preld () { ++ asm("preld 0, %0, 2" :: "r"(p)); ++ // CHECK: %0 = load i32*, i32** @p, align 8 ++ // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(i32* %0) ++} +diff --git a/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +new file mode 100644 +index 00000000..cbe6469d +--- /dev/null ++++ b/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +@@ -0,0 +1,95 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ ++// RUN: -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++/// Ensure that fields inherited from a parent struct are treated in the same ++/// way as fields directly in the child for the purposes of LoongArch ABI rules. 
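The inheritance cases that follow are easier to read if one keeps in mind the flattened aggregate each child reduces to. Below is a minimal C sketch, not part of the patch, of the shape the int32-parent / float-child case collapses to under lp64d; the lowering noted in the comment is an assumption that simply mirrors the CHECK line used further down for child2_float_s (one GPR plus one FPR):

  /* flattened analogue of the inherited case: one GPR + one FPR */
  #include <stdint.h>

  struct int32_float_pair {
      int32_t i;   /* field inherited from the parent struct */
      float   f;   /* field declared directly in the child */
  };

  /* expected IR signature, per the CHECK below: { i32, float } (i32, float) */
  struct int32_float_pair pass_pair(struct int32_float_pair a) { return a; }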
++ ++struct parent1_int32_s { ++ int32_t i1; ++}; ++ ++struct child1_int32_s : parent1_int32_s { ++ int32_t i2; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce) ++struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) { ++ return a; ++} ++ ++struct parent2_int32_s { ++ int32_t i1; ++}; ++ ++struct child2_float_s : parent2_int32_s { ++ float f1; ++}; ++ ++// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1) ++struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) { ++ return a; ++} ++ ++struct parent3_float_s { ++ float f1; ++}; ++ ++struct child3_int64_s : parent3_float_s { ++ int64_t i1; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1) ++struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) { ++ return a; ++} ++ ++struct parent4_double_s { ++ double d1; ++}; ++ ++struct child4_double_s : parent4_double_s { ++ double d1; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1) ++struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) { ++ return a; ++} ++ ++/// When virtual inheritance is used, the resulting struct isn't eligible for ++/// passing in registers. ++ ++struct parent5_virtual_s { ++ int32_t i1; ++}; ++ ++struct child5_virtual_s : virtual parent5_virtual_s { ++ float f1; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(%struct.child5_virtual_s*{{.*}} %this, %struct.child5_virtual_s*{{.*}} dereferenceable(12) %0) ++struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) { ++ return a; ++} ++ ++/// Check for correct lowering in the presence of diamoned inheritance. ++ ++struct parent6_float_s { ++ float f1; ++}; ++ ++struct child6a_s : parent6_float_s { ++}; ++ ++struct child6b_s : parent6_float_s { ++}; ++ ++struct grandchild_6_s : child6a_s, child6b_s { ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) ++struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { ++ return a; ++} +diff --git a/test/Driver/loongarch-abi-fpu.c b/test/Driver/loongarch-abi-fpu.c +new file mode 100644 +index 00000000..180d440c +--- /dev/null ++++ b/test/Driver/loongarch-abi-fpu.c +@@ -0,0 +1,26 @@ ++/// Check passing -mabi= and -mfpu= options to the backend. 
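The RUN lines that follow encode which -mabi/-mfpu pairs the driver accepts: lp64s builds with any FPU setting, lp64f needs a hardware FPU (32- or 64-bit), and lp64d is only valid together with -mfpu=64. A compiled program can confirm which combination was selected by inspecting the predefined macros; this is a sketch only, and the macro names (taken from the LoongArch toolchain conventions) are an assumption about this particular clang port:

  #include <stdio.h>

  int main(void) {
  #if defined(__loongarch_double_float)
      puts("lp64d: 64-bit FPU, doubles passed in FP registers");
  #elif defined(__loongarch_single_float)
      puts("lp64f: 32-bit FPU, only floats passed in FP registers");
  #else
      puts("lp64s: soft-float parameter passing");
  #endif
      return 0;
  }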
++ ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-NF-ND %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-F %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64F-WITH-FPUNONE %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-F %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++ ++// FEATURE-D: "-target-feature" "+d" ++// FEATURE-F: "-target-feature" "+f" ++// FEATURE-NF-ND: "-target-feature" "-f" "-target-feature" "-d" ++// ERRLP64D-ONLY-FPU64: error: option 'lp64d' cannot be specified without '-mfpu=64' ++// ERRLP64F-WITH-FPUNONE: error: option 'lp64f' cannot be specified with '-mfpu=none' +diff --git a/test/Driver/loongarch-alignment-feature.c b/test/Driver/loongarch-alignment-feature.c +new file mode 100644 +index 00000000..2270ff53 +--- /dev/null ++++ b/test/Driver/loongarch-alignment-feature.c +@@ -0,0 +1,8 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s ++ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s ++ ++// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" ++// CHECK-ALIGNED: "-target-feature" "-unaligned-access" +diff --git a/test/Driver/loongarch-double-single-soft.c b/test/Driver/loongarch-double-single-soft.c +new file mode 100644 +index 00000000..4b25f876 +--- /dev/null ++++ b/test/Driver/loongarch-double-single-soft.c +@@ -0,0 +1,12 @@ ++// Check passing -m*-float options to the backend. ++ ++// RUN: %clang -target loongarch64 %s -mdouble-float -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-DOUBLE %s ++// RUN: %clang -target loongarch64 %s -msingle-float -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-SINGLE %s ++// RUN: %clang -target loongarch64 %s -msoft-float -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-SOFT %s ++ ++// CHECK-DOUBLE: "-target-feature" "+d" "-target-abi" "lp64d" ++// CHECK-SINGLE: "-target-feature" "+f" "-target-abi" "lp64f" ++// CHECK-SOFT: "-target-feature" "-f" "-target-feature" "-d" "-target-abi" "lp64s" +diff --git a/test/Driver/loongarch-mabi.c b/test/Driver/loongarch-mabi.c +new file mode 100644 +index 00000000..88a90408 +--- /dev/null ++++ b/test/Driver/loongarch-mabi.c +@@ -0,0 +1,22 @@ ++// Check passing -mabi= options to the backend. 
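The check lines that follow pin the three ABI names down to the -target-abi value handed to cc1. The practical difference is easiest to see on a function that returns a double; the sketch below is not taken from the test, and the register behaviour in the comments is stated from the LoongArch psABI rather than verified against this toolchain:

  /* build once with -mabi=lp64d and once with -mabi=lp64s to compare */
  double half(double x) {
      /* lp64d: x arrives and the result returns in $fa0 */
      /* lp64s: the same bits travel in the integer register $a0 instead */
      return x * 0.5;
  }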
++ ++// check default ABI for loongarch64 ++// RUN: %clang -target loongarch64 %s -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64D %s ++// check -mabi=lp64d option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64D %s ++// check -mabi=lp64f option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64F %s ++// check -mabi=lp64s option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64S %s ++// check invalid -mabi=x option for loongarch64 ++// RUN: not %clang -target loongarch64 %s -mabi=x 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-X %s ++ ++// CHECK-LP64D: "-target-abi" "lp64d" ++// CHECK-LP64F: "-target-abi" "lp64f" ++// CHECK-LP64S: "-target-abi" "lp64s" ++// CHECK-X: error: unknown target ABI 'x' +diff --git a/test/Driver/loongarch-mfpu.c b/test/Driver/loongarch-mfpu.c +new file mode 100644 +index 00000000..0cf05fd3 +--- /dev/null ++++ b/test/Driver/loongarch-mfpu.c +@@ -0,0 +1,21 @@ ++// Check passing -mfpu= options to the backend. ++ ++// check default feature for loongarch64 ++// RUN: %clang -target loongarch64 %s -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// check -mfpu=64 option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// check -mfpu=32 option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// check -mfpu=none option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// check -mfpu=x option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=x -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=INVALID-FPU %s ++ ++// FEATURE-D: "-target-feature" "+d" ++// INVALID-FPU: error: invalid loongarch FPU value 'x'. 
Please specify FPU = 64,32 or none ++// ERRLP64D-ONLY-FPU64: error: option 'lp64d' cannot be specified without '-mfpu=64' +-- +2.38.1 + diff --git a/clang14/PKGBUILD b/clang14/PKGBUILD index 0b76be2b3b..13097ce52e 100644 --- a/clang14/PKGBUILD +++ b/clang14/PKGBUILD @@ -19,14 +19,18 @@ source=($_source_base/clang-$pkgver.src.tar.xz{,.sig} $_source_base/llvm-$pkgver.src.tar.xz{,.sig} enforce-instantiation-of-constexpr-template-functions.patch clang-coroutines-ubsan.patch - enable-fstack-protector-strong-by-default.patch) + enable-fstack-protector-strong-by-default.patch + 0001-add-loong64-support.patch + 0002-add-loong64-support.patch) sha256sums=('2b5847b6a63118b9efe5c85548363c81ffe096b66c3b3675e953e26342ae4031' 'SKIP' '050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a' 'SKIP' 'eb2916131ae63b3bd1689f6a27dc0c2fadad73a5c5f2c828062c8a2c547e4a0d' '2c25ddf0ba6be01949842873fef4d285456321aaccd4ba95db61b69a4c580106' - '7a9ce949579a3b02d4b91b6835c4fb45adc5f743007572fb0e28e6433e48f3a5') + '7a9ce949579a3b02d4b91b6835c4fb45adc5f743007572fb0e28e6433e48f3a5' + 'ac0284b611d21a01327b66ad1ab9dfb83cb883b7f99b7960418477b7d2c04dfc' + '192ecde3154897e13b6d23de9aefca3022ad43a1339a1c4bfa27832e6166886c') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard # Utilizing LLVM_DISTRIBUTION_COMPONENTS to avoid @@ -62,6 +66,10 @@ prepare() { # https://github.com/llvm/llvm-project/issues/49689 patch -Np2 -i ../clang-coroutines-ubsan.patch + + # https://github.com/llvm/llvm-project/issues/54116 + patch -Np1 -i ../0001-add-loong64-support.patch + patch -Np1 -i ../0002-add-loong64-support.patch } build() { diff --git a/clash/PKGBUILD b/clash/PKGBUILD index 4b0af71517..b359732c89 100644 --- a/clash/PKGBUILD +++ b/clash/PKGBUILD @@ -25,8 +25,12 @@ build() { export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOPROXY=https://goproxy.cn cd "${pkgname}-${pkgver}" + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod edit -replace=github.com/u-root/uio=github.com/loongarch64/uio@dev-main + go mod tidy go build -trimpath -ldflags "-X github.com/Dreamacro/clash/constant.Version=${pkgver}" -mod=readonly } diff --git a/cln/PKGBUILD b/cln/PKGBUILD index 4645f6bb9a..11252173a3 100644 --- a/cln/PKGBUILD +++ b/cln/PKGBUILD @@ -10,8 +10,15 @@ url="https://www.ginac.de/CLN/" license=('GPL') depends=('gmp') makedepends=('texlive-latex') -source=(https://www.ginac.de/CLN/${pkgname}-${pkgver}.tar.bz2) -sha256sums=('f492530e8879bda529009b6033e1923c8f4aae843149fc28c667c20b094d984a') +source=(https://www.ginac.de/CLN/${pkgname}-${pkgver}.tar.bz2 + cln-la64.patch) +sha256sums=('f492530e8879bda529009b6033e1923c8f4aae843149fc28c667c20b094d984a' + '8cea3f49ab301dff766450bfdbff3ac4e5bf64cb60307f73681267aa151bb6c5') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/cln-la64.patch" +} build() { cd ${pkgname}-${pkgver} diff --git a/cln/cln-la64.patch b/cln/cln-la64.patch new file mode 100644 index 0000000000..df654d5ba3 --- /dev/null +++ b/cln/cln-la64.patch @@ -0,0 +1,53 @@ +Index: cln-1.3.6/include/cln/object.h +=================================================================== +--- cln-1.3.6.orig/include/cln/object.h ++++ cln-1.3.6/include/cln/object.h +@@ -25,7 +25,7 @@ namespace cln { + #if defined(__i386__) || (defined(__mips__) && !defined(__LP64__)) || (defined(__sparc__) && !defined(__arch64__)) || defined(__hppa__) || defined(__arm__) || defined(__rs6000__) 
|| defined(__m88k__) || defined(__convex__) || (defined(__s390__) && !defined(__s390x__)) || defined(__sh__) || (defined(__x86_64__) && defined(__ILP32__)) + #define cl_word_alignment 4 + #endif +-#if defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(__s390x__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) ++#if defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(__s390x__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) || defined(__loongarch64) + #define cl_word_alignment 8 + #endif + #if !defined(cl_word_alignment) +Index: cln-1.3.6/include/cln/types.h +=================================================================== +--- cln-1.3.6.orig/include/cln/types.h ++++ cln-1.3.6/include/cln/types.h +@@ -51,7 +51,7 @@ + #undef HAVE_LONGLONG + #endif + #endif +- #if defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) || defined(_M_AMD64)) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)) || defined(__e2k__) ++ #if defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) || defined(_M_AMD64)) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)) || defined(__e2k__) || defined(__loongarch64) + // 64 bit registers in hardware + #define HAVE_FAST_LONGLONG + #endif +@@ -79,7 +79,7 @@ + + // Integer type used for counters. + // Constraint: sizeof(uintC) >= sizeof(uintL) +- #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__))) ++ #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__)) || defined(__loongarch64)) + #define intCsize long_bitsize + typedef long sintC; + typedef unsigned long uintC; +@@ -91,7 +91,7 @@ + + // Integer type used for lfloat exponents. 
+ // Constraint: sizeof(uintE) >= sizeof(uintC) +- #if (defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__rs6000__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__))) ++ #if (defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__rs6000__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) || defined(__loongarch64))) + #define intEsize 64 + typedef sint64 sintE; + typedef uint64 uintE; +@@ -132,7 +132,7 @@ + typedef int sintD; + typedef unsigned int uintD; + #else // we are not using GMP, so just guess something reasonable +- #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__s390x__) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__))) ++ #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__s390x__) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) || defined(__loongarch64))) + #define intDsize 64 + typedef sint64 sintD; + typedef uint64 uintD; diff --git a/clucene/PKGBUILD b/clucene/PKGBUILD index 828ce314f1..84602d628c 100644 --- a/clucene/PKGBUILD +++ b/clucene/PKGBUILD @@ -13,6 +13,7 @@ license=('APACHE' 'LGPL') depends=('gcc-libs>=4.7.1-5' 'zlib') makedepends=('cmake' 'boost>=1.54.0') source=(https://downloads.sourceforge.net/$pkgname/$pkgname-core-$pkgver.tar.gz + https://gitweb.gentoo.org/repo/gentoo.git/plain/dev-cpp/clucene/files/clucene-2.3.3.4-gmtime.patch # Fedora patches clucene-core-2.3.3.4-pkgconfig.patch clucene-core-2.3.3.4-install_contribs_lib.patch @@ -26,6 +27,7 @@ source=(https://downloads.sourceforge.net/$pkgname/$pkgname-core-$pkgver.tar.gz clucene-narrowing-conversions.patch clucene-multimap-put.patch) sha512sums=('1c9da9077edcebd46563bd9e47d330518e0b30061016650a759cfe051e9748fdad8932a472b1cca53a6adafed5f41656527271fc5f55ddfcefb558f0d83286b4' + '1c23c08edf5512c29a061e4a8515dfa209151af83e46680842f0aeee1269d529a14fd4a89aab7c25312fd7dbb9daf80912a265d21fcf08ac892a467be4a59a60' 'b357cb5a1c5d66219f3168724a71af3ebf5c45c752a612d3a69c170b739acc065dc17c261c3a730298ea6c637fe820637a100f73ab03d931734f80bb598fbf55' '0aa92635949089196e4e7c579e78761e8751987ef5036320a161a4aaa67da3c63756398c903419c76ea0fbdc8a949e871fcb65be98179a9853a24a5a4cacfde3' 'f606481b3bae44487a05e81da1e19dfa0bc5db8b10832d5b84c4e269fecb99ad010b90c5132e618c300f32b8c5bf28cfd0038c4ca2ddb4870c5a3f5113a18e64' @@ -51,6 +53,7 @@ prepare() { patch -Np0 -i "${srcdir}"/clucene-debug.patch patch -Np0 -i "${srcdir}"/clucene-narrowing-conversions.patch patch -Np0 -i "${srcdir}"/clucene-multimap-put.patch + patch -Np1 -i "${srcdir}"/clucene-2.3.3.4-gmtime.patch # FS#77036 / https://sourceforge.net/p/clucene/bugs/235/ patch -Np1 -i ../0001-Fix-missing-include-time.h.patch diff --git a/cni-plugins/PKGBUILD b/cni-plugins/PKGBUILD index f1ac376996..0458b04cdc 100644 --- a/cni-plugins/PKGBUILD +++ 
b/cni-plugins/PKGBUILD @@ -34,6 +34,9 @@ build() { export CGO_LDFLAGS="${LDFLAGS}" export GOPATH="${srcdir}" export GOFLAGS="-buildmode=pie -mod=readonly -modcacherw" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy # custom go build calls, since build_linux.sh is not flexible enough for plugin in plugins/meta/* plugins/main/* plugins/ipam/*; do diff --git a/cobalt/PKGBUILD b/cobalt/PKGBUILD index 1f69e2ccd0..e1d3be7991 100644 --- a/cobalt/PKGBUILD +++ b/cobalt/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('d99f09b5764339acdadae5142e429070c5634494538c0aa8a72e6f48114fbba0a12873c build() { cd ${pkgname}.rs-${pkgver} - cargo build --release --locked --features 'syntax-highlight sass' + cargo build --release --features 'syntax-highlight sass' } check() { diff --git a/cocogitto/PKGBUILD b/cocogitto/PKGBUILD index ecfdfd1f64..c12465f811 100644 --- a/cocogitto/PKGBUILD +++ b/cocogitto/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('2a0e332b7028ffcfeb113c734b4bf506c34362730e371b03a3e4a71142099330') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" +# cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir {completions,man} } diff --git a/code/PKGBUILD b/code/PKGBUILD index 21bccfc973..a1b509bcbe 100644 --- a/code/PKGBUILD +++ b/code/PKGBUILD @@ -44,6 +44,9 @@ case "$CARCH" in armv7h) _vscode_arch=arm ;; + loong64) + _vscode_arch=loong64 + ;; *) # Needed for mksrcinfo _vscode_arch=DUMMY diff --git a/coin-or-cbc/PKGBUILD b/coin-or-cbc/PKGBUILD index c7ed6f3600..9f39caa2be 100644 --- a/coin-or-cbc/PKGBUILD +++ b/coin-or-cbc/PKGBUILD @@ -22,10 +22,10 @@ source=($pkgname-$pkgver.tar.gz::https://github.com/coin-or/Cbc/archive/refs/tag sha256sums=('1fb591dd88336fdaf096b8e42e46111e41671a5eb85d4ee36e45baff1678bd33') build() { - cd Cbc-releases-$pkgver + cd Cbc-releases-$pkgver/Cbc ./configure --prefix=/usr \ --enable-cbc-parallel \ - --with-nauty-lib=/usr/lib/libnauty.a --with-nauty-incdir=/usr/include/nauty + --with-cbc-lib=/usr/lib/libnauty.a --with-cbc-incdir=/usr/include/nauty sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool make } @@ -36,7 +36,7 @@ check() { } package() { - cd Cbc-releases-$pkgver + cd Cbc-releases-$pkgver/Cbc make DESTDIR="$pkgdir" install # Remove nauty from linker flags in pc file, it is statically compiled diff --git a/committed/PKGBUILD b/committed/PKGBUILD index 22657da4fb..8cb1aaa9bc 100644 --- a/committed/PKGBUILD +++ b/committed/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('bb89632260499ae9dbbf1b2cd43dc9d43337e75259f84f762f821b1eb358849b') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/compiler-rt/PKGBUILD b/compiler-rt/PKGBUILD index 2040d9a63b..d9d6d302c3 100644 --- a/compiler-rt/PKGBUILD +++ b/compiler-rt/PKGBUILD @@ -36,6 +36,9 @@ prepare() { build() { cd compiler-rt-$pkgver.src/build +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CFLAGS/-mlsx /} local cmake_args=( -G Ninja diff --git a/compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc b/compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc new file mode 100644 index 0000000000..7ef29ec73c --- /dev/null +++ b/compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc @@ -0,0 +1,75 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + 
+mQINBFS+1SABEACnmkESkY7eZq0GhDjbkWpKmURGk9+ycsfAhA44NqUvf4tk1GPM +5SkJ/fYedYZJaDVhIp98fHgucD0O+vjOzghtgwtITusYjiPHPFBd/MN+MQqSEAP+ +LUa/kjHLjgyXxKhFUIDGVaDWL5tKOA7/AQKl1TyJ8lz89NHQoUHFsF/hu10+qhJe +V65d32MXFehIUSvegh8DrPuExrliSiORO4HOhuc6151dWA4YBWVg4rX5kfKrGMMT +pTWnSSZtgoRhkKW2Ey8cmZUqPuUJIfWyeNVu1e4SFtAivLvu/Ymz2WBJcNA1ZlTr +RCOR5SIRgZ453pQnI/Bzna2nnJ/TV1gGJIGRahj/ini0cs2x1CILfS/YJQ3rWGGo +OxwG0BVmPk0cmLVtyTq8gUPwxcPUd6WcBKhot3TDMlrffZACnQwQjlVjk5S1dEEz +atUfpEuNitU9WOM4jr/gjv36ZNCOWm95YwLhsuci/NddBN8HXhyvs+zYTVZEXa2W +l/FqOdQsQqZBcJjjWckGKhESdd7934+cesGD3O8KaeSGxww7slJrS0+6QJ8oBoAB +P/WCn/y2AiY2syEKp3wYIGJyAbsm542zMZ4nc7pYfSu49mcyhQQICmqN5QvOyYUx +OSqwbAOUNtlOyeRLZNIKoXtTqWDEu5aEiDROTw6Rkq+dIcxPNgOLdeQ3HwARAQAB +tCFIYW5zIFdlbm5ib3JnIDxoYW5zQGNocm9taXVtLm9yZz6JAlQEEwEKAD4WIQS2 +yPmCgrlE47DVwlMPwwQuNFrQXQUCXKW+LwIbAwUJDwUmjQULCQgHAgYVCgkICwIE +FgIDAQIeAQIXgAAKCRAPwwQuNFrQXXw+EACc4n7pYF89qmi6k4u1H5PLPcRVw4Ch +zY293N5JT8dM7c5Q0opPcgSS625SzAzEA8I3kRakFMsYZmJ7NFeFwIV7iJnaolft +iGCinbnB6bF8NnaEUOU0Pl4ByAuPiZqq8t5ORWUnZX/iRtOFEmCyRWHJPxCPFcJG +XCmQHTwnucePFdvNoIHN8vbkrHU32SUQ3iL4aEH92Y2s4D3WoNMW7g3b7srRynO1 +pzrT+bhihrl1MAnR6FiS4lSjw7VaEon1PJyaxs6OYO2x/fEz+uUnNPYZGhHQDTQ8 +DUyXNlXQ1mOOTMAwxg5JmqWfA2y1pmgJGpKe92t6vpVe9E90GBS9oCvSFXzItNg+ +p+9ogNDxMWnT48fygCqDVpk/PLdlyuNAQfuvtcZb8h5y1bzcwwBGHWb9McG12Z/K +JpcWvSQe/eZ9uHcyj2+b7SQHIJL9eaBsyhgvv573PK62Rc8fze+HtwZMWMvw5Fsc ++q5pJ8JS8y3s/EZYJ8URQ00QWOL6DDN1ik0vjxZ6zf+dpK1/3jToSrTnsY5TxXAM +gxeoFVhAtccnoAYY2zp2Dp7JonGNqXrE8rjMe67QBWzVUADgWMlCvFZ4W7ZGcj9y +2XgA4DbOgJVsx3xAGA6FuEIV0UDwDo4WweWnD4Jo+KVC3nWGW8AjNQb9EAn33WlI +K/mivl/oxH2rx7kCDQRUvtUgARAA7EHGtB6wKGOsKoqNjk+dKxJil5vh+ui5ysLz +3wAXDYOA39nP5bvC1JNu3P8ZFwK6uPNm83ujasK42TSPT6zWyBlmbYF2V2VpsvL5 +QX+RJbWtvmqF9dwYa5u7jw4x21J+iT2U5zRDUvgc2UYTiVQGRnOYjtiSp+X4HCub +2umLniDi5r08iKIcgCYyhkhxu04bUpoOvoKhdGT/eDZmIZTCGreMUauiIGwoRqnY +UnVuHk0mTYSDylXt8w4XuFRAoFms060g+7yEDlYSCS7dTdViNFIjdIOLpBecMv7E +fFqOJakq0XcmNmHzL8IJMPw/I/fhiN9m4WaR2yR7lx3HofRXZQKIfjnedyAVV1AN +eRjif7QxPOHLbG7QhVWcHFgNg2GL7cyNMcl30LjEyL237ki4S8MA+GB9mMOlBqQQ +/PqFWaCPSaUoiBGKUFEr3+Q7GTL260GkaTeMQkau7+Eo2WgU2ymhi1jrMBMCvwRw +6CgIVATSciS1yDfAX344ISdXbz9rtdnBRnsaX+p84e12vfvjCjyR3xHdXx3Yb2rn +DT+4JX001DR8ZZkM8Ohi3rCc8vqBm/+ckzyhlj67SsLbhbBJxkieJqvILgkcNqwC +GvZLYK2AK8GCyUrp/eAPXoofE9kwGlfvdPM5giEwQ/+9eBUltQPp1iG35T1zg6EQ +MmjCfR0AEQEAAYkCPAQYAQIAJgIbDBYhBLbI+YKCuUTjsNXCUw/DBC40WtBdBQJa +XfpLBQkPBSarAAoJEA/DBC40WtBdPX8P/1ilEM2BomXdhUO1Vmh5DCHsFDpQtlN5 +cU+iBiQXaPdVaDyz1SYCziyD/hr70otJqe1eNf4kWxG/SVB7kav9WXxVDgsoRcF+ +IaZKK+Mhnt6il13dg/bDoblPdIDh3YJB+yDiuck+dciPMo2JI6LfrzJue318vRja +vZqotOY/pjuKywNQ74nVNbVcebfj0k9HQeXhxO42dabgm5fabYIkRzlcGUMCFr2l +RWz4nkLYPRQUWTJ47N4k/DLrHkClYebzifwCOFBKm7WpErEpd3B6Lq2RBZYwe6L5 +OBJj/MKSYP3+hjXkSLlq8nhaAhtMslShkyLvSuI+ZTxOGOnMDtL42TSDusw+r5eX +XCGMpT+7S52WysgmPOSHp+2opSYiRvFhOmOGcS6M2sSvmbZLpnrHfL0TlBqAExF3 +FGF+T4dvIAJw/+n2tc7OXgzb3UOgp4AAfvQYeeIbHI2z2sCgyv+EPldb9avPd1wo +xzaznnkToxkgsTZmKiVxGf5tg4w9m1aVvH3y3y6ox/j2BjgUZAFkDA+CUyvHuaub +sdMiJdqFOFAY4mDqLMkMAPlHBIQaUBwvbxPwoC4zoIsuSGUF9DCIqxQE2eH2vzBX +eUH6lXQaEv7eLTvuBNh9kFHAvOMV2Gb3FQoRpnqs3UFf2XOLHh5I0rmeWfSNSrXr +sfYgf//ax/x3uQINBFylxXABEAC2Qt89UYDndAxNoCIJktuSBWh9BxC1JPPQtmLd +XTsG5vd2h63rBN64ZYTGuW2AQxGV24ngP8rv5F1QzSPY0UgOt25r7pS3+1MZbv+d +sZTtN4LWTXRdIVU+wcqKX1FZCGDSuGs5EpyElnKHxxGh7Wi0KFZMN64t83WPrbzq +aiKrpp9/QHMUtrNqPgUBNKvH8k5g/AGa21+fF1kRsUtmsZbre4IK9bakIjmAfNMA +ZA/YnJy0Ou06HcFWzkfTRLMrQHINUzOzNOhhXuYx3h4qSrvcJnqoGMJ9pZkOfrEJ +VPQexYq3hvL1jwMLdFKDozViUx520/7K8frusf+Df0RlucEVF4QjAV4RAuHBtrzP 
+LkH/0v6U3u1rX+5VMK8otud43cXcNet/cZ97jRm2rPzviRgYI9EljjD9vGPCIzmo +aJYs+eNJRIJGPqzVV+AELiH9Bc9jCad8XeECBsTCVNx+kEijKclQWr+3y610SXNY +JRKzlPBlMrqJ0U+/vNo59TUgZlwC8KdbiWtxEQ3JYFT7rHVH9cQeAlLXAE0yIfZK ++ss2HpIXgBvJ4nNyNBcFzoqF/iKBcH6yYRILNSGLEKOBnX3/XpAlvnOB1gcTSOQY +frNoXHpA7yzpGh1MeypdCeOqOicZZRF/xX1KR6YDC5YDOFM2paydDNS1ql0Wp0VW +WcIp1wARAQABiQI8BBgBCgAmFiEEtsj5goK5ROOw1cJTD8MELjRa0F0FAlylxXAC +GwwFCQlmAYAACgkQD8MELjRa0F3Quw/+MVB3lHyIORyth4q9KsTUUXBW11UtjKqq +SML0nMuNiqHefNd9P1+zVougyF002TfjkSnOpOoH2Uub3iCX0Cfyigo0rcjBXAvO +j9N9g8eL1xBenTdxYiiHvvIm0BadikfsdoqQebv3ONFda7eoQl689LqMKZ9ZEOxi +w7xQKcIPiNEt2WvBVv4mpEFx1pDbLZ/bUgbR3t7v/t6ijAVdIOjQvW/WPemyRTcB +7iJd68H6Uou/Ofy5EPUH4c/heyCw+eUUFnC9msDIvwtTbkz0Aaa7awbpoegFMz2L +LmSRMLybFn5lQTRR7TizzUvrprOx+UalbUASJS+TONZmVltz0eVVeJ3IHylUM/24 +cBh2wXqR63osDCZZkXVxbN9AtyoezEVvg8+XhDLyXeh+o05A/lRjMA33BkwyoKzi +5nZb7iaVYWlKM8Zs6PrB8zq9ErDGcka7gikvUuJ2KLKjJqj19/6Z90oCtJQa9ifi +glN+ER3y4hLHFmKI6ns+GNf0FwpgwD7WD9XBQR9uxBPCrVjXXv4IT9rBidzXT8rK +iXYX9tHBHn2wAk28uJOtdDNcsOdOEqfdmIVfBXNv2df6r8ewEzpNd2MpEOZRW8mc +cn+5dkF+W2mGn8Vky04ewU2+Bo9rApv3zJ76s0Skt2c8axKKtLhHY/H5HPiLNC29 +Qk8uiuyeUfE= +=H/uX +-----END PGP PUBLIC KEY BLOCK----- diff --git a/conky/PKGBUILD b/conky/PKGBUILD index dc0efa4643..a65bf54a63 100644 --- a/conky/PKGBUILD +++ b/conky/PKGBUILD @@ -79,7 +79,7 @@ build() { -D CMAKE_CXX_FLAGS="$CXXFLAGS -ffat-lto-objects" \ -D MAINTAINER_MODE=OFF \ -D BUILD_TESTS=ON \ - -D BUILD_DOCS=ON \ + -D BUILD_DOCS=OFF \ -D BUILD_EXTRAS=ON \ -D BUILD_WLAN=ON \ -D BUILD_XDBE=ON \ diff --git a/containerd/PKGBUILD b/containerd/PKGBUILD index e166cedcce..8791747158 100644 --- a/containerd/PKGBUILD +++ b/containerd/PKGBUILD @@ -26,6 +26,10 @@ prepare() { build() { cd "${pkgname}" export GOFLAGS="-trimpath -mod=readonly -modcacherw" + export GOPROXY=https://goproxy.cn + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod edit -replace=github.com/cilium/ebpf=github.com/cilium/ebpf@v0.10.1-0.20230613102335-1140a754d780 + go mod tidy make VERSION=v$pkgver GO_BUILD_FLAGS="-trimpath -mod=readonly -modcacherw" GO_GCFLAGS="" EXTRA_LDFLAGS="-buildid=" make VERSION=v$pkgver man } diff --git a/cpputest/PKGBUILD b/cpputest/PKGBUILD index ccfe005856..581088f76a 100644 --- a/cpputest/PKGBUILD +++ b/cpputest/PKGBUILD @@ -2,7 +2,7 @@ pkgname=cpputest pkgver=4.0 -pkgrel=4 +pkgrel=5 pkgdesc="Unit testing and mocking framework for C/C++" arch=(loong64 x86_64) url="https://github.com/cpputest/cpputest" diff --git a/cri-o/PKGBUILD b/cri-o/PKGBUILD index 328e3193fb..91eb208557 100644 --- a/cri-o/PKGBUILD +++ b/cri-o/PKGBUILD @@ -31,6 +31,7 @@ makedepends=( libassuan libseccomp ostree + git ) optdepends=( 'apparmor: for apparmor integration' diff --git a/criu/2183.patch b/criu/2183.patch new file mode 100644 index 0000000000..998e5ae3a6 --- /dev/null +++ b/criu/2183.patch @@ -0,0 +1,2237 @@ +From 50bae581a3e5e996bc36e656631a29be9f12ad9a Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 11:23:38 +0800 +Subject: [PATCH 1/6] include: add common header files for loongarch64 + +Signed-off-by: znley +--- + include/common/arch/loongarch64/asm/atomic.h | 62 +++++++++++++++++++ + include/common/arch/loongarch64/asm/bitops.h | 24 +++++++ + .../common/arch/loongarch64/asm/bitsperlong.h | 6 ++ + include/common/arch/loongarch64/asm/linkage.h | 19 ++++++ + include/common/arch/loongarch64/asm/page.h | 39 ++++++++++++ + 5 files changed, 150 insertions(+) + create mode 100644 include/common/arch/loongarch64/asm/atomic.h + create mode 100644 
include/common/arch/loongarch64/asm/bitops.h + create mode 100644 include/common/arch/loongarch64/asm/bitsperlong.h + create mode 100644 include/common/arch/loongarch64/asm/linkage.h + create mode 100644 include/common/arch/loongarch64/asm/page.h + +diff --git a/include/common/arch/loongarch64/asm/atomic.h b/include/common/arch/loongarch64/asm/atomic.h +new file mode 100644 +index 0000000000..9017254397 +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/atomic.h +@@ -0,0 +1,62 @@ ++#ifndef __CR_ATOMIC_H__ ++#define __CR_ATOMIC_H__ ++ ++#include ++#include "common/compiler.h" ++ ++typedef struct { ++ int counter; ++} atomic_t; ++ ++static inline int atomic_read(const atomic_t *v) ++{ ++ return (*(volatile int *)&(v)->counter); ++} ++ ++static inline void atomic_set(atomic_t *v, int i) ++{ ++ v->counter = i; ++} ++ ++static inline int __atomic_add(int i, atomic_t *v) ++{ ++ int result; ++ asm volatile("amadd_db.w %1, %2, %0" : "+ZB"(v->counter), "=&r"(result) : "r"(i) : "memory"); ++ return result + i; ++} ++ ++static inline void atomic_add(int i, atomic_t *v) ++{ ++ __atomic_add(i, v); ++} ++ ++static inline int atomic_add_return(int i, atomic_t *v) ++{ ++ return __atomic_add(i, v); ++} ++ ++#define atomic_sub(i, v) atomic_add(-(int)i, v) ++#define atomic_sub_return(i, v) atomic_add_return(-(int)i, v) ++#define atomic_inc(v) atomic_add(1, v) ++#define atomic_inc_return(v) atomic_add_return(1, v) ++#define atomic_dec(v) atomic_sub(1, v) ++#define atomic_dec_return(v) atomic_sub_return(1, v) ++ ++static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) ++{ ++ int ret; ++ asm volatile("1: \n" ++ " ll.w %0, %1 \n" ++ " bne %0, %2, 2f \n" ++ " or $t0, %3, $zero \n" ++ " sc.w $t0, %1 \n" ++ " beqz $t0, 1b \n" ++ "2: \n" ++ " dbar 0 \n" ++ : "=&r"(ret), "+ZB"(ptr->counter) ++ : "r"(old), "r"(new) ++ : "t0", "memory"); ++ return ret; ++} ++ ++#endif /* __CR_ATOMIC_H__ */ +diff --git a/include/common/arch/loongarch64/asm/bitops.h b/include/common/arch/loongarch64/asm/bitops.h +new file mode 100644 +index 0000000000..170e4f7369 +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/bitops.h +@@ -0,0 +1,24 @@ ++#ifndef _LINUX_BITOPS_H ++#define _LINUX_BITOPS_H ++#include "common/asm-generic/bitops.h" ++ ++/** ++ * test_and_set_bit - Set a bit and return its old value ++ * @nr: Bit to set ++ * @addr: Address to count from ++ * ++ * This operation is atomic and cannot be reordered. ++ * It also implies a memory barrier. 
++ */ ++ ++#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) ++#define BIT_WORD(nr) ((1UL << ((nr) / BITS_PER_LONG)) - 1) ++static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) ++{ ++ unsigned long res, mask; ++ mask = BIT_MASK(nr); ++ asm volatile("amor_db.d %0, %2, %1" : "=&r"(res), "+ZB"(addr[BIT_WORD(nr)]) : "r"(mask) : "memory"); ++ return (res & mask) != 0; ++} ++ ++#endif +diff --git a/include/common/arch/loongarch64/asm/bitsperlong.h b/include/common/arch/loongarch64/asm/bitsperlong.h +new file mode 100644 +index 0000000000..13d06a384e +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/bitsperlong.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_BITSPERLONG_H__ ++#define __CR_BITSPERLONG_H__ ++ ++#define BITS_PER_LONG _LOONGARCH_SZLONG ++ ++#endif /* __CR_BITSPERLONG_H__ */ +diff --git a/include/common/arch/loongarch64/asm/linkage.h b/include/common/arch/loongarch64/asm/linkage.h +new file mode 100644 +index 0000000000..448acc29fc +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/linkage.h +@@ -0,0 +1,19 @@ ++#ifndef __CR_LINKAGE_H__ ++#define __CR_LINKAGE_H__ ++ ++#define __ALIGN .align 2 ++#define __ALIGN_STR ".align 2" ++ ++#define GLOBAL(name) \ ++ .globl name; \ ++name: ++ ++#define ENTRY(name) \ ++ .globl name; \ ++ __ALIGN; \ ++ .type name, @function; \ ++name: ++ ++#define END(sym) .size sym, .- sym ++ ++#endif /* __CR_LINKAGE_H__ */ +diff --git a/include/common/arch/loongarch64/asm/page.h b/include/common/arch/loongarch64/asm/page.h +new file mode 100644 +index 0000000000..25bdbc1412 +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/page.h +@@ -0,0 +1,39 @@ ++#ifndef __CR_ASM_PAGE_H__ ++#define __CR_ASM_PAGE_H__ ++ ++#define ARCH_HAS_LONG_PAGES ++ ++#ifndef CR_NOGLIBC ++#include /* ffsl() */ ++#include /* _SC_PAGESIZE */ ++ ++static unsigned __page_size; ++static unsigned __page_shift; ++ ++static inline unsigned page_size(void) ++{ ++ if (!__page_size) ++ __page_size = sysconf(_SC_PAGESIZE); ++ return __page_size; ++} ++ ++static inline unsigned page_shift(void) ++{ ++ if (!__page_shift) ++ __page_shift = (ffsl(page_size()) - 1); ++ return __page_shift; ++} ++ ++#define PAGE_SIZE page_size() ++#define PAGE_SHIFT page_shift() ++#define PAGE_MASK (~(PAGE_SIZE - 1)) ++ ++#define PAGE_PFN(addr) ((addr) / PAGE_SIZE) ++#else /* CR_NOGLIBC */ ++ ++extern unsigned page_size(void); ++#define PAGE_SIZE page_size() ++ ++#endif /* CR_NOGLIBC */ ++ ++#endif /* __CR_ASM_PAGE_H__ */ + +From 0d63f58663fa22eb9fecd1cc778a49c49ddccfc9 Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 09:35:40 +0000 +Subject: [PATCH 2/6] compel: add loongarch64 support + +Signed-off-by: znley +--- + Makefile | 10 +- + compel/Makefile | 2 +- + .../plugins/include/asm/prologue.h | 35 +++ + .../plugins/include/asm/syscall-types.h | 30 +++ + .../loongarch64/plugins/include/features.h | 4 + + .../loongarch64/plugins/std/parasite-head.S | 9 + + .../plugins/std/syscalls/Makefile.syscalls | 117 ++++++++++ + .../syscalls/syscall-common-loongarch-64.S | 44 ++++ + .../plugins/std/syscalls/syscall_64.tbl | 121 +++++++++++ + .../loongarch64/scripts/compel-pack.lds.S | 32 +++ + compel/arch/loongarch64/src/lib/cpu.c | 41 ++++ + .../loongarch64/src/lib/handle-elf-host.c | 22 ++ + compel/arch/loongarch64/src/lib/handle-elf.c | 22 ++ + .../loongarch64/src/lib/include/handle-elf.h | 8 + + .../loongarch64/src/lib/include/syscall.h | 8 + + .../src/lib/include/uapi/asm/breakpoints.h | 6 + + .../src/lib/include/uapi/asm/cpu.h | 6 + + .../src/lib/include/uapi/asm/fpu.h | 4 + + 
.../src/lib/include/uapi/asm/infect-types.h | 67 ++++++ + .../src/lib/include/uapi/asm/sigframe.h | 86 ++++++++ + compel/arch/loongarch64/src/lib/infect.c | 204 ++++++++++++++++++ + compel/src/main.c | 3 + + scripts/nmk/scripts/include.mk | 3 +- + 23 files changed, 881 insertions(+), 3 deletions(-) + create mode 100644 compel/arch/loongarch64/plugins/include/asm/prologue.h + create mode 100644 compel/arch/loongarch64/plugins/include/asm/syscall-types.h + create mode 100644 compel/arch/loongarch64/plugins/include/features.h + create mode 100644 compel/arch/loongarch64/plugins/std/parasite-head.S + create mode 100644 compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls + create mode 100644 compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S + create mode 100644 compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl + create mode 100644 compel/arch/loongarch64/scripts/compel-pack.lds.S + create mode 100644 compel/arch/loongarch64/src/lib/cpu.c + create mode 100644 compel/arch/loongarch64/src/lib/handle-elf-host.c + create mode 100644 compel/arch/loongarch64/src/lib/handle-elf.c + create mode 100644 compel/arch/loongarch64/src/lib/include/handle-elf.h + create mode 100644 compel/arch/loongarch64/src/lib/include/syscall.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h + create mode 100644 compel/arch/loongarch64/src/lib/infect.c + +diff --git a/Makefile b/Makefile +index a5c6c5bccf..9a297d2d83 100644 +--- a/Makefile ++++ b/Makefile +@@ -19,7 +19,7 @@ endif + + # + # Supported Architectures +-ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips,$(ARCH)),) ++ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips loongarch64,$(ARCH)),) + $(error "The architecture $(ARCH) isn't supported") + endif + +@@ -80,6 +80,10 @@ ifeq ($(ARCH),mips) + DEFINES := -DCONFIG_MIPS + endif + ++ifeq ($(ARCH),loongarch64) ++ DEFINES := -DCONFIG_LOONGARCH64 ++endif ++ + # + # CFLAGS_PIE: + # +@@ -122,6 +126,10 @@ ifeq ($(ARCH),mips) + WARNINGS := -rdynamic + endif + ++ifeq ($(ARCH),loongarch64) ++WARNINGS := -Wno-implicit-function-declaration ++endif ++ + ifneq ($(GCOV),) + LDFLAGS += -lgcov + CFLAGS += $(CFLAGS-GCOV) +diff --git a/compel/Makefile b/compel/Makefile +index b79aee6871..78ec4826af 100644 +--- a/compel/Makefile ++++ b/compel/Makefile +@@ -33,7 +33,7 @@ lib-y += arch/$(ARCH)/src/lib/thread_area.o + endif + + # handle_elf() has no support of ELF relocations on ARM (yet?) 
+-ifneq ($(filter arm aarch64,$(ARCH)),) ++ifneq ($(filter arm aarch64 loongarch64,$(ARCH)),) + CFLAGS += -DNO_RELOCS + HOSTCFLAGS += -DNO_RELOCS + endif +diff --git a/compel/arch/loongarch64/plugins/include/asm/prologue.h b/compel/arch/loongarch64/plugins/include/asm/prologue.h +new file mode 100644 +index 0000000000..c19ce54d7a +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/include/asm/prologue.h +@@ -0,0 +1,35 @@ ++#ifndef __ASM_PROLOGUE_H__ ++#define __ASM_PROLOGUE_H__ ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++#include ++#include ++ ++#include ++ ++#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL) ++ ++typedef struct prologue_init_args { ++ struct sockaddr_un ctl_sock_addr; ++ unsigned int ctl_sock_addr_len; ++ ++ unsigned int arg_s; ++ void *arg_p; ++ ++ void *sigframe; ++} prologue_init_args_t; ++ ++#endif /* __ASSEMBLY__ */ ++ ++/* ++ * Reserve enough space for sigframe. ++ * ++ * FIXME It is rather should be taken from sigframe header. ++ */ ++#define PROLOGUE_SGFRAME_SIZE 4096 ++ ++#define PROLOGUE_INIT_ARGS_SIZE 1024 ++ ++#endif /* __ASM_PROLOGUE_H__ */ +diff --git a/compel/arch/loongarch64/plugins/include/asm/syscall-types.h b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h +new file mode 100644 +index 0000000000..b883bd8bed +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h +@@ -0,0 +1,30 @@ ++#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__ ++#define COMPEL_ARCH_SYSCALL_TYPES_H__ ++ ++#include ++/* Types for sigaction, sigprocmask syscalls */ ++typedef void rt_signalfn_t(int, siginfo_t *, void *); ++typedef rt_signalfn_t *rt_sighandler_t; ++ ++typedef void rt_restorefn_t(void); ++typedef rt_restorefn_t *rt_sigrestore_t; ++ ++/* refer to arch/loongarch/include/uapi/asm/signal.h */ ++#define _KNSIG 64 ++#define _NSIG_BPW BITS_PER_LONG ++#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW) ++ ++typedef struct { ++ uint64_t sig[_KNSIG_WORDS]; ++} k_rtsigset_t; ++ ++typedef struct { ++ rt_sighandler_t rt_sa_handler; ++ unsigned long rt_sa_flags; ++ rt_sigrestore_t rt_sa_restorer; ++ k_rtsigset_t rt_sa_mask; ++} rt_sigaction_t; ++ ++#define SA_RESTORER 0x04000000 ++ ++#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */ +diff --git a/compel/arch/loongarch64/plugins/include/features.h b/compel/arch/loongarch64/plugins/include/features.h +new file mode 100644 +index 0000000000..b4a3cded2b +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/include/features.h +@@ -0,0 +1,4 @@ ++#ifndef __COMPEL_ARCH_FEATURES_H ++#define __COMPEL_ARCH_FEATURES_H ++ ++#endif /* __COMPEL_ARCH_FEATURES_H */ +diff --git a/compel/arch/loongarch64/plugins/std/parasite-head.S b/compel/arch/loongarch64/plugins/std/parasite-head.S +new file mode 100644 +index 0000000000..3a960490eb +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/parasite-head.S +@@ -0,0 +1,9 @@ ++ ++#include "common/asm/linkage.h" ++ ++ .section .head.text, "ax" ++ENTRY(__export_parasite_head_start) ++ bl parasite_service; ++ break 0; ++END(__export_parasite_head_start) ++ +diff --git a/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls +new file mode 100644 +index 0000000000..0d08f34e1d +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls +@@ -0,0 +1,117 @@ ++std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o ++sys-proto-types := $(obj)/include/uapi/std/syscall-types.h ++sys-proto-generic := $(obj)/include/uapi/std/syscall.h ++sys-codes-generic := 
$(obj)/include/uapi/std/syscall-codes.h ++sys-codes = $(obj)/include/uapi/std/syscall-codes-$(1).h ++sys-proto = $(obj)/include/uapi/std/syscall-$(1).h ++sys-def = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl ++sys-asm = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S ++sys-asm-common-name = std/syscalls/syscall-common-loongarch-$(1).S ++sys-asm-common = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name) ++sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h ++sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c ++ ++sys-bits := 64 ++ ++AV := $$$$ ++ ++define gen-rule-sys-codes ++$(sys-codes): $(sys-def) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__" >> $$@ ++ $(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__" >> $$@ ++ $(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1; \ ++ sub("^__NR", "SYS", SYSN); \ ++ print "\n#ifndef ", $(AV)1; \ ++ print "#define", $(AV)1, $(AV)2; \ ++ print "#endif"; \ ++ print "\n#ifndef ", SYSN; \ ++ print "#define ", SYSN, $(AV)1; \ ++ print "#endif";}' >> $$@ ++ $(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */" >> $$@ ++endef ++ ++define gen-rule-sys-proto ++$(sys-proto): $(sys-def) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ ++ $(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ ++ $(Q) echo '#include ' >> $$@ ++ $(Q) echo '#include ' >> $$@ ++ifeq ($(1),32) ++ $(Q) echo '#include "asm/syscall32.h"' >> $$@ ++endif ++ $(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3, \ ++ substr($(AV)0, index($(AV)0,$(AV)4)), ";"}' >> $$@ ++ $(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */" >> $$@ ++endef ++ ++define gen-rule-sys-asm ++$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) echo '#include ' >> $$@ ++ $(Q) echo '#include "$(sys-asm-common-name)"' >> $$@ ++ $(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}' >> $$@ ++endef ++ ++define gen-rule-sys-exec-tbl ++$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) cat $$< | awk '/^__NR/{print \ ++ "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ ++endef ++ ++$(sys-codes-generic): $(sys-proto-types) ++ $(call msg-gen, $@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $@ ++ $(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@ ++ $(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@ ++ $(Q) echo '#include ' >> $@ ++ $(Q) cat $< | awk '/^__NR/{NR32=$$1; \ ++ sub("^__NR", "__NR32", NR32); \ ++ print "\n#ifndef ", NR32; \ ++ print "#define ", NR32, $$2; \ ++ print "#endif";}' >> $@ ++ $(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */" >> $@ ++mrproper-y += $(sys-codes-generic) ++ ++$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types) ++ $(call msg-gen, $@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $@ ++ $(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__" >> $@ ++ $(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__" >> $@ ++ $(Q) echo "" >> $@ ++ $(Q) echo '#include ' >> $@ ++ $(Q) echo "" >> $@ ++ $(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */" >> $@ ++mrproper-y += $(sys-proto-generic) ++ ++define gen-rule-sys-exec-tbl ++$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) ++ $(call msg-gen, $$@) ++ 
$(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) cat $$< | awk '/^__NR/{print \ ++ "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ ++endef ++ ++$(eval $(call map,gen-rule-sys-codes,$(sys-bits))) ++$(eval $(call map,gen-rule-sys-proto,$(sys-bits))) ++$(eval $(call map,gen-rule-sys-asm,$(sys-bits))) ++$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits))) ++ ++$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h ++ $(call msg-gen, $@) ++ $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types) ++ ++std-headers-deps += $(call sys-codes,$(sys-bits)) ++std-headers-deps += $(call sys-proto,$(sys-bits)) ++std-headers-deps += $(call sys-asm,$(sys-bits)) ++std-headers-deps += $(call sys-exec-tbl,$(sys-bits)) ++std-headers-deps += $(sys-codes-generic) ++std-headers-deps += $(sys-proto-generic) ++std-headers-deps += $(sys-asm-types) ++mrproper-y += $(std-headers-deps) +diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S +new file mode 100644 +index 0000000000..fff8944669 +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S +@@ -0,0 +1,44 @@ ++#include "common/asm/linkage.h" ++ ++#define SYSCALL(name, opcode) \ ++ENTRY(name); \ ++ addi.d $a7, $zero, opcode; \ ++ syscall 0; \ ++ jirl $r0, $r1, 0; \ ++END(name) ++ ++#ifndef AT_FDCWD ++#define AT_FDCWD -100 ++#endif ++ ++#ifndef AT_REMOVEDIR ++#define AT_REMOVEDIR 0x200 ++#endif ++ ++ENTRY(sys_open) ++ or $a3, $zero, $a2 ++ or $a2, $zero, $a1 ++ or $a1, $zero, $a0 ++ addi.d $a0, $zero, AT_FDCWD ++ b sys_openat ++END(sys_open) ++ ++ENTRY(sys_mkdir) ++ or $a3, $zero, $a2 ++ or $a2, $zero, $a1 ++ or $a1, $zero, $a0 ++ addi.d $a0, $zero, AT_FDCWD ++ b sys_mkdirat ++END(sys_mkdir) ++ ++ENTRY(sys_rmdir) ++ addi.d $a2, $zero, AT_REMOVEDIR ++ or $a1, $zero, $a0 ++ addi.d $a0, $zero, AT_FDCWD ++ b sys_unlinkat ++END(sys_rmdir) ++ ++ENTRY(__cr_restore_rt) ++ addi.d $a7, $zero, __NR_rt_sigreturn ++ syscall 0 ++END(__cr_restore_rt) +diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +new file mode 100644 +index 0000000000..b37a22674e +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +@@ -0,0 +1,121 @@ ++# ++# System calls table, please make sure the table consist only the syscalls ++# really used somewhere in project. ++# from kernel/linux-3.10.84/arch/mips/include/uapi/asm/unistd.h Linux 64-bit syscalls are in the range from 5000 to 5999. 
++# ++# __NR_name code name arguments ++# ------------------------------------------------------------------------------------------------------------------------------------------------------------- ++__NR_io_setup 0 sys_io_setup (unsigned nr_events, aio_context_t *ctx) ++__NR_io_submit 2 sys_io_submit (aio_context_t ctx, long nr, struct iocb **iocbpp) ++__NR_io_getevents 4 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) ++__NR_fcntl 25 sys_fcntl (int fd, int type, long arg) ++__NR_ioctl 29 sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg) ++__NR_flock 32 sys_flock (int fd, unsigned long cmd) ++__NR_mkdirat 34 sys_mkdirat (int dfd, const char *pathname, int flag) ++__NR_unlinkat 35 sys_unlinkat (int dfd, const char *pathname, int flag) ++__NR_umount2 39 sys_umount2 (char *name, int flags) ++__NR_mount 40 sys_mount (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data) ++__NR_fallocate 47 sys_fallocate (int fd, int mode, loff_t offset, loff_t len) ++__NR_close 57 sys_close (int fd) ++__NR_openat 56 sys_openat (int dfd, const char *filename, int flags, int mode) ++__NR_lseek 62 sys_lseek (int fd, unsigned long offset, unsigned long origin) ++__NR_read 63 sys_read (int fd, void *buf, unsigned long count) ++__NR_write 64 sys_write (int fd, const void *buf, unsigned long count) ++__NR_pread64 67 sys_pread (unsigned int fd, char *buf, size_t count, loff_t pos) ++__NR_preadv 69 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) ++__NR_ppoll 73 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++__NR_signalfd4 74 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags) ++__NR_vmsplice 75 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags) ++__NR_readlinkat 78 sys_readlinkat (int fd, const char *path, char *buf, int bufsize) ++__NR_timerfd_settime 86 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr) ++__NR_capget 90 sys_capget (struct cap_header *h, struct cap_data *d) ++__NR_capset 91 sys_capset (struct cap_header *h, struct cap_data *d) ++__NR_personality 92 sys_personality (unsigned int personality) ++__NR_exit 93 sys_exit (unsigned long error_code) ++__NR_exit_group 94 sys_exit_group (int error_code) ++__NR_waitid 95 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) ++__NR_set_tid_address 96 sys_set_tid_address (int *tid_addr) ++__NR_futex 98 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) ++__NR_set_robust_list 99 sys_set_robust_list (struct robust_list_head *head, size_t len) ++__NR_get_robust_list 100 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) ++__NR_nanosleep 101 sys_nanosleep (struct timespec *req, struct timespec *rem) ++__NR_getitimer 102 sys_getitimer (int which, const struct itimerval *val) ++__NR_setitimer 103 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old) ++__NR_sys_timer_create 107 sys_timer_create (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id) ++__NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimerspec *setting) ++__NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id) ++__NR_sys_timer_settime 110 sys_timer_settime 
(kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) ++__NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id) ++__NR_clock_gettime 113 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) ++__NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) ++__NR_restart_syscall 128 sys_restart_syscall (void) ++__NR_kill 129 sys_kill (long pid, int sig) ++__NR_sigaltstack 132 sys_sigaltstack (const void *uss, void *uoss) ++__NR_rt_sigaction 134 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize) ++__NR_rt_sigprocmask 135 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize) ++__NR_rt_sigqueueinfo 138 sys_rt_sigqueueinfo (pid_t pid, int sig, siginfo_t *info) ++__NR_rt_sigreturn 139 sys_rt_sigreturn (void) ++__NR_setpriority 140 sys_setpriority (int which, int who, int nice) ++__NR_setresuid 147 sys_setresuid (int uid, int euid, int suid) ++__NR_getresuid 148 sys_getresuid (int *uid, int *euid, int *suid) ++__NR_setresgid 149 sys_setresgid (int gid, int egid, int sgid) ++__NR_getresgid 150 sys_getresgid (int *gid, int *egid, int *sgid) ++__NR_getpgid 155 sys_getpgid (pid_t pid) ++__NR_setfsuid 151 sys_setfsuid (int fsuid) ++__NR_setfsgid 152 sys_setfsgid (int fsgid) ++__NR_getsid 156 sys_getsid (void) ++__NR_getgroups 158 sys_getgroups (int gsize, unsigned int *groups) ++__NR_setgroups 159 sys_setgroups (int gsize, unsigned int *groups) ++__NR_setrlimit 164 sys_setrlimit (int resource, struct krlimit *rlim) ++__NR_umask 166 sys_umask (int mask) ++__NR_prctl 167 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) ++__NR_gettimeofday 169 sys_gettimeofday (struct timeval *tv, struct timezone *tz) ++__NR_getpid 172 sys_getpid (void) ++__NR_ptrace 177 sys_ptrace (long request, pid_t pid, void *addr, void *data) ++__NR_gettid 178 sys_gettid (void) ++__NR_shmat 196 sys_shmat (int shmid, void *shmaddr, int shmflag) ++__NR_socket 198 sys_socket (int domain, int type, int protocol) ++__NR_bind 200 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen) ++__NR_connect 203 sys_connect (int sockfd, struct sockaddr *addr, int addrlen) ++__NR_sendto 206 sys_sendto (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len) ++__NR_recvfrom 207 sys_recvfrom (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len) ++__NR_setsockopt 208 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen) ++__NR_getsockopt 209 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen) ++__NR_shutdown 210 sys_shutdown (int sockfd, int how) ++__NR_sendmsg 211 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags) ++__NR_recvmsg 212 sys_recvmsg (int sockfd, struct msghdr *msg, int flags) ++__NR_brk 214 sys_brk (void *addr) ++__NR_munmap 215 sys_munmap (void *addr, unsigned long len) ++__NR_mremap 216 sys_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) ++__NR_clone 220 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid) ++__NR_mmap 222 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) ++__NR_mprotect 226 sys_mprotect (const void *addr, 
unsigned long len, unsigned long prot) ++__NR_mincore 232 sys_mincore (void *addr, unsigned long size, unsigned char *vec) ++__NR_madvise 233 sys_madvise (unsigned long start, size_t len, int behavior) ++__NR_rt_tgsigqueueinfo 240 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info) ++__NR_wait4 260 sys_wait4 (int pid, int *status, int options, struct rusage *ru) ++__NR_fanotify_init 262 sys_fanotify_init (unsigned int flags, unsigned int event_f_flags) ++__NR_fanotify_mark 263 sys_fanotify_mark (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname) ++__NR_open_by_handle_at 265 sys_open_by_handle_at (int mountdirfd, struct file_handle *handle, int flags) ++__NR_setns 268 sys_setns (int fd, int nstype) ++__NR_kcmp 272 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) ++__NR_seccomp 277 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) ++__NR_memfd_create 279 sys_memfd_create (const char *name, unsigned int flags) ++__NR_userfaultfd 282 sys_userfaultfd (int flags) ++__NR_rseq 293 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) ++__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags) ++__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags) ++__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) ++__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) ++__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) ++__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) ++__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) ++__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) ++__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) ++#__NR_dup2 ! sys_dup2 (int oldfd, int newfd) ++#__NR_rmdir ! sys_rmdir (const char *name) ++#__NR_unlink ! sys_unlink (char *pathname) ++#__NR_cacheflush ! sys_cacheflush (char *addr, int nbytes, int cache) ++#__NR_set_thread_area ! sys_set_thread_area (unsigned long *addr) ++#__NR_mkdir ! sys_mkdir (const char *name, int mode) ++#__NR_open ! sys_open (const char *filename, unsigned long flags, unsigned long mode) +diff --git a/compel/arch/loongarch64/scripts/compel-pack.lds.S b/compel/arch/loongarch64/scripts/compel-pack.lds.S +new file mode 100644 +index 0000000000..cfb7a2fb35 +--- /dev/null ++++ b/compel/arch/loongarch64/scripts/compel-pack.lds.S +@@ -0,0 +1,32 @@ ++OUTPUT_ARCH(loongarch) ++EXTERN(__export_parasite_head_start) ++ ++SECTIONS ++{ ++ .crblob 0x0 : { ++ *(.head.text) ++ ASSERT(DEFINED(__export_parasite_head_start), ++ "Symbol __export_parasite_head_start is missing"); ++ *(.text*) ++ . = ALIGN(32); ++ *(.data*) ++ . = ALIGN(32); ++ *(.rodata*) ++ . = ALIGN(32); ++ *(.bss*) ++ . = ALIGN(32); ++ *(.got*) ++ . = ALIGN(32); ++ *(.toc*) ++ . 
= ALIGN(32); ++ } =0x00000000, ++ ++ /DISCARD/ : { ++ *(.debug*) ++ *(.comment*) ++ *(.note*) ++ *(.group*) ++ *(.eh_frame*) ++ *(*) ++ } ++} +diff --git a/compel/arch/loongarch64/src/lib/cpu.c b/compel/arch/loongarch64/src/lib/cpu.c +new file mode 100644 +index 0000000000..172b90e275 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/cpu.c +@@ -0,0 +1,41 @@ ++#include ++#include ++ ++#include "compel-cpu.h" ++#include "common/bitops.h" ++#include "common/compiler.h" ++#include "log.h" ++ ++#undef LOG_PREFIX ++#define LOG_PREFIX "cpu: " ++ ++static compel_cpuinfo_t rt_info; ++static bool rt_info_done = false; ++ ++void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) ++{ ++} ++ ++void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) ++{ ++} ++ ++int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) ++{ ++ return 0; ++} ++ ++int compel_cpuid(compel_cpuinfo_t *c) ++{ ++ return 0; ++} ++ ++bool compel_cpu_has_feature(unsigned int feature) ++{ ++ if (!rt_info_done) { ++ compel_cpuid(&rt_info); ++ rt_info_done = true; ++ } ++ ++ return compel_test_cpu_cap(&rt_info, feature); ++} +diff --git a/compel/arch/loongarch64/src/lib/handle-elf-host.c b/compel/arch/loongarch64/src/lib/handle-elf-host.c +new file mode 100644 +index 0000000000..a605a5a452 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/handle-elf-host.c +@@ -0,0 +1,22 @@ ++#include ++#include ++ ++#include "handle-elf.h" ++#include "piegen.h" ++#include "log.h" ++ ++static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = { ++ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++}; ++ ++extern int __handle_elf(void *mem, size_t size); ++ ++int handle_binary(void *mem, size_t size) ++{ ++ if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) ++ return __handle_elf(mem, size); ++ ++ pr_err("Unsupported Elf format detected\n"); ++ return -EINVAL; ++} +diff --git a/compel/arch/loongarch64/src/lib/handle-elf.c b/compel/arch/loongarch64/src/lib/handle-elf.c +new file mode 100644 +index 0000000000..a605a5a452 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/handle-elf.c +@@ -0,0 +1,22 @@ ++#include ++#include ++ ++#include "handle-elf.h" ++#include "piegen.h" ++#include "log.h" ++ ++static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = { ++ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++}; ++ ++extern int __handle_elf(void *mem, size_t size); ++ ++int handle_binary(void *mem, size_t size) ++{ ++ if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) ++ return __handle_elf(mem, size); ++ ++ pr_err("Unsupported Elf format detected\n"); ++ return -EINVAL; ++} +diff --git a/compel/arch/loongarch64/src/lib/include/handle-elf.h b/compel/arch/loongarch64/src/lib/include/handle-elf.h +new file mode 100644 +index 0000000000..b0a66ef879 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/handle-elf.h +@@ -0,0 +1,8 @@ ++#ifndef COMPEL_HANDLE_ELF_H__ ++#define COMPEL_HANDLE_ELF_H__ ++ ++#include "elf64-types.h" ++ ++#define arch_is_machine_supported(e_machine) (e_machine == EM_LOONGARCH) ++ ++#endif /* COMPEL_HANDLE_ELF_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/syscall.h b/compel/arch/loongarch64/src/lib/include/syscall.h +new file mode 100644 +index 0000000000..ac3e2799ac +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/syscall.h +@@ -0,0 +1,8 @@ ++#ifndef 
__COMPEL_SYSCALL_H__ ++#define __COMPEL_SYSCALL_H__ ++ ++#ifndef SIGSTKFLT ++#define SIGSTKFLT 16 ++#endif ++ ++#endif +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h +new file mode 100644 +index 0000000000..21eb1309f2 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h +@@ -0,0 +1,6 @@ ++#ifndef __COMPEL_BREAKPOINTS_H__ ++#define __COMPEL_BREAKPOINTS_H__ ++#define ARCH_SI_TRAP TRAP_BRKPT ++extern int ptrace_set_breakpoint(pid_t pid, void *addr); ++extern int ptrace_flush_breakpoints(pid_t pid); ++#endif +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h +new file mode 100644 +index 0000000000..e568df789c +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_ASM_CPU_H__ ++#define __CR_ASM_CPU_H__ ++ ++typedef struct { ++} compel_cpuinfo_t; ++#endif /* __CR_ASM_CPU_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h +new file mode 100644 +index 0000000000..7f476d541a +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h +@@ -0,0 +1,4 @@ ++#ifndef __CR_ASM_FPU_H__ ++#define __CR_ASM_FPU_H__ ++ ++#endif /* __CR_ASM_FPU_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h +new file mode 100644 +index 0000000000..0b047a5b08 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h +@@ -0,0 +1,67 @@ ++#ifndef UAPI_COMPEL_ASM_TYPES_H__ ++#define UAPI_COMPEL_ASM_TYPES_H__ ++ ++#include ++ ++#define SIGMAX 64 ++#define SIGMAX_OLD 31 ++ ++/* ++ * From the Linux kernel header arch/loongarch/include/uapi/asm/ptrace.h ++ * ++ * A thread LoongArch CPU context ++ * ++ * struct user_fp_state { ++ * uint64_t fpr[32]; ++ * uint64_t fcc; ++ * uint32_t fcsr; ++ * }; ++ * ++ * struct user_pt_regs { ++ * unsigned long regs[32]; ++ * unsigned long csr_era; ++ * unsigned long csr_badv; ++ * unsigned long reserved[11]; ++ * }; ++ */ ++ ++struct user_gp_regs { ++ uint64_t regs[32]; ++ uint64_t orig_a0; ++ uint64_t pc; ++ uint64_t csr_badv; ++ uint64_t reserved[10]; ++} __attribute__((aligned(8))); ++ ++struct user_fp_regs { ++ uint64_t regs[32]; ++ uint64_t fcc; ++ uint32_t fcsr; ++}; ++ ++typedef struct user_gp_regs user_regs_struct_t; ++typedef struct user_fp_regs user_fpregs_struct_t; ++ ++#define user_regs_native(regs) true ++ ++#define __compel_arch_fetch_thread_area(tid, th) 0 ++#define compel_arch_fetch_thread_area(tctl) 0 ++#define compel_arch_get_tls_task(ctl, tls) ++#define compel_arch_get_tls_thread(tctl, tls) ++ ++#define REG_RES(r) ((uint64_t)(r).regs[4]) ++#define REG_IP(r) ((uint64_t)(r).pc) ++#define REG_SP(r) ((uint64_t)(r).regs[3]) ++#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[11]) ++#define SET_REG_IP(r, val) ((r).pc = (val)) ++ ++#define GPR_NUM 32 ++#define FPR_NUM 32 ++ ++#define __NR(syscall, compat) \ ++ ({ \ ++ (void)compat; \ ++ __NR_##syscall; \ ++ }) ++ ++#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h +new file mode 100644 +index 0000000000..fcb545a1d2 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h +@@ -0,0 +1,86 @@ ++#ifndef 
UAPI_COMPEL_ASM_SIGFRAME_H__ ++#define UAPI_COMPEL_ASM_SIGFRAME_H__ ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#define rt_sigcontext sigcontext ++/* sigcontext defined in usr/include/uapi/asm/sigcontext.h*/ ++#include ++typedef __u32 u32; ++ ++typedef struct sigcontext_t { ++ __u64 pc; ++ __u64 regs[32]; ++ __u32 flags; ++ __u64 extcontext[0] __attribute__((__aligned__(16))); ++} sigcontext_t; ++ ++typedef struct context_info_t { ++ __u32 magic; ++ __u32 size; ++ __u64 padding; ++} context_info_t; ++ ++#define FPU_CTX_MAGIC 0x46505501 ++#define FPU_CTX_ALIGN 8 ++typedef struct fpu_context_t { ++ __u64 regs[32]; ++ __u64 fcc; ++ __u64 fcsr; ++} fpu_context_t; ++ ++typedef struct ucontext { ++ unsigned long uc_flags; ++ struct ucontext *uc_link; ++ stack_t uc_stack; ++ sigset_t uc_sigmask; ++ __u8 __unused[1024 / 8 - sizeof(sigset_t)]; ++ sigcontext_t uc_mcontext; ++} ucontext; ++ ++/* Copy from the kernel source arch/loongarch/kernel/signal.c */ ++struct rt_sigframe { ++ rt_siginfo_t rs_info; ++ ucontext rs_uc; ++}; ++ ++#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe->rs_uc)) ++#define RT_SIGFRAME_SIGMASK(rt_sigframe) ((k_rtsigset_t *)&RT_SIGFRAME_UC(rt_sigframe)->uc_sigmask) ++#define RT_SIGFRAME_SIGCTX(rt_sigframe) (&(RT_SIGFRAME_UC(rt_sigframe)->uc_mcontext)) ++#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(RT_SIGFRAME_SIGCTX(rt_sigframe)->pc)) ++#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1) ++ ++#define RT_SIGFRAME_FPU(rt_sigframe) \ ++ ({ \ ++ context_info_t *ctx = (context_info_t *)RT_SIGFRAME_SIGCTX(rt_sigframe)->extcontext; \ ++ ctx->magic = FPU_CTX_MAGIC; \ ++ ctx->size = sizeof(context_info_t) + sizeof(fpu_context_t); \ ++ (fpu_context_t *)((char *)ctx + sizeof(context_info_t)); \ ++ }) ++ ++#define RT_SIGFRAME_OFFSET(rt_sigframe) 0 ++ ++/* clang-format off */ ++#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \ ++ asm volatile( \ ++ "addi.d $sp, %0, 0 \n" \ ++ "addi.d $a7, $zero, "__stringify(__NR_rt_sigreturn)" \n" \ ++ "syscall 0" \ ++ : \ ++ :"r"(new_sp) \ ++ : "$a7", "memory") ++/* clang-format on */ ++ ++int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe); ++ ++#define rt_sigframe_erase_sigset(sigframe) memset(RT_SIGFRAME_SIGMASK(sigframe), 0, sizeof(k_rtsigset_t)) ++#define rt_sigframe_copy_sigset(sigframe, from) memcpy(RT_SIGFRAME_SIGMASK(sigframe), from, sizeof(k_rtsigset_t)) ++ ++#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/infect.c b/compel/arch/loongarch64/src/lib/infect.c +new file mode 100644 +index 0000000000..8e3c19aff2 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/infect.c +@@ -0,0 +1,204 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include "errno.h" ++#include ++#include ++#include "common/err.h" ++#include "common/page.h" ++#include "asm/infect-types.h" ++#include "ptrace.h" ++#include "infect.h" ++#include "infect-priv.h" ++#include "log.h" ++#include "common/bug.h" ++ ++/* ++ * Injected syscall instruction ++ * loongarch64 is Little Endian ++ */ ++const char code_syscall[] = { ++ 0x00, 0x00, 0x2b, 0x00, /* syscall */ ++ 0x00, 0x00, 0x2a, 0x00 /* break */ ++}; ++ ++int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) ++{ ++ sigcontext_t *sc; ++ fpu_context_t *fpu; ++ ++ sc = RT_SIGFRAME_SIGCTX(sigframe); ++ memcpy(sc->regs, regs->regs, sizeof(regs->regs)); ++ sc->pc = regs->pc; ++ ++ fpu = RT_SIGFRAME_FPU(sigframe); ++ 
memcpy(fpu->regs, fpregs->regs, sizeof(fpregs->regs)); ++ fpu->fcc = fpregs->fcc; ++ fpu->fcsr = fpregs->fcsr; ++ return 0; ++} ++ ++int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe) ++{ ++ return 0; ++} ++ ++int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, ++ void *arg, __maybe_unused unsigned long flags) ++{ ++ user_fpregs_struct_t tmp, *fpregs = ext_regs ? ext_regs : &tmp; ++ struct iovec iov; ++ int ret; ++ ++ pr_info("Dumping GP/FPU registers for %d\n", pid); ++ ++ iov.iov_base = regs; ++ iov.iov_len = sizeof(user_regs_struct_t); ++ if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))) { ++ pr_perror("Failed to obtain CPU registers for %d", pid); ++ goto err; ++ } ++ ++ /* ++ * Refer to Linux kernel arch/loongarch/kernel/signal.c ++ */ ++ if (regs->regs[0]) { ++ switch (regs->regs[4]) { ++ case -ERESTARTNOHAND: ++ case -ERESTARTSYS: ++ case -ERESTARTNOINTR: ++ regs->regs[4] = regs->orig_a0; ++ regs->pc -= 4; ++ break; ++ case -ERESTART_RESTARTBLOCK: ++ regs->regs[4] = regs->orig_a0; ++ regs->regs[11] = __NR_restart_syscall; ++ regs->pc -= 4; ++ break; ++ } ++ regs->regs[0] = 0; /* Don't deal with this again. */ ++ } ++ ++ iov.iov_base = fpregs; ++ iov.iov_len = sizeof(user_fpregs_struct_t); ++ if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) { ++ pr_perror("Failed to obtain FPU registers for %d", pid); ++ goto err; ++ } ++ ++ ret = save(arg, regs, fpregs); ++err: ++ return 0; ++} ++ ++int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs) ++{ ++ struct iovec iov; ++ ++ pr_info("Restoring GP/FPU registers for %d\n", pid); ++ ++ iov.iov_base = ext_regs; ++ iov.iov_len = sizeof(*ext_regs); ++ if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) { ++ pr_perror("Failed to set FPU registers for %d", pid); ++ return -1; ++ } ++ return 0; ++} ++ ++/* ++ * Registers $4 ~ $11 represents arguments a0 ~ a7, especially a7 is ++ * used as syscall number. ++ */ ++int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2, ++ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6) ++{ ++ int err; ++ user_regs_struct_t regs = ctl->orig.regs; ++ ++ regs.regs[11] = (unsigned long)nr; ++ regs.regs[4] = arg1; ++ regs.regs[5] = arg2; ++ regs.regs[6] = arg3; ++ regs.regs[7] = arg4; ++ regs.regs[8] = arg5; ++ regs.regs[9] = arg6; ++ err = compel_execute_syscall(ctl, ®s, code_syscall); ++ ++ *ret = regs.regs[4]; ++ ++ return err; ++} ++ ++void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset) ++{ ++ long map; ++ int err; ++ ++ err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset >> PAGE_SHIFT); ++ ++ if (err < 0 || IS_ERR_VALUE(map)) { ++ pr_err("remote mmap() failed: %s\n", strerror(-map)); ++ return NULL; ++ } ++ ++ return (void *)map; ++} ++ ++/* ++ * regs must be inited when calling this function from original context ++ */ ++void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) ++{ ++ regs->pc = new_ip; ++ if (stack) ++ regs->regs[4] = (unsigned long)stack; ++} ++ ++bool arch_can_dump_task(struct parasite_ctl *ctl) ++{ ++ return true; ++} ++ ++int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s) ++{ ++ long ret; ++ int err; ++ ++ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->rs_uc.uc_stack, 0, 0, 0, 0); ++ return err ? 
err : ret; ++} ++ ++/* ++ * TODO: add feature ++ */ ++int ptrace_set_breakpoint(pid_t pid, void *addr) ++{ ++ return 0; ++} ++ ++int ptrace_flush_breakpoints(pid_t pid) ++{ ++ return 0; ++} ++ ++/* ++ * Refer to Linux kernel arch/loongarch/include/asm/processor.h ++ */ ++#define TASK_SIZE32 (1UL) << 31 ++#define TASK_SIZE64_MIN (1UL) << 40 ++#define TASK_SIZE64_MAX (1UL) << 48 ++ ++unsigned long compel_task_size(void) ++{ ++ unsigned long task_size; ++ for (task_size = TASK_SIZE64_MIN; task_size < TASK_SIZE64_MAX; task_size <<= 1) ++ if (munmap((void *)task_size, page_size())) ++ break; ++ return task_size; ++} +diff --git a/compel/src/main.c b/compel/src/main.c +index ef05a46d01..bc16c0ab41 100644 +--- a/compel/src/main.c ++++ b/compel/src/main.c +@@ -57,6 +57,9 @@ static const flags_t flags = { + #elif defined CONFIG_MIPS + .arch = "mips", + .cflags = COMPEL_CFLAGS_PIE, ++#elif defined CONFIG_LOONGARCH64 ++ .arch = "loongarch64", ++ .cflags = COMPEL_CFLAGS_PIE, + #else + #error "CONFIG_ not defined, or unsupported ARCH" + #endif +diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk +index c1c1e94af4..55c5be307f 100644 +--- a/scripts/nmk/scripts/include.mk ++++ b/scripts/nmk/scripts/include.mk +@@ -20,7 +20,8 @@ ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ +- -e s/aarch64.*/aarch64/) ++ -e s/aarch64.*/aarch64/ \ ++ -e s/loongarch64.*/loongarch64/) + + export SUBARCH ARCH + + +From 91c0f7a6d8bef0d8d5836d86430864b2036e140f Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 15:09:22 +0800 +Subject: [PATCH 3/6] images: add loongarch64 core image + +Signed-off-by: znley +--- + images/Makefile | 1 + + images/core-loongarch64.proto | 23 +++++++++++++++++++++++ + images/core.proto | 3 +++ + 3 files changed, 27 insertions(+) + create mode 100755 images/core-loongarch64.proto + +diff --git a/images/Makefile b/images/Makefile +index 004e22ec3f..ca85b1a213 100644 +--- a/images/Makefile ++++ b/images/Makefile +@@ -2,6 +2,7 @@ proto-obj-y += stats.o + proto-obj-y += core.o + proto-obj-y += core-x86.o + proto-obj-y += core-mips.o ++proto-obj-y += core-loongarch64.o + proto-obj-y += core-arm.o + proto-obj-y += core-aarch64.o + proto-obj-y += core-ppc64.o +diff --git a/images/core-loongarch64.proto b/images/core-loongarch64.proto +new file mode 100755 +index 0000000000..8258f006ea +--- /dev/null ++++ b/images/core-loongarch64.proto +@@ -0,0 +1,23 @@ ++// SPDX-License-Identifier: MIT ++ ++syntax = "proto2"; ++ ++import "opts.proto"; ++ ++message user_loongarch64_gpregs_entry { ++ repeated uint64 regs = 1; ++ required uint64 pc = 2; ++} ++ ++message user_loongarch64_fpregs_entry { ++ repeated uint64 regs = 1; ++ required uint64 fcc = 2; ++ required uint32 fcsr = 3; ++} ++ ++message thread_info_loongarch64 { ++ required uint64 clear_tid_addr = 1[(criu).hex = true]; ++ required uint64 tls = 2; ++ required user_loongarch64_gpregs_entry gpregs = 3[(criu).hex = true]; ++ required user_loongarch64_fpregs_entry fpregs = 4[(criu).hex = true]; ++} +diff --git a/images/core.proto b/images/core.proto +index eddd1dc555..1882fe8e42 100644 +--- a/images/core.proto ++++ b/images/core.proto +@@ -8,6 +8,7 @@ import "core-aarch64.proto"; + import "core-ppc64.proto"; + import "core-s390.proto"; + import "core-mips.proto"; ++import "core-loongarch64.proto"; + + import "rlimit.proto"; + import "timer.proto"; +@@ -122,6 +123,7 @@ message core_entry { + PPC64 = 4; + S390 = 5; + MIPS = 6; ++ LOONGARCH64 = 7; + } + + required march 
mtype = 1; +@@ -131,6 +133,7 @@ message core_entry { + optional thread_info_ppc64 ti_ppc64 = 9; + optional thread_info_s390 ti_s390 = 10; + optional thread_info_mips ti_mips = 11; ++ optional thread_info_loongarch64 ti_loongarch64 = 12; + + optional task_core_entry tc = 3; + optional task_kobj_ids_entry ids = 4; + +From c3de76052f24ad5aa88d0093d1c033735f57f163 Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 15:15:30 +0800 +Subject: [PATCH 4/6] criu: add loongarch64 support to parasite and restorer + +Signed-off-by: znley +--- + criu/arch/loongarch64/Makefile | 14 +++ + criu/arch/loongarch64/cpu.c | 31 +++++ + criu/arch/loongarch64/crtools.c | 115 ++++++++++++++++++ + criu/arch/loongarch64/include/asm/dump.h | 15 +++ + criu/arch/loongarch64/include/asm/int.h | 6 + + criu/arch/loongarch64/include/asm/kerndat.h | 7 ++ + .../include/asm/parasite-syscall.h | 6 + + criu/arch/loongarch64/include/asm/parasite.h | 11 ++ + criu/arch/loongarch64/include/asm/restore.h | 33 +++++ + criu/arch/loongarch64/include/asm/restorer.h | 97 +++++++++++++++ + .../loongarch64/include/asm/thread_pointer.h | 27 ++++ + criu/arch/loongarch64/include/asm/types.h | 39 ++++++ + criu/arch/loongarch64/include/asm/vdso.h | 27 ++++ + criu/arch/loongarch64/restorer.c | 14 +++ + criu/arch/loongarch64/sigframe.c | 12 ++ + criu/arch/loongarch64/vdso-pie.c | 48 ++++++++ + 16 files changed, 502 insertions(+) + create mode 100644 criu/arch/loongarch64/Makefile + create mode 100644 criu/arch/loongarch64/cpu.c + create mode 100644 criu/arch/loongarch64/crtools.c + create mode 100644 criu/arch/loongarch64/include/asm/dump.h + create mode 100644 criu/arch/loongarch64/include/asm/int.h + create mode 100644 criu/arch/loongarch64/include/asm/kerndat.h + create mode 100644 criu/arch/loongarch64/include/asm/parasite-syscall.h + create mode 100644 criu/arch/loongarch64/include/asm/parasite.h + create mode 100644 criu/arch/loongarch64/include/asm/restore.h + create mode 100644 criu/arch/loongarch64/include/asm/restorer.h + create mode 100644 criu/arch/loongarch64/include/asm/thread_pointer.h + create mode 100644 criu/arch/loongarch64/include/asm/types.h + create mode 100644 criu/arch/loongarch64/include/asm/vdso.h + create mode 100644 criu/arch/loongarch64/restorer.c + create mode 100644 criu/arch/loongarch64/sigframe.c + create mode 100644 criu/arch/loongarch64/vdso-pie.c + +diff --git a/criu/arch/loongarch64/Makefile b/criu/arch/loongarch64/Makefile +new file mode 100644 +index 0000000000..4bd99eb7eb +--- /dev/null ++++ b/criu/arch/loongarch64/Makefile +@@ -0,0 +1,14 @@ ++builtin-name := crtools.built-in.o ++ ++ccflags-y += -iquote $(obj)/include ++ccflags-y += -iquote criu/include -iquote include ++ccflags-y += $(COMPEL_UAPI_INCLUDES) ++ ++asflags-y += -Wstrict-prototypes ++asflags-y += -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer ++asflags-y += -iquote $(obj)/include ++ldflags-y += -r -z noexecstack ++ ++obj-y += cpu.o ++obj-y += crtools.o ++obj-y += sigframe.o +diff --git a/criu/arch/loongarch64/cpu.c b/criu/arch/loongarch64/cpu.c +new file mode 100644 +index 0000000000..5559c4288f +--- /dev/null ++++ b/criu/arch/loongarch64/cpu.c +@@ -0,0 +1,31 @@ ++#undef LOG_PREFIX ++#define LOG_PREFIX "cpu: " ++ ++int cpu_init(void) ++{ ++ return 0; ++} ++ ++int cpu_dump_cpuinfo(void) ++{ ++ return 0; ++} ++ ++int cpu_validate_cpuinfo(void) ++{ ++ return 0; ++} ++ ++int cpuinfo_dump(void) ++{ ++ if (cpu_init()) ++ return -1; ++ if (cpu_dump_cpuinfo()) ++ return -1; ++ return 0; ++} ++ ++int cpuinfo_check(void) ++{ ++ return 0; ++} 
+diff --git a/criu/arch/loongarch64/crtools.c b/criu/arch/loongarch64/crtools.c +new file mode 100644 +index 0000000000..eeb0731ca6 +--- /dev/null ++++ b/criu/arch/loongarch64/crtools.c +@@ -0,0 +1,115 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "types.h" ++#include "log.h" ++#include "asm/restorer.h" ++#include "asm/parasite-syscall.h" ++#include ++#include "asm/dump.h" ++#include "cr_options.h" ++#include "common/compiler.h" ++#include "restorer.h" ++#include "parasite-syscall.h" ++#include "util.h" ++#include "cpu.h" ++#include ++#include "kerndat.h" ++ ++#include "protobuf.h" ++#include "images/core.pb-c.h" ++#include "images/creds.pb-c.h" ++ ++#define assign_reg(dst, src, e) (dst)->e = (__typeof__(dst->e))(src)->e ++ ++int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) ++{ ++ int i; ++ CoreEntry *core = x; ++ UserLoongarch64GpregsEntry *gprs = core->ti_loongarch64->gpregs; ++ UserLoongarch64FpregsEntry *fprs = core->ti_loongarch64->fpregs; ++ for (i = 0; i < GPR_NUM; i++) ++ assign_reg(gprs, regs, regs[i]); ++ assign_reg(gprs, regs, pc); ++ ++ for (i = 0; i < FPR_NUM; i++) ++ assign_reg(fpregs, fpregs, regs[i]); ++ assign_reg(fprs, fpregs, fcc); ++ assign_reg(fprs, fpregs, fcsr); ++ return 0; ++} ++ ++int arch_alloc_thread_info(CoreEntry *core) ++{ ++ ThreadInfoLoongarch64 *ti_loongarch64; ++ UserLoongarch64GpregsEntry *gpregs; ++ UserLoongarch64FpregsEntry *fpregs; ++ ++ ti_loongarch64 = xmalloc(sizeof(*ti_loongarch64)); ++ thread_info_loongarch64__init(ti_loongarch64); ++ core->ti_loongarch64 = ti_loongarch64; ++ ++ gpregs = xmalloc(sizeof(*gpregs)); ++ if (!gpregs) ++ goto err; ++ user_loongarch64_gpregs_entry__init(gpregs); ++ gpregs->n_regs = GPR_NUM; ++ gpregs->regs = xmalloc(GPR_NUM * sizeof(uint64_t)); ++ if (!gpregs->regs) ++ goto err; ++ ti_loongarch64->gpregs = gpregs; ++ ++ fpregs = xmalloc(sizeof(*fpregs)); ++ if (!fpregs) ++ goto err; ++ user_loongarch64_fpregs_entry__init(fpregs); ++ fpregs->n_regs = FPR_NUM; ++ fpregs->regs = xmalloc(FPR_NUM * sizeof(uint64_t)); ++ if (!fpregs->regs) ++ goto err; ++ ti_loongarch64->fpregs = fpregs; ++ ++ return 0; ++err: ++ return -1; ++} ++ ++void arch_free_thread_info(CoreEntry *core) ++{ ++ if (CORE_THREAD_ARCH_INFO(core)) { ++ if (CORE_THREAD_ARCH_INFO(core)->fpregs) { ++ xfree(CORE_THREAD_ARCH_INFO(core)->fpregs->regs); ++ xfree(CORE_THREAD_ARCH_INFO(core)->fpregs); ++ } ++ xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->regs); ++ xfree(CORE_THREAD_ARCH_INFO(core)->gpregs); ++ xfree(CORE_THREAD_ARCH_INFO(core)); ++ CORE_THREAD_ARCH_INFO(core) = NULL; ++ } ++} ++ ++int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) ++{ ++ fpu_context_t *fpu = RT_SIGFRAME_FPU(sigframe); ++ UserLoongarch64FpregsEntry *fpregs = core->ti_loongarch64->fpregs; ++ ++ memcpy(fpu->regs, fpregs->regs, sizeof(fpu->regs)); ++ fpu->fcc = fpregs->fcc; ++ fpu->fcsr = fpregs->fcsr; ++ return 0; ++} ++ ++int restore_gpregs(struct rt_sigframe *sigframe, UserRegsEntry *r) ++{ ++ sigcontext_t *sc = RT_SIGFRAME_SIGCTX(sigframe); ++ memcpy(sc->regs, r->regs, sizeof(sc->regs)); ++ sc->pc = r->pc; ++ return 0; ++} +diff --git a/criu/arch/loongarch64/include/asm/dump.h b/criu/arch/loongarch64/include/asm/dump.h +new file mode 100644 +index 0000000000..04347155c3 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/dump.h +@@ -0,0 +1,15 @@ ++#ifndef __CR_ASM_DUMP_H__ ++#define __CR_ASM_DUMP_H__ ++ ++extern int save_task_regs(void *, user_regs_struct_t *, 
user_fpregs_struct_t *); ++extern int arch_alloc_thread_info(CoreEntry *core); ++extern void arch_free_thread_info(CoreEntry *core); ++ ++static inline void core_put_tls(CoreEntry *core, tls_t tls) ++{ ++ core->ti_loongarch64->tls = tls; ++} ++ ++#define get_task_futex_robust_list_compat(pid, info) -1 ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/int.h b/criu/arch/loongarch64/include/asm/int.h +new file mode 100644 +index 0000000000..642804e9b4 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/int.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_ASM_INT_H__ ++#define __CR_ASM_INT_H__ ++ ++#include "asm-generic/int.h" ++ ++#endif /* __CR_ASM_INT_H__ */ +diff --git a/criu/arch/loongarch64/include/asm/kerndat.h b/criu/arch/loongarch64/include/asm/kerndat.h +new file mode 100644 +index 0000000000..bb70cf6cf5 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/kerndat.h +@@ -0,0 +1,7 @@ ++#ifndef __CR_ASM_KERNDAT_H__ ++#define __CR_ASM_KERNDAT_H__ ++ ++#define kdat_compatible_cr() 0 ++#define kdat_can_map_vdso() 0 ++ ++#endif /* __CR_ASM_KERNDAT_H__ */ +diff --git a/criu/arch/loongarch64/include/asm/parasite-syscall.h b/criu/arch/loongarch64/include/asm/parasite-syscall.h +new file mode 100644 +index 0000000000..6008c37923 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/parasite-syscall.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_ASM_PARASITE_SYSCALL_H__ ++#define __CR_ASM_PARASITE_SYSCALL_H__ ++ ++struct parasite_ctl; ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/parasite.h b/criu/arch/loongarch64/include/asm/parasite.h +new file mode 100644 +index 0000000000..b64cb3185c +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/parasite.h +@@ -0,0 +1,11 @@ ++#ifndef __ASM_PARASITE_H__ ++#define __ASM_PARASITE_H__ ++ ++static inline void arch_get_tls(tls_t *ptls) ++{ ++ tls_t tls; ++ asm volatile("or %0, $zero, $tp" : "=r"(tls)); ++ *ptls = tls; ++} ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/restore.h b/criu/arch/loongarch64/include/asm/restore.h +new file mode 100644 +index 0000000000..d956231c81 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/restore.h +@@ -0,0 +1,33 @@ ++#ifndef __CR_ASM_RESTORE_H__ ++#define __CR_ASM_RESTORE_H__ ++ ++#include "asm/restorer.h" ++#include "images/core.pb-c.h" ++ ++/* clang-format off */ ++#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, task_args) \ ++({ \ ++ uint64_t save_sp; \ ++ asm volatile("or %0, $zero, $sp" : "=r"(save_sp) : :"memory"); \ ++ asm volatile( \ ++ "or $a0, $zero, %2 \n" \ ++ "or $sp, $zero, %0 \n" \ ++ "jirl $ra, %1, 0 \n" \ ++ : \ ++ : "r"(new_sp & ~15), \ ++ "r"(restore_task_exec_start), \ ++ "r"(task_args) \ ++ : "$a0", "memory"); \ ++ asm volatile("or $sp, $zero, %0" : : "r"(save_sp) : "memory"); \ ++}) ++ ++/* clang-format on */ ++ ++static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) ++{ ++ *ptls = pcore->ti_loongarch64->tls; ++} ++ ++int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core); ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/restorer.h b/criu/arch/loongarch64/include/asm/restorer.h +new file mode 100644 +index 0000000000..7a0d35c5b5 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/restorer.h +@@ -0,0 +1,97 @@ ++#ifndef __CR_ASM_RESTORER_H__ ++#define __CR_ASM_RESTORER_H__ ++ ++#include "asm/types.h" ++#include ++#include "images/core.pb-c.h" ++#include ++#include ++ ++/* clang-format off */ ++#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ ++ thread_args, clone_restore_fn) \ ++ asm volatile( \ ++ 
"clone_emul: \n" \ ++ "ld.d $a1, %2 \n" \ ++ "addi.d $a1, $a1, -16 \n" \ ++ "st.d %5, $a1, 0 \n" \ ++ "st.d %6, $a1, 8 \n" \ ++ "or $a0, $zero, %1 \n" \ ++ "or $a2, $zero, %3 \n" \ ++ "or $a3, $zero, %4 \n" \ ++ "ori $a7, $zero, "__stringify(__NR_clone)" \n" \ ++ "syscall 0 \n" \ ++ \ ++ "beqz $a0, thread_run \n" \ ++ \ ++ "or %0, $zero, $a0 \n" \ ++ "b clone_end \n" \ ++ \ ++ "thread_run: \n" \ ++ "ld.d $a1, $sp, 0 \n" \ ++ "ld.d $a0, $sp, 8 \n" \ ++ "jirl $ra, $a1, 0 \n" \ ++ \ ++ "clone_end: \n" \ ++ : "=r"(ret) \ ++ : "r"(clone_flags), \ ++ "ZB"(new_sp), \ ++ "r"(&parent_tid), \ ++ "r"(&thread_args[i].pid), \ ++ "r"(&clone_restore_fn), \ ++ "r"(&thread_args[i]) \ ++ : "$a0", "$a1", "$a2", "$a3", "$a7", "memory") ++ ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) \ ++ asm volatile( \ ++ "clone3_emul: \n" \ ++ "or $a0, $zero, %1 \n" \ ++ "or $a1, $zero, %2 \n" \ ++ "or $a2, $zero, %3 \n" \ ++ "or $a3, $zero, %4 \n" \ ++ "ori $a7, $zero, "__stringify(__NR_clone3)" \n" \ ++ "syscall 0 \n" \ ++ \ ++ "beqz $a0, clone3_thread_run \n" \ ++ \ ++ "or %0, $zero, $a0 \n" \ ++ "b clone3_end \n" \ ++ \ ++ "clone3_thread_run: \n" \ ++ "or $a0, $zero, $a3 \n" \ ++ "jirl $ra, $a2, 0 \n" \ ++ "clone3_end: \n" \ ++ : "=r"(ret) \ ++ : "r"(&clone_args), \ ++ "r"(size), \ ++ "r"(clone_restore_fn), \ ++ "r"(args) \ ++ : "$a0", "$a1", "$a2", "$a3", "$a7", "memory") ++/* clang-format on */ ++ ++static inline void restore_tls(tls_t *ptls) ++{ ++ asm volatile("or $tp, $zero, %0" : : "r"(*ptls)); ++} ++static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act) ++{ ++ return -1; ++} ++static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) ++{ ++ return -1; ++} ++static inline void *alloc_compat_syscall_stack(void) ++{ ++ return NULL; ++} ++static inline void free_compat_syscall_stack(void *stack32) ++{ ++} ++int restore_gpregs(struct rt_sigframe *f, UserLoongarch64GpregsEntry *r); ++int restore_nonsigframe_gpregs(UserLoongarch64GpregsEntry *r); ++ ++#define arch_map_vdso(map, compat) -1 ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/thread_pointer.h b/criu/arch/loongarch64/include/asm/thread_pointer.h +new file mode 100644 +index 0000000000..f7e07066a5 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/thread_pointer.h +@@ -0,0 +1,27 @@ ++/* __thread_pointer definition. Generic version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#ifndef _SYS_THREAD_POINTER_H ++#define _SYS_THREAD_POINTER_H ++ ++static inline void *__criu_thread_pointer(void) ++{ ++ return __builtin_thread_pointer(); ++} ++ ++#endif /* _SYS_THREAD_POINTER_H */ +diff --git a/criu/arch/loongarch64/include/asm/types.h b/criu/arch/loongarch64/include/asm/types.h +new file mode 100644 +index 0000000000..72bca2022b +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/types.h +@@ -0,0 +1,39 @@ ++#ifndef __CR_ASM_TYPES_H__ ++#define __CR_ASM_TYPES_H__ ++ ++#include ++#include ++ ++#include "page.h" ++#include "bitops.h" ++#include "asm/int.h" ++#include "images/core.pb-c.h" ++ ++#include ++ ++#define core_is_compat(core) false ++ ++#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__LOONGARCH64 ++ ++#define CORE_THREAD_ARCH_INFO(core) core->ti_loongarch64 ++ ++#define TI_SP(core) ((core)->ti_loongarch64->gpregs->regs[4]) ++ ++#define TI_IP(core) ((core)->ti_loongarch64->gpregs->pc) ++ ++typedef UserLoongarch64GpregsEntry UserRegsEntry; ++ ++static inline uint64_t encode_pointer(void *p) ++{ ++ return (uint64_t)p; ++} ++static inline void *decode_pointer(uint64_t v) ++{ ++ return (void *)v; ++} ++ ++#define AT_VECTOR_SIZE 44 ++typedef uint64_t auxv_t; ++typedef uint64_t tls_t; ++ ++#endif /* __CR_ASM_TYPES_H__ */ +diff --git a/criu/arch/loongarch64/include/asm/vdso.h b/criu/arch/loongarch64/include/asm/vdso.h +new file mode 100644 +index 0000000000..64631dee09 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/vdso.h +@@ -0,0 +1,27 @@ ++#ifndef __CR_ASM_VDSO_H__ ++#define __CR_ASM_VDSO_H__ ++ ++#include "asm/int.h" ++#include "asm-generic/vdso.h" ++ ++/* This definition is used in pie/util-vdso.c to initialize the vdso symbol ++ * name string table 'vdso_symbols' ++ */ ++ ++/* ++ * This is a minimal amount of symbols ++ * we should support at the moment. 
++ */ ++#define VDSO_SYMBOL_MAX 5 ++#define VDSO_SYMBOL_GTOD 3 ++ ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char *aarch_vdso_symbol1 = "__vdso_getcpu"; \ ++ const char *aarch_vdso_symbol2 = "__vdso_clock_getres"; \ ++ const char *aarch_vdso_symbol3 = "__vdso_clock_gettime"; \ ++ const char *aarch_vdso_symbol4 = "__vdso_gettimeofday"; \ ++ const char *aarch_vdso_symbol5 = "__vdso_rt_sigreturn"; ++ ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, aarch_vdso_symbol2, aarch_vdso_symbol3, aarch_vdso_symbol4, aarch_vdso_symbol5 ++#endif +diff --git a/criu/arch/loongarch64/restorer.c b/criu/arch/loongarch64/restorer.c +new file mode 100644 +index 0000000000..730318ac14 +--- /dev/null ++++ b/criu/arch/loongarch64/restorer.c +@@ -0,0 +1,14 @@ ++#include ++ ++#include "restorer.h" ++#include "asm/restorer.h" ++#include ++ ++#include ++#include "log.h" ++#include "cpu.h" ++ ++int restore_nonsigframe_gpregs(UserLoongarch64GpregsEntry *r) ++{ ++ return 0; ++} +diff --git a/criu/arch/loongarch64/sigframe.c b/criu/arch/loongarch64/sigframe.c +new file mode 100644 +index 0000000000..18983ff138 +--- /dev/null ++++ b/criu/arch/loongarch64/sigframe.c +@@ -0,0 +1,12 @@ ++#include ++#include ++ ++#include "asm/sigframe.h" ++#include "asm/types.h" ++ ++#include "log.h" ++#include ++int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe) ++{ ++ return 0; ++} +diff --git a/criu/arch/loongarch64/vdso-pie.c b/criu/arch/loongarch64/vdso-pie.c +new file mode 100644 +index 0000000000..7a75d2741d +--- /dev/null ++++ b/criu/arch/loongarch64/vdso-pie.c +@@ -0,0 +1,48 @@ ++#include ++#include "asm/types.h" ++ ++#include ++#include ++#include "parasite-vdso.h" ++#include "log.h" ++#include "common/bug.h" ++ ++#ifdef LOG_PREFIX ++#undef LOG_PREFIX ++#endif ++#define LOG_PREFIX "vdso: " ++static void insert_trampoline(uintptr_t from, uintptr_t to) ++{ ++ struct { ++ uint32_t pcaddi; ++ uint32_t ldptr; ++ uint32_t jirl; ++ uint32_t guards; ++ uint64_t imm64; ++ } __packed jmp = { ++ .pcaddi = 0x18000095, /* pcaddi $x, 4 */ ++ .ldptr = 0x260002b5, /* ldptr.d $x, $x, 0 */ ++ .jirl = 0x4c0002a0, /* jirl $zero, $x, 0 */ ++ .guards = 0x002a0000, /* break 0 */ ++ .imm64 = to, ++ }; ++ memcpy((void *)from, &jmp, sizeof(jmp)); ++} ++ ++int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, struct vdso_symtable *sto, ++ struct vdso_symtable *sfrom, bool compat_vdso) ++{ ++ unsigned int i; ++ unsigned long from, to; ++ for (i = 0; i < ARRAY_SIZE(sto->symbols); i++) { ++ if (vdso_symbol_empty(&sfrom->symbols[i])) ++ continue; ++ pr_debug("br: %lx/%lx -> %lx/%lx (index %d)\n", base_from, sfrom->symbols[i].offset, base_to, ++ sto->symbols[i].offset, i); ++ ++ from = base_from + sfrom->symbols[i].offset; ++ to = base_to + sto->symbols[i].offset; ++ insert_trampoline(from, to); ++ } ++ return 0; ++} + +From 7a4a4fbb9a0055112ce7ebc005ef56a317d0b64b Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 15:26:35 +0800 +Subject: [PATCH 5/6] zdtm: add loongarch64 support + +Signed-off-by: znley +--- + .../lib/arch/loongarch64/include/asm/atomic.h | 49 +++++++++++++++++++ + test/zdtm/lib/test.c | 2 +- + 2 files changed, 50 insertions(+), 1 deletion(-) + create mode 100644 test/zdtm/lib/arch/loongarch64/include/asm/atomic.h + +diff --git a/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h b/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h +new file mode 100644 +index 0000000000..1803aaeb44 +--- /dev/null ++++ b/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h +@@ -0,0 +1,49 
@@ ++#ifndef __CR_ATOMIC_H__ ++#define __CR_ATOMIC_H__ ++ ++typedef uint32_t atomic_t; ++ ++#define atomic_get(v) (*(volatile int *)v) ++#define atomic_set(v, i) (*(v) = (i)) ++ ++static inline int __atomic_add(int i, atomic_t *v) ++{ ++ int result; ++ asm volatile("amadd_db.w %1, %2, %0" : "+ZB"(*v), "=&r"(result) : "r"(i) : "memory"); ++ return result + i; ++} ++ ++static inline void atomic_add(int i, atomic_t *v) ++{ ++ __atomic_add(i, v); ++} ++ ++static inline int atomic_add_return(int i, atomic_t *v) ++{ ++ return __atomic_add(i, v); ++} ++ ++#define atomic_sub(i, v) atomic_add(-(int)i, v) ++#define atomic_sub_return(i, v) atomic_add_return(-(int)i, v) ++#define atomic_inc(v) atomic_add_return(1, v) ++#define atomic_dec(v) atomic_sub_return(1, v) ++#define atomic_dec_return(v) atomic_sub_return(1, v) ++ ++static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) ++{ ++ int ret; ++ asm volatile("1: \n" ++ " ll.w %0, %1 \n" ++ " bne %0, %2, 2f \n" ++ " or $t0, %3, $zero \n" ++ " sc.w $t0, %1 \n" ++ " beqz $t0, 1b \n" ++ "2: \n" ++ " dbar 0 \n" ++ : "=&r"(ret), "+ZB"(*ptr) ++ : "r"(old), "r"(new) ++ : "t0", "memory"); ++ return ret; ++} ++ ++#endif /* __CR_ATOMIC_H__ */ +diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c +index 6291ea4a7b..a5ba38b2dd 100644 +--- a/test/zdtm/lib/test.c ++++ b/test/zdtm/lib/test.c +@@ -406,7 +406,7 @@ pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid + { + #ifdef __x86_64__ + return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, child_tid, newtls); +-#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__) ++#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__ || __loongarch64) + return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, newtls, child_tid); + #elif __s390x__ + return (pid_t)syscall(__NR_clone, child_stack, flags, parent_tid, child_tid, newtls); + +From 53f1b58307ef74c26cfc8cb2d2f69a1ab40cbc3b Mon Sep 17 00:00:00 2001 +From: znley +Date: Tue, 11 Jul 2023 15:20:00 +0800 +Subject: [PATCH 6/6] ci: add workflow for loongarch64 + +Signed-off-by: znley +--- + .github/workflows/loongarch64-qemu-test.yml | 15 +++++ + scripts/ci/Makefile | 5 ++ + scripts/ci/loongarch64-qemu-test.sh | 69 +++++++++++++++++++++ + 3 files changed, 89 insertions(+) + create mode 100644 .github/workflows/loongarch64-qemu-test.yml + create mode 100755 scripts/ci/loongarch64-qemu-test.sh + +diff --git a/.github/workflows/loongarch64-qemu-test.yml b/.github/workflows/loongarch64-qemu-test.yml +new file mode 100644 +index 0000000000..ba22fa25ff +--- /dev/null ++++ b/.github/workflows/loongarch64-qemu-test.yml +@@ -0,0 +1,15 @@ ++name: LoongArch64 Qemu Test ++ ++on: [push, pull_request] ++ ++# Cancel any preceding run on the pull request. 
++concurrency: ++ group: loongarch64-qemu-test-${{ github.event.pull_request.number || github.ref }} ++ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }} ++ ++jobs: ++ build: ++ runs-on: ubuntu-22.04 ++ steps: ++ - uses: actions/checkout@v2 ++ - run: sudo make -C scripts/ci loongarch64-qemu-test +diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile +index 5c45791034..ce844a17ce 100644 +--- a/scripts/ci/Makefile ++++ b/scripts/ci/Makefile +@@ -110,5 +110,10 @@ check-commit: + + .PHONY: check-commit + ++loongarch64-qemu-test: ++ ./loongarch64-qemu-test.sh ++ ++.PHONY: loongarch64-qemu-test ++ + %: + $(MAKE) -C ../build $@$(target-suffix) +diff --git a/scripts/ci/loongarch64-qemu-test.sh b/scripts/ci/loongarch64-qemu-test.sh +new file mode 100755 +index 0000000000..52e587619c +--- /dev/null ++++ b/scripts/ci/loongarch64-qemu-test.sh +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++set -o nounset ++set -o errexit ++set -x ++ ++./apt-install \ ++ apt-transport-https \ ++ ca-certificates \ ++ curl \ ++ software-properties-common \ ++ sshpass \ ++ openssh-client ++ ++curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - ++ ++add-apt-repository \ ++ "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ ++ $(lsb_release -cs) \ ++ stable test" ++ ++./apt-install docker-ce ++ ++# shellcheck source=/dev/null ++. /etc/lsb-release ++ ++# docker checkpoint and restore is an experimental feature ++echo '{ "experimental": true }' > /etc/docker/daemon.json ++service docker restart ++ ++docker info ++ ++# run a loongarch64 vm ++ ++PORT='2222' ++USER='root' ++PASSWORD='loongarch64' ++NAME='vm' ++ ++docker run \ ++ -d \ ++ --net host \ ++ --name $NAME \ ++ merore/archlinux-loongarch64 ++ ++run() { ++ if [ -z "$1" ]; then ++ echo "Command cannot be empty." ++ exit 1 ++ fi ++ sshpass -p $PASSWORD ssh -o StrictHostKeyChecking=no -p $PORT $USER@127.0.0.1 "$1" ++} ++ ++# wait vm to start ++while (! run "uname -a") ++do ++ echo "Wait vm to start..." ++ sleep 1 ++done ++echo "The loongarch64 vm is started!" 
++ ++# Tar criu and send to vm ++tar -cf criu.tar ../../../criu ++sshpass -p $PASSWORD scp -o StrictHostKeyChecking=no -P $PORT criu.tar $USER@127.0.0.1:/root ++ ++# build and test ++run 'cd /root; tar -xf criu.tar' ++run 'cd /root/criu; make -j4' ++run "cd /root/criu; ./test/zdtm.py run -t zdtm/static/maps02 -t zdtm/static/maps05 -t zdtm/static/maps06 -t zdtm/static/maps10 -t zdtm/static/maps_file_prot -t zdtm/static/memfd00 -t zdtm/transition/fork -t zdtm/transition/fork2 -t zdtm/transition/shmem -f h" diff --git a/criu/PKGBUILD b/criu/PKGBUILD index 06d3ec0591..86a3e31ecc 100644 --- a/criu/PKGBUILD +++ b/criu/PKGBUILD @@ -37,11 +37,13 @@ source=( 'no-python-pip.patch' 'no-recompile-on-install.patch' 'no-amdgpu-manpage.patch' + 2183.patch ) b2sums=('SKIP' 'd83da0ce0222c1aea1fc0c97bbf8a40f3cd5a6b5d55ee973b64f97bd9769df265b148e89cee8ee6564f065adc00552b511904f322555ac659b735933d42a9a64' 'e4b7c4831fa513d602c73e377847705240a6a42ee1986effd10a589784bd0ad818032ff8283c1f9fd17cb7ddf3204e4a932796a1df816afc30a0e594c92b50f6' - '9c713724e8f6b062f7a09e34555d31e5aa0315db6308b7527835484eaad8dbf5deac5c66521bf5a819462d5f38c64f6602ba421f7bbb73180a3b05189816c8f6') + '9c713724e8f6b062f7a09e34555d31e5aa0315db6308b7527835484eaad8dbf5deac5c66521bf5a819462d5f38c64f6602ba421f7bbb73180a3b05189816c8f6' + 'b0d762bb3cc93608fe32f96d3c018ada1e8708196031cb2f80ec6350af1d96d121476288083e1053805dcdeaf9fa7e636e08142a5ea83ecaa393341cffc9c664') pkgver() { cd "$pkgname" @@ -60,6 +62,7 @@ prepare() { # do not install amdgpu_plugin manpage patch -p1 -i "$srcdir/no-amdgpu-manpage.patch" + patch -p1 -i "$srcdir/2183.patch" } build() { diff --git a/cups-pdf/PKGBUILD b/cups-pdf/PKGBUILD index bb17900797..52f1ed1f19 100644 --- a/cups-pdf/PKGBUILD +++ b/cups-pdf/PKGBUILD @@ -3,7 +3,7 @@ pkgname=cups-pdf pkgver=3.0.1 -pkgrel=7 +pkgrel=8 pkgdesc="PDF printer for cups" arch=(loong64 x86_64) depends=('cups' 'ghostscript') diff --git a/cups-pk-helper/PKGBUILD b/cups-pk-helper/PKGBUILD index cec13e4588..89ead6f475 100644 --- a/cups-pk-helper/PKGBUILD +++ b/cups-pk-helper/PKGBUILD @@ -2,7 +2,7 @@ pkgname=cups-pk-helper pkgver=0.2.7 -pkgrel=1 +pkgrel=2 pkgdesc="A helper that makes system-config-printer use PolicyKit" arch=(loong64 x86_64) url="https://www.freedesktop.org/software/cups-pk-helper/releases/" diff --git a/cxxbridge/PKGBUILD b/cxxbridge/PKGBUILD index c1d223977b..dd2191dc75 100644 --- a/cxxbridge/PKGBUILD +++ b/cxxbridge/PKGBUILD @@ -19,7 +19,7 @@ b2sums=('91f01cb775413d89f5b8f4ae2adc49b91da48f47448c78787404274b7ba47e5a2891a44 prepare() { cd "cxx-${pkgver}" cp ../Cargo.lock . 
- cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } updlockfiles() { diff --git a/cypari2/PKGBUILD b/cypari2/PKGBUILD index cd77cd2e8a..a6e0e3622f 100644 --- a/cypari2/PKGBUILD +++ b/cypari2/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=cypari2 pkgname=python-cypari2 pkgver=2.1.4 -pkgrel=1 +pkgrel=2 pkgdesc='Cython bindings for PARI' arch=(loong64 x86_64) url='https://www.sagemath.org' diff --git a/daktilo/PKGBUILD b/daktilo/PKGBUILD index f0e523cffe..fcb6b2898c 100644 --- a/daktilo/PKGBUILD +++ b/daktilo/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('059318cba86996f08540167c77ac5711aa470083ab92415dab3a37b82be3d224264 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir completions/ mkdir man/ } diff --git a/dbeaver/PKGBUILD b/dbeaver/PKGBUILD index 7046b15c84..4f3b0ef85b 100644 --- a/dbeaver/PKGBUILD +++ b/dbeaver/PKGBUILD @@ -64,7 +64,7 @@ package() { done # Move into the target directory - cd "target/products/org.jkiss.dbeaver.core.product/linux/gtk/${CARCH}" + cd "target/products/org.jkiss.dbeaver.core.product/linux/gtk/`uname -m`" # Initially install everything into /usr/lib/dbeaver install -m 755 -d "${pkgdir}/usr/lib" diff --git a/dbus-c++/PKGBUILD b/dbus-c++/PKGBUILD index 8d288ca8cb..85c2487206 100644 --- a/dbus-c++/PKGBUILD +++ b/dbus-c++/PKGBUILD @@ -63,9 +63,12 @@ build() { --disable-static --enable-glib --prefix=/usr + --enable-glib --disable-examples --disable-tests ) cd $_pkgname-$pkgver + # -lpthread needed + export LDFLAGS+=" -lpthread" ./configure "${configure_options[@]}" # prevent excessive overlinking due to libtool sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool diff --git a/dconf-editor/dconf-editor-fix-meson.patch b/dconf-editor/dconf-editor-fix-meson.patch new file mode 100644 index 0000000000..d3320604a5 --- /dev/null +++ b/dconf-editor/dconf-editor-fix-meson.patch @@ -0,0 +1,20 @@ +diff --git a/editor/meson.build b/editor/meson.build +index 4c89ab99..0eb3bb1a 100644 +--- a/editor/meson.build ++++ b/editor/meson.build +@@ -1,7 +1,6 @@ + desktop = dconf_editor_namespace + '.desktop' + + i18n.merge_file ( +- desktop, + type: 'desktop', + input: desktop + '.in', + output: desktop, +@@ -26,7 +25,6 @@ configure_file( + appdata = dconf_editor_namespace + '.appdata.xml' + + i18n.merge_file( +- appdata, + input: appdata + '.in', + output: appdata, + po_dir: po_dir, diff --git a/deepin-anything/PKGBUILD b/deepin-anything/PKGBUILD index 7bb034f561..492a91a707 100644 --- a/deepin-anything/PKGBUILD +++ b/deepin-anything/PKGBUILD @@ -9,7 +9,7 @@ pkgdesc="Deepin Anything file search tool" arch=('loong64' 'x86_64') url="https://github.com/linuxdeepin/deepin-anything" license=('GPL3') -makedepends=('dtkcore' 'udisks2-qt5') +makedepends=('dtkcore' 'udisks2-qt5' 'pcre') source=("https://github.com/linuxdeepin/deepin-anything/archive/$pkgver/$pkgname-$pkgver.tar.gz" deepin-anything-server.sysusers) sha512sums=('73cc90f394b53a8d8edc17b258b63789cddd3094008de8403b24cdb0ed94e677cb9eedac297b73e8142204612a4ae5119450ebb08be4ea1c94abf0d375f8b51d' diff --git a/deepin-desktop-base/PKGBUILD b/deepin-desktop-base/PKGBUILD index 8b233e54c7..ab28810655 100644 --- a/deepin-desktop-base/PKGBUILD +++ b/deepin-desktop-base/PKGBUILD @@ -12,9 +12,16 @@ url="https://github.com/linuxdeepin/deepin-desktop-base" license=('GPL3') groups=('deepin') 
source=("https://github.com/linuxdeepin/deepin-desktop-base/archive/$pkgver/$pkgname-$pkgver.tar.gz" - distribution.info) + distribution.info + deepin-deskto-base-la64.patch) sha512sums=('3828007c10836a63d62244f5490bd0f8b66763b2b5a2ff43919c1e53e5d0d7a39973b44cc4ec1f17598f5f52f1db349d8617a44fa5a2d1875789b7b616f62dc0' - '17b17c3174a052ae93e57cfffaf551cac05a56da62b2f4829f5e8f4d0bd9dbe47e043ffcbd2c6299a771f7f8b8f2fffc727f2b5b754ab1cffbbf72fa3f54d035') + '17b17c3174a052ae93e57cfffaf551cac05a56da62b2f4829f5e8f4d0bd9dbe47e043ffcbd2c6299a771f7f8b8f2fffc727f2b5b754ab1cffbbf72fa3f54d035' + 'f79f5377f13e76ea812c822d9d6c41aa0faac16de1a0ddaa95b05673022fc5fe8f332f3bba6a99f6dfff4ffac6aa32d4bdd2ec44104fe1503de25cefa2709efe') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/deepin-deskto-base-la64.patch" +} build() { cd $pkgname-$pkgver @@ -37,6 +44,7 @@ package() { ln -s ../usr/lib/deepin/desktop-version "$pkgdir"/etc/deepin-version # Install os-version and rename to uos-version + cp files/os-version-amd files/os-version install -Dm644 files/os-version "$pkgdir"/etc/uos-version # Remove apt-specific templates diff --git a/deepin-desktop-base/deepin-deskto-base-la64.patch b/deepin-desktop-base/deepin-deskto-base-la64.patch new file mode 100644 index 0000000000..362257c379 --- /dev/null +++ b/deepin-desktop-base/deepin-deskto-base-la64.patch @@ -0,0 +1,27 @@ +Index: deepin-desktop-base-2021.06.16/Makefile +=================================================================== +--- deepin-desktop-base-2021.06.16.orig/Makefile ++++ deepin-desktop-base-2021.06.16/Makefile +@@ -23,6 +23,8 @@ build: + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/os-version-loongson > files/os-version + else ifeq (${ARCH_BUILD}, mips64) + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/desktop-version-loongson.in > files/desktop-version ++ else ifeq (${ARCH_BUILD}, loongarch64) ++ sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/desktop-version-loongarch64.in > files/desktop-version + else ifeq (${ARCH_BUILD}, sw_64) + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/desktop-version-sw.in > files/desktop-version + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/os-version-sw > files/os-version +Index: deepin-desktop-base-2021.06.16/files/desktop-version-loongarch64.in +=================================================================== +--- /dev/null ++++ deepin-desktop-base-2021.06.16/files/desktop-version-loongarch64.in +@@ -0,0 +1,9 @@ ++[Release] ++Version=@@VERSION@@ ++Type=Desktop ++Type[zh_CN]=社区版 ++Edition=Y2020E0001 ++Copyright=Y2020CR001 ++[Addition] ++Milestone=@@RELEASE@@ ++Buildid=build1 diff --git a/deepin-grand-search/62.patch b/deepin-grand-search/62.patch new file mode 100644 index 0000000000..7c83b5a7be --- /dev/null +++ b/deepin-grand-search/62.patch @@ -0,0 +1,126 @@ +From 66dd5c6c79922ee5366d9bf09e8a9879f7231306 Mon Sep 17 00:00:00 2001 +From: xzl +Date: Thu, 7 Sep 2023 16:05:54 +0800 +Subject: [PATCH] fix: fix build error in v23 + +Log: +--- + src/grand-search-daemon/main.cpp | 1 + + .../exhibition/matchresult/listview/grandsearchlistdelegate.cpp | 1 + + .../gui/exhibition/matchresult/listview/grandsearchlistview.cpp | 1 + + src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp | 1 + + src/grand-search/gui/searchconfig/bestmatchwidget.cpp | 1 + + .../gui/searchconfig/blacklistview/blacklistview.cpp | 1 + + 
src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp | 1 + + src/libgrand-search-daemon/main.cpp | 1 + + src/preview-plugin/audio-preview/audioview.cpp | 1 + + 9 files changed, 9 insertions(+) + +diff --git a/src/grand-search-daemon/main.cpp b/src/grand-search-daemon/main.cpp +index a2aada02..87bab961 100644 +--- a/src/grand-search-daemon/main.cpp ++++ b/src/grand-search-daemon/main.cpp +@@ -10,6 +10,7 @@ + #include + + #include ++#include + + #include + #include +diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp +index 26cb863b..b71278c9 100755 +--- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp ++++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp +@@ -28,6 +28,7 @@ + #define TailMaxWidth 150 // 拖尾信息最大显示宽度 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + GrandSearchListDelegate::GrandSearchListDelegate(QAbstractItemView *parent) +diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp +index f0133d41..d665d6b5 100755 +--- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp ++++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp +@@ -20,6 +20,7 @@ + + using namespace GrandSearch; + DCORE_USE_NAMESPACE ++DGUI_USE_NAMESPACE + DWIDGET_USE_NAMESPACE + + #define ICON_SIZE 24 +diff --git a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp +index 2d6acdc8..16e3a19e 100644 +--- a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp ++++ b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp +@@ -25,6 +25,7 @@ + using namespace GrandSearch; + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + + NameLabel::NameLabel(const QString &text, QWidget *parent, Qt::WindowFlags f): + QLabel(text, parent, f) +diff --git a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp +index 85e9d7cb..37606ebb 100644 +--- a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp ++++ b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp +@@ -14,6 +14,7 @@ + #include + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + BestMatchWidget::BestMatchWidget(QWidget *parent) +diff --git a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp +index 1473e4c8..eb634305 100644 +--- a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp ++++ b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp +@@ -26,6 +26,7 @@ DCORE_USE_NAMESPACE + #define InitCount 7 // 初始显示数量 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + BlackListView::BlackListView(QWidget *parent) +diff --git a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp +index 1449f095..ba0efbfa 100644 +--- a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp ++++ b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp +@@ -12,6 +12,7 @@ + #define ICONLABELSIZE 36 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + 
SwitchWidget::SwitchWidget(const QString &title, QWidget *parent) +diff --git a/src/libgrand-search-daemon/main.cpp b/src/libgrand-search-daemon/main.cpp +index 1dba137b..53aab763 100644 +--- a/src/libgrand-search-daemon/main.cpp ++++ b/src/libgrand-search-daemon/main.cpp +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + GRANDSEARCH_USE_NAMESPACE + DCORE_USE_NAMESPACE +diff --git a/src/preview-plugin/audio-preview/audioview.cpp b/src/preview-plugin/audio-preview/audioview.cpp +index d50fde46..2e7d8cd7 100644 +--- a/src/preview-plugin/audio-preview/audioview.cpp ++++ b/src/preview-plugin/audio-preview/audioview.cpp +@@ -19,6 +19,7 @@ + #define MARGIN_SIZE 15 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + GRANDSEARCH_USE_NAMESPACE + using namespace GrandSearch::audio_preview; + diff --git a/deepin-grand-search/PKGBUILD b/deepin-grand-search/PKGBUILD index 469780e349..86f4e48c7d 100644 --- a/deepin-grand-search/PKGBUILD +++ b/deepin-grand-search/PKGBUILD @@ -13,7 +13,8 @@ depends=(deepin-anything deepin-application-manager taglib ffmpeg icu deepin-pdf makedepends=(cmake deepin-dock ninja qt5-tools) groups=(deepin-extra) source=("$pkgname-$pkgver.tar.gz::https://github.com/linuxdeepin/dde-grand-search/archive/$pkgver.tar.gz" - taglib-2.patch) + taglib-2.patch + 62.patch) sha512sums=('7d2bd203b9c0dfef57a0667690252a9b3b3f3b5b2e30f44f6706de0d98885908f21f982fd19257812b92a0564e4e7888f8a6789bee2aa5ac2c573a2cadf0b838' '8364cd5aa0350a7d109be7ce10035c6c4e3fd6686205bc880017b1fc93a10cff6e78a8f66daeb25427c416a6dc075482136146c9d8278aee6de71653673d59a0') @@ -22,6 +23,7 @@ prepare() { # https://github.com/linuxdeepin/dde-grand-search/pull/65 sed -i 's/-fPIE -pie//g' src/*/CMakeLists.txt + patch -p1 -i $srcdir/62.patch patch -p1 -i ../taglib-2.patch } diff --git a/devtools/PKGBUILD b/devtools/PKGBUILD index 9ec5c0b21a..1587c5f80a 100644 --- a/devtools/PKGBUILD +++ b/devtools/PKGBUILD @@ -34,11 +34,12 @@ depends=( ) makedepends=( asciidoc - shellcheck +# shellcheck ) optdepends=('btrfs-progs: btrfs support') replaces=(devtools-git-poc) -source=(https://gitlab.archlinux.org/archlinux/devtools/-/releases/v${pkgver}/downloads/devtools-${pkgver}.tar.gz{,.sig}) +source=(https://gitlab.archlinux.org/archlinux/devtools/-/releases/v${pkgver}/downloads/devtools-${pkgver}.tar.gz{,.sig} + devtools-loong64-1.0.4.patch) validpgpkeys=( '4AA4767BBC9C4B1D18AE28B77F2D434B9741E8AC' # Pierre Schmitz '86CFFCA918CF3AF47147588051E8B148A9999C34' # Evangelos Foutras @@ -49,9 +50,16 @@ validpgpkeys=( 'E240B57E2C4630BA768E2F26FC1B547C8D8172C8' # Levente Polyak ) sha256sums=('dded4b47f669751cf86367284c9adabbec92321f5fb0fc684d4fcc0e039c6719' - 'SKIP') + 'SKIP' + '1ad1e4de88d9f7e1ae2de9b7ab4a494634e39075ee04bb497241cc5e154d1a00') b2sums=('bfc3727fe70dbae1333d491a48342955230072830ab9cb7a308992eaba7127202ed6a3489398f86540ebc213b2d530d92ae7485fb02241a67f7d71c496088123' - 'SKIP') + 'SKIP' + '3cde8f2e4ca9e69b7d9a7159391896511dbd5b2f1f9f575c49c790ce330b8749eba6c7c1f69eebe33d632a245feacddd8fb84e7f79efbf5bab7d9ea55bf7d15f') + +prepare() { + cd ${pkgname}-${pkgver} + patch -p1 -i $srcdir/devtools-loong64-1.0.4.patch +} build() { cd ${pkgname}-${pkgver} diff --git a/devtools/devtools-loong64-1.0.4.patch b/devtools/devtools-loong64-1.0.4.patch new file mode 100644 index 0000000000..36f76978f9 --- /dev/null +++ b/devtools/devtools-loong64-1.0.4.patch @@ -0,0 +1,898 @@ +diff --git a/Makefile b/Makefile +index bfe80a8..20c3152 100644 +--- a/Makefile ++++ b/Makefile +@@ -25,28 +25,41 @@ COMMITPKG_LINKS = \ + extrapkg \ + 
extra-testingpkg \ + extra-stagingpkg \ ++ laurpkg \ ++ laur-testingpkg \ ++ laur-stagingpkg \ + multilibpkg \ + multilib-testingpkg \ + multilib-stagingpkg \ + kde-unstablepkg \ +- gnome-unstablepkg ++ gnome-unstablepkg \ ++ wine-appspkg + + ARCHBUILD_LINKS = \ ++ core-testing-loong64-build \ + core-testing-x86_64-build \ + core-testing-x86_64_v3-build \ ++ core-staging-loong64-build \ + core-staging-x86_64-build \ + core-staging-x86_64_v3-build \ ++ extra-loong64-build \ + extra-x86_64-build \ + extra-x86_64_v3-build \ ++ extra-testing-loong64-build \ + extra-testing-x86_64-build \ + extra-testing-x86_64_v3-build \ ++ extra-staging-loong64-build \ + extra-staging-x86_64-build \ + extra-staging-x86_64_v3-build \ ++ laur-loong64-build \ ++ laur-testing-loong64-build \ ++ laur-staging-loong64-build \ + multilib-build \ + multilib-testing-build \ + multilib-staging-build \ +- kde-unstable-x86_64-build \ +- gnome-unstable-x86_64-build ++ kde-unstable-loong64-build kde-unstable-x86_64-build \ ++ gnome-unstable-loong64-build gnome-unstable-x86_64-build \ ++ wine-apps-loong64-build + + COMPLETIONS = $(addprefix $(BUILDDIR)/,$(patsubst %.in,%,$(wildcard contrib/completion/*/*))) + +@@ -159,7 +172,7 @@ dist: + git archive --format=tar --prefix=devtools-$(V)/ v$(V) | gzip > devtools-$(V).tar.gz + gpg --detach-sign --use-agent devtools-$(V).tar.gz + +-check: $(BINPROGS_SRC) $(LIBRARY_SRC) contrib/completion/bash/devtools.in config/makepkg/x86_64.conf contrib/makepkg/PKGBUILD.proto ++check: $(BINPROGS_SRC) $(LIBRARY_SRC) contrib/completion/bash/devtools.in config/makepkg/loong64.conf config/makepkg/x86_64.conf contrib/makepkg/PKGBUILD.proto + shellcheck $^ + + .PHONY: all binprogs library completion conf man clean install uninstall tag dist upload check +diff --git a/config/makepkg/loong64.conf b/config/makepkg/loong64.conf +new file mode 100644 +index 0000000..c1b2987 +--- /dev/null ++++ b/config/makepkg/loong64.conf +@@ -0,0 +1,162 @@ ++#!/hint/bash ++# shellcheck disable=2034 ++ ++# ++# /etc/makepkg.conf ++# ++ ++######################################################################### ++# SOURCE ACQUISITION ++######################################################################### ++# ++#-- The download utilities that makepkg should use to acquire sources ++# Format: 'protocol::agent' ++DLAGENTS=('file::/usr/bin/curl -qgC - -o %o %u' ++ 'ftp::/usr/bin/curl -qgfC - --ftp-pasv --retry 3 --retry-delay 3 -o %o %u' ++ 'http::/usr/bin/curl -qgb "" -fLC - --retry 3 --retry-delay 3 -o %o %u' ++ 'https::/usr/bin/curl -qgb "" -fLC - --retry 3 --retry-delay 3 -o %o %u' ++ 'rsync::/usr/bin/rsync --no-motd -z %u %o' ++ 'scp::/usr/bin/scp -C %u %o') ++ ++# Other common tools: ++# /usr/bin/snarf ++# /usr/bin/lftpget -c ++# /usr/bin/wget ++ ++#-- The package required by makepkg to download VCS sources ++# Format: 'protocol::package' ++VCSCLIENTS=('bzr::bzr' ++ 'fossil::fossil' ++ 'git::git' ++ 'hg::mercurial' ++ 'svn::subversion') ++ ++######################################################################### ++# ARCHITECTURE, COMPILE FLAGS ++######################################################################### ++# ++CARCH="loong64" ++CHOST="loongarch64-unknown-linux-gnu" ++ ++#-- Compiler and Linker Flags ++#CPPFLAGS="" ++CFLAGS="-mabi=lp64d -march=loongarch64 -mlsx -O2 -pipe -fno-plt -fexceptions \ ++ -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security \ ++ -fstack-clash-protection" ++CXXFLAGS="$CFLAGS -Wp,-D_GLIBCXX_ASSERTIONS" ++LDFLAGS="-Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now" 
++LTOFLAGS="-flto=auto" ++RUSTFLAGS="" ++#-- Make Flags: change this for DistCC/SMP systems ++#MAKEFLAGS="-j2" ++#-- Debugging flags ++DEBUG_CFLAGS="-g" ++DEBUG_CXXFLAGS="$DEBUG_CFLAGS" ++DEBUG_RUSTFLAGS="-C debuginfo=2" ++ ++######################################################################### ++# BUILD ENVIRONMENT ++######################################################################### ++# ++# Makepkg defaults: BUILDENV=(!distcc !color !ccache check !sign) ++# A negated environment option will do the opposite of the comments below. ++# ++#-- distcc: Use the Distributed C/C++/ObjC compiler ++#-- color: Colorize output messages ++#-- ccache: Use ccache to cache compilation ++#-- check: Run the check() function if present in the PKGBUILD ++#-- sign: Generate PGP signature file ++# ++BUILDENV=(!distcc color !ccache check !sign) ++# ++#-- If using DistCC, your MAKEFLAGS will also need modification. In addition, ++#-- specify a space-delimited list of hosts running in the DistCC cluster. ++#DISTCC_HOSTS="" ++# ++#-- Specify a directory for package building. ++#BUILDDIR=/tmp/makepkg ++ ++######################################################################### ++# GLOBAL PACKAGE OPTIONS ++# These are default values for the options=() settings ++######################################################################### ++# ++# Makepkg defaults: OPTIONS=(!strip docs libtool staticlibs emptydirs !zipman !purge !debug !lto) ++# A negated option will do the opposite of the comments below. ++# ++#-- strip: Strip symbols from binaries/libraries ++#-- docs: Save doc directories specified by DOC_DIRS ++#-- libtool: Leave libtool (.la) files in packages ++#-- staticlibs: Leave static library (.a) files in packages ++#-- emptydirs: Leave empty directories in packages ++#-- zipman: Compress manual (man and info) pages in MAN_DIRS with gzip ++#-- purge: Remove files specified by PURGE_TARGETS ++#-- debug: Add debugging flags as specified in DEBUG_* variables ++#-- lto: Add compile flags for building with link time optimization ++# ++OPTIONS=(strip docs !libtool !staticlibs emptydirs zipman purge debug lto) ++ ++#-- File integrity checks to use. Valid: md5, sha1, sha224, sha256, sha384, sha512, b2 ++INTEGRITY_CHECK=(sha256) ++#-- Options to be used when stripping binaries. See `man strip' for details. ++STRIP_BINARIES="--strip-all" ++#-- Options to be used when stripping shared libraries. See `man strip' for details. ++STRIP_SHARED="--strip-unneeded" ++#-- Options to be used when stripping static libraries. See `man strip' for details. 
++STRIP_STATIC="--strip-debug" ++#-- Manual (man and info) directories to compress (if zipman is specified) ++MAN_DIRS=({usr{,/local}{,/share},opt/*}/{man,info}) ++#-- Doc directories to remove (if !docs is specified) ++DOC_DIRS=(usr/{,local/}{,share/}{doc,gtk-doc} opt/*/{doc,gtk-doc}) ++#-- Files to be removed from all packages (if purge is specified) ++PURGE_TARGETS=(usr/{,share}/info/dir .packlist *.pod) ++#-- Directory to store source code in for debug packages ++DBGSRCDIR="/usr/src/debug" ++ ++######################################################################### ++# PACKAGE OUTPUT ++######################################################################### ++# ++# Default: put built package and cached source in build directory ++# ++#-- Destination: specify a fixed directory where all packages will be placed ++#PKGDEST=/home/packages ++#-- Source cache: specify a fixed directory where source files will be cached ++#SRCDEST=/home/sources ++#-- Source packages: specify a fixed directory where all src packages will be placed ++#SRCPKGDEST=/home/srcpackages ++#-- Log files: specify a fixed directory where all log files will be placed ++#LOGDEST=/home/makepkglogs ++#-- Packager: name/email of the person or organization building packages ++#PACKAGER="John Doe " ++#-- Specify a key to use for package signing ++#GPGKEY="" ++ ++######################################################################### ++# COMPRESSION DEFAULTS ++######################################################################### ++# ++COMPRESSGZ=(gzip -c -f -n) ++COMPRESSBZ2=(bzip2 -c -f) ++COMPRESSXZ=(xz -c -z -) ++COMPRESSZST=(zstd -c -T0 --ultra -20 -) ++COMPRESSLRZ=(lrzip -q) ++COMPRESSLZO=(lzop -q) ++COMPRESSZ=(compress -c -f) ++COMPRESSLZ4=(lz4 -q) ++COMPRESSLZ=(lzip -c -f) ++ ++######################################################################### ++# EXTENSION DEFAULTS ++######################################################################### ++# ++PKGEXT='.pkg.tar.zst' ++SRCEXT='.src.tar.gz' ++ ++######################################################################### ++# OTHER ++######################################################################### ++# ++#-- Command used to run pacman as root, instead of trying sudo and su ++#PACMAN_AUTH=() ++# vim: set ft=sh ts=2 sw=2 et: +diff --git a/config/pacman/laur-staging.conf b/config/pacman/laur-staging.conf +new file mode 100644 +index 0000000..1726921 +--- /dev/null ++++ b/config/pacman/laur-staging.conf +@@ -0,0 +1,104 @@ ++# ++# /etc/pacman.conf ++# ++# See the pacman.conf(5) manpage for option and repository directives ++ ++# ++# GENERAL OPTIONS ++# ++[options] ++# The following paths are commented out with their default values listed. ++# If you wish to use different paths, uncomment and update the paths. 
++#RootDir = / ++#DBPath = /var/lib/pacman/ ++#CacheDir = /var/cache/pacman/pkg/ ++#LogFile = /var/log/pacman.log ++#GPGDir = /etc/pacman.d/gnupg/ ++#HookDir = /etc/pacman.d/hooks/ ++HoldPkg = pacman glibc ++#XferCommand = /usr/bin/curl -L -C - -f -o %o %u ++#XferCommand = /usr/bin/wget --passive-ftp -c -O %o %u ++#CleanMethod = KeepInstalled ++Architecture = auto ++ ++# Pacman won't upgrade packages listed in IgnorePkg and members of IgnoreGroup ++#IgnorePkg = ++#IgnoreGroup = ++ ++#NoUpgrade = ++#NoExtract = ++ ++# Misc options ++#UseSyslog ++#Color ++NoProgressBar ++# We cannot check disk space from within a chroot environment ++#CheckSpace ++VerbosePkgLists ++ParallelDownloads = 5 ++ ++# By default, pacman accepts packages signed by keys that its local keyring ++# trusts (see pacman-key and its man page), as well as unsigned packages. ++SigLevel = Required DatabaseOptional ++LocalFileSigLevel = Optional ++#RemoteFileSigLevel = Required ++ ++# NOTE: You must run `pacman-key --init` before first using pacman; the local ++# keyring can then be populated with the keys of all official Arch Linux ++# packagers with `pacman-key --populate archlinux`. ++ ++# ++# REPOSITORIES ++# - can be defined here or included from another file ++# - pacman will search repositories in the order defined here ++# - local/custom mirrors can be added here or in separate files ++# - repositories listed first will take precedence when packages ++# have identical names, regardless of version number ++# - URLs will have $repo replaced by the name of the current repo ++# - URLs will have $arch replaced by the name of the architecture ++# ++# Repository entries are of the format: ++# [repo-name] ++# Server = ServerName ++# Include = IncludePath ++# ++# The header [repo-name] is crucial - it must be present and ++# uncommented to enable the repo. ++# ++ ++# The testing repositories are disabled by default. To enable, uncomment the ++# repo name header and Include lines. You can add preferred servers immediately ++# after the header, and they will be used before the default mirrors. ++ ++[core-staging] ++Include = /etc/pacman.d/mirrorlist ++ ++[core-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[core] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra-staging] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur-staging] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur] ++Include = /etc/pacman.d/mirrorlist ++ ++# An example of a custom package repository. See the pacman manpage for ++# tips on creating your own repositories. ++#[custom] ++#SigLevel = Optional TrustAll ++#Server = file:///home/custompkgs +diff --git a/config/pacman/laur-testing.conf b/config/pacman/laur-testing.conf +new file mode 100644 +index 0000000..bbe7791 +--- /dev/null ++++ b/config/pacman/laur-testing.conf +@@ -0,0 +1,95 @@ ++# ++# /etc/pacman.conf ++# ++# See the pacman.conf(5) manpage for option and repository directives ++ ++# ++# GENERAL OPTIONS ++# ++[options] ++# The following paths are commented out with their default values listed. ++# If you wish to use different paths, uncomment and update the paths. 
++#RootDir = / ++#DBPath = /var/lib/pacman/ ++#CacheDir = /var/cache/pacman/pkg/ ++#LogFile = /var/log/pacman.log ++#GPGDir = /etc/pacman.d/gnupg/ ++#HookDir = /etc/pacman.d/hooks/ ++HoldPkg = pacman glibc ++#XferCommand = /usr/bin/curl -L -C - -f -o %o %u ++#XferCommand = /usr/bin/wget --passive-ftp -c -O %o %u ++#CleanMethod = KeepInstalled ++Architecture = auto ++ ++# Pacman won't upgrade packages listed in IgnorePkg and members of IgnoreGroup ++#IgnorePkg = ++#IgnoreGroup = ++ ++#NoUpgrade = ++#NoExtract = ++ ++# Misc options ++#UseSyslog ++#Color ++NoProgressBar ++# We cannot check disk space from within a chroot environment ++#CheckSpace ++VerbosePkgLists ++ParallelDownloads = 5 ++ ++# By default, pacman accepts packages signed by keys that its local keyring ++# trusts (see pacman-key and its man page), as well as unsigned packages. ++SigLevel = Required DatabaseOptional ++LocalFileSigLevel = Optional ++#RemoteFileSigLevel = Required ++ ++# NOTE: You must run `pacman-key --init` before first using pacman; the local ++# keyring can then be populated with the keys of all official Arch Linux ++# packagers with `pacman-key --populate archlinux`. ++ ++# ++# REPOSITORIES ++# - can be defined here or included from another file ++# - pacman will search repositories in the order defined here ++# - local/custom mirrors can be added here or in separate files ++# - repositories listed first will take precedence when packages ++# have identical names, regardless of version number ++# - URLs will have $repo replaced by the name of the current repo ++# - URLs will have $arch replaced by the name of the architecture ++# ++# Repository entries are of the format: ++# [repo-name] ++# Server = ServerName ++# Include = IncludePath ++# ++# The header [repo-name] is crucial - it must be present and ++# uncommented to enable the repo. ++# ++ ++# The testing repositories are disabled by default. To enable, uncomment the ++# repo name header and Include lines. You can add preferred servers immediately ++# after the header, and they will be used before the default mirrors. ++ ++[core-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[core] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur] ++Include = /etc/pacman.d/mirrorlist ++ ++# An example of a custom package repository. See the pacman manpage for ++# tips on creating your own repositories. ++#[custom] ++#SigLevel = Optional TrustAll ++#Server = file:///home/custompkgs +diff --git a/config/pacman/laur.conf b/config/pacman/laur.conf +new file mode 100644 +index 0000000..6c50d86 +--- /dev/null ++++ b/config/pacman/laur.conf +@@ -0,0 +1,92 @@ ++# ++# /etc/pacman.conf ++# ++# See the pacman.conf(5) manpage for option and repository directives ++ ++# ++# GENERAL OPTIONS ++# ++[options] ++# The following paths are commented out with their default values listed. ++# If you wish to use different paths, uncomment and update the paths. 
++#RootDir = / ++#DBPath = /var/lib/pacman/ ++#CacheDir = /var/cache/pacman/pkg/ ++#LogFile = /var/log/pacman.log ++#GPGDir = /etc/pacman.d/gnupg/ ++#HookDir = /etc/pacman.d/hooks/ ++HoldPkg = pacman glibc ++#XferCommand = /usr/bin/curl -L -C - -f -o %o %u ++#XferCommand = /usr/bin/wget --passive-ftp -c -O %o %u ++#CleanMethod = KeepInstalled ++Architecture = auto ++ ++# Pacman won't upgrade packages listed in IgnorePkg and members of IgnoreGroup ++#IgnorePkg = ++#IgnoreGroup = ++ ++#NoUpgrade = ++#NoExtract = ++ ++# Misc options ++#UseSyslog ++#Color ++NoProgressBar ++# We cannot check disk space from within a chroot environment ++#CheckSpace ++VerbosePkgLists ++ParallelDownloads = 5 ++ ++# By default, pacman accepts packages signed by keys that its local keyring ++# trusts (see pacman-key and its man page), as well as unsigned packages. ++SigLevel = Required DatabaseOptional ++LocalFileSigLevel = Optional ++#RemoteFileSigLevel = Required ++ ++# NOTE: You must run `pacman-key --init` before first using pacman; the local ++# keyring can then be populated with the keys of all official Arch Linux ++# packagers with `pacman-key --populate archlinux`. ++ ++# ++# REPOSITORIES ++# - can be defined here or included from another file ++# - pacman will search repositories in the order defined here ++# - local/custom mirrors can be added here or in separate files ++# - repositories listed first will take precedence when packages ++# have identical names, regardless of version number ++# - URLs will have $repo replaced by the name of the current repo ++# - URLs will have $arch replaced by the name of the architecture ++# ++# Repository entries are of the format: ++# [repo-name] ++# Server = ServerName ++# Include = IncludePath ++# ++# The header [repo-name] is crucial - it must be present and ++# uncommented to enable the repo. ++# ++ ++# The testing repositories are disabled by default. To enable, uncomment the ++# repo name header and Include lines. You can add preferred servers immediately ++# after the header, and they will be used before the default mirrors. ++ ++#[core-testing] ++#Include = /etc/pacman.d/mirrorlist ++ ++[core] ++Include = /etc/pacman.d/mirrorlist ++ ++#[extra-testing] ++#Include = /etc/pacman.d/mirrorlist ++ ++[extra] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur] ++Include = /etc/pacman.d/mirrorlist ++ ++# An example of a custom package repository. See the pacman manpage for ++# tips on creating your own repositories. ++#[custom] ++#SigLevel = Optional TrustAll ++#Server = file:///home/custompkgs +diff --git a/config/pacman/multilib.conf b/config/pacman/multilib.conf +index e09a9d5..496d84b 100644 +--- a/config/pacman/multilib.conf ++++ b/config/pacman/multilib.conf +@@ -88,7 +88,7 @@ Include = /etc/pacman.d/mirrorlist + #[multilib-testing] + #Include = /etc/pacman.d/mirrorlist + +-[multilib] ++[laur] + Include = /etc/pacman.d/mirrorlist + + # An example of a custom package repository. 
See the pacman manpage for +diff --git a/config/setarch-aliases.d/loong64 b/config/setarch-aliases.d/loong64 +new file mode 100644 +index 0000000..a4d6d47 +--- /dev/null ++++ b/config/setarch-aliases.d/loong64 +@@ -0,0 +1 @@ ++loongarch64 +diff --git a/src/archbuild.in b/src/archbuild.in +index 2f3faf9..c977a24 100644 +--- a/src/archbuild.in ++++ b/src/archbuild.in +@@ -15,7 +15,7 @@ makechrootpkg_args=(-c -n -C) + cmd="${0##*/}" + if [[ "${cmd%%-*}" == 'multilib' ]]; then + repo="${cmd%-build}" +- arch='x86_64' ++ arch='loong64' + base_packages+=(multilib-devel) + else + tag="${cmd%-build}" +diff --git a/src/archrelease.in b/src/archrelease.in +index 818b0ca..d1e277b 100644 +--- a/src/archrelease.in ++++ b/src/archrelease.in +@@ -45,50 +45,50 @@ if [[ ! -f PKGBUILD ]]; then + die 'archrelease: PKGBUILD not found' + fi + +-# shellcheck source=contrib/makepkg/PKGBUILD.proto +-. ./PKGBUILD +-pkgbase=${pkgbase:-$pkgname} +-pkgver=$(get_full_version "$pkgbase") +-gittag=$(get_tag_from_pkgver "$pkgver") +- +-# Check if releasing from a branch +-if ! branchname=$(git symbolic-ref --short HEAD); then +- die 'not on any branch' +-fi +-if [[ "${branchname}" != main ]]; then +- die 'must be run from the main branch' +-fi +- +-# Check if remote origin is setup properly +-if ! giturl=$(git remote get-url origin) || [[ ${giturl} != *${GIT_PACKAGING_URL_SSH}* ]]; then +- die "remote origin is not configured, run 'pkgctl repo configure'" +-fi +-if ! git ls-remote origin >/dev/null; then +- die "configured remote origin may not exist, run 'pkgctl repo create ${pkgbase}' to create it" +-fi +- +-msg 'Fetching remote changes' +-git fetch --prune --prune-tags origin || die 'failed to fetch remote changes' +- +-# Check if local branch is up to date and contains the latest origin commit +-if remoteref=$(git rev-parse "origin/${branchname}" 2>/dev/null); then +- if [[ $(git branch "${branchname}" --contains "${remoteref}" --format '%(refname:short)') != "${branchname}" ]]; then +- die "local branch is out of date, run 'git pull --rebase'" +- fi +-fi +- +-# If the tag exists we check if it's properly signed and that it +-# matches the working directory PKGBUILD. +-if git tag --verify "$gittag" &> /dev/null; then +- cwd_checksum=$(sha256sum PKGBUILD|cut -d' ' -f1) +- tag_checksum=$(git show "${gittag}:PKGBUILD" | sha256sum |cut -d' ' -f1) +- if [[ "$cwd_checksum" != "$tag_checksum" ]]; then +- die "tagged PKGBUILD is not the same as the working dir PKGBUILD" +- fi +- git push --tags --set-upstream origin main || abort +- exit 0 +-fi +- +-msg "Releasing package" +-git tag --sign --message="Package release ${pkgver}" "$gittag" || abort +-git push --tags --set-upstream origin main || abort ++## shellcheck source=contrib/makepkg/PKGBUILD.proto ++#. ./PKGBUILD ++#pkgbase=${pkgbase:-$pkgname} ++#pkgver=$(get_full_version "$pkgbase") ++#gittag=$(get_tag_from_pkgver "$pkgver") ++# ++## Check if releasing from a branch ++#if ! branchname=$(git symbolic-ref --short HEAD); then ++# die 'not on any branch' ++#fi ++#if [[ "${branchname}" != main ]]; then ++# die 'must be run from the main branch' ++#fi ++# ++## Check if remote origin is setup properly ++#if ! giturl=$(git remote get-url origin) || [[ ${giturl} != *${GIT_PACKAGING_URL_SSH}* ]]; then ++# die "remote origin is not configured, run 'pkgctl repo configure'" ++#fi ++#if ! 
git ls-remote origin >/dev/null; then ++# die "configured remote origin may not exist, run 'pkgctl repo create ${pkgbase}' to create it" ++#fi ++# ++#msg 'Fetching remote changes' ++#git fetch --prune --prune-tags origin || die 'failed to fetch remote changes' ++# ++## Check if local branch is up to date and contains the latest origin commit ++#if remoteref=$(git rev-parse "origin/${branchname}" 2>/dev/null); then ++# if [[ $(git branch "${branchname}" --contains "${remoteref}" --format '%(refname:short)') != "${branchname}" ]]; then ++# die "local branch is out of date, run 'git pull --rebase'" ++# fi ++#fi ++# ++## If the tag exists we check if it's properly signed and that it ++## matches the working directory PKGBUILD. ++#if git tag --verify "$gittag" &> /dev/null; then ++# cwd_checksum=$(sha256sum PKGBUILD|cut -d' ' -f1) ++# tag_checksum=$(git show "${gittag}:PKGBUILD" | sha256sum |cut -d' ' -f1) ++# if [[ "$cwd_checksum" != "$tag_checksum" ]]; then ++# die "tagged PKGBUILD is not the same as the working dir PKGBUILD" ++# fi ++# git push --tags --set-upstream origin main || abort ++# exit 0 ++#fi ++# ++#msg "Releasing package" ++#git tag --sign --message="Package release ${pkgver}" "$gittag" || abort ++#git push --tags --set-upstream origin main || abort +diff --git a/src/commitpkg.in b/src/commitpkg.in +index f979d61..0b1226f 100644 +--- a/src/commitpkg.in ++++ b/src/commitpkg.in +@@ -128,17 +128,17 @@ for key in "${validpgpkeys[@]}"; do + done + + # assert that they really are controlled by git +-if (( ${#needsversioning[*]} )); then +- for file in "${needsversioning[@]}"; do +- # skip none existing files +- if [[ ! -f "${file}" ]]; then +- continue +- fi +- if ! git ls-files --error-unmatch "$file"; then +- die "%s is not under version control" "$file" +- fi +- done +-fi ++#if (( ${#needsversioning[*]} )); then ++# for file in "${needsversioning[@]}"; do ++# # skip none existing files ++# if [[ ! -f "${file}" ]]; then ++# continue ++# fi ++# if ! git ls-files --error-unmatch "$file"; then ++# die "%s is not under version control" "$file" ++# fi ++# done ++#fi + + + server=${PACKAGING_REPO_RELEASE_HOST} +@@ -176,51 +176,51 @@ for _arch in "${arch[@]}"; do + done + + # check for PKGBUILD standards +-check_pkgbuild_validity ++#check_pkgbuild_validity + + # auto generate .SRCINFO +-stat_busy 'Generating .SRCINFO' +-write_srcinfo_content > .SRCINFO +-git add --force .SRCINFO +-stat_done +- +-if [[ -n $(git status --porcelain --untracked-files=no) ]]; then +- stat_busy 'Staging files' +- for f in $(git ls-files --modified); do +- git add "$f" +- done +- for f in $(git ls-files --deleted); do +- git rm "$f" +- done +- stat_done +- +- msgtemplate="upgpkg: $(get_full_version)" +- if [[ -n $1 ]]; then +- stat_busy 'Committing changes' +- git commit -q -m "${msgtemplate}: ${1}" || die +- stat_done +- else +- [[ -z ${WORKDIR:-} ]] && setup_workdir +- msgfile=$(mktemp --tmpdir="${WORKDIR}" commitpkg.XXXXXXXXXX) +- echo "$msgtemplate" > "$msgfile" +- if [[ -n $GIT_EDITOR ]]; then +- $GIT_EDITOR "$msgfile" || die +- elif [[ -n $VISUAL ]]; then +- $VISUAL "$msgfile" || die +- elif [[ -n $EDITOR ]]; then +- $EDITOR "$msgfile" || die +- elif giteditor=$(git config --get core.editor); then +- $giteditor "$msgfile" || die +- else +- die "No usable editor found (tried \$GIT_EDITOR, \$VISUAL, \$EDITOR, git config [core.editor])." 
+- fi +- [[ -s $msgfile ]] || die +- stat_busy 'Committing changes' +- git commit -v -q -F "$msgfile" || die +- unlink "$msgfile" +- stat_done +- fi +-fi ++#stat_busy 'Generating .SRCINFO' ++#write_srcinfo_content > .SRCINFO ++#git add --force .SRCINFO ++#stat_done ++ ++#if [[ -n $(git status --porcelain --untracked-files=no) ]]; then ++# stat_busy 'Staging files' ++# for f in $(git ls-files --modified); do ++# git add "$f" ++# done ++# for f in $(git ls-files --deleted); do ++# git rm "$f" ++# done ++# stat_done ++# ++# msgtemplate="upgpkg: $(get_full_version)" ++# if [[ -n $1 ]]; then ++# stat_busy 'Committing changes' ++# git commit -q -m "${msgtemplate}: ${1}" || die ++# stat_done ++# else ++# [[ -z ${WORKDIR:-} ]] && setup_workdir ++# msgfile=$(mktemp --tmpdir="${WORKDIR}" commitpkg.XXXXXXXXXX) ++# echo "$msgtemplate" > "$msgfile" ++# if [[ -n $GIT_EDITOR ]]; then ++# $GIT_EDITOR "$msgfile" || die ++# elif [[ -n $VISUAL ]]; then ++# $VISUAL "$msgfile" || die ++# elif [[ -n $EDITOR ]]; then ++# $EDITOR "$msgfile" || die ++# elif giteditor=$(git config --get core.editor); then ++# $giteditor "$msgfile" || die ++# else ++# die "No usable editor found (tried \$GIT_EDITOR, \$VISUAL, \$EDITOR, git config [core.editor])." ++# fi ++# [[ -s $msgfile ]] || die ++# stat_busy 'Committing changes' ++# git commit -v -q -F "$msgfile" || die ++# unlink "$msgfile" ++# stat_done ++# fi ++#fi + + declare -a uploads + declare -a commit_arches +diff --git a/src/lib/build/build.sh b/src/lib/build/build.sh +index a5a272d..ec7af1a 100644 +--- a/src/lib/build/build.sh ++++ b/src/lib/build/build.sh +@@ -319,6 +319,9 @@ pkgctl_build() { + BUILD_ARCH=("${_arch[0]}") + else + for loop_arch in "${arch[@]}"; do ++ if in_array "${loop_arch}" "x86_64"; then ++ continue ++ fi + if in_array "${loop_arch}" "${_arch[@]}"; then + BUILD_ARCH+=("$loop_arch") + else +diff --git a/src/lib/valid-repos.sh b/src/lib/valid-repos.sh +index 14f90ce..21a03ec 100644 +--- a/src/lib/valid-repos.sh ++++ b/src/lib/valid-repos.sh +@@ -7,16 +7,20 @@ + _repos=( + core core-staging core-testing + extra extra-staging extra-testing ++ laur laur-staging laur-testing + multilib multilib-staging multilib-testing + gnome-unstable + kde-unstable ++ wine-apps + ) + + # shellcheck disable=2034 + _build_repos=( + core-staging core-testing + extra extra-staging extra-testing ++ laur laur-staging laur-testing + multilib multilib-staging multilib-testing + gnome-unstable + kde-unstable ++ wine-apps + ) +diff --git a/src/lib/valid-tags.sh b/src/lib/valid-tags.sh +index ca8d7d7..c2397e2 100644 +--- a/src/lib/valid-tags.sh ++++ b/src/lib/valid-tags.sh +@@ -5,21 +5,26 @@ + + # shellcheck disable=2034 + _arch=( ++ loong64 + x86_64 + any + ) + + # shellcheck disable=2034 + _tags=( +- core-x86_64 core-any +- core-staging-x86_64 core-staging-any +- core-testing-x86_64 core-testing-any +- extra-x86_64 extra-any +- extra-staging-x86_64 extra-staging-any +- extra-testing-x86_64 extra-testing-any ++ core-loong64 core-x86_64 core-any ++ core-staging-loong64 core-staging-x86_64 core-staging-any ++ core-testing-loong64 core-testing-x86_64 core-testing-any ++ extra-loong64 extra-x86_64 extra-any ++ extra-staging-loong64 extra-staging-x86_64 extra-staging-any ++ extra-testing-loong64 extra-testing-x86_64 extra-testing-any ++ laur-loong64 ++ laur-testing-loong64 ++ laur-staging-loong64 + multilib-x86_64 + multilib-testing-x86_64 + multilib-staging-x86_64 +- kde-unstable-x86_64 kde-unstable-any +- gnome-unstable-x86_64 gnome-unstable-any ++ kde-unstable-loong64 
kde-unstable-x86_64 kde-unstable-any ++ gnome-unstable-loong64 gnome-unstable-x86_64 gnome-unstable-any ++ wine-apps-loong64 wine-apps-any + ) +diff --git a/src/makechrootpkg.in b/src/makechrootpkg.in +index 2cfd849..c1b00af 100644 +--- a/src/makechrootpkg.in ++++ b/src/makechrootpkg.in +@@ -14,7 +14,7 @@ source /usr/share/makepkg/util/config.sh + + shopt -s nullglob + +-default_makepkg_args=(--syncdeps --noconfirm --log --holdver --skipinteg) ++default_makepkg_args=(--syncdeps --noconfirm --log --holdver --skipinteg --skippgpcheck) + makepkg_args=("${default_makepkg_args[@]}") + verifysource_args=() + chrootdir= +@@ -241,7 +241,7 @@ download_sources() { + # Ensure sources are downloaded + sudo -u "$makepkg_user" --preserve-env=GNUPGHOME,SSH_AUTH_SOCK \ + env SRCDEST="$SRCDEST" BUILDDIR="$WORKDIR" \ +- makepkg --config="$copydir/etc/makepkg.conf" --verifysource -o "${verifysource_args[@]}" || ++ makepkg --config="$copydir/etc/makepkg.conf" --skippgpcheck --verifysource -o "${verifysource_args[@]}" || + die "Could not download sources." + } + diff --git a/dfrs/PKGBUILD b/dfrs/PKGBUILD index ab19daf01f..ec452fbbc6 100644 --- a/dfrs/PKGBUILD +++ b/dfrs/PKGBUILD @@ -10,18 +10,31 @@ license=('MIT') depends=('glibc' 'gcc-libs') makedepends=('cargo' 'scdoc') source=(${url}/archive/${pkgver}/${pkgname}-${pkgver}.tar.gz - ${url}/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.gz.sig) + ${url}/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.gz.sig + dfrs-la64.patch) options=('!makeflags') sha512sums=('8b8f67ff919e4f1012fe415b564574fe9b11caf01da90a025ca8b8c7707e3f90e865e30cfbeb24db7b80829d082ba7accce44e1897a67e39d129d94c171f21fe' - 'SKIP') + 'SKIP' + '6464137f5f049a9d5b5c196bd459932726f572dffd4115a1b855753f739e87c5c994c24637920c5f173dd63af250827e0be99cdd6916608555f377cf0dac7500') b2sums=('c051c1d712811d2b9c5273ec4b89bc54f8bc10a07a18a99c9908f7c58025b99e74c0d06ce9d5ec6029eda4250befbd52d3e269ec2c6fb1a160b06b4f3b019c8d' - 'SKIP') + 'SKIP' + '373b428a11de3ed16a5d6992e48ebf4b92a370e669a3e17457e8ff33dd0215a8ba1efcb3e99975f7f24ac7030e9780ab5a3495ddcdf95c6a4a12c5fb0a6ba220') validpgpkeys=( 'E240B57E2C4630BA768E2F26FC1B547C8D8172C8' # Levente Polyak ) +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/dfrs-la64.patch" +} build() { cd ${pkgname}-${pkgver} + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml </dev/null } +prepare() { + cd moby + patch -p1 -i $srcdir/moby-la64.patch +} + build() { ### check my mistakes on commit version echo 'Checking commit mismatch' diff --git a/docker/moby-la64.patch b/docker/moby-la64.patch new file mode 100644 index 0000000000..f0a027976f --- /dev/null +++ b/docker/moby-la64.patch @@ -0,0 +1,12 @@ +Index: moby/vendor/github.com/cilium/ebpf/internal/endian_le.go +=================================================================== +--- moby.orig/vendor/github.com/cilium/ebpf/internal/endian_le.go ++++ moby/vendor/github.com/cilium/ebpf/internal/endian_le.go +@@ -1,5 +1,5 @@ +-//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 +-// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64 ++//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 ++// +build 386 amd64 amd64p32 arm arm64 loong64 mipsle mips64le mips64p32le ppc64le riscv64 + + package internal + diff --git a/dog/PKGBUILD b/dog/PKGBUILD index 2b7624ebf5..b4a6c2ac6b 100644 --- a/dog/PKGBUILD +++ b/dog/PKGBUILD @@ 
-21,7 +21,7 @@ b2sums=('7ea52027e73deb6db6cb67b89063cddd507246256b3ca93479a634ff6696ab6961f004d prepare() { cd ${pkgname}-${pkgver} patch -Np1 -i ../dog-openssl-crates-update.patch - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/dra/PKGBUILD b/dra/PKGBUILD index 91c2937d08..a6c49033a4 100644 --- a/dra/PKGBUILD +++ b/dra/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/dragonfly-reverb/PKGBUILD b/dragonfly-reverb/PKGBUILD index c6b54c263a..01772110de 100644 --- a/dragonfly-reverb/PKGBUILD +++ b/dragonfly-reverb/PKGBUILD @@ -179,6 +179,6 @@ package_dragonfly-reverb-vst3() { cd $pkgbase-$pkgver for name in "${_names[@]}"; do - install -vDm 755 bin/$name.vst3/Contents/$CARCH-linux/$name.so -t "$pkgdir/usr/lib/vst3/" + install -vDm 755 bin/$name.vst3/Contents/`uname -m`-linux/$name.so -t "$pkgdir/usr/lib/vst3/" done } diff --git a/dtc/PKGBUILD b/dtc/PKGBUILD index a193bf1d6d..75a2ab13ce 100644 --- a/dtc/PKGBUILD +++ b/dtc/PKGBUILD @@ -10,7 +10,7 @@ arch=(loong64 x86_64) license=(GPL2) depends=(bash glibc libyaml python) makedepends=(meson swig python-setuptools-scm) -checkdepends=(valgrind) +makedepends+=(valgrind) source=(https://www.kernel.org/pub/software/utils/dtc/dtc-$pkgver.tar.xz fix_test.patch::https://github.com/dgibson/dtc/commit/32174a66efa4ad19fc6a2a6422e4af2ae4f055cb.patch fix_meson_version.patch::https://github.com/dgibson/dtc/commit/64a907f08b9bedd89833c1eee674148cff2343c6.patch diff --git a/dua-cli/PKGBUILD b/dua-cli/PKGBUILD index 1565f1b601..36235b2dae 100644 --- a/dua-cli/PKGBUILD +++ b/dua-cli/PKGBUILD @@ -21,7 +21,13 @@ prepare() { build() { cd "$srcdir/$pkgname-$pkgver" - cargo build --release --locked + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < .cargo/config.toml < 7953AC1FBC3DC8B3B292393ED5E9E43F7DF9EE8C # Richard Levitte A21FAB74B0088AA361152586B8EF1A6BA9DA2D5C # Tomáš Mráz ) -_arch_list=(ARM AARCH64 IA32 X64) +_arch_list=(ARM AARCH64 LOONGARCH64 IA32 X64) _build_type=RELEASE _build_plugin=GCC5 @@ -129,6 +142,8 @@ pkgver() { prepare() { # patch to be able to use brotli 1.0.9 patch -Np1 -d $pkgbase -i ../$pkgbase-202202-brotli.patch + patch -Np1 -d $pkgbase -i ../edk2-use-env-toolchains.patch + patch -Np1 -d $pkgbase -i ../relax_edk2_gcc14.diff cd $pkgbase @@ -157,6 +172,10 @@ prepare() { # copy seabios's CSM binary into place, so that it can be included in the binaries: cp -v /usr/share/qemu/bios-csm.bin OvmfPkg/Csm/Csm16/Csm16.bin + ln -sf $srcdir/edk2-platforms/Drivers . + ln -sf $srcdir/edk2-platforms/Features . + ln -sf $srcdir/edk2-platforms/Platform . + ln -sf $srcdir/edk2-platforms/Silicon . 
} # TODO: check TPM_ENABLE/TPM2_ENABLE @@ -199,6 +218,8 @@ build() { ARCH=AARCH64 make -C BaseTools echo "Building base tools (ARM)" ARCH=ARM make -C BaseTools + echo "Building base tools (LOONGARCH64)" + ARCH=LOONGARCH64 make -C BaseTools echo "Building base tools" make -C BaseTools # expose build tooling in PATH @@ -366,6 +387,24 @@ build() { dd if=Build/ArmVirtQemu-$_arch/${_build_type}_$_build_plugin/FV/QEMU_EFI.fd of=Build/ArmVirtQemu-$_arch/${_build_type}_$_build_plugin/FV/QEMU_CODE.fd conv=notrunc dd if=/dev/zero of=Build/ArmVirtQemu-$_arch/${_build_type}_$_build_plugin/FV/QEMU_VARS.fd bs=1M count=64 ;; + LOONGARCH64) + echo "Building ovmf ($_arch) with secure boot" + local _build_options=( + -p Platform/Loongson/LoongArchQemuPkg/Loongson.dsc + -a "${_arch}" + "${_common_args[@]}" + "${_efi_args[@]}" + -D NETWORK_HTTP_BOOT_ENABLE + -D NETWORK_TLS_ENABLE + -D SECURE_BOOT_ENABLE + -D TPM_ENABLE + -D TPM_CONFIG_ENABLE + ) + BaseTools/BinWrappers/PosixLike/build "${_build_options[@]}" + dd if=/dev/zero of=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_CODE.fd bs=1M count=4 + dd if=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_EFI.fd of=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_CODE.fd conv=notrunc + dd if=/dev/zero of=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_VARS.fd bs=1M count=16 + ;; esac done } @@ -415,6 +454,21 @@ package_edk2-arm() { install -vDm 644 License.txt -t "$pkgdir/usr/share/licenses/$pkgname/" } +package_edk2-loongarch64() { + local _arch=LOONGARCH64 + + pkgdesc="Firmware for Virtual Machines (loongarch64)" + url="https://github.com/tianocore/tianocore.github.io/wiki/LoongArchQemuPkg" + + cd $pkgbase + install -vDm 644 Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/*.fd -t "$pkgdir/usr/share/$pkgbase/${_arch,,}/" + # install qemu descriptors in accordance with qemu: + # https://git.qemu.org/?p=qemu.git;a=tree;f=pc-bios/descriptors + install -vDm 644 ../*$pkgname*.json -t "$pkgdir/usr/share/qemu/firmware/" + # license + install -vDm 644 License.txt -t "$pkgdir/usr/share/licenses/$pkgname/" +} + package_edk2-shell() { local _arch # minimal UEFI shell, as defined in ShellPkg/Application/Shell/ShellPkg.inf diff --git a/edk2/edk2-use-env-toolchains.patch b/edk2/edk2-use-env-toolchains.patch new file mode 100644 index 0000000000..b7832443dc --- /dev/null +++ b/edk2/edk2-use-env-toolchains.patch @@ -0,0 +1,62 @@ +Index: edk2-edk2-stable202211/BaseTools/Conf/tools_def.template +=================================================================== +--- a/BaseTools/Conf/tools_def.template ++++ b/BaseTools/Conf/tools_def.template +@@ -2294,17 +2294,17 @@ RELEASE_GCC49_AARCH64_DLINK_XIPFLAGS = - + ################## + # GCC5 IA32 definitions + ################## +-*_GCC5_IA32_OBJCOPY_PATH = DEF(GCC5_IA32_PREFIX)objcopy +-*_GCC5_IA32_CC_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_SLINK_PATH = DEF(GCC5_IA32_PREFIX)gcc-ar +-*_GCC5_IA32_DLINK_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASLDLINK_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASM_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_PP_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_VFRPP_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASLCC_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASLPP_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_RC_PATH = DEF(GCC5_IA32_PREFIX)objcopy ++*_GCC5_IA32_OBJCOPY_PATH = ENV(GCC5_IA32_PREFIX)objcopy ++*_GCC5_IA32_CC_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_SLINK_PATH = ENV(GCC5_IA32_PREFIX)gcc-ar ++*_GCC5_IA32_DLINK_PATH = 
ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASLDLINK_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASM_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_PP_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_VFRPP_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASLCC_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASLPP_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_RC_PATH = ENV(GCC5_IA32_PREFIX)objcopy + + *_GCC5_IA32_ASLCC_FLAGS = DEF(GCC5_ASLCC_FLAGS) -m32 + *_GCC5_IA32_ASLDLINK_FLAGS = DEF(GCC5_IA32_X64_ASLDLINK_FLAGS) -Wl,-m,elf_i386 -no-pie +@@ -2326,17 +2326,17 @@ RELEASE_GCC5_IA32_DLINK_FLAGS = DEF(G + ################## + # GCC5 X64 definitions + ################## +-*_GCC5_X64_OBJCOPY_PATH = DEF(GCC5_X64_PREFIX)objcopy +-*_GCC5_X64_CC_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_SLINK_PATH = DEF(GCC5_X64_PREFIX)gcc-ar +-*_GCC5_X64_DLINK_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASLDLINK_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASM_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_PP_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_VFRPP_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASLCC_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASLPP_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_RC_PATH = DEF(GCC5_X64_PREFIX)objcopy ++*_GCC5_X64_OBJCOPY_PATH = ENV(GCC5_X64_PREFIX)objcopy ++*_GCC5_X64_CC_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_SLINK_PATH = ENV(GCC5_X64_PREFIX)gcc-ar ++*_GCC5_X64_DLINK_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASLDLINK_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASM_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_PP_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_VFRPP_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASLCC_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASLPP_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_RC_PATH = ENV(GCC5_X64_PREFIX)objcopy + + *_GCC5_X64_ASLCC_FLAGS = DEF(GCC5_ASLCC_FLAGS) -m64 + *_GCC5_X64_ASLDLINK_FLAGS = DEF(GCC5_IA32_X64_ASLDLINK_FLAGS) -Wl,-m,elf_x86_64 diff --git a/edk2/relax_edk2_gcc14.diff b/edk2/relax_edk2_gcc14.diff new file mode 100644 index 0000000000..35901ff55b --- /dev/null +++ b/edk2/relax_edk2_gcc14.diff @@ -0,0 +1,44 @@ +diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c b/BaseTools/Source/C/GenFw/Elf64Convert.c +index d53ecb1767..8018d68db1 100644 +--- a/BaseTools/Source/C/GenFw/Elf64Convert.c ++++ b/BaseTools/Source/C/GenFw/Elf64Convert.c +@@ -1778,7 +1778,11 @@ WriteSections64 ( + case R_LARCH_TLS_LD64_HI20: + case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_GD64_HI20: ++ case R_LARCH_32_PCREL: + case R_LARCH_RELAX: ++ case R_LARCH_DELETE: ++ case R_LARCH_ALIGN: ++ case R_LARCH_PCREL20_S2: + // + // These types are not used or do not require fixup. + // +@@ -2185,7 +2189,11 @@ WriteRelocations64 ( + case R_LARCH_TLS_LD64_HI20: + case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_GD64_HI20: ++ case R_LARCH_32_PCREL: + case R_LARCH_RELAX: ++ case R_LARCH_DELETE: ++ case R_LARCH_ALIGN: ++ case R_LARCH_PCREL20_S2: + // + // These types are not used or do not require fixup in PE format files. 
+ // +diff --git a/BaseTools/Source/C/GenFw/elf_common.h b/BaseTools/Source/C/GenFw/elf_common.h +index ccd32804b0..d3a5303953 100644 +--- a/BaseTools/Source/C/GenFw/elf_common.h ++++ b/BaseTools/Source/C/GenFw/elf_common.h +@@ -1144,5 +1144,10 @@ typedef struct { + #define R_LARCH_TLS_LD64_HI20 96 + #define R_LARCH_TLS_GD_PC_HI20 97 + #define R_LARCH_TLS_GD64_HI20 98 +-#define R_LARCH_RELAX 99 ++#define R_LARCH_32_PCREL 99 ++#define R_LARCH_RELAX 100 ++#define R_LARCH_DELETE 101 ++#define R_LARCH_ALIGN 102 ++#define R_LARCH_PCREL20_S2 103 ++ + #endif /* !_SYS_ELF_COMMON_H_ */ diff --git a/efitools/PKGBUILD b/efitools/PKGBUILD index fd81afc3e2..f4744f4c73 100644 --- a/efitools/PKGBUILD +++ b/efitools/PKGBUILD @@ -13,19 +13,22 @@ license=(GPL2 LGPL2.1) makedepends=(git gnu-efi-libs help2man perl-file-slurp sbsigntools) depends=(glibc openssl) source=("git+https://git.kernel.org/pub/scm/linux/kernel/git/jejb/$pkgname.git#tag=v${pkgver}?signed" - "${pkgname}-1.9.2-console_warning_typo.patch") + "${pkgname}-1.9.2-console_warning_typo.patch" + efitools-la64.patch) sha512sums=('SKIP' - '9e609eb4fb2a7116166626d15470d66e2eb66a25867618d4065d48636304f88549a71c5e827ac92750183f0fabaa3b84beea3dffa905031a2867939bfae955e7') + '9e609eb4fb2a7116166626d15470d66e2eb66a25867618d4065d48636304f88549a71c5e827ac92750183f0fabaa3b84beea3dffa905031a2867939bfae955e7' + '23f8751e4cfe3369d3ec161fef908ebab91833de2e36982cc708f3fcfbaa654facc0cefca7b6bd909a918ae056640913a7d648b011cf261bae7024f0153eac55') validpgpkeys=('D5606E73C8B46271BEAD9ADF814AE47C214854D6') # James Bottomley prepare() { cd "${pkgname}" patch -Np1 -i "../${pkgname}-1.9.2-console_warning_typo.patch" + patch -Np1 -i "../efitools-la64.patch" } build() { # fix PreLoader.efi building on x86_64 #49314 - export ARCH="${CARCH}" + export ARCH="`uname -m`" # build with one job because the Makefile does not support parallel jobs ;_; # https://bugs.archlinux.org/task/73600 make -j1 -C "${pkgname}" diff --git a/efitools/efitools-la64.patch b/efitools/efitools-la64.patch new file mode 100644 index 0000000000..7ecf4efe8d --- /dev/null +++ b/efitools/efitools-la64.patch @@ -0,0 +1,13 @@ +Index: efitools/Make.rules +=================================================================== +--- efitools.orig/Make.rules ++++ efitools/Make.rules +@@ -10,6 +10,8 @@ else ifeq ($(ARCH),aarch64) + ARCH3264 = + else ifeq ($(ARCH),arm) + ARCH3264 = ++else ifeq ($(ARCH),loongarch64) ++ARCH3264 = + else + $(error unknown architecture $(ARCH)) + endif diff --git a/electron/PKGBUILD b/electron/PKGBUILD index d9c9a136b7..a68f59fc39 100644 --- a/electron/PKGBUILD +++ b/electron/PKGBUILD @@ -9,10 +9,10 @@ pkgdesc='Meta package providing the latest available stable Electron build' arch=(any) url='https://electronjs.org' license=(MIT) -depends=("electron$pkgver") +depends=("electron$pkgver-bin") package() { mkdir -p "$pkgdir/usr/bin" "$pkgdir/usr/lib" - ln -sf "${depends[0]}" "$pkgdir/usr/bin/$pkgname" - ln -sf "${depends[0]}" "$pkgdir/usr/lib/$pkgname" + ln -sf "${depends[0]%-bin}" "$pkgdir/usr/bin/$pkgname" + ln -sf "${depends[0]%-bin}" "$pkgdir/usr/lib/$pkgname" } diff --git a/electron25/PKGBUILD b/electron25/PKGBUILD index 2d6ddceae3..4565753c8d 100644 --- a/electron25/PKGBUILD +++ b/electron25/PKGBUILD @@ -62,7 +62,10 @@ source=("git+https://github.com/electron/electron.git#tag=v$pkgver" std-vector-non-const.patch use-system-libraries-in-node.patch libxml2-2.12.patch - icu-74.patch) + icu-74.patch + electron-la64.patch + ) +# shellcheck disable=SC2034 sha256sums=('SKIP' 'SKIP' 'SKIP' @@ 
-77,6 +80,10 @@ sha256sums=('SKIP' 'ff588a8a4fd2f79eb8a4f11cf1aa151298ffb895be566c57cc355d47f161f53f' 'bfae9e773edfd0ddbc617777fdd4c0609cba2b048be7afe40f97768e4eb6117e' '547e092f6a20ebd15e486b31111145bc94b8709ec230da89c591963001378845') + '621ed210d75d0e846192c1571bb30db988721224a41572c27769c0288d361c11' + '1b782b0f6d4f645e4e0daa8a4852d63f0c972aa0473319216ff04613a0592a69' + 'ba4dd0a25a4fc3267ed19ccb39f28b28176ca3f97f53a4e9f5e9215280040ea0' + '671fd958b429414a66c209c8b91b6876a77bf4ed38244044ba14703de3f02a66') # Possible replacements are listed in build/linux/unbundle/replace_gn_files.py # Keys are the names in the above script; values are the dependencies in Arch @@ -145,6 +152,7 @@ EOF pushd src/electron patch -Np1 -i ../../std-vector-non-const.patch + patch -Np1 -i ../../electron-la64.patch popd echo "Running hooks..." diff --git a/emacs/PKGBUILD b/emacs/PKGBUILD index e6adc0db04..7ff18f7181 100644 --- a/emacs/PKGBUILD +++ b/emacs/PKGBUILD @@ -118,7 +118,7 @@ package_emacs() { package_emacs-nativecomp() { pkgdesc='The extensible, customizable, self-documenting real-time display editor with native compilation enabled' - depends+=(libgccjit) +#depends+=(libgccjit) provides=(emacs) conflicts=(emacs) diff --git a/emacs/emacs-la64.patch b/emacs/emacs-la64.patch new file mode 100644 index 0000000000..f55e617b8f --- /dev/null +++ b/emacs/emacs-la64.patch @@ -0,0 +1,10 @@ +--- emacs-27.2.orig/src/sysdep.c 2022-04-26 17:50:07.714027425 +0800 ++++ emacs-27.2/src/sysdep.c 2022-04-26 17:50:01.898411164 +0800 +@@ -1818,6 +1818,7 @@ + + /* Alternate stack used by SIGSEGV handler below. */ + ++#define SIGSTKSZ 16384 + static unsigned char sigsegv_stack[SIGSTKSZ]; + + diff --git a/erdtree/PKGBUILD b/erdtree/PKGBUILD index 704884c1f9..b7a30ac892 100644 --- a/erdtree/PKGBUILD +++ b/erdtree/PKGBUILD @@ -23,7 +23,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/espeakup/PKGBUILD b/espeakup/PKGBUILD index 9eb2e05c1b..c89958ad50 100644 --- a/espeakup/PKGBUILD +++ b/espeakup/PKGBUILD @@ -4,7 +4,7 @@ pkgname=espeakup pkgver=0.90 -pkgrel=2 +pkgrel=3 pkgdesc="A light weight connector for espeak-ng and speakup" arch=(loong64 x86_64) url="https://github.com/linux-speakup/espeakup" diff --git a/espflash/PKGBUILD b/espflash/PKGBUILD index a20342f8f1..b53139d338 100644 --- a/espflash/PKGBUILD +++ b/espflash/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('f4361c5c8f7d31d10cf22c67723847b1597c6ca307c67aa76e9b1620e9f3bb0a18b9f03 prepare() { cd ${pkgbase}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/espup/PKGBUILD b/espup/PKGBUILD index 671e862f42..6d9d169bc5 100644 --- a/espup/PKGBUILD +++ b/espup/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('ecf2b10a72f664db49e584901ccacbff78e00a7b33a9ab9ae146d09ebb754d9dcd5e5ad prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/eva/PKGBUILD b/eva/PKGBUILD index 810f8af0bf..dc67d93440 100644 --- a/eva/PKGBUILD +++ b/eva/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('d6a6eb8e0d46de1fea9bd00c361bd7955fcd7cc8f3310b786aad48c1dce7b3f7') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/evcxr_repl/PKGBUILD 
b/evcxr_repl/PKGBUILD index 2646b2d384..f2db39e12f 100644 --- a/evcxr_repl/PKGBUILD +++ b/evcxr_repl/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$_pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/farstream/PKGBUILD b/farstream/PKGBUILD index bea546c842..25714268f3 100644 --- a/farstream/PKGBUILD +++ b/farstream/PKGBUILD @@ -2,7 +2,7 @@ pkgname=farstream pkgver=0.2.9 -pkgrel=3 +pkgrel=4 pkgdesc="Farstream (formerly Farsight) - Audio/Video Communications Framework" arch=('loong64' 'x86_64') url="https://www.freedesktop.org/wiki/Software/Farstream" diff --git a/fbterm/0001-Fix-build-with-gcc-6.patch b/fbterm/0001-Fix-build-with-gcc-6.patch new file mode 100644 index 0000000000..ad5dd65c48 --- /dev/null +++ b/fbterm/0001-Fix-build-with-gcc-6.patch @@ -0,0 +1,104 @@ +From 69917d25c6f718572433262d86691bf24e72e4c8 Mon Sep 17 00:00:00 2001 +From: Nobuhiro Iwamatsu +Date: Wed, 13 Jul 2016 12:02:10 +0900 +Subject: [PATCH] Fix build with gcc-6 + +Signed-off-by: Nobuhiro Iwamatsu +--- + src/lib/vterm.cpp | 4 ++-- + src/lib/vterm_states.cpp | 18 +++++++++--------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/src/lib/vterm.cpp b/src/lib/vterm.cpp +index 3a5dcc7..f79f44c 100644 +--- a/src/lib/vterm.cpp ++++ b/src/lib/vterm.cpp +@@ -68,13 +68,13 @@ u8 VTerm::control_map[MAX_CONTROL_CODE], VTerm::escape_map[NR_STATES][MAX_ESCAPE + + void VTerm::init_state() + { +- for (u8 i = 1; control_sequences[i].code != (u16)-1; i++) { ++ for (u8 i = 1; control_sequences[i].code != (u16)0xFFFF; i++) { + control_map[control_sequences[i].code] = i; + } + + u8 state = ESnormal; + for (u8 i = 1; ; i++) { +- if (escape_sequences[i].code == (u16)-1) { ++ if (escape_sequences[i].code == (u16)0xFFFF) { + state++; + if (state == NR_STATES) break; + } else { +diff --git a/src/lib/vterm_states.cpp b/src/lib/vterm_states.cpp +index 49e7588..6aaa8b3 100644 +--- a/src/lib/vterm_states.cpp ++++ b/src/lib/vterm_states.cpp +@@ -39,14 +39,14 @@ const VTerm::Sequence VTerm::control_sequences[] = { + { 0x1B, 0, ESesc }, + { 0x7F, 0, ESkeep }, + { 0x9B, 0, ESsquare }, +- { -1} ++ { 0xFFFF} + }; + + const VTerm::Sequence VTerm::escape_sequences[] = { + { 0, 0, ESnormal }, + + // ESnormal +- { -1 }, ++ { 0xFFFF }, + + // ESesc + { '[', &VTerm::clear_param, ESsquare }, +@@ -65,7 +65,7 @@ const VTerm::Sequence VTerm::escape_sequences[] = { + { '8', &VTerm::restore_cursor, ESnormal }, + { '>', &VTerm::keypad_numeric, ESnormal }, + { '=', &VTerm::keypad_application, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESsquare + { '[', 0, ESfunckey }, +@@ -104,7 +104,7 @@ const VTerm::Sequence VTerm::escape_sequences[] = { + { '`', &VTerm::cursor_position_col, ESnormal }, + { ']', &VTerm::linux_specific, ESnormal }, + { '}', &VTerm::fbterm_specific, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESnonstd + { '0' | ADDSAME(9), &VTerm::set_palette, ESkeep }, +@@ -112,25 +112,25 @@ const VTerm::Sequence VTerm::escape_sequences[] = { + { 'a' | ADDSAME(5), &VTerm::set_palette, ESkeep }, + { 'P', &VTerm::begin_set_palette, ESkeep }, + { 'R', &VTerm::reset_palette, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESpercent + { '@', &VTerm::clear_utf8, ESnormal }, + { 'G', &VTerm::set_utf8, ESnormal }, + { '8', &VTerm::set_utf8, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // EScharset + { '0', &VTerm::set_charset, ESnormal }, + { 'B', &VTerm::set_charset, ESnormal }, + { 'U', &VTerm::set_charset, ESnormal }, + { 'K', 
&VTerm::set_charset, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // EShash + { '8', &VTerm::screen_align, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESfunckey +- { -1 }, ++ { 0xFFFF }, + }; +-- +2.8.1 + diff --git a/fbterm/PKGBUILD b/fbterm/PKGBUILD new file mode 100644 index 0000000000..73ca72ded3 --- /dev/null +++ b/fbterm/PKGBUILD @@ -0,0 +1,60 @@ +# Maintainer: ivanp7 + +pkgname=fbterm +_gitname=fbterm +_majorver=1.7 +pkgver=1.7_5 +pkgrel=5 +pkgdesc='Framebuffer terminal emulator' +arch=('x86_64' 'armv7h' 'loong64') +url='https://salsa.debian.org/debian/fbterm' +license=('GPL2') +depends=(freetype2 fontconfig ncurses) +makedepends=(autoconf patch) +provides=(fbterm) +conflicts=(fbterm) +source=("http://deb.debian.org/debian/pool/main/f/fbterm/fbterm_1.7.orig.tar.gz" + '0001-Fix-build-with-gcc-6.patch' 'fix_ftbfs_crosscompile.patch' 'fix_ftbfs_epoll.patch' + 'fbconfig.patch' 'color_palette.patch' 'fbterm.patch' + 'fbtermrc') +sha256sums=('b98d487e84618503887e3996162354c482e24884bad8bf2219b6776372f306ad' + '8054410ab97da3df03406543c6a471acf3323b9e5712da6455d7c49cad7489ce' + '73f0c87aaa5a74631c167fb765c0340dc28626b00d0a3cd065cebf71acc585f7' + '2b5daa2664adf1efb3e478c2f97376c055b1698422524d262fbae2e7a530a323' + '0d1781e2654d32d5dfd1cbf17680b49aefbb124b7164ca1d70fcf4468563be7c' + 'ad865628f2f6d67c82a5d29b1ec68af37293b9df5f4a6e8fad6b356d08ab368b' + '5cd1c14c640679a40f8a9d9781c2a5af5db7543c2296cda99f2886aa40468735' + 'ccd21f8b66631067393cb74e222aca1935c449be569b95c1008d6c7c76b7d4b6') + +prepare() { + cd $srcdir/$_gitname-$_majorver + + patch -p1 < "$srcdir/fbconfig.patch" + patch -p1 < "$srcdir/color_palette.patch" + patch -p1 < "$srcdir/fbterm.patch" + patch -p1 < "$srcdir/0001-Fix-build-with-gcc-6.patch" + patch -p1 < "$srcdir/fix_ftbfs_crosscompile.patch" + patch -p1 < "$srcdir/fix_ftbfs_epoll.patch" + + autoreconf -fvi + ./configure --prefix=/usr +} + +build() { + cd $srcdir/$_gitname-$_majorver + make + mkdir -p tic + TERMINFO=$srcdir/$_gitname-$_majorver/tic tic terminfo/fbterm +} + +package() { + cd $srcdir/$_gitname-$_majorver + make DESTDIR="$pkgdir/" install + + mkdir -p "$pkgdir/usr/share/terminfo" + cp -r tic/f "$pkgdir/usr/share/terminfo/" + + mkdir -p "$pkgdir/etc/fbterm" + cp $srcdir/fbtermrc "$pkgdir/etc/fbterm/fbtermrc.example" +} + diff --git a/fbterm/color_palette.patch b/fbterm/color_palette.patch new file mode 100644 index 0000000000..efc7eefe7e --- /dev/null +++ b/fbterm/color_palette.patch @@ -0,0 +1,102 @@ +--- a/src/screen_render.cpp ++++ b/src/screen_render.cpp +@@ -78,7 +78,7 @@ void Screen::initFillDraw() + + u32 color = 0; + Config::instance()->getOption("color-background", color); +- if (color > 7) color = 0; ++ if (color >= NR_COLORS) color = 0; + bgcolor = color; + + u32 size = mBytesPerLine * ((mRotateType == Rotate0 || mRotateType == Rotate180) ? 
mHeight : mWidth); +--- a/src/fbshell.h 2010-10-18 11:20:11.000000000 +0300 ++++ b/src/fbshell.h 2021-11-27 23:48:54.286721768 +0300 +@@ -58,6 +58,7 @@ class FbShell : public Shell { + void changeMode(ModeType type, u16 val); + void reportCursor(); + void reportMode(); ++ void configColors(); + + struct Cursor { + Cursor() { +--- a/src/fbshell.cpp 2010-10-18 11:20:11.000000000 +0300 ++++ b/src/fbshell.cpp 2021-11-27 23:48:54.286721768 +0300 +@@ -39,7 +39,7 @@ + #define screen (Screen::instance()) + #define manager (FbShellManager::instance()) + +-static const Color defaultPalette[NR_COLORS] = { ++static Color defaultPalette[NR_COLORS] = { + {0x00, 0x00, 0x00}, /* 0 */ + {0xaa, 0x00, 0x00}, /* 1 */ + {0x00, 0xaa, 0x00}, /* 2 */ +@@ -322,11 +322,11 @@ u8 VTerm::init_default_color(bool foreground) + if (foreground) { + color = 7; + Config::instance()->getOption("color-foreground", color); +- if (color > 7) color = 7; ++ if (color >= NR_COLORS) color = 7; + } else { + color = 0; + Config::instance()->getOption("color-background", color); +- if (color > 7) color = 0; ++ if (color >= NR_COLORS) color = 0; + } + + return color; +@@ -552,6 +552,8 @@ static s32 tty0_fd = -1; + + void FbShell::switchVt(bool enter, FbShell *peer) + { ++ configColors(); ++ + if (tty0_fd == -1) tty0_fd = open("/dev/tty0", O_RDWR); + if (tty0_fd != -1) { + seteuid(0); +@@ -771,3 +773,34 @@ bool FbShell::childProcessExited(s32 pid) + + return false; + } ++ ++void FbShell::configColors(){ ++ s8 varColor[32], color[7], rgb[3]; ++ u32 i,j,k,x; ++ for(k=0;k<NR_COLORS;k++){ ++ sprintf(varColor, "color-%d", k); ++ Config::instance()->getOption(varColor, color, sizeof(color)); ++ for(i=0;i<3;i++){ ++ rgb[i]=0; ++ for(j=0;j<2;j++){ ++ x=i*2+j; ++ if(('0' <= color[x]) && (color[x] <= '9')) ++ rgb[i]|=(color[x]-48); ++ else if(('A' <= color[x]) && (color[x] <= 'F')) ++ rgb[i]|=(color[x]-55); ++ else if(('a' <= color[x]) && (color[x] <= 'f')) ++ rgb[i]|=(color[x]-87); ++ else ++ goto NoTouch; ++ if(!j) ++ rgb[i]<<=4; ++ } ++ if(i==2){ ++ defaultPalette[k].red=rgb[0]; ++ defaultPalette[k].green=rgb[1]; ++ defaultPalette[k].blue=rgb[2]; ++ } ++ } ++NoTouch:; ++ } ++} +--- a/doc/fbterm.1.in 2010-10-18 11:20:11.000000000 +0300 ++++ b/doc/fbterm.1.in 2021-11-27 23:11:43.270223092 +0300 +@@ -176,6 +176,9 @@ + + A new terminfo database entry named "fbterm" was added to use these private sequences, all program based on terminfo should work with it. + By default, FbTerm sets environment variable "TERM" to value "linux", user need run "TERM=fbterm /path/to/program" to enable 256 color mode. ++ ++The palette colors may be changed in the configuration using options "\fIcolor-num\fR=RRGGBB", where "num" is a color number from 0 to 255. ++ + .SH "INPUT METHOD" + Instead of adding input method directly in FbTerm, a client-server based input method framework is designed to do + this work. FbTerm acts as a client, standalone IM program as a server, and they run in separated processes.
diff --git a/fbterm/fbconfig.patch b/fbterm/fbconfig.patch new file mode 100644 index 0000000000..b5f861a631 --- /dev/null +++ b/fbterm/fbconfig.patch @@ -0,0 +1,78 @@ +--- a/src/fbconfig.cpp 2010-10-18 11:20:11.000000000 +0300 ++++ b/src/fbconfig.cpp 2021-11-27 23:48:54.286721768 +0300 +@@ -28,6 +28,8 @@ + #include "config.h" + #include "fbconfig.h" + ++#define CONFIG_DIR_NAME "fbterm" ++#define CONFIG_FILE_NAME "fbtermrc" + #define MAX_CONFIG_FILE_SIZE 10240 + + DEFINE_INSTANCE_DEFAULT(Config) +@@ -38,18 +40,34 @@ + mConfigBuf = 0; + mConfigEntrys = 0; + +- const s8 *home = getenv("HOME"); +- if (!home) { +- if (getuid()) return; +- home = "/root"; +- } +- +- s8 name[64]; +- snprintf(name, sizeof(name), "%s/%s", home, ".fbtermrc"); ++ s8 name[256]; + +- checkConfigFile(name); ++ const s8 *home = getenv("XDG_CONFIG_HOME"); ++ const s8 *format_d = NULL, *format_f = NULL, *format_df = NULL; ++ if (!home || !home[0]) ++ { ++ home = getenv("HOME"); ++ if (!home || !home[0]) ++ return; ++ ++ format_d = "%s/.config/" CONFIG_DIR_NAME "/"; ++ format_f = "%s/.config/" CONFIG_FILE_NAME; ++ format_df = "%s/.config/" CONFIG_DIR_NAME "/" CONFIG_FILE_NAME; ++ } ++ else ++ { ++ format_d = "%s/" CONFIG_DIR_NAME "/"; ++ format_f = "%s/" CONFIG_FILE_NAME; ++ format_df = "%s/" CONFIG_DIR_NAME "/" CONFIG_FILE_NAME; ++ } + ++ snprintf(name, sizeof(name), format_d, home); + struct stat cstat; ++ if (stat(name, &cstat) == -1) ++ snprintf(name, sizeof(name), format_f, home); ++ else ++ snprintf(name, sizeof(name), format_df, home); ++ + if (stat(name, &cstat) == -1) return; + if (cstat.st_size > MAX_CONFIG_FILE_SIZE) return; + +--- a/doc/fbterm.1.in 2010-10-18 11:20:11.000000000 +0300 ++++ b/doc/fbterm.1.in 2021-11-27 23:11:43.270223092 +0300 +@@ -22,8 +22,9 @@ + file. If that is not set, /bin/sh will be used. You should use the \fI--\fR argument to separate FbTerm's options from + the arguments supplied to the \fIcommand\fR. + +-FbTerm first uses option value specified in command line arguments, then in the configure file \fI$HOME/.fbtermrc\fR. +-If that file doesn't exist, FbTerm will create it with default options on startup. ++FbTerm first uses option value specified in command line arguments, then in the configure file \fI$XDG_CONFIG_HOME/fbtermrc\fR or ++\fI$XDG_CONFIG_HOME/fbterm/fbtermrc\fR (if directory \fI$XDG_CONFIG_HOME/fbterm/\fR exists). ++If \fIXDG_CONFIG_HOME\fR is unset or empty, FbTerm uses \fI$HOME/.config\fR for it instead. + .TP + \fB-h, --help\fR + display the help and exit +@@ -77,7 +78,7 @@ + display available VESA video modes + + .TP +-see comments in \fI$HOME/.fbtermrc\fR for details of these options. ++see comments in the configure file for details of these options. 
+ .SH "SHORTCUT SUMMARY" + keyboard: + CTRL_ALT_E: exit from FbTerm diff --git a/fbterm/fbterm.patch b/fbterm/fbterm.patch new file mode 100644 index 0000000000..6a9656ff34 --- /dev/null +++ b/fbterm/fbterm.patch @@ -0,0 +1,9 @@ +--- a/terminfo/fbterm 2010-10-18 11:20:11.000000000 +0300 ++++ b/terminfo/fbterm 2021-05-19 03:05:56.885773502 +0300 +@@ -1,5 +1,5 @@ + # Reconstructed via infocmp from file: /lib/terminfo/l/linux +-fbterm|framebuffer based terminal emulator, ++fbterm-256color|framebuffer based terminal emulator, + am, bce, ccc, eo, mir, msgr, xenl, xon, + colors#256, it#8, ncv#18, pairs#32767, + acsc=+\020\,\021-\030.^Y0\333`\004a\261f\370g\361h\260i\316j\331k\277l\332m\300n\305o~p\304q\304r\304s_t\303u\264v\301w\302x\263y\363z\362{\343|\330}\234~\376, diff --git a/fbterm/fbtermrc b/fbterm/fbtermrc new file mode 100644 index 0000000000..f2ef3201a9 --- /dev/null +++ b/fbterm/fbtermrc @@ -0,0 +1,66 @@ +# Configuration for FbTerm + +# Lines starting with '#' are ignored. +# Note that end-of-line comments are NOT supported, comments must be on a line of their own. + + +# font family names/pixelsize used by fbterm, multiple font family names must be seperated by ',' +# and using a fixed width font as the first is strongly recommended +font-names=xos4 Terminus +font-size=12 + +# force font width (and/or height), usually for non-fixed width fonts +# legal value format: n (fw_new = n), +n (fw_new = fw_old + n), -n (fw_new = fw_old - n) +#font-width= +#font-height= + +# terminal palette consists of 256 colors (0-255) +# 0 = black, 1 = red, 2 = green, 3 = brown, 4 = blue, 5 = magenta, 6 = cyan, 7 = white +# 8-15 are brighter versions of 0-7 +# 16-231 is 6x6x6 color cube +# 232-255 is grayscale +color-0=000000 +color-1=AA0000 +color-2=00AA00 +color-3=AA5500 +color-4=0000AA +color-5=AA00AA +color-6=00AAAA +color-7=AAAAAA +color-8=555555 +color-9=FF5555 +color-10=55FF55 +color-11=FFFF55 +color-12=5555FF +color-13=FF55FF +color-14=55FFFF +color-15=FFFFFF + +# default foreground/background colors (chosen from palette) +color-foreground=7 +color-background=0 + +# max scroll-back history lines of every window, value must be [0 - 65535], 0 means disable it +history-lines=0 + +# up to 5 additional text encodings, multiple encodings must be seperated by ',' +# run 'iconv --list' to get available encodings. 
+text-encodings= + +# cursor shape: 0 = underline, 1 = block +# cursor flash interval in milliseconds, 0 means disable flashing +cursor-shape=0 +cursor-interval=500 + +# additional ascii chars considered as part of a word while auto-selecting text, except ' ', 0-9, a-z, A-Z +word-chars=._- + +# change the clockwise orientation angle of screen display +# available values: 0 = 0 degree, 1 = 90 degrees, 2 = 180 degrees, 3 = 270 degrees +screen-rotate=0 + +# specify the favorite input method program to run +input-method= + +# treat ambiguous width characters as wide +#ambiguous-wide=yes diff --git a/fbterm/fix_ftbfs_crosscompile.patch b/fbterm/fix_ftbfs_crosscompile.patch new file mode 100644 index 0000000000..0381834d20 --- /dev/null +++ b/fbterm/fix_ftbfs_crosscompile.patch @@ -0,0 +1,28 @@ +Description: fbterm FTCBFS: falls back to broken select code +Forwarded: not yet +Bug-Debian: https://bugs.debian.org/909679 +Author: Helmut Grohne + +--- fbterm-1.7.orig/configure.ac ++++ fbterm-1.7/configure.ac +@@ -83,16 +83,16 @@ + fi + fi + +-if test x"$EPOLL" = xauto -a x"$cross_compiling" = xno; then +- AC_RUN_IFELSE( ++if test x"$EPOLL" = xauto; then ++ AC_COMPILE_IFELSE( + AC_LANG_PROGRAM([[#include <sys/epoll.h>]], + [[if (epoll_create(10) >= 0) return 0; return 1;]]), + [EPOLL=yes] + ) + fi + +-if test x"$SIGNALFD" = xauto -a x"$cross_compiling" = xno; then +- AC_RUN_IFELSE( ++if test x"$SIGNALFD" = xauto; then ++ AC_COMPILE_IFELSE( + AC_LANG_PROGRAM([[#include <sys/signalfd.h>]], + [[sigset_t mask; if (signalfd(-1, &mask, 0) >= 0) return 0; return 1;]]), + [SIGNALFD=yes] diff --git a/fbterm/fix_ftbfs_epoll.patch b/fbterm/fix_ftbfs_epoll.patch new file mode 100644 index 0000000000..0a0bff2cc1 --- /dev/null +++ b/fbterm/fix_ftbfs_epoll.patch @@ -0,0 +1,15 @@ +Description: fbterms FTBFS for architectures without epoll support +Forwarded: not yet +Bug-Debian: https://bugs.debian.org/909680 +Author: Helmut Grohne + +--- fbterm-1.7.orig/src/fbio.cpp ++++ fbterm-1.7/src/fbio.cpp +@@ -30,6 +30,7 @@ + #define NR_EPOLL_FDS 10 + s32 epollFd; + #else ++#include <sys/select.h> + static fd_set fds; + static u32 maxfd = 0; + #endif diff --git a/fcitx/PKGBUILD b/fcitx/PKGBUILD index 32fa376003..dadea2c1cb 100644 --- a/fcitx/PKGBUILD +++ b/fcitx/PKGBUILD @@ -29,13 +29,13 @@ sha512sums=('d871df84ebb3514b6474000b693246c8e5b198121e9a5a0ca18d478e54cd4144f8d validpgpkeys=('2CC8A0609AD2A479C65B6D5C8E8B898CBF2412F9') # Weng Xuetian prepare() { - mkdir build + mkdir -p _build sed -e 's|enchant/enchant.h|enchant-2/enchant.h|' -i $pkgname-$pkgver/cmake/FindEnchant.cmake } build() { - cd build + cd _build cmake ../$pkgname-$pkgver \ -DCMAKE_INSTALL_PREFIX=/usr \ @@ -53,11 +53,11 @@ build() { } check() { - cd build + cd _build make test } package() { - cd build + cd _build make DESTDIR="$pkgdir" install } diff --git a/fcitx5-chinese-addons/PKGBUILD b/fcitx5-chinese-addons/PKGBUILD index 116c9506af..01eb6db0d7 100644 --- a/fcitx5-chinese-addons/PKGBUILD +++ b/fcitx5-chinese-addons/PKGBUILD @@ -9,7 +9,7 @@ arch=('loong64' 'x86_64') url="https://github.com/fcitx/fcitx5-chinese-addons" license=('GPL') conflicts=('fcitx') -depends=('curl' 'fcitx5-qt' 'libime' 'opencc' 'qt5-webengine') +depends=('curl' 'fcitx5-qt' 'libime' 'opencc' 'qt5-webkit') #'qt5-webengine') makedepends=('boost' 'extra-cmake-modules' 'fcitx5-lua' 'fmt' 'ninja') optdepends=('fcitx5-lua: Lua and imeapi support from pinyin') source=("https://download.fcitx-im.org/fcitx5/$pkgname/$pkgname-${pkgver}_dict.tar.xz"{,.sig}) @@ -20,7 +20,7 @@ validpgpkeys=('2CC8A0609AD2A479C65B6D5C8E8B898CBF2412F9') # Weng
Xuetian ++ ++ loongarch: Fix gcc installation. ++ * cross-tools/cross.conf (loongarch64): Change gcc version to match what ++ it actually reports. ++ ++2022-02-12 Bruno Haible ++ ++ Fix cross-tools build. ++ * cross-tools/cross-build.sh (func_build_gcc): Set LD_LIBRARY_PATH, so ++ that libisl.so.15 gets found during installation. ++ ++2022-02-12 Bruno Haible ++ ++ loongarch: Allow for continuous integration. ++ * cross-tools/cross-build.sh (func_build_gcc): Download fork tarball ++ from alpha.gnu.org. Adjust installation completeness test. ++ * cross-tools/cross.conf (loongarch64): Use binutils 2.38. ++ ++2022-01-16 Bruno Haible ++ ++ loongarch: Add support for loongarch64 ABI. ++ * cross-tools/cross-build.sh (func_build_binutils): Add support for ++ newer binutils snapshots. ++ (func_build_gcc): Add support for GCC 10 and newer. ++ * cross-tools/cross.conf: Add configuration for loongarch64 cross tools. ++ * porting-tools/abis/call-used-registers.txt: Add info about ++ loongarch64. ++ * porting-tools/abis/reg-struct-return.txt: Likewise. ++ * porting-tools/abis/stack-frame.txt: Likewise. Some more tweaks. ++ * porting-tools/execstack/voidfunc.c: Add command for loongarch64. ++ * porting-tools/execstack/voidfunc-loongarch64.o: New generated file. ++ * porting-tools/execstack/main.c (voidfunc): Define also for loongarch. ++ * porting-tools/execstack/README: Add info about loongarch64. ++ * ffcall-abi.h: Add support for loongarch64-lp64. ++ * common/asm-loongarch.sh: New file. ++ * Makefile.in (SOURCE_FILES): Add it. ++ * avcall/avcall.h (__AV_STRUCT_RETURN, __AV_REGISTER_STRUCT_RETURN): Add ++ code for __loongarch64__. ++ * avcall/avcall-alist.h (__av_alist): Likewise. ++ * avcall/avcall-internal.h: Add code for __loongarch64__, especially ++ __av_start1, __av_reg_struct_return, __av_start_struct4, __av_word, ++ __av_long, __av_ulong, __av_ptr, __av_longlong, __av_ulonglong, ++ _av_float, _av_double, __av_struct. ++ * avcall/avcall-loongarch64.c: New file, based on ++ avcall/avcall-riscv64.c. ++ * avcall/Makefile.devel (avcall-loongarch64-linux.s, ++ avcall-loongarch64-macro.S): New targets. ++ * avcall/Makefile.in (avcall-loongarch64.lo, avcall-loongarch64.s): New ++ targets. ++ (clean): Remove avcall-loongarch64.s. ++ (SOURCE_FILES): Add avcall-loongarch64.c, avcall-loongarch64-linux.s, ++ avcall-loongarch64-macro.S. ++ * vacall/vacall.h (__VA_STRUCT_RETURN, __VA_REGISTER_STRUCT_RETURN): Add ++ code for __loongarch64__. ++ * vacall/vacall-internal.h: Add code for __loongarch64__, especially ++ __va_alist, __va_reg_struct_return, __va_start_struct2, ++ __va_arg_leftadjusted, __va_arg_adjusted, _va_arg_longlong, ++ _va_arg_ulonglong, __va_align_double, _va_arg_float, _va_arg_double, ++ __va_arg_struct, _va_return_longlong. ++ * vacall/vacall-loongarch64.c: New file, based on ++ vacall/vacall-riscv64.c. ++ * vacall/Makefile.devel (vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S): New targets. ++ * vacall/Makefile.in (vacall-loongarch64.@OBJEXT@, ++ vacall-loongarch64.s): New targets. ++ (clean): Remove vacall-loongarch64.s. ++ (SOURCE_FILES): Add vacall-loongarch64.c, vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S. ++ * callback/vacall_r/vacall_r.h (__VA_STRUCT_RETURN, ++ __VA_REGISTER_STRUCT_RETURN): Add code for __loongarch64__. ++ * callback/vacall_r/Makefile.devel (vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S): New targets. ++ * callback/vacall_r/Makefile.in (vacall-loongarch64.lo, ++ vacall-loongarch64.s): New targets. 
++ (clean): Remove vacall-loongarch64.s. ++ (SOURCE_FILES): Add vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S. ++ * trampoline/Makefile.devel (proto-loongarch64.s, tramp-loongarch64.o): ++ New targets. ++ * trampoline/proto-loongarch64.s: New generated file. ++ * trampoline/tramp-loongarch64.s: New file. ++ * trampoline/tramp-loongarch64.o: New generated file. ++ * trampoline/trampoline.c: Implement for __loongarch64__. ++ * callback/trampoline_r/Makefile.devel (proto-loongarch64.s, ++ tramp-loongarch64.o): New targets. ++ * callback/trampoline_r/proto64.c: Add code for __loongarch64__. ++ * callback/trampoline_r/proto-loongarch64.s: New generated file. ++ * callback/trampoline_r/tramp-loongarch64.s: New file. ++ * callback/trampoline_r/tramp-loongarch64.o: New generated file. ++ * callback/trampoline_r/trampoline.c: Implement for __loongarch64__. ++ * callback/trampoline_r/test1.c: Add support for __loongarch64__. ++ * PLATFORMS, */PLATFORMS: List the 64-bit LoongArch ABI. ++ * NEWS: Mention the new port. ++ ++2022-01-16 Bruno Haible ++ ++ Simplify. ++ * callback/vacall_r/Makefile.in (vacall-alpha.s, vacall-powerpc.s, ++ vacall-s390.lo, vacall-s390x.s, vacall-riscv32-ilp32d.s, ++ vacall-riscv64-lp64d.s): Don't use -I options during preprocessing. ++ ++2021-06-26 Bruno Haible ++ ++ x86_64: Create a read-only .eh_frame section on all platforms. ++ Reported by Thomas Klausner at ++ . ++ * common/asm-x86_64.h (EH_FRAME_SECTION): Use flags "a" (instead of ++ "aw") on all platforms. ++ ++2021-06-13 Bruno Haible ++ ++ maint: Don't require an internet connection for running autogen.sh. ++ * libtool-patches: New directory. ++ * Makefile.maint (libtool-imported-files): Don't call wget. Take the ++ patches from libtool-patches/ instead. ++ + 2021-06-13 Bruno Haible + + Prepare for 2.4 release. +diff --git a/Makefile.in b/Makefile.in +index 05a83dd..2a74ecc 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -151,6 +151,7 @@ SOURCE_FILES = \ + common/asm-hppa.sh common/asm-hppa.h \ + common/asm-hppa64.sh common/asm-hppa64.h \ + common/asm-i386.sh common/asm-i386.h \ ++ common/asm-loongarch.sh \ + common/asm-m68k.sh common/asm-m68k.h \ + common/asm-mips.sh common/asm-mips.h \ + common/asm-powerpc.sh \ +diff --git a/NEWS b/NEWS +index 5911682..af1dc48 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,3 +1,9 @@ ++New in 2.5: ++ ++* Added support for the following platforms: ++ (Previously, a build on these platforms failed.) ++ - loongarch64: Linux with lp64d ABI. ++ + New in 2.4: + + * Added support for the following platforms: +diff --git a/PLATFORMS b/PLATFORMS +index b7dc0c8..ca5a53c 100644 +--- a/PLATFORMS ++++ b/PLATFORMS +@@ -49,4 +49,5 @@ Supported CPUs: (Put the GNU config.guess values here.) 
+ s390x s390x-ibm-linux (gcc) + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) ++ loongarch64 loongarch64-unknown-linux (gcc) + +diff --git a/avcall/Makefile.devel b/avcall/Makefile.devel +index db392b6..2933060 100644 +--- a/avcall/Makefile.devel ++++ b/avcall/Makefile.devel +@@ -25,7 +25,8 @@ precompiled : \ + avcall-ia64-macro.S \ + avcall-x86_64-macro.S avcall-x86_64-x32-linux.s avcall-x86_64-windows-macro.S \ + avcall-s390-macro.S avcall-s390x-macro.S \ +- avcall-riscv32-ilp32d-macro.S avcall-riscv64-lp64d-macro.S ++ avcall-riscv32-ilp32d-macro.S avcall-riscv64-lp64d-macro.S \ ++ avcall-loongarch64-macro.S + + + avcall-i386-linux.s : avcall-i386.c avcall-internal.h avcall.h avcall-alist.h $(THISFILE) +@@ -242,6 +243,13 @@ avcall-riscv64-lp64d-macro.S : avcall-riscv64-lp64d-linux.s ../common/asm-riscv. + (../common/asm-riscv.sh < avcall-riscv64-lp64d-linux.s ; cat ../common/noexecstack.h) > avcall-riscv64-lp64d-macro.S + + ++avcall-loongarch64-linux.s : avcall-loongarch64.c avcall-internal.h avcall.h avcall-alist.h $(THISFILE) ++ $(CROSS_TOOL) loongarch64-linux gcc $(GCCFLAGS) -D__loongarch64__ -S avcall-loongarch64.c -o avcall-loongarch64-linux.s ++ ++avcall-loongarch64-macro.S : avcall-loongarch64-linux.s ../common/asm-loongarch.sh ../common/noexecstack.h $(THISFILE) ++ (../common/asm-loongarch.sh < avcall-loongarch64-linux.s ; cat ../common/noexecstack.h) > avcall-loongarch64-macro.S ++ ++ + # --------------- Rules for debugging test failures --------------- + + tests : tests-i386.s tests-m68k.s tests-mips.s tests-sparc.s tests-alpha.s tests-hppa.s tests-arm.s tests-powerpc.s tests-ia64.s tests-x86_64.s +diff --git a/avcall/Makefile.in b/avcall/Makefile.in +index 466023d..b78ba78 100644 +--- a/avcall/Makefile.in ++++ b/avcall/Makefile.in +@@ -258,6 +258,12 @@ avcall-riscv64-lp64d.lo : avcall-riscv64-lp64d.s + avcall-riscv64-lp64d.s : $(srcdir)/avcall-riscv64-lp64d-macro.S + $(CPP) $(ASPFLAGS) $(srcdir)/avcall-riscv64-lp64d-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > avcall-riscv64-lp64d.s + ++avcall-loongarch64.lo : avcall-loongarch64.s ++ $(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c avcall-loongarch64.s ++ ++avcall-loongarch64.s : $(srcdir)/avcall-loongarch64-macro.S ++ $(CPP) $(ASPFLAGS) $(srcdir)/avcall-loongarch64-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > avcall-loongarch64.s ++ + avcall-libapi.lo : $(srcdir)/avcall-libapi.c ../config.h $(srcdir)/avcall-internal.h $(srcdir)/avcall.h $(srcdir)/avcall-alist.h + $(LIBTOOL_COMPILE) $(CC) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @DISABLE_TYPE_BASED_ALIASING@ -c $(srcdir)/avcall-libapi.c + +@@ -353,7 +359,7 @@ mostlyclean : clean + + clean : force + $(RM) *.@OBJEXT@ *.lo *.a libavcall.* core +- $(RM) avcall-i386.s avcall-sparc.s avcall-sparc64.s avcall-m68k.s avcall-mips.s avcall-mipsn32.s avcall-mips64.s avcall-alpha.s avcall-hppa.s avcall-hppa64.s avcall-arm.s avcall-armhf.s avcall-arm64.s avcall-powerpc.s avcall-powerpc64.s avcall-powerpc64-elfv2.s avcall-ia64.s avcall-x86_64.s avcall-x86_64.asm avcall-x86_64-x32.s avcall-s390.s avcall-s390x.s avcall-riscv32-ilp32d.s avcall-riscv64-lp64d.s ++ $(RM) avcall-i386.s avcall-sparc.s avcall-sparc64.s avcall-m68k.s avcall-mips.s avcall-mipsn32.s avcall-mips64.s avcall-alpha.s avcall-hppa.s avcall-hppa64.s avcall-arm.s avcall-armhf.s avcall-arm64.s avcall-powerpc.s avcall-powerpc64.s avcall-powerpc64-elfv2.s avcall-ia64.s avcall-x86_64.s avcall-x86_64.asm 
avcall-x86_64-x32.s avcall-s390.s avcall-s390x.s avcall-riscv32-ilp32d.s avcall-riscv64-lp64d.s avcall-loongarch64.s + $(RM) -r .libs _libs + $(RM) minitests.@OBJEXT@ minitests.s minitests minitests.out + $(RM) minitests-c++.@OBJEXT@ minitests-c++ minitests-c++.out +@@ -381,6 +387,7 @@ SOURCE_FILES = \ + avcall-hppa64.c avcall-hppa64-linux.s avcall-hppa64-macro.S \ + avcall-i386.c avcall-i386-linux.s avcall-i386-macro.S \ + avcall-ia64.c avcall-ia64-linux.s avcall-ia64-macro.S \ ++ avcall-loongarch64.c avcall-loongarch64-linux.s avcall-loongarch64-macro.S \ + avcall-m68k.c avcall-m68k-linux.s avcall-m68k-sun.s avcall-m68k.mit.S avcall-m68k.motorola.S \ + avcall-mips.c avcall-mipseb-linux.s avcall-mipsel-linux.s avcall-mipseb-macro.S avcall-mipsel-macro.S \ + avcall-mipsn32.c avcall-mipsn32eb-linux.s avcall-mipsn32el-linux.s avcall-mipsn32eb-macro.S avcall-mipsn32el-macro.S \ +diff --git a/avcall/PLATFORMS b/avcall/PLATFORMS +index a823a51..5fcb26a 100644 +--- a/avcall/PLATFORMS ++++ b/avcall/PLATFORMS +@@ -78,4 +78,5 @@ Supported CPUs: (Put the GNU config.guess values here.) + s390x s390x-ibm-linux (gcc) emulated Linux + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) emulated Linux + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) emulated Linux ++ loongarch64 loongarch64-unknown-linux (gcc) Loongson Linux + +diff --git a/avcall/avcall-alist.h b/avcall/avcall-alist.h +index 797f730..0574055 100644 +--- a/avcall/avcall-alist.h ++++ b/avcall/avcall-alist.h +@@ -1,6 +1,6 @@ + /* + * Copyright 1993-1995 Bill Triggs +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -197,7 +197,7 @@ typedef struct + float fargs[__AV_FARG_NUM]; + double dargs[__AV_FARG_NUM]; + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __AV_FARG_NUM 8 + /* store the floating-point arguments in an extra array */ + unsigned int fanum; /* number of fargs[] words that are occupied so far */ +diff --git a/avcall/avcall-internal.h b/avcall/avcall-internal.h +index c878134..72de32e 100644 +--- a/avcall/avcall-internal.h ++++ b/avcall/avcall-internal.h +@@ -1,6 +1,6 @@ + /* + * Copyright 1993-1995 Bill Triggs +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -111,7 +111,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + (LIST).farg_mask = 0, \ + (LIST).darg_mask = 0, + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __av_start1(LIST,LIST_ARGS_END) \ + (LIST).aptr = &(LIST).args[0], \ + (LIST).fanum = 0, \ +@@ -262,7 +262,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + #define __av_start_struct3(LIST) \ + ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0) + #endif +-#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) ++#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) || defined(__loongarch64__) + 
#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \ + ((TYPE_SIZE) <= 16) + /* Turn on __AV_REGISTER_STRUCT_RETURN if __AV_SMALL_STRUCT_RETURN was set +@@ -287,7 +287,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + #endif + /* Return structure pointer is passed as first arg. + */ +-#if defined(__i386__) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __av_start_struct4(LIST,TYPE_SIZE) \ + (*(LIST).aptr++ = (__avword)((LIST).raddr), 0) + #endif +@@ -330,7 +330,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + * scalar argument types + */ + +-#if defined(__i386__) || defined(__m68k__) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || defined(__hppa64__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || defined(__hppa64__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Floats and all integer types are passed as words, + * doubles as two words (on 32-bit platforms) or one word (on 64-bit platforms). + */ +@@ -465,7 +465,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + #define __av_ptr(LIST,VAL) __av_word(LIST,VAL) + #endif + +-#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__AV_LLP64)) || defined(__s390x__) || defined(__riscv64__) ++#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__AV_LLP64)) || defined(__s390x__) || defined(__riscv64__) || defined(__loongarch64__) + /* ‘long long’ and ‘long’ are identical. */ + #define __av_longlong __av_long + #define __av_ulonglong __av_ulong +@@ -833,7 +833,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + + #endif + +-#if defined(__arm64__) || defined(__riscv64__) ++#if defined(__arm64__) || defined(__riscv64__) || defined(__loongarch64__) + + /* Up to __AV_FARG_NUM float or double args can be passed in float registers. 
+ The remaining float or double args are passed in the general-purpose +@@ -1535,7 +1535,7 @@ extern void avcall_structcpy (void* dest, const void* src, unsigned long size, u + (LIST).aptr[-1] = (__avword)(LIST).eptr, \ + 0)))) + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Structures <= 16 bytes are passed as embedded copies on the arg stack. + * Big structures are passed as pointers to caller-made local copies. + */ +diff --git a/avcall/avcall-loongarch64-linux.s b/avcall/avcall-loongarch64-linux.s +new file mode 100644 +index 0000000..2d3a0d2 +--- /dev/null ++++ b/avcall/avcall-loongarch64-linux.s +@@ -0,0 +1,344 @@ ++ .file "avcall-loongarch64.c" ++ .text ++ .align 2 ++ .globl avcall_call ++ .type avcall_call, @function ++avcall_call: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-48 ++ .cfi_def_cfa_offset 48 ++ st.d $fp,$sp,32 ++ st.d $s0,$sp,24 ++ st.d $s1,$sp,16 ++ st.d $s2,$sp,8 ++ stptr.d $s3,$sp,0 ++ st.d $ra,$sp,40 ++ .cfi_offset 22, -16 ++ .cfi_offset 23, -24 ++ .cfi_offset 24, -32 ++ .cfi_offset 25, -40 ++ .cfi_offset 26, -48 ++ .cfi_offset 1, -8 ++ addi.d $fp,$sp,48 ++ .cfi_def_cfa 22, 0 ++ ld.d $s1,$a0,48 ++ ld.d $t0,$a0,40 ++ addi.w $t1,$r0,8 # 0x8 ++ ldptr.w $s3,$a0,64 ++ sub.d $t0,$t0,$s1 ++ srai.d $t0,$t0,3 ++ slli.w $s2,$t0,0 ++ or $s0,$a0,$r0 ++ addi.d $sp,$sp,-2048 ++ ble $s2,$t1,.L72 ++ addi.w $a2,$t0,-9 ++ or $t2,$sp,$r0 ++ bstrpick.d $a2,$a2,31,0 ++ alsl.d $a2,$a2,$zero,3 ++ addi.d $a2,$a2,8 ++ addi.d $a1,$s1,64 ++ or $a0,$t2,$r0 ++ bl %plt(memcpy) ++ ldptr.d $a0,$s1,0 ++.L5: ++ addi.w $t0,$r0,2 # 0x2 ++ ld.d $a1,$s1,8 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,3 # 0x3 ++ ld.d $a2,$s1,16 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,4 # 0x4 ++ ld.d $a3,$s1,24 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,5 # 0x5 ++ ld.d $a4,$s1,32 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,6 # 0x6 ++ ld.d $a5,$s1,40 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,7 # 0x7 ++ ld.d $a6,$s1,48 ++ beq $s2,$t0,.L6 ++ ld.d $a7,$s1,56 ++.L6: ++ beqz $s3,.L8 ++ ldptr.w $t0,$s0,72 ++ andi $t1,$t0,1 ++ beqz $t1,.L9 ++ fld.d $f0,$s0,112 ++.L10: ++ addi.w $t1,$r0,1 # 0x1 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,2 ++ bnez $t1,.L73 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,2 ++ beqz $t1,.L13 ++ fld.s $f1,$s0,80 ++.L13: ++ addi.w $t1,$r0,2 # 0x2 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,4 ++ beqz $t1,.L14 ++ fld.d $f2,$s0,128 ++.L15: ++ addi.w $t1,$r0,3 # 0x3 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,8 ++ beqz $t1,.L16 ++ fld.d $f3,$s0,136 ++.L17: ++ addi.w $t1,$r0,4 # 0x4 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,16 ++ bnez $t1,.L74 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,16 ++ beqz $t1,.L19 ++ fld.s $f4,$s0,92 ++.L19: ++ addi.w $t1,$r0,5 # 0x5 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,32 ++ beqz $t1,.L20 ++ fld.d $f5,$s0,152 ++.L21: ++ addi.w $t1,$r0,6 # 0x6 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,64 ++ beqz $t1,.L22 ++ fld.d $f6,$s0,160 ++.L23: ++ addi.w $t1,$r0,7 # 0x7 ++ beq $s3,$t1,.L8 ++ andi $t0,$t0,128 ++ beqz $t0,.L24 ++ fld.d $f7,$s0,168 ++.L8: ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,13 # 0xd ++ ld.d $t2,$s0,8 ++ beq $t0,$t1,.L75 ++ addi.w $t1,$r0,14 # 0xe ++ beq $t0,$t1,.L76 ++ jirl $ra,$t2,0 ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,1 # 0x1 ++ beq $t0,$t1,.L26 ++ addi.w $t1,$r0,16 # 0x10 ++ bgtu $t0,$t1,.L26 ++ la.local $t1,.L29 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L29: ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L35-.L29 ++ 
.dword .L35-.L29 ++ .dword .L33-.L29 ++ .dword .L33-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L30-.L29 ++ .dword .L28-.L29 ++ .text ++.L9: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,1 ++ beqz $t1,.L10 ++ fld.s $f0,$s0,76 ++ b .L10 ++.L76: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.d $f0,$s0,0 ++.L26: ++ addi.d $sp,$fp,-48 ++ .cfi_remember_state ++ .cfi_def_cfa 3, 48 ++ ld.d $ra,$sp,40 ++ .cfi_restore 1 ++ ld.d $fp,$sp,32 ++ .cfi_restore 22 ++ ld.d $s0,$sp,24 ++ .cfi_restore 23 ++ ld.d $s1,$sp,16 ++ .cfi_restore 24 ++ ld.d $s2,$sp,8 ++ .cfi_restore 25 ++ ldptr.d $s3,$sp,0 ++ .cfi_restore 26 ++ or $a0,$zero,$r0 ++ addi.d $sp,$sp,48 ++ .cfi_def_cfa_offset 0 ++ jr $ra ++.L72: ++ .cfi_restore_state ++ ble $s2,$r0,.L6 ++ addi.w $t0,$r0,1 # 0x1 ++ ldptr.d $a0,$s1,0 ++ bne $s2,$t0,.L5 ++ b .L6 ++.L75: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.s $f0,$s0,0 ++ b .L26 ++.L73: ++ fld.d $f1,$s0,120 ++ b .L13 ++.L30: ++ ld.d $t0,$s0,16 ++ stptr.d $a0,$t0,0 ++ b .L26 ++.L37: ++ ld.d $t0,$s0,16 ++ st.b $a0,$t0,0 ++ b .L26 ++.L14: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,4 ++ beqz $t1,.L15 ++ fld.s $f2,$s0,84 ++ b .L15 ++.L16: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,8 ++ beqz $t1,.L17 ++ fld.s $f3,$s0,88 ++ b .L17 ++.L74: ++ fld.d $f4,$s0,144 ++ b .L19 ++.L35: ++ ld.d $t0,$s0,16 ++ st.h $a0,$t0,0 ++ b .L26 ++.L33: ++ ld.d $t0,$s0,16 ++ stptr.w $a0,$t0,0 ++ b .L26 ++.L28: ++ ldptr.w $t0,$s0,0 ++ andi $t0,$t0,512 ++ beqz $t0,.L26 ++ ld.d $s1,$s0,32 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t1,$s1,-1 ++ bgtu $t1,$t0,.L26 ++ ld.d $t0,$s0,16 ++ addi.w $t4,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t1,$r0,1 # 0x1 ++ and $t4,$t0,$t4 ++ ldptr.d $t5,$t4,0 ++ andi $t0,$t0,7 ++ slli.w $t8,$t0,3 ++ add.d $s2,$s1,$t0 ++ sll.d $t7,$a0,$t8 ++ addi.w $s0,$r0,8 # 0x8 ++ sll.d $t1,$t1,$t8 ++ xor $t7,$t7,$t5 ++ slli.w $t6,$s2,3 ++ bgtu $s1,$s0,.L40 ++ bgtu $s2,$s0,.L41 ++ addi.w $t6,$t6,-1 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t6 ++ sub.d $t0,$t0,$t1 ++ and $t0,$t0,$t7 ++ xor $t0,$t0,$t5 ++ stptr.d $t0,$t4,0 ++ b .L26 ++.L20: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,32 ++ beqz $t1,.L21 ++ fld.s $f5,$s0,96 ++ b .L21 ++.L22: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,64 ++ beqz $t1,.L23 ++ fld.s $f6,$s0,100 ++ b .L23 ++.L24: ++ ldptr.w $t0,$s0,68 ++ andi $t0,$t0,128 ++ beqz $t0,.L8 ++ fld.s $f7,$s0,104 ++ b .L8 ++.L40: ++ sub.d $t1,$zero,$t1 ++ and $t1,$t1,$t7 ++ xor $t1,$t1,$t5 ++ stptr.d $t1,$t4,0 ++ addi.w $t1,$r0,16 # 0x10 ++ sub.d $t0,$s0,$t0 ++ sll.d $t8,$a1,$t8 ++ bgtu $s2,$t1,.L42 ++ slli.w $t0,$t0,2 ++ ld.d $t3,$t4,8 ++ sra.d $t2,$a0,$t0 ++ sra.d $t0,$t2,$t0 ++ addi.w $t6,$t6,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ or $t0,$t0,$t8 ++ sll.d $t1,$t1,$t6 ++ xor $t0,$t0,$t3 ++ addi.d $t1,$t1,-1 ++ and $t0,$t0,$t1 ++ xor $t0,$t0,$t3 ++ st.d $t0,$t4,8 ++ b .L26 ++.L41: ++ ld.d $t3,$t4,8 ++ sub.d $t0,$s0,$t0 ++ slli.w $t0,$t0,3 ++ addi.w $t6,$t6,-65 ++ addi.w $t2,$r0,2 # 0x2 ++ sra.d $t0,$a0,$t0 ++ sll.d $t2,$t2,$t6 ++ sub.d $t1,$zero,$t1 ++ xor $t0,$t0,$t3 ++ addi.d $t2,$t2,-1 ++ and $t1,$t1,$t7 ++ and $t0,$t0,$t2 ++ xor $t1,$t1,$t5 ++ xor $t0,$t0,$t3 ++ stptr.d $t1,$t4,0 ++ st.d $t0,$t4,8 ++ b .L26 ++.L42: ++ ld.d $t7,$t4,16 ++ slli.w $t0,$t0,3 ++ addi.w $t1,$t6,-129 ++ addi.w $t5,$r0,2 # 0x2 ++ sll.d $t5,$t5,$t1 ++ sra.d $t1,$a1,$t0 ++ addi.d $t3,$t5,-1 ++ xor $t1,$t1,$t7 ++ sra.d $t0,$a0,$t0 ++ and $t1,$t3,$t1 ++ or $t0,$t0,$t8 ++ xor $t1,$t1,$t7 ++ st.d $t0,$t4,8 ++ st.d $t1,$t4,16 ++ b .L26 ++ .cfi_endproc ++.LFE0: ++ .size avcall_call, .-avcall_call 
++ .ident "GCC: (GNU) 12.0.1 20220317 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/avcall/avcall-loongarch64-macro.S b/avcall/avcall-loongarch64-macro.S +new file mode 100644 +index 0000000..ab5dc24 +--- /dev/null ++++ b/avcall/avcall-loongarch64-macro.S +@@ -0,0 +1,345 @@ ++ .file "avcall-loongarch64.c" ++ .text ++ .align 2 ++ .globl avcall_call ++ .type avcall_call, @function ++avcall_call: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-48 ++ .cfi_def_cfa_offset 48 ++ st.d $fp,$sp,32 ++ st.d $s0,$sp,24 ++ st.d $s1,$sp,16 ++ st.d $s2,$sp,8 ++ stptr.d $s3,$sp,0 ++ st.d $ra,$sp,40 ++ .cfi_offset 22, -16 ++ .cfi_offset 23, -24 ++ .cfi_offset 24, -32 ++ .cfi_offset 25, -40 ++ .cfi_offset 26, -48 ++ .cfi_offset 1, -8 ++ addi.d $fp,$sp,48 ++ .cfi_def_cfa 22, 0 ++ ld.d $s1,$a0,48 ++ ld.d $t0,$a0,40 ++ addi.w $t1,$r0,8 # 0x8 ++ ldptr.w $s3,$a0,64 ++ sub.d $t0,$t0,$s1 ++ srai.d $t0,$t0,3 ++ slli.w $s2,$t0,0 ++ or $s0,$a0,$r0 ++ addi.d $sp,$sp,-2048 ++ ble $s2,$t1,.L72 ++ addi.w $a2,$t0,-9 ++ or $t2,$sp,$r0 ++ bstrpick.d $a2,$a2,31,0 ++ alsl.d $a2,$a2,$zero,3 ++ addi.d $a2,$a2,8 ++ addi.d $a1,$s1,64 ++ or $a0,$t2,$r0 ++ bl %plt(memcpy) ++ ldptr.d $a0,$s1,0 ++.L5: ++ addi.w $t0,$r0,2 # 0x2 ++ ld.d $a1,$s1,8 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,3 # 0x3 ++ ld.d $a2,$s1,16 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,4 # 0x4 ++ ld.d $a3,$s1,24 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,5 # 0x5 ++ ld.d $a4,$s1,32 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,6 # 0x6 ++ ld.d $a5,$s1,40 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,7 # 0x7 ++ ld.d $a6,$s1,48 ++ beq $s2,$t0,.L6 ++ ld.d $a7,$s1,56 ++.L6: ++ beqz $s3,.L8 ++ ldptr.w $t0,$s0,72 ++ andi $t1,$t0,1 ++ beqz $t1,.L9 ++ fld.d $f0,$s0,112 ++.L10: ++ addi.w $t1,$r0,1 # 0x1 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,2 ++ bnez $t1,.L73 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,2 ++ beqz $t1,.L13 ++ fld.s $f1,$s0,80 ++.L13: ++ addi.w $t1,$r0,2 # 0x2 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,4 ++ beqz $t1,.L14 ++ fld.d $f2,$s0,128 ++.L15: ++ addi.w $t1,$r0,3 # 0x3 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,8 ++ beqz $t1,.L16 ++ fld.d $f3,$s0,136 ++.L17: ++ addi.w $t1,$r0,4 # 0x4 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,16 ++ bnez $t1,.L74 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,16 ++ beqz $t1,.L19 ++ fld.s $f4,$s0,92 ++.L19: ++ addi.w $t1,$r0,5 # 0x5 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,32 ++ beqz $t1,.L20 ++ fld.d $f5,$s0,152 ++.L21: ++ addi.w $t1,$r0,6 # 0x6 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,64 ++ beqz $t1,.L22 ++ fld.d $f6,$s0,160 ++.L23: ++ addi.w $t1,$r0,7 # 0x7 ++ beq $s3,$t1,.L8 ++ andi $t0,$t0,128 ++ beqz $t0,.L24 ++ fld.d $f7,$s0,168 ++.L8: ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,13 # 0xd ++ ld.d $t2,$s0,8 ++ beq $t0,$t1,.L75 ++ addi.w $t1,$r0,14 # 0xe ++ beq $t0,$t1,.L76 ++ jirl $ra,$t2,0 ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,1 # 0x1 ++ beq $t0,$t1,.L26 ++ addi.w $t1,$r0,16 # 0x10 ++ bgtu $t0,$t1,.L26 ++ la.local $t1,.L29 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L29: ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L35-.L29 ++ .dword .L35-.L29 ++ .dword .L33-.L29 ++ .dword .L33-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L30-.L29 ++ .dword .L28-.L29 ++ .text ++.L9: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,1 ++ beqz $t1,.L10 ++ fld.s $f0,$s0,76 ++ b .L10 ++.L76: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.d $f0,$s0,0 ++.L26: ++ addi.d $sp,$fp,-48 ++ 
.cfi_remember_state ++ .cfi_def_cfa 3, 48 ++ ld.d $ra,$sp,40 ++ .cfi_restore 1 ++ ld.d $fp,$sp,32 ++ .cfi_restore 22 ++ ld.d $s0,$sp,24 ++ .cfi_restore 23 ++ ld.d $s1,$sp,16 ++ .cfi_restore 24 ++ ld.d $s2,$sp,8 ++ .cfi_restore 25 ++ ldptr.d $s3,$sp,0 ++ .cfi_restore 26 ++ or $a0,$zero,$r0 ++ addi.d $sp,$sp,48 ++ .cfi_def_cfa_offset 0 ++ jr $ra ++.L72: ++ .cfi_restore_state ++ ble $s2,$r0,.L6 ++ addi.w $t0,$r0,1 # 0x1 ++ ldptr.d $a0,$s1,0 ++ bne $s2,$t0,.L5 ++ b .L6 ++.L75: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.s $f0,$s0,0 ++ b .L26 ++.L73: ++ fld.d $f1,$s0,120 ++ b .L13 ++.L30: ++ ld.d $t0,$s0,16 ++ stptr.d $a0,$t0,0 ++ b .L26 ++.L37: ++ ld.d $t0,$s0,16 ++ st.b $a0,$t0,0 ++ b .L26 ++.L14: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,4 ++ beqz $t1,.L15 ++ fld.s $f2,$s0,84 ++ b .L15 ++.L16: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,8 ++ beqz $t1,.L17 ++ fld.s $f3,$s0,88 ++ b .L17 ++.L74: ++ fld.d $f4,$s0,144 ++ b .L19 ++.L35: ++ ld.d $t0,$s0,16 ++ st.h $a0,$t0,0 ++ b .L26 ++.L33: ++ ld.d $t0,$s0,16 ++ stptr.w $a0,$t0,0 ++ b .L26 ++.L28: ++ ldptr.w $t0,$s0,0 ++ andi $t0,$t0,512 ++ beqz $t0,.L26 ++ ld.d $s1,$s0,32 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t1,$s1,-1 ++ bgtu $t1,$t0,.L26 ++ ld.d $t0,$s0,16 ++ addi.w $t4,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t1,$r0,1 # 0x1 ++ and $t4,$t0,$t4 ++ ldptr.d $t5,$t4,0 ++ andi $t0,$t0,7 ++ slli.w $t8,$t0,3 ++ add.d $s2,$s1,$t0 ++ sll.d $t7,$a0,$t8 ++ addi.w $s0,$r0,8 # 0x8 ++ sll.d $t1,$t1,$t8 ++ xor $t7,$t7,$t5 ++ slli.w $t6,$s2,3 ++ bgtu $s1,$s0,.L40 ++ bgtu $s2,$s0,.L41 ++ addi.w $t6,$t6,-1 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t6 ++ sub.d $t0,$t0,$t1 ++ and $t0,$t0,$t7 ++ xor $t0,$t0,$t5 ++ stptr.d $t0,$t4,0 ++ b .L26 ++.L20: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,32 ++ beqz $t1,.L21 ++ fld.s $f5,$s0,96 ++ b .L21 ++.L22: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,64 ++ beqz $t1,.L23 ++ fld.s $f6,$s0,100 ++ b .L23 ++.L24: ++ ldptr.w $t0,$s0,68 ++ andi $t0,$t0,128 ++ beqz $t0,.L8 ++ fld.s $f7,$s0,104 ++ b .L8 ++.L40: ++ sub.d $t1,$zero,$t1 ++ and $t1,$t1,$t7 ++ xor $t1,$t1,$t5 ++ stptr.d $t1,$t4,0 ++ addi.w $t1,$r0,16 # 0x10 ++ sub.d $t0,$s0,$t0 ++ sll.d $t8,$a1,$t8 ++ bgtu $s2,$t1,.L42 ++ slli.w $t0,$t0,2 ++ ld.d $t3,$t4,8 ++ sra.d $t2,$a0,$t0 ++ sra.d $t0,$t2,$t0 ++ addi.w $t6,$t6,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ or $t0,$t0,$t8 ++ sll.d $t1,$t1,$t6 ++ xor $t0,$t0,$t3 ++ addi.d $t1,$t1,-1 ++ and $t0,$t0,$t1 ++ xor $t0,$t0,$t3 ++ st.d $t0,$t4,8 ++ b .L26 ++.L41: ++ ld.d $t3,$t4,8 ++ sub.d $t0,$s0,$t0 ++ slli.w $t0,$t0,3 ++ addi.w $t6,$t6,-65 ++ addi.w $t2,$r0,2 # 0x2 ++ sra.d $t0,$a0,$t0 ++ sll.d $t2,$t2,$t6 ++ sub.d $t1,$zero,$t1 ++ xor $t0,$t0,$t3 ++ addi.d $t2,$t2,-1 ++ and $t1,$t1,$t7 ++ and $t0,$t0,$t2 ++ xor $t1,$t1,$t5 ++ xor $t0,$t0,$t3 ++ stptr.d $t1,$t4,0 ++ st.d $t0,$t4,8 ++ b .L26 ++.L42: ++ ld.d $t7,$t4,16 ++ slli.w $t0,$t0,3 ++ addi.w $t1,$t6,-129 ++ addi.w $t5,$r0,2 # 0x2 ++ sll.d $t5,$t5,$t1 ++ sra.d $t1,$a1,$t0 ++ addi.d $t3,$t5,-1 ++ xor $t1,$t1,$t7 ++ sra.d $t0,$a0,$t0 ++ and $t1,$t3,$t1 ++ or $t0,$t0,$t8 ++ xor $t1,$t1,$t7 ++ st.d $t0,$t4,8 ++ st.d $t1,$t4,16 ++ b .L26 ++ .cfi_endproc ++.LFE0: ++ .size avcall_call, .-avcall_call ++#if defined __linux__ || defined __FreeBSD__ || defined __FreeBSD_kernel__ || defined __DragonFly__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/avcall/avcall-loongarch64.c b/avcall/avcall-loongarch64.c +new file mode 100644 +index 0000000..0e33033 +--- /dev/null ++++ b/avcall/avcall-loongarch64.c +@@ -0,0 +1,358 @@ ++/** ++ Copyright 1993 Bill Triggs ++ Copyright 1995-2022 Bruno Haible ++ ++ 
This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . ++**/ ++/*---------------------------------------------------------------------- ++ Foreign function interface for LoongArch 64-bit CPU with LP64 ABI. ++ ++ This calls a C function with an argument list built up using macros ++ defined in avcall.h. ++ ++ LoongArch64 argument passing conventions: ++ ++ Up to 8 words are passed in integer registers (r4, ..., r11); remaining ++ words are passed on the stack. ++ Up to 8 float/double arguments are passed in floating point registers ++ (f0, ..., f7), further float/double arguments are passed in integer ++ registers, remaining float/double arguments are passed on the stack. ++ Arguments passed on the stack have 8-bytes alignment. ++ Structure args <= 16 bytes are passed as up to two words in registers ++ (floating-point fields of structures with at most two fields in ++ floating-point registers, other fields in integer registers). ++ Structure args larger than 16 bytes are passed as pointers to caller-made ++ local copies. ++ ++ Integers are returned in r4, r5. ++ Float/double values are returned in f0, f1. ++ Structures <= 16 bytes are returned in registers (floating-point fields ++ of structures with at most two fields in floating-point registers, other ++ fields in integer registers). ++ To return a structure larger than 16 bytes, the caller allocates the ++ space and passes a pointer to it as first argument (i.e. in r4). 
++ ----------------------------------------------------------------------*/ ++#include "avcall-internal.h" ++ ++#define RETURN(TYPE,VAL) (*(TYPE*)l->raddr = (TYPE)(VAL)) ++ ++register __avrword iarg1 __asm__("a0"); ++register __avrword iarg2 __asm__("a1"); ++register __avrword iarg3 __asm__("a2"); ++register __avrword iarg4 __asm__("a3"); ++register __avrword iarg5 __asm__("a4"); ++register __avrword iarg6 __asm__("a5"); ++register __avrword iarg7 __asm__("a6"); ++register __avrword iarg8 __asm__("a7"); ++ ++register float farg1 __asm__("f0"); ++register float farg2 __asm__("f1"); ++register float farg3 __asm__("f2"); ++register float farg4 __asm__("f3"); ++register float farg5 __asm__("f4"); ++register float farg6 __asm__("f5"); ++register float farg7 __asm__("f6"); ++register float farg8 __asm__("f7"); ++ ++register double darg1 __asm__("f0"); ++register double darg2 __asm__("f1"); ++register double darg3 __asm__("f2"); ++register double darg4 __asm__("f3"); ++register double darg5 __asm__("f4"); ++register double darg6 __asm__("f5"); ++register double darg7 __asm__("f6"); ++register double darg8 __asm__("f7"); ++ ++int ++avcall_call(av_alist* list) ++{ ++ register __avrword iretreg __asm__("a0"); ++ register __avrword iret2reg __asm__("a1"); ++ register double dret __asm__("f0"); ++ ++ __av_alist* l = &AV_LIST_INNER(list); ++ ++ __avword* argframe = __builtin_alloca(__AV_ALIST_WORDS * sizeof(__avword)); /* make room for argument list */ ++ int arglen = l->aptr - l->args; ++ unsigned int fanum = l->fanum; ++ ++ { ++ int i; ++ for (i = 8; i < arglen; i++) /* push function args onto stack */ ++ argframe[i-8] = l->args[i]; ++ } ++ ++ /* Put up to 8 integer args into registers. */ ++ if (arglen >= 1) { ++ iarg1 = l->args[0]; ++ if (arglen >= 2) { ++ iarg2 = l->args[1]; ++ if (arglen >= 3) { ++ iarg3 = l->args[2]; ++ if (arglen >= 4) { ++ iarg4 = l->args[3]; ++ if (arglen >= 5) { ++ iarg5 = l->args[4]; ++ if (arglen >= 6) { ++ iarg6 = l->args[5]; ++ if (arglen >= 7) { ++ iarg7 = l->args[6]; ++ if (arglen >= 8) { ++ iarg8 = l->args[7]; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ ++ /* Put upto 8 floating-point args into registers. */ ++ if (fanum >= 1) { ++ if (l->darg_mask & (1 << 0)) darg1 = l->dargs[0]; ++ else if (l->farg_mask & (1 << 0)) farg1 = l->fargs[0]; ++ if (fanum >= 2) { ++ if (l->darg_mask & (1 << 1)) darg2 = l->dargs[1]; ++ else if (l->farg_mask & (1 << 1)) farg2 = l->fargs[1]; ++ if (fanum >= 3) { ++ if (l->darg_mask & (1 << 2)) darg3 = l->dargs[2]; ++ else if (l->farg_mask & (1 << 2)) farg3 = l->fargs[2]; ++ if (fanum >= 4) { ++ if (l->darg_mask & (1 << 3)) darg4 = l->dargs[3]; ++ else if (l->farg_mask & (1 << 3)) farg4 = l->fargs[3]; ++ if (fanum >= 5) { ++ if (l->darg_mask & (1 << 4)) darg5 = l->dargs[4]; ++ else if (l->farg_mask & (1 << 4)) farg5 = l->fargs[4]; ++ if (fanum >= 6) { ++ if (l->darg_mask & (1 << 5)) darg6 = l->dargs[5]; ++ else if (l->farg_mask & (1 << 5)) farg6 = l->fargs[5]; ++ if (fanum >= 7) { ++ if (l->darg_mask & (1 << 6)) darg7 = l->dargs[6]; ++ else if (l->farg_mask & (1 << 6)) farg7 = l->fargs[6]; ++ if (fanum >= 8) { ++ if (l->darg_mask & (1 << 7)) darg8 = l->dargs[7]; ++ else if (l->farg_mask & (1 << 7)) farg8 = l->fargs[7]; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ ++ /* Call function. 
*/ ++ if (l->rtype == __AVfloat) { ++ *(float*)l->raddr = (*(float(*)())l->func)(); ++ } else ++ if (l->rtype == __AVdouble) { ++ *(double*)l->raddr = (*(double(*)())l->func)(); ++ } else { ++ __avrword iret, iret2; ++ ++ iret = (*l->func)(); ++ iret2 = iret2reg; ++ ++ /* save return value */ ++ if (l->rtype == __AVvoid) { ++ } else ++ if (l->rtype == __AVchar) { ++ RETURN(char, iret); ++ } else ++ if (l->rtype == __AVschar) { ++ RETURN(signed char, iret); ++ } else ++ if (l->rtype == __AVuchar) { ++ RETURN(unsigned char, iret); ++ } else ++ if (l->rtype == __AVshort) { ++ RETURN(short, iret); ++ } else ++ if (l->rtype == __AVushort) { ++ RETURN(unsigned short, iret); ++ } else ++ if (l->rtype == __AVint) { ++ RETURN(int, iret); ++ } else ++ if (l->rtype == __AVuint) { ++ RETURN(unsigned int, iret); ++ } else ++ if (l->rtype == __AVlong || l->rtype == __AVlonglong) { ++ RETURN(long, iret); ++ } else ++ if (l->rtype == __AVulong || l->rtype == __AVulonglong) { ++ RETURN(unsigned long, iret); ++ } else ++ /* see above ++ if (l->rtype == __AVfloat) { ++ } else ++ if (l->rtype == __AVdouble) { ++ } else ++ */ ++ if (l->rtype == __AVvoidp) { ++ RETURN(void*, iret); ++ } else ++ if (l->rtype == __AVstruct) { ++ if (l->flags & __AV_REGISTER_STRUCT_RETURN) { ++ /* Return structs of size <= 16 in registers. */ ++ if (l->rsize > 0 && l->rsize <= 16) { ++ void* raddr = l->raddr; ++ #if 0 /* Unoptimized */ ++ if (l->rsize == 1) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ } else ++ if (l->rsize == 2) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ } else ++ if (l->rsize == 3) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ } else ++ if (l->rsize == 4) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ } else ++ if (l->rsize == 5) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ } else ++ if (l->rsize == 6) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40); ++ } else ++ if (l->rsize == 7) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40); ++ ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48); ++ } else ++ if (l->rsize >= 8 && l->rsize <= 16) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = 
(unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40); ++ ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48); ++ ((unsigned char *)raddr)[7] = (unsigned char)(iret>>56); ++ if (l->rsize == 8) { ++ } else ++ if (l->rsize == 9) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ } else ++ if (l->rsize == 10) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ } else ++ if (l->rsize == 11) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ } else ++ if (l->rsize == 12) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ } else ++ if (l->rsize == 13) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ } else ++ if (l->rsize == 14) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40); ++ } else ++ if (l->rsize == 15) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40); ++ ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48); ++ } else ++ if (l->rsize == 16) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40); ++ ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48); ++ ((unsigned char *)raddr)[8+7] = (unsigned char)(iret2>>56); ++ } ++ } ++ #else /* Optimized: fewer conditional jumps, fewer memory accesses */ ++ uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avrword) */ ++ __avrword* wordaddr = (__avrword*)((uintptr_t)raddr & ~(uintptr_t)(sizeof(__avrword)-1)); ++ uintptr_t start_offset = (uintptr_t)raddr & (uintptr_t)(sizeof(__avrword)-1); /* ≥ 0, < sizeof(__avrword) */ ++ uintptr_t end_offset = start_offset + count; /* > 0, < 3*sizeof(__avrword) */ ++ if (count <= sizeof(__avrword)) { ++ /* Use iret. 
*/ ++ if (end_offset <= sizeof(__avrword)) { ++ /* 0 < end_offset ≤ sizeof(__avrword) */ ++ __avrword mask0 = ((__avrword)2 << (end_offset*8-1)) - ((__avrword)1 << (start_offset*8)); ++ wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0; ++ } else { ++ /* sizeof(__avrword) < end_offset < 2*sizeof(__avrword), start_offset > 0 */ ++ __avrword mask0 = - ((__avrword)1 << (start_offset*8)); ++ __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1; ++ wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0; ++ wordaddr[1] ^= (wordaddr[1] ^ (iret >> (sizeof(__avrword)*8-start_offset*8))) & mask1; ++ } ++ } else { ++ /* Use iret, iret2. */ ++ __avrword mask0 = - ((__avrword)1 << (start_offset*8)); ++ wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0; ++ if (end_offset <= 2*sizeof(__avrword)) { ++ /* sizeof(__avrword) < end_offset ≤ 2*sizeof(__avrword) */ ++ __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1; ++ wordaddr[1] ^= (wordaddr[1] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask1; ++ } else { ++ /* 2*sizeof(__avrword) < end_offset < 3*sizeof(__avrword), start_offset > 0 */ ++ __avrword mask2 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1; ++ wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8)); ++ wordaddr[2] ^= (wordaddr[2] ^ (iret2 >> (sizeof(__avrword)*8-start_offset*8))) & mask2; ++ } ++ } ++ #endif ++ } ++ } ++ } ++ } ++ return 0; ++} +diff --git a/avcall/avcall.h b/avcall/avcall.h +index 3d10db4..66c9c19 100644 +--- a/avcall/avcall.h ++++ b/avcall/avcall.h +@@ -1,6 +1,6 @@ + /* + * Copyright 1993-1995 Bill Triggs +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -148,7 +148,7 @@ enum __AV_alist_flags + #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */ + __AV_SUNPROCC_STRUCT_RETURN, + #else +-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) ++#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + __AV_SMALL_STRUCT_RETURN | + #endif + #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) +@@ -265,7 +265,7 @@ enum __AV_alist_flags + #endif + + /* These are for internal use only */ +-#if 
defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) || defined(__loongarch64__) + __AV_REGISTER_STRUCT_RETURN = 1<<9, + #endif + +diff --git a/callback/PLATFORMS b/callback/PLATFORMS +index df4c6e2..aded5a6 100644 +--- a/callback/PLATFORMS ++++ b/callback/PLATFORMS +@@ -40,4 +40,5 @@ Supported CPUs: (Put the GNU config.guess values here.) + s390x s390x-ibm-linux (gcc) + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) ++ loongarch64 loongarch64-unknown-linux (gcc) + +diff --git a/callback/trampoline_r/Makefile.devel b/callback/trampoline_r/Makefile.devel +index 7abb351..a5c7e83 100644 +--- a/callback/trampoline_r/Makefile.devel ++++ b/callback/trampoline_r/Makefile.devel +@@ -79,7 +79,7 @@ cache-powerpc64-elfv2-macro.S : cache-powerpc64-elfv2-linux.s ../../common/asm-p + OLDGCCFLAGS = -O2 -fomit-frame-pointer + ASPREFIX = /usr1/gnu/lib + +-proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.c proto-x86_64.c proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s ++proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.c proto-x86_64.c proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s proto-loongarch64.s + + proto-i386.s : proto.c + $(GCC) -V 2.7.2 -b i486-linuxaout $(OLDGCCFLAGS) -D__i386__ -S proto.c -o $@ +@@ -153,6 +153,9 @@ proto-riscv32.s : proto.c + proto-riscv64.s : proto64.c + $(CROSS_TOOL) riscv64-linux gcc-7.3.0 $(OLDGCCFLAGS) -D__riscv64__ -S proto64.c -o $@ + ++proto-loongarch64.s : proto64.c ++ $(CROSS_TOOL) loongarch64-linux gcc $(OLDGCCFLAGS) -D__loongarch64__ -S proto64.c -o $@ ++ + tramp-i386.o : tramp-i386.s + $(ASPREFIX)/i486-linux/bin/as tramp-i386.s -o $@ + +@@ -215,3 +218,6 @@ tramp-riscv32.o : tramp-riscv32.s + + tramp-riscv64.o : tramp-riscv64.s + $(CROSS_TOOL) riscv64-linux as tramp-riscv64.s -o $@ ++ ++tramp-loongarch64.o : tramp-loongarch64.s ++ $(CROSS_TOOL) loongarch64-linux as tramp-loongarch64.s -o $@ +diff --git a/callback/trampoline_r/proto-loongarch64.s b/callback/trampoline_r/proto-loongarch64.s +new file mode 100644 +index 0000000..decf82e +--- /dev/null ++++ b/callback/trampoline_r/proto-loongarch64.s +@@ -0,0 +1,29 @@ ++ .file "proto64.c" ++ .text ++ .align 2 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ lu12i.w $r20,1130504192>>12 # 
0x43622000 ++ lu12i.w $r12,-559878144>>12 # 0xffffffffdea0f000 ++ ori $r20,$r20,341 ++ ori $r12,$r12,4011 ++ lu32i.d $r20,0x5471100000000>>32 ++ lu32i.d $r12,0xfffebec000000000>>32 ++ lu52i.d $r20,$r20,0x7350000000000000>>52 ++ lu52i.d $r12,$r12,0xbab0000000000000>>52 ++ jr $r12 ++ .size tramp, .-tramp ++ .section .text.unlikely,"ax",@progbits ++ .align 2 ++ .globl jump ++ .type jump, @function ++jump: ++ lu12i.w $r12,-559878144>>12 # 0xffffffffdea0f000 ++ ori $r12,$r12,4011 ++ lu32i.d $r12,0xfffebec000000000>>32 ++ lu52i.d $r12,$r12,0xbab0000000000000>>52 ++ jr $r12 ++ .size jump, .-jump ++ .ident "GCC: (GNU) 12.0.0 20211224 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/callback/trampoline_r/test1.c b/callback/trampoline_r/test1.c +index 4b2f9f8..9105c8f 100644 +--- a/callback/trampoline_r/test1.c ++++ b/callback/trampoline_r/test1.c +@@ -1,7 +1,7 @@ + /* Trampoline test */ + + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -83,6 +83,9 @@ register void* env __asm__("r0"); + #endif + #if defined(__riscv32__) || defined(__riscv64__) + register void* env __asm__("t2"); ++#endif ++#ifdef __loongarch64__ ++register void* env __asm__("r20"); + #endif + + return x + (int)(long)((void**)env)[1] + (int)(long)((void**)env)[0] + MAGIC3; +diff --git a/callback/trampoline_r/tramp-loongarch64.s b/callback/trampoline_r/tramp-loongarch64.s +new file mode 100644 +index 0000000..9cd0a93 +--- /dev/null ++++ b/callback/trampoline_r/tramp-loongarch64.s +@@ -0,0 +1,36 @@ ++/* Trampoline for LoongArch CPU in 64-bit mode */ ++ ++/* ++ * Copyright 1996-2022 Bruno Haible ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* Available registers: $r12 ... $r20. */ ++ ++ .text ++ .align 3 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ pcaddu12i $r12, 0 ++ /* Now our own address (=tramp) is in $r12. 
*/ ++ ld.d $r20, $r12, 16 /* $LC0-tramp */ ++ ld.d $r12, $r12, 24 /* $LC1-tramp */ ++ jr $r12 ++$LC0: ++ .dword 0x7355471143622155 ++$LC1: ++ .dword 0xbabebec0dea0ffab ++ .size tramp, .-tramp +diff --git a/callback/trampoline_r/trampoline.c b/callback/trampoline_r/trampoline.c +index 5d4f8c2..21751e3 100644 +--- a/callback/trampoline_r/trampoline.c ++++ b/callback/trampoline_r/trampoline.c +@@ -1,7 +1,7 @@ + /* Trampoline construction */ + + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -326,6 +326,10 @@ static int open_noinherit (const char *filename, int flags, int mode) + #define TRAMP_LENGTH 32 + #define TRAMP_ALIGN 8 + #endif ++#ifdef __loongarch64__ ++#define TRAMP_LENGTH 32 ++#define TRAMP_ALIGN 8 ++#endif + + #ifndef TRAMP_BIAS + #define TRAMP_BIAS 0 +@@ -1257,6 +1261,32 @@ __TR_function alloc_trampoline_r (__TR_function address, void* data0, void* data + (*(unsigned long *) (function +24)) + #define tramp_data(function) \ + (*(unsigned long *) (function +16)) ++#endif ++#ifdef __loongarch64__ ++ /* function: ++ * pcaddu12i $r12,0 1C00000C ++ * ld.d $r20,$r12,16 28C04194 ++ * ld.d $r12,$r12,24 28C0618C ++ * jirl $r0,$r12,0 4C000180 ++ * .dword ++ * .dword
++ */ ++ *(unsigned int *) (function + 0) = 0x1C00000C; ++ *(unsigned int *) (function + 4) = 0x28C04194; ++ *(unsigned int *) (function + 8) = 0x28C0618C; ++ *(unsigned int *) (function +12) = 0x4C000180; ++ *(unsigned long *) (function +16) = (unsigned long) data; ++ *(unsigned long *) (function +24) = (unsigned long) address; ++#define TRAMP_CODE_LENGTH 16 ++#define is_tramp(function) \ ++ *(unsigned int *) (function + 0) == 0x1C00000C && \ ++ *(unsigned int *) (function + 4) == 0x28C04194 && \ ++ *(unsigned int *) (function + 8) == 0x28C0618C && \ ++ *(unsigned int *) (function +12) == 0x4C000180 ++#define tramp_address(function) \ ++ *(unsigned long *) (function +24) ++#define tramp_data(function) \ ++ *(unsigned long *) (function +16) + #endif + /* + * data: +@@ -1408,6 +1438,10 @@ __TR_function alloc_trampoline_r (__TR_function address, void* data0, void* data + __asm__ __volatile__ ("fence.i"); + #endif + #endif ++#if defined(__loongarch64__) ++ /* Use the GCC built-in. It expands to 'ibar 0'. */ ++ __clear_cache((void*)function_x,(void*)(function_x+TRAMP_CODE_LENGTH)); ++#endif + #endif + #endif + +diff --git a/callback/vacall_r/Makefile.devel b/callback/vacall_r/Makefile.devel +index 1a2e62e..5eb626c 100644 +--- a/callback/vacall_r/Makefile.devel ++++ b/callback/vacall_r/Makefile.devel +@@ -25,7 +25,8 @@ precompiled : \ + vacall-ia64-macro.S \ + vacall-x86_64-macro.S vacall-x86_64-x32-linux.s vacall-x86_64-windows-macro.S \ + vacall-s390-macro.S vacall-s390x-macro.S \ +- vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S ++ vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S \ ++ vacall-loongarch64-macro.S + + + vacall-i386-linux.s : ../../vacall/vacall-i386.c ../../vacall/vacall-internal.h vacall_r.h $(THISFILE) +@@ -233,3 +234,10 @@ vacall-riscv64-lp64d-linux.s : ../../vacall/vacall-riscv64.c ../../vacall/vacall + + vacall-riscv64-lp64d-macro.S : vacall-riscv64-lp64d-linux.s ../../common/asm-riscv.sh ../../common/noexecstack.h $(THISFILE) + (../../common/asm-riscv.sh < vacall-riscv64-lp64d-linux.s ; cat ../../common/noexecstack.h) > vacall-riscv64-lp64d-macro.S ++ ++ ++vacall-loongarch64-linux.s : ../../vacall/vacall-loongarch64.c ../../vacall/vacall-internal.h vacall_r.h $(THISFILE) ++ $(CROSS_TOOL) loongarch64-linux gcc $(GCCFLAGS) -D__loongarch64__ -S ../../vacall/vacall-loongarch64.c -I../../vacall -I. 
-o vacall-loongarch64-linux.s ++ ++vacall-loongarch64-macro.S : vacall-loongarch64-linux.s ../../common/asm-loongarch.sh ../../common/noexecstack.h $(THISFILE) ++ (../../common/asm-loongarch.sh < vacall-loongarch64-linux.s ; cat ../../common/noexecstack.h) > vacall-loongarch64-macro.S +diff --git a/callback/vacall_r/Makefile.in b/callback/vacall_r/Makefile.in +index 012f4d9..8cf11b3 100644 +--- a/callback/vacall_r/Makefile.in ++++ b/callback/vacall_r/Makefile.in +@@ -242,6 +242,12 @@ vacall-riscv64-lp64d.lo : vacall-riscv64-lp64d.s + vacall-riscv64-lp64d.s : $(srcdir)/vacall-riscv64-lp64d-macro.S + $(CPP) $(ASPFLAGS) -I$(srcdir) $(srcdir)/vacall-riscv64-lp64d-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-riscv64-lp64d.s + ++vacall-loongarch64.lo : vacall-loongarch64.s ++ $(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c vacall-loongarch64.s ++ ++vacall-loongarch64.s : $(srcdir)/vacall-loongarch64-macro.S ++ $(CPP) $(ASPFLAGS) $(srcdir)/vacall-loongarch64-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-loongarch64.s ++ + vacall-libapi.lo : $(srcdir)/vacall-libapi.c $(srcdir)/../../vacall/vacall-internal.h $(srcdir)/vacall_r.h ../../config.h + $(LIBTOOL_COMPILE) $(CC) $(INCLUDES_WITH_GNULIB) $(CPPFLAGS) $(CFLAGS) @DISABLE_TYPE_BASED_ALIASING@ -DREENTRANT -c $(srcdir)/vacall-libapi.c + +@@ -295,7 +301,7 @@ mostlyclean : clean + + clean : force + $(RM) *.@OBJEXT@ *.lo *.a libvacall.* core +- $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s ++ $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s vacall-loongarch64.s + $(RM) -r .libs _libs + + distclean : clean +@@ -321,6 +327,7 @@ SOURCE_FILES = \ + vacall-hppa64-linux.s vacall-hppa64-macro.S \ + vacall-i386-linux.s vacall-i386-macro.S \ + vacall-ia64-linux.s vacall-ia64-macro.S \ ++ vacall-loongarch64-linux.s vacall-loongarch64-macro.S \ + vacall-m68k-linux.s vacall-m68k-sun.s vacall-m68k.mit.S vacall-m68k.motorola.S \ + vacall-mipseb-linux.s vacall-mipsel-linux.s vacall-mipseb-macro.S vacall-mipsel-macro.S \ + vacall-mipsn32eb-linux.s vacall-mipsn32el-linux.s vacall-mipsn32eb-macro.S vacall-mipsn32el-macro.S \ +diff --git a/callback/vacall_r/vacall-loongarch64-linux.s b/callback/vacall_r/vacall-loongarch64-linux.s +new file mode 100644 +index 0000000..7277364 +--- /dev/null ++++ b/callback/vacall_r/vacall-loongarch64-linux.s +@@ -0,0 +1,217 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .type callback_receiver, @function ++callback_receiver: ++.LFB0 = . 
++ .cfi_startproc ++ addi.d $sp,$sp,-288 ++ .cfi_def_cfa_offset 288 ++ st.d $ra,$sp,264 ++ st.d $fp,$sp,256 ++ st.d $s0,$sp,248 ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ .cfi_offset 23, -40 ++ addi.d $fp,$sp,272 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t8,0 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-192 ++ st.d $a2,$fp,-184 ++ st.d $a3,$fp,-176 ++ st.d $a4,$fp,-168 ++ st.d $a5,$fp,-160 ++ st.d $a6,$fp,-152 ++ st.d $a7,$fp,-144 ++ fst.s $f0,$fp,-132 ++ fst.s $f1,$fp,-128 ++ fst.s $f2,$fp,-124 ++ fst.s $f3,$fp,-120 ++ fst.s $f4,$fp,-116 ++ fst.s $f5,$fp,-112 ++ fst.s $f6,$fp,-108 ++ fst.s $f7,$fp,-104 ++ fst.d $f0,$fp,-96 ++ fst.d $f1,$fp,-88 ++ fst.d $f2,$fp,-80 ++ fst.d $f3,$fp,-72 ++ fst.d $f4,$fp,-64 ++ fst.d $f5,$fp,-56 ++ fst.d $f6,$fp,-48 ++ fst.d $f7,$fp,-40 ++ st.d $t1,$fp,-240 ++ st.d $a0,$fp,-200 ++ st.w $zero,$fp,-264 ++ st.d $zero,$fp,-232 ++ ld.d $a0,$t8,8 ++ st.w $zero,$fp,-224 ++ st.w $zero,$fp,-208 ++ st.w $zero,$fp,-136 ++ addi.d $a1,$fp,-264 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-224 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ la.local $t1,.L4 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1-.L4 ++ .dword .L15-.L4 ++ .dword .L15-.L4 ++ .dword .L14-.L4 ++ .dword .L13-.L4 ++ .dword .L12-.L4 ++ .dword .L11-.L4 ++ .dword .L10-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L7-.L4 ++ .dword .L6-.L4 ++ .dword .L5-.L4 ++ .dword .L3-.L4 ++ .text ++.L5: ++ ld.d $a0,$fp,-256 ++.L1: ++ ld.d $ra,$sp,264 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,256 ++ .cfi_restore 22 ++ ld.d $s0,$sp,248 ++ .cfi_restore 23 ++ addi.d $sp,$sp,288 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-256 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-256 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-256 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-256 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-256 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-256 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-256 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-256 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-264 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t0,$fp,-216 ++ addi.w $t1,$r0,15 # 0xf ++ addi.d $t2,$t0,-1 ++ bgtu $t2,$t1,.L1 ++ ld.d $t1,$fp,-232 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $s0,$r0,8 # 0x8 ++ andi $t2,$t1,7 ++ add.d $t4,$t0,$t2 ++ and $t1,$t1,$t3 ++ ldptr.d $t5,$t1,0 ++ slli.w $t3,$t4,3 ++ slli.w $t6,$t2,3 ++ bgtu $t0,$s0,.L17 ++ bgtu $t4,$s0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d $a0,$a0,-1 ++ and $a0,$a0,$t5 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t0,$r0,16 # 0x10 ++ ld.d $t7,$t1,8 ++ sra.d $t5,$t5,$t6 ++ sub.d $s0,$s0,$t2 ++ bgtu $t4,$t0,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t7 ++ slli.w $s0,$s0,2 ++ sll.d $a0,$a1,$s0 ++ sll.d $a0,$a0,$s0 ++ or $a0,$a0,$t5 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t4,$t1,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ sub.d $t1,$s0,$t2 ++ and $t0,$t0,$t4 ++ slli.w $t1,$t1,3 ++ sll.d $t0,$t0,$t1 ++ sra.d $t5,$t5,$t6 ++ or $a0,$t0,$t5 ++ b .L1 ++.L19: ++ ld.d $t2,$t1,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ slli.w $t1,$s0,3 ++ and $t0,$t0,$t2 ++ sll.d $a0,$t7,$t1 ++ sll.d $t0,$t0,$t1 ++ sra.d $t7,$t7,$t6 ++ or $a0,$a0,$t5 ++ or $a1,$t0,$t7 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size callback_receiver, 
.-callback_receiver ++ .align 2 ++ .globl callback_get_receiver ++ .type callback_get_receiver, @function ++callback_get_receiver: ++.LFB1 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-16 ++ .cfi_def_cfa_offset 16 ++ st.d $fp,$sp,8 ++ .cfi_offset 22, -8 ++ addi.d $fp,$sp,16 ++ .cfi_def_cfa 22, 0 ++ ld.d $fp,$sp,8 ++ .cfi_restore 22 ++ la.local $a0,callback_receiver ++ addi.d $sp,$sp,16 ++ .cfi_def_cfa_register 3 ++ jr $ra ++ .cfi_endproc ++.LFE1: ++ .size callback_get_receiver, .-callback_get_receiver ++ .ident "GCC: (GNU) 12.0.1 20220317 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/callback/vacall_r/vacall-loongarch64-macro.S b/callback/vacall_r/vacall-loongarch64-macro.S +new file mode 100644 +index 0000000..3b3b3ad +--- /dev/null ++++ b/callback/vacall_r/vacall-loongarch64-macro.S +@@ -0,0 +1,218 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .type callback_receiver, @function ++callback_receiver: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-288 ++ .cfi_def_cfa_offset 288 ++ st.d $ra,$sp,264 ++ st.d $fp,$sp,256 ++ st.d $s0,$sp,248 ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ .cfi_offset 23, -40 ++ addi.d $fp,$sp,272 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t8,0 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-192 ++ st.d $a2,$fp,-184 ++ st.d $a3,$fp,-176 ++ st.d $a4,$fp,-168 ++ st.d $a5,$fp,-160 ++ st.d $a6,$fp,-152 ++ st.d $a7,$fp,-144 ++ fst.s $f0,$fp,-132 ++ fst.s $f1,$fp,-128 ++ fst.s $f2,$fp,-124 ++ fst.s $f3,$fp,-120 ++ fst.s $f4,$fp,-116 ++ fst.s $f5,$fp,-112 ++ fst.s $f6,$fp,-108 ++ fst.s $f7,$fp,-104 ++ fst.d $f0,$fp,-96 ++ fst.d $f1,$fp,-88 ++ fst.d $f2,$fp,-80 ++ fst.d $f3,$fp,-72 ++ fst.d $f4,$fp,-64 ++ fst.d $f5,$fp,-56 ++ fst.d $f6,$fp,-48 ++ fst.d $f7,$fp,-40 ++ st.d $t1,$fp,-240 ++ st.d $a0,$fp,-200 ++ st.w $zero,$fp,-264 ++ st.d $zero,$fp,-232 ++ ld.d $a0,$t8,8 ++ st.w $zero,$fp,-224 ++ st.w $zero,$fp,-208 ++ st.w $zero,$fp,-136 ++ addi.d $a1,$fp,-264 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-224 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ la.local $t1,.L4 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1-.L4 ++ .dword .L15-.L4 ++ .dword .L15-.L4 ++ .dword .L14-.L4 ++ .dword .L13-.L4 ++ .dword .L12-.L4 ++ .dword .L11-.L4 ++ .dword .L10-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L7-.L4 ++ .dword .L6-.L4 ++ .dword .L5-.L4 ++ .dword .L3-.L4 ++ .text ++.L5: ++ ld.d $a0,$fp,-256 ++.L1: ++ ld.d $ra,$sp,264 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,256 ++ .cfi_restore 22 ++ ld.d $s0,$sp,248 ++ .cfi_restore 23 ++ addi.d $sp,$sp,288 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-256 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-256 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-256 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-256 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-256 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-256 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-256 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-256 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-264 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t0,$fp,-216 ++ addi.w $t1,$r0,15 # 0xf ++ addi.d $t2,$t0,-1 ++ bgtu $t2,$t1,.L1 ++ ld.d $t1,$fp,-232 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $s0,$r0,8 # 0x8 ++ andi $t2,$t1,7 ++ add.d $t4,$t0,$t2 ++ and $t1,$t1,$t3 ++ ldptr.d $t5,$t1,0 ++ slli.w $t3,$t4,3 ++ slli.w $t6,$t2,3 ++ bgtu $t0,$s0,.L17 ++ bgtu $t4,$s0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d 
$a0,$a0,-1 ++ and $a0,$a0,$t5 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t0,$r0,16 # 0x10 ++ ld.d $t7,$t1,8 ++ sra.d $t5,$t5,$t6 ++ sub.d $s0,$s0,$t2 ++ bgtu $t4,$t0,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t7 ++ slli.w $s0,$s0,2 ++ sll.d $a0,$a1,$s0 ++ sll.d $a0,$a0,$s0 ++ or $a0,$a0,$t5 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t4,$t1,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ sub.d $t1,$s0,$t2 ++ and $t0,$t0,$t4 ++ slli.w $t1,$t1,3 ++ sll.d $t0,$t0,$t1 ++ sra.d $t5,$t5,$t6 ++ or $a0,$t0,$t5 ++ b .L1 ++.L19: ++ ld.d $t2,$t1,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ slli.w $t1,$s0,3 ++ and $t0,$t0,$t2 ++ sll.d $a0,$t7,$t1 ++ sll.d $t0,$t0,$t1 ++ sra.d $t7,$t7,$t6 ++ or $a0,$a0,$t5 ++ or $a1,$t0,$t7 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size callback_receiver, .-callback_receiver ++ .align 2 ++ .globl callback_get_receiver ++ .type callback_get_receiver, @function ++callback_get_receiver: ++.LFB1 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-16 ++ .cfi_def_cfa_offset 16 ++ st.d $fp,$sp,8 ++ .cfi_offset 22, -8 ++ addi.d $fp,$sp,16 ++ .cfi_def_cfa 22, 0 ++ ld.d $fp,$sp,8 ++ .cfi_restore 22 ++ la.local $a0,callback_receiver ++ addi.d $sp,$sp,16 ++ .cfi_def_cfa_register 3 ++ jr $ra ++ .cfi_endproc ++.LFE1: ++ .size callback_get_receiver, .-callback_get_receiver ++#if defined __linux__ || defined __FreeBSD__ || defined __FreeBSD_kernel__ || defined __DragonFly__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/callback/vacall_r/vacall_r.h b/callback/vacall_r/vacall_r.h +index 08b0f09..1bd3638 100644 +--- a/callback/vacall_r/vacall_r.h ++++ b/callback/vacall_r/vacall_r.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -175,7 +175,7 @@ enum __VA_alist_flags + #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */ + __VA_SUNPROCC_STRUCT_RETURN, + #else +-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) ++#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + __VA_SMALL_STRUCT_RETURN | + #endif + #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) +@@ -292,7 +292,7 @@ enum __VA_alist_flags + #endif + + /* 
These are for internal use only */ +-#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) || defined(__loongarch64__) + __VA_REGISTER_STRUCT_RETURN = 1<<10, + #endif + #if defined(__mipsn32__) || defined(__mips64__) +diff --git a/common/asm-loongarch.sh b/common/asm-loongarch.sh +new file mode 100755 +index 0000000..ccab8c1 +--- /dev/null ++++ b/common/asm-loongarch.sh +@@ -0,0 +1,44 @@ ++#!/bin/sh ++# Translate the assembler syntax of LoongArch assembler programs ++# Usage: asm-loongarch.sh < loongarchlinux-asm-file > portable-asm-file ++# The portable-asm-file has to be ++# 1. preprocessed, ++# 2. grep -v '^ *#line' | grep -v '^#' ++# 3. sed -e 's,% ,%,g' -e 's,//.*$,,' ++ ++# Copyright (C) 2017-2022 Bruno Haible ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++ ++tmpscript1=sed$$tmp1 ++tmpscript2=sed$$tmp2 ++tmpremove='rm -f $tmpscript1 $tmpscript2' ++trap "$tmpremove" 1 2 15 ++ ++cat > $tmpscript1 << \EOF ++# ----------- Remove gcc self-identification ++/gcc2_compiled/d ++/gnu_compiled_c/d ++/\.ident/d ++EOF ++ ++cat > $tmpscript2 << \EOF ++# ----------- Introduce macro syntax for assembler pseudo-ops ++/\.section\([ ]\+\).*GNU-stack/d ++EOF ++ ++sed -f $tmpscript1 | \ ++sed -f $tmpscript2 ++ ++eval "$tmpremove" +diff --git a/ffcall-abi.h b/ffcall-abi.h +index 117fd65..4de0366 100644 +--- a/ffcall-abi.h ++++ b/ffcall-abi.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2017-2019 Bruno Haible ++ * Copyright 2017-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -249,3 +249,9 @@ + #define __riscv64__ 1 + #endif + #endif ++ ++#ifndef __loongarch64__ ++#if defined(__loongarch64) && defined(__LP64__) ++#define __loongarch64__ 1 ++#endif ++#endif +diff --git a/trampoline/Makefile.devel b/trampoline/Makefile.devel +index ec9e0ae..bc3c6ff 100644 +--- a/trampoline/Makefile.devel ++++ b/trampoline/Makefile.devel +@@ -79,7 +79,7 @@ cache-powerpc64-elfv2-macro.S : cache-powerpc64-elfv2-linux.s ../common/asm-powe + OLDGCCFLAGS = -O2 -fomit-frame-pointer + ASPREFIX = /usr1/gnu/lib + +-proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.s proto-x86_64.s proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s ++proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.s proto-x86_64.s proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s proto-loongarch64.s + + proto-i386.s : proto.c + $(GCC) -V 2.7.2 -b i486-linuxaout $(OLDGCCFLAGS) -D__i386__ -S proto.c -o $@ +@@ -153,6 +153,9 @@ proto-riscv32.s : proto.c + proto-riscv64.s : proto64.c + $(CROSS_TOOL) riscv64-linux gcc-7.3.0 $(OLDGCCFLAGS) -D__riscv64__ -S proto64.c -o $@ + ++proto-loongarch64.s : proto64.c ++ $(CROSS_TOOL) loongarch64-linux gcc $(OLDGCCFLAGS) -D__loongarch64__ -S proto64.c -o $@ ++ + tramp-i386.o : tramp-i386.s + $(ASPREFIX)/i486-linux/bin/as tramp-i386.s -o $@ + +@@ -215,3 +218,6 @@ tramp-riscv32.o : tramp-riscv32.s + + tramp-riscv64.o : tramp-riscv64.s + $(CROSS_TOOL) riscv64-linux as tramp-riscv64.s -o $@ ++ ++tramp-loongarch64.o : tramp-loongarch64.s ++ $(CROSS_TOOL) loongarch64-linux as tramp-loongarch64.s -o $@ +diff --git a/trampoline/PLATFORMS b/trampoline/PLATFORMS +index ef73e74..eeaf877 100644 +--- a/trampoline/PLATFORMS ++++ b/trampoline/PLATFORMS +@@ -37,4 +37,5 @@ Supported CPUs: (Put the GNU config.guess values here.) 
+ s390x s390x-ibm-linux + riscv32 riscv32-unknown-linux + riscv64 riscv64-unknown-linux ++ loongarch64 loongarch64-unknown-linux + +diff --git a/trampoline/proto-loongarch64.s b/trampoline/proto-loongarch64.s +new file mode 100644 +index 0000000..2446569 +--- /dev/null ++++ b/trampoline/proto-loongarch64.s +@@ -0,0 +1,44 @@ ++ .file "proto64.c" ++ .text ++ .align 2 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ lu12i.w $r12,324501504>>12 # 0x13578000 ++ ori $r12,$r12,1893 ++ lu32i.d $r12,0x4567800000000>>32 ++ lu52i.d $r12,$r12,0x1230000000000000>>52 ++ addi.w $r15,$r0,33 # 0x21 ++ st.b $r15,$r12,1 ++ addi.w $r15,$r0,98 # 0x62 ++ lu12i.w $r13,-559878144>>12 # 0xffffffffdea0f000 ++ st.b $r15,$r12,2 ++ addi.w $r15,$r0,67 # 0x43 ++ addi.w $r14,$r0,85 # 0x55 ++ ori $r13,$r13,4011 ++ st.b $r15,$r12,3 ++ addi.w $r15,$r0,17 # 0x11 ++ lu32i.d $r13,0xfffebec000000000>>32 ++ st.b $r14,$r12,0 ++ st.b $r15,$r12,4 ++ st.b $r14,$r12,6 ++ addi.w $r15,$r0,71 # 0x47 ++ addi.w $r14,$r0,115 # 0x73 ++ st.b $r15,$r12,5 ++ st.b $r14,$r12,7 ++ lu52i.d $r13,$r13,0xbab0000000000000>>52 ++ jr $r13 ++ .size tramp, .-tramp ++ .section .text.unlikely,"ax",@progbits ++ .align 2 ++ .globl jump ++ .type jump, @function ++jump: ++ lu12i.w $r12,-559878144>>12 # 0xffffffffdea0f000 ++ ori $r12,$r12,4011 ++ lu32i.d $r12,0xfffebec000000000>>32 ++ lu52i.d $r12,$r12,0xbab0000000000000>>52 ++ jr $r12 ++ .size jump, .-jump ++ .ident "GCC: (GNU) 12.0.0 20211224 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/trampoline/tramp-loongarch64.s b/trampoline/tramp-loongarch64.s +new file mode 100644 +index 0000000..52d887f +--- /dev/null ++++ b/trampoline/tramp-loongarch64.s +@@ -0,0 +1,40 @@ ++/* Trampoline for LoongArch CPU in 64-bit mode */ ++ ++/* ++ * Copyright 1996-2022 Bruno Haible ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* Available registers: $r12 ... $r20. */ ++ ++ .text ++ .align 3 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ pcaddu12i $r12, 0 ++ /* Now our own address (=tramp) is in $r12. 
*/ ++ ld.d $r13, $r12, 24 /* $LC0-tramp */ ++ ld.d $r14, $r12, 32 /* $LC1-tramp */ ++ st.d $r14, $r13, 0 ++ ld.d $r12, $r12, 40 /* $LC2-tramp */ ++ jr $r12 ++$LC0: ++ .dword 0x1234567813578765 ++$LC1: ++ .dword 0x7355471143622155 ++$LC2: ++ .dword 0xbabebec0dea0ffab ++ .size tramp, .-tramp +diff --git a/trampoline/trampoline.c b/trampoline/trampoline.c +index 9b79e0d..fdcbcd4 100644 +--- a/trampoline/trampoline.c ++++ b/trampoline/trampoline.c +@@ -1,7 +1,7 @@ + /* Trampoline construction */ + + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -330,6 +330,10 @@ static int open_noinherit (const char *filename, int flags, int mode) + #define TRAMP_LENGTH 48 + #define TRAMP_ALIGN 8 + #endif ++#if defined(__loongarch64__) ++#define TRAMP_LENGTH 48 ++#define TRAMP_ALIGN 8 ++#endif + + #ifndef TRAMP_BIAS + #define TRAMP_BIAS 0 +@@ -1514,6 +1518,42 @@ trampoline_function_t alloc_trampoline (trampoline_function_t address, void** va + #define tramp_data(function) \ + (*(unsigned long *) (function +24)) + #endif ++#ifdef __loongarch64__ ++ /* function: ++ * pcaddu12i $r12,0 1C00000C ++ * ld.d $r13,$r12,24 28C0618D ++ * ld.d $r14,$r12,32 28C0818E ++ * st.d $r14,$r13,0 29C001AE ++ * ld.d $r12,$r12,40 28C0A18C ++ * jirl $r0,$r12,0 4C000180 ++ * .dword ++ * .dword ++ * .dword
++ */ ++ *(unsigned int *) (function + 0) = 0x1C00000C; ++ *(unsigned int *) (function + 4) = 0x28C0618D; ++ *(unsigned int *) (function + 8) = 0x28C0818E; ++ *(unsigned int *) (function +12) = 0x29C001AE; ++ *(unsigned int *) (function +16) = 0x28C0A18C; ++ *(unsigned int *) (function +20) = 0x4C000180; ++ *(unsigned long *) (function +24) = (unsigned long) variable; ++ *(unsigned long *) (function +32) = (unsigned long) data; ++ *(unsigned long *) (function +40) = (unsigned long) address; ++#define TRAMP_CODE_LENGTH 24 ++#define is_tramp(function) \ ++ *(unsigned int *) (function + 0) == 0x1C00000C && \ ++ *(unsigned int *) (function + 4) == 0x28C0618D && \ ++ *(unsigned int *) (function + 8) == 0x28C0818E && \ ++ *(unsigned int *) (function +12) == 0x29C001AE && \ ++ *(unsigned int *) (function +16) == 0x28C0A18C && \ ++ *(unsigned int *) (function +20) == 0x4C000180 ++#define tramp_address(function) \ ++ *(unsigned long *) (function +40) ++#define tramp_variable(function) \ ++ *(unsigned long *) (function +24) ++#define tramp_data(function) \ ++ *(unsigned long *) (function +32) ++#endif + + /* 3. Set memory protection to "executable" */ + +@@ -1657,6 +1697,10 @@ trampoline_function_t alloc_trampoline (trampoline_function_t address, void** va + __asm__ __volatile__ ("fence.i"); + #endif + #endif ++#if defined(__loongarch64__) ++ /* Use the GCC built-in. It expands to 'ibar 0'. */ ++ __clear_cache((void*)function_x,(void*)(function_x+TRAMP_CODE_LENGTH)); ++#endif + #endif + #endif + +diff --git a/vacall/Makefile.devel b/vacall/Makefile.devel +index 1c78798..7e41aad 100644 +--- a/vacall/Makefile.devel ++++ b/vacall/Makefile.devel +@@ -25,7 +25,8 @@ precompiled : \ + vacall-ia64-macro.S \ + vacall-x86_64-macro.S vacall-x86_64-x32-linux.s vacall-x86_64-windows-macro.S \ + vacall-s390-macro.S vacall-s390x-macro.S \ +- vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S ++ vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S \ ++ vacall-loongarch64-macro.S + + + vacall-i386-linux.s : vacall-i386.c vacall-internal.h vacall.h $(THISFILE) +@@ -278,6 +279,13 @@ vacall-riscv64-lp64d-macro.S : vacall-riscv64-lp64d-linux.s ../common/asm-riscv. 
+ (../common/asm-riscv.sh < vacall-riscv64-lp64d-linux.s ; cat ../common/noexecstack.h) > vacall-riscv64-lp64d-macro.S + + ++vacall-loongarch64-linux.s : vacall-loongarch64.c vacall-internal.h vacall.h $(THISFILE) ++ $(CROSS_TOOL) loongarch64-linux gcc $(GCCFLAGS) -D__loongarch64__ -S vacall-loongarch64.c -o vacall-loongarch64-linux.s ++ ++vacall-loongarch64-macro.S : vacall-loongarch64-linux.s ../common/asm-loongarch.sh ../common/noexecstack.h $(THISFILE) ++ (../common/asm-loongarch.sh < vacall-loongarch64-linux.s ; cat ../common/noexecstack.h) > vacall-loongarch64-macro.S ++ ++ + # --------------- Rules for debugging test failures --------------- + + tests : tests-i386.s tests-m68k.s tests-mips.s tests-sparc.s tests-alpha.s tests-hppa.s tests-arm.s tests-powerpc.s tests-powerpc64.s tests-ia64.s tests-x86_64.s +diff --git a/vacall/Makefile.in b/vacall/Makefile.in +index ec31846..0101d7d 100644 +--- a/vacall/Makefile.in ++++ b/vacall/Makefile.in +@@ -236,6 +236,12 @@ vacall-riscv64-lp64d.@OBJEXT@ : vacall-riscv64-lp64d.s + vacall-riscv64-lp64d.s : $(srcdir)/vacall-riscv64-lp64d-macro.S + $(CPP) $(ASPFLAGS) $(srcdir)/vacall-riscv64-lp64d-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-riscv64-lp64d.s + ++vacall-loongarch64.@OBJEXT@ : vacall-loongarch64.s ++ $(CC) @GCC_X_NONE@ -c vacall-loongarch64.s ++ ++vacall-loongarch64.s : $(srcdir)/vacall-loongarch64-macro.S ++ $(CPP) $(ASPFLAGS) $(srcdir)/vacall-loongarch64-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-loongarch64.s ++ + vacall-libapi.@OBJEXT@ : $(srcdir)/vacall-libapi.c $(srcdir)/vacall-internal.h $(srcdir)/vacall.h ../config.h + $(CC) $(INCLUDES_WITH_GNULIB) $(CPPFLAGS) $(CFLAGS) @DISABLE_TYPE_BASED_ALIASING@ -c $(srcdir)/vacall-libapi.c + +@@ -319,7 +325,7 @@ mostlyclean : clean + + clean : force + $(RM) *.@OBJEXT@ *.a core +- $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s ++ $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s vacall-loongarch64.s + $(RM) minitests.@OBJEXT@ minitests.s minitests minitests.out + $(RM) minitests-c++.@OBJEXT@ minitests-c++ minitests-c++.out + $(RM) tests.@OBJEXT@ tests.s tests tests.out +@@ -346,6 +352,7 @@ SOURCE_FILES = \ + vacall-hppa64.c vacall-hppa64-linux.s vacall-hppa64-macro.S \ + vacall-i386.c vacall-i386-linux.s vacall-i386-linux-pic.s vacall-i386-macro.S \ + vacall-ia64.c vacall-ia64-linux.s vacall-ia64-macro.S \ ++ vacall-loongarch64.c vacall-loongarch64-linux.s vacall-loongarch64-macro.S \ + vacall-m68k.c vacall-m68k-linux.s vacall-m68k-sun.s vacall-m68k.mit.S vacall-m68k.motorola.S \ + vacall-mips.c vacall-mipseb-linux.s vacall-mipsel-linux.s vacall-mipseb-macro.S vacall-mipsel-macro.S \ + vacall-mipsn32.c vacall-mipsn32eb-linux.s vacall-mipsn32el-linux.s vacall-mipsn32eb-macro.S vacall-mipsn32el-macro.S \ 
+diff --git a/vacall/PLATFORMS b/vacall/PLATFORMS +index 01efe52..683e233 100644 +--- a/vacall/PLATFORMS ++++ b/vacall/PLATFORMS +@@ -48,4 +48,5 @@ Supported CPUs: (Put the GNU config.guess values here.) + s390x s390x-ibm-linux (gcc) + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) ++ loongarch64 loongarch64-unknown-linux (gcc) + +diff --git a/vacall/vacall-internal.h b/vacall/vacall-internal.h +index c196348..f56b238 100644 +--- a/vacall/vacall-internal.h ++++ b/vacall/vacall-internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -92,7 +92,7 @@ typedef struct vacall_alist + unsigned int _uint; + long _long; + unsigned long _ulong; +-#if !(defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || defined(__riscv64__)) ++#if !(defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || defined(__riscv64__) || defined(__loongarch64__)) + long long _longlong; + unsigned long long _ulonglong; + #endif +@@ -215,7 +215,7 @@ typedef struct vacall_alist + float farg[__VA_FARG_NUM]; + double darg[__VA_FARG_NUM]; + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __VA_IARG_NUM 8 + unsigned int ianum; + __varword iarg[__VA_IARG_NUM]; +@@ -371,7 +371,7 @@ typedef struct vacall_alist + ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN, \ + 0) + #endif +-#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) ++#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) || defined(__loongarch64__) + #define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \ + ((TYPE_SIZE) <= 16) + /* Turn on __VA_REGISTER_STRUCT_RETURN if __VA_SMALL_STRUCT_RETURN was set +@@ -419,7 +419,7 @@ typedef struct vacall_alist + 0 \ + ) + #endif +-#if defined(__powerpc_sysv4__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__powerpc_sysv4__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Return structure pointer is passed as first arg. */ + #define __va_start_struct2(LIST) \ + ((LIST)->raddr = (void*)((LIST)->iarg[(LIST)->ianum++]), \ +@@ -440,7 +440,7 @@ typedef struct vacall_alist + /* Padding of non-struct arguments. 
*/ + #define __va_argsize(TYPE_SIZE) \ + (((TYPE_SIZE) + sizeof(__vaword)-1) & -(intptr_t)sizeof(__vaword)) +-#if defined(__i386__) || defined(__m68k__) || (defined(__mips__) && !defined(__mipsn32__) && !defined(__mips64__)) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || (defined(__mips__) && !defined(__mipsn32__) && !defined(__mips64__)) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* args grow up */ + /* small structures < 1 word are adjusted depending on compiler */ + #define __va_arg_leftadjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ +@@ -625,7 +625,7 @@ typedef struct vacall_alist + (void*)__va_arg_rightadjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ + ) ) + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* the first __VA_IARG_NUM argument words are passed in registers */ + #define __va_arg_adjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ + ((LIST)->ianum + ((TYPE_SIZE) + sizeof(__varword)-1) / sizeof(__varword) <= __VA_IARG_NUM \ +@@ -655,7 +655,7 @@ typedef struct vacall_alist + #define _va_arg_long(LIST) __va_arg(LIST,long) + #define _va_arg_ulong(LIST) __va_arg(LIST,unsigned long) + +-#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__s390x__) || defined(__riscv64__) ++#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__s390x__) || defined(__riscv64__) || defined(__loongarch64__) + /* ‘long long’ and ‘long’ are identical. */ + #define _va_arg_longlong _va_arg_long + #define _va_arg_ulonglong _va_arg_ulong +@@ -719,7 +719,7 @@ typedef struct vacall_alist + + /* Floating point arguments. 
*/ + +-#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __va_align_double(LIST) + #endif + #if defined(__mips__) && !defined(__mipsn32__) && !defined(__mips64__) || defined(__arm__) || defined(__armhf__) +@@ -1005,7 +1005,7 @@ typedef struct vacall_alist + *(double*)((LIST)->aptr - sizeof(double)) \ + ) ) + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* The first __VA_FARG_NUM floating-point args have been stored elsewhere. */ + #define _va_arg_float(LIST) \ + ((LIST)->fanum < __VA_FARG_NUM \ +@@ -1115,7 +1115,7 @@ typedef struct vacall_alist + (void*)__va_arg_leftadjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ + ) + #endif +-#if defined(__arm64__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__arm64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Small structures are passed in registers or on the stack. */ + /* Big structures are passed as pointers to caller-made local copies. */ + #define __va_arg_struct(LIST,TYPE_SIZE,TYPE_ALIGN) \ +@@ -1181,7 +1181,7 @@ typedef struct vacall_alist + (__va_return(LIST,__VAlong), (LIST)->tmp._long = (VAL)) + #define _va_return_ulong(LIST,VAL) \ + (__va_return(LIST,__VAulong), (LIST)->tmp._ulong = (VAL)) +-#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__riscv64__) ++#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__riscv64__) || defined(__loongarch64__) + #define _va_return_longlong(LIST,VAL) \ + (__va_return(LIST,__VAlonglong), (LIST)->tmp._long = (VAL)) + #define _va_return_ulonglong(LIST,VAL) \ +diff --git a/vacall/vacall-loongarch64-linux.s b/vacall/vacall-loongarch64-linux.s +new file mode 100644 +index 0000000..f430424 +--- /dev/null ++++ b/vacall/vacall-loongarch64-linux.s +@@ -0,0 +1,192 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .globl vacall_receiver ++ .type vacall_receiver, @function ++vacall_receiver: ++.LFB0 = . 
++ .cfi_startproc ++ addi.d $sp,$sp,-272 ++ .cfi_def_cfa_offset 272 ++ st.d $ra,$sp,248 ++ st.d $fp,$sp,240 ++ la.global $t0,vacall_function ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ addi.d $fp,$sp,256 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t0,0 ++ st.d $a0,$fp,-184 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-176 ++ st.d $a2,$fp,-168 ++ st.d $a3,$fp,-160 ++ st.d $a4,$fp,-152 ++ st.d $a5,$fp,-144 ++ st.d $a6,$fp,-136 ++ st.d $a7,$fp,-128 ++ fst.s $f0,$fp,-116 ++ fst.s $f1,$fp,-112 ++ fst.s $f2,$fp,-108 ++ fst.s $f3,$fp,-104 ++ fst.s $f4,$fp,-100 ++ fst.s $f5,$fp,-96 ++ fst.s $f6,$fp,-92 ++ fst.s $f7,$fp,-88 ++ fst.d $f0,$fp,-80 ++ fst.d $f1,$fp,-72 ++ fst.d $f2,$fp,-64 ++ fst.d $f3,$fp,-56 ++ fst.d $f4,$fp,-48 ++ fst.d $f5,$fp,-40 ++ fst.d $f6,$fp,-32 ++ fst.d $f7,$fp,-24 ++ st.d $t1,$fp,-224 ++ st.w $zero,$fp,-248 ++ st.d $zero,$fp,-216 ++ st.w $zero,$fp,-208 ++ addi.d $a0,$fp,-248 ++ st.w $zero,$fp,-192 ++ st.w $zero,$fp,-120 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-208 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ slli.d $t0,$t0,3 ++ la.local $t1,.L4 ++ ldx.d $t0,$t1,$t0 ++ jr $t0 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1 ++ .dword .L15 ++ .dword .L15 ++ .dword .L14 ++ .dword .L13 ++ .dword .L12 ++ .dword .L11 ++ .dword .L10 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L7 ++ .dword .L6 ++ .dword .L5 ++ .dword .L3 ++ .text ++.L5: ++ ld.d $a0,$fp,-240 ++.L1: ++ ld.d $ra,$sp,248 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,240 ++ .cfi_restore 22 ++ addi.d $sp,$sp,272 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-240 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-240 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-240 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-240 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-240 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-240 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-240 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-240 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-248 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t1,$fp,-200 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t2,$t1,-1 ++ bgtu $t2,$t0,.L1 ++ ld.d $t2,$fp,-216 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t0,$r0,8 # 0x8 ++ andi $t4,$t2,7 ++ add.d $t5,$t1,$t4 ++ and $t2,$t2,$t3 ++ ldptr.d $t7,$t2,0 ++ slli.w $t3,$t5,3 ++ slli.w $t6,$t4,3 ++ bgtu $t1,$t0,.L17 ++ bgtu $t5,$t0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d $a0,$a0,-1 ++ and $a0,$a0,$t7 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t1,$r0,16 # 0x10 ++ ld.d $t8,$t2,8 ++ sra.d $t7,$t7,$t6 ++ sub.d $t0,$t0,$t4 ++ bgtu $t5,$t1,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t8 ++ slli.w $t0,$t0,2 ++ sll.d $a0,$a1,$t0 ++ sll.d $a0,$a0,$t0 ++ or $a0,$a0,$t7 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t2,$t2,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ sub.d $t0,$t0,$t4 ++ and $t1,$t1,$t2 ++ slli.w $t0,$t0,3 ++ sll.d $t0,$t1,$t0 ++ sra.d $t7,$t7,$t6 ++ or $a0,$t0,$t7 ++ b .L1 ++.L19: ++ ld.d $t2,$t2,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ slli.w $t0,$t0,3 ++ and $t1,$t1,$t2 ++ sll.d $a0,$t8,$t0 ++ sll.d $t0,$t1,$t0 ++ sra.d $t8,$t8,$t6 ++ or $a0,$a0,$t7 ++ or $a1,$t0,$t8 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size vacall_receiver, .-vacall_receiver ++ .ident "GCC: (GNU) 12.0.1 20220317 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/vacall/vacall-loongarch64-macro.S 
b/vacall/vacall-loongarch64-macro.S +new file mode 100644 +index 0000000..0699ab6 +--- /dev/null ++++ b/vacall/vacall-loongarch64-macro.S +@@ -0,0 +1,193 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .globl vacall_receiver ++ .type vacall_receiver, @function ++vacall_receiver: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-272 ++ .cfi_def_cfa_offset 272 ++ st.d $ra,$sp,248 ++ st.d $fp,$sp,240 ++ la.global $t0,vacall_function ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ addi.d $fp,$sp,256 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t0,0 ++ st.d $a0,$fp,-184 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-176 ++ st.d $a2,$fp,-168 ++ st.d $a3,$fp,-160 ++ st.d $a4,$fp,-152 ++ st.d $a5,$fp,-144 ++ st.d $a6,$fp,-136 ++ st.d $a7,$fp,-128 ++ fst.s $f0,$fp,-116 ++ fst.s $f1,$fp,-112 ++ fst.s $f2,$fp,-108 ++ fst.s $f3,$fp,-104 ++ fst.s $f4,$fp,-100 ++ fst.s $f5,$fp,-96 ++ fst.s $f6,$fp,-92 ++ fst.s $f7,$fp,-88 ++ fst.d $f0,$fp,-80 ++ fst.d $f1,$fp,-72 ++ fst.d $f2,$fp,-64 ++ fst.d $f3,$fp,-56 ++ fst.d $f4,$fp,-48 ++ fst.d $f5,$fp,-40 ++ fst.d $f6,$fp,-32 ++ fst.d $f7,$fp,-24 ++ st.d $t1,$fp,-224 ++ st.w $zero,$fp,-248 ++ st.d $zero,$fp,-216 ++ st.w $zero,$fp,-208 ++ addi.d $a0,$fp,-248 ++ st.w $zero,$fp,-192 ++ st.w $zero,$fp,-120 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-208 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ slli.d $t0,$t0,3 ++ la.local $t1,.L4 ++ ldx.d $t0,$t1,$t0 ++ jr $t0 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1 ++ .dword .L15 ++ .dword .L15 ++ .dword .L14 ++ .dword .L13 ++ .dword .L12 ++ .dword .L11 ++ .dword .L10 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L7 ++ .dword .L6 ++ .dword .L5 ++ .dword .L3 ++ .text ++.L5: ++ ld.d $a0,$fp,-240 ++.L1: ++ ld.d $ra,$sp,248 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,240 ++ .cfi_restore 22 ++ addi.d $sp,$sp,272 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-240 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-240 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-240 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-240 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-240 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-240 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-240 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-240 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-248 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t1,$fp,-200 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t2,$t1,-1 ++ bgtu $t2,$t0,.L1 ++ ld.d $t2,$fp,-216 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t0,$r0,8 # 0x8 ++ andi $t4,$t2,7 ++ add.d $t5,$t1,$t4 ++ and $t2,$t2,$t3 ++ ldptr.d $t7,$t2,0 ++ slli.w $t3,$t5,3 ++ slli.w $t6,$t4,3 ++ bgtu $t1,$t0,.L17 ++ bgtu $t5,$t0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d $a0,$a0,-1 ++ and $a0,$a0,$t7 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t1,$r0,16 # 0x10 ++ ld.d $t8,$t2,8 ++ sra.d $t7,$t7,$t6 ++ sub.d $t0,$t0,$t4 ++ bgtu $t5,$t1,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t8 ++ slli.w $t0,$t0,2 ++ sll.d $a0,$a1,$t0 ++ sll.d $a0,$a0,$t0 ++ or $a0,$a0,$t7 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t2,$t2,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ sub.d $t0,$t0,$t4 ++ and $t1,$t1,$t2 ++ slli.w $t0,$t0,3 ++ sll.d $t0,$t1,$t0 ++ sra.d $t7,$t7,$t6 ++ or $a0,$t0,$t7 ++ b .L1 ++.L19: ++ ld.d $t2,$t2,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ slli.w $t0,$t0,3 ++ and $t1,$t1,$t2 ++ sll.d $a0,$t8,$t0 
++ sll.d $t0,$t1,$t0 ++ sra.d $t8,$t8,$t6 ++ or $a0,$a0,$t7 ++ or $a1,$t0,$t8 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size vacall_receiver, .-vacall_receiver ++#if defined __linux__ || defined __FreeBSD__ || defined __FreeBSD_kernel__ || defined __DragonFly__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/vacall/vacall-loongarch64.c b/vacall/vacall-loongarch64.c +new file mode 100644 +index 0000000..d5cfa7f +--- /dev/null ++++ b/vacall/vacall-loongarch64.c +@@ -0,0 +1,238 @@ ++/* vacall function for LoongArch 64-bit CPU */ ++ ++/* ++ * Copyright 1995-2022 Bruno Haible ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++#include "vacall-internal.h" ++ ++#ifdef REENTRANT ++#define vacall_receiver callback_receiver ++register struct { void (*vacall_function) (void*,va_alist); void* arg; } ++ * env __asm__("t8"); ++#endif ++ ++register __varword iarg1 __asm__("a0"); ++register __varword iarg2 __asm__("a1"); ++register __varword iarg3 __asm__("a2"); ++register __varword iarg4 __asm__("a3"); ++register __varword iarg5 __asm__("a4"); ++register __varword iarg6 __asm__("a5"); ++register __varword iarg7 __asm__("a6"); ++register __varword iarg8 __asm__("a7"); ++ ++register float farg1 __asm__("f0"); ++register float farg2 __asm__("f1"); ++register float farg3 __asm__("f2"); ++register float farg4 __asm__("f3"); ++register float farg5 __asm__("f4"); ++register float farg6 __asm__("f5"); ++register float farg7 __asm__("f6"); ++register float farg8 __asm__("f7"); ++ ++register double darg1 __asm__("f0"); ++register double darg2 __asm__("f1"); ++register double darg3 __asm__("f2"); ++register double darg4 __asm__("f3"); ++register double darg5 __asm__("f4"); ++register double darg6 __asm__("f5"); ++register double darg7 __asm__("f6"); ++register double darg8 __asm__("f7"); ++ ++register __varword iret __asm__("a0"); ++register __varword iret2 __asm__("a1"); ++register float fret __asm__("f0"); ++register double dret __asm__("f0"); ++ ++/* The ABI requires that the first 8 general-purpose argument words are ++ being passed in registers, even if these words belong to structs that are ++ at most 2 words large. No room is allocated for these register words on ++ the stack by the caller, but the callee allocates room for them - at the ++ right place in the stack frame, that is, above the retaddr - if and only ++ if they are part of a struct that extends to the stack and the address of ++ this struct is taken. */ ++struct gpargsequence { ++ __vaword word8; /* a7 */ ++ __vaword firststackword; ++}; ++ ++#ifdef REENTRANT ++static ++#endif ++void /* the return type is variable, not void! */ ++vacall_receiver (__vaword word1, __vaword word2, __vaword word3, __vaword word4, ++ __vaword word5, __vaword word6, __vaword word7, ++ struct gpargsequence gpargs) ++{ ++ __va_alist list; ++ /* Move the arguments passed in registers to temp storage. 
*/ ++ list.iarg[0] = iarg1; ++ list.iarg[1] = iarg2; ++ list.iarg[2] = iarg3; ++ list.iarg[3] = iarg4; ++ list.iarg[4] = iarg5; ++ list.iarg[5] = iarg6; ++ list.iarg[6] = iarg7; ++ list.iarg[7] = iarg8; /* = gpargs.word8 */ ++ list.farg[0] = farg1; ++ list.farg[1] = farg2; ++ list.farg[2] = farg3; ++ list.farg[3] = farg4; ++ list.farg[4] = farg5; ++ list.farg[5] = farg6; ++ list.farg[6] = farg7; ++ list.farg[7] = farg8; ++ list.darg[0] = darg1; ++ list.darg[1] = darg2; ++ list.darg[2] = darg3; ++ list.darg[3] = darg4; ++ list.darg[4] = darg5; ++ list.darg[5] = darg6; ++ list.darg[6] = darg7; ++ list.darg[7] = darg8; ++ /* Prepare the va_alist. */ ++ list.flags = 0; ++ list.aptr = (long)&gpargs + sizeof(__vaword); ++ list.raddr = (void*)0; ++ list.rtype = __VAvoid; ++ list.ianum = 0; ++ list.fanum = 0; ++ /* Call vacall_function. The macros do all the rest. */ ++#ifndef REENTRANT ++ (*vacall_function) (&list); ++#else /* REENTRANT */ ++ (*env->vacall_function) (env->arg,&list); ++#endif ++ /* Put return value into proper register. */ ++ if (list.rtype == __VAvoid) { ++ } else ++ if (list.rtype == __VAchar) { ++ iret = list.tmp._char; ++ } else ++ if (list.rtype == __VAschar) { ++ iret = list.tmp._schar; ++ } else ++ if (list.rtype == __VAuchar) { ++ iret = list.tmp._uchar; ++ } else ++ if (list.rtype == __VAshort) { ++ iret = list.tmp._short; ++ } else ++ if (list.rtype == __VAushort) { ++ iret = list.tmp._ushort; ++ } else ++ if (list.rtype == __VAint) { ++ iret = list.tmp._int; ++ } else ++ if (list.rtype == __VAuint) { ++ iret = list.tmp._uint; ++ } else ++ if (list.rtype == __VAlong || list.rtype == __VAlonglong) { ++ iret = list.tmp._long; ++ } else ++ if (list.rtype == __VAulong || list.rtype == __VAulonglong) { ++ iret = list.tmp._ulong; ++ } else ++ if (list.rtype == __VAfloat) { ++ fret = list.tmp._float; ++ } else ++ if (list.rtype == __VAdouble) { ++ dret = list.tmp._double; ++ } else ++ if (list.rtype == __VAvoidp) { ++ iret = (long)list.tmp._ptr; ++ } else ++ if (list.rtype == __VAstruct) { ++ /* normal struct return convention */ ++ if (list.flags & __VA_REGISTER_STRUCT_RETURN) { ++ /* Return structs of size <= 16 in registers. 
*/ ++ if (list.rsize > 0 && list.rsize <= 16) { ++ #if 0 /* Unoptimized */ ++ iret = (__varword)((unsigned char *) list.raddr)[0]; ++ if (list.rsize >= 2) ++ iret |= (__varword)((unsigned char *) list.raddr)[1] << 8; ++ if (list.rsize >= 3) ++ iret |= (__varword)((unsigned char *) list.raddr)[2] << 16; ++ if (list.rsize >= 4) ++ iret |= (__varword)((unsigned char *) list.raddr)[3] << 24; ++ if (list.rsize >= 5) ++ iret |= (__varword)((unsigned char *) list.raddr)[4] << 32; ++ if (list.rsize >= 6) ++ iret |= (__varword)((unsigned char *) list.raddr)[5] << 40; ++ if (list.rsize >= 7) ++ iret |= (__varword)((unsigned char *) list.raddr)[6] << 48; ++ if (list.rsize >= 8) ++ iret |= (__varword)((unsigned char *) list.raddr)[7] << 56; ++ if (list.rsize >= 9) { ++ iret2 = (__varword)((unsigned char *) list.raddr)[8]; ++ if (list.rsize >= 10) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[9] << 8; ++ if (list.rsize >= 11) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[10] << 16; ++ if (list.rsize >= 12) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[11] << 24; ++ if (list.rsize >= 13) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[12] << 32; ++ if (list.rsize >= 14) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[13] << 40; ++ if (list.rsize >= 15) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[14] << 48; ++ if (list.rsize >= 16) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[15] << 56; ++ } ++ #else /* Optimized: fewer conditional jumps, fewer memory accesses */ ++ uintptr_t count = list.rsize; /* > 0, ≤ 2*sizeof(__varword) */ ++ __varword* wordaddr = (__varword*)((uintptr_t)list.raddr & ~(uintptr_t)(sizeof(__varword)-1)); ++ uintptr_t start_offset = (uintptr_t)list.raddr & (uintptr_t)(sizeof(__varword)-1); /* ≥ 0, < sizeof(__varword) */ ++ uintptr_t end_offset = start_offset + count; /* > 0, < 3*sizeof(__varword) */ ++ if (count <= sizeof(__varword)) { ++ /* Assign iret. */ ++ if (end_offset <= sizeof(__varword)) { ++ /* 0 < end_offset ≤ sizeof(__varword) */ ++ __varword mask0 = ((__varword)2 << (end_offset*8-1)) - 1; ++ iret = (wordaddr[0] & mask0) >> (start_offset*8); ++ } else { ++ /* sizeof(__varword) < end_offset < 2*sizeof(__varword), start_offset > 0 */ ++ __varword mask1 = ((__varword)2 << (end_offset*8-sizeof(__varword)*8-1)) - 1; ++ iret = (wordaddr[0] >> (start_offset*8)) | ((wordaddr[1] & mask1) << (sizeof(__varword)*8-start_offset*8)); ++ } ++ } else { ++ /* Assign iret, iret2. 
*/ ++ if (end_offset <= 2*sizeof(__varword)) { ++ /* sizeof(__varword) < end_offset ≤ 2*sizeof(__varword) */ ++ __varword mask1 = ((__varword)2 << (end_offset*8-sizeof(__varword)*8-1)) - 1; ++ iret = (wordaddr[0] >> (start_offset*8)) | ((wordaddr[1] & mask1) << (sizeof(__varword)*4-start_offset*4) << (sizeof(__varword)*4-start_offset*4)); ++ iret2 = (wordaddr[1] & mask1) >> (start_offset*8); ++ } else { ++ /* 2*sizeof(__varword) < end_offset < 3*sizeof(__varword), start_offset > 0 */ ++ __varword mask2 = ((__varword)2 << (end_offset*8-2*sizeof(__varword)*8-1)) - 1; ++ iret = (wordaddr[0] >> (start_offset*8)) | (wordaddr[1] << (sizeof(__varword)*8-start_offset*8)); ++ iret2 = (wordaddr[1] >> (start_offset*8)) | ((wordaddr[2] & mask2) << (sizeof(__varword)*8-start_offset*8)); ++ } ++ } ++ #endif ++ } ++ } ++ } ++} ++ ++#ifdef REENTRANT ++__vacall_r_t ++callback_get_receiver (void) ++{ ++ return (__vacall_r_t)(void*)&callback_receiver; ++} ++#endif +diff --git a/vacall/vacall.h b/vacall/vacall.h +index 9d14d4a..a467df7 100644 +--- a/vacall/vacall.h ++++ b/vacall/vacall.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -136,7 +136,7 @@ enum __VA_alist_flags + #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */ + __VA_SUNPROCC_STRUCT_RETURN, + #else +-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) ++#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + __VA_SMALL_STRUCT_RETURN | + #endif + #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) +@@ -253,7 +253,7 @@ enum __VA_alist_flags + #endif + + /* These are for internal use only */ +-#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || 
defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) || defined(__loongarch64__) + __VA_REGISTER_STRUCT_RETURN = 1<<10, + #endif + #if defined(__mipsn32__) || defined(__mips64__) diff --git a/ffmpeg/PKGBUILD b/ffmpeg/PKGBUILD index 7f9ecaf9f6..924225d2c9 100644 --- a/ffmpeg/PKGBUILD +++ b/ffmpeg/PKGBUILD @@ -156,7 +156,7 @@ build() { --disable-stripping \ --enable-amf \ --enable-avisynth \ - --enable-cuda-llvm \ + --disable-cuda-llvm \ --enable-lto \ --enable-fontconfig \ --enable-frei0r \ @@ -209,12 +209,14 @@ build() { --enable-libxml2 \ --enable-libxvid \ --enable-libzimg \ - --enable-nvdec \ - --enable-nvenc \ + --disable-nvdec \ + --disable-nvenc \ --enable-opencl \ --enable-opengl \ --enable-shared \ --enable-version3 \ + --disable-doc \ + --disable-lsx \ --enable-vulkan make make tools/qt-faststart diff --git a/ffmpeg4.4/PKGBUILD b/ffmpeg4.4/PKGBUILD index 1386743253..9cf705846f 100644 --- a/ffmpeg4.4/PKGBUILD +++ b/ffmpeg4.4/PKGBUILD @@ -29,7 +29,6 @@ depends=( libdrm libfreetype.so libiec61883 - libmfx libmodplug libpulse librav1e.so @@ -98,11 +97,6 @@ source=(git+https://git.ffmpeg.org/ffmpeg.git#tag=${_tag} b2sums=('SKIP' 'b656a17dd3996c6871d322ba1fcf25410ed580d9600348cda087d705660601d06070492300d31c12d54b0e9914cb92bb9d997e51462c0577e1a90539bf0b76ee') -pkgver() { - cd ffmpeg - git describe --tags | sed 's/^n//' -} - prepare() { cd ffmpeg git cherry-pick -n 988f2e9eb063db7c1a678729f58aab6eba59a55b # fix nvenc on older gpus @@ -128,7 +122,6 @@ build() { --disable-stripping \ --enable-amf \ --enable-avisynth \ - --enable-cuda-llvm \ --enable-lto \ --enable-fontconfig \ --enable-gmp \ @@ -145,7 +138,6 @@ build() { --enable-libgsm \ --enable-libiec61883 \ --enable-libjack \ - --enable-libmfx \ --enable-libmodplug \ --enable-libmp3lame \ --enable-libopencore_amrnb \ @@ -172,8 +164,6 @@ build() { --enable-libxml2 \ --enable-libxvid \ --enable-libzimg \ - --enable-nvdec \ - --enable-nvenc \ --enable-shared \ --enable-version3 diff --git a/ffmpegthumbnailer/PKGBUILD b/ffmpegthumbnailer/PKGBUILD index 22277a7eca..522125ac66 100644 --- a/ffmpegthumbnailer/PKGBUILD +++ b/ffmpegthumbnailer/PKGBUILD @@ -4,7 +4,7 @@ pkgname=ffmpegthumbnailer pkgver=2.2.2 -pkgrel=5 +pkgrel=6 pkgdesc="Lightweight video thumbnailer that can be used by file managers" url="https://github.com/dirkvdb/ffmpegthumbnailer" license=('GPL2') diff --git a/fftw/PKGBUILD b/fftw/PKGBUILD index e56eabad26..ed871e0296 100644 --- a/fftw/PKGBUILD +++ b/fftw/PKGBUILD @@ -21,9 +21,9 @@ makedepends=( gcc-fortran ) provides=( - libfftw3q_threads.so - libfftw3q_omp.so - libfftw3q.so +# libfftw3q_threads.so +# libfftw3q_omp.so +# libfftw3q.so libfftw3l_threads.so libfftw3l_omp.so libfftw3l_mpi.so @@ -65,20 +65,20 @@ build() { --enable-openmp ) local _configure_single=( - --enable-sse - --enable-avx + #--enable-sse + #--enable-avx --enable-single ) local _configure_double=( - --enable-sse2 - --enable-avx + #--enable-sse2 + #--enable-avx ) local _configure_long_double=( --enable-long-double ) local _configure_quad=( --disable-mpi - --enable-quad-precision + #--enable-quad-precision ) local _cmake_options=( -B build @@ -90,10 +90,10 @@ build() { -D ENABLE_FLOAT=ON -D ENABLE_LONG_DOUBLE=ON -D ENABLE_QUAD_PRECISION=ON - -D ENABLE_SSE=ON - -D ENABLE_SSE2=ON - -D ENABLE_AVX=ON - -D ENABLE_AVX2=ON + -D ENABLE_SSE=OFF + -D ENABLE_SSE2=OFF + -D ENABLE_AVX=OFF + -D ENABLE_AVX2=OFF ) # create missing FFTW3LibraryDepends.cmake @@ -104,7 +104,7 @@ build() { export F77='gfortran' # use 
upstream default CFLAGS while keeping our -march/-mtune - CFLAGS+=" -O3 -fomit-frame-pointer -malign-double -fstrict-aliasing -ffast-math" + CFLAGS+=" -O3 -fomit-frame-pointer -fstrict-aliasing -ffast-math" for _name in "${_build_types[@]}"; do ( diff --git a/firecracker/PKGBUILD b/firecracker/PKGBUILD index 2468e92adb..c68ccefbd3 100644 --- a/firecracker/PKGBUILD +++ b/firecracker/PKGBUILD @@ -14,7 +14,7 @@ _commit='8a43b32e9a885443a87b5bfe70808a3d68936289' source=("$pkgname::git+https://github.com/firecracker-microvm/firecracker.git#commit=$_commit") b2sums=('SKIP') -_cargo_target="$CARCH-unknown-linux-gnu" +_cargo_target="`uname -m`-unknown-linux-gnu" pkgver() { cd "$pkgbase" diff --git a/firefox-developer-edition/0001-Add-support-for-LoongArch64.patch b/firefox-developer-edition/0001-Add-support-for-LoongArch64.patch new file mode 100644 index 0000000000..b8a33207c9 --- /dev/null +++ b/firefox-developer-edition/0001-Add-support-for-LoongArch64.patch @@ -0,0 +1,80 @@ +From 0c4dfaca7c7a38244034a6d872c0c7aeec0d4819 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Sun, 22 Oct 2023 22:13:17 -0700 +Subject: [PATCH 1/5] Add support for LoongArch64 + +Adapted from LoongArchLinux. Rebased to Firefox 118.0.2. + +Co-Authored-By: loongson +Co-Authored-By: WANG Xuerui +--- + third_party/libwebrtc/build/build_config.h | 4 ++++ + third_party/rust/nix/.cargo-checksum.json | 2 +- + third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + + toolkit/components/telemetry/pingsender/pingsender.cpp | 1 + + toolkit/moz.configure | 2 +- + 5 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h +index c39ae9da50f99..28191de02654b 100644 +--- a/third_party/libwebrtc/build/build_config.h ++++ b/third_party/libwebrtc/build/build_config.h +@@ -210,6 +210,10 @@ + #define ARCH_CPU_SPARC 1 + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 ++#elif defined(__loongarch_lp64) ++#define ARCH_CPU_LOONGARCH64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #else + #error Please add support for your architecture in build/build_config.h + #endif +diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json +index f4c932b88926b..b7b9c9f3c9a89 100644 +--- a/third_party/rust/nix/.cargo-checksum.json ++++ b/third_party/rust/nix/.cargo-checksum.json +@@ -1 +1 @@ 
+-{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +\ No newline at end of file 
++{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs +index 0c0a2090538f8..214d9e8c60281 100644 +--- a/third_party/rust/nix/src/sys/ioctl/linux.rs ++++ b/third_party/rust/nix/src/sys/ioctl/linux.rs +@@ -41,6 +41,7 @@ mod consts { + target_arch = "s390x", + target_arch = "x86_64", + target_arch = "aarch64", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64" + ))] +diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp +index 30f2907c720e1..e6645227a2949 100644 +--- a/toolkit/components/telemetry/pingsender/pingsender.cpp ++++ b/toolkit/components/telemetry/pingsender/pingsender.cpp +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #include +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 8b462ecde463f..a4aa84cc7c45e 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -2432,7 +2432,7 @@ with only_when(compile_environment | artifact_builds): + use_nasm = False + elif target.cpu == "x86_64": + flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] +- elif target.cpu in ("x86", "arm", "aarch64"): ++ elif target.cpu in ("x86", "arm", "aarch64", "loongarch64"): + flac_only = True + else: + enable = False +-- 
+2.43.0 + diff --git a/firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch b/firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch new file mode 100644 index 0000000000..143927ffe1 --- /dev/null +++ b/firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch @@ -0,0 +1,31 @@ +From 9f3a0a22ba3c6ad1f14e90cfc2774b87215d7135 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:14:20 -0800 +Subject: [PATCH 2/5] Enable VA-API support for AMD GPUs + +--- + widget/gtk/GfxInfo.cpp | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/widget/gtk/GfxInfo.cpp b/widget/gtk/GfxInfo.cpp +index b34e85baa28e5..8c95ce0d4274b 100644 +--- a/widget/gtk/GfxInfo.cpp ++++ b/widget/gtk/GfxInfo.cpp +@@ -1112,14 +1112,6 @@ const nsTArray& GfxInfo::GetGfxDriverInfo() { + nsIGfxInfo::FEATURE_BLOCKED_DEVICE, DRIVER_LESS_THAN, V(23, 1, 1, 0), + "FEATURE_HARDWARE_VIDEO_DECODING_AMD_DISABLE", "Mesa 23.1.1.0"); + +- // Disable on Release/late Beta on AMD +-#if !defined(EARLY_BETA_OR_EARLIER) +- APPEND_TO_DRIVER_BLOCKLIST(OperatingSystem::Linux, DeviceFamily::AtiAll, +- nsIGfxInfo::FEATURE_HARDWARE_VIDEO_DECODING, +- nsIGfxInfo::FEATURE_BLOCKED_DEVICE, +- DRIVER_COMPARISON_IGNORED, V(0, 0, 0, 0), +- "FEATURE_HARDWARE_VIDEO_DECODING_DISABLE", ""); +-#endif + //////////////////////////////////// + // FEATURE_HW_DECODED_VIDEO_ZERO_COPY - ALLOWLIST + APPEND_TO_DRIVER_BLOCKLIST2(OperatingSystem::Linux, DeviceFamily::All, +-- +2.43.0 + diff --git a/firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch b/firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch new file mode 100644 index 0000000000..aa45fa3e87 --- /dev/null +++ b/firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch @@ -0,0 +1,40 @@ +From b25c3742c98c87de9621eac8b672f9381e15c088 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:16:46 -0800 +Subject: [PATCH 3/5] Remove architectural limit on VA-API support + +--- + toolkit/moz.configure | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index a4aa84cc7c45e..67fc08237bba4 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -537,11 +537,8 @@ set_define("MOZ_WAYLAND", depends_if(wayland_headers)(lambda _: True)) + + # Hardware-accelerated video decode with VAAPI and V4L2 on Linux + # ============================================================== +-@depends(target, toolkit_gtk) +-def vaapi(target, toolkit_gtk): +- # VAAPI is mostly used on x86(-64) but is sometimes used on ARM/ARM64 SOCs. 
+- if target.cpu in ("arm", "aarch64", "x86", "x86_64") and toolkit_gtk: +- return True ++set_config("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) ++set_define("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) + + + @depends(target, toolkit_gtk) +@@ -552,9 +549,7 @@ def v4l2(target, toolkit_gtk): + return True + + +-set_config("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_config("MOZ_ENABLE_V4L2", True, when=v4l2) +-set_define("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_define("MOZ_ENABLE_V4L2", True, when=v4l2) + + +-- +2.43.0 + diff --git a/firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch b/firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch new file mode 100644 index 0000000000..3cd2fcf4f4 --- /dev/null +++ b/firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch @@ -0,0 +1,152 @@ +From 476458e2e0cafaa5fe5fbc6a99750dd920e7ba67 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 21 Nov 2023 17:17:16 -0800 +Subject: [PATCH 4/5] Enable WebRTC for LoongArch + +--- + .../common_audio/common_audio_c_gn/moz.build | 8 ++++++ + .../spl_sqrt_floor_gn/moz.build | 6 ++++ + .../aecm/aecm_core_gn/moz.build | 6 ++++ + .../desktop_capture_gn/moz.build | 28 +++++++++++++++++++ + .../desktop_capture/primitives_gn/moz.build | 4 +++ + third_party/libwebrtc/moz.build | 7 +++++ + toolkit/moz.configure | 1 + + 7 files changed, 60 insertions(+) + +diff --git a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +index 60ee6cfc164be..1e69b2881ca90 100644 +--- a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +@@ -255,6 +255,14 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +index d2d0287623b54..36ad6222b3dea 100644 +--- a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +@@ -174,6 +174,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +index 9874037197896..147e12653cbe2 100644 +--- a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build ++++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +@@ -206,6 +206,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + 
"/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ SOURCES += [ ++ "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +index b0a5d1522da86..0efac49ac5dc3 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +@@ -390,6 +390,34 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ DEFINES["WEBRTC_USE_X11"] = True ++ ++ OS_LIBS += [ ++ "X11", ++ "Xcomposite", ++ "Xdamage", ++ "Xext", ++ "Xfixes", ++ "Xrandr", ++ "Xrender" ++ ] ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +index 8edb2c2344870..e6cf9f56540f7 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +@@ -148,6 +148,10 @@ if CONFIG["CPU_ARCH"] == "riscv64": + + DEFINES["USE_X11"] = "1" + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/moz.build b/third_party/libwebrtc/moz.build +index f528cb1108180..88fd9792acdf1 100644 +--- a/third_party/libwebrtc/moz.build ++++ b/third_party/libwebrtc/moz.build +@@ -692,3 +692,10 @@ if CONFIG["CPU_ARCH"] == "riscv64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_T + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", + "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" + ] ++ ++if CONFIG["CPU_ARCH"] == "loongarch64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": ++ ++ DIRS += [ ++ "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", ++ "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" ++ ] +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 67fc08237bba4..f7252539c7eaa 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -1328,6 +1328,7 @@ def webrtc_default(target): + "ppc", + "ppc64", + "riscv64", ++ 
"loongarch64", + ) + + return os_match and cpu_match and target.endianness == "little" +-- +2.43.0 + diff --git a/firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch b/firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch new file mode 100644 index 0000000000..f69d1ab983 --- /dev/null +++ b/firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch @@ -0,0 +1,398 @@ +From 7a3c2cbce2b6cf951c94850596dac20b5c3a98dc Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 31 Dec 2023 13:16:33 +0800 +Subject: [PATCH 5/5] Fix libyuv build with LSX & LASX + +This is not of upstream quality, and will not be upstreamed as-is. +This is only meant as a quick-and-dirty build fix for LoongArch early +adopters. +--- + media/libyuv/libyuv/BUILD.gn | 37 +++++++++++++++++++++ + media/libyuv/libyuv/libyuv.gni | 2 ++ + media/libyuv/libyuv/libyuv.gypi | 5 +++ + media/libyuv/libyuv/source/row_lasx.cc | 46 ++++++++++++++++---------- + media/libyuv/libyuv/source/row_lsx.cc | 30 +++++++++++------ + 5 files changed, 92 insertions(+), 28 deletions(-) + +diff --git a/media/libyuv/libyuv/BUILD.gn b/media/libyuv/libyuv/BUILD.gn +index a72ff06558000..7d70848be9f1a 100644 +--- a/media/libyuv/libyuv/BUILD.gn ++++ b/media/libyuv/libyuv/BUILD.gn +@@ -69,6 +69,14 @@ group("libyuv") { + deps += [ ":libyuv_msa" ] + } + ++ if (libyuv_use_lsx) { ++ deps += [ ":libyuv_lsx" ] ++ } ++ ++ if (libyuv_use_lasx) { ++ deps += [ ":libyuv_lasx" ] ++ } ++ + if (!is_ios && !libyuv_disable_jpeg) { + # Make sure that clients of libyuv link with libjpeg. This can't go in + # libyuv_internal because in Windows x64 builds that will generate a clang +@@ -90,6 +98,7 @@ static_library("libyuv_internal") { + "include/libyuv/convert_from.h", + "include/libyuv/convert_from_argb.h", + "include/libyuv/cpu_id.h", ++ "include/libyuv/loongson_intrinsics.h", + "include/libyuv/mjpeg_decoder.h", + "include/libyuv/planar_functions.h", + "include/libyuv/rotate.h", +@@ -229,6 +238,34 @@ if (libyuv_use_msa) { + } + } + ++if (libyuv_use_lsx) { ++ static_library("libyuv_lsx") { ++ sources = [ ++ # LSX Source Files ++ "source/rotate_lsx.cc", ++ "source/row_lsx.cc", ++ "source/scale_lsx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ ++if (libyuv_use_lasx) { ++ static_library("libyuv_lasx") { ++ sources = [ ++ # LASX Source Files ++ "source/row_lasx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ + if (libyuv_include_tests) { + config("libyuv_unittest_warnings_config") { + if (!is_win) { +diff --git a/media/libyuv/libyuv/libyuv.gni b/media/libyuv/libyuv/libyuv.gni +index 852f08ca9d61f..ecad693508811 100644 +--- a/media/libyuv/libyuv/libyuv.gni ++++ b/media/libyuv/libyuv/libyuv.gni +@@ -20,4 +20,6 @@ declare_args() { + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa + libyuv_use_mmi = + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi ++ libyuv_use_lsx = current_cpu == "loong64" || current_cpu == "loongarch64" ++ libyuv_use_lasx = current_cpu == "loong64" || current_cpu == "loongarch64" + } +diff --git a/media/libyuv/libyuv/libyuv.gypi b/media/libyuv/libyuv/libyuv.gypi +index 48936aa7b0239..9c19abf9c34c9 100644 +--- a/media/libyuv/libyuv/libyuv.gypi ++++ b/media/libyuv/libyuv/libyuv.gypi +@@ -18,6 +18,7 @@ + 'include/libyuv/convert_from.h', + 'include/libyuv/convert_from_argb.h', + 'include/libyuv/cpu_id.h', ++ 'include/libyuv/loongson_intrinsics.h', + 
'include/libyuv/macros_msa.h', + 'include/libyuv/mjpeg_decoder.h', + 'include/libyuv/planar_functions.h', +@@ -57,6 +58,7 @@ + 'source/rotate_argb.cc', + 'source/rotate_common.cc', + 'source/rotate_gcc.cc', ++ 'source/rotate_lsx.cc', + 'source/rotate_msa.cc', + 'source/rotate_neon.cc', + 'source/rotate_neon64.cc', +@@ -64,6 +66,8 @@ + 'source/row_any.cc', + 'source/row_common.cc', + 'source/row_gcc.cc', ++ 'source/row_lasx.cc', ++ 'source/row_lsx.cc', + 'source/row_msa.cc', + 'source/row_neon.cc', + 'source/row_neon64.cc', +@@ -73,6 +77,7 @@ + 'source/scale_argb.cc', + 'source/scale_common.cc', + 'source/scale_gcc.cc', ++ 'source/scale_lsx.cc', + 'source/scale_msa.cc', + 'source/scale_neon.cc', + 'source/scale_neon64.cc', +diff --git a/media/libyuv/libyuv/source/row_lasx.cc b/media/libyuv/libyuv/source/row_lasx.cc +index 29ac9254d9924..8c325483b116a 100644 +--- a/media/libyuv/libyuv/source/row_lasx.cc ++++ b/media/libyuv/libyuv/source/row_lasx.cc +@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, +- 0xF000F000F000F000}; ++ __m256i alpha = {static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), ++ static_cast(0xF000F000F000F000)}; + __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, + 0x00F000F000F000F0}; + +@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000, +- 0x8000800080008000}; ++ __m256i alpha = {static_cast(0x8000800080008000), static_cast(0x8000800080008000), static_cast(0x8000800080008000), ++ static_cast(0x8000800080008000)}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); + vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); +@@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, + 0x0009000900090009, 0x0009000900090009}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, +@@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, + __m256i const_38 = __lasx_xvldi(38); + __m256i const_94 = __lasx_xvldi(94); + __m256i const_18 = __lasx_xvldi(18); +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; + for (x = 0; x < len; x++) { +@@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 
0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, +@@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, +@@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -2000,11 +2000,13 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -2030,6 +2032,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+ static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb, +@@ -2242,8 +2245,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + __m256i const_21 = __lasx_xvldi(0x415); + __m256i const_53 = __lasx_xvldi(0x435); + __m256i const_10 = __lasx_xvldi(0x40A); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, + 0x1F1D0F0D1B190B09}; + +@@ -2296,6 +2299,13 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + } + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +diff --git a/media/libyuv/libyuv/source/row_lsx.cc b/media/libyuv/libyuv/source/row_lsx.cc +index 9c1e16f22e02d..91221ff03ca29 100644 +--- a/media/libyuv/libyuv/source/row_lsx.cc ++++ b/media/libyuv/libyuv/source/row_lsx.cc +@@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, +@@ -516,7 +516,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, +@@ -577,7 +577,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -630,7 +630,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -865,7 +865,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, 
src_bgra, 32, src_bgra, 48, +@@ -913,7 +913,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, +@@ -961,7 +961,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, +@@ -1010,7 +1010,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, + __m128i const_21 = __lsx_vldi(0x415); + __m128i const_53 = __lsx_vldi(0x435); + __m128i const_10 = __lsx_vldi(0x40A); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, +@@ -1388,7 +1388,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, + __m128i const_256 = __lsx_vldi(0x500); + __m128i zero = __lsx_vldi(0); + __m128i alpha = __lsx_vldi(0xFF); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, +@@ -1434,7 +1434,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); + __m128i vec_scale = __lsx_vreplgr2vr_w(scale); + __m128i zero = __lsx_vldi(0); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, +@@ -1643,11 +1643,13 @@ void HalfFloatRow_LSX(const uint16_t* src, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -1673,6 +1675,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
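The #ifndef/#define guard around struct RgbConstants and the trailing #undef block (both labelled for the unified sources build) deal with Firefox-style unified compilation, where several row_*.cc files are concatenated into one translation unit: without the guard the second file's identical struct definition would be a redefinition error, and without the #undefs the same helper macros defined again in the next concatenated source would be flagged as redefinitions. A simplified, self-contained illustration of the idiom (the two-file layout, the one-member struct, and the function names are hypothetical):

// What one unified translation unit effectively looks like.
// ---- contents of the first row_*.cc ----
#ifndef RgbConstants
struct RgbConstants { int kAddY; };   // defined only on first appearance
#define RgbConstants RgbConstants     // mark the definition as seen
#endif
#define RGBTOUV(x) ((x) + 1)
int first_user() { return RGBTOUV(RgbConstants{16}.kAddY); }
#undef RGBTOUV                        // keep the macro from leaking onward
// ---- contents of the second row_*.cc, appended by the unified build ----
#ifndef RgbConstants
struct RgbConstants { int kAddY; };   // skipped: the guard is already set
#define RgbConstants RgbConstants
#endif
#define RGBTOUV(x) ((x) + 2)
int second_user() { return RGBTOUV(RgbConstants{16}.kAddY); }
#undef RGBTOUV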
+ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, +@@ -1853,6 +1856,13 @@ void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants); + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +-- +2.43.0 + diff --git a/firefox-developer-edition/PKGBUILD b/firefox-developer-edition/PKGBUILD index 55cbe586ae..8873766e38 100644 --- a/firefox-developer-edition/PKGBUILD +++ b/firefox-developer-edition/PKGBUILD @@ -60,6 +60,11 @@ source=( $pkgname.desktop identity-icons-brand.svg firefox-install-dir.patch + 0001-Add-support-for-LoongArch64.patch + 0002-Enable-VA-API-support-for-AMD-GPUs.patch + 0003-Remove-architectural-limit-on-VA-API-support.patch + 0004-Enable-WebRTC-for-LoongArch.patch + 0005-Fix-libyuv-build-with-LSX-LASX.patch ) validpgpkeys=( # Mozilla Software Releases @@ -75,7 +80,12 @@ b2sums=('4eeb4ea242b9187abafb8e580f2038747bc2962230fa598a4de0f25f999ab378d92fc61 'SKIP' 'd2d14042a03ffcc5ed9212fca9cc167e8bfb2ba3f0d61a89441e033484cb914424d0f2544e0f1bc58992fee9cae03a73679352ee0fac9777fa5633ddc8d76e7d' '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34' - 'eb61793257458b20bc7ab5598240bc1901666d7fb7e971941af99ac706d387859642d8fba3130fa31789546b9123c7500edbe6373701ce9fc7d65aef3974c90c') + 'eb61793257458b20bc7ab5598240bc1901666d7fb7e971941af99ac706d387859642d8fba3130fa31789546b9123c7500edbe6373701ce9fc7d65aef3974c90c' + 'cab0bf0922520866aa9fddf9142512a0ff30437ab779dc4e266b278aea363d4077db5edad11ac30190ee69cc321ec6a7ab3eea8003982faeac991389417af7e9' + '19dc9f0e2aa13be99f7226dbf1e80eee67bbce3ac3ed8256894158565d62324589bc075df402bc43f5d597cc2de60bed6d68b58e20e9caa1f34776f680fe45b5' + 'bb2658edb90dc022df36d89206789ef30222ff1b26376b61b2340d421738fe240063bb7113c9deb828f00e8e297bda8b87e2da9d26796aa7fd4d48bef1aa0719' + '65e9739926174b3eecb8e01cae9805861f6a7c2cadf9faef7cda92acbef3a569a31b2e8c0f4f8ab726416a60e601e7a2a4eb1fc6cb74dfd2cb4ab1572c1fdd97' + 'e2fc795f224f34d14fab2655235a7e31b5fdbe84937ded697f6b1ccd1751bfb3a05ae6b46846201e201487d02a987322f3fc36cdd3208d4904c844caf3f2d628') # Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) # Note: These are for Arch Linux use ONLY. 
For your own distribution, please @@ -95,6 +105,11 @@ prepare() { # Change install dir from 'firefox' to 'firefox-developer-edition' patch -Np1 -i ../firefox-install-dir.patch + patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch + patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch + patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch + patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch echo -n "$_google_api_key" >google-api-key echo -n "$_mozilla_api_key" >mozilla-api-key @@ -108,11 +123,11 @@ ac_add_options --enable-release ac_add_options --enable-hardening ac_add_options --enable-optimize ac_add_options --enable-rust-simd -ac_add_options --enable-linker=lld +#ac_add_options --enable-linker=lld ac_add_options --disable-install-strip -ac_add_options --disable-elf-hack +#ac_add_options --disable-elf-hack ac_add_options --disable-bootstrap -ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +ac_add_options --without-wasm-sandboxed-libraries # Branding ac_add_options --with-branding=browser/branding/aurora @@ -136,7 +151,7 @@ ac_add_options --with-system-nss # Features ac_add_options --enable-alsa ac_add_options --enable-jack -ac_add_options --enable-crashreporter +ac_add_options --disable-crashreporter ac_add_options --disable-updater ac_add_options --disable-tests END @@ -149,6 +164,11 @@ build() { export MOZBUILD_STATE_PATH="$srcdir/mozbuild" export MOZ_BUILD_DATE="$(date -u${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH} +%Y%m%d%H%M%S)" export MOZ_NOSPAM=1 +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" @@ -159,35 +179,14 @@ build() { # Do 3-tier PGO echo "Building instrumented browser..." - cat >.mozconfig ../mozconfig - <.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < +Date: Sun, 22 Oct 2023 22:13:17 -0700 +Subject: [PATCH 1/5] Add support for LoongArch64 + +Adapted from LoongArchLinux. Rebased to Firefox 118.0.2. 
+ +Co-Authored-By: loongson +Co-Authored-By: WANG Xuerui +--- + third_party/libwebrtc/build/build_config.h | 4 ++++ + third_party/rust/nix/.cargo-checksum.json | 2 +- + third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + + toolkit/components/telemetry/pingsender/pingsender.cpp | 1 + + toolkit/moz.configure | 2 +- + 5 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h +index c39ae9da50f99..28191de02654b 100644 +--- a/third_party/libwebrtc/build/build_config.h ++++ b/third_party/libwebrtc/build/build_config.h +@@ -210,6 +210,10 @@ + #define ARCH_CPU_SPARC 1 + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 ++#elif defined(__loongarch_lp64) ++#define ARCH_CPU_LOONGARCH64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #else + #error Please add support for your architecture in build/build_config.h + #endif +diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json +index f4c932b88926b..b7b9c9f3c9a89 100644 +--- a/third_party/rust/nix/.cargo-checksum.json ++++ b/third_party/rust/nix/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/
linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c5639824f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e53
3a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"32656bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +\ No newline at end of file 
++{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs +index 0c0a2090538f8..214d9e8c60281 100644 +--- a/third_party/rust/nix/src/sys/ioctl/linux.rs ++++ b/third_party/rust/nix/src/sys/ioctl/linux.rs +@@ -41,6 +41,7 @@ mod consts { + target_arch = "s390x", + target_arch = "x86_64", + target_arch = "aarch64", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64" + ))] +diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp +index 30f2907c720e1..e6645227a2949 100644 +--- a/toolkit/components/telemetry/pingsender/pingsender.cpp ++++ b/toolkit/components/telemetry/pingsender/pingsender.cpp +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #include +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 8b462ecde463f..a4aa84cc7c45e 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -2432,7 +2432,7 @@ with only_when(compile_environment | artifact_builds): + use_nasm = False + elif target.cpu == "x86_64": + flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] +- elif target.cpu in ("x86", "arm", "aarch64"): ++ elif target.cpu in ("x86", "arm", "aarch64", "loongarch64"): + flac_only = True + else: + enable = False +-- 
+2.43.0 + diff --git a/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch new file mode 100644 index 0000000000..143927ffe1 --- /dev/null +++ b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch @@ -0,0 +1,31 @@ +From 9f3a0a22ba3c6ad1f14e90cfc2774b87215d7135 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:14:20 -0800 +Subject: [PATCH 2/5] Enable VA-API support for AMD GPUs + +--- + widget/gtk/GfxInfo.cpp | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/widget/gtk/GfxInfo.cpp b/widget/gtk/GfxInfo.cpp +index b34e85baa28e5..8c95ce0d4274b 100644 +--- a/widget/gtk/GfxInfo.cpp ++++ b/widget/gtk/GfxInfo.cpp +@@ -1112,14 +1112,6 @@ const nsTArray& GfxInfo::GetGfxDriverInfo() { + nsIGfxInfo::FEATURE_BLOCKED_DEVICE, DRIVER_LESS_THAN, V(23, 1, 1, 0), + "FEATURE_HARDWARE_VIDEO_DECODING_AMD_DISABLE", "Mesa 23.1.1.0"); + +- // Disable on Release/late Beta on AMD +-#if !defined(EARLY_BETA_OR_EARLIER) +- APPEND_TO_DRIVER_BLOCKLIST(OperatingSystem::Linux, DeviceFamily::AtiAll, +- nsIGfxInfo::FEATURE_HARDWARE_VIDEO_DECODING, +- nsIGfxInfo::FEATURE_BLOCKED_DEVICE, +- DRIVER_COMPARISON_IGNORED, V(0, 0, 0, 0), +- "FEATURE_HARDWARE_VIDEO_DECODING_DISABLE", ""); +-#endif + //////////////////////////////////// + // FEATURE_HW_DECODED_VIDEO_ZERO_COPY - ALLOWLIST + APPEND_TO_DRIVER_BLOCKLIST2(OperatingSystem::Linux, DeviceFamily::All, +-- +2.43.0 + diff --git a/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch b/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch new file mode 100644 index 0000000000..aa45fa3e87 --- /dev/null +++ b/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch @@ -0,0 +1,40 @@ +From b25c3742c98c87de9621eac8b672f9381e15c088 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:16:46 -0800 +Subject: [PATCH 3/5] Remove architectural limit on VA-API support + +--- + toolkit/moz.configure | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index a4aa84cc7c45e..67fc08237bba4 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -537,11 +537,8 @@ set_define("MOZ_WAYLAND", depends_if(wayland_headers)(lambda _: True)) + + # Hardware-accelerated video decode with VAAPI and V4L2 on Linux + # ============================================================== +-@depends(target, toolkit_gtk) +-def vaapi(target, toolkit_gtk): +- # VAAPI is mostly used on x86(-64) but is sometimes used on ARM/ARM64 SOCs. 
+- if target.cpu in ("arm", "aarch64", "x86", "x86_64") and toolkit_gtk: +- return True ++set_config("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) ++set_define("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) + + + @depends(target, toolkit_gtk) +@@ -552,9 +549,7 @@ def v4l2(target, toolkit_gtk): + return True + + +-set_config("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_config("MOZ_ENABLE_V4L2", True, when=v4l2) +-set_define("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_define("MOZ_ENABLE_V4L2", True, when=v4l2) + + +-- +2.43.0 + diff --git a/firefox/0004-Enable-WebRTC-for-LoongArch.patch b/firefox/0004-Enable-WebRTC-for-LoongArch.patch new file mode 100644 index 0000000000..3cd2fcf4f4 --- /dev/null +++ b/firefox/0004-Enable-WebRTC-for-LoongArch.patch @@ -0,0 +1,152 @@ +From 476458e2e0cafaa5fe5fbc6a99750dd920e7ba67 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 21 Nov 2023 17:17:16 -0800 +Subject: [PATCH 4/5] Enable WebRTC for LoongArch + +--- + .../common_audio/common_audio_c_gn/moz.build | 8 ++++++ + .../spl_sqrt_floor_gn/moz.build | 6 ++++ + .../aecm/aecm_core_gn/moz.build | 6 ++++ + .../desktop_capture_gn/moz.build | 28 +++++++++++++++++++ + .../desktop_capture/primitives_gn/moz.build | 4 +++ + third_party/libwebrtc/moz.build | 7 +++++ + toolkit/moz.configure | 1 + + 7 files changed, 60 insertions(+) + +diff --git a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +index 60ee6cfc164be..1e69b2881ca90 100644 +--- a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +@@ -255,6 +255,14 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +index d2d0287623b54..36ad6222b3dea 100644 +--- a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +@@ -174,6 +174,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +index 9874037197896..147e12653cbe2 100644 +--- a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build ++++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +@@ -206,6 +206,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": 
++ ++ SOURCES += [ ++ "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +index b0a5d1522da86..0efac49ac5dc3 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +@@ -390,6 +390,34 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ DEFINES["WEBRTC_USE_X11"] = True ++ ++ OS_LIBS += [ ++ "X11", ++ "Xcomposite", ++ "Xdamage", ++ "Xext", ++ "Xfixes", ++ "Xrandr", ++ "Xrender" ++ ] ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +index 8edb2c2344870..e6cf9f56540f7 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +@@ -148,6 +148,10 @@ if CONFIG["CPU_ARCH"] == "riscv64": + + DEFINES["USE_X11"] = "1" + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/moz.build b/third_party/libwebrtc/moz.build +index f528cb1108180..88fd9792acdf1 100644 +--- a/third_party/libwebrtc/moz.build ++++ b/third_party/libwebrtc/moz.build +@@ -692,3 +692,10 @@ if CONFIG["CPU_ARCH"] == "riscv64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_T + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", + "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" + ] ++ ++if CONFIG["CPU_ARCH"] == "loongarch64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": ++ ++ DIRS += [ ++ "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", ++ "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" ++ ] +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 67fc08237bba4..f7252539c7eaa 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -1328,6 +1328,7 @@ def webrtc_default(target): + "ppc", + "ppc64", + "riscv64", ++ "loongarch64", + ) + + return os_match and cpu_match and target.endianness == "little" +-- +2.43.0 + diff --git 
a/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch b/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch new file mode 100644 index 0000000000..f69d1ab983 --- /dev/null +++ b/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch @@ -0,0 +1,398 @@ +From 7a3c2cbce2b6cf951c94850596dac20b5c3a98dc Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 31 Dec 2023 13:16:33 +0800 +Subject: [PATCH 5/5] Fix libyuv build with LSX & LASX + +This is not of upstream quality, and will not be upstreamed as-is. +This is only meant as a quick-and-dirty build fix for LoongArch early +adopters. +--- + media/libyuv/libyuv/BUILD.gn | 37 +++++++++++++++++++++ + media/libyuv/libyuv/libyuv.gni | 2 ++ + media/libyuv/libyuv/libyuv.gypi | 5 +++ + media/libyuv/libyuv/source/row_lasx.cc | 46 ++++++++++++++++---------- + media/libyuv/libyuv/source/row_lsx.cc | 30 +++++++++++------ + 5 files changed, 92 insertions(+), 28 deletions(-) + +diff --git a/media/libyuv/libyuv/BUILD.gn b/media/libyuv/libyuv/BUILD.gn +index a72ff06558000..7d70848be9f1a 100644 +--- a/media/libyuv/libyuv/BUILD.gn ++++ b/media/libyuv/libyuv/BUILD.gn +@@ -69,6 +69,14 @@ group("libyuv") { + deps += [ ":libyuv_msa" ] + } + ++ if (libyuv_use_lsx) { ++ deps += [ ":libyuv_lsx" ] ++ } ++ ++ if (libyuv_use_lasx) { ++ deps += [ ":libyuv_lasx" ] ++ } ++ + if (!is_ios && !libyuv_disable_jpeg) { + # Make sure that clients of libyuv link with libjpeg. This can't go in + # libyuv_internal because in Windows x64 builds that will generate a clang +@@ -90,6 +98,7 @@ static_library("libyuv_internal") { + "include/libyuv/convert_from.h", + "include/libyuv/convert_from_argb.h", + "include/libyuv/cpu_id.h", ++ "include/libyuv/loongson_intrinsics.h", + "include/libyuv/mjpeg_decoder.h", + "include/libyuv/planar_functions.h", + "include/libyuv/rotate.h", +@@ -229,6 +238,34 @@ if (libyuv_use_msa) { + } + } + ++if (libyuv_use_lsx) { ++ static_library("libyuv_lsx") { ++ sources = [ ++ # LSX Source Files ++ "source/rotate_lsx.cc", ++ "source/row_lsx.cc", ++ "source/scale_lsx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ ++if (libyuv_use_lasx) { ++ static_library("libyuv_lasx") { ++ sources = [ ++ # LASX Source Files ++ "source/row_lasx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ + if (libyuv_include_tests) { + config("libyuv_unittest_warnings_config") { + if (!is_win) { +diff --git a/media/libyuv/libyuv/libyuv.gni b/media/libyuv/libyuv/libyuv.gni +index 852f08ca9d61f..ecad693508811 100644 +--- a/media/libyuv/libyuv/libyuv.gni ++++ b/media/libyuv/libyuv/libyuv.gni +@@ -20,4 +20,6 @@ declare_args() { + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa + libyuv_use_mmi = + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi ++ libyuv_use_lsx = current_cpu == "loong64" || current_cpu == "loongarch64" ++ libyuv_use_lasx = current_cpu == "loong64" || current_cpu == "loongarch64" + } +diff --git a/media/libyuv/libyuv/libyuv.gypi b/media/libyuv/libyuv/libyuv.gypi +index 48936aa7b0239..9c19abf9c34c9 100644 +--- a/media/libyuv/libyuv/libyuv.gypi ++++ b/media/libyuv/libyuv/libyuv.gypi +@@ -18,6 +18,7 @@ + 'include/libyuv/convert_from.h', + 'include/libyuv/convert_from_argb.h', + 'include/libyuv/cpu_id.h', ++ 'include/libyuv/loongson_intrinsics.h', + 'include/libyuv/macros_msa.h', + 'include/libyuv/mjpeg_decoder.h', + 'include/libyuv/planar_functions.h', +@@ -57,6 +58,7 @@ + 'source/rotate_argb.cc', + 'source/rotate_common.cc', + 
'source/rotate_gcc.cc', ++ 'source/rotate_lsx.cc', + 'source/rotate_msa.cc', + 'source/rotate_neon.cc', + 'source/rotate_neon64.cc', +@@ -64,6 +66,8 @@ + 'source/row_any.cc', + 'source/row_common.cc', + 'source/row_gcc.cc', ++ 'source/row_lasx.cc', ++ 'source/row_lsx.cc', + 'source/row_msa.cc', + 'source/row_neon.cc', + 'source/row_neon64.cc', +@@ -73,6 +77,7 @@ + 'source/scale_argb.cc', + 'source/scale_common.cc', + 'source/scale_gcc.cc', ++ 'source/scale_lsx.cc', + 'source/scale_msa.cc', + 'source/scale_neon.cc', + 'source/scale_neon64.cc', +diff --git a/media/libyuv/libyuv/source/row_lasx.cc b/media/libyuv/libyuv/source/row_lasx.cc +index 29ac9254d9924..8c325483b116a 100644 +--- a/media/libyuv/libyuv/source/row_lasx.cc ++++ b/media/libyuv/libyuv/source/row_lasx.cc +@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, +- 0xF000F000F000F000}; ++ __m256i alpha = {static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), ++ static_cast(0xF000F000F000F000)}; + __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, + 0x00F000F000F000F0}; + +@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000, +- 0x8000800080008000}; ++ __m256i alpha = {static_cast(0x8000800080008000), static_cast(0x8000800080008000), static_cast(0x8000800080008000), ++ static_cast(0x8000800080008000)}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); + vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); +@@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, + 0x0009000900090009, 0x0009000900090009}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, +@@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, + __m256i const_38 = __lasx_xvldi(38); + __m256i const_94 = __lasx_xvldi(94); + __m256i const_18 = __lasx_xvldi(18); +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; + for (x = 0; x < len; x++) { +@@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ 
static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, +@@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, +@@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -2000,11 +2000,13 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -2030,6 +2032,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+ static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb, +@@ -2242,8 +2245,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + __m256i const_21 = __lasx_xvldi(0x415); + __m256i const_53 = __lasx_xvldi(0x435); + __m256i const_10 = __lasx_xvldi(0x40A); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, + 0x1F1D0F0D1B190B09}; + +@@ -2296,6 +2299,13 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + } + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +diff --git a/media/libyuv/libyuv/source/row_lsx.cc b/media/libyuv/libyuv/source/row_lsx.cc +index 9c1e16f22e02d..91221ff03ca29 100644 +--- a/media/libyuv/libyuv/source/row_lsx.cc ++++ b/media/libyuv/libyuv/source/row_lsx.cc +@@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, +@@ -516,7 +516,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, +@@ -577,7 +577,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -630,7 +630,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -865,7 +865,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, 
src_bgra, 32, src_bgra, 48, +@@ -913,7 +913,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, +@@ -961,7 +961,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, +@@ -1010,7 +1010,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, + __m128i const_21 = __lsx_vldi(0x415); + __m128i const_53 = __lsx_vldi(0x435); + __m128i const_10 = __lsx_vldi(0x40A); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, +@@ -1388,7 +1388,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, + __m128i const_256 = __lsx_vldi(0x500); + __m128i zero = __lsx_vldi(0); + __m128i alpha = __lsx_vldi(0xFF); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, +@@ -1434,7 +1434,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); + __m128i vec_scale = __lsx_vreplgr2vr_w(scale); + __m128i zero = __lsx_vldi(0); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, +@@ -1643,11 +1643,13 @@ void HalfFloatRow_LSX(const uint16_t* src, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -1673,6 +1675,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, +@@ -1853,6 +1856,13 @@ void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants); + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +-- +2.43.0 + diff --git a/firefox/PKGBUILD b/firefox/PKGBUILD index eec806bf5f..cb2ecff444 100644 --- a/firefox/PKGBUILD +++ b/firefox/PKGBUILD @@ -31,7 +31,6 @@ makedepends=( imake inetutils jack - lld llvm mesa nasm @@ -64,6 +63,11 @@ source=( https://archive.mozilla.org/pub/firefox/releases/$pkgver/source/firefox-$pkgver.source.tar.xz{,.asc} $pkgname.desktop identity-icons-brand.svg + 0001-Add-support-for-LoongArch64.patch + 0002-Enable-VA-API-support-for-AMD-GPUs.patch + 0003-Remove-architectural-limit-on-VA-API-support.patch + 0004-Enable-WebRTC-for-LoongArch.patch + 0005-Fix-libyuv-build-with-LSX-LASX.patch ) validpgpkeys=( # Mozilla Software Releases @@ -73,11 +77,21 @@ validpgpkeys=( sha256sums=('b84815a90e147965e4c0b50599c85b1022ab0fce42105e5ef45c630dcca5dec3' 'SKIP' '1f241fdc619f92a914c75aece7c7c717401d7467c9a306458e106b05f34e5044' - 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9') + 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9' + '94ccc1e5efe217a8491bea4f3d80c962ccda2ebb3203f67e4d995190d29b0544' + 'ab6eb723cb2b70831cf4e66d6e315e0842f77467812c67d5de2365fc5117c320' + '779cafabc2c738dc26a1f945695802f038af916d6b86ede9493b1cceca7e7428' + '322d0fb02661018d819f5db218b94f8f680b0e5bc6a3648db35de465431590cc' + 'f7b56a3bd993b0a0c05f305c40fabcc4af62c68d43097c0731db9525ab6156bd') b2sums=('7252cd58fef9f5fcb504c8c9f885567109c05e6ec92157459cc384edc6935adb206e3be0b805aeaa37dbd72656c3243db1291b745dd0f705f37a61319a4dc820' 'SKIP' 'd07557840097dd48a60c51cc5111950781e1c6ce255557693bd11306c7a9258b2a82548329762148f117b2295145f9e66e0483a18e2fe09c5afcffed2e4b8628' - '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34') + '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34' + 'cab0bf0922520866aa9fddf9142512a0ff30437ab779dc4e266b278aea363d4077db5edad11ac30190ee69cc321ec6a7ab3eea8003982faeac991389417af7e9' + '19dc9f0e2aa13be99f7226dbf1e80eee67bbce3ac3ed8256894158565d62324589bc075df402bc43f5d597cc2de60bed6d68b58e20e9caa1f34776f680fe45b5' + 'bb2658edb90dc022df36d89206789ef30222ff1b26376b61b2340d421738fe240063bb7113c9deb828f00e8e297bda8b87e2da9d26796aa7fd4d48bef1aa0719' + '65e9739926174b3eecb8e01cae9805861f6a7c2cadf9faef7cda92acbef3a569a31b2e8c0f4f8ab726416a60e601e7a2a4eb1fc6cb74dfd2cb4ab1572c1fdd97' + 'e2fc795f224f34d14fab2655235a7e31b5fdbe84937ded697f6b1ccd1751bfb3a05ae6b46846201e201487d02a987322f3fc36cdd3208d4904c844caf3f2d628') # Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) # Note: These are for Arch Linux use ONLY. 
For your own distribution, please @@ -95,6 +109,13 @@ prepare() { mkdir mozbuild cd firefox-$pkgver +# patch -Np1 -i ../firefox-118-loong.patch + patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch + patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch + patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch + patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch + echo -n "$_google_api_key" >google-api-key echo -n "$_mozilla_api_key" >mozilla-api-key @@ -107,11 +128,11 @@ ac_add_options --enable-release ac_add_options --enable-hardening ac_add_options --enable-optimize ac_add_options --enable-rust-simd -ac_add_options --enable-linker=lld +#ac_add_options --enable-linker=lld ac_add_options --disable-install-strip ac_add_options --disable-elf-hack ac_add_options --disable-bootstrap -ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +ac_add_options --without-wasm-sandboxed-libraries # Branding ac_add_options --enable-official-branding @@ -134,19 +155,25 @@ ac_add_options --with-system-nss # Features ac_add_options --enable-alsa ac_add_options --enable-jack -ac_add_options --enable-crashreporter +ac_add_options --disable-crashreporter ac_add_options --disable-updater ac_add_options --disable-tests END } build() { + set -x cd firefox-$pkgver export MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE=pip export MOZBUILD_STATE_PATH="$srcdir/mozbuild" export MOZ_BUILD_DATE="$(date -u${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH} +%Y%m%d%H%M%S)" export MOZ_NOSPAM=1 +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" @@ -157,34 +184,7 @@ build() { # Do 3-tier PGO echo "Building instrumented browser..." - cat >.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < +Date: Wed, 29 Jul 2020 20:41:19 +0800 +Subject: [PATCH 1/3] Loongson-3 SPI tmp + +Change-Id: I85d05d0f06f0a0deff27a9f8ac74ef91c3923efb +Signed-off-by: Jiaxun Yang +--- + Makefile | 8 ++ + loongson3_spi.c | 288 ++++++++++++++++++++++++++++++++++++++++++++++++ + programmer.h | 5 + + 3 files changed, 301 insertions(+) + create mode 100644 loongson3_spi.c + +diff --git a/Makefile b/Makefile +index 7242b09..4793433 100644 +--- a/Makefile ++++ b/Makefile +@@ -696,6 +696,9 @@ CONFIG_DIGILENT_SPI ?= yes + # Disable J-Link for now. + CONFIG_JLINK_SPI ?= no + ++# Always enable Loongson-3 SPI ++CONFIG_LOONGSON3_SPI ?= yes ++ + # Disable wiki printing by default. It is only useful if you have wiki access. + CONFIG_PRINT_WIKI ?= no + +@@ -1038,6 +1041,11 @@ ifneq ($(NEED_SERIAL), ) + LIB_OBJS += serial.o custom_baud.o + endif + ++ifeq ($(CONFIG_LOONGSON3_SPI), yes) ++FEATURE_CFLAGS += -D'CONFIG_LOONGSON3_SPI=1' ++PROGRAMMER_OBJS += loongson3_spi.o ++endif ++ + ifneq ($(NEED_POSIX_SOCKETS), ) + ifeq ($(TARGET_OS), SunOS) + LIBS += -lsocket -lnsl +diff --git a/loongson3_spi.c b/loongson3_spi.c +new file mode 100644 +index 0000000..be619af +--- /dev/null ++++ b/loongson3_spi.c +@@ -0,0 +1,288 @@ ++/* ++ * This file is part of the flashrom project. ++ * ++ * Copyright (C) 2020 Jiaxun Yang ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; version 2 of the License. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#if CONFIG_LOONGSON3_SPI == 1 ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "flash.h" ++#include "chipdrivers.h" ++#include "programmer.h" ++#include "spi.h" ++ ++#define LOONGSON64C_SPI_BASE 0x1fe00220 ++#define LOONGSON64G_SPI_BASE 0x1fe001f0 ++#define LOONGSON3_SPI_REG_SIZE 0x10 ++ ++#define SPICTRL_SPCR 0x0 ++#define SPCR_MSTR (1 << 4) ++#define SPCR_SPE (1 << 6) ++ ++#define SPICTRL_SPSR 0x1 ++#define SPSR_RFEMPTY (1 << 0) ++#define SPSR_RFFULL (1 << 1) ++#define SPSR_WFEMPTY (1 << 2) ++#define SPSR_WFFULL (1 << 3) ++#define SPSR_WCOL (1 << 6) ++ ++#define SPICTRL_FIFO 0x2 ++ ++#define SPICTRL_SFCP 0x4 ++#define SFCP_MEMEN (1 << 0) ++ ++#define SPICTRL_SOFTCS 0x5 ++/* Firmware flash is always connected to CS0 */ ++#define SOFTCS_ASSERT ((0 << 4) | (1 << 0)) ++#define SOFTCS_DESSERT ((1 << 4) | (1 << 0)) ++ ++static uint8_t *spictrl_base; ++ ++static int loongson3_spi_shutdown(void *data); ++static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++ unsigned int readcnt, ++ const uint8_t *writearr, ++ uint8_t *readarr); ++ ++static const struct spi_master spi_master_loongson3 = { ++ .max_data_read = MAX_DATA_READ_UNLIMITED, ++ .max_data_write = MAX_DATA_WRITE_UNLIMITED, ++ .command = loongson3_spi_send_command, ++ .multicommand = default_spi_send_multicommand, ++ .read = default_spi_read, ++ .write_256 = default_spi_write_256, ++ .write_aai = default_spi_write_aai, ++}; ++ ++static int cpu_is_loongson64c(char *cpu) ++{ ++ if (strcmp(cpu, "3b1500") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3a2000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3b2000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3a3000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3b3000") == 0) ++ return 1; ++ ++ return 0; ++} ++ ++static int cpu_is_loongson64g(char *cpu) ++{ ++ if (strcmp(cpu, "3a4000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3b4000") == 0) ++ return 1; ++ ++ return 0; ++} ++ ++int loongson3_spi_init(void) ++{ ++ uint8_t reg; ++ char *cpu; ++ ++ /* Use -cpu parameter as different kernels have different cpuinfo ++ * and it is almost impossible for us to determine all of them. 
++ */ ++ cpu = extract_programmer_param("cpu"); ++ ++ if (!cpu) { ++ free(cpu); ++ msg_perr("No -cpu specified\n"); ++ return 1; ++ } ++ ++ if (cpu_is_loongson64c(cpu)) { ++ spictrl_base = rphysmap("Loongson64C SPICTRL", LOONGSON64C_SPI_BASE, ++ LOONGSON3_SPI_REG_SIZE); ++ msg_pwarn("64c\n"); ++ } else if (cpu_is_loongson64g(cpu)) { ++ spictrl_base = rphysmap("Loongson64G SPICTRL", LOONGSON64G_SPI_BASE, ++ LOONGSON3_SPI_REG_SIZE); ++ msg_pwarn("64g\n"); ++ } else { ++ free(cpu); ++ msg_perr("Invalid -cpu specified\n"); ++ return 1; ++ } ++ ++ free(cpu); ++ ++ if (!spictrl_base) { ++ msg_perr("Failed to map base\n"); ++ return 1; ++ } ++ ++ reg = mmio_readb(spictrl_base + SPICTRL_SFCP); ++ if (!(reg & SFCP_MEMEN)) ++ msg_pwarn("Read engine is not enabled, SPI is not system firmware?\n"); ++ ++ if (register_shutdown(loongson3_spi_shutdown, NULL)) ++ return 1; ++ ++ /* Dessert CS */ ++ mmio_writeb(SOFTCS_DESSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ /* Enable SPI Controller */ ++ reg = mmio_readb(spictrl_base + SPICTRL_SPCR); ++ reg |= SPCR_MSTR | SPCR_SPE; ++ mmio_writeb(reg, spictrl_base + SPICTRL_SPCR); ++ ++ /* Disable read engine for software control */ ++ reg = mmio_readb(spictrl_base + SPICTRL_SFCP); ++ reg &= ~SFCP_MEMEN; ++ mmio_writeb(reg, spictrl_base + SPICTRL_SFCP); ++ ++ ++ /* Sometimes Read FIFO is not empty at boot time */ ++ while (!(mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY)) ++ mmio_readb(spictrl_base + SPICTRL_FIFO); ++ ++ register_spi_master(&spi_master_loongson3); ++ return 0; ++} ++ ++static int loongson3_spi_shutdown(void *data) ++{ ++ if (!spictrl_base) { ++ uint8_t reg; ++ ++ /* Disable soft CS */ ++ mmio_writeb(0x0, spictrl_base + SPICTRL_SOFTCS); ++ ++ /* Enable read engine again */ ++ reg = mmio_readb(spictrl_base + SPICTRL_SFCP); ++ reg |= SFCP_MEMEN; ++ mmio_writeb(reg, spictrl_base + SPICTRL_SFCP); ++ } ++ ++ return 0; ++} ++ ++static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++ unsigned int readcnt, ++ const uint8_t *writearr, ++ uint8_t *readarr) ++{ ++ unsigned int i; ++ ++ msg_pwarn("writecnt: %d, readcnt: %d\n", writecnt, readcnt); ++ ++ mmio_writeb(SOFTCS_ASSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ for (i = 0; i < writecnt; i++) { ++ mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ ++ /* Wait until Read FIFO not empty */ ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ ++ for (i = 0; i < readcnt; i++) { ++ mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ ++ /* Wait until Read FIFO not empty */ ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ readarr[i] = mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ ++ mmio_writeb(SOFTCS_DESSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ return 0; ++} ++ ++#if 0 ++#define FIFO_DETPTH 4 ++ ++static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++ unsigned int readcnt, ++ const uint8_t *writearr, ++ uint8_t *readarr) ++{ ++ unsigned int i, j, cur_depth; ++ ++ msg_pwarn("writecnt: %d, readcnt: %d\n", writecnt, readcnt); ++ ++ mmio_writeb(SOFTCS_ASSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ cur_depth = 0; ++ for (i = 0; i < writecnt; i++) { ++ mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ ++ if ((mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_WFFULL) || ++ (writecnt - i == 1)) { ++ ++ msg_pwarn("CMD W FULL %d\n", cur_depth); ++ /* Wait until WF empty */ ++// while (!(mmio_readb(spictrl_base + SPICTRL_SPSR) & 
SPSR_WFEMPTY)); ++ /* Wait until RF is not empty */ ++ ++ for (j = 0; j < cur_depth + 1; j++) { ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ cur_depth = 0; ++ } ++ cur_depth++; ++ } ++ ++ cur_depth = 0; ++ for (i = 0; i < readcnt; i++) { ++ mmio_writeb(0x0, spictrl_base + SPICTRL_FIFO); ++ ++ if ((mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_WFFULL) || ++ (readcnt - i == 1)) { ++ msg_pwarn("CMD R FULL %d\n", cur_depth); ++ /* Wait until WF empty */ ++// while (!(mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_WFEMPTY)); ++ /* Wait until RF is not empty */ ++ ++ for (j = 0; j < cur_depth + 1; j++) { ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ readarr[i - cur_depth + j] = mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ cur_depth = 0; ++ } ++ cur_depth++; ++ } ++ ++ mmio_writeb(SOFTCS_DESSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ return 0; ++} ++#endif ++ ++#endif // CONFIG_LOONGSON3_SPI == 1 +diff --git a/programmer.h b/programmer.h +index 3cf53b9..ccdf44c 100644 +--- a/programmer.h ++++ b/programmer.h +@@ -573,6 +573,11 @@ int jlink_spi_init(void); + int ni845x_spi_init(void); + #endif + ++/* loongson3_spi.c */ ++#if CONFIG_LOONGSON3_SPI == 1 ++int loongson3_spi_init(void); ++#endif ++ + /* flashrom.c */ + struct decode_sizes { + uint32_t parallel; +-- +2.35.1 + diff --git a/flashrom/0002-Speed-up.patch b/flashrom/0002-Speed-up.patch new file mode 100644 index 0000000000..ce6929d2b0 --- /dev/null +++ b/flashrom/0002-Speed-up.patch @@ -0,0 +1,37 @@ +From 34ab39614b4dd46c5c97dcc6222b206450964a1d Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Thu, 30 Jul 2020 20:55:01 +0800 +Subject: [PATCH 2/3] Speed up???? + +Change-Id: I3294d55eab38632b1bfcaeed9008c2eedce19e45 +Signed-off-by: Jiaxun Yang +--- + loongson3_spi.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/loongson3_spi.c b/loongson3_spi.c +index be619af..54fc814 100644 +--- a/loongson3_spi.c ++++ b/loongson3_spi.c +@@ -155,7 +155,9 @@ int loongson3_spi_init(void) + /* Enable SPI Controller */ + reg = mmio_readb(spictrl_base + SPICTRL_SPCR); + reg |= SPCR_MSTR | SPCR_SPE; ++ reg &= ~0x2; + mmio_writeb(reg, spictrl_base + SPICTRL_SPCR); ++ mmio_writeb(0x0, spictrl_base + 0x3); + + /* Disable read engine for software control */ + reg = mmio_readb(spictrl_base + SPICTRL_SFCP); +@@ -209,7 +211,7 @@ static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int + } + + for (i = 0; i < readcnt; i++) { +- mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ mmio_writeb(0, spictrl_base + SPICTRL_FIFO); + + /* Wait until Read FIFO not empty */ + while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); +-- +2.35.1 + diff --git a/flashrom/0003-Add-support-for-loongarch64.patch b/flashrom/0003-Add-support-for-loongarch64.patch new file mode 100644 index 0000000000..2a46bcd97f --- /dev/null +++ b/flashrom/0003-Add-support-for-loongarch64.patch @@ -0,0 +1,220 @@ +From 43c4fa9cd8cc408f4df4d8ce02c817f70be5ae97 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Wed, 11 May 2022 21:32:59 +0800 +Subject: [PATCH 3/3] Add support for loongarch64 + +Change-Id: I58f40f69487d515fd4c95ec5dd57fd370c9e55ec +Signed-off-by: Xiaotian Wu +--- + Makefile | 14 +++++++------- + flashrom.c | 12 ++++++++++++ + hwaccess.h | 4 ++++ + loongson3_spi.c | 15 +++++++++++---- + meson.build | 6 ++++++ + meson_options.txt | 1 + + platform.h | 5 ++++- + programmer.h | 3 +++ + 8 files changed, 48 insertions(+), 12 
deletions(-) + +diff --git a/Makefile b/Makefile +index 4793433..ef611be 100644 +--- a/Makefile ++++ b/Makefile +@@ -106,7 +106,7 @@ endif + # IMPORTANT: The following line must be placed before TARGET_OS is ever used + # (of course), but should come after any lines setting CC because the line + # below uses CC itself. +-override TARGET_OS := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E os.h 2>/dev/null | grep -v '^\#' | grep '"' | cut -f 2 -d'"')) ++override TARGET_OS := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E os.h 2>/dev/null | grep -v '^\#' | tail -n 1 | grep '"' | cut -f 2 -d'"')) + + ifeq ($(TARGET_OS), Darwin) + override CPPFLAGS += -I/opt/local/include -I/usr/local/include +@@ -421,7 +421,7 @@ endif + # IMPORTANT: The following line must be placed before ARCH is ever used + # (of course), but should come after any lines setting CC because the line + # below uses CC itself. +-override ARCH := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E archtest.c 2>/dev/null | grep -v '^\#' | grep '"' | cut -f 2 -d'"')) ++override ARCH := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E archtest.c 2>/dev/null | grep -v '^\#' | tail -n 1 | grep '"' | cut -f 2 -d'"')) + override ENDIAN := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E endiantest.c 2>/dev/null | grep -v '^\#')) + + # Disable the internal programmer on unsupported architectures (everything but x86 and mipsel) +@@ -476,7 +476,7 @@ endif + # Disable all drivers needing raw access (memory, PCI, port I/O) on + # architectures with unknown raw access properties. + # Right now those architectures are alpha hppa m68k sh s390 +-ifneq ($(ARCH),$(filter $(ARCH),x86 mips ppc arm sparc arc)) ++ifneq ($(ARCH),$(filter $(ARCH),x86 mips ppc arm sparc arc loongarch)) + ifeq ($(CONFIG_RAYER_SPI), yes) + UNSUPPORTED_FEATURES += CONFIG_RAYER_SPI=yes + else +@@ -1037,15 +1037,15 @@ LIBS += -lni845x + PROGRAMMER_OBJS += ni845x_spi.o + endif + +-ifneq ($(NEED_SERIAL), ) +-LIB_OBJS += serial.o custom_baud.o +-endif +- + ifeq ($(CONFIG_LOONGSON3_SPI), yes) + FEATURE_CFLAGS += -D'CONFIG_LOONGSON3_SPI=1' + PROGRAMMER_OBJS += loongson3_spi.o + endif + ++ifneq ($(NEED_SERIAL), ) ++LIB_OBJS += serial.o custom_baud.o ++endif ++ + ifneq ($(NEED_POSIX_SOCKETS), ) + ifeq ($(TARGET_OS), SunOS) + LIBS += -lsocket -lnsl +diff --git a/flashrom.c b/flashrom.c +index e540027..115a394 100644 +--- a/flashrom.c ++++ b/flashrom.c +@@ -473,6 +473,18 @@ const struct programmer_entry programmer_table[] = { + }, + #endif + ++#if CONFIG_LOONGSON3_SPI == 1 ++ { ++ .name = "loongson3_spi", ++ .type = OTHER, ++ .devs.note = "Loongson-3 SPI BIOS Flash\n", ++ .init = loongson3_spi_init, ++ .map_flash_region = fallback_map, ++ .unmap_flash_region = fallback_unmap, ++ .delay = internal_delay, ++ }, ++#endif ++ + {0}, /* This entry corresponds to PROGRAMMER_INVALID. */ + }; + +diff --git a/hwaccess.h b/hwaccess.h +index 5602c15..e3104e8 100644 +--- a/hwaccess.h ++++ b/hwaccess.h +@@ -279,6 +279,10 @@ int libpayload_wrmsr(int addr, msr_t msr); + + /* PCI port I/O is not yet implemented on PowerPC. */ + ++#elif IS_LOONGARCH ++ ++/* PCI port I/O is not yet implemented on LoongArch. */ ++ + #elif IS_MIPS + + /* PCI port I/O is not yet implemented on MIPS. 
*/ +diff --git a/loongson3_spi.c b/loongson3_spi.c +index 54fc814..f29fc4a 100644 +--- a/loongson3_spi.c ++++ b/loongson3_spi.c +@@ -59,11 +59,18 @@ + static uint8_t *spictrl_base; + + static int loongson3_spi_shutdown(void *data); +-static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++static int loongson3_spi_send_command(struct flashctx *flash, unsigned int writecnt, + unsigned int readcnt, +- const uint8_t *writearr, ++ const unsigned char *writearr, + uint8_t *readarr); + ++ ++//loongson3_spi.c:70:27: error: initialization of ' ++//int (*)(struct flashrom_flashctx *, unsigned int, unsigned int, const unsigned char *, unsigned char *)' from incompatible pointer type ' ++//int (*)(const struct flashrom_flashctx *, unsigned int, unsigned int, const uint8_t *, uint8_t *)' {aka ' ++//int (*)(const struct flashrom_flashctx *, unsigned int, unsigned int, const unsigned char *, unsigned char *)'} [-Werror=incompatible-pointer-types] ++// ++ + static const struct spi_master spi_master_loongson3 = { + .max_data_read = MAX_DATA_READ_UNLIMITED, + .max_data_write = MAX_DATA_WRITE_UNLIMITED, +@@ -190,9 +197,9 @@ static int loongson3_spi_shutdown(void *data) + return 0; + } + +-static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++static int loongson3_spi_send_command(struct flashctx *flash, unsigned int writecnt, + unsigned int readcnt, +- const uint8_t *writearr, ++ const unsigned char *writearr, + uint8_t *readarr) + { + unsigned int i; +diff --git a/meson.build b/meson.build +index 375089c..8e58d36 100644 +--- a/meson.build ++++ b/meson.build +@@ -46,6 +46,7 @@ config_internal = get_option('config_internal') + config_it8212 = get_option('config_it8212') + config_linux_mtd = get_option('config_linux_mtd') + config_linux_spi = get_option('config_linux_spi') ++config_loongson3_spi = get_option('config_loongson3_spi') + config_mstarddc_spi = get_option('config_mstarddc_spi') + config_nic3com = get_option('config_nic3com') + config_nicintel_eeprom = get_option('config_nicintel_eeprom') +@@ -205,6 +206,11 @@ if config_linux_spi + srcs += 'linux_spi.c' + cargs += '-DCONFIG_LINUX_SPI=1' + endif ++if config_loongson3_spi ++ srcs += 'loongson3_spi.c' ++ cargs += '-DCONFIG_LOONGSON3_SPI=1' ++ need_raw_access = true ++endif + if config_mstarddc_spi + srcs += 'mstarddc_spi.c' + cargs += '-DCONFIG_MSTARDDC_SPI=1' +diff --git a/meson_options.txt b/meson_options.txt +index ea87311..d71ac5f 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -18,6 +18,7 @@ option('config_internal_dmi', type : 'boolean', value : true, description : 'Use + option('config_it8212', type : 'boolean', value : true, description : 'ITE IT8212F PATA') + option('config_linux_mtd', type : 'boolean', value : true, description : 'Linux MTD interfaces') + option('config_linux_spi', type : 'boolean', value : true, description : 'Linux spidev interfaces') ++option('config_loongson3_spi', type : 'boolean', value : true, description : 'Loongson3 interfaces') + option('config_mstarddc_spi', type : 'boolean', value : false, description : 'MSTAR DDC support') + option('config_nic3com', type : 'boolean', value : true, description : '3Com NICs') + option('config_nicintel_eeprom', type : 'boolean', value : true, description : 'EEPROM on Intel NICs') +diff --git a/platform.h b/platform.h +index 751957c..9b2f91f 100644 +--- a/platform.h ++++ b/platform.h +@@ -62,6 +62,9 @@ + #elif defined (__hppa__) || defined (__hppa) + #define __FLASHROM_ARCH__ "hppa" + #define 
IS_HPPA 1 ++#elif defined (__loongarch__) ++ #define __FLASHROM_ARCH__ "loongarch" ++ #define IS_LOONGARCH 1 + #elif defined (__m68k__) + #define __FLASHROM_ARCH__ "m68k" + #define IS_M68K 1 +@@ -79,7 +82,7 @@ + #define IS_ARC 1 + #endif + +-#if !(IS_X86 || IS_MIPS || IS_PPC || IS_ARM || IS_SPARC || IS_ALPHA || IS_HPPA || IS_M68K || IS_RISCV || IS_SH || IS_S390 || IS_ARC) ++#if !(IS_X86 || IS_MIPS || IS_PPC || IS_ARM || IS_SPARC || IS_ALPHA || IS_HPPA || IS_LOONGARCH || IS_M68K || IS_RISCV || IS_SH || IS_S390 || IS_ARC) + #error Unknown architecture + #endif + +diff --git a/programmer.h b/programmer.h +index ccdf44c..2d8f631 100644 +--- a/programmer.h ++++ b/programmer.h +@@ -126,6 +126,9 @@ enum programmer { + #endif + #if CONFIG_STLINKV3_SPI == 1 + PROGRAMMER_STLINKV3_SPI, ++#endif ++#if CONFIG_LOONGSON3_SPI == 1 ++ PROGRAMMER_LOONGSON3_SPI, + #endif + PROGRAMMER_INVALID /* This must always be the last entry. */ + }; +-- +2.35.1 + diff --git a/flashrom/PKGBUILD b/flashrom/PKGBUILD index c6112c1b09..9710b39b34 100644 --- a/flashrom/PKGBUILD +++ b/flashrom/PKGBUILD @@ -14,14 +14,33 @@ license=(GPL) depends=(libftdi pciutils libusb) makedepends=(meson) optdepends=('dmidecode: for SMBIOS/DMI table decoder support') -source=("https://download.flashrom.org/releases/${pkgname}-v${pkgver}.tar.bz2"{,.asc}) +source=("https://download.flashrom.org/releases/${pkgname}-v${pkgver}.tar.bz2"{,.asc} +0001-Loongson-3-SPI-tmp.patch +0002-Speed-up.patch +0003-Add-support-for-loongarch64.patch +) + sha256sums=('e1f8d95881f5a4365dfe58776ce821dfcee0f138f75d0f44f8a3cd032d9ea42b' 'SKIP') validpgpkeys=(58A4868B25C7CFD662FB0132A3EB95B8D9780F68) # David Hendricks (packaging key) + +prepare() { + cd ${pkgname}-v${pkgver} + patch -p1 -i "$srcdir/0001-Loongson-3-SPI-tmp.patch" + patch -p1 -i "$srcdir/0002-Speed-up.patch" + patch -p1 -i "$srcdir/0003-Add-support-for-loongarch64.patch" +} + build() { cd ${pkgname}-v${pkgver} - arch-meson ../build + arch-meson ../build \ + -Dconfig_loongson3_spi=true \ + -Dconfig_nic3com=false \ + -Dconfig_satamv=false \ + -Dconfig_satasii=false \ + -Dconfig_nicrealtek=false \ + -Dconfig_rayer_spi=false ninja -v -C ../build # Workaround for meson not installing manpage make @@ -34,3 +53,8 @@ package() { make PREFIX=temp/ install cp -r temp/share "${pkgdir}"/usr/ } +sha256sums=('e1f8d95881f5a4365dfe58776ce821dfcee0f138f75d0f44f8a3cd032d9ea42b' + 'SKIP' + 'daa1baf186747b34d6e36c3497295b9c29978632d8b958e3a2d08fd11fd035b5' + 'e8c2801791ca22f391a6820e83bd1bedf2be3aeb97057b12d3b718d467c4f115' + '5e1700d8d90353b721f39f4dc030131277083e3e15b9d0265bbc6473604b228f') diff --git a/flatpak/PKGBUILD b/flatpak/PKGBUILD index bde0a6e075..14bc8afad7 100644 --- a/flatpak/PKGBUILD +++ b/flatpak/PKGBUILD @@ -47,7 +47,7 @@ makedepends=( wayland-protocols xmlto ) -checkdepends=( +makedepends+=( socat valgrind ) diff --git a/fluidd/PKGBUILD b/fluidd/PKGBUILD index e79ae50fa8..962506821f 100644 --- a/fluidd/PKGBUILD +++ b/fluidd/PKGBUILD @@ -26,6 +26,7 @@ pkgver() { build() { cd fluidd +# npm config set registry https://registry.loongnix.cn:5873/ npm install --frozen-lockfile npm run build } diff --git a/fmt/PKGBUILD b/fmt/PKGBUILD index 9b6953d853..91c74ac17b 100644 --- a/fmt/PKGBUILD +++ b/fmt/PKGBUILD @@ -51,7 +51,7 @@ build() { -DCMAKE_INSTALL_LIBDIR=/usr/lib \ -DBUILD_SHARED_LIBS=ON cmake --build build - cmake --build build --target doc +#cmake --build build --target doc } check() { diff --git a/foomatic-db-engine/PKGBUILD b/foomatic-db-engine/PKGBUILD index 0f7a74f2c6..da6f43bf21 100644 --- 
a/foomatic-db-engine/PKGBUILD +++ b/foomatic-db-engine/PKGBUILD @@ -2,7 +2,7 @@ pkgname=foomatic-db-engine arch=('loong64' 'x86_64') -pkgver=20220521 +pkgver=20220929 pkgrel=1 epoch=4 url="http://www.linuxprinting.org/foomatic.html" diff --git a/foomatic-db/PKGBUILD b/foomatic-db/PKGBUILD index 10560be89e..e43d2fcc42 100644 --- a/foomatic-db/PKGBUILD +++ b/foomatic-db/PKGBUILD @@ -3,7 +3,7 @@ pkgbase="foomatic-db" pkgname=('foomatic-db' 'foomatic-db-ppds' 'foomatic-db-nonfree' 'foomatic-db-nonfree-ppds') arch=('any') -pkgver=20230903 +pkgver=20230910 pkgrel=1 epoch=3 diff --git a/foot/PKGBUILD b/foot/PKGBUILD index bf5e93b125..8499c6c56e 100644 --- a/foot/PKGBUILD +++ b/foot/PKGBUILD @@ -27,7 +27,7 @@ makedepends=(fcft optdepends=('libutempter: utmp logging') backup=(etc/xdg/foot/foot.ini) source=("$pkgname-$pkgver.tar.gz::$url/archive/$pkgver.tar.gz") -sha256sums=('0e02af376e5f4a96eeb90470b7ad2e79a1d660db2a7d1aa772be43c7db00e475') +sha256sums=('8060ec28cbf6e2e3d408665330da4bc48fd094d4f1265d7c58dc75c767463c29') build() { cd "$pkgbase" diff --git a/fossil/PKGBUILD b/fossil/PKGBUILD index 0edbdf890d..bbfe331262 100644 --- a/fossil/PKGBUILD +++ b/fossil/PKGBUILD @@ -24,6 +24,10 @@ sha256sums=('f885e17998dc1eece1688a75e516663462fe72a7f4f132def4132055777c7ff8' build() { cd fossil-src-$pkgver + if [ -f autosetup/autosetup-config.guess ]; then + cp /usr/share/automake-1.16/config.guess autosetup/autosetup-config.guess + cp /usr/share/automake-1.16/config.sub autosetup/autosetup-config.sub + fi ./configure --prefix=/usr --json --disable-internal-sqlite --with-tcl=/usr --with-tcl-private-stubs=1 # headers and translate targets are problematic with parallel jobs #make -j1 bld bld/headers diff --git a/fuse2/PKGBUILD b/fuse2/PKGBUILD index 130d7ff4ee..f7e8c64fbf 100644 --- a/fuse2/PKGBUILD +++ b/fuse2/PKGBUILD @@ -15,11 +15,21 @@ conflicts=('fuse') depends=('glibc' 'fuse-common') makedepends=('pkg-config') options=(!emptydirs) -source=(https://github.com/libfuse/libfuse/releases/download/fuse-$pkgver/fuse-$pkgver.tar.gz{,.asc}) +source=(https://github.com/libfuse/libfuse/releases/download/fuse-$pkgver/fuse-$pkgver.tar.gz{,.asc} + fuse-closefrom.patch + fuse-loongarch.patch) sha1sums=('943ba651b14bc4a3c6fd959ed4b8c04f4a59032d' - 'SKIP') + 'SKIP' + 'aef8e65806d4b73672463378b5cdb694dcd1a051' + 'b7791ec80f9d8b21f327cd506adb512868174893') validpgpkeys=(ED31791B2C5C1613AF388B8AD113FCAC3C4E599F) # Nikolaus Rath +prepare() { + cd fuse-$pkgver + patch -p1 -i "$srcdir/fuse-closefrom.patch" + patch -p1 -i "$srcdir/fuse-loongarch.patch" +} + build() { cd fuse-$pkgver diff --git a/fuse2/fuse-closefrom.patch b/fuse2/fuse-closefrom.patch new file mode 100644 index 0000000000..184dcb42de --- /dev/null +++ b/fuse2/fuse-closefrom.patch @@ -0,0 +1,22 @@ +Index: fuse-2.9.9/util/ulockmgr_server.c +=================================================================== +--- fuse-2.9.9.orig/util/ulockmgr_server.c ++++ fuse-2.9.9/util/ulockmgr_server.c +@@ -124,7 +124,7 @@ static int receive_message(int sock, voi + return res; + } + +-static int closefrom(int minfd) ++static int closefromfd(int minfd) + { + DIR *dir = opendir("/proc/self/fd"); + if (dir) { +@@ -384,7 +384,7 @@ int main(int argc, char *argv[]) + dup2(nullfd, 1); + } + close(3); +- closefrom(5); ++ closefromfd(5); + while (1) { + char c; + int sock; diff --git a/fuse2/fuse-loongarch.patch b/fuse2/fuse-loongarch.patch new file mode 100644 index 0000000000..94a05cb1d8 --- /dev/null +++ b/fuse2/fuse-loongarch.patch @@ -0,0 +1,28 @@ +Index: 
fuse-2.9.9/lib/fuse_loop_mt.c +=================================================================== +--- fuse-2.9.9.orig/lib/fuse_loop_mt.c ++++ fuse-2.9.9/lib/fuse_loop_mt.c +@@ -6,11 +6,6 @@ + See the file COPYING.LIB. + */ + +-#include "fuse_lowlevel.h" +-#include "fuse_misc.h" +-#include "fuse_kernel.h" +-#include "fuse_i.h" +- + #include + #include + #include +@@ -20,6 +15,11 @@ + #include + #include + ++#include "fuse_lowlevel.h" ++#include "fuse_misc.h" ++#include "fuse_kernel.h" ++#include "fuse_i.h" ++ + /* Environment var controlling the thread stack size */ + #define ENVNAME_THREAD_STACK "FUSE_THREAD_STACK" + diff --git a/fwupd-efi/0001-add-support-for-loongarch64.patch b/fwupd-efi/0001-add-support-for-loongarch64.patch new file mode 100644 index 0000000000..e66a3e4028 --- /dev/null +++ b/fwupd-efi/0001-add-support-for-loongarch64.patch @@ -0,0 +1,315 @@ +diff --git a/efi/crt0/crt0-efi-loongarch64.S b/efi/crt0/crt0-efi-loongarch64.S +new file mode 100644 +index 0000000..416a316 +--- /dev/null ++++ b/efi/crt0/crt0-efi-loongarch64.S +@@ -0,0 +1,172 @@ ++/* ++ * crt0-efi-loongarch64.S - PE/COFF header for LoongArch64 EFI applications ++ * ++ * Copyright (C) 2014 Linaro Ltd. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice and this list of conditions, without modification. ++ * 2. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * Alternatively, this software may be distributed under the terms of the ++ * GNU General Public License as published by the Free Software Foundation; ++ * either version 2 of the License, or (at your option) any later version. ++ */ ++ ++ .section .text.head ++ ++ /* ++ * Magic "MZ" signature for PE/COFF ++ */ ++ .globl ImageBase ++ImageBase: ++ .ascii "MZ" ++ .skip 58 // 'MZ' + pad + offset == 64 ++ .long pe_header - ImageBase // Offset to the PE header. ++pe_header: ++ .ascii "PE" ++ .short 0 ++coff_header: ++ .short 0x6264 // LoongArch64 ++ .short 4 // nr_sections ++ .long 0 // TimeDateStamp ++ .long 0 // PointerToSymbolTable ++ .long 1 // NumberOfSymbols ++ .short section_table - optional_header // SizeOfOptionalHeader ++ .short 0x206 // Characteristics. 
++ // IMAGE_FILE_DEBUG_STRIPPED | ++ // IMAGE_FILE_EXECUTABLE_IMAGE | ++ // IMAGE_FILE_LINE_NUMS_STRIPPED ++optional_header: ++ .short 0x20b // PE32+ format ++ .byte 0x02 // MajorLinkerVersion ++ .byte 0x14 // MinorLinkerVersion ++ .long _text_size // SizeOfCode ++ .long _alldata_size // SizeOfInitializedData ++ .long 0 // SizeOfUninitializedData ++ .long _start - ImageBase // AddressOfEntryPoint ++ .long _start - ImageBase // BaseOfCode ++ ++extra_header_fields: ++ .quad 0 // ImageBase ++ .long 0x1000 // SectionAlignment ++ .long 0x200 // FileAlignment ++ .short 0 // MajorOperatingSystemVersion ++ .short 0 // MinorOperatingSystemVersion ++ .short 0 // MajorImageVersion ++ .short 0 // MinorImageVersion ++ .short 0 // MajorSubsystemVersion ++ .short 0 // MinorSubsystemVersion ++ .long 0 // Win32VersionValue ++ ++ .long _erodata - ImageBase // SizeOfImage ++ ++ // Everything before the kernel image is considered part of the header ++ .long _start - ImageBase // SizeOfHeaders ++ .long 0 // CheckSum ++ .short EFI_SUBSYSTEM // Subsystem ++ .short 0 // DllCharacteristics ++ .quad 0 // SizeOfStackReserve ++ .quad 0 // SizeOfStackCommit ++ .quad 0 // SizeOfHeapReserve ++ .quad 0 // SizeOfHeapCommit ++ .long 0 // LoaderFlags ++ .long 0x6 // NumberOfRvaAndSizes ++ ++ .quad 0 // ExportTable ++ .quad 0 // ImportTable ++ .quad 0 // ResourceTable ++ .quad 0 // ExceptionTable ++ .quad 0 // CertificationTable ++ .quad 0 // BaseRelocationTable ++ ++ // Section table ++section_table: ++ .ascii ".text\0\0\0" ++ .long _evtext - _start // VirtualSize ++ .long _start - ImageBase // VirtualAddress ++ .long _etext - _start // SizeOfRawData ++ .long _start - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_MEM_EXECUTE | EFI_IMAGE_SCN_CNT_CODE ++ */ ++ .long 0x60000020 // Characteristics (section flags) ++ ++ .ascii ".data\0\0\0" ++ .long _data_vsize // VirtualSize ++ .long _data - ImageBase // VirtualAddress ++ .long _data_size // SizeOfRawData ++ .long _data - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_WRITE | EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_CNT_INITIALIZED_DATA ++ */ ++ .long 0xc0000040 // Characteristics (section flags) ++ ++ .ascii ".sbat\0\0\0" ++ .long _sbat_vsize // VirtualSize ++ .long _sbat - ImageBase // VirtualAddress ++ .long _sbat_size // SizeOfRawData ++ .long _sbat - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_ALIGN_8BYTES | EFI_IMAGE_SCN_CNT_INITIALIZED_DATA ++ */ ++ .long 0x40400040 // Characteristics (section flags) ++ ++ .ascii ".rodata\0" ++ .long _rodata_vsize // VirtualSize ++ .long _rodata - ImageBase // VirtualAddress ++ .long _rodata_size // SizeOfRawData ++ .long _rodata - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // 
NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_ALIGN_8BYTES | EFI_IMAGE_SCN_CNT_INITIALIZED_DATA ++ */ ++ .long 0x40400040 // Characteristics (section flags) ++ ++ .align 12 ++ ++ .globl _start ++ .type _start, @function ++_start: ++ addi.d $sp, $sp, -24 ++ st.d $ra, $sp, 0 ++ st.d $a0, $sp, 8 ++ st.d $a1, $sp, 16 ++ ++ move $a2, $a0 // a2: ImageHandle ++ move $a3, $a1 // a3: SystemTable ++ la.local $a0, ImageBase // a0: ImageBase ++ la.local $a1, _DYNAMIC // a1: DynamicSection ++ bl _relocate ++ bnez $a0, 0f ++ ++ ld.d $a0, $sp, 8 ++ ld.d $a1, $sp, 16 ++ bl efi_main ++ ++0: ld.d $ra, $sp, 0 ++ addi.d $sp, $sp, 24 ++ jr $ra ++ .end _start +diff --git a/efi/lds/elf_loongarch64_efi.lds b/efi/lds/elf_loongarch64_efi.lds +new file mode 100644 +index 0000000..899e352 +--- /dev/null ++++ b/efi/lds/elf_loongarch64_efi.lds +@@ -0,0 +1,103 @@ ++OUTPUT_FORMAT("elf64-loongarch", "elf64-loongarch", "elf64-loongarch") ++OUTPUT_ARCH(loongarch) ++ENTRY(_start) ++SECTIONS ++{ ++ .text 0x0 : { ++ _text = .; ++ *(.text.head) ++ *(.text) ++ *(.text.*) ++ *(.gnu.linkonce.t.*) ++ _evtext = .; ++ . = ALIGN(4096); ++ } ++ _etext = .; ++ _text_size = . - _text; ++ _text_vsize = _evtext - _text; ++ ++ . = ALIGN(4096); ++ .data : ++ { ++ _data = .; ++ *(.sdata) ++ *(.data) ++ *(.data1) ++ *(.data.*) ++ *(.got.plt) ++ *(.got) ++ ++ *(.dynamic) ++ ++ /* the EFI loader doesn't seem to like a .bss section, so we stick ++ it all into .data: */ ++ . = ALIGN(16); ++ _bss = .; ++ *(.sbss) ++ *(.scommon) ++ *(.dynbss) ++ *(.bss) ++ *(COMMON) ++ _evdata = .; ++ . = ALIGN(4096); ++ _bss_end = .; ++ } ++ _edata = .; ++ _data_vsize = _evdata - _data; ++ _data_size = . - _data; ++ ++ /* ++ * Note that _sbat must be the beginning of the data, and _esbat must be the ++ * end and must be before any section padding. The sbat self-check uses ++ * _esbat to find the bounds of the data, and if the padding is included, the ++ * CSV parser (correctly) rejects the data as having NUL values in one of the ++ * required columns. ++ */ ++ . = ALIGN(4096); ++ .sbat : ++ { ++ _sbat = .; ++ *(.sbat) ++ *(.sbat.*) ++ _esbat = .; ++ . = ALIGN(4096); ++ _epsbat = .; ++ } ++ _sbat_size = _epsbat - _sbat; ++ _sbat_vsize = _esbat - _sbat; ++ ++ . = ALIGN(4096); ++ .rodata : ++ { ++ _rodata = .; ++ *(.rela.dyn) ++ *(.rela.plt) ++ *(.rela.got) ++ *(.rela.data) ++ *(.rela.data*) ++ ++ *(.rodata*) ++ *(.srodata) ++ *(.dynsym) ++ *(.dynstr) ++ . = ALIGN(16); ++ *(.note.gnu.build-id) ++ . = ALIGN(4096); ++ *(.vendor_cert) ++ *(.data.ident) ++ _evrodata = .; ++ . = ALIGN(4096); ++ } ++ _erodata = .; ++ _rodata_size = . - _rodata; ++ _rodata_vsize = _evrodata - _rodata; ++ _alldata_size = . 
- _data; ++ ++ /DISCARD/ : ++ { ++ *(.rel.reloc) ++ *(.eh_frame) ++ *(.note.GNU-stack) ++ } ++ .comment 0 : { *(.comment) } ++} +diff --git a/efi/meson.build b/efi/meson.build +index 2bba4f5..e65b0b1 100644 +--- a/efi/meson.build ++++ b/efi/meson.build +@@ -77,6 +77,9 @@ endif + if host_cpu == 'arm' or (host_cpu == 'aarch64' and (objcopy_version.version_compare ('< 2.38') or coff_header_in_crt0)) + objcopy_manualsymbols = true + generate_binary_extra = ['--objcopy-manualsymbols'] ++elif host_cpu == 'loongarch64' and (objcopy_version.version_compare ('< 2.41') or coff_header_in_crt0) ++ objcopy_manualsymbols = true ++ generate_binary_extra = ['--objcopy-manualsymbols'] + else + objcopy_manualsymbols = false + generate_binary_extra = [] +diff --git a/meson.build b/meson.build +index 1526fd7..9467e23 100644 +--- a/meson.build ++++ b/meson.build +@@ -33,6 +33,9 @@ elif host_cpu == 'arm' + elif host_cpu == 'aarch64' + EFI_MACHINE_TYPE_NAME = 'aa64' + gnu_efi_arch = 'aarch64' ++elif host_cpu == 'loongarch64' ++ EFI_MACHINE_TYPE_NAME = 'loongarch64' ++ gnu_efi_arch = 'loongarch64' + else + error('Unknown host_cpu ' + host_cpu) + endif diff --git a/fwupd-efi/PKGBUILD b/fwupd-efi/PKGBUILD index 0357c788b2..0e55c5a104 100644 --- a/fwupd-efi/PKGBUILD +++ b/fwupd-efi/PKGBUILD @@ -9,18 +9,25 @@ arch=('loong64' 'x86_64') url='https://github.com/fwupd/fwupd-efi' license=('LGPL') makedepends=('meson' 'gnu-efi' 'python-pefile') -source=("https://people.freedesktop.org/~hughsient/releases/${pkgname}-${pkgver}.tar.xz"{,.asc}) +source=("https://people.freedesktop.org/~hughsient/releases/${pkgname}-${pkgver}.tar.xz"{,.asc} + 0001-add-support-for-loongarch64.patch) sha256sums=('27ddbc0a4e3209543eab49e69f1a014eb9ed95f93ff51f966f31082d3b7bffbc' - 'SKIP') + 'SKIP' + 'be38120643c6afc770733d70fb3191abb63b3f71986140ec219fae174395492d') validpgpkeys=('163EB50119225DB3DF8F49EA17ACBA8DFA970E17') # Richard Hughes +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/0001-add-support-for-loongarch64.patch" +} + build() { arch-meson ${pkgname}-${pkgver} build \ -D efi_sbat_distro_id='arch' \ -D efi_sbat_distro_summary='Arch Linux' \ -D efi_sbat_distro_pkgname=${pkgname} \ -D efi_sbat_distro_version=${pkgver} \ - -D efi_sbat_distro_url="https://archlinux.org/packages/community/x86_64/${pkgname}/" + -D efi_sbat_distro_url="https://archlinux.org/packages/community/loongarch64/${pkgname}/" ninja -C build } diff --git a/fwupd/PKGBUILD b/fwupd/PKGBUILD index c0bf2102a6..7b4cc82bf7 100644 --- a/fwupd/PKGBUILD +++ b/fwupd/PKGBUILD @@ -44,7 +44,6 @@ makedepends=( meson noto-fonts noto-fonts-cjk - pandoc python-cairo python-gobject python-pillow @@ -55,22 +54,30 @@ checkdepends=(umockdev) source=( "https://github.com/fwupd/fwupd/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.xz"{,.asc} fwupd.sysusers + fwupd-1.9.5-loong64.patch ) sha512sums=('d0b2db6fb9dc231022b8109e0c62b682173a89755e759972f6e4b33fa9a8c31426b3bef3fab9b931ef591adcc867962e8251632223e5460122ea499df636b214' 'SKIP' '637203080b55eda74a659f58c853a9a723a2dad5da70915b2b0e036c6145a649468ebec700cc83975d9cb5378b9dced8b3a3b26bdbcc75ddc774837355e75deb') b2sums=('f5679c128cd4c4278c47fb098b0d38d7c4d0a8a0f4f0421b72b1cca934622ecb9d1974f7b67f49f9c9c2f805664d3c16ff6861c0572c95a863b4ebd7858e0a78' 'SKIP' - 'e65ca7da22a20a40882cfc1fe4479643f9a38c90a4f2c3e71e6e5e3de1d6db212a0f17d600097619fe3cdb0a9b860422f8b0b9a9d45441518e51a7eb12a918bb') + 'e65ca7da22a20a40882cfc1fe4479643f9a38c90a4f2c3e71e6e5e3de1d6db212a0f17d600097619fe3cdb0a9b860422f8b0b9a9d45441518e51a7eb12a918bb' + 
'c1da6c2cee029024cb94a2a94559499f635a7c36db8b03324c8f506a4390c6171017d8e7c1095ed972d7dd404b53b236f8950cdbf92e308fb9585bd2d48994ac') validpgpkeys=(163EB50119225DB3DF8F49EA17ACBA8DFA970E17) # Richard Hughes +prepare() { + cd ${pkgname}-${pkgver} + patch -p1 -i "$srcdir/fwupd-1.9.5-loong64.patch" +} + build() { arch-meson ${pkgname}-${pkgver} build \ -D b_lto=false \ - -D docs=enabled \ + -D docs=disabled \ -D plugin_amdgpu=disabled \ + -D plugin_msr=disabled \ -D launchd=disabled \ - -D plugin_intel_spi=true \ + -D plugin_intel_spi=false \ -D supported_build=enabled \ -D efi_binary=false \ -D systemd_unit_user=fwupd @@ -113,10 +120,10 @@ package_fwupd() { mv "${pkgdir}"/usr/bin/{,fwupd-}dbxtool mv "${pkgdir}"/usr/share/man/man1/{,fwupd-}dbxtool.1 # Remove msr module-load config as it is built-in - rm "${pkgdir}"/usr/lib/modules-load.d/fwupd-msr.conf - rmdir "${pkgdir}"/usr/lib/modules-load.d +# rm "${pkgdir}"/usr/lib/modules-load.d/fwupd-msr.conf +# rmdir "${pkgdir}"/usr/lib/modules-load.d - _pick docs "${pkgdir}"/usr/share/doc/{,fwupd/}{libfwupdplugin,libfwupd} +# _pick docs "${pkgdir}"/usr/share/doc/{,fwupd/}{libfwupdplugin,libfwupd} } package_fwupd-docs() { diff --git a/fwupd/fwupd-1.9.5-loong64.patch b/fwupd/fwupd-1.9.5-loong64.patch new file mode 100644 index 0000000000..bc2547463e --- /dev/null +++ b/fwupd/fwupd-1.9.5-loong64.patch @@ -0,0 +1,39 @@ +From 6e776b7e9c2bb393d3c7fceda3caffcd76ab8146 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Thu, 12 May 2022 19:08:38 +0800 +Subject: [PATCH] Add support for loongarch64 + +--- + meson.build | 2 ++ + plugins/uefi-capsule/fu-uefi-common.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/meson.build b/meson.build +index b3feada65..ce656fdc9 100644 +--- a/meson.build ++++ b/meson.build +@@ -431,6 +431,8 @@ if build_standalone + EFI_MACHINE_TYPE_NAME = 'arm' + elif host_cpu == 'aarch64' + EFI_MACHINE_TYPE_NAME = 'aa64' ++ elif host_cpu == 'loongarch64' ++ EFI_MACHINE_TYPE_NAME = 'loongarch64' + else + EFI_MACHINE_TYPE_NAME = '' + endif +diff --git a/plugins/uefi-capsule/fu-uefi-common.c b/plugins/uefi-capsule/fu-uefi-common.c +index b1da5100a..138109b17 100644 +--- a/plugins/uefi-capsule/fu-uefi-common.c ++++ b/plugins/uefi-capsule/fu-uefi-common.c +@@ -22,6 +22,8 @@ fu_uefi_bootmgr_get_suffix(GError **error) + {64, "x64"}, + #elif defined(__aarch64__) + {64, "aa64"}, ++#elif defined(__loongarch64) ++ {64, "loongarch64"}, + #endif + #if defined(__x86_64__) || defined(__i386__) || defined(__i686__) + {32, "ia32"}, +-- +2.42.0 + diff --git a/gcc12/PKGBUILD b/gcc12/PKGBUILD index b5708c4f4a..ae16e7ecd7 100644 --- a/gcc12/PKGBUILD +++ b/gcc12/PKGBUILD @@ -20,8 +20,8 @@ url='https://gcc.gnu.org' makedepends=( binutils doxygen - gcc-ada - gcc-d +# gcc-ada +# gcc-d git libisl libmpc @@ -102,6 +102,8 @@ build() { # TODO: properly deal with the build issues resulting from this CFLAGS=${CFLAGS/-Werror=format-security/} CXXFLAGS=${CXXFLAGS/-Werror=format-security/} + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} "$srcdir/gcc/configure" \ --enable-languages=c,c++,fortran \ @@ -134,15 +136,14 @@ package_gcc12-libs() { cd gcc-build make -C $CHOST/libgcc DESTDIR="$pkgdir" install-shared - mv "${pkgdir}/${_libdir}"/../lib/* "${pkgdir}/${_libdir}" - rmdir "${pkgdir}/${_libdir}"/../lib +#mv "${pkgdir}/${_libdir}"/../lib/* "${pkgdir}/${_libdir}" +# rmdir "${pkgdir}/${_libdir}"/../lib rm -f "$pkgdir/$_libdir/libgcc_eh.a" for lib in libasan.so \ libatomic.so \ libgfortran.so \ libgomp.so \ - libitm.so \ liblsan.so \ libquadmath.so \ 
libstdc++.so \ @@ -178,6 +179,7 @@ package_gcc12() { make -C $CHOST/libgcc DESTDIR="$pkgdir" install rm -f "$pkgdir"/usr/lib/libgcc_s.so* + rm -f "$pkgdir"/usr/lib/gcc/loongarch64-unknown-linux-gnu/12.3.0/libgcc_s.so* make -C $CHOST/libstdc++-v3/src DESTDIR="$pkgdir" install make -C $CHOST/libstdc++-v3/include DESTDIR="$pkgdir" install @@ -194,12 +196,12 @@ package_gcc12() { "$pkgdir/${_libdir}/bfd-plugins/" make -C $CHOST/libgomp DESTDIR="$pkgdir" install-nodist_{libsubinclude,toolexeclib}HEADERS - make -C $CHOST/libitm DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libitm DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS make -C $CHOST/libquadmath DESTDIR="$pkgdir" install-nodist_libsubincludeHEADERS - make -C $CHOST/libsanitizer DESTDIR="$pkgdir" install-nodist_{saninclude,toolexeclib}HEADERS - make -C $CHOST/libsanitizer/asan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS - make -C $CHOST/libsanitizer/tsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS - make -C $CHOST/libsanitizer/lsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libsanitizer DESTDIR="$pkgdir" install-nodist_{saninclude,toolexeclib}HEADERS +# make -C $CHOST/libsanitizer/asan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libsanitizer/tsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libsanitizer/lsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS make -C libcpp DESTDIR="$pkgdir" install make -C gcc DESTDIR="$pkgdir" install-po @@ -210,7 +212,7 @@ package_gcc12() { # create cc-rs compatible symlinks # https://github.com/rust-lang/cc-rs/blob/1.0.73/src/lib.rs#L2578-L2581 for binary in {c++,g++,gcc,gcc-ar,gcc-nm,gcc-ranlib}; do - ln -s /usr/bin/${binary} "${pkgdir}"/usr/bin/x86_64-linux-gnu-${binary}-12 + ln -s /usr/bin/${binary} "${pkgdir}"/usr/bin/loongarch64-linux-gnu-${binary}-12 done # POSIX conformance launcher scripts for c89 and c99 diff --git a/gcr/PKGBUILD b/gcr/PKGBUILD index 6377c55974..879cc53381 100644 --- a/gcr/PKGBUILD +++ b/gcr/PKGBUILD @@ -36,10 +36,6 @@ pkgver() { git describe --tags | sed 's/[^-]*-g/r&/;s/-/+/g' } -prepare() { - cd gcr -} - build() { local meson_options=( # ssh-agent moved to gcr-4 diff --git a/gendesk/PKGBUILD b/gendesk/PKGBUILD index 388b15514b..e0c8b701d1 100644 --- a/gendesk/PKGBUILD +++ b/gendesk/PKGBUILD @@ -17,8 +17,11 @@ b2sums=('07ba72b713bd240e9035ced7555e02bd544ba402a4cf1e8417804ea3d53d7c43cbd47da options=('!lto') build() { + export GOPROXY=https://goproxy.cn cd $pkgname-$pkgver - go build -v -mod=vendor -trimpath -buildmode=pie -ldflags="-s -w -extldflags $LDFLAGS" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@master + go mod tidy + go build -v -trimpath -ldflags="-s -w -extldflags $LDFLAGS" } package() { diff --git a/geos/PKGBUILD b/geos/PKGBUILD index 2af3bbf659..1b916b5cf7 100644 --- a/geos/PKGBUILD +++ b/geos/PKGBUILD @@ -15,8 +15,14 @@ depends=(gcc-libs bash) makedepends=(cmake) options=(!emptydirs) changelog=$pkgname.changelog -source=(https://download.osgeo.org/$pkgname/$pkgname-$pkgver.tar.bz2) -sha256sums=('d96db96011259178a35555a0f6d6e75a739e52a495a6b2aa5efb3d75390fbc39') +source=(https://download.osgeo.org/$pkgname/$pkgname-$pkgver.tar.bz2 + $pkgname-$pkgver-gcc13.patch) +sha256sums=('d96db96011259178a35555a0f6d6e75a739e52a495a6b2aa5efb3d75390fbc39' + '61b348c1177814073e4c2926ee0a0787b7eb3c1acd7b6e77095f8e6868a95b1b') + +prepare(){ + patch -d $pkgname-$pkgver -Np1 -i $srcdir/$pkgname-$pkgver-gcc13.patch +} build() { cmake -B build -S 
$pkgname-$pkgver \ diff --git a/geos/geos-3.11.1-gcc13.patch b/geos/geos-3.11.1-gcc13.patch new file mode 100644 index 0000000000..721a5f2ff1 --- /dev/null +++ b/geos/geos-3.11.1-gcc13.patch @@ -0,0 +1,90 @@ +--- geos-3.11.1/include/geos/shape/fractal/HilbertEncoder.h 2023-03-28 19:32:13.476662911 +0800 ++++ geos-3.11.1/include/geos/shape/fractal/HilbertEncoder.h 2023-03-28 19:34:15.021935510 +0800 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + // Forward declarations + namespace geos { +--- geos-3.11.1/tests/unit/capi/GEOSMakeValidTest.cpp 2023-03-28 19:32:13.992668278 +0800 ++++ geos-3.11.1/tests/unit/capi/GEOSMakeValidTest.cpp 2023-03-28 19:35:04.686459884 +0800 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #include "capi_test_utils.h" + +--- geos-3.11.1/include/geos/geomgraph/Label.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/geomgraph/Label.h 2023-03-31 09:45:20.832282317 +0800 +@@ -26,6 +26,7 @@ + + #include // for operator<< + #include ++#include + + namespace geos { + namespace geomgraph { // geos.geomgraph +--- geos-3.11.1/include/geos/geomgraph/TopologyLocation.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/geomgraph/TopologyLocation.h 2023-03-31 09:47:17.450824619 +0800 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #ifdef _MSC_VER + #pragma warning(push) +--- geos-3.11.1/include/geos/geomgraph/Depth.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/geomgraph/Depth.h 2023-03-31 09:52:33.839867426 +0800 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + // Forward declarations + namespace geos { +--- geos-3.11.1/include/geos/io/WKTWriter.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/io/WKTWriter.h 2023-03-31 09:57:54.364575348 +0800 +@@ -24,6 +24,7 @@ + + #include + #include ++#include + + #ifdef _MSC_VER + #pragma warning(push) +--- geos-3.11.1/include/geos/operation/overlayng/OverlayLabel.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/operation/overlayng/OverlayLabel.h 2023-03-31 10:01:01.399614523 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + using geos::geom::Location; + using geos::geom::Position; +--- geos-3.11.1/include/geos/shape/fractal/HilbertCode.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/shape/fractal/HilbertCode.h 2023-03-31 10:09:16.067844595 +0800 +@@ -17,6 +17,7 @@ + + #include + #include ++#include + + // Forward declarations + namespace geos { +--- geos-3.11.1/include/geos/shape/fractal/MortonCode.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/shape/fractal/MortonCode.h 2023-03-31 10:25:30.274038075 +0800 +@@ -17,6 +17,7 @@ + + #include + #include ++#include + + // Forward declarations + namespace geos { diff --git a/gfold/PKGBUILD b/gfold/PKGBUILD index f8c5de5e3e..1e872fbbda 100644 --- a/gfold/PKGBUILD +++ b/gfold/PKGBUILD @@ -16,7 +16,7 @@ sha512sums=('SKIP') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" } build() { diff --git a/ghc/PKGBUILD b/ghc/PKGBUILD index 35fc100809..947b739efb 100644 --- a/ghc/PKGBUILD +++ b/ghc/PKGBUILD @@ -11,8 +11,8 @@ shopt -s extglob pkgbase=ghc pkgname=(ghc-libs ghc ghc-static) -pkgver=9.0.2 -pkgrel=3 +pkgver=9.4.7 +pkgrel=1 pkgdesc='The Glasgow Haskell Compiler' arch=('loong64' 'x86_64') url='https://www.haskell.org/ghc/' @@ -23,7 +23,7 @@ 
source=("https://downloads.haskell.org/~ghc/$pkgver/$pkgbase-${pkgver}-src.tar.x ghc-sphinx-6.patch::https://gitlab.haskell.org/ghc/ghc/-/commit/00dc51060881df81258ba3b3bdf447294618a4de.patch ghc-rebuild-doc-index.hook ghc-register.hook ghc-unregister.hook) noextract=("$pkgbase-${pkgver}-src.tar.xz") -sha512sums=('32994c7d2b8f47bae604cd825bfcf9c788d79ce26d1d5f58bd73a7093e11ae6c3c17b31dc0c9e454dbf67ca169b942f92213c388d615768cae86055bf6094dee' +sha512sums=('0fa2b864e90e6b76fa5a12b7ab417c0b945653707cdbc4942fbba58f93baee43b6160bb23aa0aa1282907022956275fa5469875059a743f181987d1b067f30e2' '7a79a5e9591b1ddd78fa349526a85cf9fee64db80639dcf1f3d6edef422fd4454222aedf5581e21489a20d748656265a40e7645004e4d5220280f6214c568e64' 'd69e5222d1169c4224a2b69a13e57fdd574cb1b5932b15f4bc6c7d269a9658dd87acb1be81f52fbcf3cb64f96978b9943d10cee2c21bff0565aaa93a5d35fcae' '5f659651d8e562a4dcaae0f821d272d6e9c648b645b1d6ab1af61e4dd690dc5a4b9c6846753b7f935963f001bb1ae1f40cd77731b71ef5a8dbc079a360aa3f8f' @@ -36,19 +36,19 @@ prepare() { LANG=en_US.UTF-8 bsdtar xf $pkgbase-${pkgver}-src.tar.xz cd ghc-$pkgver - patch -p1 -i ../ghc-sphinx-6.patch + #patch -p1 -i ../ghc-sphinx-6.patch # Suppress warnings for newer LLVM. LlvmMaxVersion is non-inclusive and currently GHC # doesn't work with LLVM 15: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/8999 - sed -i 's/LlvmMaxVersion=13/LlvmMaxVersion=15/' configure.ac + #sed -i 's/LlvmMaxVersion=13/LlvmMaxVersion=15/' configure.ac # Temporary hack to use LLVM 14 for bootstrapping during LLVM 15 rebuild. - export PATH="/usr/lib/llvm14/bin/:$PATH" + #export PATH="/usr/lib/llvm14/bin/:$PATH" cp mk/build.mk{.sample,} sed -i '1iBuildFlavour = perf-llvm' mk/build.mk - ./boot + ./boot.source } build() { diff --git a/git-branchless/PKGBUILD b/git-branchless/PKGBUILD index 78b81fad7c..a9ccbd044d 100644 --- a/git-branchless/PKGBUILD +++ b/git-branchless/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('f9e13d9a3de960b32fb684a59492defd812bb0785df48facc964478f675f0355') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/git-bug/PKGBUILD b/git-bug/PKGBUILD index 8a7ceb7023..fd94a093bd 100644 --- a/git-bug/PKGBUILD +++ b/git-bug/PKGBUILD @@ -40,6 +40,8 @@ build() { export CGO_CPPFLAGS="${CPPFLAGS}" export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.8 + go mod tidy local GIT_COMMIT="$(git rev-list -1 HEAD)" local GIT_LAST_TAG="$(git describe --abbrev=0 --tags)" diff --git a/git-cliff/PKGBUILD b/git-cliff/PKGBUILD index 87b6df3d68..222d87d2b1 100644 --- a/git-cliff/PKGBUILD +++ b/git-cliff/PKGBUILD @@ -17,7 +17,7 @@ prepare() { cd "$pkgname-$pkgver" mkdir completions/ mkdir man/ - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/git-delta/PKGBUILD b/git-delta/PKGBUILD index d58fe0408a..ac4e9c05fb 100644 --- a/git-delta/PKGBUILD +++ b/git-delta/PKGBUILD @@ -27,7 +27,7 @@ prepare() { cd "$_pkgname-$pkgver" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/git-grab/PKGBUILD b/git-grab/PKGBUILD index 1ad7303ec9..368edd3b2c 100644 --- a/git-grab/PKGBUILD +++ b/git-grab/PKGBUILD @@ -25,7 +25,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch 
--locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gitlab-exporter/PKGBUILD b/gitlab-exporter/PKGBUILD index 312ab48710..39564be273 100644 --- a/gitlab-exporter/PKGBUILD +++ b/gitlab-exporter/PKGBUILD @@ -20,7 +20,7 @@ validpgpkeys=( prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gitlab-gitaly/PKGBUILD b/gitlab-gitaly/PKGBUILD index 5a1a76a61e..e6b430c942 100644 --- a/gitlab-gitaly/PKGBUILD +++ b/gitlab-gitaly/PKGBUILD @@ -36,6 +36,13 @@ prepare() { build() { cd "$_archive" + pushd ruby + gem-2.7 sources -r https://rubygems.org/ + gem-2.7 sources -a https://rubygems.loongnix.cn + gem-2.7 sources -c +# bundle-2.7 config force_ruby_platform true # build from sources as some prebuilt gems are not available for newer ruby +# bundle-2.7 install --path vendor/bundle + popd make V=1 BUILD_TAGS="tracer_static tracer_static_jaeger" } diff --git a/gitlab-runner/PKGBUILD b/gitlab-runner/PKGBUILD index b65a65977a..e4e46adc50 100644 --- a/gitlab-runner/PKGBUILD +++ b/gitlab-runner/PKGBUILD @@ -75,6 +75,8 @@ build() { export GOFLAGS="-buildmode=pie -trimpath -ldflags=-linkmode=external -mod=readonly -modcacherw" cd gitlab-runner + go mod edit -replace=github.com/cilium/ebpf=github.com/cilium/ebpf@v0.12.3 + go mod tidy go build -o gitlab-runner . } diff --git a/gitlab-shell/PKGBUILD b/gitlab-shell/PKGBUILD index ca2f7ba55f..2926376ca8 100644 --- a/gitlab-shell/PKGBUILD +++ b/gitlab-shell/PKGBUILD @@ -43,6 +43,7 @@ prepare() { patch -p1 < ../configs.patch # At this point config file should not contain any references to '/home/git' +git clone https://github.com/golang/sys.git } build() { @@ -52,7 +53,10 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOFLAGS="-trimpath -mod=readonly -modcacherw" +export GOPATH="$srcdir/build:/usr/share/gocode" +go mod tidy +go mod edit -replace=golang.org/x/sys@v0.0.0-20210412220455-f1c623a9e750=$srcdir/sys make build } diff --git a/gitoxide/PKGBUILD b/gitoxide/PKGBUILD index e88f716373..05602e4886 100644 --- a/gitoxide/PKGBUILD +++ b/gitoxide/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('319b4838b1555b308cac1f945ff648f29f485df387a10aac48d78d1b1bccb7585a328e3 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gitui/PKGBUILD b/gitui/PKGBUILD index db6b058db6..a6f647616a 100644 --- a/gitui/PKGBUILD +++ b/gitui/PKGBUILD @@ -19,7 +19,7 @@ b2sums=('a861679de253f179e40bdae65d161f4407778edeebad1bd5b358601d4e813fe04fe8dca prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gloox/PKGBUILD b/gloox/PKGBUILD index 53518345fd..1a76551a6c 100644 --- a/gloox/PKGBUILD +++ b/gloox/PKGBUILD @@ -9,8 +9,15 @@ arch=(loong64 x86_64) url="https://camaya.net/gloox" license=("GPL") depends=('libidn' 'gnutls') -source=(https://camaya.net/download/gloox-$pkgver.tar.bz2) -sha256sums=('0b8b7371439bc58d9e51384b616c964b18b7b41b87af1b7855104380eda86ffb') +source=(https://camaya.net/download/gloox-$pkgver.tar.bz2 + gloox-fix-build.patch) +sha256sums=('0b8b7371439bc58d9e51384b616c964b18b7b41b87af1b7855104380eda86ffb' + 
'4476111313494a51f6faca57c246ad7179c4eb4c40c7c47231e8a47f32309212') + +prepare() { + cd "$srcdir"/gloox-$pkgver + patch -p1 -i $srcdir/gloox-fix-build.patch +} build() { cd "$srcdir"/gloox-$pkgver diff --git a/gloox/gloox-fix-build.patch b/gloox/gloox-fix-build.patch new file mode 100644 index 0000000000..1ff6f07873 --- /dev/null +++ b/gloox/gloox-fix-build.patch @@ -0,0 +1,188 @@ +--- a/src/tests/tag/tag_perf.cpp ++++ b/src/tests/tag/tag_perf.cpp +@@ -20,6 +20,7 @@ using namespace gloox; + #include + #include + #include // [s]print[f] ++#include + + #include + +--- a/src/tests/zlib/zlib_perf.cpp ++++ b/src/tests/zlib/zlib_perf.cpp +@@ -24,6 +24,7 @@ using namespace gloox; + #include + #include + #include // [s]print[f] ++#include + + #ifdef HAVE_ZLIB + +Index: gloox-1.0.24/src/examples/adhoc_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/adhoc_example.cpp ++++ gloox-1.0.24/src/examples/adhoc_example.cpp +@@ -25,6 +25,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + + class AdhocTest : public ConnectionListener, AdhocCommandProvider, LogHandler +Index: gloox-1.0.24/src/examples/annotations_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/annotations_example.cpp ++++ gloox-1.0.24/src/examples/annotations_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class AnnotationsTest : public AnnotationsHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/bookmarkstorage_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/bookmarkstorage_example.cpp ++++ gloox-1.0.24/src/examples/bookmarkstorage_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class BookmarkStorageTest : public BookmarkHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/component_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/component_example.cpp ++++ gloox-1.0.24/src/examples/component_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class ComponentTest : public DiscoHandler, ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/disco_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/disco_example.cpp ++++ gloox-1.0.24/src/examples/disco_example.cpp +@@ -24,6 +24,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class DiscoTest : public DiscoHandler, ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/e2ee_client.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/e2ee_client.cpp ++++ gloox-1.0.24/src/examples/e2ee_client.cpp +@@ -29,6 +29,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + #ifdef WIN32 + #include +Index: gloox-1.0.24/src/examples/e2ee_server.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/e2ee_server.cpp ++++ gloox-1.0.24/src/examples/e2ee_server.cpp +@@ -29,6 +29,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + #ifdef WIN32 + #include +Index: gloox-1.0.24/src/examples/privacylist_example.cpp 
+=================================================================== +--- gloox-1.0.24.orig/src/examples/privacylist_example.cpp ++++ gloox-1.0.24/src/examples/privacylist_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class PLTest : public PrivacyListHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/privatexml_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/privatexml_example.cpp ++++ gloox-1.0.24/src/examples/privatexml_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class PrivateXMLTest : public PrivateXMLHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/register_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/register_example.cpp ++++ gloox-1.0.24/src/examples/register_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class RegTest : public RegistrationHandler, ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/reset_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/reset_example.cpp ++++ gloox-1.0.24/src/examples/reset_example.cpp +@@ -23,6 +23,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class RosterTest : public ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/roster_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/roster_example.cpp ++++ gloox-1.0.24/src/examples/roster_example.cpp +@@ -27,6 +27,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class RosterTest : public RosterListener, ConnectionListener, LogHandler, MessageHandler + { +Index: gloox-1.0.24/src/examples/vcard_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/vcard_example.cpp ++++ gloox-1.0.24/src/examples/vcard_example.cpp +@@ -26,6 +26,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class VCardTest : public ConnectionListener, LogHandler, VCardHandler + { +Index: gloox-1.0.24/src/examples/ft_recv.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/ft_recv.cpp ++++ gloox-1.0.24/src/examples/ft_recv.cpp +@@ -27,6 +27,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + #if defined( WIN32 ) || defined( _WIN32 ) + # include diff --git a/glusterfs/PKGBUILD b/glusterfs/PKGBUILD index 4659147377..0b7c407a52 100644 --- a/glusterfs/PKGBUILD +++ b/glusterfs/PKGBUILD @@ -20,7 +20,7 @@ backup=('etc/glusterfs/glusterd.vol' 'etc/glusterfs/glusterd.vol' 'etc/glusterfs/glusterfs-georep-logrotate' 'etc/glusterfs/glusterfs-logrotate') -depends=(fuse python libxml2 libaio liburcu attr rpcbind liburing gperftools) +depends=(fuse python libxml2 libaio liburcu attr rpcbind liburing) makedepends=(rpcsvc-proto) optdepends=('glib2: qemu-block' 'python-prettytable: gluster-georep-sshkey') @@ -46,6 +46,7 @@ build() { --libexecdir=/usr/lib/$pkgname \ --with-systemddir=/usr/lib/systemd/system \ --with-tmpfilesdir=/usr/lib/tmpfiles.d \ + --without-tcmalloc \ --enable-gnfs \ LEXLIB= make diff --git a/gn/PKGBUILD b/gn/PKGBUILD index 670622197b..8cdaa36980 100644 --- a/gn/PKGBUILD 
+++ b/gn/PKGBUILD @@ -10,16 +10,28 @@ url="https://gn.googlesource.com/gn/" license=('BSD') depends=('gcc-libs') makedepends=('clang' 'ninja' 'python' 'git') -source=(git+https://gn.googlesource.com/gn#commit=$_commit) -sha256sums=('SKIP') +source=(git+https://gn.googlesource.com/gn#commit=$_commit +gn-fix-build.patch) +sha256sums=('SKIP' + '072db93d2c4a1486a5a70aa7a17e562e3478bbff5c2d5b219729ddda630bb31e') pkgver() { cd $pkgname echo 0.$(git rev-list --count initial-commit..).$(git rev-parse --short=8 HEAD) } +prepare() { + cd $pkgname + patch -p1 -i $srcdir/gn-fix-build.patch +} + build() { cd $pkgname +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} ./build/gen.py ninja -C out } diff --git a/gn/gn-fix-build.patch b/gn/gn-fix-build.patch new file mode 100644 index 0000000000..59d9ff1515 --- /dev/null +++ b/gn/gn-fix-build.patch @@ -0,0 +1,10 @@ +--- gn/src/base/containers/span.h 2023-03-10 20:15:49.702581313 +0800 ++++ gn/src/base/containers/span.h 2023-03-10 20:17:02.498893761 +0800 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include "base/logging.h" + #include "base/stl_util.h" diff --git a/gnome-control-center/PKGBUILD b/gnome-control-center/PKGBUILD index f8763cd05e..841d43594b 100644 --- a/gnome-control-center/PKGBUILD +++ b/gnome-control-center/PKGBUILD @@ -66,7 +66,7 @@ makedepends=( modemmanager python ) -checkdepends=( +makedepends+=( python-dbusmock python-gobject xorg-server-xvfb diff --git a/gnome-dictionary/gnome-dictionary-fix-meson.patch b/gnome-dictionary/gnome-dictionary-fix-meson.patch new file mode 100644 index 0000000000..7b36ac92c5 --- /dev/null +++ b/gnome-dictionary/gnome-dictionary-fix-meson.patch @@ -0,0 +1,34 @@ +diff --git a/data/appdata/meson.build b/data/appdata/meson.build +index 1dc4ed7..d37e144 100644 +--- a/data/appdata/meson.build ++++ b/data/appdata/meson.build +@@ -1,6 +1,6 @@ + appdata_conf = configuration_data() + appdata_conf.set('application_id', application_id) +-i18n.merge_file('appdata', ++i18n.merge_file( + input: configure_file( + input: 'org.gnome.Dictionary.appdata.xml.in.in', + output: 'org.gnome.Dictionary.appdata.xml.in', +diff --git a/data/meson.build b/data/meson.build +index 660e6b8..fb0aefd 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -3,7 +3,7 @@ subdir('appdata') + desktop_conf = configuration_data() + desktop_conf.set('icon', application_id) + desktop_conf.set('application_id', application_id) +-i18n.merge_file('desktop', ++i18n.merge_file( + input: configure_file( + input: 'org.gnome.Dictionary.desktop.in.in', + output: 'org.gnome.Dictionary.desktop.in', +@@ -45,7 +45,7 @@ sources = [ + ] + + foreach s: sources +- i18n.merge_file('sources', ++ i18n.merge_file( + input: '@0@.in'.format(s), + output: s, + install: true, diff --git a/gnome-font-viewer/gnome-font-viewer-meson.patch b/gnome-font-viewer/gnome-font-viewer-meson.patch new file mode 100644 index 0000000000..c530794bd2 --- /dev/null +++ b/gnome-font-viewer/gnome-font-viewer-meson.patch @@ -0,0 +1,25 @@ +diff --git a/data/meson.build b/data/meson.build +index bfc9caa..8e42134 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -1,6 +1,6 @@ + appdatadir = join_paths(datadir, 'metainfo') + appdata_file = 'org.gnome.font-viewer.appdata.xml' +-merged_appdata = i18n.merge_file(appdata_file, ++merged_appdata = i18n.merge_file( + input: appdata_file + '.in', + output: appdata_file, + po_dir: '../po', +diff 
--git a/src/meson.build b/src/meson.build +index f863d45..826c59b 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -39,7 +39,7 @@ desktop_file = 'org.gnome.font-viewer.desktop' + desktop_conf = configuration_data() + desktop_conf.set('VERSION', meson.project_version()) + desktop_conf.set('APPLICATION_ID', application_id) +-i18n.merge_file(desktop_file, ++i18n.merge_file( + input: configure_file(input: desktop_file + '.in.in', + output: desktop_file + '.in', + configuration: desktop_conf), diff --git a/gnome-mplayer/PKGBUILD b/gnome-mplayer/PKGBUILD index a5d275734e..b6b0817a32 100644 --- a/gnome-mplayer/PKGBUILD +++ b/gnome-mplayer/PKGBUILD @@ -5,7 +5,7 @@ pkgname=gnome-mplayer pkgver=1.0.9 -pkgrel=8 +pkgrel=10 pkgdesc='GTK/Gnome interface around MPlayer' arch=('loong64' 'x86_64') url='https://sites.google.com/site/kdekorte2/gnomemplayer' diff --git a/gnome-remote-desktop/PKGBUILD b/gnome-remote-desktop/PKGBUILD index 623a252411..82ddffafdc 100644 --- a/gnome-remote-desktop/PKGBUILD +++ b/gnome-remote-desktop/PKGBUILD @@ -33,7 +33,7 @@ makedepends=( git meson ) -checkdepends=( +makedepends+=( dbus-broker libegl mutter diff --git a/gnome-tetravex/PKGBUILD b/gnome-tetravex/PKGBUILD index 98b0332dd9..76669b62ab 100644 --- a/gnome-tetravex/PKGBUILD +++ b/gnome-tetravex/PKGBUILD @@ -12,8 +12,9 @@ depends=(gtk3) makedepends=(meson gobject-introspection vala yelp-tools appstream-glib git) groups=(gnome-extra) _commit=76c564d4cd5aaaf3e2eea89d538358070b019753 # tags/3.38.2^0 -source=("git+https://gitlab.gnome.org/GNOME/gnome-tetravex.git#commit=$_commit") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/GNOME/gnome-tetravex.git#commit=$_commit" + gnome-tetravex-fix-meson.patch) +sha256sums=('SKIP' 'SKIP') pkgver() { cd $pkgname @@ -22,6 +23,7 @@ pkgver() { prepare() { cd $pkgname + patch -p1 -i $srcdir/gnome-tetravex-fix-meson.patch } build() { diff --git a/gnome-tetravex/gnome-tetravex-fix-meson.patch b/gnome-tetravex/gnome-tetravex-fix-meson.patch new file mode 100644 index 0000000000..8c2cdc2567 --- /dev/null +++ b/gnome-tetravex/gnome-tetravex-fix-meson.patch @@ -0,0 +1,22 @@ +diff --git a/data/meson.build b/data/meson.build +index bb77248..b881e8a 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -34,7 +34,7 @@ endif + + # Desktop file + if get_option('build_gui') +- desktop_file = i18n.merge_file ('desktop-file', ++ desktop_file = i18n.merge_file ( + input: project_id + '.desktop.in', + output: project_id + '.desktop', + install: true, +@@ -56,7 +56,7 @@ endif + + # AppData file + if get_option('build_gui') +- appdata_file = i18n.merge_file ('appdata-file', ++ appdata_file = i18n.merge_file ( + input: project_id + '.appdata.xml.in', + output: project_id + '.appdata.xml', + install: true, diff --git a/gnome-tour/PKGBUILD b/gnome-tour/PKGBUILD index afbf907c17..bbd9aff284 100644 --- a/gnome-tour/PKGBUILD +++ b/gnome-tour/PKGBUILD @@ -23,6 +23,12 @@ export CARGO_PROFILE_RELEASE_DEBUG=2 build() { arch-meson $pkgname-$pkgver build + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < ++ ++typedef wchar_t CHAR16; ++#define WCHAR CHAR16 + + typedef uint64_t UINT64; + typedef int64_t INT64; +@@ -54,12 +55,13 @@ typedef int32_t INT32; + + typedef uint16_t UINT16; + typedef int16_t INT16; ++ + typedef uint8_t UINT8; ++typedef char CHAR8; + typedef int8_t INT8; +-typedef __WCHAR_TYPE__ WCHAR; + + #undef VOID +-#define VOID void ++typedef void VOID; + + typedef int64_t INTN; + typedef uint64_t UINTN; diff --git a/gnugo/PKGBUILD 
b/gnugo/PKGBUILD index 826d0ec9a7..9a3de24406 100644 --- a/gnugo/PKGBUILD +++ b/gnugo/PKGBUILD @@ -19,6 +19,8 @@ sha256sums=('da68d7a65f44dcf6ce6e4e630b6f6dd9897249d34425920bfdd4e07ff1866a72' build() { cd "${srcdir}/${pkgname}-${pkgver}" CFLAGS+=' -fcommon' # https://wiki.gentoo.org/wiki/Gcc_10_porting_notes/fno_common + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr make } diff --git a/go-md2man/PKGBUILD b/go-md2man/PKGBUILD index d6d9b0b575..02f63e3f04 100644 --- a/go-md2man/PKGBUILD +++ b/go-md2man/PKGBUILD @@ -14,8 +14,9 @@ sha256sums=('7ca3a04bb4ab83387538235decc42a535097a05d2fb9f2266d0c47b33119501f') build() { cd "$pkgname-$pkgver" - export GOFLAGS="-buildmode=pie -mod=vendor -trimpath" + export GOFLAGS="-mod=vendor -trimpath" export CGO_LDFLAGS="$LDFLAGS" + export GOPROXY=https://goproxy.cn go build -o go-md2man . ./go-md2man -in=go-md2man.1.md -out=go-md2man.1 } diff --git a/go/PKGBUILD b/go/PKGBUILD index cb5b9cd8bb..522a7fdf2c 100644 --- a/go/PKGBUILD +++ b/go/PKGBUILD @@ -24,14 +24,23 @@ makedepends=(git go) replaces=(go-pie) provides=(go-pie) options=(!strip staticlibs) -source=(https://go.dev/dl/go${pkgver}.src.tar.gz{,.asc}) +source=(https://go.dev/dl/go${pkgver}.src.tar.gz{,.asc} +http://public.loongarch.dev/sources/go/go1.21p.tar.gz/9a6b12102fddd56e29e41a67d0494166/go1.21p.tar.gz) validpgpkeys=('EB4C1BFD4F042F6DDDCCEC917721F63BD38B4796') sha256sums=('124926a62e45f78daabbaedb9c011d97633186a33c238ffc1e25320c02046248' - 'SKIP') + 'SKIP' + 'a0ec920455ec49777d4bba8ce64f80b6c93458e62112c7178fd7ac34ecbfb506') + +prepare() { + cd "$pkgname" + for i in $srcdir/go1.21p/*.patch; + do + patch -p1 -i $i + done +} build() { - export GOARCH=amd64 - export GOAMD64=v1 # make sure we're building for the right x86-64 version + export GOARCH=loong64 export GOROOT_FINAL=/usr/lib/go export GOROOT_BOOTSTRAP=/usr/lib/go @@ -50,7 +59,7 @@ package() { cd "$pkgname" install -d "$pkgdir/usr/bin" "$pkgdir/usr/lib/go" "$pkgdir/usr/share/doc/go" \ - "$pkgdir/usr/lib/go/pkg/linux_amd64_"{dynlink,race} + "$pkgdir/usr/lib/go/pkg/linux_loong64_"{dynlink,race} cp -a bin pkg src lib misc api test "$pkgdir/usr/lib/go" # We can't strip all binaries and libraries, diff --git a/godot/PKGBUILD b/godot/PKGBUILD index a89c7a8a38..2ee1b537cc 100644 --- a/godot/PKGBUILD +++ b/godot/PKGBUILD @@ -38,7 +38,7 @@ build() { CFLAGS="$CFLAGS -fPIC -Wl,-z,relro,-z,now -w -I/usr/include/mbedtls2" \ CXXFLAGS="$CXXFLAGS -fPIC -Wl,-z,relro,-z,now -w -I/usr/include/mbedtls2" \ LINKFLAGS="$LDFLAGS -L/usr/lib/mbedtls2" \ - arch=$CARCH \ + arch=`uname -m` \ builtin_embree=no \ builtin_enet=yes \ builtin_freetype=no \ @@ -82,5 +82,5 @@ package() { install -Dm644 misc/dist/linux/godot.6 "$pkgdir/usr/share/man/man6/godot.6" install -Dm644 misc/dist/linux/org.godotengine.Godot.xml \ "$pkgdir/usr/share/mime/packages/org.godotengine.Godot.xml" - install -Dm755 bin/godot.linuxbsd.editor.$CARCH "$pkgdir/usr/bin/godot" + install -Dm755 bin/godot.linuxbsd.editor.`uname -m` "$pkgdir/usr/bin/godot" } diff --git a/gpg-tui/PKGBUILD b/gpg-tui/PKGBUILD index 3d20767b81..7e85a02580 100644 --- a/gpg-tui/PKGBUILD +++ b/gpg-tui/PKGBUILD @@ -21,7 +21,7 @@ sha512sums=('819481ed5f52c8092a8c711e642653955573250183a436278beb396d069c9734a1b prepare() { cd "$pkgname-$pkgver" mkdir completions/ - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gping/PKGBUILD b/gping/PKGBUILD index 
87650e62ae..eab8b9daa6 100644 --- a/gping/PKGBUILD +++ b/gping/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { mv "$pkgname-$pkgname-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gptfdisk/PKGBUILD b/gptfdisk/PKGBUILD index 289ce803cf..4b2ee74c68 100644 --- a/gptfdisk/PKGBUILD +++ b/gptfdisk/PKGBUILD @@ -37,6 +37,8 @@ prepare() { build() { cd "$srcdir/$pkgname-$pkgver" + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} make } diff --git a/graphviz/PKGBUILD b/graphviz/PKGBUILD index f9a96d33aa..eba587dbe4 100644 --- a/graphviz/PKGBUILD +++ b/graphviz/PKGBUILD @@ -11,8 +11,8 @@ url='https://www.graphviz.org/' license=('EPL') arch=('loong64' 'x86_64') depends=('libltdl' 'gd' 'librsvg' 'ghostscript' 'pango' 'gts' 'gsfonts') -makedepends=('swig' 'mono' 'guile' 'lua' 'perl' 'python' 'r' 'tk' 'qt6-base' 'gtk2') -optdepends=('mono: sharp bindings' +makedepends=('swig' 'guile' 'lua' 'perl' 'python' 'r' 'tk' 'qt6-base' 'gtk2') +optdepends=(#'mono: sharp bindings' 'guile: guile bindings' 'lua: lua bindings' 'perl: perl bindings' diff --git a/grcov/PKGBUILD b/grcov/PKGBUILD index 94b8b841f5..389e6b9920 100644 --- a/grcov/PKGBUILD +++ b/grcov/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('d8ea0fb293dc5431b502e8ffbd7c9a62336d9e878df9b78a8aed57098fbfb2d8') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/greetd-regreet/PKGBUILD b/greetd-regreet/PKGBUILD index 6951b104e7..7cc8c12676 100644 --- a/greetd-regreet/PKGBUILD +++ b/greetd-regreet/PKGBUILD @@ -24,7 +24,7 @@ sha256sums=('a658c91cdf242dfea814f0bfd0c4d877bd39e3af498d36e5024061e3d07ea76b' prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/greetd-tuigreet/PKGBUILD b/greetd-tuigreet/PKGBUILD index 25220b4dfb..f2b7941688 100644 --- a/greetd-tuigreet/PKGBUILD +++ b/greetd-tuigreet/PKGBUILD @@ -23,7 +23,7 @@ sha256sums=('ed371ebe288a3e5782f01681c6c4ed4786b470184af286fa0e7b8898e47c154e' prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/greetd/PKGBUILD b/greetd/PKGBUILD index c361292f81..b24c34290a 100644 --- a/greetd/PKGBUILD +++ b/greetd/PKGBUILD @@ -30,7 +30,7 @@ sha256sums=('a0cec141dea7fd7838b60a52237692d0fd5a0169cf748b8f8379d8409a3768eb' prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/grex/PKGBUILD b/grex/PKGBUILD index 8abce5fb2c..5a73db61d1 100644 --- a/grex/PKGBUILD +++ b/grex/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('3715160417703a28447012abc70ea39548c4a3aaddebbfc6a3a6dc54dfe8f6856ff prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/grpc/PKGBUILD b/grpc/PKGBUILD index 8134770353..6e87b1ba0e 100644 --- a/grpc/PKGBUILD +++ b/grpc/PKGBUILD @@ -109,7 +109,7 @@ build() { check() { cd "$srcdir/$pkgbase-$pkgver" local _pyver=$(python -c "import sys; print('{0}{1}'.format(*sys.version_info[:2]))") - 
PYTHONPATH="pyb/lib.linux-$CARCH-cpython-$_pyver" python -c 'import grpc' + PYTHONPATH="pyb/lib.linux-`uname -m`-cpython-$_pyver" python -c 'import grpc' } package_grpc() { diff --git a/gssdp/PKGBUILD b/gssdp/PKGBUILD index d2b1a9fba0..4086995a92 100644 --- a/gssdp/PKGBUILD +++ b/gssdp/PKGBUILD @@ -41,6 +41,7 @@ prepare() { build() { local meson_options=( -D gtk_doc=true + -D manpages=false ) arch-meson gssdp build "${meson_options[@]}" diff --git a/gstreamer/PKGBUILD b/gstreamer/PKGBUILD index 8b2e68de3e..d34e0d2787 100644 --- a/gstreamer/PKGBUILD +++ b/gstreamer/PKGBUILD @@ -10,11 +10,11 @@ pkgname=( gst-plugins-good gst-plugins-bad gst-plugin-gtk - gst-plugin-msdk - gst-plugin-opencv +# gst-plugin-msdk +# gst-plugin-opencv gst-plugin-qml6 gst-plugin-qmlgl - gst-plugin-qsv +# gst-plugin-qsv gst-plugin-va gst-plugin-wpe gst-plugins-ugly @@ -55,7 +55,7 @@ makedepends=( libavtp libbs2b libdca libde265 libdvdnav libfdk-aac libfreeaptx libgme libkate libldac liblrdf libltc libmicrodns libmodplug libmpcdec libnice libopenmpt libsrtp libva libxkbcommon-x11 libxml2 lilv lv2 mjpegtools neon - openal opencv qrencode rtmpdump sbc shaderc soundtouch spandsp svt-hevc + openal opencv qrencode rtmpdump sbc shaderc soundtouch spandsp vulkan-headers vulkan-icd-loader vulkan-validation-layers webrtc-audio-processing wildmidi wpewebkit zbar zvbi zxing-cpp @@ -71,7 +71,7 @@ makedepends=( # gst-python python-gobject ) -checkdepends=(xorg-server-xvfb) +makedepends+=(xorg-server-xvfb) source=( "git+https://gitlab.freedesktop.org/gstreamer/gstreamer.git?signed#tag=$pkgver" "https://gstreamer.freedesktop.org/src/gstreamer-docs/gstreamer-docs-$pkgver.tar.xz"{,.asc} @@ -142,6 +142,10 @@ build() { -D gst-plugins-bad:wic=disabled -D gst-plugins-bad:win32ipc=disabled -D gst-plugins-ugly:sidplay=disabled + -D gst-plugins-bad:opencv=disabled + -D gst-plugins-bad:msdk=disabled + -D gst-plugins-bad:qsv=disabled + -D gst-plugins-bad:svthevcenc=disabled -D gst-editing-services:validate=disabled ) @@ -481,7 +485,7 @@ package_gst-plugins-bad() { libfreeaptx libgme libkate libldac liblrdf libltc libmicrodns libmodplug libmpcdec libopenmpt librsvg libsndfile libsrtp libwebp libxml2 lilv mjpegtools neon nettle openal openexr openjpeg2 openssl opus pango qrencode - rtmpdump sbc soundtouch spandsp srt svt-hevc webrtc-audio-processing + rtmpdump sbc soundtouch spandsp srt webrtc-audio-processing wildmidi x265 zbar zvbi zxing-cpp ) @@ -536,7 +540,7 @@ package_gst-plugins-bad() { usr/lib/gstreamer-1.0/libgstspandsp.so usr/lib/gstreamer-1.0/libgstsrt.so usr/lib/gstreamer-1.0/libgstsrtp.so - usr/lib/gstreamer-1.0/libgstsvthevcenc.so +#usr/lib/gstreamer-1.0/libgstsvthevcenc.so usr/lib/gstreamer-1.0/libgstteletext.so usr/lib/gstreamer-1.0/libgsttimecode.so usr/lib/gstreamer-1.0/libgstttmlsubs.so diff --git a/gtk3/PKGBUILD b/gtk3/PKGBUILD index e5fd4e0be0..16983f4574 100644 --- a/gtk3/PKGBUILD +++ b/gtk3/PKGBUILD @@ -77,6 +77,8 @@ prepare() { } build() { + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} local meson_options=( -D broadway_backend=true -D cloudproviders=true diff --git a/gtk4/gtk-objcopy.patch b/gtk4/gtk-objcopy.patch new file mode 100644 index 0000000000..47dac886af --- /dev/null +++ b/gtk4/gtk-objcopy.patch @@ -0,0 +1,36 @@ +Index: gtk/gtk/meson.build +=================================================================== +--- gtk.orig/gtk/meson.build ++++ gtk/gtk/meson.build +@@ -943,6 +943,7 @@ if not meson.is_cross_build() and build_ + command : [objcopy, + 
'--strip-all', + '--add-symbol','_gtk_resource_data=.data:0', ++ '--alt-elf-eflags=0x03', + '@INPUT@', + '@OUTPUT@']) + +Index: gtk/demos/gtk-demo/meson.build +=================================================================== +--- gtk.orig/demos/gtk-demo/meson.build ++++ gtk/demos/gtk-demo/meson.build +@@ -211,6 +211,7 @@ if build_machine.system() == 'linux' and + command : [objcopy, + '--strip-all', + '--add-symbol','_g_binary_gtkdemo_resource_data=.data:0', ++ '--alt-elf-eflags=0x03', + '@INPUT@', + '@OUTPUT@']) + +Index: gtk/demos/widget-factory/meson.build +=================================================================== +--- gtk.orig/demos/widget-factory/meson.build ++++ gtk/demos/widget-factory/meson.build +@@ -59,6 +59,7 @@ if build_machine.system() == 'linux' and + command : [objcopy, + '--strip-all', + '--add-symbol','_g_binary_widgetfactory_resource_data=.data:0', ++ '--alt-elf-eflags=0x03', + '@INPUT@', + '@OUTPUT@']) + diff --git a/gunicorn/PKGBUILD b/gunicorn/PKGBUILD index 1ca66967e1..8d11f725de 100644 --- a/gunicorn/PKGBUILD +++ b/gunicorn/PKGBUILD @@ -2,6 +2,7 @@ # Contributor: Jeremy "Ichimonji10" Audet # Contributor: pumpkin # Contributor: Vsevolod Balashov +export CHECKFUNC=1 pkgname=gunicorn pkgver=20.1.0 diff --git a/halp/PKGBUILD b/halp/PKGBUILD index 4850b12514..a867393b85 100644 --- a/halp/PKGBUILD +++ b/halp/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir completions/ mkdir man/ } diff --git a/handlr/PKGBUILD b/handlr/PKGBUILD index 8afab788a7..d54bae1f1d 100644 --- a/handlr/PKGBUILD +++ b/handlr/PKGBUILD @@ -18,7 +18,7 @@ sha512sums=('55779ad0c01e065678e1a57f338272f1d38057658fe6b7c54f7bc35595575aafe13 prepare() { cd "$pkgname-$pkgver" patch -Np1 -i "../$pkgname-bash-completion.patch" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/haskell-doctest-parallel/PKGBUILD b/haskell-doctest-parallel/PKGBUILD index 132094df4f..9b49fe9644 100644 --- a/haskell-doctest-parallel/PKGBUILD +++ b/haskell-doctest-parallel/PKGBUILD @@ -38,7 +38,7 @@ check() { # == An ugly hack to generate .ghc.environment without cabal-install == # doctest-parallel relies on this cabal-install feature to configure GHCi. 
# https://github.com/martijnbastiaan/doctest-parallel/issues/22 - _ghc_env_filename=.ghc.environment.$CARCH-linux-$(expac %v ghc | cut -d - -f 1) + _ghc_env_filename=.ghc.environment.`uname -m`-linux-$(expac %v ghc | cut -d - -f 1) echo -e "package-db dist/package.conf.inplace" > $_ghc_env_filename ls dist/package.conf.inplace/*.conf | sed 's|.*/\(.*\).conf$|package-id \1|' >> $_ghc_env_filename ls /usr/lib/ghc-9.0.2/package.conf.d/*.conf | sed 's|.*/\(.*\).conf$|package-id \1|' >> $_ghc_env_filename diff --git a/hck/PKGBUILD b/hck/PKGBUILD index 67244389e5..d163eb08d3 100644 --- a/hck/PKGBUILD +++ b/hck/PKGBUILD @@ -25,7 +25,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/hdf5/PKGBUILD b/hdf5/PKGBUILD index 51435e3952..6a69dc74e0 100644 --- a/hdf5/PKGBUILD +++ b/hdf5/PKGBUILD @@ -12,7 +12,7 @@ arch=(loong64 x86_64) url="https://www.hdfgroup.org/hdf5" license=(custom) depends=(zlib libaec bash) -makedepends=(cmake time gcc-fortran java-environment) +makedepends=(cmake time gcc-fortran) # java-environment) replaces=(hdf5-java) provides=(hdf5-java) source=(https://support.hdfgroup.org/ftp/HDF5/releases/${pkgname}-${pkgver:0:4}/${pkgname}-${pkgver/_/-}/src/${pkgname}-${pkgver/_/-}.tar.bz2) @@ -35,7 +35,7 @@ build() { -DHDF5_BUILD_HL_LIB=ON \ -DHDF5_BUILD_CPP_LIB=ON \ -DHDF5_BUILD_FORTRAN=ON \ - -DHDF5_BUILD_JAVA=ON \ + -DHDF5_BUILD_JAVA=OFF \ -DHDF5_ENABLE_Z_LIB_SUPPORT=ON \ -DHDF5_ENABLE_SZIP_SUPPORT=ON \ -DHDF5_ENABLE_SZIP_ENCODING=ON \ @@ -52,7 +52,7 @@ build() { --enable-hl \ --enable-cxx \ --enable-fortran \ - --enable-java \ + --disable-java \ --with-pic \ --with-zlib \ --with-szlib diff --git a/heh/PKGBUILD b/heh/PKGBUILD index c9e296946e..fb2c6088d3 100644 --- a/heh/PKGBUILD +++ b/heh/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('c44fc2ef6845080f9a022884dc864d5144636a3a9a7f4bdc8e1793a09d939704') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/helix/PKGBUILD b/helix/PKGBUILD index 32d3473ec8..e1c2b4b647 100644 --- a/helix/PKGBUILD +++ b/helix/PKGBUILD @@ -42,7 +42,7 @@ prepare() { # NOTE: we are renaming hx to helix so there is no conflict with hex (providing hx) sed -i "s|hx|helix|g" contrib/completion/hx.* sed -i 's|hx|helix|g' contrib/Helix.desktop - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/hexyl/PKGBUILD b/hexyl/PKGBUILD index caeddf1953..5a98ee97fe 100644 --- a/hexyl/PKGBUILD +++ b/hexyl/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('1c2ccbb21c7aad1d2c1daca7ed99009ec2e2a02a96dd8a73d6ba11d00291f0e81afdd79 prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/hidapi/hidapi-fix-build.patch b/hidapi/hidapi-fix-build.patch new file mode 100644 index 0000000000..86af3961bc --- /dev/null +++ b/hidapi/hidapi-fix-build.patch @@ -0,0 +1,12 @@ +Index: hidapi-hidapi-0.10.1/configure.ac +=================================================================== +--- hidapi-hidapi-0.10.1.orig/configure.ac ++++ hidapi-hidapi-0.10.1/configure.ac +@@ -13,7 +13,6 @@ LTLDFLAGS="-version-info ${lt_current}:$ + + AC_CONFIG_MACRO_DIR([m4]) + AM_INIT_AUTOMAKE([foreign -Wall -Werror]) +-AC_CONFIG_MACRO_DIR([m4]) + + 
m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) + LT_INIT diff --git a/himalaya/PKGBUILD b/himalaya/PKGBUILD index f3fbb00e0b..6e5185ba2a 100644 --- a/himalaya/PKGBUILD +++ b/himalaya/PKGBUILD @@ -17,7 +17,7 @@ _features='notmuch-backend,pgp-gpg' prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p {completions,man} } diff --git a/hitori/PKGBUILD b/hitori/PKGBUILD index be22d3a528..62fa0e1c72 100644 --- a/hitori/PKGBUILD +++ b/hitori/PKGBUILD @@ -17,8 +17,10 @@ makedepends=( ) groups=(gnome-extra) _commit=53db1397d6a711862023d7a3070e785235a98c87 # tags/44.0^0 -source=("git+https://gitlab.gnome.org/GNOME/hitori.git#commit=$_commit") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/GNOME/hitori.git#commit=$_commit" + hitori-fix-meson.patch) +sha256sums=('SKIP' + '20b67731b04efadbc3f5b03958a25eddacc17c05c842943a4cd99fe37fd0d503') pkgver() { cd hitori @@ -27,6 +29,7 @@ pkgver() { prepare() { cd hitori + patch -p1 -i $srcdir/hitori-fix-meson.patch } build() { diff --git a/hitori/hitori-fix-meson.patch b/hitori/hitori-fix-meson.patch new file mode 100644 index 0000000000..126106e2cd --- /dev/null +++ b/hitori/hitori-fix-meson.patch @@ -0,0 +1,21 @@ +diff --git a/data/meson.build b/data/meson.build +index 97b8e68..c66a233 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -1,6 +1,6 @@ + subdir('icons') + +-desktop_file = i18n.merge_file('desktop-file', ++desktop_file = i18n.merge_file( + type: 'desktop', + input: '@0@.desktop.in'.format(application_id), + output: '@0@.desktop'.format(application_id), +@@ -20,7 +20,7 @@ if desktop_file_validate.found() + ) + endif + +-appdata_file = i18n.merge_file('appdata-file', ++appdata_file = i18n.merge_file( + input: '@0@.appdata.xml.in'.format(application_id), + output: '@0@.appdata.xml'.format(application_id), + po_dir: join_paths(meson.source_root(), 'po'), diff --git a/hotdoc/PKGBUILD b/hotdoc/PKGBUILD index fd0e3f3a6d..c6b46f6119 100644 --- a/hotdoc/PKGBUILD +++ b/hotdoc/PKGBUILD @@ -34,7 +34,7 @@ build() { } check() { - cd ${pkgname}-${pkgver}/build/lib.linux-$CARCH-cpython-* + cd ${pkgname}-${pkgver}/build/lib.linux-`uname -m`-cpython-* python -m unittest } diff --git a/hplip/PKGBUILD b/hplip/PKGBUILD index de495dd3d5..b813b8019b 100644 --- a/hplip/PKGBUILD +++ b/hplip/PKGBUILD @@ -81,7 +81,7 @@ build() { ./configure --prefix=/usr \ --enable-qt5 \ --disable-qt4 \ - --enable-hpcups-install \ + --disable-hpcups-install \ --enable-cups-drv-install \ --disable-imageProcessor-build \ --enable-pp-build #--help diff --git a/hspell/PKGBUILD b/hspell/PKGBUILD index 8124786ffc..3bee1f35f7 100644 --- a/hspell/PKGBUILD +++ b/hspell/PKGBUILD @@ -54,10 +54,10 @@ package_hunspell-he() { popd # Install webengine dictionaries - install -d "$pkgdir"/usr/share/qt{,6}/qtwebengine_dictionaries/ - for _file in "$pkgdir"/usr/share/hunspell/*.dic; do - _filename=$(basename $_file) - /usr/lib/qt6/qwebengine_convert_dict $_file "$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} - ln -rs "$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} "$pkgdir"/usr/share/qt/qtwebengine_dictionaries/ - done +# install -d "$pkgdir"/usr/share/qt{,6}/qtwebengine_dictionaries/ +# for _file in "$pkgdir"/usr/share/hunspell/*.dic; do +# _filename=$(basename $_file) +# /usr/lib/qt6/qwebengine_convert_dict $_file "$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} +# ln -rs 
"$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} "$pkgdir"/usr/share/qt/qtwebengine_dictionaries/ +# done } diff --git a/htmlcxx/PKGBUILD b/htmlcxx/PKGBUILD index 71072d9c69..17f79e6ee8 100644 --- a/htmlcxx/PKGBUILD +++ b/htmlcxx/PKGBUILD @@ -11,8 +11,15 @@ url="http://gcc-libs.sourceforge.net/" license=('LGPL') depends=('glibc') provides=('htmlcxx' 'libhtmlcxx' 'libcss_parser') -source=("https://sourceforge.net/projects/$pkgname/files/v$pkgver/$pkgname-$pkgver.tar.gz") -sha256sums=('5d38f938cf4df9a298a5346af27195fffabfef9f460fc2a02233cbcfa8fc75c8') +source=("https://sourceforge.net/projects/$pkgname/files/v$pkgver/$pkgname-$pkgver.tar.gz" + htmlcxx-la64.patch) +sha256sums=('5d38f938cf4df9a298a5346af27195fffabfef9f460fc2a02233cbcfa8fc75c8' + '79994572157ce98aec93b6e2c3a3c3e93e3f8c848a12ca69cdce90399dcb9a5b') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/htmlcxx-la64.patch +} build() { cd $pkgname-$pkgver diff --git a/htmlcxx/htmlcxx-la64.patch b/htmlcxx/htmlcxx-la64.patch new file mode 100644 index 0000000000..354479f5ea --- /dev/null +++ b/htmlcxx/htmlcxx-la64.patch @@ -0,0 +1,26 @@ +Index: htmlcxx-0.87/html/CharsetConverter.h +=================================================================== +--- htmlcxx-0.87.orig/html/CharsetConverter.h ++++ htmlcxx-0.87/html/CharsetConverter.h +@@ -17,7 +17,7 @@ namespace htmlcxx + : std::runtime_error(arg) {} + }; + +- CharsetConverter(const std::string &from, const std::string &to) throw (Exception); ++ CharsetConverter(const std::string &from, const std::string &to) noexcept(false); + ~CharsetConverter(); + + std::string convert(const std::string &input); +Index: htmlcxx-0.87/html/CharsetConverter.cc +=================================================================== +--- htmlcxx-0.87.orig/html/CharsetConverter.cc ++++ htmlcxx-0.87/html/CharsetConverter.cc +@@ -7,7 +7,7 @@ + using namespace std; + using namespace htmlcxx; + +-CharsetConverter::CharsetConverter(const string &from, const string &to) throw (Exception) ++CharsetConverter::CharsetConverter(const string &from, const string &to) noexcept(false) + { + mIconvDescriptor = iconv_open(to.c_str(), from.c_str()); + if (mIconvDescriptor == (iconv_t)(-1)) diff --git a/htmlq/PKGBUILD b/htmlq/PKGBUILD index f4b0106a64..3ffb62a0e1 100644 --- a/htmlq/PKGBUILD +++ b/htmlq/PKGBUILD @@ -23,7 +23,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/httplz/PKGBUILD b/httplz/PKGBUILD index 05f8bc6407..21c8297f08 100644 --- a/httplz/PKGBUILD +++ b/httplz/PKGBUILD @@ -21,7 +21,7 @@ prepare() { cp Cargo.lock "${_pkgname}-${pkgver}" # fetch dependencies cd "${_pkgname}-${pkgver}" - cargo fetch --locked --target="${CARCH}-unknown-linux-gnu" + cargo fetch --locked --target="`uname -m`-unknown-linux-gnu" # rename man page mv "${_pkgname}.md" "${pkgname}.md" mkdir man diff --git a/hub/PKGBUILD b/hub/PKGBUILD index 7248466c06..84259f0d5b 100644 --- a/hub/PKGBUILD +++ b/hub/PKGBUILD @@ -18,13 +18,16 @@ makedepends=('go') source=("hub-$pkgver.tar.gz::https://github.com/github/hub/archive/v$pkgver.tar.gz" "fix-tests.patch::https://github.com/github/hub/commit/f1170f982c414ec53ebf35ad3b250226ec18a952.patch") sha256sums=('e19e0fdfd1c69c401e1c24dd2d4ecf3fd9044aa4bd3f8d6fd942ed1b2b2ad21a' - 'a7d5ab7ea437353b818f4934987455ae9b0d883722015761fac4fe48fd3b5256') + 'fb0213f5bec7a64d9d0f02e7fbe6acb99fdfc3fee5ce0bdc5a57900c36d68f31') 
b2sums=('b0ff7bcebca7f4b515acf412b756da8512dad7c89f1976cc749c68d70303ff7172b6817e92c839c1864b55ac137e67cf1c99fa07e09b01fd76ad3d40be8d2163' - '2fe3848bae8c26d56f8a9fb0fa81902931f70b7ddf7ef31ed0f141c7b1e631561616b38f904a712d5de4d258b4c29bb0758f22763017959849c2f183eb0062d2') + 'fd54c571e0f41a42318c83341f1abd8cf7d0649a33f0a3399b9b5f6f72d1d37b0980bc4f0631b0f4031cdee95217b00693447006b1fc266d66311a552a85c79a') prepare() { cd "${pkgname}-${pkgver}" patch -Np1 < "$srcdir/fix-tests.patch" + cd $srcdir + [ -d sys ] || git clone https://github.com/golang/sys.git + [ -d net ] || git clone https://github.com/golang/net.git } build() { @@ -34,7 +37,12 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOFLAGS="-trimpath -mod=readonly -modcacherw" + + rm -rf vendor/golang.org/x/sys/ + rm -rf vendor/golang.org/x/net/ + cp -r $srcdir/sys vendor/golang.org/x/sys/ + cp -r $srcdir/net vendor/golang.org/x/net/ make make man-pages diff --git a/hugo/PKGBUILD b/hugo/PKGBUILD index f2c5181106..e5281757ea 100644 --- a/hugo/PKGBUILD +++ b/hugo/PKGBUILD @@ -24,7 +24,7 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOFLAGS="-trimpath -mod=readonly -modcacherw" go build -tags extended ./hugo gen man diff --git a/hypercorn/PKGBUILD b/hypercorn/PKGBUILD index a8ed27bf8e..6701bb54ca 100644 --- a/hypercorn/PKGBUILD +++ b/hypercorn/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Maxime Gauduin +export CHECKFUNC=1 pkgname=hypercorn pkgver=0.16.0 diff --git a/hyperfine/PKGBUILD b/hyperfine/PKGBUILD index 9847fb7b32..009b9ce4df 100644 --- a/hyperfine/PKGBUILD +++ b/hyperfine/PKGBUILD @@ -21,7 +21,7 @@ sha256sums=('fea7b92922117ed04b9c84bb9998026264346768804f66baa40743c5528bed6b') prepare() { cd "$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/i3status-rust/PKGBUILD b/i3status-rust/PKGBUILD index d634547f1c..42561cd2a4 100644 --- a/i3status-rust/PKGBUILD +++ b/i3status-rust/PKGBUILD @@ -25,7 +25,7 @@ b2sums=('94b79a356151284eec41262606c9824f928de0e4a167a468fa23bbc917dbd83097e298d prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } diff --git a/iempluginsuite/PKGBUILD b/iempluginsuite/PKGBUILD index bb30f3d146..78b3f6d60a 100644 --- a/iempluginsuite/PKGBUILD +++ b/iempluginsuite/PKGBUILD @@ -209,6 +209,6 @@ package_iempluginsuite-vst3() { ) for name in "${_names[@]}"; do - install -vDm 755 build/$name/${name}_artefacts/None/VST3/$name.vst3/Contents/$CARCH-linux/$name.so -t "$pkgdir/usr/lib/vst3/$name.vst3/Contents/$CARCH-linux/" + install -vDm 755 build/$name/${name}_artefacts/None/VST3/$name.vst3/Contents/`uname -m`-linux/$name.so -t "$pkgdir/usr/lib/vst3/$name.vst3/Contents/`uname -m`-linux/" done } diff --git a/igrep/PKGBUILD b/igrep/PKGBUILD index ad59a1d03b..5f9d78c033 100644 --- a/igrep/PKGBUILD +++ b/igrep/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/imagemagick/PKGBUILD b/imagemagick/PKGBUILD index 7a38438586..b03e93833a 100644 --- 
a/imagemagick/PKGBUILD +++ b/imagemagick/PKGBUILD @@ -68,7 +68,7 @@ makedepends=(chrpath checkdepends=(gsfonts ttf-dejavu) replaces=(imagemagick-doc) -source=(https://imagemagick.org/archive/$_tarname.tar.xz{,.asc} +source=(https://imagemagick.org/archive/releases/$_tarname.tar.xz{,.asc} arch-fonts.diff) sha256sums=('ca23eb8f980cccbc9ea1e1490edd0fb33699fd2283790378049a505809a2f7d0' 'SKIP' @@ -102,7 +102,7 @@ build() { --enable-opencl \ --without-gslib \ --with-djvu \ - --with-fftw \ + --without-fftw \ --with-jxl \ --with-lqr \ --with-modules \ diff --git a/imlib2/PKGBUILD b/imlib2/PKGBUILD index 83127fb671..2f52a13b07 100644 --- a/imlib2/PKGBUILD +++ b/imlib2/PKGBUILD @@ -31,8 +31,7 @@ build() { ./configure \ --prefix=/usr \ --sysconfdir=/etc/imlib2 \ - --x-libraries=/usr/lib \ - --enable-amd64 + --x-libraries=/usr/lib make } diff --git a/inkscape/PKGBUILD b/inkscape/PKGBUILD index d1672155c0..9e2bdcc5b8 100644 --- a/inkscape/PKGBUILD +++ b/inkscape/PKGBUILD @@ -11,6 +11,7 @@ url='https://inkscape.org/' license=('GPL' 'LGPL') arch=('loong64' 'x86_64') makedepends=('cmake' 'boost' 'git') +#makedepends=('cmake' 'boost' 'git' 'libsoup') depends=( 'atkmm' diff --git a/ipmitool/PKGBUILD b/ipmitool/PKGBUILD index a965fd02ca..5e972e99d6 100644 --- a/ipmitool/PKGBUILD +++ b/ipmitool/PKGBUILD @@ -4,7 +4,7 @@ pkgname=ipmitool pkgver=1.8.19 -pkgrel=2 +pkgrel=3 pkgdesc="Command-line interface to IPMI-enabled devices" arch=('loong64' 'x86_64') url="https://github.com/ipmitool/ipmitool" @@ -31,4 +31,5 @@ package(){ # Install license install -Dm644 COPYING "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE" + curl https://www.iana.org/assignments/enterprise-numbers.txt > $pkgdir/usr/share/misc/enterprise-numbers } diff --git a/ipp-usb/PKGBUILD b/ipp-usb/PKGBUILD index e2b35b0a69..823dac9123 100644 --- a/ipp-usb/PKGBUILD +++ b/ipp-usb/PKGBUILD @@ -31,7 +31,8 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOPROXY=https://goproxy.cn + export GOFLAGS="-trimpath -mod=readonly -modcacherw" #go build -o "$pkgname" . 
make diff --git a/ipxe/PKGBUILD b/ipxe/PKGBUILD index fe05b8d277..6e01850c06 100644 --- a/ipxe/PKGBUILD +++ b/ipxe/PKGBUILD @@ -2,12 +2,13 @@ pkgname=ipxe pkgver=1.21.1 +_commit=c1834f323f4f6b9b46cd5895b1457a117381363f pkgrel=5 pkgdesc="Network bootloader" arch=(loong64 x86_64) url="https://ipxe.org" license=(GPL2 custom:UBDL) -makedepends=(cdrtools) +makedepends=(git cdrtools) optdepends=( 'bash: for run_ipxe' 'qemu-desktop: for run_ipxe' @@ -18,8 +19,7 @@ optdepends=( # the code signing setup is described in Arch Linux's releng repository: # https://gitlab.archlinux.org/archlinux/releng/-/blob/master/README.rst#code-signing source=( - $pkgname-$pkgver.tar.gz::https://github.com/$pkgname/$pkgname/archive/refs/tags/v$pkgver.tar.gz - $pkgname-1.21.1-fragmented_handshake.patch::https://github.com/ipxe/ipxe/pull/116/commits/ca9f5fc5645c60c00c3ca232d2a492aa1eb29c58.patch + git+https://github.com/$pkgname/$pkgname.git#commit=${_commit} arch.ipxe isrgrootx1.pem lets-encrypt-r3.pem @@ -27,42 +27,42 @@ source=( remote.ipxe general.h run_$pkgname + ipxe-la64.patch ) -sha512sums=('47400975110ed4ab95835aa1b7c8d5a6917c19c5713c6ab88bc0741a3adcd62245a9c4251d1f46fffc45289c6b18bf893f86dbc3b67d3189c41b7f198367ecaa' - '7b021b5720ddf71d3162d2d326a05e4d883562d91effce92a8c90368e69424ccf581d2d3bf6c5e1517e3b6cc5e4ab5edfdcd41c36368488b6d357d2fd00f63b0' - 'ec41e20333ce91b555d4f6a64f211323315a183466d8437404dc548287b96cc8aa4d2953bb5a496677f77e73b7b99752dc973688ade0ccab842fabb8f6127f47' +sha512sums=('SKIP' + 'baa5de9f2714d626041455f7d6764b3ed7a8d6a375bd7721312a5be3ccab93764e1f72d349d404196badf1e751435cc3f7f61800fd643e2035f9616be1770a00' 'b819e7965412dbeecc6417b1e57356d9b10e8e2feb0db1165947e7e1b7d882de226afb8457475f5107393a981d902c7f405500cadb6f61bd2acbca5d8c7cc1f4' '7ff2a6b6501b30806e19446d569db0348c0457c15a9c86f186d957607278ee3cbeedd8307e1ff6dc5c0740545192eada7c0f84cdeb8ff39e6b85bd3fc400a914' 'e3a8c74dcf95cb4b77ed379d2185ef56b6ab2f4c7bdaf5a68876d21aca4d7961b0d8090da7132c6f1797bdca24014dfea032129ee207282797b91e31b6dc4d48' '9162f528cd0080b9231785795f08d3229c52ce3c18ca5a6efcfbea5028e103a294ddef79a0f28ab64b8d0cdcb9e6cdd7fee797766ad2c3d1dbc3891ddeb4b553' - '080b5b7f1a02d6e3a4691e0e65f12a554ede2a783284357f4ef940eb506fec7ec477dc3060c67cf31999af99eba26b0bfa1495cb2a5baa5af4c133bdca2152af' - '4f026baf7d30ef33b660530001b3bcf8189a7d1a11603ccb126957d07070283907c8207dad912ff4c735b8a0376c8a5383fef2235ac3b71ef519d7201c079b93') -b2sums=('03871b5f89c6228a9082bb89c7b102d85e5f3afcd5fe0d93762e220fe162c9c3037a9918f30251fd103835d949335f99109a12559f560a5b686e65a7c24c6501' - '2c1ef1e1ffd1716e29e046ae4bc69e8b98f9116c1cb3d6e2e10a9119256194ea4fd510a9d4bf79b96504fb95f6bef0b2edce9b257d8d360224dfe1ce6029025c' - '13f73fbd49867a087cbb036562f067ee30e3a3718402363fd6c6d318bb819dde5728510c1459d7bb5906bec37469b2046a2ad148175b6ea4fb58ce68ee614d91' + '2522b1a76a466aa0d396d4616de38929ca3198e218f763545220a14f66127618bce2d46179999fd697e1d0f0a585ca1e58347b3a7dd5795331c395e1e5972788' + '4f026baf7d30ef33b660530001b3bcf8189a7d1a11603ccb126957d07070283907c8207dad912ff4c735b8a0376c8a5383fef2235ac3b71ef519d7201c079b93' + '4a74676c26f286811852566a7edf0b3399fd8165550848dd004c7b28bb9bf06990f4f232623ae5e2ba2edcd27b88d422225c0687d883551405e905aad7ce98a8') +b2sums=('SKIP' + '294a510a4ca0d80fcaa2b67f9083ca91ae17270f73bee35728a6c42519599f5d60896d4e279a794a8a0237de3e1a751356d670fb722b6507057303c0f1efec7f' '6d02d871afa45caaa2b22ea2ed48217012aeeb61c50b28e82cc0750344719bdb9ef4b0100abc524b12ec6cb2b1c0084f4d24ce480af87b52aa39d4d3714467ca' 
'44fc45af926d8c0a563b81640764a4ced266f857c72113839dcd5d441c030bb6f78576b04fcbd8b17f645ed4e2701a4634e55755f13210fa880f442ad6fbb5b1' 'a61f76a2ecbf344bb26e064146e4c6821ee195c7b7579cbf8c61d60ded3c3946d53329a8c2e795435ef5498bec97042472f186c13b4e0dc274da34d047f8f326' 'f38eec3584967f9a8d4f9f2cc39803de9fa21fd1406efe802c3422f6de30c79e4cd679e775a886f778a40aacb81b9c4120d7205178284cacf69fa7d43557a906' - 'a69a2dabf23b931aa062d20936510eda6bc9d6a61cded4b5e5960958b2a06642d527bb788b3fae9961dbf5d2ac18c63a6df69db52668cf904b75bd7366117b9b' - '9c7a8eb0f9aafdc336d7eac984b6f1fcbb875d1589fb4b67f45393054f66e916c1157e1bb4e8d02af68e6438dff68a812e57bbf685a0b477634891e49c1c3284') + 'cdcb27a945397e7a8ef5214a31b69c4120ce1608359e0b11bbdb191169d4e10404953dc42bf9351f75beec0d8c6727bc0d053978026e50f145ef0881dae91bc1' + '9c7a8eb0f9aafdc336d7eac984b6f1fcbb875d1589fb4b67f45393054f66e916c1157e1bb4e8d02af68e6438dff68a812e57bbf685a0b477634891e49c1c3284' + 'a044ef24fe2de06ce371f6f8c6b9eeb736d41057f1190c5eb93fd9d91374631bd68502112492d72cf020520ecb94bf3077f163fcf3c005ad286fea979591cf8b') prepare() { # fix issues with fragmented handshakes (e.g. fullchain.pem when using a letsencrypt certificate): # https://github.com/ipxe/ipxe/issues/407 - patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-1.21.1-fragmented_handshake.patch +#patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-1.21.1-fragmented_handshake.patch + patch -Np1 -d $pkgname -i ../ipxe-la64.patch + ln -s $pkgname $pkgname-$pkgver # symlink header with custom configuration into place - ln -sv ../../../../general.h $pkgname-$pkgver/src/config/local/ + ln -sv ../../../../general.h $pkgname/src/config/local/ } build() { local _file _certs="" local _options=( NO_WERROR=1 - bin/ipxe.lkrn - bin/ipxe.pxe - bin-i386-efi/ipxe.efi - bin-x86_64-efi/ipxe.efi + bin-loong64-efi/ipxe.efi -C src ) @@ -80,19 +80,13 @@ build() { make EMBED="$srcdir/arch.ipxe" CERT="$_certs" TRUST="$_certs" "${_options[@]}" # move binaries out of the way - mv -v src/bin/ipxe{,-arch}.lkrn - mv -v src/bin/ipxe{,-arch}.pxe - mv -v src/bin-i386-efi/ipxe{,-arch}.efi - mv -v src/bin-x86_64-efi/ipxe{,-arch}.efi + mv -v src/bin-loong64-efi/ipxe{,-arch}.efi # build remote images make EMBED="$srcdir/remote.ipxe" "${_options[@]}" # move binaries out of the way - mv -v src/bin/ipxe{,-remote}.lkrn - mv -v src/bin/ipxe{,-remote}.pxe - mv -v src/bin-i386-efi/ipxe{,-remote}.efi - mv -v src/bin-x86_64-efi/ipxe{,-remote}.efi + mv -v src/bin-loong64-efi/ipxe{,-remote}.efi # build default images make "${_options[@]}" @@ -103,8 +97,8 @@ package() { local _arch cd $pkgname-$pkgver - install -vDm 644 src/bin/ipxe{,-arch,-remote}.{lkrn,pxe} -t "$pkgdir/usr/share/$pkgname/" - for _arch in i386 x86_64; do +#install -vDm 644 src/bin/ipxe{,-arch,-remote}.{lkrn,pxe} -t "$pkgdir/usr/share/$pkgname/" + for _arch in loong64; do install -vDm 644 src/bin-$_arch-efi/ipxe{,-arch,-remote}.efi -t "$pkgdir/usr/share/$pkgname/$_arch/" done install -vDm 644 COPYING.UBDL -t "$pkgdir/usr/share/licenses/$pkgname/" diff --git a/ipxe/arch.ipxe b/ipxe/arch.ipxe index 929ed800d8..788d8fee0a 100644 --- a/ipxe/arch.ipxe +++ b/ipxe/arch.ipxe @@ -1,4 +1,43 @@ #!ipxe -ifconf -ntp pool.ntp.org -chain https://ipxe.archlinux.org/releng/netboot/archlinux.ipxe || shell +# + +:netconfig +dhcp || goto ipxeshell + +:prompt +prompt --key 0x02 --timeout 3000 Press Ctrl-B for the iPXE command line... && goto ipxeshell || + +# We call the default.ipxe script to show and confirm the ipxe client has enough feature. 
+set next-server-port 80 +chain http://${next-server}:${next-server-port}/default.ipxe || + +echo +echo Failed to boot to default menu. +echo Try to reload iPXE from server. +echo + +:netboot +chain http://58.49.29.194/loongarch/archlinux/netboot/archlinux.ipxe && set server http://58.49.29.194/loongarch/archlinux/ && goto exit || + +:reload +chain http://${gateway}/ipxe.efi && set server http://${gateway} && goto netconfig || + +:ipxeshell +echo +echo mac...............: ${mac} +echo ip................: ${ip} +echo netmask...........: ${netmask} +echo gateway...........: ${gateway} +echo dns...............: ${dns} +echo domain............: ${domain} +echo dhcp-server.......: ${dhcp-server} +echo filename..........: ${filename} +echo next-server.......: ${next-server} +echo hostname..........: ${hostname} +echo uuid..............: ${uuid} +echo serial............: ${serial} +echo +shell + +:exit +exit diff --git a/ipxe/general.h b/ipxe/general.h index 03c4458784..f03b49b8d4 100644 --- a/ipxe/general.h +++ b/ipxe/general.h @@ -4,7 +4,7 @@ #undef CRYPTO_80211_WPA /* WPA Personal, authenticating with passphrase */ // enable additional options -#define NET_PROTO_IPV6 /* IPv6 protocol */ +//#define NET_PROTO_IPV6 /* IPv6 protocol */ #define DOWNLOAD_PROTO_HTTPS /* Secure Hypertext Transfer Protocol */ #define DOWNLOAD_PROTO_NFS /* Network File System Protocol */ #define IMAGE_TRUST_CMD /* Image trust management commands */ diff --git a/ipxe/ipxe-la64.patch b/ipxe/ipxe-la64.patch new file mode 100644 index 0000000000..ca92c0d582 --- /dev/null +++ b/ipxe/ipxe-la64.patch @@ -0,0 +1,15 @@ +diff --git a/src/arch/loong64/Makefile b/src/arch/loong64/Makefile +index fd0bf137..ce2a7505 100644 +--- a/src/arch/loong64/Makefile ++++ b/src/arch/loong64/Makefile +@@ -13,6 +13,10 @@ ifeq ($(CCTYPE),gcc) + MNER_TEST = $(CC) -mno-explicit-relocs -x c -c /dev/null -o /dev/null >/dev/null 2>&1 + MNER_FLAGS := $(shell $(MNER_TEST) && $(ECHO) '-mno-explicit-relocs') + WORKAROUND_CFLAGS += $(MNER_FLAGS) ++ ++MNRX_TEST = $(CC) -mno-relax -x c -c /dev/null -o /dev/null >/dev/null 2>&1 ++MNRX_FLAGS := $(shell $(MNRX_TEST) && $(ECHO) '-mno-relax' || $(ECHO) '-Wa,-mno-relax' ) ++WORKAROUND_CFLAGS += $(MNRX_FLAGS) + endif + + # EFI requires -fshort-wchar, and nothing else currently uses wchar_t diff --git a/ispc/PKGBUILD b/ispc/PKGBUILD index dcd0ddddb1..cef96870bd 100644 --- a/ispc/PKGBUILD +++ b/ispc/PKGBUILD @@ -11,7 +11,7 @@ arch=(loong64 x86_64) url="https://ispc.github.io/" license=(BSD) depends=(clang gcc-libs glibc llvm-libs spirv-llvm-translator onetbb) -makedepends=(cmake git level-zero-headers level-zero-loader lib32-glibc llvm openmp python vc-intrinsics) +makedepends=(cmake git level-zero-headers level-zero-loader llvm openmp python vc-intrinsics) checkdepends=(intel-compute-runtime) optdepends=( 'intel-compute-runtime: GPU support' @@ -55,6 +55,11 @@ build() { -S $pkgname -W no-dev ) +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} cmake "${cmake_options[@]}" cmake --build build --verbose diff --git a/jack2/PKGBUILD b/jack2/PKGBUILD index 2d7d4be97c..ffd838bfe5 100644 --- a/jack2/PKGBUILD +++ b/jack2/PKGBUILD @@ -9,7 +9,7 @@ pkgname=(jack2 jack2-dbus jack2-docs) pkgdesc="The JACK low-latency audio server" pkgver=1.9.22 _commit=80149e552b56d6d57d754dc04d119b8170d27313 # refs/tags/v1.9.22 -pkgrel=1 +pkgrel=4 arch=(loong64 x86_64) url="https://github.com/jackaudio/jack2" 
license=(GPL2) diff --git a/java-openjdk/freedesktop-java.desktop b/java-openjdk/freedesktop-java.desktop deleted file mode 100644 index 78a4a552c9..0000000000 --- a/java-openjdk/freedesktop-java.desktop +++ /dev/null @@ -1,12 +0,0 @@ -[Desktop Entry] -Name=OpenJDK Java 21 Runtime -Name[fi]=OpenJDK Java 21 - ajonaikainen ympäristö -Comment=OpenJDK Java 21 Runtime -Comment[fi]=OpenJDK Java 21 - ajonaikainen ympäristö -Keywords=java;runtime -Exec=/usr/lib/jvm/java-21-openjdk/bin/java -jar -Terminal=false -Type=Application -Icon=java21-openjdk -MimeType=application/x-java-archive;application/java-archive;application/x-jar; -NoDisplay=true diff --git a/java-openjdk/freedesktop-jconsole.desktop b/java-openjdk/freedesktop-jconsole.desktop deleted file mode 100644 index dc76c0f041..0000000000 --- a/java-openjdk/freedesktop-jconsole.desktop +++ /dev/null @@ -1,11 +0,0 @@ -[Desktop Entry] -Name=OpenJDK Java 21 Console -Name[fi]=OpenJDK Java 21 - konsoli -Comment=OpenJDK Java 21 Monitoring & Management Console -Comment[fi]=OpenJDK Java 21 - valvonta- ja hallintakonsoli -Keywords=java;console;monitoring -Exec=/usr/lib/jvm/java-21-openjdk/bin/jconsole -Terminal=false -Type=Application -Icon=java21-openjdk -Categories=Application;System; diff --git a/java-openjdk/freedesktop-jshell.desktop b/java-openjdk/freedesktop-jshell.desktop deleted file mode 100644 index bce0aa6157..0000000000 --- a/java-openjdk/freedesktop-jshell.desktop +++ /dev/null @@ -1,9 +0,0 @@ -[Desktop Entry] -Name=OpenJDK Java 21 Shell -Comment=OpenJDK Java 21 Shell -Keywords=java;shell -Exec=/usr/lib/jvm/java-21-openjdk/bin/jshell -Terminal=true -Type=Application -Icon=java21-openjdk -Categories=Application;System; diff --git a/java-openjdk/install_jdk-openjdk.sh b/java-openjdk/install_jdk-openjdk.sh deleted file mode 100644 index e97f91fd81..0000000000 --- a/java-openjdk/install_jdk-openjdk.sh +++ /dev/null @@ -1,50 +0,0 @@ -THIS_JDK='java-21-openjdk' - -fix_default() { - if [ ! -x /usr/bin/java ]; then - /usr/bin/archlinux-java unset - echo "" - else - /usr/bin/archlinux-java get - fi -} - -post_install() { - default=$(fix_default) - case ${default} in - "") - /usr/bin/archlinux-java set ${THIS_JDK} - ;; - ${THIS_JDK}) - # Nothing - ;; - *) - echo "Default Java environment is already set to '${default}'" - echo "See 'archlinux-java help' to change it" - ;; - esac - - if [ ! -f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -post_upgrade() { - default=$(fix_default) - if [ -z "${default}" ]; then - /usr/bin/archlinux-java set ${THIS_JDK} - fi - - if [ ! -f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -pre_remove() { - if [ "x$(fix_default)" = "x${THIS_JDK}" ]; then - # Check JRE is still available - if [ -x /usr/lib/jvm/${THIS_JDK}/bin/java ]; then - /usr/bin/archlinux-java unset - fi - fi -} diff --git a/java-openjdk/install_jre-openjdk-headless.sh b/java-openjdk/install_jre-openjdk-headless.sh deleted file mode 100644 index 72c3bb17d9..0000000000 --- a/java-openjdk/install_jre-openjdk-headless.sh +++ /dev/null @@ -1,48 +0,0 @@ -THIS_JRE='java-21-openjdk' - -fix_default() { - if [ ! -x /usr/bin/java ]; then - /usr/bin/archlinux-java unset - echo "" - else - /usr/bin/archlinux-java get - fi -} - -post_install() { - default=$(fix_default) - case ${default} in - "") - /usr/bin/archlinux-java set ${THIS_JRE} - ;; - ${THIS_JRE}) - # Nothing - ;; - *) - echo "Default Java environment is already set to '${default}'" - echo "See 'archlinux-java help' to change it" - ;; - esac - - if [ ! 
-f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -post_upgrade() { - if [ -z "$(fix_default)" ]; then - /usr/bin/archlinux-java set ${THIS_JRE} - fi - - if [ ! -f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -pre_remove() { - default=$(fix_default) - if [ "x${default}" = "x${THIS_JRE}" ]; then - /usr/bin/archlinux-java unset - echo "No Java environment is set as default anymore" - fi -} diff --git a/java-openjdk/install_jre-openjdk.sh b/java-openjdk/install_jre-openjdk.sh deleted file mode 100644 index 9ffb19ee06..0000000000 --- a/java-openjdk/install_jre-openjdk.sh +++ /dev/null @@ -1,35 +0,0 @@ -THIS_JRE='java-21-openjdk' - -fix_default() { - if [ ! -x /usr/bin/java ]; then - /usr/bin/archlinux-java unset - echo "" - else - /usr/bin/archlinux-java get - fi -} - -post_install() { - default=$(fix_default) - case ${default} in - "") - /usr/bin/archlinux-java set ${THIS_JRE} - ;; - ${THIS_JRE}) - # Nothing - ;; - *) - echo "Default Java environment is already set to '${default}'" - echo "See 'archlinux-java help' to change it" - ;; - esac - - echo "when you use a non-reparenting window manager," - echo "set _JAVA_AWT_WM_NONREPARENTING=1 in /etc/profile.d/jre.sh" -} - -post_upgrade() { - if [ -z "$(fix_default)" ]; then - /usr/bin/archlinux-java set ${THIS_JRE} - fi -} diff --git a/java11-openjdk/PKGBUILD b/java11-openjdk/PKGBUILD index e0ea8e7527..bbebf73c8b 100644 --- a/java11-openjdk/PKGBUILD +++ b/java11-openjdk/PKGBUILD @@ -24,23 +24,26 @@ _git_tag=jdk-${_majorver}.${_minorver}.${_securityver}+${_updatever} arch=('loong64' 'x86_64') url='https://openjdk.java.net/' license=('custom') -makedepends=('java-environment>=10' 'java-environment<12' 'cpio' 'unzip' 'zip' 'libelf' 'libcups' 'libx11' - 'libxrender' 'libxtst' 'libxt' 'libxext' 'libxrandr' 'alsa-lib' 'pandoc' +makedepends=('jdk11-openjdk' 'cpio' 'unzip' 'zip' 'libelf' 'libcups' 'libx11' + 'libxrender' 'libxtst' 'libxt' 'libxext' 'libxrandr' 'alsa-lib' 'graphviz' 'freetype2' 'libjpeg-turbo' 'giflib' 'libpng' 'lcms2' 'libnet' 'bash' 'harfbuzz' 'glibc' 'gcc-libs') options=(!lto) source=(https://github.com/openjdk/jdk${_majorver}u/archive/${_git_tag}.tar.gz freedesktop-java.desktop freedesktop-jconsole.desktop - freedesktop-jshell.desktop) + freedesktop-jshell.desktop + jdk11-11.0.20.1-la64.patch) sha256sums=('c24c8708244e78c4418ff8680ae2122b1b7ff9bc4d0bf3187d3579ba84c1b29d' '575587ad58dfa9908f046d307b9afc7b0b2eb20a1eb454f8fdbbd539ea7b3d01' '2f57b7c7dd671eabe9fa10c4f1283573e99d7f7c36eccd82c95b705979a2e8cb' - 'f271618a8c2a892b554caf26857af41efdf0d8bcb95d57ce7ba535d6979e96da') + 'f271618a8c2a892b554caf26857af41efdf0d8bcb95d57ce7ba535d6979e96da' + 'c55c5c1a8fbc5721f4c1ebdfba6101c4283b1ce69c8b828d54f49bc5c6e8ccb7') case "${CARCH}" in x86_64) _JARCH='x86_64';; i686) _JARCH='x86';; + loong64) _JARCH='loongarch64';; esac _jvmdir=/usr/lib/jvm/java-${_majorver}-openjdk @@ -52,6 +55,12 @@ _nonheadless=(lib/libawt_xawt.so lib/libjsound.so lib/libsplashscreen.so) +prepare() { + cd ${_jdkdir} + + patch -Np1 -i "${srcdir}"/jdk11-11.0.20.1-la64.patch +} + build() { cd ${_jdkdir} @@ -105,7 +114,7 @@ build() { ${NUM_PROC_OPT} #--disable-javac-server - make images legacy-jre-image docs + make images legacy-jre-image #docs # https://bugs.openjdk.java.net/browse/JDK-8173610 find "../${_imgdir}" -iname '*.so' -exec chmod +x {} \; @@ -275,7 +284,7 @@ package_openjdk11-doc() { provides=("openjdk${_majorver}-doc=${pkgver}-${pkgrel}") install -dm 755 "${pkgdir}/usr/share/doc" - cp -r ${_imgdir}/docs 
"${pkgdir}/usr/share/doc/${pkgbase}" +# cp -r ${_imgdir}/docs "${pkgdir}/usr/share/doc/${pkgbase}" install -dm 755 "${pkgdir}/usr/share/licenses" ln -s ${pkgbase} "${pkgdir}/usr/share/licenses/${pkgname}" diff --git a/java11-openjdk/jdk11-11.0.20.1-la64.patch b/java11-openjdk/jdk11-11.0.20.1-la64.patch new file mode 100644 index 0000000000..6a77e6b63b --- /dev/null +++ b/java11-openjdk/jdk11-11.0.20.1-la64.patch @@ -0,0 +1,116875 @@ +diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +index 46fb9b4219..c6d8b24fc4 100644 +--- a/make/CompileJavaModules.gmk ++++ b/make/CompileJavaModules.gmk +@@ -430,6 +430,7 @@ jdk.internal.vm.ci_ADD_JAVAC_FLAGS += -parameters -Xlint:-exports -XDstringConca + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -437,6 +438,7 @@ jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -456,6 +458,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ + org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.loongarch64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ +diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 +index a3e1e00b2c..22f479120b 100644 +--- a/make/autoconf/hotspot.m4 ++++ b/make/autoconf/hotspot.m4 +@@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" + # All valid JVM variants + VALID_JVM_VARIANTS="server client minimal core zero custom" + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + ############################################################################### + # Check if the specified JVM variant should be built. To be used in shell if + # constructs, like this: +@@ -337,6 +343,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + HOTSPOT_TARGET_CPU_ARCH=arm + fi + ++ # Override hotspot cpu definitions for MIPS and LOONGARCH platforms ++ if test "x$OPENJDK_TARGET_CPU" = xmips64el && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=mips_64 ++ HOTSPOT_TARGET_CPU_ARCH=mips ++ elif test "x$OPENJDK_TARGET_CPU" = xloongarch64 && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=loongarch_64 ++ HOTSPOT_TARGET_CPU_ARCH=loongarch ++ fi ++ ++ # Disable compiler1 on linux-mips and linux-loongarch ++ if ! 
(HOTSPOT_CHECK_JVM_FEATURE(compiler1)); then ++ AC_MSG_CHECKING([if compiler1 should be built, $JVM_FEATURES]) ++ if test "x$OPENJDK_TARGET_OS" = "xlinux" && test "x$HOTSPOT_TARGET_CPU_ARCH" = "xmips"; then ++ DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES compiler1" ++ AC_MSG_RESULT([no, platform not supported]) ++ else ++ AC_MSG_RESULT([yes]) ++ fi ++ fi ++ + # Verify that dependencies are met for explicitly set features. + if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! HOTSPOT_CHECK_JVM_FEATURE(services); then + AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) +@@ -421,10 +447,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + JVM_FEATURES_jvmci="" + INCLUDE_JVMCI="false" + else +- # Only enable jvmci on x86_64, sparcv9 and aarch64 ++ # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" + INCLUDE_JVMCI="true" +diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 +index 26a58eb2ee..061952ee45 100644 +--- a/make/autoconf/platform.m4 ++++ b/make/autoconf/platform.m4 +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -554,6 +560,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64el; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xloongarch64; then ++ HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 + + # The cpu defines below are for zero, we don't support them directly. 
+ elif test "x$OPENJDK_$1_CPU" = xsparc; then +diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +index fdd2c0ca3d..318191233a 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -1123,7 +1123,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + +- ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1663,6 +1665,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +index cebc1e410d..816226c068 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +@@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ store(reg, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template <class T> ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); +- __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template <class T> ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); +- __ cmp(condition, reg, reg1); ++ __ cmp_branch(condition, reg, reg1, type, tgt); + } + ++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + +diff --git a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +index ce75dc552a..74c4b7e556 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +@@ -52,3 +52,24 @@ void LIR_Address::verify() const { + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template <class T> ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +index f0a7229aa1..29db21f975 100644 +--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +@@ -1150,6 +1150,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + __ b(*(op->label()), acond); + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -3082,6 +3085,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + __ bind(*stub->continuation()); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + #ifdef ASSERT + // emit run-time assertion + void LIR_Assembler::emit_assert(LIR_OpAssert* op) { +diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +index b05fc876f2..b3c1afe69a 100644 +--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +@@ -423,18 +423,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template <class T> ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, T_INT), FrameMap::LR_opr, info); +- __ cmp(condition, FrameMap::LR_opr, c); ++
__ cmp_branch(condition, FrameMap::LR_opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template <class T> ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, type), FrameMap::LR_opr, info); +- __ cmp(condition, reg, FrameMap::LR_opr); ++ __ cmp_branch(condition, reg, FrameMap::LR_opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp +index 806da32020..5305fe371e 100644 +--- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp +@@ -84,3 +84,24 @@ void LIR_Address::verify() const { + #endif // AARCH64 + } + #endif // PRODUCT ++ ++template <class T> ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp +new file mode 100644 +index 0000000000..0412b99537 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::java_frame_sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +new file mode 100644 +index 0000000000..e6e62cccad +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +@@ -0,0 +1,849 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ 
ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); 
++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ 
lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, 
AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +new file mode 100644 +index 0000000000..179da7bd0e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +@@ -0,0 +1,2827 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 0b0000000100010100101001, ++ movgr2fr_d_op = 0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 
0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op = 0b0111001010011110001100, ++ vftint_l_d_op = 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 
0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op = 0b0111011010011110100011, ++ xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 
0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, ++ vbitclri_h_op = 0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 
0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 0b00111000001010000, ++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, 
++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ vsub_h_op = 0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ 
vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ vftintrm_w_d_op = 0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 
0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ xvmsub_w_op = 0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 
0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ xvrotri_d_op = 0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ 
xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 0b0010100110, ++ st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, 
++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 2RI1-type ++ // 31 11 10 9 5 4 0 ++ // | opcode | I1 | vj | rd | ++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } ++ ++ // 2RI2-type ++ // 31 12 11 10 9 5 4 0 ++ // | opcode | I2 | vj | rd | ++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } ++ ++ // 2RI3-type ++ // 31 13 12 10 9 5 4 0 ++ // | opcode | I3 | vj | vd | ++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } ++ ++ // 2RI4-type ++ // 31 14 13 10 9 5 4 0 ++ // | opcode | I4 | vj | vd | ++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } ++ ++ // 2RI5-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | I5 | vj | vd | ++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return 
(op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } ++ ++ // 2RI6-type ++ // 31 16 15 10 9 5 4 0 ++ // | opcode | I6 | vj | vd | ++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI7-type ++ // 31 17 16 10 9 5 4 0 ++ // | opcode | I7 | vj | vd | ++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI8-type ++ // 31 18 17 10 9 5 4 0 ++ // | opcode | I8 | rj | rd | ++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } ++ ++ // 2RI12-type ++ // 31 22 21 10 9 5 4 0 ++ // | opcode | I12 | rj | rd | ++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } ++ ++ ++ // 2RI14-type ++ // 31 24 23 10 9 5 4 0 ++ // | opcode | I14 | rj | rd | ++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } ++ ++ // 2RI16-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I16 | rj | rd | ++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } ++ ++ // 1RI13-type (?) ++ // 31 18 17 5 4 0 ++ // | opcode | I13 | vd | ++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } ++ ++ // 1RI20-type (?) ++ // 31 25 24 5 4 0 ++ // | opcode | I20 | rd | ++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } ++ ++ // 1RI21-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I21[15:0] | rj |I21[20:16]| ++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } ++ ++ // I26-type ++ // 31 26 25 10 9 0 ++ // | opcode | I26[15:0] | I26[25:16] | ++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } ++ ++ // imm15 ++ // 31 15 14 0 ++ // | opcode | I15 | ++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } ++ ++ ++ // get the offset field of beq, bne, blt[u], bge[u] instruction ++ int offset16(address entry) { ++ assert(is_simm16((entry - pc()) / 4), "change this code"); ++ if (!is_simm16((entry - pc()) / 4)) { ++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of beqz, bnez instruction ++ int offset21(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 21)) { ++ tty->print_cr("!!! 
is_simm21: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of b instruction ++ int offset26(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 26)) { ++ tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high16(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int split_low20(int x) { ++ return (x & 0xfffff); ++ } ++ ++ // Convert 20-bit x to a sign-extended 20-bit integer ++ static int simm20(int x) { ++ assert(x == (x & 0xFFFFF), "must be 20-bit only"); ++ return (x << 12) >> 12; ++ } ++ ++ static int split_low12(int x) { ++ return (x & 0xfff); ++ } ++ ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ ++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) { ++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; ++ si38 += (si38 & 0x20000) << 1; ++ si20 = si38 >> 18; ++ } ++ ++ // Convert 12-bit x to a sign-extended 12-bit integer ++ static int simm12(int x) { ++ assert(x == (x & 0xFFF), "must be 12-bit only"); ++ return (x << 20) >> 20; ++ } ++ ++ // Convert 26-bit x to a sign-extended 26-bit integer ++ static int simm26(int x) { ++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); ++ return (x << 6) >> 6; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12) { ++ //lu12i, ori ++ return (((x12 << 12) | x0) << 32) >> 32; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { ++ //lu32i, lu12i, ori ++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { ++ //lu52i, lu32i, lu12i, ori ++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. 
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_int32(int x) { ++ AbstractAssembler::emit_int32(x); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { 
emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ 
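// Illustrative sketch (not from the patch itself): how the insn_* helpers above
// compose a 32-bit instruction word. Assuming the 3R-type layout documented
// earlier (opcode in bits[31:15], rk in [14:10], rj in [9:5], rd in [4:0]) and
// taking hypothetical register encodings rd=4, rj=5, rk=6 purely for illustration,
// an add_d would be packed roughly as:
//
//   insn_RRR(add_d_op, /*rk=*/6, /*rj=*/5, /*rd=*/4)
//     == (0b00000000000100001 << 15) | (6 << 10) | (5 << 5) | 4
//
// i.e. the 17-bit add_d_op opcode fills the high bits and the three 5-bit
// register numbers fill the low 15 bits; emit_int32() then writes that word
// into the code buffer. The other insn_* variants differ only in how many
// immediate/register fields share the low bits below the opcode.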
++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_w 
(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } ++ ++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void 
srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), 
(int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { 
emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } ++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } ++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } ++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); } ++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } ++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } ++ ++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, 
(int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } ++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ori (Register 
rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, 
FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } ++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { 
emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } ++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } ++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } ++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } ++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } ++ void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } ++ ++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 
12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } ++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } ++ void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ ++ void ld_b (Register rd, Address src); ++ void ld_bu (Register rd, Address src); ++ void ld_d (Register rd, Address src); ++ void ld_h (Register rd, Address src); ++ void ld_hu (Register rd, Address src); ++ void ll_w (Register rd, Address src); ++ void ll_d (Register rd, Address src); ++ void ld_wu (Register rd, Address src); ++ void ld_w (Register rd, Address src); ++ void st_b (Register rd, Address dst); ++ void st_d (Register rd, Address dst); ++ void st_w (Register rd, Address dst); ++ void sc_w (Register rd, Address dst); ++ void sc_d (Register rd, Address dst); ++ void st_h (Register rd, Address dst); ++ void fld_s (FloatRegister fd, Address src); ++ void fld_d (FloatRegister fd, Address src); ++ void fst_s (FloatRegister fd, Address dst); ++ void fst_d (FloatRegister fd, Address dst); ++ ++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_w (Register rd, Register rk, Register rj) { 
assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void dbar(int hint) { ++ assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); ++ ++ if (os::is_ActiveCoresMP()) ++ andi(R0, R0, 0); ++ else ++ emit_int32(insn_I15(dbar_op, hint)); ++ } ++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } ++ ++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } ++ void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } ++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } ++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } ++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } ++ ++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } ++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } ++ ++ ++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beq (Register rj, Register 
rd, address entry) { beq (rj, rd, offset16(entry)); } ++ void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } ++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } ++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } ++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } ++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } ++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } ++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } ++ void b(address entry) { b(offset26(entry)); } ++ void bl(address entry) { bl(offset26(entry)); } ++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } ++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } ++ ++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } ++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } ++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } ++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } ++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } ++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } ++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); } ++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); } ++ void b(Label& L) { b(target(L)); } ++ void bl(Label& L) { bl(target(L)); } ++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } ++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } ++ ++ typedef enum { ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, 
FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } 
++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX 
emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ 
void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX 
emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } 
++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX 
emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { 
ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, 
FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( 
vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ 
void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, 
(int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX 
assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)xd->encoding())); } ++ ++#undef ASSERT_LSX ++#undef ASSERT_LASX ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {} ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..9ca0cd4504 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp +new file mode 100644 +index 0000000000..c15344eb39 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use LoongArch, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since LoongArch CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { return *(u2*)p; } ++ static inline u4 get_native_u4(address p) { return *(u4*)p; } ++ static inline u8 get_native_u8(address p) { return *(u8*)p; } ++ ++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } ++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } ++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since LoongArch64 CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..c0eeb63962 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ Metadata *m = _method->as_constant_ptr()->as_metadata(); ++ __ mov_metadata(SCR2, m); ++ ce->store_parameter(SCR2, 1); ++ ce->store_parameter(_bci, 0); ++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ move(SCR1, _index->as_register()); ++ } else { ++ __ li(SCR1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ assert(_array != NULL, "sanity"); ++ __ move(SCR2, _array->as_pointer_register()); ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, ++ CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ _result = result; ++ _klass = klass; ++ _klass_reg = 
klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ __ move(A3, _klass_reg->as_register()); ++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++ : MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = 
Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. 
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++ } ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +new file mode 100644 +index 0000000000..1140e44431 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +new file mode 100644 +index 0000000000..bd8578c72a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +new file mode 100644 +index 0000000000..1a89c437a8 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +new file mode 100644 +index 0000000000..4f0cf05361 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return 
pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +new file mode 100644 +index 0000000000..3b60899071 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +@@ -0,0 +1,354 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. 
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; ++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap 
++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ 
a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. 
++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..40d9408f1f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. 
++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ _call_aot_stub_size = 0, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..c989e25c3a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +@@ -0,0 +1,3387 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register 
as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
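++ // Copy each monitor (BasicLock word followed by the object oop) from the OSR
++ // buffer into the corresponding monitor slot of the compiled activation.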
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ move(reg, R0); ++ } else { ++ int oop_index = __ oop_recorder()->find_index(o); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(reg, (long)o); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
++ return in_bytes(frame_map()->framesize_in_bytes()); ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in A0, and A1 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(A0); ++ ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ __ bind(_unwind_handler_entry); ++ __ verify_not_null_oop(V0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(S0, V0); // Preserve the exception ++ } ++ ++ // Perform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::a0_opr); ++ stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); ++ __ unlock_object(A5, A4, A0, *stub->entry()); ++ __ bind(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ mov_metadata(A1, method()->constant_encoding()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(A0, S0); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address 
handler_base = __ start_a_stub(deopt_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); ++ guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, ++ "word returns are in V0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(SCR1, SCR2, 0); ++ __ jr(RA); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(SCR1, SCR2, 0); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ move(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ case T_OBJECT: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ case T_FLOAT: ++ __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat()))); ++ __ fld_s(dest->as_float_reg(), SCR1, 0); ++ break; ++ case T_DOUBLE: ++ __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble()))); ++ __ 
fld_d(dest->as_double_reg(), SCR1, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (!c->as_jobject()) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ case T_INT: ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) ++ __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ li(SCR2, c->as_jint_bits()); ++ __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ li(SCR2, (intptr_t)c->as_jlong_bits()); ++ __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register 
t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ 
fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ assert(op->cond() == lir_cond_always, "must be"); ++#endif ++ ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++} ++ ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) 
_branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cmp_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = 
opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, 
Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. 
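++ // Note: tmp1/tmp2 may alias the object register; they are redirected to the
++ // result register below so obj stays intact across the klass loads and the
++ // slow-path runtime call.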
++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? &profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ 
check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr 
= op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ++ LIR_Opr result, BasicType type) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case 
lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? 
src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
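++ // Only add/sub reach this constant path; the immediate is widened to jlong and
++ // later applied with addi_w or addi_d according to the left operand's type.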
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2_long(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), ++ "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. 
++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); ++ __ move(A4, j_rarg4); ++ assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); ++ __ move(A3, j_rarg3); ++ assert_different_registers(A2, j_rarg0, j_rarg1); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A1, j_rarg0); ++ __ move(A1, j_rarg1); ++ __ move(A0, j_rarg0); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ __ move(tmp, A0); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // tmp is -1^K where K == partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
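++ // Klass::layout_helper() is negative for array klasses, so a signed compare
++ // against _lh_neutral_value sends non-array operands to the slow-path stub.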
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ load_klass(A4, dst); ++ assert_different_registers(A4, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); ++ __ move(tmp, A0); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR1, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. 
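For context on the receiver-profiling loops above: the MDO keeps a small fixed table of (receiver klass, count) rows per virtual call site. A matching row gets its counter bumped, an empty row is claimed for a new receiver, and when the table is full only the total counter advances, flagging the polymorphic case. A rough model of that policy, with stand-in types instead of the real MDO layout (the row limit of 2 is illustrative):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    using Klass = const void*;                 // stand-in for a Metadata pointer

    struct ReceiverRow { Klass receiver = nullptr; uint64_t count = 0; };

    constexpr int row_limit = 2;               // illustrative stand-in for VirtualCallData::row_limit()

    struct VirtualCallProfile {
      std::array<ReceiverRow, row_limit> rows;
      uint64_t total = 0;                      // "counter" slot, bumped for the polymorphic case

      void record(Klass k) {
        for (auto& r : rows)                   // 1) known receiver: just bump its row
          if (r.receiver == k) { r.count++; return; }
        for (auto& r : rows)                   // 2) first empty row: claim it for this receiver
          if (r.receiver == nullptr) { r.receiver = k; r.count++; return; }
        total++;                               // 3) table full: fall back to the total counter
      }
    };

    int main() {
      int a, b, c;
      VirtualCallProfile p;
      p.record(&a); p.record(&a); p.record(&b); p.record(&c);
      std::printf("row0=%llu row1=%llu total=%llu\n",
                  (unsigned long long)p.rows[0].count,
                  (unsigned long long)p.rows[1].count,
                  (unsigned long long)p.total);
    }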
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
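The profile slot manipulated here is a single machine word: a Klass pointer whose two low bits double as flags (the asserts pin the masks at -2 and -4, i.e. ~1 and ~3). "Same klass, ignoring flags" is computed by XOR-ing and discarding bits 1:0, which is what the bstrpick_d over bits 63..2 followed by beqz implements. A compact sketch of that encoding; the concrete flag values (bit 0 for null_seen, bit 1 for type_unknown) are assumptions for illustration:

    #include <cassert>
    #include <cstdint>

    // Assumed flag layout (not copied from HotSpot): bit 0 and bit 1 of the slot.
    constexpr intptr_t null_seen       = 1;
    constexpr intptr_t type_unknown    = 2;
    constexpr intptr_t type_klass_mask = ~intptr_t(3);   // == -4, matches the assert above
    constexpr intptr_t type_mask       = ~intptr_t(1);   // == -2, matches the assert above

    // The generated sequence: XOR the candidate klass with the slot, then test
    // whether anything survives above bit 1 (bstrpick_d tmp, 63, 2 + beqz).
    inline bool same_klass(intptr_t slot, intptr_t klass) {
      return ((slot ^ klass) & type_klass_mask) == 0;
    }

    int main() {
      intptr_t klass = 0x1000;                    // pretend Klass* (aligned, low bits clear)
      intptr_t slot  = klass | null_seen;         // klass recorded, plus "saw a null" flag
      assert(same_klass(slot, klass));            // flags don't disturb the comparison
      assert(!(slot & type_unknown));             // not yet marked unknown/polymorphic
      slot |= type_unknown;                       // conflict path: give up on a precise type
      assert(!same_klass(slot, 0x2000));
      assert((slot & type_mask) != 0);
    }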
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
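Because several threads can race on the same profile slot, an apparent mismatch is not trusted immediately: after an acquire barrier the slot is re-read, and only a mismatch that survives the second read degrades the entry to "unknown" (that is the membar_acquire plus reload sequence above). Roughly, with std::atomic and the same assumed flag layout as the previous sketch:

    #include <atomic>
    #include <cstdint>

    constexpr intptr_t low_flags = 3;   // assumed flag bits, as in the sketch above

    // Returns true if the slot should be downgraded to "type unknown".
    // A mismatch is only believed after re-reading the slot with acquire,
    // in case another thread has just installed exactly this klass.
    bool conflicting_update(std::atomic<intptr_t>& slot, intptr_t klass) {
      intptr_t seen = slot.load(std::memory_order_relaxed);
      if (((seen ^ klass) & ~low_flags) == 0) return false;   // already this klass
      seen = slot.load(std::memory_order_acquire);            // membar_acquire + re-read
      return ((seen ^ klass) & ~low_flags) != 0;              // still different: real conflict
    }

    int main() {
      std::atomic<intptr_t> slot{0x1000};
      return conflicting_update(slot, 0x1000) ? 1 : 0;        // same klass: no conflict
    }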
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, ++ CodeEmitInfo* info) { ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void 
LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() > Address::times_1) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..72a80f37c4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +@@ -0,0 +1,1396 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if 
(v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ return addr; ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType 
type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, wo we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, ++ int disp, int c, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, ++ int disp, BasicType type, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp_branch(condition, reg, reg1, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
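The Assembler::is_simm(x, 12) / is_uimm(x, 12) tests scattered through this generator reflect LoongArch's 12-bit immediate fields: addi.w/addi.d take a signed 12-bit operand and andi/ori an unsigned one, so only constants in that range can be folded into a single instruction; anything larger is materialised into a register first. A self-contained version of those range checks (written from the architecture's immediate widths, not copied from HotSpot):

    #include <cassert>
    #include <cstdint>

    // Signed immediate fits in `bits` bits (two's complement).
    constexpr bool is_simm(int64_t x, unsigned bits) {
      return x >= -(int64_t(1) << (bits - 1)) && x < (int64_t(1) << (bits - 1));
    }

    // Unsigned immediate fits in `bits` bits.
    constexpr bool is_uimm(int64_t x, unsigned bits) {
      return x >= 0 && x < (int64_t(1) << bits);
    }

    int main() {
      static_assert(is_simm(2047, 12) && !is_simm(2048, 12),   "si12 upper bound");
      static_assert(is_simm(-2048, 12) && !is_simm(-2049, 12), "si12 lower bound");
      static_assert(is_uimm(4095, 12) && !is_uimm(4096, 12),   "ui12 upper bound");
      assert(is_simm(0, 12));
    }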
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = 
frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. ++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ 
left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. 
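The _imul handling keeps a constant multiplier out of a register whenever the multiply can be strength-reduced: c a power of two becomes a plain shift, and c-1 or c+1 a power of two becomes shift-plus-add or shift-minus-subtract, matching strength_reduce_multiply earlier in this file. A quick standalone check of those identities:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    constexpr bool is_power_of_2(int64_t v) { return v > 0 && (v & (v - 1)) == 0; }

    constexpr int exact_log2(int64_t v) {
      int n = 0;
      while (v > 1) { v >>= 1; ++n; }
      return n;
    }

    // x * c rewritten without a multiply, for the two shapes the generator accepts.
    constexpr int64_t mul_by_shift_add(int64_t x, int64_t c) {
      if (is_power_of_2(c - 1)) return (x << exact_log2(c - 1)) + x;  // c = 2^k + 1
      if (is_power_of_2(c + 1)) return (x << exact_log2(c + 1)) - x;  // c = 2^k - 1
      return x * c;                                                   // not reducible here
    }

    int main() {
      for (int64_t x : {-7, 0, 3, 123}) {
        assert(mul_by_shift_add(x, 9)  == x * 9);    // 9  = 8 + 1
        assert(mul_by_shift_add(x, 7)  == x * 7);    // 7  = 8 - 1
        assert(mul_by_shift_add(x, 33) == x * 33);   // 33 = 32 + 1
      }
    }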
++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && 
Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, ++ LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (is_reference_type(type)) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), ++ new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ move(FrameMap::scr1_opr, result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = is_reference_type(type); ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); 
++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = 
state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. ++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions 
support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: ++ __ fmad(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ case vmIntrinsics::_fmaF: ++ __ fmaf(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ 
LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). ++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? 
state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. 
++ profile_branch(x, cond, left, right); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); ++ } else { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (!UseBarriersForVolatile) { ++ __ membar(); ++ } ++ __ volatile_load_mem_reg(address, result, info); ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +new file mode 100644 +index 0000000000..6bb15fbf1d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. 
++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || ++ index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +new file mode 100644 +index 0000000000..f15dacafeb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +new file mode 100644 +index 0000000000..219b2e3671 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..38ff4c5836 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // object 
klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, ++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..17ff93a595 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced 
header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, 
t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry() { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
++ ++ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +new file mode 100644 +index 0000000000..aaa708f71e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +@@ -0,0 +1,1138 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) { ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ 
move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ bool _return_state; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state=requires_return); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ jr(RA); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state) { ++ _sasm = sasm; ++ _return_state = return_state; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value. 
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, 
target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. 
++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. 
++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support 
for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 2 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 3 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. 
++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ 
call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ 
break; ++ ++ case monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +new file mode 100644 +index 0000000000..164016e123 +--- /dev/null ++++ 
b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +new file mode 100644 +index 0000000000..27a4ec5229 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +@@ -0,0 +1,94 
@@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +new file mode 100644 +index 0000000000..ec78b942d4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +new file mode 100644 +index 0000000000..653d95806b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +new file mode 100644 +index 0000000000..d063d5d93e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. 
++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ cbuf.set_insts_mark(); ++ __ patchable_jump(__ pc()); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +new file mode 100644 +index 0000000000..54b847a736 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. ++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp +new file mode 100644 +index 0000000000..e4a92d1035 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_loongarch.hpp" ++ ++// Nothing to do on LoongArch +diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp +new file mode 100644 +index 0000000000..29c292a74a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++ ++// Nothing to do on LoongArch ++ ++#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +new file mode 100644 +index 0000000000..04359bc172 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +new file mode 100644 +index 0000000000..9b4f3b88d4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. 
++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (java_frame_return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[java_frame_return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(java_frame_sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[java_frame_link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::java_frame_sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::java_frame_sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[java_frame_return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. 
++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. 
++#ifdef COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. 
++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::java_frame_sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. 
The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. 
++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +new file mode 100644 +index 0000000000..b16389b3a3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// Low ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// High <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++// ++// ------------------------------ Native (C frame) --------------------------------------- ++// Layout of C frame: ++// High ++// | ++// - <----- fp <- sender sp ++// fp -8 | [ra] = sender_pc() ++// fp-16 | [fp (sender)] = link() ++// | [...] 
++// | ++// - <----- sp ++// | ++// v ++// Low ++// ------------------------------ Native (C frame) --------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ ++ // Java frames ++ java_frame_link_offset = 0, ++ java_frame_return_addr_offset = 1, ++ java_frame_sender_sp_offset = 2, ++ ++ // Native frames ++ native_frame_link_offset = -2, ++ native_frame_return_addr_offset = -1, ++ native_frame_sender_sp_offset = 0, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
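// A rough, standalone picture of why _sp and _unextended_sp can differ (host-only
// sketch with made-up numbers; in the VM the extension is performed by the
// interpreter/adapters, not by this illustrative code):
//
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     intptr_t stack[16] = {0};
//     intptr_t* caller_sp = &stack[10];        // sp the caller originally established
//     const int extension_words = 3;           // extra outgoing words pushed by an adapter
//
//     intptr_t* sp            = caller_sp - extension_words;  // "raw" sp after extension
//     intptr_t* unextended_sp = caller_sp;                     // caller's original sp
//     assert(unextended_sp - sp == extension_words);
//
//     // Sender math (compare sender_for_compiled_frame) has to start from the
//     // unextended sp, or the computed sender_sp would be off by the extension.
//     const int callee_frame_size_in_words = 4;
//     intptr_t* sender_sp = unextended_sp + callee_frame_size_in_words;
//     assert(sender_sp > unextended_sp);
//     return 0;
//   }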
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +new file mode 100644 +index 0000000000..1ddc038eea +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { ++ if (is_java_frame()) ++ return (intptr_t*) *(intptr_t **)addr_at(java_frame_link_offset); ++ return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) ++ : (intptr_t **)addr_at(native_frame_link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { ++ if (is_java_frame()) ++ return (address*) addr_at(java_frame_return_addr_offset); ++ return (address*) addr_at(native_frame_return_addr_offset); ++} ++ ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { ++ if (is_java_frame()) ++ return addr_at(java_frame_sender_sp_offset); ++ return addr_at(native_frame_sender_sp_offset); ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** 
frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..e1e4748c49 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,523 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ ++ __ beqz(AT, filtered); ++ ++ __ push(saved_regs); ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop(saved_regs); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) { ++ __ push(saved_regs); ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop(saved_regs); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ __ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -1 * wordSize); ++ __ st_d(tmp, index); ++ __ ld_d(AT, buffer); ++ ++ // Record the previous value ++ __ stx_d(pre_val, tmp, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(AT, done); ++ ++ // crosses regions, storing NULL? ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ srli_d(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ add_d(card_addr, card_addr, cardtable); ++ ++ __ ld_bu(AT, card_addr, 0); ++ __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beqz(AT, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ld_bu(AT, card_addr, 0); ++ __ beqz(AT, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ st_b(R0, card_addr, 0); ++ ++ __ ld_d(AT, queue_index); ++ __ beqz(AT, runtime); ++ __ addi_d(AT, AT, -1 * wordSize); ++ __ st_d(AT, queue_index); ++ __ ld_d(tmp2, buffer); ++ __ ld_d(AT, queue_index); ++ __ stx_d(card_addr, tmp2, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
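// The pre-barrier code in this file follows a simple thread-local SATB queue
// discipline: bail out unless marking is active and the old value is non-null,
// try to append the old value to the per-thread buffer, and fall back to the
// runtime when the buffer is full. A rough standalone model of that fast/slow
// path split (simplified types and names; runtime_enqueue is a stand-in, not a
// HotSpot entry point):
//
//   #include <cstddef>
//
//   struct SatbQueue {
//     bool   active;   // mirrors satb_mark_queue_active
//     size_t index;    // bytes left in the buffer; 0 means "full"
//     void** buffer;   // mirrors satb_mark_queue_buffer
//   };
//
//   void runtime_enqueue(void* pre_val);   // slow path, assumed to live elsewhere
//
//   void satb_pre_barrier(SatbQueue& q, void* pre_val) {
//     if (!q.active || pre_val == nullptr) return;  // the beqz filters above
//     if (q.index == 0) {                           // buffer full -> runtime call
//       runtime_enqueue(pre_val);
//       return;
//     }
//     q.index -= sizeof(void*);                     // bump the index down one word
//     q.buffer[q.index / sizeof(void*)] = pre_val;  // record the previous value
//   }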
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ __ load_parameter(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ popad(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address ++ Address store_addr(FP, 2 * BytesPerWord); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ __ load_parameter(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ popad(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..745046ac0c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..a890cd3f62 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +@@ -0,0 +1,255 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. ++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ld_wu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ ld_bu (dst, src); break; ++ case T_BYTE: __ ld_b (dst, src); break; ++ case T_CHAR: __ ld_hu (dst, src); break; ++ case T_SHORT: __ ld_h (dst, src); break; ++ case T_INT: __ ld_w (dst, src); break; ++ case T_LONG: __ ld_d (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ fld_s(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ fld_d(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ st_w(R0, dst); ++ } else { ++ __ st_d(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ st_w(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ st_b(val, dst); ++ break; ++ case T_BYTE: ++ __ st_b(val, dst); ++ break; ++ case T_SHORT: ++ __ st_h(val, dst); ++ break; ++ case T_CHAR: ++ __ st_h(val, dst); ++ break; ++ case T_INT: ++ __ st_w(val, dst); ++ break; ++ case T_LONG: ++ __ st_d(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ fst_s(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ fst_d(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, 
Address(obj, 0)); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ assert_different_registers(obj, t2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = t2; ++ ++ // verify_tlab(); ++ ++ __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); ++ } ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); ++ __ blt_far(SCR1, end, slow_case, false); ++ ++ // update the tlab top pointer ++ __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub_d(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++ // verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ b_far(slow_case); ++ } else { ++ Register end = t1; ++ Register heap_end = SCR2; ++ Label retry; ++ __ bind(retry); ++ ++ __ li(SCR1, (address)Universe::heap()->end_addr()); ++ __ ld_d(heap_end, SCR1, 0); ++ ++ // Get the current top of the heap ++ __ li(SCR1, (address) Universe::heap()->top_addr()); ++ __ ll_d(obj, SCR1, 0); ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) ++ __ addi_d(end, obj, con_size_in_bytes); ++ else ++ __ add_d(end, obj, var_size_in_bytes); ++ ++ // if end < obj then we wrapped around high memory ++ __ blt_far(end, obj, slow_case, false); ++ __ blt_far(heap_end, end, slow_case, false); ++ ++ // If heap top hasn't been changed by some other thread, update it. ++ __ sc_d(end, SCR1, 0); ++ __ beqz(end, retry); ++ ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1); ++ } ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ assert(t1->is_valid(), "need temp reg"); ++ ++ __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) ++ __ add_d(t1, t1, var_size_in_bytes); ++ else ++ __ addi_d(t1, t1, con_size_in_bytes); ++ __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..a7ebbfaabb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. 
++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..d09e9a75a7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,140 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T4 RT4 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ ++ if (ct->scanned_concurrently()) __ membar(__ StoreStore); ++ ++ __ li(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ sub_d(end, end, addr); // end --> cards count ++ ++ __ add_d(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ __ stx_b(R0, addr, count); ++ __ addi_d(count, count, -1); ++ __ bge(count, R0, L_loop); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T4; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ add_d(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(__ StoreLoad); ++ __ ld_b(AT, tmp, 0); ++ __ addi_d(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ st_b(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreStore); ++ } ++ __ st_b(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..b37c2ba0bc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..14c41ea790 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..8043220eff +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +new file mode 100644 +index 0000000000..dc21d001cc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++// FIXME: LA ++// This makes the games we play when patching difficult, so when we ++// come across an access that needs patching we deoptimize. There are ++// ways we can avoid this, but these would slow down C1-compiled code ++// in the default case. We could revisit this decision if we get any ++// evidence that it's worth doing. 
++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +new file mode 100644 +index 0000000000..e6b758b554 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +new file mode 100644 +index 0000000000..7b97694827 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +new file mode 100644 +index 0000000000..1ae7e5376c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, A2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +new file mode 100644 +index 0000000000..3a180549fc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +new file mode 100644 +index 0000000000..53a06ba7fd +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +@@ -0,0 +1,281 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset 
* wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. 
unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const 
Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +new file mode 100644 +index 0000000000..c533a57652 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +@@ -0,0 +1,2043 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T4; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. 
++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ li(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ alsl_d(AT, index, cache, Address::times_ptr - 1);
++ ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
++ if(os::is_MP()) {
++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore));
++ }
++
++ const int shift_count = (1 + byte_no) * BitsPerByte;
++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++ "correct shift count");
++ srli_d(bytecode, bytecode, shift_count);
++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++ li(AT, ConstantPoolCacheEntry::bytecode_1_mask);
++ andr(bytecode, bytecode, AT);
++}
++
++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
++ Register tmp,
++ int bcp_offset,
++ size_t index_size) {
++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
++ assert(cache != tmp, "must use different register");
++ get_cache_index_at_bcp(tmp, bcp_offset, index_size);
++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
++ // convert from field index to ConstantPoolCacheEntry index
++ // and from word offset to byte offset
++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
++ shl(tmp, 2 + LogBytesPerWord);
++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize);
++ // skip past the header
++ addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
++ add_d(cache, cache, tmp);
++}
++
++void InterpreterMacroAssembler::get_method_counters(Register method,
++ Register mcs, Label& skip) {
++ Label has_counters;
++ ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++ bne(mcs, R0, has_counters);
++ call_VM(noreg, CAST_FROM_FN_PTR(address,
++ InterpreterRuntime::build_method_counters), method);
++ ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory
++ bind(has_counters);
++}
++
++// Load object from cpool->resolved_references(index)
++void InterpreterMacroAssembler::load_resolved_reference_at_index(
++ Register result, Register index, Register tmp) {
++ assert_different_registers(result, index);
++ // convert from field index to resolved_references() index and from
++ // word index to byte offset. Since this is a java object, it can be compressed
++ shl(index, LogBytesPerHeapOop);
++
++ get_constant_pool(result);
++ // load pointer for resolved_references[] objArray
++ ld_d(result, result, ConstantPool::cache_offset_in_bytes());
++ ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes());
++ resolve_oop_handle(result, tmp);
++ // Add in the index
++ add_d(result, result, index);
++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp);
++}
++
++// load cpool->resolved_klass_at(index)
++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool,
++ Register index, Register klass) {
++ alsl_d(AT, index, cpool, Address::times_ptr - 1);
++ ld_h(index, AT, sizeof(ConstantPool));
++ Register resolved_klasses = cpool;
++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes()));
++ alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1);
++ ld_d(klass, AT, Array<Klass*>::base_offset_in_bytes());
++}
++
++// Resets LVP to locals.
Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void 
InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(temp); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::java_frame_link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld_d(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. 
++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, S8, T2); ++ } ++ } else { ++ li(T3, (long)table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T2, T3); ++ } ++ jr(T3); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T3, T2); ++ jr(T3); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. 
++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think LA do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. 
Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, TSR, AT); ++ bge(R0, AT, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld_d(ret_addr, FP, frame::java_frame_return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld_d(AT, data); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ st_d(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ st_d(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ addi_d(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ add_d(bumped_count, bumped_count, AT); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. 
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) 
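Before the comment continues below, a hedged C++ rendering of the row search it describes, written as an ordinary loop. RowSketch and the polymorphic counter are illustrative; the real rows live inside the MDO, and the generated code unrolls this search into a decision tree rather than looping.

    #include <cstddef>

    struct RowSketch { const void* receiver; long count; };

    void record_receiver(RowSketch* rows, size_t nrows,
                         const void* rec, long& polymorphic_count) {
        RowSketch* first_empty = nullptr;
        for (size_t i = 0; i < nrows; i++) {
            if (rows[i].receiver == rec) { rows[i].count++; return; }  // case 1: found the item
            if (rows[i].receiver == nullptr && first_empty == nullptr)
                first_empty = &rows[i];                                // case 2: remember empty row
        }
        if (first_empty != nullptr) {        // allocate the empty row for the new receiver type
            first_empty->receiver = rec;
            first_empty->count    = 1;       // DataLayout::counter_increment
        } else {
            polymorphic_count++;             // all rows taken: count the overflow
        }
    }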
++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ b(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. 
++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ b(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
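A hedged sketch of the narrowing performed by the type-specific cases that follow: booleans keep one bit, bytes and shorts are sign-extended, chars are zero-extended to 16 bits, and T_INT passes through unchanged. The enum is an illustrative stand-in for ConstMethod::result_type().

    #include <cstdint>

    enum ResultKind { kBoolean, kByte, kChar, kShort, kInt };

    int64_t narrow_result(ResultKind kind, int64_t result) {
        switch (kind) {
            case kBoolean: return result & 0x1;        // andi  result, result, 0x1
            case kByte:    return (int8_t)result;      // ext.w.b
            case kChar:    return (uint16_t)result;    // bstrpick.d result, result, 15, 0
            case kShort:   return (int16_t)result;     // ext.w.h
            default:       return result;              // T_INT: nothing to do
        }
    }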
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ add_d(mdp, mdp, tmp); ++ } ++ st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ ld_b(tmp, _bcp_register, 0); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beqz(AT, do_profile); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beqz(AT, do_profile); ++ ++ get_method(tmp); ++ ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ add_d(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LoongArch !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ blt(tmp1, R0, profile_continue); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ slli_d(AT, tmp2, Interpreter::logStackElementSize); ++ ldx_d(tmp2, AT, _locals_register); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ li(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +new file mode 100644 +index 0000000000..d53d951a16 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
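A hedged sketch of the placement policy that the SignatureHandlerGenerator declared below encodes for LoongArch64 native calls, assuming the standard LP64D convention of eight integer (a0-a7) and eight floating-point (fa0-fa7) argument registers, with a0 already holding the JNIEnv* every native method receives. Floating-point arguments that overflow fa0-fa7 spill into the remaining integer registers before going to the stack, which is what pass_float()/pass_double() further down do; the exact reservation of a1 for the receiver or class mirror is left to the real generator.

    enum Placement { kIntReg, kFpReg, kStack };

    struct ArgCursorSketch {
        int int_used   = 1;   // a0 is taken by JNIEnv*
        int fp_used    = 0;
        int stack_used = 0;
    };

    Placement place_argument(ArgCursorSketch& s, bool is_floating_point) {
        const int kIntArgRegs = 8, kFpArgRegs = 8;
        if (is_floating_point && s.fp_used < kFpArgRegs) { s.fp_used++;  return kFpReg;  }
        if (s.int_used < kIntArgRegs)                    { s.int_used++; return kIntReg; }
        s.stack_used++;                                   // everything else goes to the native stack
        return kStack;
    }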
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +new file mode 100644 +index 0000000000..e2f31997b7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +@@ -0,0 +1,273 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 
1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + A0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +new file mode 100644 +index 0000000000..6814fa44a0 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +new file mode 100644 +index 0000000000..dbcdb7a6a4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +@@ -0,0 +1,166 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ li(AT, (long)counter_addr); ++ __ ld_w(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ ++ __ move(T0, A1); ++ // Both T0 and T4 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T4, slow); ++ ++ __ srli_d(T2, A2, 2); // offset ++ __ add_d(T0, T0, T2); ++ ++ __ li(AT, (long)counter_addr); ++ __ ld_w(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; ++ case T_BYTE: __ ld_b (V0, T0, 0); break; ++ case T_CHAR: __ ld_hu (V0, T0, 0); break; ++ case T_SHORT: __ ld_h (V0, T0, 0); break; ++ case T_INT: __ ld_w (V0, T0, 0); break; ++ case T_LONG: __ ld_d (V0, T0, 0); break; ++ case T_FLOAT: __ fld_s (F0, T0, 0); break; ++ case T_DOUBLE: __ fld_d (F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +new file mode 100644 +index 0000000000..b281f86372 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls::call. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In LOONGARCH64, sizeof(intptr_t) is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4 bytes will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter are loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value, otherwise it will be corrupted. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle.
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value, otherwise it will be corrupted. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +new file mode 100644 +index 0000000000..ea481c7fa6 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); ++ jobject value = JNIHandles::make_local(obj()); ++ if (HotSpotObjectConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (HotSpotMetaspaceConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) narrowOop); ++ TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) reference); ++ TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { ++ method = getMethodFromHotSpotMethod(hotspot_method()); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), 
virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} +diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad +new file mode 100644 +index 0000000000..80dff0c762 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ +diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad +new file mode 100644 +index 0000000000..cc3824a402 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch_64.ad +@@ -0,0 +1,13917 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, 
SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. 
++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ 
reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, 
F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def 
F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, 
F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // 
inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, 
T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); ++ ++reg_class all_reg32( ++ S8, ++ S7, ++ S5, /* S5_heapbase */ ++ /* S6, S6 TREG */ ++ S4, ++ S3, ++ S2, ++ S1, ++ S0, ++ T8, ++ /* T7, AT */ ++ T6, ++ T5, ++ /* T4, jarl T4 */ ++ T3, ++ T2, ++ T1, ++ T0, ++ A7, ++ A6, ++ A5, ++ A4, ++ A3, ++ A2, ++ A1, ++ A0 ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class all_reg( ++ S8, S8_H, ++ S7, S7_H, ++ /* S6, S6_H, S6 TREG */ ++ S5, S5_H, /* S5_heapbase */ ++ S4, S4_H, ++ S3, S3_H, ++ S2, S2_H, ++ S1, S1_H, ++ S0, S0_H, ++ T8, T8_H, ++ /* T7, T7_H, AT */ ++ T6, T6_H, ++ T5, T5_H, ++ /* T4, T4_H, jalr T4 */ ++ T3, T3_H, ++ T2, T2_H, ++ T1, T1_H, ++ T0, T0_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H ++ ); ++ ++ ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++ ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def <name> ( <int_value>, <expression>); ++// Generated Code in ad_<arch>.hpp ++// #define <name> (<expression>) ++// // value == <int_value> ++// Generated code in ad_<arch>.cpp adlc_verification() ++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call by deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++bool is_CAS(int opcode); ++bool use_AMO(int opcode); ++ ++bool unnecessary_acquire(const Node *barrier); ++bool unnecessary_release(const Node *barrier); ++bool unnecessary_volatile(const Node *barrier); ++bool needs_releasing_store(const Node *store); ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++#define V0_num A0_num ++#define V0_H_num A0_H_num ++ ++#define __ _masm.
++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. 
++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// LoongArch supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// LoongArch doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int 
CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). 
++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = 
ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_w(AT, Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ 
int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ if (this->ideal_reg() == Op_RegI) ++ st->print("\tld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("\tld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\tmove(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ 
MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), 
as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld_d AT, poll_offset[thread] #polling_page_address\n\t" ++ "ld_w AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ ld_d(RA, Address(SP, framesize - wordSize)); ++ __ ld_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize, 12)) { ++ __ addi_d(SP, SP, framesize); ++ } else { ++ __ li(AT, framesize); ++ __ add_d(SP, SP, AT); ++ } ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if 
(SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_d(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } else { ++ __ li(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const 
RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = cbuf.consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, ++ "insts must be immediately follow consts"); ++ // Materialize the constant table base. ++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); ++ jint offs = (baseaddr - __ pc()) >> 2; ++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); ++ __ pcaddi(Rtoc, offs); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // pcaddi ++ return 1 * BytesPerInstWord; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. 
++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ st->print("addi_d FP, SP, -%d \n\t", wordSize*2); ++ st->print("addi_d SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ if (Assembler::is_simm(-framesize, 12)) { ++ __ addi_d(SP, SP, -framesize); ++ } else { ++ __ li(AT, -framesize); ++ __ add_d(SP, SP, AT); ++ } ++ __ st_d(RA, Address(SP, framesize - wordSize)); ++ __ st_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize - wordSize * 2, 12)) { ++ __ addi_d(FP, SP, framesize - wordSize * 2); ++ } else { ++ __ li(AT, framesize - wordSize * 2); ++ __ add_d(FP, SP, AT); ++ } ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool use_AMO(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_StoreI: ++ case Op_StoreL: ++ case Op_StoreP: ++ case Op_StoreN: ++ case Op_StoreNKlass: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_acquire(const Node *barrier) ++{ ++ assert(barrier->is_MemBar(), "expecting a membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode* mb = barrier->as_MemBar(); ++ ++ if (mb->trailing_load_store()) { ++ Node* load_store = mb->in(MemBarNode::Precedent); ++ assert(load_store->is_LoadStore(), "unexpected graph shape"); ++ return is_CAS(load_store->Opcode()); ++ } ++ ++ return false; ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ 
assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (mem->is_Store()) { ++ assert(mem->as_Store()->is_release(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); ++ return use_AMO(mem->Opcode()); ++ } else { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_volatile(const Node *n) ++{ ++ // assert n->is_MemBar(); ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *mbvol = n->as_MemBar(); ++ ++ bool release = false; ++ if (mbvol->trailing_store()) { ++ Node* mem = mbvol->in(MemBarNode::Precedent); ++ release = use_AMO(mem->Opcode()); ++ } ++ ++ assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); ++#ifdef ASSERT ++ if (release) { ++ Node* leading = mbvol->leading_membar(); ++ assert(leading->Opcode() == Op_MemBarRelease, ""); ++ assert(leading->as_MemBar()->leading_store(), ""); ++ assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); ++ } ++#endif ++ ++ return release; ++} ++ ++bool needs_releasing_store(const Node *n) ++{ ++ // assert n->is_Store(); ++ if (UseBarriersForVolatile) { ++ // we use a normal store and dbar combination ++ return false; ++ } ++ ++ StoreNode *st = n->as_Store(); ++ ++ return st->trailing_membar() != NULL; ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. 
++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. 
++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! 
Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. 
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
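The immediate operand predicates that follow mirror LoongArch instruction immediate-field widths; immI12, for example, accepts [-2048, 2047], the range of a signed 12-bit field. Below is a minimal standalone C++ sketch (not part of the patch) of the equivalent range test; fits_simm is a hypothetical helper used only for illustration, not HotSpot's Assembler::is_simm:

#include <cassert>
#include <cstdint>

// A value fits in an n-bit signed immediate iff -2^(n-1) <= value <= 2^(n-1) - 1.
static bool fits_simm(int64_t value, unsigned bits) {
  const int64_t lo = -(int64_t(1) << (bits - 1));
  const int64_t hi = (int64_t(1) << (bits - 1)) - 1;
  return lo <= value && value <= hi;
}

int main() {
  assert(fits_simm(2047, 12));    // upper bound of immI12
  assert(fits_simm(-2048, 12));   // lower bound of immI12
  assert(!fits_simm(2048, 12));   // one past the 12-bit signed range
  assert(fits_simm(511, 10));     // upper bound of immI10
  return 0;
}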
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes 
++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ 
predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); 
++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ 
constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL2I(mRegL reg) %{ ++ match(ConvL2I reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ 
constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOp. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++operand cmpOpEqNe() %{ ++ match(Bool); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne ++ || n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++opclass mRegIorL2I( mRegI, mRegL2I); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : 
CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_reg_volatile(indirect mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(205); ++ format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(185); ++ format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP_volatile(indirect mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d R0, $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeN_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} 
++ ++instruct storeP2NKlass_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, 
immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(120); ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); ++ format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem #@storeI" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ 
mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ li(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 
'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = 
$cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} 
++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) 
{ ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct unnecessary_membar_acquire() %{ ++ predicate(unnecessary_acquire(n)); ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ format %{ "membar_acquire (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire (elided)"); ++ %} ++ ++ ins_pipe(empty); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ predicate(unnecessary_volatile(n)); ++ match(MemBarVolatile); ++ ins_cost(0); ++ ++ format %{ "membar_volatile (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile (elided)"); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional 
Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int 
flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, 
op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, 
TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = 
$dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, 
(MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = 
as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_s(FCC0, src2, src1); ++ __ fcmp_cult_s(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_d(FCC0, src2, src1); ++ __ fcmp_cult_d(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. ++ Register base = $base$$Register; ++ Register cnt = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(cnt, R0, done); ++ ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ addi_d(cnt, cnt, -1); ++ __ addi_d(base, base, wordSize); ++ __ bne(cnt, R0, Loop); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
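++    //The constant count is split: the leading (cnt % 8) doublewords are
++    //cleared with individual st_d stores, and any remaining multiple of
++    //eight doublewords is cleared in a loop unrolled to eight st_d
++    //(64 bytes) per iteration.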
++ Register base = $base$$Register; ++ long cnt = $cnt$$constant; ++ Label Loop, done; ++ ++ int tmp = cnt % 8; ++ int i = 0; ++ for (; i < tmp; i++) { ++ __ st_d(R0, base, i * 8); ++ } ++ if (cnt - tmp) { ++ __ li(AT, cnt); ++ __ alsl_d(AT, AT, base, 2); ++ __ addi_d(base, base, i * 8); ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ st_d(R0, base, 8); ++ __ st_d(R0, base, 16); ++ __ st_d(R0, base, 24); ++ __ st_d(R0, base, 32); ++ __ st_d(R0, base, 40); ++ __ st_d(R0, base, 48); ++ __ st_d(R0, base, 56); ++ __ addi_d(base, base, 64); ++ __ blt(base, AT, Loop); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len); ++ format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} ++ ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_indexofU_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression ++instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String Compress $src,$dst -> $result @ string_compress " %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// byte[] to char[] inflation ++instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, ++ mRegL tmp1, mRegL tmp2) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); ++ ++ format %{ "String Inflate $src,$dst @ string_inflate " %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$Register, $tmp2$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register 
src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_w(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ sub_w(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegLorI2L src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_d(AT, src, 63); ++ __ xorr(dst, src, AT); ++ __ sub_d(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction 
Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. 
++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} 
++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmadds $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * 
src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++instruct copySignF_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (CopySignF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_s($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ ++ match(Set dst (CopySignD src1 (Binary src2 zero))); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_d($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI 
dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 6, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L 
src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 5-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slli_w(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
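++// The matched shift pair is collapsed into a single ext_w_b, which
++// sign-extends the low byte of $src into $dst.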
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg 
= as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ 
ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 
rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = 
as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. 
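++// As with the double variants above, no explicit NaN check is emitted:
++// the pattern relies on the truncating ftintrz convert yielding zero for a
++// NaN input, as the note above states.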
++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ 
ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. 
++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != 0) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ 
++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ 
"ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ if (is_reachable()) { ++ // Here we should emit illtrap! 
++ __ stop("ShouldNotReachHere"); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ ++ match(Set prev (GetAndSetI mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ 
Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_w(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_w(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ ++ match(Set prev (GetAndSetL mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ ++ match(Set prev (GetAndSetN mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ __ amswap_db_w(AT, newv, addr); ++ __ bstrpick_d(prev, AT, 31, 0); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ ++ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_d $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_d(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_d(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_d [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ ++ match(Set newval (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_w $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_w(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_w(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_w [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and 
Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. 
++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ li(T4, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, T4, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "ld_w AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ ld_w(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ ++ format %{ "RevB_I $dst, $src" %} ++ ins_encode %{ ++ __ revb_2w($dst$$Register, $src$$Register); ++ __ slli_w($dst$$Register, $dst$$Register, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_long(mRegL dst, mRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ ++ format %{ "RevB_L $dst, $src" %} ++ ins_encode %{ ++ __ revb_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ format %{ "RevB_US $dst, $src" %} ++ ins_encode %{ ++ __ revb_2h($dst$$Register, $src$$Register); ++ __ bstrpick_d($dst$$Register, $dst$$Register, 15, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ format %{ "RevB_S $dst, $src" %} ++ ins_encode %{ ++ __ 
revb_2h($dst$$Register, $src$$Register); ++ __ ext_w_h($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// --------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX 
src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} ++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, 
$imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} ++ ins_encode %{ ++ __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} ++ ins_encode %{ ++ __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (MulVB src1 src2)); ++ 
format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} ++ ins_encode %{ ++ __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "xvmul.d $dst, $src1, $src2\t# @mul4L" %} ++ ins_encode %{ ++ __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS -------------------------------------- ++ ++instruct abs16B(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs16B" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8S(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs8S" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4I(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs4I" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2L(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs2L" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVF src)); ++ format %{ "vbitclri.w $dst, $src\t# @abs4F" %} ++ ins_encode %{ ++ __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVD src)); ++ format %{ "vbitclri.d $dst, $src\t# @abs2D" %} ++ ins_encode %{ ++ __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs32B(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs32B" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs16S(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs16S" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8I(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs8I" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4L(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs4L" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
abs8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVF src)); ++ format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} ++ ins_encode %{ ++ __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVD src)); ++ format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} ++ ins_encode %{ ++ __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS DIFF --------------------------------- ++ ++instruct absd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} ++ ins_encode %{ ++ __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "vabsd.d $dst, $src1, $src2\t# @absd2L" %} ++ ins_encode %{ ++ __ vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} ++ ins_encode %{ ++ __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} ++ ins_encode %{ ++ __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MAX -------------------------------------- ++ ++instruct max16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} ++ ins_encode %{ ++ __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} ++ ins_encode %{ ++ __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} ++ ins_encode %{ ++ __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} ++ ins_encode %{ ++ __ vmax_d($dst$$FloatRegister, 
$src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} ++ ins_encode %{ ++ __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} ++ ins_encode %{ ++ __ vfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} ++ ins_encode %{ ++ __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} ++ ins_encode %{ ++ __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} ++ ins_encode %{ ++ __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} ++ ins_encode %{ ++ __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP 
tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} ++ ins_encode %{ ++ __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} ++ ins_encode %{ ++ __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MIN -------------------------------------- ++ ++instruct min16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} ++ ins_encode %{ ++ __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} ++ ins_encode %{ ++ __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} ++ ins_encode %{ ++ __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} ++ ins_encode %{ ++ __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} ++ ins_encode %{ ++ __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ 
vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} ++ ins_encode %{ ++ __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} ++ ins_encode %{ ++ __ xvmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.h $dst, $src1, $src2\t# @min16S" %} ++ ins_encode %{ ++ __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} ++ ins_encode %{ ++ __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} ++ ins_encode %{ ++ __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} ++ ins_encode %{ ++ __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4D(vecY dst, vecY src1, 
vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} ++ ins_encode %{ ++ __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NEG -------------------------------------- ++ ++instruct neg4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVF src)); ++ format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} ++ ins_encode %{ ++ __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (NegVD src)); ++ format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} ++ ins_encode %{ ++ __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (NegVF src)); ++ format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} ++ ins_encode %{ ++ __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVD src)); ++ format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} ++ ins_encode %{ ++ __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SQRT ------------------------------------- ++ ++instruct sqrt4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVF src)); ++ format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} ++ ins_encode %{ ++ __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SqrtVD src)); ++ format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} ++ ins_encode %{ ++ __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SqrtVF src)); ++ format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} ++ ins_encode %{ ++ __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVD src)); ++ format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} ++ ins_encode %{ ++ __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MADD ------------------------------------- ++ ++instruct madd16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} ++ 
ins_encode %{ ++ __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} ++ ins_encode %{ ++ __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} ++ ins_encode %{ ++ __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} ++ ins_encode %{ ++ __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); ++ format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} ++ ins_encode %{ ++ __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} ++ ins_encode %{ ++ __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} ++ ins_encode %{ ++ __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} ++ ins_encode %{ ++ __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} ++ ins_encode %{ ++ __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} ++ ins_encode %{ ++ __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary src1 
src2))); ++ format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} ++ ins_encode %{ ++ __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} ++ ins_encode %{ ++ __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MSUB ------------------------------------- ++ ++instruct msub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} ++ ins_encode %{ ++ __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} ++ ins_encode %{ ++ __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} ++ ins_encode %{ ++ __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} ++ ins_encode %{ ++ __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} ++ ins_encode %{ ++ __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} ++ ins_encode %{ ++ __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} ++ ins_encode %{ ++ __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} ++ ins_encode %{ ++ __ 
xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} ++ ins_encode %{ ++ __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} ++ ins_encode %{ ++ __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} ++ ins_encode %{ ++ __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} ++ ins_encode %{ ++ __ xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMADD ----------------------------------- ++ ++// -src1 * src2 - src3 ++instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} ++ ins_encode %{ ++ __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} ++ ins_encode %{ ++ __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} ++ ins_encode %{ ++ __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ 
"xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} ++ ins_encode %{ ++ __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMSUB ----------------------------------- ++ ++// -src1 * src2 + src3 ++instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} ++ ins_encode %{ ++ __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} ++ ins_encode %{ ++ __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub8F" %} ++ ins_encode %{ ++ __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} ++ ins_encode %{ ++ __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ 
vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ 
format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ 
__ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) 
%{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( 
pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV (OrV src1 
src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Reduction Add -------------------------------- ++ ++instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Mul -------------------------------- ++ ++instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} ++ ins_encode %{ 
++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, 
$src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Max -------------------------------- ++ ++instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Min -------------------------------- ++ ++instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} ++ 
ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ RoundDoubleModeV ---------------------------- ++ ++instruct round2D(vecX dst, vecX src, immI rmode) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct round4D(vecY dst, vecY src, immI rmode) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- PopCount -------------------------------------- ++ ++instruct popcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popcount8I(vecY dst, vecY src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); ++ match(Set dst (PopCountVI src)); ++ format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..9720fd176d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +@@ -0,0 +1,4567 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*)branch; ++ jint *pc = (jint *)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm.
++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. 
++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to <destination> or <trampoline stub> ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++
bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ bne(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ beq(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ bceqz(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ bcnez(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, SP, AT); ++ blt(AT, R0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. 
++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ li(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. 
++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ addi_d(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ addi_d(SP, SP, 8); ++ b(E); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ bind(E); ++} ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh){ ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ 
patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ li(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); 
assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
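The repeated "smashed argument" asserts above exist because arguments are copied into A1..A3 in order, so a later argument that already lives in an earlier argument register would be overwritten by the time it is read. A toy model of that hazard, not HotSpot code; the register file and names are invented for illustration:

    #include <cassert>

    enum Reg { A0, A1, A2, A3, NUM_REGS };

    void pass_two_args(long regs[NUM_REGS], Reg arg_1, Reg arg_2) {
      // If arg_2 already lived in A1, copying arg_1 into A1 first would
      // destroy it; the real code asserts this case away instead of
      // reordering the moves.
      assert(arg_2 != A1 && "smashed argument");
      if (arg_1 != A1) regs[A1] = regs[arg_1];
      if (arg_2 != A2) regs[A2] = regs[arg_2];
    }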
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ li(AT, target(before_call)); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ li(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void 
MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize - 2 * wordSize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ move(FP, SP); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T4; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ li(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ 
st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(AT, tmp); ++ juint sps = os::get_serialize_page_shift_count(); ++ juint lsb = sps + 2; ++ juint msb = sps + log2_uint(os::vm_page_size()) - 1; ++ bstrpick_w(AT, thread, msb, lsb); ++ li(tmp, os::get_memory_serialize_page()); ++ alsl_d(tmp, AT, tmp, Address::times_2 - 1); ++ st_w(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ ld_w(AT, AT, 0); ++ addi_d(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
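Before the assembler routines below, a plain-C++ illustration of the record this comment describes may help; the field and type names here are assumptions, not the real JavaFrameAnchor layout:

    #include <cstdint>

    // Illustrative per-thread "last Java frame" record.
    struct LastJavaFrameSketch {
      intptr_t* last_sp = nullptr;  // non-null only while a walkable Java frame exists
      intptr_t* last_fp = nullptr;
      void*     last_pc = nullptr;
    };

    // Mirrors the store order used by set_last_Java_frame() below:
    // fp and pc first, sp last, so the anchor looks complete once published.
    void set_anchor(LastJavaFrameSketch& a, intptr_t* sp, intptr_t* fp, void* pc) {
      a.last_fp = fp;
      a.last_pc = pc;
      a.last_sp = sp;
    }

    // Mirrors reset_last_Java_frame(): clearing sp first makes the frame unwalkable.
    void reset_anchor(LastJavaFrameSketch& a) {
      a.last_sp = nullptr;
      a.last_fp = nullptr;
      a.last_pc = nullptr;
    }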
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, 
split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ move(A1, reg); ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ 
st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. ++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 6 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++// used registers : SCR1, SCR2 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(SCR2, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(SCR1, SCR2, 0); ++ addi_d(SCR1, SCR1, 1); ++ st_w(SCR1, SCR2, 0); ++ ++ // make sure object is 'reasonable' ++ beqz(A1, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(SCR1, oop_mask); ++ andr(SCR2, A1, SCR1); ++ li(SCR1, oop_bits); ++ bne(SCR2, SCR1, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ load_klass(SCR2, A1); ++ beqz(SCR2, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ srai_w(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ srli_d(AT, reg, 8); ++ slli_d(reg, reg, 24); ++ srli_d(reg, reg, 16); ++ orr(reg, reg, AT); ++ bstrpick_d(reg, reg, 15, 0); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srli_w(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ 
//reg : 4 1 2 1 ++ slli_w(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. 
Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. 
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ ++ // Recursive locking ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. 
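The sub_d/li/andr sequence that follows packs the "locked by the current thread" test into a single mask: after the failed CAS, tmpReg holds the mark word, which for a stack lock is a pointer into some thread's stack. A hedged C++ rendering of the same arithmetic; the page size is an assumption and the unsigned wrap-around is intentional:

    #include <cstdint>

    // Zero only when (mark - sp) is 8-byte aligned and less than one page
    // above the current SP, i.e. the BasicLock lives in this thread's frame.
    bool is_recursive_stack_lock(uintptr_t mark_word, uintptr_t sp,
                                 uintptr_t page_size /* power of two assumed */) {
      return ((mark_word - sp) & (7 - page_size)) == 0;
    }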
++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. 
The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld_d(tmpReg, Address(boxReg, 0)) ; ++ assert_different_registers(AT, tmpReg); ++ li(AT, 0x1); ++ beq(tmpReg, R0, DONE_LABEL) ; ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
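The membar(LoadStore|StoreStore) followed by a plain store of R0 below is the release-before-unlock pattern this comment describes; in portable C++ it corresponds to a release store on the owner field. The monitor layout and field name are assumed for the sketch:

    #include <atomic>

    struct MonitorSketch {
      std::atomic<void*> owner{nullptr};
    };

    // Uncontended "1-0" exit: all writes made inside the critical section
    // become visible before other threads can observe owner == nullptr.
    void unlock_uncontended(MonitorSketch& m) {
      m.owner.store(nullptr, std::memory_order_release);
    }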
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++ ++ //TODO: LA ++//In LA, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ 
int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++void MacroAssembler::push(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i ++) ++ st_d(as_Register(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i ++) ++ ld_d(as_Register(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void 
MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, 
"broken oop in encode_heap_oop_not_null2"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(r, r, 31, 0); ++ return; ++ } ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ sub_d(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(dst, src, 31, 0); ++ return; ++ } ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ sub_d(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ lu32i_d(r, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ add_d(r, r, AT); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(r, r, AT, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ move(dst, src); ++ lu32i_d(dst, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ add_d(dst, dst, src); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); 
++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ slli_d(AT, super_check_offset.register_or_noreg(), Address::times_1); ++ add_d(AT, sub_klass, AT); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()*Address::times_1); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver).
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by <intf_klass, itable_index>. ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index.
++ if (itable_index.is_constant()) { ++ li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); ++ add_d(recv_klass, recv_klass, AT); ++ } else { ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ } ++ ++ Label search, found_method; ++ ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ if (return_method) { ++ // Got a hit. ++ ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); ++ } ++ ++ ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srli_w(cnt1, cnt1, 1); ++ if (!str2_isL) srli_w(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ bge(cnt2, cnt1, Loop); ++ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ if (str1_isL) { ++ ld_bu(AT, str1, 0); ++ } else { ++ ld_hu(AT, str1, 0); ++ } ++ beq(cnt1, R0, done); ++ ++ // compare current character ++ if (str2_isL) { ++ ld_bu(cnt2, str2, 0); ++ } else { ++ ld_hu(cnt2, str2, 0); ++ } ++ addi_d(str1, str1, str1_isL ? 1 : 2); ++ bne(AT, cnt2, haveResult); ++ addi_d(str2, str2, str2_isL ? 1 : 2); ++ addi_d(cnt1, cnt1, -1); ++ b(Loop); ++ ++ bind(haveResult); ++ sub_d(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char) { ++ Label Loop, LoopEnd, True, False; ++ ++ addi_d(result, R0, 1); ++ beq(str1, str2, True); // same char[] ? ++ beqz(cnt, True); ++ ++ addi_d(AT, R0, is_char ? 
wordSize/2 : wordSize); ++ bind(Loop); ++ blt(cnt, AT, LoopEnd); ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt, cnt, is_char ? -wordSize/2 : -wordSize); ++ b(Loop); ++ ++ bind(LoopEnd); ++ beqz(cnt, True); ++ // compare current character ++ if (is_char) { ++ ld_hu(tmp1, str1, 0); ++ ld_hu(tmp2, str2, 0); ++ } else { ++ ld_bu(tmp1, str1, 0); ++ ld_bu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, is_char ? 2 : 1); ++ addi_d(str2, str2, is_char ? 2 : 1); ++ addi_d(cnt, cnt, -1); ++ b(LoopEnd); ++ ++ bind(False); ++ addi_d(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ Label Loop, End, Nega, Done; ++ ++ orr(result, R0, R0); ++ bge(R0, len, Done); ++ ++ li(AT, 0x8080808080808080); ++ ++ addi_d(len, len, -8); ++ blt(len, R0, End); ++ ++ bind(Loop); ++ ld_d(result, ary1, 0); ++ andr(result, result, AT); ++ bnez(result, Nega); ++ beqz(len, Done); ++ addi_d(len, len, -8); ++ addi_d(ary1, ary1, 8); ++ bge(len, R0, Loop); ++ ++ bind(End); ++ ld_d(result, ary1, 0); ++ slli_d(len, len, 3); ++ sub_d(len, R0, len); ++ sll_d(result, result, len); ++ andr(result, result, AT); ++ beqz(result, Done); ++ ++ bind(Nega); ++ ori(result, R0, 1); ++ ++ bind(Done); ++} ++ ++// Compress char[] to byte[]. len must be positive int. ++// jtreg: TestStringIntrinsicRangeChecks.java ++void MacroAssembler::char_array_compress(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) { ++ Label Loop, Done, Once, Fail; ++ ++ move(result, len); ++ bge(R0, result, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ li(tmp3, 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Fail); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ st_w(tmp1, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 4); ++ addi_d(src, src, 8); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_d(AT, src, 0); ++ ++ bstrpick_d(tmp1, AT, 15, 0); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 31, 16); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 1); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 47, 32); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 2); ++ b(Done); ++ ++ bind(Fail); ++ move(result, R0); ++ ++ bind(Done); ++} ++ ++// Inflate byte[] to char[]. len must be positive int. 
++// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java ++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2) { ++ Label Loop, Once, Done; ++ ++ bge(R0, len, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_wu(tmp1, src, 0); ++ ++ // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 23, 16); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 39, 32); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 55, 48); ++ ++ st_d(tmp2, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 8); ++ addi_d(src, src, 4); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_wu(tmp1, src, 0); ++ ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ st_h(tmp2, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 15, 8); ++ st_h(tmp2, dst, 2); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 23, 16); ++ st_h(tmp2, dst, 4); ++ ++ bind(Done); ++} ++ ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 4); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // UTF-16 char occupies 16 bits ++ // ch -> chchchch ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0001000100010001); ++ li(tmp3, 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 4); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 4); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 4); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 15, 0); ++ ++ bind(DO1_LOOP); ++ ld_hu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 2); ++ addi_d(result, result, 1); ++ blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject.
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case LT: 
++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ 
fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ 
case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_b(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_SHORT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; ++ case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; ++ case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_FLOAT: ++ switch (opcode) { ++ case Op_AddReductionVF: fadd_s(reg1, reg2, reg3); break; ++ case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_DOUBLE: ++ switch (opcode) { ++ case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ 
++void MacroAssembler::reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ xvpermi_d(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ vpermi_w(tmp2, tmp1, 0b00001110); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } else if (vector_size == 16) { ++ vpermi_w(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (type != T_LONG) { ++ vshuf4i_w(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_INT) { ++ vshuf4i_h(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_SHORT) { ++ vshuf4i_b(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } ++ } ++ } ++ ++ switch (type) { ++ case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; ++ case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; ++ case T_INT: vpickve2gr_w(dst, tmp1, 0); break; ++ case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (opcode == Op_MaxReductionV) { ++ slt(AT, dst, src); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else if (opcode == Op_MinReductionV) { ++ slt(AT, src, dst); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else { ++ reduce_ins_r(dst, dst, src, type, opcode); ++ } ++ switch (type) { ++ case T_BYTE: ext_w_b(dst, dst); break; ++ case T_SHORT: ext_w_h(dst, dst); break; ++ default: ++ break; ++ } ++} ++ ++void MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_w(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 4); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 5); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 6); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 7); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_d(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000001); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000010); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000011); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00001110); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++#endif // COMPILER2 ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the 
crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. ++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, 
CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { ++ ++ switch(flag) { ++ case 0x01: //equal ++ beq(op1, op2, L); ++ break; ++ case 0x02: //not_equal ++ bne(op1, op2, L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt(op2, op1, L); ++ else ++ bltu(op2, op1, L); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge(op1, op2, L); ++ else ++ bgeu(op1, op2, L); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt(op1, op2, L); ++ else ++ bltu(op1, op2, L); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge(op2, op1, L); ++ else ++ bgeu(op2, op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { ++ switch(flag) { ++ case 0x01: //equal ++ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt_long(op2, op1, *L, true /* signed */); ++ else ++ blt_long(op2, op1, *L, false); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge_long(op1, op2, *L, true /* signed */); ++ else ++ bge_long(op1, op2, *L, false); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt_long(op1, op2, *L, true /* signed */); ++ else ++ blt_long(op1, op2, *L, false); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge_long(op2, op1, *L, true /* signed */); ++ else ++ bge_long(op2, op1, *L, false); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { ++ switch(flag) { ++ case 0x01: //equal ++ beqz(op1, L); ++ break; ++ case 0x02: //not_equal ++ bnez(op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::membar(Membar_mask_bits hint){ ++ address prev = pc() - NativeInstruction::sync_instruction_size; ++ address last = code()->last_insn(); ++ if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { ++ code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. 
LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); ++ block_comment("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ Assembler::membar(hint); ++ } ++} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..1f96557543 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +@@ -0,0 +1,825 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. 
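++  // (This is the usual implicit null check: for small offsets the access
++  //  itself faults and the SEGV handler does the rest.)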
++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ static void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ 
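++  // Illustrative only -- a typical call site, mirroring how other HotSpot
++  // ports use these wrappers (the runtime entry and argument register here
++  // are just an example, not something defined in this file):
++  //
++  //   call_VM(noreg,
++  //           CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException),
++  //           A1 /* failing oop */);
++  //
++  // The call_VM variants set up and tear down the last_Java_frame anchor and
++  // (by default) check for pending exceptions; the call_VM_leaf variants are
++  // for leaf runtime functions that need neither.
++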
// last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after 
successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) 
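++  // Note: the two verify helpers below are currently no-ops on this port
++  // (empty bodies), so the verify_method_ptr/verify_klass_ptr macros cost
++  // nothing at runtime.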
++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(A0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); ++ void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); ++ void cmp_branchEqNe_off21(int flag, Register op1, Label& L); ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ void increment(Address addr, int imm = 1); ++ void decrement(Address addr, int imm = 1); ++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); } ++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); } ++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); } ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ void generate_dsin_dcos(bool isCos, 
address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ ++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} ++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} ++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } ++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } ++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop () { addi_d(SP, SP, 8); } ++ void pop2 () { addi_d(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ void push(RegSet regs) { if (regs.bits()) push(regs.bits()); } ++ void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); } ++ ++ void li(Register rd, jlong value); ++ void li(Register rd, address addr) { li(rd, (long)addr); } ++ void patchable_li52(Register rd, jlong value); ++ void lipc(Register rd, Label& L); ++ ++ void move(Register rd, Register rs) { orr(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ // Load the base of the cardtable byte map into reg. ++ void load_byte_map_base(Register reg); ++ ++ // Code for java.lang.StringCoding::hasNegatives() instrinsic. ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ // Code for java.lang.StringUTF16::compress intrinsic. 
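++  // Roughly, per the Java implementation being intrinsified: copy 'len' chars
++  // from src to dst as single bytes while every char fits in 8 bits; the
++  // result reports whether the whole range was compressed. (Sketch of the
++  // shape only -- see the shared Java code for the exact contract.)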
++ void char_array_compress(Register src, Register dst, Register len, ++ Register result, Register tmp1, ++ Register tmp2, Register tmp3); ++ ++ // Code for java.lang.StringLatin1::inflate intrinsic. ++ void byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2); ++ ++ // Find index of char in UTF-16 string ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea(Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++ void membar(Membar_mask_bits hint); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ code()->clear_last_insn(); ++ } ++ ++ // Code for java.math.BigInteger::mulAdd intrinsic. 
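++  // (The Java-level pseudo code for this routine is shown above the
++  //  definition of mul_add() in macroAssembler_loongarch.cpp.)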
++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ ++#undef VIRTUAL ++ ++public: ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++#ifdef COMPILER2 ++ void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); ++ void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++#endif ++ ++private: ++ void push(unsigned int bitset); ++ void pop(unsigned int bitset); ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++ ++#ifdef COMPILER2 ++ void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); ++ void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); ++ void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output 
that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..49302590c3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +new file mode 100644 +index 0000000000..3ed4c36651 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +@@ -0,0 +1,1625 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). 
It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. 
INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... /* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // 
ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. 
Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. ++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). 
++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. ++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. 
of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. 
See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. 
So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ 
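++      // Note: at this point q[0..3] for the jx == 2 case have been accumulated in the vector
++      // registers above (v28 for LASX, v28/v29 for LSX); the scalar fmadd_d that follows adds
++      // the last remaining term f[6]*x[0] into q[4] (v30) before branching to Q_DONE.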
fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. 
no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ 
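++    // The z >= two24B path above has already branched to Z_ZERO_CHECK_DONE; the label below
++    // handles the remaining z < two24B case of the fdlibm pseudo code, i.e. "else iq[jz] = (int) z;".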
bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. 
if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in FA0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { ++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, ++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; ++ li(SCR2, dsin_coef); ++ fld_d(s5, SCR2, 32); ++ fld_d(s6, SCR2, 40); ++ fmul_d(z, x, x); // z = x*x; ++ fld_d(s1, SCR2, 0); ++ fld_d(s2, SCR2, 8); ++ fld_d(s3, SCR2, 16); ++ fld_d(s4, SCR2, 24); ++ fmul_d(v, z, x); // v = z*x; ++ ++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { ++ fmadd_d(r, z, s6, s5); ++ // initialize "half" in current block to utilize 2nd FPU. 
However, it's ++ // not a part of this block ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, s4); ++ fmadd_d(r, z, r, s3); ++ fmadd_d(r, z, r, s2); ++ } ++ ++ if (!iyIsOne) { ++ // return x+v*(S1+z*r); ++ fmadd_d(s1, z, r, s1); ++ fmadd_d(FA0, v, s1, x); ++ } else { ++ // return x-((z*(half*y-v*r)-y)-v*S1); ++ fmul_d(s6, half, y); // half*y ++ fnmsub_d(s6, v, r, s6); // half*y-v*r ++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) ++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) ++ fadd_d(FA0, x, s6); ++ } ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. 
Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? 
__kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. Final switch use equivalent bit checks(tbz/tbnz) ++// Input ans output: ++// 1. Input for generated function: X = A0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address if pio2 table, dsin_coef = address if dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in FA0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, ++ address dsin_coef, address dcos_coef) { ++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; ++ Register X = A0, absX = A1, n = A2, ix = A3; ++ FloatRegister y0 = FA4, y1 = FA5; ++ ++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { ++ movfr2gr_d(X, FA0); ++ li(SCR2, 0x3e400000); ++ li(SCR1, 0x3fe921fb); // high word of pi/4. ++ bstrpick_d(absX, X, 62, 0); // absX ++ li(T0, 0x7ff0000000000000); ++ srli_d(ix, absX, 32); // set ix ++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) ++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return ++ blt(absX, T0, ARG_REDUCTION); ++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). ++ // Set last bit unconditionally to make it NaN ++ ori(T0, T0, 1); ++ movgr2fr_d(FA0, T0); ++ jr(RA); ++ } ++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { ++ bind(TINY_X); ++ if (isCos) { ++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000) ++ } ++ jr(RA); ++ } ++ bind(ARG_REDUCTION); /* argument reduction needed */ ++ block_comment("n = __ieee754_rem_pio2(x,y);"); { ++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); ++ } ++ block_comment("switch(n&3) {case ... }"); { ++ if (isCos) { ++ srli_w(T0, n, 1); ++ xorr(absX, n, T0); ++ andi(T0, n, 1); ++ bnez(T0, RETURN_SIN); ++ } else { ++ andi(T0, n, 1); ++ beqz(T0, RETURN_SIN); ++ } ++ generate_kernel_cos(y0, dcos_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ bind(RETURN_SIN); ++ generate_kernel_sin(y0, true, dsin_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ } ++ bind(EARLY_CASE); ++ vxor_v(y1, y1, y1); ++ if (isCos) { ++ generate_kernel_cos(FA0, dcos_coef); ++ } else { ++ generate_kernel_sin(FA0, false, dsin_coef); ++ } ++ bind(DONE); ++ jr(RA); ++} +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +new file mode 100644 +index 0000000000..e517dcd415 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +@@ -0,0 +1,564 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && 
JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. 
++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm12! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
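++        // Conceptually the dispatch below performs rm_method = recv_klass->vtable[vmindex],
++        // an indexed load from the receiver klass' embedded vtable; lookup_virtual_method
++        // emits that load using temp1_recv_klass and temp2_index.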
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). 
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +new file mode 100644 +index 0000000000..f84337424b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +new file mode 100644 +index 0000000000..9234befae3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +@@ -0,0 +1,511 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
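++// Illustrative use (a sketch only; the variable names are examples, not code from this patch):
++// the CompiledIC machinery resolves a call site and then re-points it while other threads may
++// still be executing it:
++//   NativeCall* call = nativeCall_at(call_site_pc);
++//   call->set_destination_mt_safe(resolved_entry);
++// If resolved_entry is outside the +-128 MB reach of a single bl, the call is redirected to the
++// nmethod's trampoline stub and the far destination is written into that stub instead.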
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
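++// Note: the LoongArch port does not implement this patching scheme; the routine below simply
++// calls Unimplemented().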
++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if 
(is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ address ret = (address)-1; ++ ++ // short ++ if (is_short()) { ++ ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ return ret == instruction_address() ? (address)-1 : ret; ++ } ++ ++ // far ++ if (is_far()) { ++ ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ return ret == instruction_address() ? 
(address)-1 : ret; ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x40 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +new file mode 100644 +index 0000000000..a6e9d4dd3c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +@@ -0,0 +1,528 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf, ++ sync_instruction_size = 4 ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ address trampoline_jump(CodeBuffer &cbuf, address dest); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. ++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). 
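NativeFarCall's far form is the usual two-instruction sequence: pcaddu18i materialises pc + (si20 << 18) into a scratch register, and jirl then adds (si16 << 2) and jumps, giving a reach of roughly +/-128 GiB. A standalone sketch of the target computation performed by NativeFarCall::destination() above; the field positions are taken from the patch, while the function name is illustrative:

    #include <cstdint>

    // Recover the target of a pcaddu18i/jirl pair, mirroring
    // NativeFarCall::destination(): pcaddu18i's si20 lives in bits 24..5,
    // jirl's si16 in bits 25..10.
    static uint64_t far_call_target(uint64_t pc, uint32_t insn0, uint32_t insn1) {
      int32_t si20 = (int32_t)(((insn0 >> 5) & 0xfffff) << 12) >> 12;  // sign-extend 20 bits
      int32_t si16 = (int16_t)((insn1 >> 10) & 0xffff);                // sign-extend 16 bits
      return pc + ((int64_t)si20 << 18) + ((int64_t)si16 << 2);
    }

    int main() {
      // si20 = 1, si16 = 4: target = pc + 0x40000 + 16 (opcode bits omitted; only the fields matter).
      uint32_t i0 = 1u << 5, i1 = 4u << 10;
      return far_call_target(0x1000, i0, i1) == 0x1000 + 0x40000 + 16 ? 0 : 1;
    }
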
++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. 
++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. 
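The li52 patterns recognised by NativeMovConstReg above all build a sign-extended 52-bit constant from three fields: ori supplies bits 11..0, lu12i.w bits 31..12, and lu32i.d bits 51..32, with the result sign-extended from bit 51. A hedged sketch of that composition, matching what NativeMovConstReg::data() reassembles for the lu12i.w/ori/lu32i.d pattern; Assembler::merge() in the patch is assumed to behave this way:

    #include <cstdint>
    #include <cassert>

    // Compose the run-time value of a lu12i.w / ori / lu32i.d sequence:
    //   bits 11..0  <- ori's 12-bit immediate
    //   bits 31..12 <- lu12i.w's 20-bit immediate
    //   bits 51..32 <- lu32i.d's 20-bit immediate (bit 51 extends through 63..52)
    static int64_t li52(uint32_t low12, uint32_t mid20, uint32_t hi20) {
      uint64_t v = ((uint64_t)hi20 << 32) | ((uint64_t)mid20 << 12) | (low12 & 0xfff);
      return ((int64_t)(v << 12)) >> 12;   // sign-extend from bit 51
    }

    int main() {
      assert(li52(0xabc, 0x12345, 0x00007) == INT64_C(0x000712345abc));
      assert(li52(0, 0, 0x80000) < 0);     // bit 51 set -> negative value
      return 0;
    }
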
++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +new file mode 100644 +index 0000000000..e9f0fc280d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +new file mode 100644 +index 0000000000..58f40b747c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); 
++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp +new file mode 100644 +index 0000000000..54d90167a5 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? 
names[encoding()] : "fccnoreg"; ++} +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp +new file mode 100644 +index 0000000000..da876a5083 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp +@@ -0,0 +1,495 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); 
++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 ((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 ((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define A0 R4 ++#define A1 R5 ++#define A2 R6 ++#define A3 R7 ++#define A4 R8 ++#define A5 R9 ++#define A6 R10 ++#define A7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 A0 ++#define V1 A1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define 
TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( 
f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ 
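Note the representation trick shared by RegisterImpl, FloatRegisterImpl and ConditionalFlagRegisterImpl above: a Register is never backed by a real object — the pointer value itself is the encoding, so as_Register(5)->encoding() is simply 5 and is_valid() is a range check on this. A minimal self-contained illustration of the idiom (class and function names here are illustrative, not HotSpot's; like the HotSpot classes, it leans on the fact that these member functions never touch member data):

    #include <cassert>
    #include <cstdint>

    // Stand-alone illustration of the "pointer value is the encoding" idiom.
    class RegHandleImpl;
    typedef RegHandleImpl* RegHandle;

    inline RegHandle as_reg(int encoding) {
      return (RegHandle)(intptr_t)encoding;        // no object is ever allocated
    }

    class RegHandleImpl {
     public:
      static const int number_of_registers = 32;
      int  encoding() const { return (int)(intptr_t)this; }   // 'this' carries the number
      bool is_valid() const {
        intptr_t v = (intptr_t)this;
        return 0 <= v && v < number_of_registers;
      }
      RegHandle successor() const { return as_reg(encoding() + 1); }
    };

    int main() {
      RegHandle r5 = as_reg(5);
      assert(r5->is_valid() && r5->encoding() == 5);
      assert(r5->successor()->encoding() == 6);
      assert(!as_reg(-1)->is_valid());             // the 'noreg' sentinel
      return 0;
    }
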
++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) ++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++template ++class AbstractRegSet { ++ uint32_t _bitset; ++ ++ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ AbstractRegSet() : _bitset(0) { } ++ ++ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } ++ ++ AbstractRegSet operator+(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet operator-(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet &operator+=(const AbstractRegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ AbstractRegSet &operator-=(const AbstractRegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static AbstractRegSet of(RegImpl r1) { ++ return AbstractRegSet(r1); ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2) { ++ return of(r1) + r2; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) { ++ return of(r1, r2, r3, r4) + r5; ++ } ++ ++ static AbstractRegSet range(RegImpl start, RegImpl end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return AbstractRegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++typedef AbstractRegSet RegSet; ++ ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +new file mode 100644 +index 0000000000..1caba43699 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (type() == relocInfo::internal_word_type || ++ type() == relocInfo::section_word_type) { ++ MacroAssembler::pd_patch_instruction(addr(), x); ++ } else if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +new file mode 100644 +index 0000000000..c85ca4963f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +new file mode 100644 +index 0000000000..334c783b37 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +new file mode 100644 +index 0000000000..736ed0a85f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +@@ -0,0 +1,3621 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return 
fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, gpr_offset(fp_off)); ++ ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. 
++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ li(T4, (long)destination); ++ __ jr(T4); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. 
Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ li(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! 
I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. 
++ __ move(T5, Rmethod); ++ __ jr(T4); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ } ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on LA"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -wordSize); ++ break; ++ case 
T_DOUBLE: ++ __ fst_d(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ st_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. ++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++#ifdef COMPILER1 ++ if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { ++ // Object.hashCode can pull the hashCode from the header word ++ // instead of doing a full VM transition once it's been computed. 
++ // Since hashCode is usually polymorphic at call sites we can't do ++ // this optimization at the call site without a lot of work. ++ Label slowCase; ++ Register receiver = T0; ++ Register result = V0; ++ __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); ++ // check if locked ++ __ andi(AT, result, markOopDesc::unlocked_value); ++ __ beq(AT, R0, slowCase); ++ if (UseBiasedLocking) { ++ // Check if biased and fall through to runtime if so ++ __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); ++ __ bne(AT, R0, slowCase); ++ } ++ // get hash ++ __ li(AT, markOopDesc::hash_mask_in_place); ++ __ andr (AT, result, AT); ++ // test if hashCode exists ++ __ beq (AT, R0, slowCase); ++ __ shr(result, markOopDesc::hash_shift); ++ __ jr(RA); ++ __ bind (slowCase); ++ } ++#endif // COMPILER1 ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. 
This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. 
++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least 
significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, T5); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ return nm; ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ ++ __ jump_to(ic_miss, 0); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ mov(G0, tmp); ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ add_d(FP, L2_string_off, O1); ++ __ br_null(O0, false, Assembler::pn, skip); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ restore(); ++ __ ret(); ++ ++ __ flush(); ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. 
++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. 
Everything else is either dead or captured ++ // in the vframeArray. ++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). 
Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. 
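++ // Marshal the runtime-call arguments: A0 = current thread, A1 = the trap request that arrived in this blob in T0, A2 = Unpack_uncommon_trap as the exec mode.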
++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T4, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T4, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. 
++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, reg_save.ra_offset()); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, reg_save.ra_offset()); ++ __ bne(AT, TSR, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_safepoint_poll() ++ __ ld_wu(AT, TSR, 0); ++ __ push(T5); ++ __ li(T5, 0xffc0001f); ++ __ andr(AT, AT, T5); ++ __ li(T5, 0x28800013); ++ __ xorr(AT, AT, T5); ++ __ pop(T5); ++ __ bne(AT, R0, bail); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, reg_save.ra_offset()); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. 
code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..7f73863b2e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +@@ -0,0 +1,4804 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ //-22 [ F31 ] ++ // ... ++ //-15 [ F24 ] ++ //-14 [ S8 ] ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). 
++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ S8_off = -14, ++ F24_off = -15, ++ F25_off = -16, ++ F26_off = -17, ++ F27_off = -18, ++ F28_off = -19, ++ F29_off = -20, ++ F30_off = -21, ++ F31_off = -22, ++ total_off = F31_off, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! ++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ 
bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ __ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. 
++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ lipc(AT, L_jtab1); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ __ bind(L_jtab1); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, 
to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ lipc(AT, L_jtab2); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ __ bind(L_jtab2); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lsx ++ void 
generate_disjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A0, 15); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 16); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ vld(FT4, A0, 64); ++ __ vld(FT5, A0, 80); ++ __ vld(FT6, A0, 96); ++ __ vld(FT7, A0, 112); ++ __ addi_d(A0, A0, 128); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ vst(FT4, A5, 64); ++ __ vst(FT5, A5, 80); ++ __ vst(FT6, A5, 96); ++ __ vst(FT7, A5, 112); ++ __ addi_d(A5, A5, 128); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ addi_d(A0, A0, 64); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ addi_d(A0, A0, 32); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, lt16); ++ __ vld(FT0, A0, 0); ++ __ vst(FT0, A5, 0); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lasx ++ void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A0, 31); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 32); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -256); ++ __ bgeu(A0, A4, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ xvld(FT4, A0, 128); ++ __ xvld(FT5, A0, 160); ++ __ xvld(FT6, A0, 192); ++ __ xvld(FT7, A0, 224); ++ __ addi_d(A0, A0, 256); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ xvst(FT4, A5, 128); ++ __ xvst(FT5, A5, 160); ++ __ xvst(FT6, A5, 192); ++ __ xvst(FT7, A5, 224); ++ __ addi_d(A5, A5, 256); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ addi_d(A0, A0, 128); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ addi_d(A5, A5, 128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ addi_d(A0, A0, 64); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, lt32); ++ __ xvld(FT0, A0, 0); ++ __ xvst(FT0, A5, 0); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ 
move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A2, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lsx ++ void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A2, 15); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ vld(FT4, A2, -80); ++ __ vld(FT5, A2, -96); ++ __ vld(FT6, A2, -112); ++ __ vld(FT7, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ vst(FT4, A5, -80); ++ __ vst(FT5, A5, -96); ++ __ vst(FT6, A5, -112); ++ __ vst(FT7, A5, -128); ++ __ addi_d(A5, A5, -128); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, lt16); ++ __ vld(FT0, A2, -16); ++ __ vst(FT0, A5, -16); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } 
++ ++ // conjoint large copy lasx ++ void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A2, 31); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 256); ++ __ bgeu(A4, A2, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ xvld(FT4, A2, -160); ++ __ xvld(FT5, A2, -192); ++ __ xvld(FT6, A2, -224); ++ __ xvld(FT7, A2, -256); ++ __ addi_d(A2, A2, -256); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ xvst(FT4, A5, -160); ++ __ xvst(FT5, A5, -192); ++ __ xvst(FT6, A5, -224); ++ __ xvst(FT7, A5, -256); ++ __ addi_d(A5, A5, -256); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ addi_d(A5, A5, -128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, lt32); ++ __ xvld(FT0, A2, -32); ++ __ xvst(FT0, A5, -32); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. 
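++ // Each numbered case in the dispatch table below is padded to exactly 8 instructions (32 bytes),
++ // so the entry sequence can jump straight to L + (count << 5). The short- and int-element
++ // small-copy stubs further down use the same per-case layout.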
++ void generate_byte_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ ++ if (!UseLSX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ ld_d(AT, A0, 0); ++ __ ld_b(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_b(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 5); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 5); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ if (!UseLASX) ++ return; ++ ++ // 17: ++ __ vld(F0, A0, 0); ++ __ ld_b(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_b(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 18: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 19: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 20: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); 
++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 21: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 13); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 22: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 23: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 24: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 25: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 9); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 9); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 26: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 27: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 11); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 11); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 28: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 29: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 13); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 30: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 31: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 15); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 32: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). 
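++ // Dispatch overview: element counts below { int:9, lsx:17, lasx:33 } are handled by the
++ // small-copy stub; otherwise, if both 'from' and 'to' are 8-byte aligned and an aligned
++ // large-copy entry is bound, that entry is taken, else the generic large-copy entry.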
++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. 
++ void generate_short_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
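++  // Same 32-byte-per-case computed-jump layout as the byte/short small copies;
++  // cases 7 and 8 are only emitted when UseLASX is set (the table simply ends
++  // early otherwise), matching the limits above.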
++ void generate_int_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 6: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 7: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, int log2_elem_size, bool dest_uninitialized = false) { ++ Label post, _large; ++ DecoratorSet decorators = 0; ++ BarrierSetAssembler *bs = NULL; ++ ++ if (is_oop) { ++ decorators = IN_HEAP | IS_ARRAY; ++ ++ if (disjoint) { ++ decorators |= ARRAYCOPY_DISJOINT; ++ } ++ ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2, RegSet()); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); ++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, (1 << (log2_elem_size + 1)) - 1); ++ if (is_oop) { 
++ Label skip; ++ __ bnez(T0, skip); ++ __ bl(large_aligned); ++ __ b(post); ++ __ bind(skip); ++ } else { ++ __ beqz(T0, large_aligned); ++ } ++ } ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
++ void generate_long_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 4: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes scratch1, scratch2. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Register tmp1, ++ Register tmp2, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // A3 - size_t ckoff (super_check_offset) ++ // A4 - oop ckval (super_klass) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elementscount ++ const Register ckoff = A3; // super_check_offset ++ const Register ckval = A4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(A0, A4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (S0, S1, S2, S3 are save-on-entry) ++ const Register copied_oop = S0; // actual oop copied ++ const Register count_save = S1; // orig elementscount ++ const Register start_to = S2; // destination array start address ++ const Register oop_klass = S3; // oop._klass ++ const Register tmp1 = A5; ++ const Register tmp2 = A6; ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, oop_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ ++ // Caller of this entry point must set up the argument registers. ++ __ block_comment("Entry:"); ++ ++ // Empty array: Nothing to do. 
++ __ beqz(count, L_done); ++ ++ __ push(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ move(count_save, count); ++ ++ // Copy from low to high addresses ++ __ move(start_to, to); // Save destination array start address ++ __ b(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (; count != 0; count--) { ++ // copied_oop = load_heap_oop(from++); ++ // ... generate_type_check ...; ++ // store_heap_oop(to++, copied_oop); ++ // } ++ __ align(OptoLoopAlignment); ++ ++ __ bind(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop ++ __ addi_d(to, to, UseCompressedOops ? 4 : 8); ++ __ addi_d(count, count, -1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ bind(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop ++ __ addi_d(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(oop_klass, copied_oop); // query the object klass ++ generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); ++ // ======== end loop ======== ++ ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub_d(tmp1, count_save, count); // K = partially copied oop count ++ __ nor(count, tmp1, R0); // report (-1^K) to caller ++ __ beqz(tmp1, L_done_pop); ++ ++ __ bind(L_do_card_marks); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ __ bind(L_done); ++ __ move(A0, count); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
++ // ++ address generate_unsafe_copy(const char *name) { ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ Register s = A0, d = A1, count = A2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ orr(AT, s, d); ++ __ orr(AT, AT, count); ++ ++ __ andi(AT, AT, BytesPerLong-1); ++ __ beqz(AT, L_long_aligned); ++ __ andi(AT, AT, BytesPerInt-1); ++ __ beqz(AT, L_int_aligned); ++ __ andi(AT, AT, BytesPerShort-1); ++ __ beqz(AT, L_short_aligned); ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_short_aligned); ++ __ srli_d(count, count, LogBytesPerShort); // size => short_count ++ __ b(StubRoutines::_jshort_arraycopy); ++ __ bind(L_int_aligned); ++ __ srli_d(count, count, LogBytesPerInt); // size => int_count ++ __ b(StubRoutines::_jint_arraycopy); ++ __ bind(L_long_aligned); ++ __ srli_d(count, count, LogBytesPerLong); // size => long_count ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (A0) ++ Register src_pos, // source position (A1) ++ Register dst, // destination array oo (A2) ++ Register dst_pos, // destination position (A3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ assert_different_registers(SCR1, temp); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, src_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, dst_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ move(src_pos, src_pos); ++ __ move(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // A0 - src oop ++ // A1 - src_pos (32-bits) ++ // A2 - dst oop ++ // A3 - dst_pos (32-bits) ++ // A4 - element count (32-bits) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name) { ++ Label L_failed, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = A0; // source array oop ++ const Register src_pos = A1; // source position ++ const Register dst = A2; // destination array oop ++ const Register dst_pos = A3; // destination position ++ const Register length = A4; ++ ++ // Registers used as temps ++ const Register dst_klass = A5; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ ++#ifndef PRODUCT ++ // bump this on entry, not on exit: ++ __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. 
++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if (src == NULL) return -1; ++ __ beqz(src, L_failed); ++ ++ // if (src_pos < 0) return -1; ++ __ blt(src_pos, R0, L_failed); ++ ++ // if (dst == NULL) return -1; ++ __ beqz(dst, L_failed); ++ ++ // if (dst_pos < 0) return -1; ++ __ blt(dst_pos, R0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = T0; // elements count to copy ++ const Register scratch_src_klass = T1; // array klass ++ const Register lh = T2; // layout helper ++ const Register tmp1 = T3; ++ const Register tmp2 = T4; ++ ++ // if (length < 0) return -1; ++ __ move(scratch_length, length); // length (elements count, 32-bits value) ++ __ blt(scratch_length, R0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ __ block_comment("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(SCR2, dst); ++ __ beqz(SCR2, L1); // this would be broken also ++ __ block_comment("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(lh, Address(scratch_src_klass, lh_offset)); ++ __ li(SCR1, objArray_lh); ++ __ xorr(SCR2, lh, SCR1); ++ __ beqz(SCR2, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(SCR2, dst); ++ __ xorr(SCR2, SCR2, scratch_src_klass); ++ __ bnez(SCR2, L_failed); ++ ++ // if (!src->is_Array()) return -1; ++ __ bge(lh, R0, L_failed); // i.e. (lh >= 0) ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
++#ifdef ASSERT ++ { ++ __ block_comment("assert primitive array {"); ++ Label L; ++ __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ bge(lh, SCR2, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ __ block_comment("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register scr1_offset = SCR1; // array offset ++ const Register elsize = lh; // element size ++ ++ __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask+1) - 1, ++ Klass::_lh_header_size_shift); // array_offset ++ __ add_d(src, src, scr1_offset); // src array offset ++ __ add_d(dst, dst, scr1_offset); // dst array offset ++ __ block_comment("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ bind(L_copy_bytes); ++ __ andi(tmp1, elsize, 2); ++ __ bnez(tmp1, L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_1)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_1)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jshort_arraycopy); ++ ++ __ bind(L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jint_arraycopy); ++ ++ __ bind(L_copy_longs); ++#ifdef ASSERT ++ { ++ __ block_comment("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize ++ __ li(tmp1, LogBytesPerLong); ++ __ beq(elsize, tmp1, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ __ block_comment("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ // ObjArrayKlass ++ __ bind(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(tmp1, dst); ++ __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. 
++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, scratch_length); // length ++ __ bind(L_plain_copy); ++ __ b(StubRoutines::_oop_arraycopy); ++ ++ __ bind(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ ld_w(SCR1, Address(tmp1, lh_offset)); ++ __ li(SCR2, objArray_lh); ++ __ xorr(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, length); // length (reloaded) ++ Register sco_temp = A3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); ++ // assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes SCR1, SCR2 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld_d(dst_klass, Address(dst_klass, ek_offset)); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(A3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_arraycopy. 
++ __ move(A4, dst_klass); // dst.klass.element_klass ++ __ b(StubRoutines::_checkcast_arraycopy); ++ } ++ ++ __ bind(L_failed); ++ __ li(V0, -1); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label disjoint_large_copy_lsx, conjoint_large_copy_lsx; ++ Label disjoint_large_copy_lasx, conjoint_large_copy_lasx; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ Label none; ++ ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); ++ if (UseLSX) { ++ generate_disjoint_large_copy_lsx(disjoint_large_copy_lsx, "disjoint_large_copy_lsx"); ++ generate_conjoint_large_copy_lsx(conjoint_large_copy_lsx, "conjoint_large_copy_lsx"); ++ } ++ if (UseLASX) { ++ generate_disjoint_large_copy_lasx(disjoint_large_copy_lasx, "disjoint_large_copy_lasx"); ++ generate_conjoint_large_copy_lasx(conjoint_large_copy_lasx, "conjoint_large_copy_lasx"); ++ } ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 7, true); ++ } ++ if (UseLASX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy", 9); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy_uninit", 9, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy_uninit", 7, true); ++ } ++ } else { ++ if (UseLASX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy", 5); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy_uninit", 
5, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy", 5); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy_uninit", 5, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, none, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lasx, disjoint_large_copy_lsx, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jint_disjoint_arraycopy", 9); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lasx, conjoint_large_copy_lsx, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jint_arraycopy", 9); ++ } else if (UseLSX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lsx, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lsx, none, "jbyte_arraycopy"); ++ 
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jint_arraycopy", 7); ++ } else { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, none, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, none, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, none, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, none, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, none, "jint_arraycopy", 7); ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "jlong_disjoint_arraycopy", 5); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "jlong_arraycopy", 5); ++ } else if (UseLSX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "jlong_arraycopy", 4); ++ } else { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, none, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, none, "jlong_arraycopy", 4); ++ } ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = 
generate_checkcast_copy("checkcast_arraycopy"); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, 
arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return 
entry; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, ++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) 
& 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of 
hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, 
(intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ 
address generate_updateBytesCRC32C() { ++ assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ld_w(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld_d(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ add_d(V0, A1, R0); ++ __ jr(RA); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
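++  // The enum below lays out the stub frame in words above SP: slot 0
++  // (thread_off) receives the JavaThread* that is later pushed as the first C
++  // argument, slots 1..8 hold the callee-saved registers S7..S0, and the
++  // FP / return-address slots are the ones filled in by enter().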
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
++ } ++ ++ Rhi_mn = ++reg; ++ Rlo_mn = ++reg; ++ } ++ ++ private: ++ void enter() { ++ addi_d(SP, SP, -6 * wordSize); ++ st_d(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addi_d(T0, FP, 6 * wordSize); ++ ld_d(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ st_d(Rhi_ab, FP, 5 * wordSize); ++ st_d(Rlo_ab, FP, 4 * wordSize); ++ st_d(Rhi_mn, FP, 3 * wordSize); ++ st_d(Rlo_mn, FP, 2 * wordSize); ++ st_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld_d(Rhi_ab, FP, 5 * wordSize); ++ ld_d(Rlo_ab, FP, 4 * wordSize); ++ ld_d(Rhi_mn, FP, 3 * wordSize); ++ ld_d(Rlo_mn, FP, 2 * wordSize); ++ ld_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ add_d(t0, t0, Rlo); ++ OR(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ add_d(t1, t1, Rhi); ++ add_d(t1, t1, c); ++ sltu(c, t1, t); ++ add_d(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ slli_w(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld_d(Ra, Pa_base, 0); ++ ldx_d(Rb, Pb_base, Ibn); ++ ld_d(Rm, Pm_base, 0); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ slli_w(Iam, Rj, LogBytesPerWord); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ stx_d(t0, Pm_base, Iam); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
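++  // In C, approximately (illustrative sketch only; names mirror the registers
++  // allocated above):
++  //
++  //   while (t0) {
++  //     unsigned long borrow = 0;
++  //     for (int i = 0; i < len; i++) {
++  //       unsigned long m = Pm_base[i];
++  //       unsigned long t = (m < borrow);               // borrow out of m -= borrow
++  //       m -= borrow;
++  //       borrow = (unsigned long)(m < Pn_base[i]) | t; // borrow out of m -= n[i]
++  //       m -= Pn_base[i];
++  //       Pm_base[i] = m;
++  //     }
++  //     t0 -= borrow;
++  //   }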
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ ++ // Safefetch stubs. 
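++  // SafeFetch32/SafeFetchN return *adr, or errValue if the load at adr faults;
++  // the fault_pc / continuation_pc pair recorded by generate_safefetch() above
++  // is what allows the VM to resume past such a faulting load.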
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. ++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++#endif ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +new file mode 100644 +index 0000000000..0ab07e1e9e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static juint _crc_table[]; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +new file mode 100644 +index 0000000000..1a6ea3bcde +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. 
++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 
0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. ++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. 
++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +new file mode 100644 +index 0000000000..be1d28d4b8 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +@@ -0,0 +1,2269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ __ ld_d(RA, SP, 0); ++ __ addi_d(SP, SP, 18 * wordSize); ++ __ jr(RA); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. 
++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. ++ ++ const Register crc = A0; // crc ++ const Register val = A1; // source java byte value ++ const Register tbl = A2; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ld_w(val, SP, 0); // byte value ++ __ ld_w(crc, SP, wordSize); // Initial CRC ++ ++ __ li(tbl, (long)StubRoutines::crc_table_addr()); ++ ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ nor(crc, crc, R0); // ~crc ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
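++    // In effect this entry evaluates CRC32.updateBytes(crc, b, off, len): it
++    // forms the address of the first byte to process (adding the array header
++    // for the byte[] flavour), loads crc and len from the expression stack,
++    // calls kernel_crc32, then restores the caller's SP from Rsender and returns.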
++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ ++ const Register crc = A0; // initial crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // len argument to the kernel ++ const Register tmp = A3; ++ ++ const Register end = len; // index of last element to process ++ const Register off = crc; // offset ++ ++ __ ld_w(end, SP, 0); // int end ++ __ ld_w(off, SP, wordSize); // int offset ++ __ sub_w(len, end, off); // calculate length ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // int crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // int crc ++ } ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
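++  // As a rough sketch, the simple cases below emit straight-line code of the
++  // form (shown for java.lang.Math.sqrt, purely illustrative):
++  //
++  //   F0 = sqrt(*(double*)SP);   // operand is on the expression stack
++  //   SP = Rsender;              // pop the caller's arguments
++  //   return;                    // result is left in F0
++  //
++  // The transcendental cases additionally save RA and SP in FS0/FS1, align SP
++  // and call out to the matching stub or SharedRuntime entry.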
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fabs_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fsqrt_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 1); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 2); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA2, SP, 0); ++ __ fmadd_d(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_s(FA1, SP, Interpreter::stackElementSize); ++ __ fld_s(FA2, SP, 0); ++ __ fmadd_s(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn 
= CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T4, fn); ++ __ jalr(T4); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T4; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. 
++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beqz(FSR, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ b(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ addi_d(T4, T4, 1); ++ __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ld_w(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ st_w(T3, invocation_counter); // save invocation count ++ ++ __ ld_w(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ add_d(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ __ bne_far(AT, R0, *profile_method_continue); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt_far(T3, AT, *profile_method_continue, true /* signed */); ++ } ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm(CompileThreshold, 12)) { ++ __ srli_w(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ __ beq_far(AT, R0, *overflow); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge_far(T3, AT, *overflow, true /* signed */); ++ } ++ ++ __ bind(done); ++ } ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. 
jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld_d(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bge(AT, T2, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ slli_d(T3, T2, Interpreter::logStackElementSize); ++ __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. 
++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ li(AT, max_bang_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ load_mirror(T0, Rmethod, T4); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size - 2) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T4); ++ __ st_d(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. 
++ // ++ // Rmethod: Method* ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ ++ Label slow_path; ++ const Register local_0 = A0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(local_0, Address(SP, 0)); ++ __ beqz(local_0, slow_path); ++ ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); ++ ++ // areturn ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. 
++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ load_mirror(t, method, T4); ++ // copy mirror into activation frame ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... 
| ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. 
++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. 
++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
++  __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
++
++  // restore potential result in V0,
++  // call result handler to restore potential result in ST0 & handle result
++
++  __ pop(ltos);
++  __ pop(dtos);
++
++  __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize);
++  __ jalr(t);
++
++
++  // remove activation
++  __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp
++  __ ld_d(RA, FP, frame::java_frame_return_addr_offset * wordSize);      // get return address
++  __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp
++  __ jr(RA);
++
++#ifndef CORE
++  if (inc_counter) {
++    // Handle overflow of counter and compile method
++    __ bind(invocation_counter_overflow);
++    generate_counter_overflow(continue_after_compile);
++    // entry_point is the beginning of this
++    // function and checks again for compiled code
++  }
++#endif
++  return entry_point;
++}
++
++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
++  // Quick & dirty stack overflow checking: bang the stack & handle trap.
++  // Note that we do the banging after the frame is setup, since the exception
++  // handling code expects to find a valid interpreter frame on the stack.
++  // Doing the banging earlier fails if the caller frame is not an interpreter
++  // frame.
++  // (Also, the exception throwing code expects to unlock any synchronized
++  // method receiver, so do the banging after locking the receiver.)
++
++  // Bang each page in the shadow zone. We can't assume it's been done for
++  // an interpreter frame with greater than a page of locals, so each page
++  // needs to be checked.  Only true for non-native.
++  if (UseStackBanging) {
++    const int page_size = os::vm_page_size();
++    const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size;
++    const int start_page = native_call ? n_shadow_pages : 1;
++    BLOCK_COMMENT("bang_stack_shadow_pages:");
++    for (int pages = start_page; pages <= n_shadow_pages; pages++) {
++      __ bang_stack_with_offset(pages*page_size);
++    }
++  }
++}
++
++//
++// Generic interpreted method entry to (asm) interpreter
++//
++// Layout of frame just at the entry
++//
++//   [ argument word n-1 ] <--- sp
++//     ...
++//   [ argument word 0   ]
++// assume Method* in Rmethod before calling this method.
++// prerequisites to the generated stub: the callee Method* in Rmethod
++// note you must save the caller bcp before calling the generated stub
++//
++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
++  // determine code generation flags
++  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
++
++  // Rmethod: Method*
++  // Rsender: sender's sp
++  address entry_point = __ pc();
++  // S8 be used in C2
++  __ li(S8, (long)Interpreter::dispatch_table(itos));
++  const Address invocation_counter(Rmethod,
++      in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset()));
++
++  // get parameter size (always needed)
++  __ ld_d(T3, Rmethod, in_bytes(Method::const_offset()));  //T3 --> Rmethod._constMethod
++  __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset()));
++
++  // Rmethod: Method*
++  // V0: size of parameters
++  // Rsender: sender's sp, could be different from sp + wordSize if we call via c2i
++  // get size of locals in words to T2
++  __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset()));
++  // T2 = no.
of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
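++  // Editor's note (not part of the original Loongson patch): the flag set a
++  // few lines below stays raised only across the counter/profiling checks and
++  // the stack bang; it is cleared again right after bang_stack_shadow_pages()
++  // further down, after which remove_activation unlocks normally again.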
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. 
++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. 
++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::java_frame_return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. 
++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ membar(__ AnyAny); ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal 
template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +new file mode 100644 +index 0000000000..ddb38faf44 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +new file mode 100644 +index 0000000000..4f1d226a1a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +@@ -0,0 +1,4115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T4, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T4, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ 
default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. ++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ b(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ li(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ li(AT, ConstantPoolCacheEntry::field_index_mask); ++ __ andr(off, flags, AT); ++ __ add_d(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
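// A small sketch of the flags decoding done here, assuming the usual
// ConstantPoolCacheEntry encoding (numeric values illustrative only): the
// low bits give the offset of the resolved value from `obj`, the high bits
// give the tos state that selects which push below is taken.
static const unsigned kFieldIndexMaskSketch = 0xffff;  // stand-in for field_index_mask
static const unsigned kTosStateShiftSketch  = 28;      // stand-in for tos_state_shift
static void decode_condy_flags(unsigned flags, unsigned* offset, unsigned* tos_state) {
  *offset    = flags & kFieldIndexMaskSketch;   // where the value lives, relative to obj
  *tos_state = flags >> kTosStateShiftSketch;   // what kind of value it is (itos, ftos, ...)
}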
++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ addi_d(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ ld_d(obj, field); ++ __ push(itos); ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ addi_d(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ fld_s(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ // stos ++ __ ld_h(obj, field); ++ __ push(stos); ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ addi_d(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ // btos ++ __ ld_b(obj, field); ++ __ push(btos); ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ addi_d(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ // ctos ++ __ ld_hu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ addi_d(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ // ztos ++ __ ld_bu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ addi_d(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ // ltos ++ __ ld_d(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ addi_d(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ // dtos ++ __ fld_d(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T4); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
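// Sketch of the sentinel mapping performed below (plain C++, not VM code):
// entries that resolved to the null constant are stored as a distinguished
// non-null sentinel object, so that a null slot still means "not yet
// resolved"; before pushing the result, the sentinel is turned back into a
// real NULL.
static const void* unmask_null_sentinel(const void* resolved, const void* sentinel) {
  return (resolved == sentinel) ? nullptr : resolved;
}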
++ Label notNull; ++ __ li(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ ++ // dtos ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ ++ // ltos ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. 
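// The branches above implement the following rewrite decision; this is only
// an illustration of the control flow (enum and parameter names hypothetical),
// not the generated code:
enum RewriteTarget { kNoRewrite, kFastIload, kFastIload2, kFastIcaload };
static RewriteTarget iload_rewrite_for(int next_bytecode,
                                       int bc_iload, int bc_fast_iload, int bc_caload) {
  if (next_bytecode == bc_iload)      return kNoRewrite;    // wait: only the last iload of a pair is rewritten
  if (next_bytecode == bc_fast_iload) return kFastIload2;   // iload, iload  -> fast_iload2
  if (next_bytecode == bc_caload)     return kFastIcaload;  // iload, caload -> fast_icaload
  return kFastIload;                                        // lone iload -> fast_iload
}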
++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ slli_w(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ bltu(index, AT, ok); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? 
Address::times_4 : Address::times_8) - 1); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 0); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ld_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ fld_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ fld_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. 
++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ li(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_iaccess_0); ++ __ li(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aaccess_0); ++ __ li(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_faccess_0); ++ __ li(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ fst_s(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ fst_d(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++} ++ ++// used register T2 ++void 
TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
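// Equivalent of the masking decision below in plain C++ (parameter names
// illustrative): the array's layout helper carries a bit that distinguishes
// boolean[] from byte[], and boolean stores keep only the lowest bit.
static int bastore_value(int value, int layout_helper, int boolean_diffbit) {
  return (layout_helper & boolean_diffbit) ? (value & 1) : value;
}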
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push 
d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ add_w(FSR, SSR, FSR); break; ++ case sub : __ sub_w(FSR, SSR, FSR); break; ++ case mul : __ mul_w(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sll_w(FSR, SSR, FSR); break; ++ case shr : __ sra_w(FSR, SSR, FSR); break; ++ case ushr : __ srl_w(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ add_d(FSR, T2, FSR); break; ++ case sub : __ sub_d(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ __ div_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne(FSR, R0, not_zero); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(not_zero); ++ __ mod_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mul_d(FSR, T2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ __ div_d(FSR, A2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ __ mod_d(FSR, A2, FSR); ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sll_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sra_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); 
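// A sketch of the Java-level semantics implemented by the pop/srl.d pair
// below, assuming LoongArch srl.d takes its shift amount from the low 6 bits
// of the register, so the & 63 required by Java needs no separate instruction:
static long long lushr_semantics(long long v, int s) {
  return (long long)((unsigned long long)v >> (s & 63));   // Java's v >>> s for longs
}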
++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = 
ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. 
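// The displacement just loaded is stored big-endian in the bytecode stream;
// a plain C++ sketch of the 16-bit (non-wide) decode performed above:
static int branch_displacement(const unsigned char* bcp) {
  // high byte is sign-extended (ld_b), low byte is zero-extended (ld_bu)
  return ((int)(signed char)bcp[1] << 8) | bcp[2];
}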
++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
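// A sketch of what increment_mask_and_jump does to either backedge counter
// (MDO or MethodCounters), with `increment` and `mask` as computed above;
// plain C++ for illustration, not the generated code:
static bool bump_and_check_overflow(int* counter, int increment, int mask) {
  *counter += increment;
  return (*counter & mask) == 0;   // true -> take the backedge_counter_overflow path
}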
++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ ld_w(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ st_w(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ ld_w(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ add_d(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T1, AT, dispatch); ++ } ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T2, AT, dispatch); ++ } ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge(T1, AT, backedge_counter_overflow); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_b(T3, V0, nmethod::state_offset()); ++ __ li(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // found? 
++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
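++  // The loop invariant only guarantees a[i] <= key < a[i+1] (or that the key lies
++  // below the whole table), so array[i].match must still be compared with the key
++  // before its offset is used; a mismatch falls through to the default offset.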
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ld_w(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld_d(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ li(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ld_b(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T4); ++ __ membar(__ StoreStore); ++ ++ __ jr(T4); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
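++  // temp holds the bytecode recorded in the cache entry for this byte_no; it only
++  // matches the current bytecode once the entry has been resolved.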
++ int i = (int)code; ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++//END: LA ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T4); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
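++  // A zero value at JvmtiExport::get_field_access_count_addr() means no field
++  // access watch is installed, so the VM callout below is skipped entirely.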
++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, 
notObj); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
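++  // long and double values occupy two expression-stack slots, so for ltos and
++  // dtos the object reference sits one slot deeper than for the other types.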
++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ 
patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
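++  // As in the slow path above, a zero field-modification count means no JVMTI
++  // watch is installed and the callout can be skipped.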
++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), 
FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), 
noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
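++  // recv currently holds the parameter size in stack slots (masked from flags);
++  // argument_address() turns that count into the address of the receiver, which
++  // lies furthest from the stack top among the outgoing arguments.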
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ __ lookup_virtual_method(T2, index, method); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
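++  // Such calls are marked is_forced_virtual in the cache entry flags and are
++  // dispatched through the vtable exactly like invokevirtual.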
++ Label notObjectMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
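++  // The receiver klass and the missing Method* are moved into the A1/A2 argument
++  // registers for throw_AbstractMethodErrorVerbose so the message can name both.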
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
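++  // A tag of JVM_CONSTANT_Class in the constant-pool tags array means the class is
++  // already resolved; any other tag takes the slow path, which resolves it in the VM.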
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ ++ __ bind(loop); ++ __ addi_d(T1, T1, -oopSize); ++ __ st_d(R0, T1, sizeof(oopDesc)); ++ __ bne(T1, FSR, loop); // dont clear header ++ } ++ ++ // klass in T3, ++ // initialize object header only. 
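++  // The mark word is taken from the klass prototype header when biased locking is
++  // enabled, otherwise from the global markOopDesc prototype.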
++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markOopDesc::prototype()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. ++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. 
++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ __ move(FSR, R0); ++ // Come here on failure ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ slli_d(T4, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ add_d(AT, T4, AT); ++ __ ld_d(T4, AT, 0); ++ __ jr(T4); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ alsl_d(SP, AT, SP, Address::times_8 - 1); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} ++#endif // !CC_INTERP +diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +new file mode 100644 +index 0000000000..5b9f7b7898 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +new file mode 100644 +index 0000000000..eb8f075c71 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +new file mode 100644 +index 0000000000..1a93123134 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +new file mode 100644 +index 0000000000..0a9b55d17e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +@@ -0,0 +1,397 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define T5 RT5 ++ ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_os_cpu_info(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _features |= 
get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? 
", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); ++ _features_str = strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. 
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++ UseCRC32CIntrinsics = true; ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { ++ FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +new file mode 100644 +index 0000000000..00b8e608a1 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +@@ -0,0 +1,292 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LAM = (1 << 1), ++ CPU_UAL = (1 << 2), ++ CPU_LSX = (1 << 4), ++ CPU_LASX = (1 << 5), ++ CPU_COMPLEX = (1 << 7), ++ CPU_CRYPTO = (1 << 8), ++ CPU_LBT_X86 = (1 << 10), ++ CPU_LBT_ARM = (1 << 11), ++ CPU_LBT_MIPS = (1 << 12), ++ // flags above must follow Linux HWCAP ++ CPU_LA32 = (1 << 13), ++ CPU_LA64 = (1 << 14), ++ CPU_FP = (1 << 15), ++ CPU_LLEXC = (1 << 16), ++ CPU_SCDLY = (1 << 17), ++ CPU_LLDBAR = (1 << 18), ++ CPU_CCDMA = (1 << 19), ++ CPU_LLSYNC = (1 << 20), ++ CPU_TGTSYNC 
= (1 << 21), ++ CPU_ULSYNC = (1 << 22), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ static const char* _features_str; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static void get_processor_features(); ++ static void get_os_cpu_info(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { return _features & CPU_LA32; } ++ static bool is_la64() { return _features & CPU_LA64; } ++ static bool supports_crypto() { return _features & CPU_CRYPTO; } ++ static bool supports_lsx() { return _features & CPU_LSX; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lam() { return _features & CPU_LAM; } ++ static bool supports_llexc() { return _features & CPU_LLEXC; } ++ static bool supports_scdly() { return _features & CPU_SCDLY; } ++ static bool supports_lldbar() { return _features & CPU_LLDBAR; } ++ static bool supports_ual() { return _features & CPU_UAL; } ++ static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return !supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static const char* cpu_features() { return _features_str; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP 
+diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +new file mode 100644 +index 0000000000..43caba5187 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +new file mode 100644 +index 0000000000..819eaff0bb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_FloatRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_gpr; ++ return base % FloatRegisterImpl::max_slots_per_register == 0; ++ } else { ++ return is_even(value()); ++ } ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +new file mode 100644 +index 0000000000..edb78e36da +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..2c4b60653b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +@@ -0,0 +1,322 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 6 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we use T8, T4, T2 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2, t3 = T4; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ ++ Label L_no_such_interface; ++ ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++ // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ ++ __ move(t3, t2); ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, resolved_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, holder_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ // We found a hit, move offset into T4 ++ __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); ++ ++ // Compute itableMethodEntry. 
++  const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) +
++                            itableMethodEntry::method_offset_in_bytes();
++
++  // Get methodOop and entrypoint for compiler
++  const Register method = Rmethod;
++
++  start_pc = __ pc();
++  __ li(AT, method_offset);
++  slop_delta = load_const_maxLen - (__ pc() - start_pc);
++  slop_bytes += slop_delta;
++  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
++  __ add_d(AT, AT, t2);
++  __ ldx_d(method, t1, AT);
++
++#ifdef ASSERT
++  if (DebugVtables) {
++    Label L1;
++    __ beq(method, R0, L1);
++    __ ld_d(AT, method, in_bytes(Method::from_compiled_offset()));
++    __ bne(AT, R0, L1);
++    __ stop("methodOop is null");
++    __ bind(L1);
++  }
++#endif // ASSERT
++
++  // Rmethod: methodOop
++  // T0: receiver
++  // T4: entry point
++  address ame_addr = __ pc();
++  __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset()));
++  __ jr(T4);
++
++  __ bind(L_no_such_interface);
++  // Handle IncompatibleClassChangeError in itable stubs.
++  // More detailed error message.
++  // We force resolving of the call site by jumping to the "handle
++  // wrong method" stub, and so let the interpreter runtime do all the
++  // dirty work.
++  assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
++  __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type);
++
++  masm->flush();
++  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
++
++  return s;
++}
++
++// NOTE: whenever you change the code above, don't forget to change the const here
++int VtableStub::pd_code_alignment() {
++  const unsigned int icache_line_size = wordSize;
++  return icache_line_size;
++}
+diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp
+new file mode 100644
+index 0000000000..73f021c9b7
+--- /dev/null
++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp
+@@ -0,0 +1,132 @@
++/*
++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "ci/ciMethod.hpp"
++#include "interpreter/interpreter.hpp"
++#include "runtime/frame.inline.hpp"
++
++// asm based interpreter deoptimization helpers
++int AbstractInterpreter::size_activation(int max_stack,
++                                         int temps,
++                                         int extra_args,
++                                         int monitors,
++                                         int callee_params,
++                                         int callee_locals,
++                                         bool is_top_frame) {
++  // Note: This calculation must exactly parallel the frame setup
++  // in AbstractInterpreterGenerator::generate_method_entry.
++
++  // fixed size of an interpreter frame:
++  int overhead = frame::sender_sp_offset -
++                 frame::interpreter_frame_initial_sp_offset;
++  // Our locals were accounted for by the caller (or last_frame_adjust
++  // on the transition) Since the callee parameters already account
++  // for the callee's params we only need to account for the extra
++  // locals.
++  int size = overhead +
++         (callee_locals - callee_params)*Interpreter::stackElementWords +
++         monitors * frame::interpreter_frame_monitor_size() +
++         temps* Interpreter::stackElementWords + extra_args;
++
++  return size;
++}
++
++// How much stack a method activation needs in words.
++int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
++
++  const int entry_size = frame::interpreter_frame_monitor_size();
++
++  // total overhead size: entry_size + (saved ebp thru expr stack bottom).
++  // be sure to change this if you add/subtract anything to/from the overhead area
++  const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size;
++
++  const int stub_code = 6; // see generate_call_stub
++  // return overhead_size + method->max_locals() + method->max_stack() + stub_code;
++  const int method_stack = (method->max_locals() + method->max_stack()) *
++                           Interpreter::stackElementWords;
++  return overhead_size + method_stack + stub_code;
++}
++
++void AbstractInterpreter::layout_activation(Method* method,
++                                            int tempcount,
++                                            int popframe_extra_args,
++                                            int moncount,
++                                            int caller_actual_parameters,
++                                            int callee_param_count,
++                                            int callee_locals,
++                                            frame* caller,
++                                            frame* interpreter_frame,
++                                            bool is_top_frame,
++                                            bool is_bottom_frame) {
++  // Note: This calculation must exactly parallel the frame setup
++  // in AbstractInterpreterGenerator::generate_method_entry.
++  // If interpreter_frame!=NULL, set up the method, locals, and monitors.
++  // The frame interpreter_frame, if not NULL, is guaranteed to be the
++  // right size, as determined by a previous call to this method.
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +new file mode 100644 +index 0000000000..c8c7a5d4df +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp +@@ -0,0 +1,759 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", 
"", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +new file mode 100644 +index 0000000000..102a7ba52f +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp +@@ -0,0 +1,1789 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "next instruction should not be a delay slot"); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. ++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, 
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
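
A quick standalone illustration (not part of the patch) of the offset rule spelled out in the comment before gslble above: the quad-word gslq/gssq forms only get a 9-bit signed field that the hardware shifts left by 4, so a byte offset must be 16-byte aligned and lie in [-4096, 4080]. The helper name below is made up for the sketch, and plain C++ stands in for the HotSpot assembler types.

#include <cassert>

// Mirrors the asserts in gslq()/gssq(): validate a byte offset and return the
// 9-bit field value the encoder would emit (low(off, 9) after off >> 4).
static int quadword_offset_field(int off_bytes) {
  assert((off_bytes & 0xF) == 0 && "low 4 bits of off must be 0");   // 16-byte aligned
  int scaled = off_bytes >> 4;                                       // hardware re-multiplies by 16
  assert(scaled >= -256 && scaled < 256 && "off exceeds 9 bits");    // same range as is_simm(off, 9)
  return scaled & 0x1FF;                                             // low(off, 9)
}

// e.g. quadword_offset_field(32) == 2, and quadword_offset_field(-16) == 0x1FF.
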
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +new file mode 100644 +index 0000000000..f35a06fc4e +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +new file mode 100644 +index 0000000000..4172db219b +--- /dev/null ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. 
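
The native put/get helpers above deal with misalignment; the Java-order accessors that follow only add a byte swap, because class-file data is big-endian while this target is little-endian. A minimal portable sketch of that composition, assuming u4/address are plain typedefs and using memcpy plus a GCC/Clang byte-swap builtin in place of the lwl/lwr inline assembly:

#include <cstdint>
#include <cstring>

typedef uint32_t u4;             // stand-in for HotSpot's u4
typedef unsigned char* address;  // stand-in for HotSpot's address

// Alignment-safe native (little-endian) load; the real header uses lwl/lwr asm instead.
static inline u4 sketch_get_native_u4(address p) {
  u4 v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Java class-file order is big-endian, so a Java-order read is a native read plus a
// byte swap -- the same composition get_Java_u4 uses via get_native_u4 and swap_u4.
static inline u4 sketch_get_Java_u4(address p) {
  return __builtin_bswap32(sketch_get_native_u4(p));
}
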
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +new file mode 100644 +index 0000000000..ef11827abf +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +new file mode 100644 +index 0000000000..e6d5815f42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +new file mode 100644 +index 0000000000..3cc191006d +--- /dev/null ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +new file mode 100644 +index 0000000000..068ca4799d +--- /dev/null ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp +@@ -0,0 +1,151 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. 
++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +new file mode 100644 +index 0000000000..dcc77adfec +--- /dev/null ++++ b/src/hotspot/cpu/mips/copy_mips.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. 
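
A note on the copy_conjoint_atomic template that follows (illustrative, not a change to the patch): copying element by element from low to high addresses is only correct when the destination does not overlap the tail of the source, so the direction flips when the destination lies at or above the source. This is the same guarantee std::memmove gives, except done one element at a time so each store stays atomic. A standalone sketch of the same direction choice, with arbitrary int data:

    // Sketch only: overlap-aware element-wise copy, mirroring the direction
    // logic of copy_conjoint_atomic. Buffer contents are arbitrary.
    #include <cassert>
    #include <cstddef>

    static void copy_conjoint_ints(const int* from, int* to, size_t count) {
      if (from > to) {
        for (size_t i = 0; i < count; i++) to[i] = from[i];   // copy forwards
      } else {
        for (size_t i = count; i-- > 0; )  to[i] = from[i];   // copy backwards
      }
    }

    int main() {
      int buf[6] = {1, 2, 3, 4, 5, 6};
      copy_conjoint_ints(buf, buf + 2, 4);   // overlapping shift right by two elements
      int expect[6] = {1, 2, 1, 2, 3, 4};
      for (int i = 0; i < 6; i++) assert(buf[i] == expect[i]);
      return 0;
    }
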
++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp +new file mode 100644 +index 0000000000..756ccb68f9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp +new file mode 100644 +index 0000000000..11e52b4e8f +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +new file mode 100644 +index 0000000000..c5f3a8888d +--- /dev/null ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +new file mode 100644 +index 0000000000..d49bd6290d +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.cpp +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
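
The pointer checks in safe_for_sender above all reduce to one predicate: on a downward-growing stack, a candidate sp or fp is plausible only if it lies below stack_base() and above the guard pages. A standalone sketch of that test (the parameter names are illustrative, not HotSpot API):

    // Sketch only: the stack-bounds test used throughout frame::safe_for_sender.
    // The stack grows downward, so usable memory is [stack_base - usable_size, stack_base).
    #include <cstddef>

    static inline bool within_usable_stack(const void* p,
                                           const char* stack_base,
                                           size_t usable_size) {
      const char* addr = static_cast<const char*>(p);
      return addr < stack_base && addr >= stack_base - usable_size;
    }

The additional ordering checks (saved_fp > sender_sp, unextended_sp >= sp) then ensure the candidate frames nest in the right order inside that region.
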
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +new file mode 100644 +index 0000000000..bdbfa8aaa2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.hpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... 
++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ methodOop ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
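
To make the sp/unextended_sp comment above concrete (a toy model with invented numbers, not HotSpot code): if an i2c adapter extends the caller's frame by two argument words, the raw sp moves down while the unextended sp, which oop maps are based on, stays where the caller left it.

    // Toy model only: raw sp vs. unextended sp after an adapter extension.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t wordSize      = 8;                        // 64-bit target
      const uintptr_t caller_sp     = 0x7ffff000;               // sp before extension
      const uintptr_t raw_sp        = caller_sp - 2 * wordSize; // adapter pushed 2 words
      const uintptr_t unextended_sp = caller_sp;                // basis for oop-map offsets

      assert(raw_sp < unextended_sp);        // stack grew downward
      assert(unextended_sp - raw_sp == 16);  // by exactly two words
      return 0;
    }
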
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +new file mode 100644 +index 0000000000..c408f01d69 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
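
A short note on the comparisons just below (illustrative, not part of the patch): frame::id() returns the unextended sp, and because the stack grows toward lower addresses, a younger (more recent) frame has a numerically smaller id. That is why is_younger uses < and is_older uses >. With invented addresses:

    // Sketch only: "younger" means "lower address" on a downward-growing stack.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t older_frame_id   = 0x7ffff100;  // created first, higher address
      const uintptr_t younger_frame_id = 0x7ffff080;  // pushed later, lower address
      assert(younger_frame_id < older_frame_id);
      return 0;
    }
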
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..179f7703c8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..ec5c243c3f +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..071debdc3a +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void 
BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..b97ecbcca5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..f33165334c +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. 
++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreStore); ++ } ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..49c2a0ea80 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..765259e626 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..5320a4c0ad +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +new file mode 100644 +index 0000000000..abf8141e8b +--- /dev/null ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +new file mode 100644 +index 0000000000..3bcad005d1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globals_mips.hpp +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at 
the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +new file mode 100644 +index 0000000000..6586c63965 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp +new file mode 100644 +index 0000000000..e84e37358b +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, A2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +new file mode 100644 +index 0000000000..f90dee6eef +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +new file mode 100644 +index 0000000000..e526e39d53 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void 
get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +new file mode 100644 +index 0000000000..eb35bb0633 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +@@ 
-0,0 +1,2126 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. 
If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// 
++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // beginning of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't know its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are required to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down.
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +new file mode 100644 +index 0000000000..054138ea42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +new file mode 100644 +index 0000000000..e655b2a1a8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +new file mode 100644 +index 0000000000..dccdf6a019 +--- /dev/null ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +new file mode 100644 +index 0000000000..bba5b7eee8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); ++ ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +new file mode 100644 +index 0000000000..e93237ffd9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls::calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4 bytes will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value. Otherwise it will be corrupted. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to.
++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value. Otherwise it will be corrupted. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +new file mode 100644 +index 0000000000..cc868cae55 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +@@ -0,0 +1,4257 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code).
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_mips.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ sw (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ swc1 (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ lw (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*) branch; ++ jint *pc = (jint *)branch; ++ ++ if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) { ++ //b_far: ++ // move(AT, RA); // daddu ++ // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ // nop(); ++ // lui(T9, 0); // to be patched ++ // ori(T9, 0); ++ // daddu(T9, T9, RA); ++ // move(RA, AT); ++ // jr(T9); ++ ++ assert(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op, "Not a branch label patch"); ++ if(!(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } ++ ++ int offset = target - branch; ++ if (!is_simm16(offset)) { ++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); ++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); ++ } else { ++ // revert to "beq + nop" ++ CodeBuffer cb(branch, 4 * 10); ++ MacroAssembler masm(&cb); ++#define __ masm.
++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? ++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ 
delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register 
offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. 
The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address 
entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ lw(AT, AT, 0); ++ addiu(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ ++ move(resflag, newval); ++ sc(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); 
++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. 
++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. 
++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. 
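As an illustrative aside (not part of the patch): for the non-inflated case the unlock path above mirrors the lock path; a zero displaced header marks a recursive stack-lock and nothing is written back, otherwise the saved header is CAS-ed back into the object's mark word. A minimal stand-alone C++ sketch, ignoring the inflated-monitor exit handled just below:

    #include <atomic>
    #include <cstdint>

    struct BasicLock { uintptr_t displaced_header; };

    // Returns true when the fast path releases the lock.
    bool try_stack_unlock(std::atomic<uintptr_t>& mark, BasicLock* box) {
      uintptr_t dhw = box->displaced_header;
      if (dhw == 0)
        return true;                             // recursive stack-lock: nothing to restore
      uintptr_t expected = reinterpret_cast<uintptr_t>(box);
      // Succeeds only if the mark still points at our box (i.e. it was not inflated).
      return mark.compare_exchange_strong(expected, dhw);
    }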
++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ 
ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = 
AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in 
encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
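As an illustrative aside (not part of the patch): the encode/decode routines above implement the standard compressed-oop arithmetic, subtract the heap base, shift right by the object-alignment shift, and keep NULL mapping to 0 (that is what the movz against S5_heapbase achieves). Roughly, in stand-alone C++, where heap_base and shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift():

    #include <cstdint>

    uint32_t encode_heap_oop(uintptr_t oop, uintptr_t heap_base, unsigned shift) {
      if (oop == 0) return 0;                              // NULL encodes to 0
      return static_cast<uint32_t>((oop - heap_base) >> shift);
    }

    uintptr_t decode_heap_oop(uint32_t narrow, uintptr_t heap_base, unsigned shift) {
      if (narrow == 0) return 0;                           // 0 decodes back to NULL
      return heap_base + (static_cast<uintptr_t>(narrow) << shift);
    }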
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 
4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. ++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else 
if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
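As an illustrative aside (not part of the patch): check_klass_subtype_fast_path and check_klass_subtype_slow_path above together form the usual two-level subtype test, a single load against the primary-super display (or the secondary-super cache) on the fast path, and a linear scan of the secondary supers that refreshes the cache on the slow path. A loose stand-alone C++ model; KlassModel and its fields are illustrative and do not reflect the real Klass layout:

    struct KlassModel {
      bool              is_primary;              // primary supers sit at a fixed display depth
      int               depth;
      const KlassModel* display[8];              // primary-super display
      const KlassModel* secondary_super_cache;   // last secondary super that matched
      const KlassModel* const* secondaries;      // slow-path list
      int               num_secondaries;
    };

    bool is_subtype_of(KlassModel* sub, const KlassModel* super) {
      if (sub == super) return true;                          // trivial self-check, done first
      if (super->is_primary)
        return sub->display[super->depth] == super;           // fast path: one load and compare
      if (sub->secondary_super_cache == super) return true;   // fast path: cache hit
      for (int i = 0; i < sub->num_secondaries; i++) {        // slow path: linear scan
        if (sub->secondaries[i] == super) {
          sub->secondary_super_cache = super;                 // cache the hit for next time
          return true;
        }
      }
      return false;
    }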
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. 
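As an illustrative aside (not part of the patch): ignoring the Latin-1/UTF-16 encoding variants selected by ae, the string_compare loop above computes the usual lexicographic result, the first differing element decides, otherwise the difference of lengths does. Equivalent stand-alone C++, using uint16_t elements for simplicity:

    #include <cstdint>

    int string_compare(const uint16_t* s1, int len1, const uint16_t* s2, int len2) {
      int n = len1 < len2 ? len1 : len2;        // walk the common prefix only
      for (int i = 0; i < n; i++) {
        if (s1[i] != s2[i])
          return int(s1[i]) - int(s2[i]);       // first difference decides the result
      }
      return len1 - len2;                       // otherwise the length difference does
    }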
++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? ++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
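As an illustrative aside (not part of the patch): clear_jweak_tag and the resolve_jobject logic above treat the low bit of a JNI handle as the weak tag; NULL passes through, a tagged handle is untagged before the referent is loaded, and an untagged handle is loaded directly. A simplified stand-alone C++ sketch that leaves out the GC barriers routed through access_load_at:

    #include <cstdint>

    void* resolve_jobject(uintptr_t handle) {
      if (handle == 0) return nullptr;                 // NULL handle is used as-is
      const uintptr_t weak_tag_mask = 1;               // low bit marks a weak handle
      if (handle & weak_tag_mask)
        handle &= ~weak_tag_mask;                      // strip the tag before dereferencing
      return *reinterpret_cast<void**>(handle);        // load the referenced oop
    }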
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ 
beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +new file mode 100644 +index 0000000000..55ec29e91b +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. 
++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, 
Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void 
store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++public: ++ ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 
&& (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, RT9, 0, type); ++ } else { ++ addu(AT, AT, RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), RT9, 0, type); ++ } else { ++ addu(AT, as_Register(base), RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +new file mode 100644 +index 0000000000..92c05fb726 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +new file mode 100644 +index 0000000000..e9788ac52c +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +new file mode 100644 +index 0000000000..03b65fc8ef +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +new file mode 100644 +index 0000000000..3563bbe0e5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +new file mode 100644 +index 0000000000..b4acbd83f7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips_64.ad +@@ -0,0 +1,12243 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, 
T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. 
++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( 
SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, 
A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. 
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. 
++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // TODO ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ // Add rules here. ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? 
++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], 
++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void 
MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } else { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int 
sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. 
Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand 
immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ 
match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ 
constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] 
@ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ 
predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
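The cmpOp and cmpOpU operands above pin a numeric code to each Bool condition (equal = 0x01 up to no_overflow = 0x08), and the long-offset branch instructs later in this file switch on exactly those codes when choosing a compare-and-branch sequence. The following compact C++ sketch restates that dispatch for the signed reg-reg case; describe_branch is a made-up helper that only prints the sequences as strings (the real encodings call MacroAssembler helpers such as beq_long, bne_long and slt, as jmpLoopEnd_long does further down).

#include <cstdio>

// Condition codes exactly as declared in the cmpOp/cmpOpU COND_INTER blocks.
enum Cond {
  COND_EQ = 0x01, COND_NE = 0x02, COND_GT = 0x03, COND_GE = 0x04,
  COND_LT = 0x05, COND_LE = 0x06, COND_OVF = 0x07, COND_NO_OVF = 0x08
};

// Prints the branch strategy a signed compare-and-branch uses for each code:
// ==/!= branch directly, ordered compares go through slt into AT first.
void describe_branch(int cond) {
  switch (cond) {
    case COND_EQ: std::puts("beq src1, src2, L");                    break;
    case COND_NE: std::puts("bne src1, src2, L");                    break;
    case COND_GT: std::puts("slt AT, src2, src1 ; bne AT, zero, L"); break;
    case COND_GE: std::puts("slt AT, src1, src2 ; beq AT, zero, L"); break;
    case COND_LT: std::puts("slt AT, src1, src2 ; bne AT, zero, L"); break;
    case COND_LE: std::puts("slt AT, src2, src1 ; beq AT, zero, L"); break;
    default:      std::puts("unimplemented");                        break;
  }
}

int main() { describe_branch(COND_GE); return 0; }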
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
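The memory opclass just above collects every addressing form the matcher can hand to an encoding, and each form reduces to the same four MEMORY_INTER fields: a base register, an optional index register, a shift scale and a displacement. That is also the order in which the loadstore_enc calls in the instruct encodings further down consume them. A standalone sketch of the effective-address arithmetic those fields describe follows; MemOperand and effective_address are made-up names, and plain integers stand in for register contents (a reading aid only, not HotSpot code).

#include <cstdint>
#include <cstdio>

// A matched memory operand reduced to the four fields each form above exposes
// through MEMORY_INTER: base, index, scale, disp.
struct MemOperand {
  uint64_t base;    // contents of the base register (SP for the stackSlot* forms)
  uint64_t index;   // contents of the index register, 0 when there is no index
  unsigned scale;   // left shift applied to the index (immI_0_3, i.e. 0..3)
  int64_t  disp;    // signed displacement (the stack offset for stackSlot*)
};

// Effective address as the addressing modes above define it:
//   base + (index << scale) + disp
uint64_t effective_address(const MemOperand& m) {
  return m.base + (m.index << m.scale) + (uint64_t)m.disp;
}

int main() {
  // e.g. an indIndexScale-style access: object base plus an index scaled by 8
  MemOperand m = { 0x120000000ULL, 5, 3, 16 };
  std::printf("ea = 0x%llx\n", (unsigned long long)effective_address(m));
  return 0;
}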
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
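Most of the pipe classes above differ only in which functional unit they occupy (ALU, ALU2, FPU, FPU2, MEM, BR) and in the extra write-back latency they attach to the destination: plain ALU results complete at WB, integer and float loads at WB+3, long loads and integer multiplies at WB+5, divides and long multiplies at WB+10. The small C++ table below restates those latencies as a reading aid; the numbers are copied from the pipe_class declarations above, and PipeLatency/extra_latency are invented names, not anything ADLC generates.

#include <cstdio>
#include <cstring>

// Extra result latency (cycles beyond normal write-back) per pipe class,
// transcribed from the pipe_class declarations above.
struct PipeLatency { const char* pipe_class; int extra_cycles; };

static const PipeLatency kLatencies[] = {
  { "ialu_regI_regI", 1 },   // dst : WB(write)+1
  { "ialu_loadI",     3 },   // dst : WB(write)+3
  { "fpu_loadF",      3 },   // dst : WB(write)+3
  { "ialu_loadL",     5 },   // dst : WB(write)+5
  { "ialu_mult",      5 },   // dst : WB(write)+5
  { "ialu_div",      10 },   // dst : WB(write)+10
  { "mulL_reg_reg",  10 },   // dst : WB(write)+10
};

int extra_latency(const char* pipe_class) {
  for (const PipeLatency& p : kLatencies)
    if (std::strcmp(p.pipe_class, pipe_class) == 0) return p.extra_cycles;
  return 0;   // classes not listed write back with no extra delay
}

int main() { std::printf("%d\n", extra_latency("ialu_loadI")); return 0; }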
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, 
$mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct 
loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
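++// The a0_RegP operand pins ex_oop to register A0, so matching CreateEx below
++// only records that the created exception oop is already in A0; as on x86,
++// no instructions are emitted for it.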
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ 
sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ 
switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_result = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_s(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_s(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_d(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_d(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower than div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ 
format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, 
$dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ 
sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime:f2i() to do valid convention */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, 
con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); 
++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. 
++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL 
dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, 
$src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! 
replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! 
replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! 
add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +new file mode 100644 +index 0000000000..96a147eaa5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp +@@ -0,0 +1,1821 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), 
Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ 
nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) 
& 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +new file mode 100644 +index 0000000000..fb4f99c9c6 +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +new file mode 100644 +index 0000000000..7f800eb107 +--- /dev/null ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +new file mode 100644 +index 0000000000..4af2531834 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); 
++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +new file mode 100644 +index 0000000000..4a9b22bfef +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? 
names[encoding()] : "fnoreg"; ++} ++ +diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +new file mode 100644 +index 0000000000..ea216fbcb9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.hpp +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, 
(18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define A0 ((Register)(i4_RegisterEnumValue)) ++#define A1 ((Register)(i5_RegisterEnumValue)) ++#define A2 ((Register)(i6_RegisterEnumValue)) ++#define A3 ((Register)(i7_RegisterEnumValue)) ++#define A4 ((Register)(i8_RegisterEnumValue)) ++#define A5 ((Register)(i9_RegisterEnumValue)) ++#define A6 ((Register)(i10_RegisterEnumValue)) ++#define A7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 
((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +new file mode 100644 +index 0000000000..ff8028032b +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. 
++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +new file mode 100644 +index 0000000000..1e1e170fd8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +new file mode 100644 +index 0000000000..2a0488cd01 +--- /dev/null ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +new file mode 100644 +index 0000000000..4a9791d4cb +--- /dev/null ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3879 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. 
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ 
map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. 
++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ set64(T9, (long)destination); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, does mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in a register; we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convention ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg to int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the Java nor the C calling convention will use registers ++ // for longs.
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. 
++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // 
Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
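A note for orientation while reading the three prologs above (Unpack_deopt, Unpack_reexecute, Unpack_exception) and the uncommon-trap blob further down: the unpack kind held in the `reason` register is what fetch_unroll_info()/unpack_frames() use to decide how the interpreter frames get rebuilt. The summary below is a hedged reading aid, not part of the patch; the enum name, member order and comments are from memory of deoptimization.hpp and are glosses, not quotes.

    // Hedged summary of the unpack kinds used by the blobs in this file.
    enum UnpackType {
      Unpack_deopt,          // normal deoptimization at a point in compiled code
      Unpack_exception,      // an exception is pending; oop/pc were stashed in the JavaThread
      Unpack_uncommon_trap,  // C2 uncommon trap (see generate_uncommon_trap_blob below)
      Unpack_reexecute       // re-execute the bytecode that triggered the deoptimization
    };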
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
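The pointer arithmetic of the frame-pushing loop below is easy to lose among the delay slots, so here is a small host-side C++ model of the same loop as a reading aid only (not part of the patch). The UnrollBlock values are mocked, frame sizes are in bytes as in the real code, and the last_sp/sender_sp stores plus alignment are reduced to comments.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const int64_t wordSize = 8;
      // Mocked UnrollBlock contents: two interpreter frames to rebuild.
      std::vector<int64_t> frame_sizes = {10 * wordSize, 14 * wordSize};
      int64_t caller_adjustment = 2 * wordSize;   // extra locals for the caller

      int64_t sp = 0;                 // SP as a byte offset, growing downwards
      int64_t sender_sp = sp;         // recorded before the adjustment, as in the asm
      sp -= caller_adjustment;
      for (size_t i = 0; i < frame_sizes.size(); i++) {
        int64_t body = frame_sizes[i] - 2 * wordSize;  // pc and fp are pushed by hand
        sp -= 2 * wordSize;                            // push2(pc, fp)
        int64_t fp = sp;                               // move(FP, SP)
        sp -= body;                                    // subu(SP, SP, T2): frame body
        // here the asm clears interpreter_frame_last_sp and stores sender_sp
        std::printf("frame %zu: fp=%lld sp=%lld sender_sp=%lld\n",
                    i, (long long)fp, (long long)sp, (long long)sender_sp);
        sender_sp = sp;                                // passed on to the next frame
      }
      return 0;
    }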
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
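For context on the call just made, a hedged reference (reproduced from memory of the shared runtime headers, not from this patch): unpack_frames is the entry that finalizes the skeletal frames pushed by the loop above, via layout_activation, which is why the result registers are stored before the call and reloaded from the save area afterwards.

    // Assumed declaration, for reference only:
    static BasicType unpack_frames(JavaThread* thread, int exec_mode);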
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? 
++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
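The dmultu/mfhi/mflo sequences in MACC and MACC2 above are just a 64x64-to-128-bit multiply folded into a three-word accumulator. Below is a portable sketch of what MACC computes (MACC2 is the same with the product added twice); it is not part of the patch and assumes a compiler that provides the unsigned __int128 extension.

    #include <cstdint>

    // t2:t1:t0 is the 192-bit accumulator used by the Montgomery loops.
    static inline void macc_portable(uint64_t a, uint64_t b,
                                     uint64_t &t0, uint64_t &t1, uint64_t &t2) {
      unsigned __int128 p = (unsigned __int128)a * b;        // full 128-bit product
      unsigned __int128 s = (unsigned __int128)t0 + (uint64_t)p;
      t0 = (uint64_t)s;                                      // low word
      unsigned __int128 c = (s >> 64) + (uint64_t)(p >> 64) + t1;
      t1 = (uint64_t)c;                                      // middle word
      t2 += (uint64_t)(c >> 64);                             // propagate the carry
    }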
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
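A quick worked example of the swap()/reverse_words() helpers defined above, since "word-swapping while reversing" is easy to misread; the values are illustrative only and the snippet is not part of the patch.

    unsigned long s[2] = {0x0000000100000002UL, 0x0000000300000004UL};
    unsigned long d[2];
    reverse_words(s, d, 2);
    // d[0] == 0x0000000400000003UL   (last source longword, 32-bit halves swapped)
    // d[1] == 0x0000000200000001UL   (first source longword, halves swapped)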
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +new file mode 100644 +index 0000000000..9fe2bc8377 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +@@ -0,0 +1,2162 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
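To connect the frame diagrams and the call_stub_layout enum above with the caller's side: the stub is entered from JavaCalls::call_helper through the CallStub function type declared in stubRoutines.hpp. The declaration below is reproduced from memory as a hedged reference, not part of the patch; under the n64 ABI the eight arguments arrive in A0..A7, matching the a0..a7 annotations in the diagram.

    typedef void (*CallStub)(
        address   link,               // A0: ptr. to call wrapper
        intptr_t* result,             // A1
        BasicType result_type,        // A2
        Method*   method,             // A3
        address   entry_point,        // A4
        intptr_t* parameters,         // A5
        int       size_of_parameters, // A6
        TRAPS                         // A7: thread
    );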
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, 
FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
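++    // If from and to do not share the same alignment mod 16, skip to l_9,
++    // which copies with plain 8-byte loads/stores and does not require
++    // 16-byte alignment.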
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
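++    // The loop at l_from_unaligned moves one jshort at a time from the high
++    // end, then branches back to l_unaligned to re-check the remaining
++    // count and alignment.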
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
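++    // Word offsets from SP, after the prologue below, of the slots used to
++    // save the thread pointer and the callee-saved registers S0..S7.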
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
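++    // The load in each stub may fault; the fault/continuation pcs recorded
++    // here let the signal handler resume the stub at a point that simply
++    // returns the caller-supplied errValue.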
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +new file mode 100644 +index 0000000000..920c08844e +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +new file mode 100644 +index 0000000000..358d580d52 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +new file mode 100644 +index 0000000000..19e2f29c59 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. 
++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. 
For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. 
++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. 
++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif
++  __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset()));
++  const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset()));
++  // Clear the earlyret state
++  __ move(AT, JvmtiThreadState::earlyret_inactive);
++  __ sw(AT, cond_addr);
++  __ sync();
++
++
++  __ remove_activation(state, T0,
++                       false, /* throw_monitor_exception */
++                       false, /* install_monitor_exception */
++                       true); /* notify_jvmdi */
++  __ sync();
++  __ jr(T0);
++  __ delayed()->nop();
++  return entry;
++} // end of ForceEarlyReturn support
++
++
++//-----------------------------------------------------------------------------
++// Helper for vtos entry point generation
++
++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
++                                                         address& bep,
++                                                         address& cep,
++                                                         address& sep,
++                                                         address& aep,
++                                                         address& iep,
++                                                         address& lep,
++                                                         address& fep,
++                                                         address& dep,
++                                                         address& vep) {
++  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
++  Label L;
++  fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop();
++  dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop();
++  lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop();
++  aep = __ pc(); __ push(atos); __ b(L); __ delayed()->nop();
++  bep = cep = sep =
++  iep = __ pc(); __ push(itos);
++  vep = __ pc();
++  __ bind(L);
++  generate_and_dispatch(t);
++}
++
++
++/*
++//-----------------------------------------------------------------------------
++// Generation of individual instructions
++
++// helpers for generate_and_dispatch
++
++
++InterpreterGenerator::InterpreterGenerator(StubQueue* code)
++ : TemplateInterpreterGenerator(code) {
++   generate_all(); // down here so it can be "virtual"
++}
++*/
++
++//-----------------------------------------------------------------------------
++
++// Non-product code
++#ifndef PRODUCT
++address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
++  address entry = __ pc();
++
++  // prepare expression stack
++  __ push(state);       // save tosca
++
++  // tos & tos2
++  // trace_bytecode actually needs 4 arguments; the last two are tos & tos2.
++  // This works fine on x86, but the MIPS o32 calling convention stores A2-A3
++  // into the stack slots it assumes hold tos & tos2, so an error would occur
++  // whenever the expression stack holds fewer than 2 entries.
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +new file mode 100644 +index 0000000000..46a88aba26 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +new file mode 100644 +index 0000000000..5265483830 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +@@ -0,0 +1,4688 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++    Label notNull;
++    __ set64(rarg, (long)Universe::the_null_sentinel_addr());
++    __ ld_ptr(tmp, Address(rarg));
++    __ bne(tmp, result, notNull);
++    __ delayed()->nop();
++    __ xorr(result, result, result);   // NULL object reference
++    __ bind(notNull);
++  }
++
++  if (VerifyOops) {
++    __ verify_oop(result);
++  }
++}
++
++
++// used registers: T2, T3, T1
++// T2 : index
++// T3 : cpool
++// T1 : tag
++void TemplateTable::ldc2_w() {
++  transition(vtos, vtos);
++  Label notDouble, notLong, Done;
++
++  // get index in cpool
++  __ get_unsigned_2_byte_index_at_bcp(T2, 1);
++
++  __ get_cpool_and_tags(T3, T1);
++
++  const int base_offset = ConstantPool::header_size() * wordSize;
++  const int tags_offset = Array<u1>::base_offset_in_bytes();
++
++  // get type in T1
++  if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) {
++    __ gslbx(T1, T1, T2, tags_offset);
++  } else {
++    __ daddu(AT, T1, T2);
++    __ lb(T1, AT, tags_offset);
++  }
++
++  __ daddiu(AT, T1, -JVM_CONSTANT_Double);
++  __ bne(AT, R0, notDouble);
++  __ delayed()->nop();
++
++  // dtos
++  __ dsll(T2, T2, Address::times_8);
++  if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) {
++    __ gsldxc1(FSF, T3, T2, base_offset);
++  } else {
++    __ daddu(AT, T3, T2);
++    __ ldc1(FSF, AT, base_offset);
++  }
++  __ push(dtos);
++  __ b(Done);
++  __ delayed()->nop();
++
++  __ bind(notDouble);
++  __ daddiu(AT, T1, -JVM_CONSTANT_Long);
++  __ bne(AT, R0, notLong);
++  __ delayed()->nop();
++
++  // ltos
++  __ dsll(T2, T2, Address::times_8);
++  if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) {
++    __ gsldx(FSR, T3, T2, base_offset);
++  } else {
++    __ daddu(AT, T3, T2);
++    __ ld(FSR, AT, base_offset);
++  }
++  __ push(ltos);
++  __ b(Done);
++  __ delayed()->nop();
++
++  __ bind(notLong);
++  condy_helper(Done);
++
++  __ bind(Done);
++}
++
++// We compute the actual local variable address here; the x86 port does not
++// need to, because it has a scaled-index memory addressing mode that we lack.
++void TemplateTable::locals_index(Register reg, int offset) {
++  __ lbu(reg, at_bcp(offset));
++  __ dsll(reg, reg, Address::times_8);
++  __ dsubu(reg, LVP, reg);
++}
++
++void TemplateTable::iload() {
++  iload_internal();
++}
++
++void TemplateTable::nofast_iload() {
++  iload_internal(may_not_rewrite);
++}
++
++// This method does bytecode folding for the two frequent pairs:
++//   iload iload    and    iload caload
++// used registers : T2, T3
++// T2 : bytecode
++// T3 : folded code
++void TemplateTable::iload_internal(RewriteControl rc) {
++  transition(vtos, itos);
++  if (RewriteFrequentPairs && rc == may_rewrite) {
++    Label rewrite, done;
++    // get the next bytecode in T2
++    __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
++    // if _iload, wait to rewrite to iload2. We only want to rewrite the
++    // last two iloads in a pair. Comparing against fast_iload means that
++    // the next bytecode is neither an iload nor a caload, and therefore
++    // an iload pair.
++    __ move(AT, Bytecodes::_iload);
++    __ beq(AT, T2, done);
++    __ delayed()->nop();
++
++    __ move(T3, Bytecodes::_fast_iload2);
++    __ move(AT, Bytecodes::_fast_iload);
++    __ beq(AT, T2, rewrite);
++    __ delayed()->nop();
++
++    // if _caload, rewrite to fast_icaload
++    __ move(T3, Bytecodes::_fast_icaload);
++    __ move(AT, Bytecodes::_caload);
++    __ beq(AT, T2, rewrite);
++    __ delayed()->nop();
++
++    // rewrite so iload doesn't check again.
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
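++  // (Recall: jsr pushes the bci of the bytecode that follows it as a returnAddress
++  //  and branches; the matching ret bytecode later reloads that bci from a local
++  //  variable.  The block below computes the return bci, pushes it and dispatches
++  //  at the branch target.)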
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, 
vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table 
entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
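++  // (The loop only establishes array[i].match <= key < array[i+1].match, or that
++  //  key is below every entry, so array[i].match must still be compared with key
++  //  before the offset is used.)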
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. 
It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? ++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ 
assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, 
noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
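++  // (long and double occupy two expression stack slots, all other values one,
++  //  so the receiver sits either one or two slots below the value on top.)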
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ 
pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 
++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ 
__ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case 
Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ movz(c_rarg0, T2, AT); ++ ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. 
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +new file mode 100644 +index 0000000000..6939914356 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +new file mode 100644 +index 0000000000..ac2a43edce +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +new file mode 100644 +index 0000000000..ffdcff0677 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +new file mode 100644 +index 0000000000..2e7b61390e +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't 
detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); 
++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the 
_features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +new file mode 100644 +index 0000000000..733a0af295 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize 
Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +new file mode 100644 +index 0000000000..86bd74d430 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
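// Aside, not part of the patch: the supports_*() accessors in vm_version_mips.hpp are
// simple bit tests against a feature word that the CPUCFG probing code fills in at VM
// startup. A standalone sketch of that pattern follows; the flag names and bit values
// here are arbitrary illustrations, not HotSpot's real constants.
#include <cstdint>
#include <cstdio>

enum CpuFeature : uint32_t {
  FEATURE_LSX  = 1u << 0,   // 128-bit SIMD
  FEATURE_LASX = 1u << 1,   // 256-bit SIMD
  FEATURE_LAMO = 1u << 2    // atomic memory operations
};

static uint32_t g_features = 0;

static void detect_features() {
  // In the real code this is derived from the CPUCFG instruction; hard-coded here.
  g_features = FEATURE_LSX | FEATURE_LAMO;
}

static bool supports_lsx()  { return (g_features & FEATURE_LSX)  != 0; }
static bool supports_lasx() { return (g_features & FEATURE_LASX) != 0; }
static bool supports_lamo() { return (g_features & FEATURE_LAMO) != 0; }

int main() {
  detect_features();
  std::printf("lsx=%d lasx=%d lamo=%d\n", supports_lsx(), supports_lasx(), supports_lamo());
  return 0;
}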
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +new file mode 100644 +index 0000000000..8ccc8c513c +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +new file mode 100644 +index 0000000000..12ad7361aa +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +new file mode 100644 +index 0000000000..75c23e8088 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +@@ -0,0 +1,340 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
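// Aside, not part of the patch: in the vmreg code above every general-purpose and
// floating-point register occupies two consecutive VMReg slots, which is why
// as_VMReg() shifts the encoding left by one and as_Register() shifts back right.
// A rough sketch of that round-trip with plain integers standing in for the
// register types; the 32-register count is an assumption for illustration only.
#include <cassert>
#include <cstdio>

const int MAX_GPR_SLOTS = 32 * 2;   // 32 GPRs, two VMReg slots each (illustrative)

int  gpr_to_vmreg(int gpr_encoding) { return gpr_encoding << 1; }
int  fpr_to_vmreg(int fpr_encoding) { return (fpr_encoding << 1) + MAX_GPR_SLOTS; }
bool vmreg_is_gpr(int slot)         { return slot < MAX_GPR_SLOTS; }
int  vmreg_to_gpr(int slot)         { assert(vmreg_is_gpr(slot));  return slot >> 1; }
int  vmreg_to_fpr(int slot)         { assert(!vmreg_is_gpr(slot)); return (slot - MAX_GPR_SLOTS) >> 1; }

int main() {
  int slot = gpr_to_vmreg(22);   // e.g. the frame-pointer register's encoding
  std::printf("gpr 22 -> slot %d -> gpr %d\n", slot, vmreg_to_gpr(slot));
  int fslot = fpr_to_vmreg(3);
  std::printf("fpr 3 -> slot %d -> fpr %d\n", fslot, vmreg_to_fpr(fslot));
  return 0;
}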
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. 
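// Aside, not part of the patch: stripped of slop accounting and debug checks, the
// vtable stub emitted above performs three logical steps - load the receiver's klass,
// index its vtable at a compile-time-constant slot, and jump to the selected method's
// compiled entry point. A plain-C++ sketch of that dispatch; the types and field
// names below are simplified stand-ins, not HotSpot's real layout.
#include <cstdio>

struct Method { void (*compiled_entry)(); };
struct Klass  { Method* vtable[8]; };      // fixed-size table just for illustration
struct Object { Klass* klass; };           // object header holds the klass pointer

// What the generated stub does for one hard-wired vtable_index.
void dispatch_virtual(Object* receiver, int vtable_index) {
  Klass*  k = receiver->klass;             // "load_klass(t1, T0)"
  Method* m = k->vtable[vtable_index];     // "lookup_virtual_method(...)"
  m->compiled_entry();                     // "jr T9" to Method::from_compiled
}

static void say_hello() { std::puts("hello from vtable slot 2"); }

int main() {
  Method m = { say_hello };
  Klass  k = {};
  k.vtable[2] = &m;
  Object obj = { &k };
  dispatch_virtual(&obj, 2);
  return 0;
}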
++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
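// Aside, not part of the patch: the itable stub above is a linear scan - walk the
// receiver klass's list of (interface, offset) pairs until the requested interface is
// found, then use the recorded offset plus itable_index to pick the method; a null
// interface entry means the class does not implement the interface, which the real
// stub turns into the "handle wrong method" path. A simplified sketch with ordinary
// structs (the layout below is illustrative, not HotSpot's real itable layout):
#include <cstdio>

struct Method { const char* name; };

struct ItableOffsetEntry {
  const void* interface_klass;   // null marks the end of the list
  int         method_table_off;  // index of this interface's first method
};

struct Klass {
  ItableOffsetEntry offsets[4];
  Method            methods[8]; // all interface methods, grouped per interface
};

Method* lookup_interface_method(Klass* k, const void* interface_klass, int itable_index) {
  for (ItableOffsetEntry* e = k->offsets; ; ++e) {
    if (e->interface_klass == nullptr) return nullptr;          // no such interface
    if (e->interface_klass == interface_klass)
      return &k->methods[e->method_table_off + itable_index];   // hit
  }
}

int main() {
  static int ifaceA, ifaceB;   // addresses stand in for interface klass pointers
  Klass k = {{{&ifaceA, 0}, {&ifaceB, 3}, {nullptr, 0}, {nullptr, 0}},
             {{"A.m0"}, {"A.m1"}, {"A.m2"}, {"B.m0"}, {"B.m1"}, {"B.m2"}, {"B.m3"}, {"B.m4"}}};
  Method* m = lookup_interface_method(&k, &ifaceB, 1);
  std::printf("resolved %s\n", m ? m->name : "(IncompatibleClassChangeError)");
  return 0;
}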
++ start_pc = __ pc(); ++ __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +index 847f7d61d2..f570946090 100644 +--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +@@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { +diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +index d34ea45c0b..f6b6dbdee3 100644 +--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, T_INT), tmp, info); +- __ cmp(condition, tmp, c); ++ __ cmp_branch(condition, tmp, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, +- int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, type), tmp, info); +- __ cmp(condition, reg, tmp); ++ __ cmp_branch(condition, reg, tmp, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +index ef9b0833d3..c6b25bf10e 100644 +--- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +@@ -62,3 +62,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209e..0c27cc20f3 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +index ae297ac635..c786803e0f 100644 +--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, 
int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); +- __ cmp(condition, scratch, c); ++ __ cmp_branch(condition, scratch, c, T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { +diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +index 9507ca0856..2116e9af2b 100644 +--- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +@@ -56,3 +56,23 @@ void LIR_Address::verify() const { + } + #endif // PRODUCT + ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +index e503159eb7..2e5609fec8 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +@@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + // The peephole pass fills the delay slot + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); +diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +index a09a159722..a02ffafc77 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); +- __ cmp(condition, o7opr, c); ++ __ cmp_branch(condition, o7opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); +- __ cmp(condition, reg, o7opr); ++ __ cmp_branch(condition, reg, o7opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +index c21d2c1d9a..9cebb387e2 100644 +--- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +@@ -54,3 +54,24 @@ void LIR_Address::verify() const { + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4f..7b76eb0b9e 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +index 905708a9fa..1c6774e1d6 100644 +--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +@@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +index 92277ee063..20e283e302 100644 +--- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +@@ -72,3 +72,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +index 95d7e51501..8d7b623ee7 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +@@ -263,7 +263,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, + #define __ ce->masm()-> + + void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); + } + +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +index 3687754e71..791e4ed43f 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +@@ -77,7 +77,8 @@ public: + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 74945999e7..2b8ac3dd2a 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. 
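// Aside, not part of the patch: the cmp_branch / cmp_mem_int_branch helpers added for
// the other ports are member templates parameterised on the branch-target type and
// defined out of line in c1_LIR_<arch>.cpp / c1_LIRGenerator_<arch>.cpp, so they must
// be explicitly instantiated for every target type used (Label*, BlockBegin*,
// CodeStub* in the diffs above). A minimal sketch of that define-in-cpp plus
// explicit-instantiation idiom with toy types:
#include <cstdio>

struct Label {};
struct BlockBegin {};

struct Emitter {
  template <typename T> void cmp_branch(int condition, T target);
};

// Out-of-line definition (in the real patch this lives in the per-arch .cpp file).
template <typename T>
void Emitter::cmp_branch(int condition, T target) {
  std::printf("emit compare (cond=%d) then branch to %p\n", condition, (void*)target);
}

// Explicit instantiations so other translation units can link against them.
template void Emitter::cmp_branch<Label*>(int, Label*);
template void Emitter::cmp_branch<BlockBegin*>(int, BlockBegin*);

int main() {
  Emitter e;
  Label l;
  BlockBegin b;
  e.cmp_branch(0, &l);
  e.cmp_branch(1, &b);
  return 0;
}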
++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -3966,6 +3972,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) ++ MIPS64_ONLY(4 * M) ++ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA + SPARC_ONLY(4 * M); + #endif // ZERO + +diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +new file mode 100644 +index 0000000000..8403e7838a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
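// Aside, not part of the patch: the ll.w/sc.w (and ll.d/sc.d) loops in this header
// implement the usual load-linked/store-conditional retry pattern - reload the value,
// bail out if it no longer matches the expected one, otherwise attempt the
// conditional store and retry until it sticks. Functionally that is the
// compare-and-swap below; std::atomic is used here only to illustrate the contract
// the hand-written assembly provides, and on LoongArch the compiler lowers it to a
// very similar ll/sc loop.
#include <atomic>
#include <cstdio>

// Returns the value observed before the operation, like PlatformCmpxchg does.
int cmpxchg(std::atomic<int>& dest, int compare_value, int exchange_value) {
  int observed = compare_value;
  dest.compare_exchange_strong(observed, exchange_value, std::memory_order_seq_cst);
  return observed;   // == compare_value on success, the current value otherwise
}

int main() {
  std::atomic<int> v{41};
  std::printf("first attempt saw %d\n", cmpxchg(v, 41, 42));  // succeeds, prints 41
  std::printf("second attempt saw %d\n", cmpxchg(v, 41, 43)); // fails, prints 42
  std::printf("final value %d\n", v.load());                  // 42
  return 0;
}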
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..c9f675baca +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..826c1fe39a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void 
pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +new file mode 100644 +index 0000000000..0b5247aa0b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. 
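// Aside, not part of the patch: pd_disjoint_words above special-cases small copies
// with a fall-through switch (a Duff's-device-style unroll of up to eight word
// assignments) and only falls back to memcpy for larger counts. The same shape as a
// standalone function over plain 64-bit words:
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

static void copy_words(const uint64_t* from, uint64_t* to, size_t count) {
  switch (count) {
  case 8: to[7] = from[7];   // intentional fall-through all the way down
  case 7: to[6] = from[6];
  case 6: to[5] = from[5];
  case 5: to[4] = from[4];
  case 4: to[3] = from[3];
  case 3: to[2] = from[2];
  case 2: to[1] = from[1];
  case 1: to[0] = from[0];
  case 0: break;
  default:
    std::memcpy(to, from, count * sizeof(uint64_t));
    break;
  }
}

int main() {
  uint64_t src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint64_t dst[8] = {};
  copy_words(src, dst, 5);
  for (int i = 0; i < 8; i++) std::printf("%llu ", (unsigned long long)dst[i]);
  std::printf("\n");   // 1 2 3 4 5 0 0 0
  return 0;
}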
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +new file mode 100644 +index 0000000000..ebd73af0c5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +new file mode 100644 +index 0000000000..5429a1055a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +new file mode 100644 +index 0000000000..cf5fff0d04 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
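++// On LoongArch the frame is rebuilt from the saved signal context: the PC is
++// read from uc_mcontext.__pc, the SP from general register $r3 and the FP
++// from $r22 (see the REG_SP/REG_FP definitions above).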
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ 
// stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). 
++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. 
++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. 
++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +new file mode 100644 +index 0000000000..fa02f8ba2f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..cf3a596387 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +new file mode 100644 +index 0000000000..a1a9f181bd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a3ac28ebd3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a39cb79bb1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +new file mode 100644 +index 0000000000..edc148ef91 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_LOONGARCH_LAM ++#define HWCAP_LOONGARCH_LAM (1 << 1) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_UAL ++#define HWCAP_LOONGARCH_UAL (1 << 2) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_COMPLEX ++#define HWCAP_LOONGARCH_COMPLEX (1 << 7) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_CRYPTO ++#define HWCAP_LOONGARCH_CRYPTO (1 << 8) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_X86 ++#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_ARM ++#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_MIPS ++#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#endif ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ STATIC_ASSERT(CPU_LAM == HWCAP_LOONGARCH_LAM); ++ STATIC_ASSERT(CPU_UAL == HWCAP_LOONGARCH_UAL); ++ STATIC_ASSERT(CPU_LSX == HWCAP_LOONGARCH_LSX); ++ STATIC_ASSERT(CPU_LASX == HWCAP_LOONGARCH_LASX); ++ STATIC_ASSERT(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX); ++ STATIC_ASSERT(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO); ++ STATIC_ASSERT(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86); ++ STATIC_ASSERT(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM); ++ STATIC_ASSERT(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS); ++ ++ _features = auxv & ( ++ HWCAP_LOONGARCH_LAM | ++ HWCAP_LOONGARCH_UAL | ++ HWCAP_LOONGARCH_LSX | ++ HWCAP_LOONGARCH_LASX | ++ HWCAP_LOONGARCH_COMPLEX | ++ HWCAP_LOONGARCH_CRYPTO | ++ HWCAP_LOONGARCH_LBT_X86 | ++ HWCAP_LOONGARCH_LBT_ARM | ++ HWCAP_LOONGARCH_LBT_MIPS); ++} +diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +new file mode 100644 +index 0000000000..cd7cecad63 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
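++// CmpxchgByteUsingInt (shared runtime code) emulates the byte compare-and-swap
++// with a 4-byte cmpxchg on the enclosing aligned word.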
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..5b5cd10aa5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
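++// bswap_16/32/64 below are the glibc byte-swap macros declared in <byteswap.h>.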
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..3fd6ef7b36 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, 
size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +new file mode 100644 +index 0000000000..f1599ac5f1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. 
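The copy_linux_mips.inline.hpp helpers above follow the usual HotSpot split: 'conjoint' copies may have overlapping source and destination and therefore go through memmove (or an element-by-element atomic loop), while 'disjoint' copies never overlap and can be unrolled with deliberate switch fall-through or handed to memcpy. A tiny, hypothetical standalone demonstration of why the overlap distinction matters (not part of the patch):

#include <cstdio>
#include <cstring>

int main() {
  long buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  // Shift elements 0..5 up by two slots: source and destination overlap,
  // so memmove (the conjoint case) is required; memcpy would be undefined.
  std::memmove(buf + 2, buf, 6 * sizeof(long));
  for (long v : buf) std::printf("%ld ", v);   // prints: 0 1 0 1 2 3 4 5
  std::printf("\n");
  return 0;
}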
++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s +new file mode 100644 +index 0000000000..36c8d810c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +new file mode 100644 +index 0000000000..bf9d679730 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. 
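In the OrderAccess implementation that follows, every barrier flavor (loadload, storestore, loadstore, storeload, acquire, release, fence) expands to the same inlasm_sync() macro: a full MIPS 'sync' instruction, weakened to a plain compiler barrier ('nop' with a memory clobber) when os::is_ActiveCoresMP() reports that UseActiveCoresMP is set and only one processor was active at startup. A rough portable sketch of those two cases, assuming nothing HotSpot-specific (the helper names are hypothetical):

#include <atomic>

// Hypothetical equivalents: the multi-core path of inlasm_sync() amounts to
// a full sequentially consistent fence; the single-core path keeps only the
// compiler-level ordering.
static inline void full_fence()     { std::atomic_thread_fence(std::memory_order_seq_cst); }
static inline void compiler_fence() { std::atomic_signal_fence(std::memory_order_seq_cst); }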
++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +new file mode 100644 +index 0000000000..d035d8edbb +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +@@ -0,0 +1,1020 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
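The accessors above pull pc, sp and fp straight out of the MIPS ucontext: uc_mcontext.pc plus gregs[29] ($sp) and gregs[30] ($fp), per the REG_SP/REG_FP defines. A minimal, hypothetical SA_SIGINFO handler showing the same fields outside HotSpot — the names, the deliberate fault, and the MIPS/Linux field layout are assumptions for illustration only:

#include <signal.h>
#include <cstdio>
#include <ucontext.h>
#include <unistd.h>

static void crash_handler(int sig, siginfo_t* info, void* ucVoid) {
  ucontext_t* uc = static_cast<ucontext_t*>(ucVoid);
  // Same fields as os::Linux::ucontext_get_pc()/get_sp() above.
  std::fprintf(stderr, "sig=%d addr=%p pc=%#llx sp=%#llx\n",
               sig, info->si_addr,
               (unsigned long long)uc->uc_mcontext.pc,
               (unsigned long long)uc->uc_mcontext.gregs[29]);
  _exit(1);
}

int main() {
  struct sigaction sa = {};
  sa.sa_sigaction = crash_handler;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);
  *(volatile int*)0 = 0;  // deliberately fault to exercise the handler
  return 0;
}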
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
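os::get_previous_fp() above scans backwards over its own prologue, decoding raw MIPS instruction words until it finds the addiu/daddiu $sp,$sp,-imm that allocated the frame, then undoes that adjustment; bitfield(insn, 16, 16) isolates the opcode/rs/rt half-word (0x27bd or 0x67bd when both registers are $sp) and the low 16 bits hold the signed stack adjustment. A small, hypothetical standalone decoder for that encoding (not part of the patch):

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring HotSpot's bitfield(): extract 'len' bits
// starting at bit 'lo' of a 32-bit MIPS instruction word.
static uint32_t bits(uint32_t insn, int lo, int len) {
  return (insn >> lo) & ((1u << len) - 1u);
}

int main() {
  uint32_t insn = 0x67bdfff0;                   // daddiu $sp,$sp,-16
  uint32_t top  = bits(insn, 16, 16);           // 0x67bd: daddiu, rs = rt = $29
  int16_t  imm  = (int16_t)bits(insn, 0, 16);   // -16: the frame adjustment
  std::printf("top=%#x imm=%d\n", top, imm);
  return 0;
}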
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. 
++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = 
(inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. 
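The execution-protection branch below is an "unguard and retry" fallback: when the faulting address looks like VM code (or UnguardOnExecutionViolation is set above 1), the page is flipped to read/write/execute via os::protect_memory(..., MEM_PROT_RWX) and the handler resumes at the same pc so the access is retried. A minimal, hypothetical sketch of the underlying system call — the helper name is an assumption; HotSpot itself goes through os::protect_memory:

#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

// Hypothetical helper: make the page containing fault_addr RWX, as the
// UnguardOnExecutionViolation path below does before retrying the access.
static bool unguard_page(void* fault_addr) {
  uintptr_t page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
  void* page = (void*)((uintptr_t)fault_addr & ~(page_size - 1));
  return mprotect(page, page_size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0;
}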
++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++Untested("Unimplemented yet"); ++ bool instr_spans_page_boundary = ++/* ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++*/ ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++Untested("Unimplemented yet"); ++/* ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++*/ ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. 
++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++//size_t os::Linux::min_stack_allowed = 96 * K; ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++ ++/* ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. 
Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++*/ ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp 
- 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // MIPS does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +new file mode 100644 +index 0000000000..c07d08156f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..93490345f0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +new file mode 100644 +index 0000000000..dbe8efe164 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +new file mode 100644 +index 0000000000..8b8dbe219c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +new file mode 100644 +index 0000000000..b7454bf045 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +new file mode 100644 +index 0000000000..93e4bea04c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" +diff --git a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +index 2b0fa83c1a..270e0bc180 100644 +--- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp ++++ b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +@@ -85,4 +85,6 @@ const uintptr_t ZPlatformAddressSpaceSize = ((uintptr_t)1 << ZPlatformAddres + + const size_t ZPlatformCacheLineSize = 64; + ++const bool ZPlatformLoadBarrierTestResultInRegister = false; ++ + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP +diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +index 4912f88056..a420f7807b 100644 +--- a/src/hotspot/share/asm/codeBuffer.cpp ++++ b/src/hotspot/share/asm/codeBuffer.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -351,6 +357,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp +index aff12954b3..caa93fc804 100644 +--- a/src/hotspot/share/c1/c1_Compiler.cpp ++++ b/src/hotspot/share/c1/c1_Compiler.cpp +@@ -44,6 +44,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -211,7 +217,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: +-#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) ++#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + #endif +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index e30d39f73d..7461b7449a 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -250,6 +250,18 @@ void LIR_Op2::verify() const { + #endif + } + ++void LIR_Op4::verify() const { ++#ifdef ASSERT ++ switch (code()) { ++ case lir_cmp_cmove: ++ break; ++ ++ default: ++ assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), ++ "can't produce oops from arith"); ++ } ++#endif ++} + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -308,6 +320,56 @@ void LIR_OpBranch::negate_cond() { + } + + ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(stub->entry()) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(stub) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(NULL) ++ , _stub(NULL) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(ublock) ++ , _stub(NULL) { ++} ++ ++void LIR_OpCmpBranch::change_block(BlockBegin* b) { ++ assert(_block != NULL, "must have old block"); ++ assert(_block->label() == label(), "must be equal"); ++ ++ _block = b; ++ _label = b->label(); ++} ++ ++void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { ++ assert(_ublock != NULL, "must have old block"); ++ ++ _ublock = b; ++} ++ ++void LIR_OpCmpBranch::negate_cond() { ++ switch (condition()) { ++ case lir_cond_equal: set_condition(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_condition(lir_cond_equal); break; ++ case lir_cond_less: set_condition(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_condition(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_condition(lir_cond_less); break; ++ case lir_cond_greater: set_condition(lir_cond_lessEqual); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, + LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, + bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, +@@ -509,10 +571,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); +-#ifdef PPC32 +- if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); +- if 
(opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +-#endif ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + do_stub(opConvert->_stub); + + break; +@@ -611,6 +670,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_OpCmpBranch; ++ case lir_cmp_branch: // may have info, input and result register always invalid ++ case lir_cmp_float_branch: // may have info, input and result register always invalid ++ { ++ assert(op->as_OpCmpBranch() != NULL, "must be"); ++ LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; ++ assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && ++ opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opCmpBranch->_info) do_info(opCmpBranch->_info); ++ if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); ++ if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); ++ if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); ++ if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); ++ assert(opCmpBranch->_result->is_illegal(), "not used"); ++ ++ break; ++ } ++ + // special handling for cmove: right input operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmove: +@@ -711,6 +789,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_Op4 ++ // special handling for cmp cmove: src2(opr4) operand must not be equal ++ // to the result operand, otherwise the backend fails ++ case lir_cmp_cmove: ++ { ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; ++ ++ assert(op4->_info == NULL, "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && ++ op4->_opr3->is_valid() && op4->_opr4->is_valid() && ++ op4->_result->is_valid(), "used"); ++ ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ do_input(op4->_opr3); ++ do_input(op4->_opr4); ++ do_temp(op4->_opr4); ++ do_output(op4->_result); ++ ++ break; ++ } ++ + // LIR_OpJavaCall + case lir_static_call: + case lir_optvirtual_call: +@@ -1028,6 +1129,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { + masm->emit_op2(this); + } + ++void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { ++ masm->emit_opCmpBranch(this); ++ if (stub()) { ++ masm->append_code_stub(stub()); ++ } ++} ++ + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { + masm->emit_alloc_array(this); + masm->append_code_stub(stub()); +@@ -1048,6 +1156,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1424,8 +1536,7 @@ void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_nu + if (deoptimize_on_null) { + // Emit an explicit null check and deoptimize if opr is null + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_null_check, Deoptimization::Action_none); +- cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); +- branch(lir_cond_equal, T_OBJECT, deopt); ++ cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); + } else { + // Emit an implicit null check + append(new LIR_Op1(lir_null_check, opr, info)); +@@ -1680,6 +1791,8 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; ++ case lir_cmp_branch: s = "cmp_branch"; break; ++ case 
lir_cmp_float_branch: s = "cmp_fbranch"; break; + case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; +@@ -1705,6 +1818,8 @@ const char * LIR_Op::name() const { + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; + case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmp_cmove: s = "cmp_cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1856,6 +1971,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { + } + } + ++// LIR_OpCmpBranch ++void LIR_OpCmpBranch::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ if (block() != NULL) { ++ out->print("[B%d] ", block()->block_id()); ++ } else if (stub() != NULL) { ++ out->print("["); ++ stub()->print_name(out); ++ out->print(": " INTPTR_FORMAT "]", p2i(stub())); ++ if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); ++ } else { ++ out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); ++ } ++ if (ublock() != NULL) { ++ out->print("unordered: [B%d] ", ublock()->block_id()); ++ } ++} ++ + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { + switch(cond) { + case lir_cond_equal: out->print("[EQ]"); break; +@@ -1876,12 +2011,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { + print_bytecode(out, bytecode()); + in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +-#ifdef PPC32 +- if(tmp1()->is_valid()) { +- tmp1()->print(out); out->print(" "); +- tmp2()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } +-#endif + } + + void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { +@@ -1979,6 +2111,19 @@ void LIR_Op3::print_instr(outputStream* out) const { + } + + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ if (code() == lir_cmp_cmove) { ++ print_condition(out, condition()); out->print(" "); ++ } ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++ ++ + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); + obj_opr()->print(out); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 3234ca018b..1f46e44c77 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -864,9 +864,11 @@ class LIR_OpConvert; + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; ++class LIR_OpCmpBranch; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -933,6 +935,8 @@ enum LIR_Code { + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++ , lir_cmp_branch ++ , lir_cmp_float_branch + , lir_cmove + , lir_add + , lir_sub +@@ -964,6 +968,9 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmp_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1128,12 +1135,14 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } + virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } + virtual 
LIR_OpBranch* as_OpBranch() { return NULL; } ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } + virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } + virtual LIR_OpConvert* as_OpConvert() { return NULL; } + virtual LIR_Op0* as_Op0() { return NULL; } + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1463,15 +1472,18 @@ class LIR_OpConvert: public LIR_Op1 { + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _stub(stub) +- , _bytecode(code) {} ++ , _bytecode(code) ++ , _tmp(tmp) {} + + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1626,7 +1638,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1658,7 +1670,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1674,7 +1686,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1698,12 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); ++ return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); ++ 
_condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1717,43 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpCmpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); ++ ++ // for unordered comparisons ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1767,6 +1818,48 @@ class LIR_Op3: public LIR_Op { + }; + + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ ++ private: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Condition _condition; ++ ++ void verify() const; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) { ++ assert(is_in_range(code, begin_op4, end_op4), "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Condition condition() const { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; ++ } ++ void set_condition(LIR_Condition condition) { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; ++ } ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + //-------------------------------- + class LabelObj: public CompilationResourceObj { + private: +@@ -2115,7 +2208,9 @@ class LIR_List: public CompilationResourceObj { + + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { 
append(new LIR_OpConvert(code, left, dst, stub)); } ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); ++ } + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2146,6 +2241,15 @@ class LIR_List: public CompilationResourceObj { + cmp(condition, left, LIR_OprFact::intConst(right), info); + } + ++ // machine dependent ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL); ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); ++ } ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered); ++ + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +@@ -2153,6 +2257,9 @@ class LIR_List: public CompilationResourceObj { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); + } + ++ // machine dependent ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type); ++ + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); + void cas_obj(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 160483d5f7..bec297ebd2 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -777,6 +777,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch (op->code()) { ++ case lir_cmp_cmove: ++ cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++ + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + } +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index 44a5bcbe54..114b155f92 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -190,7 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); ++ void emit_opCmpBranch(LIR_OpCmpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); + void emit_updatecrc32(LIR_OpUpdateCRC32* op); +@@ -223,6 +225,7 @@ class LIR_Assembler: public CompilationResourceObj { + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, 
BasicType type); ++ void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); +diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp +index f4b156d59b..fc35f02772 100644 +--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp ++++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp +@@ -479,13 +479,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { + CodeStub* stub = new RangeCheckStub(range_check_info, index, array); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), +- index->as_jint(), null_check_info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), ++ index->as_jint(), stub, null_check_info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, array, +- arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), ++ T_INT, stub, null_check_info); // forward branch + } + } + +@@ -493,12 +491,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + CodeStub* stub = new RangeCheckStub(info, index); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), ++ index->as_jint(), stub, info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, buffer, +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), ++ T_INT, stub, info); // forward branch + } + __ move(index, result); + } +@@ -934,7 +931,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { + return tmp; + } + +-void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { ++void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { + if (if_instr->should_profile()) { + ciMethod* method = if_instr->profiled_method(); + assert(method != NULL, "method should be set if branch is profiled"); +@@ -955,10 +952,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { + __ metadata2reg(md->constant_encoding(), md_reg); + + LIR_Opr data_offset_reg = new_pointer_register(); +- __ cmove(lir_cond(cond), +- LIR_OprFact::intptrConst(taken_count_offset), +- LIR_OprFact::intptrConst(not_taken_count_offset), +- data_offset_reg, as_BasicType(if_instr->x()->type())); ++ if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { ++ __ cmove(lir_cond(cond), ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } else { ++ __ cmp_cmove(lir_cond(cond), left, right, ++ LIR_OprFact::intptrConst(taken_count_offset), 
++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } + + // MDO cells are intptr_t, so the data_reg width is arch-dependent. + LIR_Opr data_reg = new_pointer_register(); +@@ -1315,8 +1319,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { + } + + __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); +- __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); +- __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); ++ __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), ++ LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + } + + +@@ -1598,8 +1602,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check +@@ -1777,12 +1781,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new RangeCheckStub(info, index.result()); + if (index.result()->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); ++ cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, stub, info); + } + __ move(index.result(), result); + } else { +@@ -1860,8 +1861,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { + } else if (use_length) { + // TODO: use a (modified) version of array_range_check that does not require a + // constant length to be loaded to a register +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // The range check performs the null check, so clear it out for the load +@@ -2234,19 +2235,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi + int high_key = one_range->high_key(); + BlockBegin* dest = one_range->sux(); + if (low_key == high_key) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + } else if (high_key - low_key == 1) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); +- __ cmp(lir_cond_equal, value, high_key); +- __ 
branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); + } else { + LabelObj* L = new LabelObj(); +- __ cmp(lir_cond_less, value, low_key); +- __ branch(lir_cond_less, T_INT, L->label()); +- __ cmp(lir_cond_lessEqual, value, high_key); +- __ branch(lir_cond_lessEqual, T_INT, dest); ++ __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); ++ __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); + __ branch_destination(L->label()); + } + } +@@ -2346,12 +2342,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, i + lo_key); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(i + lo_key), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2365,8 +2360,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { + do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); + } else { + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, i + lo_key); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2404,12 +2398,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, x->key_at(i)); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(x->key_at(i)), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2424,8 +2417,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { + } else { + int len = x->length(); + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, x->key_at(i)); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2935,8 +2927,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { + f_val.dont_load_item(); + LIR_Opr reg = rlock_result(x); + +- __ cmp(lir_cond(x->cond()), left.result(), right.result()); +- __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); ++ __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), ++ t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } + + #ifdef JFR_HAVE_INTRINSICS +@@ -2980,8 +2972,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { + __ move(LIR_OprFact::oopConst(NULL), result); + LIR_Opr jobj = new_register(T_METADATA); + __ move_wide(jobj_addr, jobj); +- __ cmp(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0)); +- __ branch(lir_cond_equal, T_OBJECT, L_end->label()); ++ __ cmp_branch(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0), T_OBJECT, 
L_end->label()); + + access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); + +@@ -3286,21 +3277,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { + + void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { + if (compilation()->count_backedges()) { ++ LIR_Opr step = new_register(T_INT); ++ LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); ++ LIR_Opr zero = LIR_OprFact::intConst(0); + #if defined(X86) && !defined(_LP64) + // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. + LIR_Opr left_copy = new_register(left->type()); + __ move(left, left_copy); + __ cmp(cond, left_copy, right); +-#else +- __ cmp(cond, left, right); +-#endif +- LIR_Opr step = new_register(T_INT); +- LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); +- LIR_Opr zero = LIR_OprFact::intConst(0); + __ cmove(cond, + (left_bci < bci) ? plus_one : zero, + (right_bci < bci) ? plus_one : zero, + step, left->type()); ++#else ++ __ cmp_cmove(cond, left, right, ++ (left_bci < bci) ? plus_one : zero, ++ (right_bci < bci) ? plus_one : zero, ++ step, left->type()); ++#endif + increment_backedge_counter(info, step, bci); + } + } +@@ -3339,8 +3333,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { + // DeoptimizeStub will reexecute from the current state in code info. + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, + Deoptimization::Action_make_not_entrant); +- __ cmp(lir_cond_lessEqual, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_lessEqual, T_INT, deopt); ++ __ cmp_branch(lir_cond_lessEqual, result, LIR_OprFact::intConst(0), T_INT, deopt); + } + } + +@@ -3386,8 +3379,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, + int freq = frequency << InvocationCounter::count_shift; + if (freq == 0) { + if (!step->is_constant()) { +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_notEqual, T_ILLEGAL, overflow); ++ __ cmp_branch(lir_cond_notEqual, step, LIR_OprFact::intConst(0), T_ILLEGAL, overflow); + } else { + __ branch(lir_cond_always, T_ILLEGAL, overflow); + } +@@ -3395,12 +3387,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, + LIR_Opr mask = load_immediate(freq, T_INT); + if (!step->is_constant()) { + // If step is 0, make sure the overflow check below always fails +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); ++ __ cmp_cmove(lir_cond_notEqual, step, LIR_OprFact::intConst(0), ++ result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); + } + __ logical_and(result, mask, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_equal, T_INT, overflow); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); + } + __ branch_destination(overflow->continuation()); + } +@@ -3513,8 +3504,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { + CodeEmitInfo *info = state_for(x, x->state()); + CodeStub* stub = new PredicateFailedStub(info); + +- __ cmp(lir_cond(cond), left, right); +- __ branch(lir_cond(cond), right->type(), stub); ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); + } + } + +@@ -3661,8 +3651,8 @@ LIR_Opr 
LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& + __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); +- __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); ++ __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), ++ value_fixed, value, value_fixed, T_BYTE); + value = value_fixed; + return value; + } +diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp +index 3ad325d759..f377b27859 100644 +--- a/src/hotspot/share/c1/c1_LIRGenerator.hpp ++++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp +@@ -363,8 +363,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); + + // machine dependent +- void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); +- void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); ++ template ++ void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); ++ template ++ void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); + + void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); + +@@ -391,7 +393,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + + LIR_Opr safepoint_poll_register(); + +- void profile_branch(If* if_instr, If::Condition cond); ++ void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); + void increment_event_counter_impl(CodeEmitInfo* info, + ciMethod *method, LIR_Opr step, int frequency, + int bci, bool backedge, bool notify); +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index c28055fd99..4e7df88102 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1258,6 +1264,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { + } + break; + } ++ case lir_cmp_cmove: { ++ assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++ ++ LIR_Opr move_from = cmove->in_opr3(); ++ LIR_Opr move_to = cmove->result_opr(); ++ ++ if (move_to->is_register() && move_from->is_register()) { ++ Interval* from = interval_at(reg_num(move_from)); ++ Interval* to = interval_at(reg_num(move_to)); ++ if (from != NULL && to != NULL) { ++ to->set_register_hint(from); ++ TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); ++ } ++ } ++ break; ++ } + default: + break; + } +@@ -3342,7 +3365,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { + check_live = (move->patch_code() == lir_patch_none); + } + LIR_OpBranch* branch = op->as_OpBranch(); +- if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { ++ LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); ++ if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || ++ (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { + // Don't bother checking the stub in this case since the + // exception stub will never return to normal control flow. + check_live = false; +@@ -6192,6 +6217,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi + assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); + LIR_OpBranch* branch = (LIR_OpBranch*)op; + ++ if (branch->block() == target_from) { ++ branch->change_block(target_to); ++ } ++ if (branch->ublock() == target_from) { ++ branch->change_ublock(target_to); ++ } ++ } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { ++ assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; ++ + if (branch->block() == target_from) { + branch->change_block(target_to); + } +@@ -6320,6 +6355,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + } + } + } ++ } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { ++ assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; ++ ++ if (prev_branch->stub() == NULL) { ++ if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { ++ TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); ++ ++ // eliminate a conditional branch to the immediate successor ++ prev_branch->change_block(last_branch->block()); ++ prev_branch->negate_cond(); ++ instructions->trunc_to(instructions->length() - 1); ++ } ++ } + } + } + } +@@ -6395,6 +6444,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { + assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); + assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); + } ++ ++ LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); ++ ++ if (op_cmp_branch != NULL) { ++ assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); ++ 
assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); ++ } + } + + for (j = 0; j < block->number_of_sux() - 1; j++) { +@@ -6639,6 +6695,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { + break; + } + ++ case lir_cmp_branch: ++ case lir_cmp_float_branch: { ++ LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); ++ if (branch->block() == NULL) { ++ inc_counter(counter_stub_branch); ++ } else { ++ inc_counter(counter_cond_branch); ++ } ++ inc_counter(counter_cmp); ++ break; ++ } ++ ++ case lir_cmp_cmove: { ++ inc_counter(counter_misc_inst); ++ inc_counter(counter_cmp); ++ break; ++ } ++ + case lir_neg: + case lir_add: + case lir_sub: +diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +index 747971af41..093831ac09 100644 +--- a/src/hotspot/share/code/nmethod.cpp ++++ b/src/hotspot/share/code/nmethod.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "code/codeCache.hpp" +@@ -2155,7 +2161,8 @@ void nmethod::verify_scopes() { + //verify_interrupt_point(iter.addr()); + break; + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + address destination = iter.reloc()->value(); + // Right now there is no way to find out which entries support + // an interrupt point. It would be nice if we had this +@@ -2392,7 +2399,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { + return st.as_string(); + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + stringStream st; + st.print("runtime_call"); + CallRelocation* r = (CallRelocation*)iter.reloc(); +diff --git a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp +index a20de8dde6..c6f49cf7d6 100644 +--- a/src/hotspot/share/code/relocInfo.cpp ++++ b/src/hotspot/share/code/relocInfo.cpp +@@ -433,6 +433,7 @@ void virtual_call_Relocation::unpack_data() { + _cached_value = x0==0? 
NULL: address_from_scaled_offset(x0, point); + } + ++#ifndef MIPS64 + void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +@@ -441,6 +442,7 @@ void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; + } ++#endif + + void static_stub_Relocation::pack_data_to(CodeSection* dest) { + short* p = (short*) dest->locs_end(); +@@ -910,7 +912,7 @@ void RelocIterator::print_current() { + break; + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { + CallRelocation* r = (CallRelocation*) reloc(); + tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); +diff --git a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp +index 57931a1a6a..fb56fd3ab1 100644 +--- a/src/hotspot/share/code/relocInfo.hpp ++++ b/src/hotspot/share/code/relocInfo.hpp +@@ -269,7 +269,11 @@ class relocInfo { + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#ifndef MIPS64 + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool ++#else ++ internal_pc_type = 14, // tag for internal data ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -304,13 +308,13 @@ class relocInfo { + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ +- visitor(runtime_call_w_cp) \ ++ NOT_MIPS64(visitor(runtime_call_w_cp)) \ + visitor(external_word) \ + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) + + + public: +@@ -1174,6 +1178,15 @@ class runtime_call_Relocation : public CallRelocation { + }; + + ++#ifdef MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ relocInfo::relocType type() { return relocInfo::internal_pc_type; } ++ public: ++ address pc() { return pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++}; ++#else + class runtime_call_w_cp_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::runtime_call_w_cp_type; } + +@@ -1202,6 +1215,7 @@ class runtime_call_w_cp_Relocation : public CallRelocation { + void pack_data_to(CodeSection * dest); + void unpack_data(); + }; ++#endif + + // Trampoline Relocations. + // A trampoline allows to encode a small branch in the code, even if there +diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp +index 7892cc85b3..3637aefe10 100644 +--- a/src/hotspot/share/code/vtableStubs.cpp ++++ b/src/hotspot/share/code/vtableStubs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "code/vtableStubs.hpp" + #include "compiler/compileBroker.hpp" +@@ -98,7 +104,11 @@ int VtableStubs::_itab_stub_size = 0; + + #if defined(PRODUCT) + // These values are good for the PRODUCT case (no tracing). ++#if defined MIPS64 || defined LOONGARCH64 ++ static const int first_vtableStub_size = 128; ++#else + static const int first_vtableStub_size = 64; ++#endif + static const int first_itableStub_size = 256; + #else + // These values are good for the non-PRODUCT case (when tracing can be switched on). +@@ -109,6 +119,7 @@ int VtableStubs::_itab_stub_size = 0; + // vtable itable + // aarch64: 460 324 + // arm: ? ? ++ // mips64: 728 328 + // ppc (linux, BE): 404 288 + // ppc (linux, LE): 356 276 + // ppc (AIX): 416 296 +diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +index 4289e5e5c4..9502463bd5 100644 +--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp ++++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +@@ -74,7 +74,6 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, + // Read the marking-in-progress flag. + LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -103,7 +102,7 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, + slow = new G1PreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -168,10 +167,9 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprD + } + assert(new_val->is_register(), "must be a register at this point"); + +- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); +- + CodeStub* slow = new G1PostBarrierStub(addr, new_val); +- __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); ++ __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), ++ LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ branch_destination(slow->continuation()); + } + +diff --git a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +index 98a2fe7f1c..b43a441066 100644 +--- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + +@@ -71,6 +77,7 @@ template inline void G1FullGCMarker::mark_and_push(T* p) { + _oop_stack.push(obj); + assert(_bitmap->is_marked(obj), "Must be marked now - map self"); + } else { ++ DEBUG_ONLY(OrderAccess::loadload()); + assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), + "Must be marked by other or closed archive object"); + } +diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +index 1ef900783d..b30456429d 100644 +--- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp ++++ b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +@@ -51,8 +51,9 @@ template + inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + if (p != NULL) { // XXX: error if p != NULL here + oop o = RawAccess::oop_load(p); +- if (o->is_forwarded()) { +- o = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ o = (oop) m->decode_pointer(); + // Card mark + if (PSScavenge::is_obj_in_young(o)) { + PSScavenge::card_table()->inline_write_ref_field_gc(p, o); +@@ -282,13 +283,17 @@ inline void PSPromotionManager::copy_and_push_safe_barrier(T* p) { + assert(should_scavenge(p, true), "revisiting object?"); + + oop o = RawAccess::oop_load(p); +- oop new_obj = o->is_forwarded() +- ? o->forwardee() +- : copy_to_survivor_space(o); ++ oop new_obj; ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); ++ } else { ++ new_obj = copy_to_survivor_space(o); ++ } + + // This code must come after the CAS test, or it will print incorrect + // information. +- if (log_develop_is_enabled(Trace, gc, scavenge) && o->is_forwarded()) { ++ if (log_develop_is_enabled(Trace, gc, scavenge) && m->is_marked()) { + log_develop_trace(gc, scavenge)("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", + "forwarding", + new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); +diff --git a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +index 0c58fd4b3f..415990ff5f 100644 +--- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp ++++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +@@ -104,8 +104,9 @@ class PSScavengeFromCLDClosure: public OopClosure { + + oop o = *p; + oop new_obj; +- if (o->is_forwarded()) { +- new_obj = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); + } else { + new_obj = _pm->copy_to_survivor_space(o); + } +diff --git a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +index 5241322a91..0ddabb4dae 100644 +--- a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp ++++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +@@ -192,8 +192,7 @@ void BarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) { + /* Normalize boolean value returned by unsafe operation, i.e., value != 0 ? value = true : value false. 
*/ + if (mask_boolean) { + LabelObj* equalZeroLabel = new LabelObj(); +- __ cmp(lir_cond_equal, result, 0); +- __ branch(lir_cond_equal, T_BOOLEAN, equalZeroLabel->label()); ++ __ cmp_branch(lir_cond_equal, result, 0, T_BOOLEAN, equalZeroLabel->label()); + __ move(LIR_OprFact::intConst(1), result); + __ branch_destination(equalZeroLabel->label()); + } +@@ -320,14 +319,12 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { + referent_off = gen->new_register(T_LONG); + __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); + } +- __ cmp(lir_cond_notEqual, offset, referent_off); +- __ branch(lir_cond_notEqual, offset->type(), cont->label()); ++ __ cmp_branch(lir_cond_notEqual, offset, referent_off, offset->type(), cont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue +- __ cmp(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, cont->label()); ++ __ cmp_branch(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, cont->label()); + } + LIR_Opr src_klass = gen->new_register(T_METADATA); + if (gen_type_check) { +@@ -337,8 +334,7 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = gen->new_register(T_INT); + __ move(reference_type_addr, reference_type); +- __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); +- __ branch(lir_cond_equal, T_INT, cont->label()); ++ __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, cont->label()); + } + } + } +diff --git a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +index 84815adea8..57e29f1295 100644 +--- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +@@ -89,8 +89,7 @@ void CardTableBarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, L + __ move(card_addr, cur_value); + + LabelObj* L_already_dirty = new LabelObj(); +- __ cmp(lir_cond_equal, cur_value, dirty); +- __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); ++ __ cmp_branch(lir_cond_equal, cur_value, dirty, T_BYTE, L_already_dirty->label()); + __ move(dirty, card_addr); + __ branch_destination(L_already_dirty->label()); + } else { +diff --git a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +index f51d186484..506f0301fe 100644 +--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +@@ -73,7 +73,6 @@ void ShenandoahBarrierSetC1::pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, + // Read the marking-in-progress flag. 
+ LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -101,7 +100,7 @@ void ShenandoahBarrierSetC1::pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, + slow = new ShenandoahPreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -144,10 +143,9 @@ LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, L + __ logical_and(flag_val, mask_reg, masked_flag); + flag_val = masked_flag; + } +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2); +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + + return result; +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce74243..3c1862d826 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -105,15 +105,20 @@ public: + + virtual void visit(LIR_OpVisitState* state) { + state->do_input(_opr); ++ if (_result->is_valid()) { ++ state->do_temp(_opr); ++ state->do_output(_result); ++ } + } + + virtual void emit_code(LIR_Assembler* ce) { +- ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr); ++ ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr, result_opr()); + } + + virtual void print_instr(outputStream* out) const { + _opr->print(out); + out->print(" "); ++ result_opr()->print(out); + } + + #ifndef PRODUCT +@@ -149,13 +154,21 @@ address ZBarrierSetC1::load_barrier_on_oop_field_preloaded_runtime_stub(Decorato + #endif + + void ZBarrierSetC1::load_barrier(LIRAccess& access, LIR_Opr result) const { ++ LIR_Op* op = new LIR_OpZLoadBarrierTest(result); ++ + // Fast path +- __ append(new LIR_OpZLoadBarrierTest(result)); ++ __ append(op); + + // Slow path + const address runtime_stub = load_barrier_on_oop_field_preloaded_runtime_stub(access.decorators()); + CodeStub* const stub = new ZLoadBarrierStubC1(access, result, runtime_stub); +- __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ if (ZPlatformLoadBarrierTestResultInRegister) { ++ LIR_Opr res = access.gen()->new_register(result->type()); ++ op->set_result_opr(res); ++ __ cmp_branch(lir_cond_notEqual, res, LIR_OprFact::intptrConst(NULL_WORD), T_ADDRESS, stub); ++ } else { ++ __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ } + __ branch_destination(stub->continuation()); + } + +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +index 24e4c98175..a6b310290d 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.inline.hpp" + #include "classfile/systemDictionary.hpp" +@@ -1506,7 +1512,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +index 87e84c893f..3043fa634b 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -146,7 +152,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +index 965f6b0d10..07942993cd 100644 +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -114,9 +120,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + void restore_native_result(void); + #endif // SPARC + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +-#endif // AARCH64 ++#endif // AARCH64 || MIPS64 || LOONGARCH64 + + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a57..0661f3b9d1 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + #define SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + +@@ -102,7 +108,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +index 8927063330..b5bb5c2887 100644 +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "compiler/abstractCompiler.hpp" +@@ -715,6 +721,35 @@ + #endif + + ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ declare_constant(VM_Version::CPU_LA32) \ ++ declare_constant(VM_Version::CPU_LA64) \ ++ declare_constant(VM_Version::CPU_LLEXC) \ ++ declare_constant(VM_Version::CPU_SCDLY) \ ++ declare_constant(VM_Version::CPU_LLDBAR) \ ++ declare_constant(VM_Version::CPU_LBT_X86) \ ++ declare_constant(VM_Version::CPU_LBT_ARM) \ ++ declare_constant(VM_Version::CPU_LBT_MIPS) \ ++ declare_constant(VM_Version::CPU_CCDMA) \ ++ declare_constant(VM_Version::CPU_COMPLEX) \ ++ declare_constant(VM_Version::CPU_FP) \ ++ declare_constant(VM_Version::CPU_CRYPTO) \ ++ declare_constant(VM_Version::CPU_LSX) \ ++ declare_constant(VM_Version::CPU_LASX) \ ++ declare_constant(VM_Version::CPU_LAM) \ ++ declare_constant(VM_Version::CPU_LLSYNC) \ ++ declare_constant(VM_Version::CPU_TGTSYNC) \ ++ declare_constant(VM_Version::CPU_ULSYNC) \ ++ declare_constant(VM_Version::CPU_UAL) ++ ++#endif ++ ++ + #ifdef X86 + + #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ +diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp +index 80958b0469..08d13a4189 100644 +--- a/src/hotspot/share/memory/metaspace.cpp ++++ b/src/hotspot/share/memory/metaspace.cpp +@@ -1082,12 +1082,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + // Don't use large pages for the class space. 
+ bool large_pages = false; + +-#if !(defined(AARCH64) || defined(PPC64)) ++#if !(defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64)) + ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr); +-#else // AARCH64 || PPC64 ++#else // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + ReservedSpace metaspace_rs; + +@@ -1113,7 +1113,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + // below 32g to get a zerobased CCS. For simplicity we reuse the search + // strategy for AARCH64. + +- size_t increment = AARCH64_ONLY(4*)G; ++ // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. ++ size_t increment = AARCH64_ONLY(4*)MIPS64_ONLY(4*)LOONGARCH64_ONLY(4*)G; + for (char *a = align_up(requested_addr, increment); + a < (char*)(1024*G); + a += increment) { +@@ -1144,7 +1145,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + } + } + +-#endif // AARCH64 || PPC64 ++#endif // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + if (!metaspace_rs.is_reserved()) { + #if INCLUDE_CDS +diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp +index 6c631f5458..9865106720 100644 +--- a/src/hotspot/share/oops/oop.inline.hpp ++++ b/src/hotspot/share/oops/oop.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -389,7 +395,7 @@ oop oopDesc::forward_to_atomic(oop p, atomic_memory_order order) { + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop)oldMark->decode_pointer(); + } + + // Note that the forwardee is not the same thing as the displaced_mark. +diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp +index 569fbc6d69..c1f1b82ffa 100644 +--- a/src/hotspot/share/opto/compile.hpp ++++ b/src/hotspot/share/opto/compile.hpp +@@ -1204,7 +1204,7 @@ class Compile : public Phase { + bool in_scratch_emit_size() const { return _in_scratch_emit_size; } + + enum ScratchBufferBlob { +-#if defined(PPC64) ++#if defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + MAX_inst_size = 2048, + #else + MAX_inst_size = 1024, +diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp +index b6540e06a3..52d1fc9fb9 100644 +--- a/src/hotspot/share/opto/output.cpp ++++ b/src/hotspot/share/opto/output.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -731,6 +737,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = NULL; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to tha last instruction in safePoint. ++ // In X86 and sparc, their safePoints only contain one instruction. ++ // However, we should add current_offset with the size of safePoint in MIPS. 
++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(_regalloc) - 4; ++#endif + debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1393,6 +1420,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + DEBUG_ONLY(uint instr_offset = cb->insts_size()); + n->emit(*cb, _regalloc); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime ++ // is not the instruction which access memory. adjust is needed. previous_offset points to the ++ // instruction which access memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of current instruction. ++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +index 7d767c47c9..23ec34e5e2 100644 +--- a/src/hotspot/share/opto/type.cpp ++++ b/src/hotspot/share/opto/type.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -78,6 +84,12 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY ++ { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other + { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD +diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +index 84123b29ec..77fbacf2d8 100644 +--- a/src/hotspot/share/runtime/java.cpp ++++ b/src/hotspot/share/runtime/java.cpp +@@ -68,6 +68,7 @@ + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" + #include "runtime/vmOperations.hpp" ++#include "runtime/vmThread.hpp" + #include "services/memTracker.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/globalDefinitions.hpp" +diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +index e0f4a2af1f..09cc4b1ba5 100644 +--- a/src/hotspot/share/runtime/os.cpp ++++ b/src/hotspot/share/runtime/os.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -1242,7 +1248,8 @@ bool os::is_first_C_frame(frame* fr) { + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; + + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || ++ // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack +diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +index e086f794cd..f480195775 100644 +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ ++ + #include "precompiled.hpp" + #include "jni.h" + #include "runtime/interfaceSupport.inline.hpp" +@@ -512,6 +519,14 @@ static int __ieee754_rem_pio2(double x, double *y) { + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + ++#if defined(MIPS)|| defined(LOONGARCH) ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp +index c758fc5743..a8c4638f6a 100644 +--- a/src/hotspot/share/utilities/globalDefinitions.hpp ++++ b/src/hotspot/share/utilities/globalDefinitions.hpp +@@ -1161,6 +1161,15 @@ inline int exact_log2_long(jlong x) { + return log2_long(x); + } + ++#if defined(MIPS64) || defined(LOONGARCH64) ++// returns integer round-up to the nearest multiple of s (s must be a power of two) ++inline intptr_t round_to(intptr_t x, uintx s) { ++ assert(is_power_of_2(s), "s must be a power of 2: " JLONG_FORMAT, x); ++ const uintx m = s - 1; ++ return mask_bits(x + m, ~m); ++} ++#endif ++ + inline bool is_odd (intx x) { return x & 1; } + inline bool is_even(intx x) { return !is_odd(x); } + +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index cf80253868..f611daf36d 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP + #define SHARE_VM_UTILITIES_MACROS_HPP + +@@ -531,6 +537,38 @@ + #define NOT_SPARC(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ ++#if defined(MIPS64) || defined(LOONGARCH64) ++#define LOONGARCH64_AND_MIPS64_ONLY(code) code ++#define NOT_LOONGARCH64_AND_MIPS64(code) ++#else ++#define LOONGARCH64_AND_MIPS64_ONLY(code) ++#define NOT_LOONGARCH64_AND_MIPS64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +@@ -623,16 +661,34 @@ + // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp + // + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) ++#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) ++#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) ++#else + #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) + #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) + #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp 
/ basename.inline.hpp + #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) + #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) + #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) ++#else + #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) + #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) + #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c5..6afafea095 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #include + #include "libproc.h" + #include "proc_service.h" +@@ -54,10 +61,18 @@ + #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h" + #endif + ++#if defined(mips64) || defined(mips64el) ++#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" ++#endif ++ + #ifdef aarch64 + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +412,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(loongarch64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,8 +440,14 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG ++#endif ++#if defined(mips64) || defined(mips64el) ++#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG + #endif + + +@@ -534,6 +555,18 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +@@ -574,6 +607,45 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + #endif + ++#if defined(mips64) || defined(mips64el) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg ++ ++ regs[REG_INDEX(ZERO)] = gregs.regs[0]; ++ regs[REG_INDEX(AT)] = gregs.regs[1]; ++ regs[REG_INDEX(V0)] = gregs.regs[2]; ++ regs[REG_INDEX(V1)] = gregs.regs[3]; ++ regs[REG_INDEX(A0)] = gregs.regs[4]; ++ regs[REG_INDEX(A1)] = gregs.regs[5]; ++ regs[REG_INDEX(A2)] = gregs.regs[6]; ++ regs[REG_INDEX(A3)] = gregs.regs[7]; ++ regs[REG_INDEX(T0)] = gregs.regs[8]; ++ regs[REG_INDEX(T1)] = gregs.regs[9]; ++ regs[REG_INDEX(T2)] = gregs.regs[10]; ++ regs[REG_INDEX(T3)] = gregs.regs[11]; ++ regs[REG_INDEX(T4)] = gregs.regs[12]; ++ regs[REG_INDEX(T5)] = gregs.regs[13]; ++ regs[REG_INDEX(T6)] = gregs.regs[14]; ++ regs[REG_INDEX(T7)] = gregs.regs[15]; ++ regs[REG_INDEX(S0)] = gregs.regs[16]; ++ regs[REG_INDEX(S1)] = gregs.regs[17]; ++ regs[REG_INDEX(S2)] = gregs.regs[18]; ++ regs[REG_INDEX(S3)] = gregs.regs[19]; ++ regs[REG_INDEX(S4)] = gregs.regs[20]; ++ regs[REG_INDEX(S5)] = gregs.regs[21]; ++ regs[REG_INDEX(S6)] = gregs.regs[22]; ++ regs[REG_INDEX(S7)] = gregs.regs[23]; ++ regs[REG_INDEX(T8)] = gregs.regs[24]; ++ regs[REG_INDEX(T9)] = gregs.regs[25]; ++ regs[REG_INDEX(K0)] = gregs.regs[26]; ++ regs[REG_INDEX(K1)] = gregs.regs[27]; ++ regs[REG_INDEX(GP)] = gregs.regs[28]; ++ regs[REG_INDEX(SP)] = gregs.regs[29]; ++ regs[REG_INDEX(FP)] = gregs.regs[30]; ++ regs[REG_INDEX(S8)] = gregs.regs[30]; ++ regs[REG_INDEX(RA)] = gregs.regs[31]; ++#endif /* mips */ ++ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; + } +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e021..07064e76ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ 
b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,13 +44,17 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) + #include + #define user_regs_struct pt_regs + #endif ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index de5254d859..eefe55959c 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c7..1b2f11a065 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -39,6 +45,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -598,6 +606,10 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +new file mode 100644 +index 0000000000..99cea8c7f1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +new file mode 100644 +index 0000000000..1b49efd201 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb714..7d7f6424e6 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.io.*; +@@ -34,12 +40,16 @@ import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -102,7 +112,21 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); + if (pc == null) return null; + return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("ppc64")) { + PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); + if (sp == null) return null; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +index 4b786eecc9..4ead33827c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.lang.reflect.*; +@@ -30,6 +36,8 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -40,7 +48,11 @@ class LinuxThreadContextFactory { + return new LinuxAMD64ThreadContext(dbg); + } else if (cpu.equals("sparc")) { + return new LinuxSPARCThreadContext(dbg); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); ++ } else if (cpu.equals("ppc64")) { + return new LinuxPPC64ThreadContext(dbg); + } else { + try { +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +new file mode 100644 +index 0000000000..0e6caee5a4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address nextFP; ++ Address nextPC; ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ try { ++ nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextFP == null) { ++ return null; ++ } ++ ++ try { ++ nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextPC == null) { ++ return null; ++ } ++ ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..604642598e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +new file mode 100644 +index 0000000000..2e3eb564da +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..98e0f3f0bc +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..1de3cb1a47 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +new file mode 100644 +index 0000000000..d3479a65ea +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, 
the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +index 7113a3a497..de47531db7 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.posix.elf; + + import java.io.FileInputStream; +@@ -63,6 +69,8 @@ public interface ELFHeader { + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. */ + public short getFileType(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +index 74e957d94b..46ece3611f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +@@ -32,11 +32,13 @@ import sun.jvm.hotspot.debugger.*; + import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; ++import sun.jvm.hotspot.debugger.proc.mips64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.ppc64.*; + import sun.jvm.hotspot.debugger.proc.x86.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.amd64.*; ++import sun.jvm.hotspot.debugger.mips64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; +@@ -90,6 +92,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { + threadFactory = new ProcAMD64ThreadFactory(this); + pcRegIndex = AMD64ThreadContext.RIP; + fpRegIndex = AMD64ThreadContext.RBP; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new ProcMIPS64ThreadFactory(this); ++ pcRegIndex = MIPS64ThreadContext.PC; ++ fpRegIndex = MIPS64ThreadContext.FP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..1f60fa6cfb +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..ef5597ac4e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..abad1bb38b +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +new file mode 100644 +index 0000000000..5c1e0be893 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. 
++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..d44223d768 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..bad478fc5c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index b6253f6d63..5eecb08a10 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.remote; + + import java.rmi.*; +@@ -34,6 +40,8 @@ import sun.jvm.hotspot.debugger.remote.sparc.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. 
+@@ -76,6 +84,16 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger { + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..242dd279e1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..634d5ad049 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..4fb9cc7c06 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +new file mode 100644 +index 0000000000..c2f7d841f2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..23646905d7 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..b39b014490 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785a..04681fa0e7 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.runtime; + + import java.util.*; +@@ -39,6 +45,8 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; +@@ -99,6 +107,10 @@ public class Threads { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..ee1003e352 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address 
addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. ++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..181f431b64 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return null; ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public Address getLastFP(Address addr) { ++ return getLastSP(addr).getAddressAt(0); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..824270e132 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all loongarch64 platforms we support ++ (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's ++ "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext; ++ output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the ++ LOONGARCH64Frame is left to the caller, since we may need to subclass ++ LOONGARCH64Frame to support signal handler frames on Unix platforms. ++ ++
Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated EBP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +new file mode 100644 +index 0000000000..058afc94d0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +@@ -0,0 +1,526 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. 
*/ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int JAVA_FRAME_LINK_OFFSET = 0; ++ private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; ++ private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_LINK_OFFSET = -2; ++ private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; ++ private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. 
++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); ++ return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address 
addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +new file mode 100644 +index 0000000000..0625e10a41 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +new file mode 100644 +index 0000000000..2cf904d388 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..c11458abe2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all mips64 platforms we support
++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's
++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext;
++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the
++ MIPS64Frame is left to the caller, since we may need to subclass
++ MIPS64Frame to support signal handler frames on Unix platforms.
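For orientation (not part of the patch): the guesser is normally driven from the platform's JavaThreadPDAccess implementation, which turns a successful guess back into a frame. The sketch below follows the pattern used by the existing SA ports; the method signature and the GUESSER_SCAN_RANGE constant are assumptions made for illustration only.

// Assumed caller shape, modeled on existing SA ports; not added by this patch.
public Frame getCurrentFrameGuess(JavaThread thread, ThreadProxy t) {
    MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext();
    MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread);
    if (!guesser.run(GUESSER_SCAN_RANGE)) {   // search-window size; the value is an assumption here
        return null;                          // no plausible frame found
    }
    if (guesser.getPC() == null) {
        // Values came from the frame anchor: use the two-argument constructor,
        // which recovers the PC from the stack itself.
        return new MIPS64Frame(guesser.getSP(), guesser.getFP());
    }
    return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
}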

++ ++

Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated EBP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
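Each sender() step taken during that walk is, for compiled frames, plain pointer arithmetic (see senderForCompiledFrame in LOONGARCH64Frame above; MIPS64Frame below does the same): the sender SP is the callee's unextended SP plus the code blob's frame size, the return PC is the word just below the sender SP, and the saved FP the word below that. A small self-contained sketch, with addresses modeled as plain longs rather than SA Address objects, follows; it is illustrative only and not part of the patch.

class CompiledSenderSketch {
    // senderSP = unextendedSP + frameSize; return PC lives at senderSP - wordSize;
    // saved FP lives at senderSP - 2 * wordSize (SENDER_SP_OFFSET words below senderSP).
    static long[] senderOf(long unextendedSp, long frameSizeBytes, long wordSize) {
        long senderSp     = unextendedSp + frameSizeBytes;
        long returnPcSlot = senderSp - 1 * wordSize;
        long savedFpSlot  = senderSp - 2 * wordSize;
        return new long[] { senderSp, returnPcSlot, savedFpSlot };
    }

    public static void main(String[] args) {
        long[] s = senderOf(0x7fff0000L, 64, 8);  // a 64-byte compiled frame on a 64-bit target
        System.out.printf("senderSP=0x%x, pcSlot=0x%x, fpSlot=0x%x%n", s[0], s[1], s[2]);
    }
}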

*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +new file mode 100644 +index 0000000000..65d88016ea +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +@@ -0,0 +1,537 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new VMReg(5); ++ } else { ++ rbp = new VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field 
beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 0000000000..dfe3066af0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 0000000000..f2da760af4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..06d79318d9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -54,7 +61,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -101,6 +108,12 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 0000000000..0d3953ddff +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++package jdk.vm.ci.hotspot.loongarch64;
++
++import static java.util.Collections.emptyMap;
++import static jdk.vm.ci.common.InitTimer.timer;
++
++import java.util.EnumSet;
++import java.util.Map;
++
++import jdk.vm.ci.loongarch64.LoongArch64;
++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature;
++import jdk.vm.ci.code.Architecture;
++import jdk.vm.ci.code.RegisterConfig;
++import jdk.vm.ci.code.TargetDescription;
++import jdk.vm.ci.code.stack.StackIntrospection;
++import jdk.vm.ci.common.InitTimer;
++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider;
++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider;
++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime;
++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider;
++import jdk.vm.ci.hotspot.HotSpotStackIntrospection;
++import jdk.vm.ci.meta.ConstantReflectionProvider;
++import jdk.vm.ci.runtime.JVMCIBackend;
++
++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory {
++
++    protected EnumSet<LoongArch64.CPUFeature> computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) {
++        // Configure the feature set using the HotSpot flag settings.
++        EnumSet<LoongArch64.CPUFeature> features = EnumSet.noneOf(LoongArch64.CPUFeature.class);
++
++        if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) {
++            features.add(LoongArch64.CPUFeature.LA32);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) {
++            features.add(LoongArch64.CPUFeature.LA64);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) {
++            features.add(LoongArch64.CPUFeature.LLEXC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) {
++            features.add(LoongArch64.CPUFeature.SCDLY);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) {
++            features.add(LoongArch64.CPUFeature.LLDBAR);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) {
++            features.add(LoongArch64.CPUFeature.LBT_X86);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) {
++            features.add(LoongArch64.CPUFeature.LBT_ARM);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) {
++            features.add(LoongArch64.CPUFeature.LBT_MIPS);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) {
++            features.add(LoongArch64.CPUFeature.CCDMA);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) {
++            features.add(LoongArch64.CPUFeature.COMPLEX);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64FP) != 0) {
++            features.add(LoongArch64.CPUFeature.FP);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) {
++            features.add(LoongArch64.CPUFeature.CRYPTO);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) {
++            features.add(LoongArch64.CPUFeature.LSX);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) {
++            features.add(LoongArch64.CPUFeature.LASX);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) {
++            features.add(LoongArch64.CPUFeature.LAM);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LLSYNC) != 0) {
++            features.add(LoongArch64.CPUFeature.LLSYNC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) {
++            features.add(LoongArch64.CPUFeature.TGTSYNC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) {
++            features.add(LoongArch64.CPUFeature.ULSYNC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64UAL) != 0) {
++            features.add(LoongArch64.CPUFeature.UAL);
++        }
++
++        return features;
++    }
++
++    protected EnumSet<LoongArch64.Flag> computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) {
++        EnumSet<LoongArch64.Flag> flags = EnumSet.noneOf(LoongArch64.Flag.class);
++
++        if (config.useLSX) {
++            flags.add(LoongArch64.Flag.useLSX);
++        }
++
++        if (config.useLASX) {
++            flags.add(LoongArch64.Flag.useLASX);
++        }
++
++        return flags;
++    }
++
++    protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) {
++        final int stackFrameAlignment = 16;
++        final int implicitNullCheckLimit = 4096;
++        final boolean inlineObjects = true;
++        Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config));
++        return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
++    }
++
++    protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) {
++        return new HotSpotConstantReflectionProvider(runtime);
++    }
++
++    protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) {
++        return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops);
++    }
++
++    protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) {
++        return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig);
++    }
++
++    protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) {
++        return new HotSpotMetaAccessProvider(runtime);
++    }
++
++    @Override
++    public String getArchitecture() {
++        return "loongarch64";
++    }
++
++    @Override
++    public String toString() {
++        return "JVMCIBackend:" + getArchitecture();
++    }
++
++    @Override
++    @SuppressWarnings("try")
++    public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) {
++
++        assert host == null;
++        LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore());
++        TargetDescription target = createTarget(config);
++
++        RegisterConfig regConfig;
++        HotSpotCodeCacheProvider codeCache;
++        ConstantReflectionProvider constantReflection;
++        HotSpotMetaAccessProvider metaAccess;
++        StackIntrospection stackIntrospection;
++        try (InitTimer t = timer("create providers")) {
++            try (InitTimer rt = timer("create MetaAccess provider")) {
++                metaAccess = createMetaAccess(runtime);
++            }
++            try (InitTimer rt = timer("create RegisterConfig")) {
++                regConfig = createRegisterConfig(config, target);
++            }
++            try (InitTimer rt = timer("create CodeCache provider")) {
++                codeCache = createCodeCache(runtime, target, regConfig);
++            }
++            try (InitTimer rt = timer("create ConstantReflection provider")) {
++                constantReflection = createConstantReflection(runtime);
++            }
++            try (InitTimer rt = timer("create StackIntrospection provider")) {
++                stackIntrospection = new HotSpotStackIntrospection(runtime);
++            }
++        }
++        try (InitTimer rt = timer("instantiate backend")) {
++            return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection);
++        }
++    }
++
++    protected JVMCIBackend
createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +new file mode 100644 +index 0000000000..2ee6a4b847 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */
++    private final RegisterArray callerSaved;
++
++    private final boolean allAllocatableAreCallerSaved;
++
++    private final RegisterAttributes[] attributesMap;
++
++    @Override
++    public RegisterArray getAllocatableRegisters() {
++        return allocatable;
++    }
++
++    @Override
++    public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) {
++        ArrayList<Register> list = new ArrayList<>();
++        for (Register reg : registers) {
++            if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) {
++                list.add(reg);
++            }
++        }
++
++        return new RegisterArray(list);
++    }
++
++    @Override
++    public RegisterAttributes[] getAttributesMap() {
++        return attributesMap.clone();
++    }
++
++    private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7);
++    private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7);
++    private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7);
++
++    public static final Register heapBaseRegister = s5;
++    public static final Register TREG = s6;
++
++    private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG);
++
++    private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) {
++        RegisterArray allRegisters = arch.getAvailableValueRegisters();
++        Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)];
++        List<Register> reservedRegistersList = reservedRegisters.asList();
++
++        int idx = 0;
++        for (Register reg : allRegisters) {
++            if (reservedRegistersList.contains(reg)) {
++                // skip reserved registers
++                continue;
++            }
++            if (reserveForHeapBase && reg.equals(heapBaseRegister)) {
++                // skip heap base register
++                continue;
++            }
++
++            registers[idx++] = reg;
++        }
++
++        assert idx == registers.length;
++        return new RegisterArray(registers);
++    }
++
++    public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) {
++        this(target, initAllocatable(target.arch, useCompressedOops));
++        assert callerSaved.size() >= allocatable.size();
++    }
++
++    public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) {
++        this.target = target;
++
++        this.allocatable = allocatable;
++        Set<Register> callerSaveSet = new HashSet<>();
++        allocatable.addTo(callerSaveSet);
++        floatParameterRegisters.addTo(callerSaveSet);
++        javaGeneralParameterRegisters.addTo(callerSaveSet);
++        nativeGeneralParameterRegisters.addTo(callerSaveSet);
++        callerSaved = new RegisterArray(callerSaveSet);
++
++        allAllocatableAreCallerSaved = true;
++        attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters);
++    }
++
++    @Override
++    public RegisterArray getCallerSaveRegisters() {
++        return callerSaved;
++    }
++
++    @Override
++    public RegisterArray getCalleeSaveRegisters() {
++        return null;
++    }
++
++    @Override
++    public boolean areAllAllocatableRegistersCallerSaved() {
++        return allAllocatableAreCallerSaved;
++    }
++
++    @Override
++    public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory<?> valueKindFactory) {
++        HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type;
++        if (type == HotSpotCallingConventionType.NativeCall) {
++            return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory);
++        }
++        // On x64, parameter locations are the same whether viewed
++        // from the caller or callee perspective
++        return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory);
++    }
++
++    @Override
++    public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) {
++        HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type;
++        switch (kind) {
++            case Boolean:
++            case Byte:
++            case Short:
++            case Char:
++            case Int:
++            case Long:
++            case Object:
++                return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters;
++            case Float:
++            case Double:
++                return floatParameterRegisters;
++            default:
++                throw JVMCIError.shouldNotReachHere();
++        }
++    }
++
++    private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type,
++                    ValueKindFactory<?> valueKindFactory) {
++        AllocatableValue[] locations = new AllocatableValue[parameterTypes.length];
++
++        int currentGeneral = 0;
++        int currentFloat = 0;
++        int currentStackOffset = 0;
++
++        for (int i = 0; i < parameterTypes.length; i++) {
++            final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind();
++
++            switch (kind) {
++                case Byte:
++                case Boolean:
++                case Short:
++                case Char:
++                case Int:
++                case Long:
++                case Object:
++                    if (currentGeneral < generalParameterRegisters.size()) {
++                        Register register = generalParameterRegisters.get(currentGeneral++);
++                        locations[i] = register.asValue(valueKindFactory.getValueKind(kind));
++                    }
++                    break;
++                case Float:
++                case Double:
++                    if (currentFloat < floatParameterRegisters.size()) {
++                        Register register = floatParameterRegisters.get(currentFloat++);
++                        locations[i] = register.asValue(valueKindFactory.getValueKind(kind));
++                    } else if (currentGeneral < generalParameterRegisters.size()) {
++                        Register register = generalParameterRegisters.get(currentGeneral++);
++                        locations[i] = register.asValue(valueKindFactory.getValueKind(kind));
++                    }
++                    break;
++                default:
++                    throw JVMCIError.shouldNotReachHere();
++            }
++
++            if (locations[i] == null) {
++                ValueKind<?> valueKind = valueKindFactory.getValueKind(kind);
++                locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out);
++                currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize);
++            }
++        }
++
++        JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind();
++        AllocatableValue returnLocation = returnKind == JavaKind.Void ?
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +new file mode 100644 +index 0000000000..c8605976a0 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. 
++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +new file mode 100644 +index 0000000000..1048ea9d64 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +new file mode 100644 +index 0000000000..1bb12e7a5f +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */
++    public enum Flag {
++        useLSX,
++        useLASX
++    }
++
++    private final EnumSet<Flag> flags;
++
++    public LoongArch64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) {
++        super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0);
++        this.features = features;
++        this.flags = flags;
++    }
++
++    public EnumSet<CPUFeature> getFeatures() {
++        return features;
++    }
++
++    public EnumSet<Flag> getFlags() {
++        return flags;
++    }
++
++    @Override
++    public PlatformKind getPlatformKind(JavaKind javaKind) {
++        switch (javaKind) {
++            case Boolean:
++            case Byte:
++                return LoongArch64Kind.BYTE;
++            case Short:
++            case Char:
++                return LoongArch64Kind.WORD;
++            case Int:
++                return LoongArch64Kind.DWORD;
++            case Long:
++            case Object:
++                return LoongArch64Kind.QWORD;
++            case Float:
++                return LoongArch64Kind.SINGLE;
++            case Double:
++                return LoongArch64Kind.DOUBLE;
++            default:
++                return null;
++        }
++    }
++
++    @Override
++    public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) {
++        LoongArch64Kind kind = (LoongArch64Kind) platformKind;
++        if (kind.isInteger()) {
++            return category.equals(CPU);
++        } else if (kind.isSIMD()) {
++            return category.equals(SIMD);
++        }
++        return false;
++    }
++
++    @Override
++    public LoongArch64Kind getLargestStorableKind(RegisterCategory category) {
++        if (category.equals(CPU)) {
++            return LoongArch64Kind.QWORD;
++        } else if (category.equals(SIMD)) {
++            return LoongArch64Kind.V256_QWORD;
++        } else {
++            return null;
++        }
++    }
++}
+diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java
+new file mode 100644
+index 0000000000..84b7f2027f
+--- /dev/null
++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java
+@@ -0,0 +1,163 @@
++/*
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */ ++package jdk.vm.ci.loongarch64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum LoongArch64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final LoongArch64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); ++ ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +new file mode 100644 +index 0000000000..9d020833ea +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index fed310d386..661f106d30 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to jdk.internal.vm.compiler; + exports jdk.vm.ci.runtime to +@@ -37,6 +43,7 @@ module jdk.internal.vm.ci { + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } +diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile +index 2514a895da..08fbe3b953 100644 +--- a/src/utils/hsdis/Makefile ++++ b/src/utils/hsdis/Makefile +@@ -94,6 +94,9 @@ CC = gcc + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +index ac17e567b0..9b004a2033 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +@@ -21,12 +21,18 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management +- * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled ++ * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +index 60b2d03321..981a239979 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +@@ -21,6 +21,12 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / +@@ -28,7 +34,7 @@ + * java.management + * + * @build sun.hotspot.WhiteBox +- * @requires !(vm.cpu.features ~= ".*aes.*") ++ * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") + * @requires vm.compiler1.enabled | !vm.graal.enabled + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae6..a635f03d24 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.intrinsics.sha.cli.testcases; + + import compiler.intrinsics.sha.cli.SHAOptionsBase; +@@ -32,19 +38,20 @@ import jdk.test.lib.cli.predicate.OrPredicate; + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, PPC, S390x, SPARC, LoongArch64 and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC, LoongArch64 and X86. 
+ super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, ++ new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +index 62d0e99155..c3fa3fb93e 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +@@ -29,6 +29,7 @@ import jdk.vm.ci.code.InstalledCode; + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; + import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; + import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; +@@ -37,6 +38,7 @@ import jdk.vm.ci.meta.MetaAccessProvider; + import jdk.vm.ci.runtime.JVMCI; + import jdk.vm.ci.runtime.JVMCIBackend; + import jdk.vm.ci.sparc.SPARC; ++import jdk.vm.ci.loongarch64.LoongArch64; + import org.junit.Assert; + + import java.lang.reflect.Method; +@@ -72,6 +74,8 @@ public class CodeInstallationTest { + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof SPARC) { + return new SPARCTestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +index 8afc7d7b98..520d7707a2 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +index 75d0748da5..a6826e2ffe 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +index a67fa2c1df..59cce6454d 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +index d9e1f24c30..259218b305 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" 
| vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +index 9b92114055..00d0f53cdb 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +index 5b2204868c..ecfcb1cf01 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java 
++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +index a10e90acda..5b1a58c74b 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +new file mode 100644 +index 0000000000..4c76868453 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void 
emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ 
emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ // Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ 
Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register 
a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d2..664ea11d0d 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + /* @test + * @bug 8167409 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f9311..ee5ab2f6dd 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* @test + * @bug 8167408 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5..c1cb6e00f3 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.testlibrary.sha.predicate; + + import jdk.test.lib.Platform; +@@ -63,10 +69,12 @@ public class IntrinsicPredicates { + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +@@ -74,12 +82,14 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd..c9277604ae 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test ReservedStackTest + * +@@ -239,7 +245,7 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isMIPS() || Platform.isLoongArch64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 77458554b7..05aee6b84c 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f..025048c6b0 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.jfr.event.os; + + import java.util.List; +@@ -54,8 +60,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + } + } + } +diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java +index 0c74c5bdad..421f37e390 100644 +--- a/test/jdk/sun/security/pkcs11/PKCS11Test.java ++++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // common infrastructure for SunPKCS11 tests + + import java.io.BufferedReader; +@@ -747,6 +753,9 @@ public abstract class PKCS11Test { + "/usr/lib64/" }); + osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); + osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[] {}); + osMap.put("Windows-amd64-64", new String[] {}); +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index f4ee0546c7..a600d15b61 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package jdk.test.lib; + + import java.io.FileNotFoundException; +@@ -226,6 +232,14 @@ public class Platform { + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ + public static String getOsArch() { + return osArch; + } +diff --git a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java +new file mode 100644 +index 0000000000..81fd956a4e +--- /dev/null ++++ b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java +@@ -0,0 +1,87 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. 
++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++package org.openjdk.bench.java.lang; ++ ++import java.util.Random; ++import java.util.concurrent.TimeUnit; ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.Blackhole; ++ ++@OutputTimeUnit(TimeUnit.MILLISECONDS) ++@State(Scope.Thread) ++@BenchmarkMode(Mode.Throughput) ++public class RotateBenchmark { ++ ++ @Param({"1024"}) ++ public int TESTSIZE; ++ ++ @Param({"20"}) ++ public int SHIFT; ++ ++ public long [] larr; ++ public int [] iarr; ++ ++ public long [] lres; ++ public int [] ires; ++ ++ ++ @Setup(Level.Trial) ++ public void BmSetup() { ++ Random r = new Random(1024); ++ larr = new long[TESTSIZE]; ++ iarr = new int[TESTSIZE]; ++ lres = new long[TESTSIZE]; ++ ires = new int[TESTSIZE]; ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ larr[i] = r.nextLong(); ++ } ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ iarr[i] = r.nextInt(); ++ } ++ } ++ ++ @Benchmark ++ public void testRotateLeftI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateLeft(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateRight(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateLeftL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateLeft(larr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateRight(larr[i], SHIFT); ++ } ++ ++} +diff --git a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java +new file mode 100644 +index 0000000000..58400cadf6 +--- /dev/null ++++ b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package org.openjdk.bench.vm.compiler; ++ ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.*; ++ ++import java.util.concurrent.TimeUnit; ++import java.util.Random; ++ ++@BenchmarkMode(Mode.Throughput) ++@OutputTimeUnit(TimeUnit.SECONDS) ++@State(Scope.Thread) ++public class MacroLogicOpt { ++ @Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN; ++ ++ private int [] ai = new int[VECLEN]; ++ private int [] bi = new int[VECLEN]; ++ private int [] ci = new int[VECLEN]; ++ private int [] ri = new int[VECLEN]; ++ ++ private long [] al = new long[VECLEN]; ++ private long [] bl = new long[VECLEN]; ++ private long [] cl = new long[VECLEN]; ++ private long [] dl = new long[VECLEN]; ++ private long [] el = new long[VECLEN]; ++ private long [] fl = new long[VECLEN]; ++ private long [] rl = new long[VECLEN]; ++ ++ private Random r = new Random(); ++ ++ @Setup ++ public void init() { ++ ai = new int[VECLEN]; ++ bi = new int[VECLEN]; ++ ci = new int[VECLEN]; ++ ri = new int[VECLEN]; ++ ++ al = new long[VECLEN]; ++ bl = new long[VECLEN]; ++ cl = new long[VECLEN]; ++ dl = new long[VECLEN]; ++ el = new long[VECLEN]; ++ fl = new long[VECLEN]; ++ rl = new long[VECLEN]; ++ for (int i=0; imax_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +new file mode 100644 +index 00000000000..e6e62cccad0 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +@@ -0,0 +1,849 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ 
ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); 
++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ 
lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, 
AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +new file mode 100644 +index 00000000000..5eae8b9995c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +@@ -0,0 +1,2831 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return no_scale; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. 
++ ++ Address(Register base, ByteSize disp) ++ : Address(base, in_bytes(disp)) {} ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : Address(base, index, scale, in_bytes(disp)) {} ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. 
We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 
0b0000000100010100101001, ++ movgr2fr_d_op = 0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op 
= 0b0111001010011110001100, ++ vftint_l_d_op = 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op 
= 0b0111011010011110100011, ++ xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, 
++ vbitclri_h_op = 0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 
0b00111000001010000, ++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, ++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ 
vsub_h_op = 0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ 
vftintrm_w_d_op = 0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ 
xvmsub_w_op = 0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ 
xvrotri_d_op = 0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 
0b0010100110, ++ st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ csr_op = 0b00000100, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, ++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // 
| opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 2RI1-type ++ // 31 11 10 9 5 4 0 ++ // | opcode | I1 | vj | rd | ++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } ++ ++ // 2RI2-type ++ // 31 12 11 10 9 5 4 0 ++ // | opcode | I2 | vj | rd | ++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } ++ ++ // 2RI3-type ++ // 31 13 12 10 9 5 4 0 ++ // | opcode | I3 | vj | vd | ++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } ++ ++ // 2RI4-type ++ // 31 14 13 10 9 5 4 0 ++ // | opcode | I4 | vj | vd | ++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } ++ ++ // 2RI5-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | I5 | vj | vd | ++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } ++ ++ // 2RI6-type ++ // 31 16 15 10 9 5 4 0 ++ // | opcode | I6 | vj | vd | ++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI7-type ++ // 31 17 16 10 9 5 4 0 ++ // | opcode | I7 | vj | vd | ++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI8-type ++ // 31 18 17 10 9 5 4 0 ++ // | opcode | I8 | rj | rd | ++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } ++ ++ // 2RI12-type ++ // 31 22 21 10 9 5 4 0 ++ // | opcode | I12 | rj | rd | ++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } ++ ++ // 2RI14-type ++ // 31 24 23 10 9 5 4 0 ++ // | opcode | I14 | rj | rd | ++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } ++ ++ // 2RI16-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I16 | rj | rd | ++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } ++ ++ // 1RI13-type (?) ++ // 31 18 17 5 4 0 ++ // | opcode | I13 | vd | ++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } ++ ++ // 1RI20-type (?) 
++ // 31 25 24 5 4 0 ++ // | opcode | I20 | rd | ++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } ++ ++ // 1RI21-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I21[15:0] | rj |I21[20:16]| ++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } ++ ++ // I26-type ++ // 31 26 25 10 9 0 ++ // | opcode | I26[15:0] | I26[25:16] | ++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } ++ ++ // imm15 ++ // 31 15 14 0 ++ // | opcode | I15 | ++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } ++ ++ ++ // get the offset field of beq, bne, blt[u], bge[u] instruction ++ int offset16(address entry) { ++ assert(is_simm16((entry - pc()) / 4), "change this code"); ++ if (!is_simm16((entry - pc()) / 4)) { ++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of beqz, bnez instruction ++ int offset21(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 21)) { ++ tty->print_cr("!!! is_simm21: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of b instruction ++ int offset26(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 26)) { ++ tty->print_cr("!!! 
is_simm26: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<<h)) | x; } ++ ++ static int split_low16(int x) { ++ return (x & 0xffff); ++ } ++ ++ // Convert 16-bit x to a sign-extended 16-bit integer ++ static int simm16(int x) { ++ assert(x == (x & 0xFFFF), "must be 16-bit only"); ++ return (x << 16) >> 16; ++ } ++ ++ static int split_high16(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int split_low20(int x) { ++ return (x & 0xfffff); ++ } ++ ++ // Convert 20-bit x to a sign-extended 20-bit integer ++ static int simm20(int x) { ++ assert(x == (x & 0xFFFFF), "must be 20-bit only"); ++ return (x << 12) >> 12; ++ } ++ ++ static int split_low12(int x) { ++ return (x & 0xfff); ++ } ++ ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ ++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) { ++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; ++ si38 += (si38 & 0x20000) << 1; ++ si20 = si38 >> 18; ++ } ++ ++ // Convert 12-bit x to a sign-extended 12-bit integer ++ static int simm12(int x) { ++ assert(x == (x & 0xFFF), "must be 12-bit only"); ++ return (x << 20) >> 20; ++ } ++ ++ // Convert 26-bit x to a sign-extended 26-bit integer ++ static int simm26(int x) { ++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); ++ return (x << 6) >> 6; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12) { ++ //lu12i, ori ++ return (((x12 << 12) | x0) << 32) >> 32; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { ++ //lu32i, lu12i, ori ++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { ++ //lu52i, lu32i, lu12i, ori ++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits.
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_int32(int x) { ++ AbstractAssembler::emit_int32(x); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ inline void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0) { ++ if (rtype == relocInfo::none) { ++ emit_int64(data); ++ } else { ++ emit_data64(data, Relocation::spec_simple(rtype), format); ++ } ++ } ++ ++ inline void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0) { ++ assert(inst_mark() != NULL, "must be inside InstructionMark"); ++ // Do not use AbstractAssembler::relocate, which is not intended for ++ // embedded words. Instead, relocate to the enclosing instruction. ++ code_section()->relocate(inst_mark(), rspec, format); ++ emit_int64(data); ++ } ++ ++ //---< calculate length of instruction >--- ++ // With LoongArch being a RISC architecture, this always is BytesPerInstWord ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return BytesPerInstWord; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return BytesPerInstWord; } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register 
rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_d_w (Register rd, Register rj, 
Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } ++ ++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), 
(int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), 
(int)fd->encoding())); } ++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } ++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } ++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { 
emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } ++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); } ++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } ++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } ++ ++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void 
ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } ++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , 
(int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void 
fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, 
(int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } ++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } ++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } ++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } ++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } ++ void 
pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } ++ ++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void csrrd (Register rd, int csr) { emit_int32(insn_I14RR(csr_op, csr, 0, (int)rd->encoding())); } ++ void csrwr (Register rd, int csr) { emit_int32(insn_I14RR(csr_op, csr, 1, (int)rd->encoding())); } ++ ++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); 
emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } ++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++
++ void ld_b (Register rd, Address src);
++ void ld_bu (Register rd, Address src);
++ void ld_d (Register rd, Address src);
++ void ld_h (Register rd, Address src);
++ void ld_hu (Register rd, Address src);
++ void ll_w (Register rd, Address src);
++ void ll_d (Register rd, Address src);
++ void ld_wu (Register rd, Address src);
++ void ld_w (Register rd, Address src);
++ void st_b (Register rd, Address dst);
++ void st_d (Register rd, Address dst);
++ void st_w (Register rd, Address dst);
++ void sc_w (Register rd, Address dst);
++ void sc_d (Register rd, Address dst);
++ void st_h (Register rd, Address dst);
++ void fld_s (FloatRegister fd, Address src);
++ void fld_d (FloatRegister fd, Address src);
++ void fst_s (FloatRegister fd, Address dst);
++ void fst_d (FloatRegister fd, Address dst);
++
++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_w (Register rd, Register rk, Register rj) {
assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); 
emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void dbar(int hint) { ++ assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); ++ ++ if (os::is_ActiveCoresMP()) ++ andi(R0, R0, 0); ++ else ++ emit_int32(insn_I15(dbar_op, hint)); ++ } ++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } ++ ++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } 
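++ // Note (annotation added for clarity, not part of the upstream Loongson port sources): the
++ // fldgt/fldle/fstgt/fstle accessors around this point, and the ldgt/ldle/stgt/stle family
++ // that follows, encode LoongArch's bound-check memory accesses; per the reference manual the
++ // access is performed only when the check against rk passes (GT: rj > rk, LE: rj <= rk),
++ // otherwise a bound-check exception is raised. dbar/ibar above are the data/instruction
++ // barriers: dbar(0) is a full barrier, dbar() degrades to the no-op andi(R0, R0, 0) when
++ // os::is_ActiveCoresMP() is true, and membar() further below derives weaker dbar hints from
++ // the Membar_mask_bits constants.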
++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, 
(int)rj->encoding())); } ++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } ++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } ++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } ++ ++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } ++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } ++ ++ ++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } ++ void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } ++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } ++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } ++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } ++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } ++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } ++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } ++ void b(address entry) { b(offset26(entry)); } ++ void bl(address entry) { bl(offset26(entry)); } ++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } ++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } ++ ++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } ++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } ++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } ++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } ++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } ++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, 
target(L)); } ++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); } ++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); } ++ void b(Label& L) { b(target(L)); } ++ void bl(Label& L) { bl(target(L)); } ++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } ++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } ++ ++ typedef enum { ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the constants below corresponds to the hint bits, which makes them convenient to combine with OR. ++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { 
ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, 
(int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX 
assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { 
ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( 
vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, 
(int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, 
FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX 
emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister 
vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister 
vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, 
FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { 
ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX 
emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX 
emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX 
emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); }
++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); }
++
++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); }
++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); }
++
++#undef ASSERT_LSX
++#undef ASSERT_LASX
++
++public:
++ // Creation
++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
++
++ // Decoding
++ static address locate_operand(address inst, WhichOperand which);
++ static address locate_next_instruction(address inst);
++};
++
++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp
+new file mode 100644
+index 00000000000..9ca0cd45047
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp
+@@ -0,0 +1,33 @@
++/*
++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP
++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP
++
++#include "asm/assembler.inline.hpp"
++#include "asm/codeBuffer.hpp"
++#include "code/codeCache.hpp"
++
++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP
+diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp
+new file mode 100644
+index 00000000000..c15344eb390
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp
+@@ -0,0 +1,73 @@
++/*
++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP
++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP
++
++#include "memory/allocation.hpp"
++
++class Bytes: AllStatic {
++ public:
++ // Returns true if the byte ordering used by Java is different from the native byte ordering
++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris
++ // on Sparc.
++ // we use LoongArch, so return true
++ static inline bool is_Java_byte_ordering_different(){ return true; }
++
++
++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
++ // (no special code is needed since LoongArch CPUs can access unaligned data)
++ static inline u2 get_native_u2(address p) { return *(u2*)p; }
++ static inline u4 get_native_u4(address p) { return *(u4*)p; }
++ static inline u8 get_native_u8(address p) { return *(u8*)p; }
++
++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; }
++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; }
++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; }
++
++
++ // Efficient reading and writing of unaligned unsigned data in Java
++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is
++ // needed since LoongArch64 CPUs use little-endian format.
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }
++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
++
++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }
++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); }
++
++
++ // Efficient swapping of byte ordering
++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation
++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation
++ static inline u8 swap_u8(u8 x);
++};
++
++
++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base]
++#include OS_CPU_HEADER_INLINE(bytes)
++
++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp
+new file mode 100644
+index 00000000000..663a9aec2a9
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp
+@@ -0,0 +1,360 @@
++/*
++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "c1/c1_CodeStubs.hpp"
++#include "c1/c1_FrameMap.hpp"
++#include "c1/c1_LIRAssembler.hpp"
++#include "c1/c1_MacroAssembler.hpp"
++#include "c1/c1_Runtime1.hpp"
++#include "classfile/javaClasses.hpp"
++#include "nativeInst_loongarch.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "vmreg_loongarch.inline.hpp"
++
++#define A0 RA0
++#define A3 RA3
++
++#define __ ce->masm()->
++
++void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset());
++ __ lea(SCR2, safepoint_pc);
++ __ st_ptr(SCR2, TREG, in_bytes(JavaThread::saved_exception_pc_offset()));
++
++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
++ "polling page return stub not created yet");
++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
++
++ __ jmp(stub, relocInfo::runtime_call_type);
++}
++
++void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ Metadata *m = _method->as_constant_ptr()->as_metadata();
++ __ mov_metadata(SCR2, m);
++ ce->store_parameter(SCR2, 1);
++ ce->store_parameter(_bci, 0);
++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ __ b(_continuation);
++}
++
++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
++ assert(info != NULL, "must have info");
++ _info = new CodeEmitInfo(info);
++}
++
++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index)
++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) {
++ assert(info != NULL, "must have info");
++ _info = new CodeEmitInfo(info);
++}
++
++void RangeCheckStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ if (_info->deoptimize_on_exception()) {
++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
++ __ call(a, relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ debug_only(__ should_not_reach_here());
++ return;
++ }
++
++ if (_index->is_cpu_register()) {
++ __ move(SCR1, _index->as_register());
++ } else {
++ __ li(SCR1, _index->as_jint());
++ }
++ Runtime1::StubID stub_id;
++ if (_throw_index_out_of_bounds_exception) {
++ stub_id = Runtime1::throw_index_exception_id;
++ } else {
++ assert(_array != NULL, "sanity");
++ __ move(SCR2, _array->as_pointer_register());
++ stub_id = Runtime1::throw_range_check_failed_id;
++ }
++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ debug_only(__ should_not_reach_here());
++}
++
++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
++ _info = new CodeEmitInfo(info);
++}
++
++void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
++ __ call(a, relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ debug_only(__ should_not_reach_here());
++}
++
++void DivByZeroStub::emit_code(LIR_Assembler* ce) {
++ if (_offset != -1) {
++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
++ }
++ __ bind(_entry);
++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++#ifdef ASSERT
++ __ should_not_reach_here();
++#endif
++}
++
++// Implementation of NewInstanceStub
++
++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass,
++ CodeEmitInfo* info, Runtime1::StubID stub_id) {
++ _result = result;
++ _klass = klass;
++ _klass_reg = klass_reg;
++ _info = new CodeEmitInfo(info);
++ assert(stub_id == Runtime1::new_instance_id ||
++ stub_id == Runtime1::fast_new_instance_id ||
++ stub_id == Runtime1::fast_new_instance_init_check_id,
++ "need new_instance id");
++ _stub_id = stub_id;
++}
++
++void NewInstanceStub::emit_code(LIR_Assembler* ce) {
++ assert(__ rsp_offset() == 0, "frame size should be fixed");
++ __ bind(_entry);
++ __ move(A3, _klass_reg->as_register());
++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ assert(_result->as_register() == A0, "result must in A0");
++ __ b(_continuation);
++}
++
++// Implementation of NewTypeArrayStub
++
++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result,
++ CodeEmitInfo* info) {
++ _klass_reg = klass_reg;
++ _length = length;
++ _result = result;
++ _info = new CodeEmitInfo(info);
++}
++
++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
++ assert(__ rsp_offset() == 0, "frame size should be fixed");
++ __ bind(_entry);
++ assert(_length->as_register() == S0, "length must in S0,");
++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3");
++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ assert(_result->as_register() == A0, "result must in A0");
++ __ b(_continuation);
++}
++
++// Implementation of NewObjectArrayStub
++
++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result,
++ CodeEmitInfo* info) {
++ _klass_reg = klass_reg;
++ _result = result;
++ _length = length;
++ _info = new CodeEmitInfo(info);
++}
++
++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
++ assert(__ rsp_offset() == 0, "frame size should be fixed");
++ __ bind(_entry);
++ assert(_length->as_register() == S0, "length must in S0,");
++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3");
++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ assert(_result->as_register() == A0, "result must in A0");
++ __ b(_continuation);
++}
++
++// Implementation of MonitorAccessStubs
++
++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
++ :
MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (especially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. ++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++ } ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +new file mode 100644 +index 00000000000..1140e44431d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +new file mode 100644 +index 00000000000..047412d036a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +new file mode 100644 +index 00000000000..1a89c437a83 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +new file mode 100644 +index 00000000000..4f0cf053617 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static 
LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +new file mode 100644 +index 00000000000..8d439fda060 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +@@ -0,0 +1,362 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. 
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; ++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap 
++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ 
a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. 
++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..baadeebb243 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. 
++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ void emit_cmp_branch(LIR_OpBranch* op); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +new file mode 100644 +index 00000000000..2ddf19a6e5a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +@@ -0,0 +1,3384 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? 
op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. 
++    __ align(CodeEntryAlignment);
++  }
++
++  __ bind(dont);
++  return start_offset;
++}
++
++void LIR_Assembler::clinit_barrier(ciMethod* method) {
++  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
++  assert(!method->holder()->is_not_initialized(), "initialization should have been started");
++  Label L_skip_barrier;
++
++  __ mov_metadata(SCR2, method->holder()->constant_encoding());
++  __ clinit_barrier(SCR2, SCR1, &L_skip_barrier /*L_fast_path*/);
++  __ jmp(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type);
++  __ bind(L_skip_barrier);
++}
++
++void LIR_Assembler::jobject2reg(jobject o, Register reg) {
++  if (o == NULL) {
++    __ move(reg, R0);
++  } else {
++    int oop_index = __ oop_recorder()->find_index(o);
++    RelocationHolder rspec = oop_Relocation::spec(oop_index);
++    __ relocate(rspec);
++    __ patchable_li52(reg, (long)o);
++  }
++}
++
++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
++  address target = NULL;
++
++  switch (patching_id(info)) {
++    case PatchingStub::access_field_id:
++      target = Runtime1::entry_for(Runtime1::access_field_patching_id);
++      break;
++    case PatchingStub::load_klass_id:
++      target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
++      break;
++    case PatchingStub::load_mirror_id:
++      target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
++      break;
++    case PatchingStub::load_appendix_id:
++      target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
++      break;
++    default: ShouldNotReachHere();
++  }
++
++  __ call(target, relocInfo::runtime_call_type);
++  add_call_info_here(info);
++}
++
++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
++  deoptimize_trap(info);
++}
++
++// This specifies the rsp decrement needed to build the frame
++int LIR_Assembler::initial_frame_size_in_bytes() const {
++  // if rounding, must let FrameMap know!
++  return in_bytes(frame_map()->framesize_in_bytes());
++}
++
++int LIR_Assembler::emit_exception_handler() {
++  // if the last instruction is a call (typically to do a throw which
++  // is coming at the end after block reordering) the return address
++  // must still point into the code area in order to avoid assertion
++  // failures when searching for the corresponding bci => add a nop
++  // (was bug 5/14/1999 - gri)
++  __ nop();
++
++  // generate code for exception handler
++  address handler_base = __ start_a_stub(exception_handler_size());
++  if (handler_base == NULL) {
++    // not enough space left for the handler
++    bailout("exception handler overflow");
++    return -1;
++  }
++
++  int offset = code_offset();
++
++  // the exception oop and pc are in A0, and A1
++  // no other registers need to be preserved, so invalidate them
++  __ invalidate_registers(false, true, true, true, true, true);
++
++  // check that there is really an exception
++  __ verify_not_null_oop(A0);
++
++  // search an exception handler (A0: exception oop, A1: throwing pc)
++  __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
++  __ should_not_reach_here();
++  guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
++  __ end_a_stub();
++
++  return offset;
++}
++
++// Emit the code to remove the frame from the stack in the exception unwind path.
++int LIR_Assembler::emit_unwind_handler() {
++#ifndef PRODUCT
++  if (CommentedAssembly) {
++    _masm->block_comment("Unwind handler");
++  }
++#endif
++
++  int offset = code_offset();
++
++  // Fetch the exception from TLS and clear out exception related thread state
++  __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset()));
++  __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset()));
++  __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset()));
++
++  __ bind(_unwind_handler_entry);
++  __ verify_not_null_oop(V0);
++  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
++    __ move(S0, V0); // Preserve the exception
++  }
++
++  // Perform needed unlocking
++  MonitorExitStub* stub = NULL;
++  if (method()->is_synchronized()) {
++    monitor_address(0, FrameMap::a0_opr);
++    stub = new MonitorExitStub(FrameMap::a0_opr, true, 0);
++    __ unlock_object(A5, A4, A0, *stub->entry());
++    __ bind(*stub->continuation());
++  }
++
++  if (compilation()->env()->dtrace_method_probes()) {
++    __ mov_metadata(A1, method()->constant_encoding());
++    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1);
++  }
++
++  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
++    __ move(A0, S0); // Restore the exception
++  }
++
++  // remove the activation and dispatch to the unwind handler
++  __ block_comment("remove_frame and dispatch to the unwind handler");
++  __ remove_frame(initial_frame_size_in_bytes());
++  __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type);
++
++  // Emit the slow path assembly
++  if (stub != NULL) {
++    stub->emit_code(this);
++  }
++
++  return offset;
++}
++
++int LIR_Assembler::emit_deopt_handler() {
++  // if the last instruction is a call (typically to do a throw which
++  // is coming at the end after block reordering) the return address
++  // must still point into the code area in order to avoid assertion
++  // failures when searching for the corresponding bci => add a nop
++  // (was bug 5/14/1999 - gri)
++  __ nop();
++
++  // generate code for exception handler
++  address handler_base = __ start_a_stub(deopt_handler_size());
++  if (handler_base == NULL) {
++    // not enough space left for the handler
++    bailout("deopt handler overflow");
++    return -1;
++  }
++
++  int offset = code_offset();
++
++  __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type);
++  guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
++  __ end_a_stub();
++
++  return offset;
++}
++
++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
++  _masm->code_section()->relocate(adr, relocInfo::poll_type);
++  int pc_offset = code_offset();
++  flush_debug_info(pc_offset);
++  info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
++  if (info->exception_handlers() != NULL) {
++    compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
++  }
++}
++
++void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
++  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0,
++         "word returns are in V0,");
++
++  // Pop the stack before the safepoint code
++  __ remove_frame(initial_frame_size_in_bytes());
++
++  if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
++    __ reserved_stack_check();
++  }
++
++  code_stub->set_safepoint_offset(__ offset());
++  __ relocate(relocInfo::poll_return_type);
++  __ safepoint_poll(*code_stub->entry(), TREG, true /* at_return */, false /* acquire */, true /* in_nmethod */);
++
++  __ jr(RA);
++}
++
++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
++  guarantee(info != NULL, "Shouldn't be NULL");
++  __ ld_ptr(SCR1, Address(TREG, JavaThread::polling_page_offset()));
++  add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map
++  __ relocate(relocInfo::poll_type);
++  __ ld_w(SCR1, SCR1, 0);
++  return __ offset();
++}
++
++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
++  __ move(to_reg, from_reg);
++}
++
++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); }
++
++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
++  assert(src->is_constant(), "should not call otherwise");
++  assert(dest->is_register(), "should not call otherwise");
++  LIR_Const* c = src->as_constant_ptr();
++
++  switch (c->type()) {
++    case T_INT:
++      assert(patch_code == lir_patch_none, "no patching handled here");
++      __ li(dest->as_register(), c->as_jint());
++      break;
++    case T_ADDRESS:
++      assert(patch_code == lir_patch_none, "no patching handled here");
++      __ li(dest->as_register(), c->as_jint());
++      break;
++    case T_LONG:
++      assert(patch_code == lir_patch_none, "no patching handled here");
++      __ li(dest->as_register_lo(), (intptr_t)c->as_jlong());
++      break;
++    case T_OBJECT:
++      if (patch_code == lir_patch_none) {
++        jobject2reg(c->as_jobject(), dest->as_register());
++      } else {
++        jobject2reg_with_patching(dest->as_register(), info);
++      }
++      break;
++    case T_METADATA:
++      if (patch_code != lir_patch_none) {
++        klass2reg_with_patching(dest->as_register(), info);
++      } else {
++        __ mov_metadata(dest->as_register(), c->as_metadata());
++      }
++      break;
++    case T_FLOAT:
++      __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat())));
++      __ fld_s(dest->as_float_reg(), SCR1, 0);
++      break;
++    case T_DOUBLE:
++      __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble())));
++      __ fld_d(dest->as_double_reg(), SCR1, 0);
++      break;
++    default:
++      ShouldNotReachHere();
++  }
++}
++
++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
++  LIR_Const* c = src->as_constant_ptr();
++  switch (c->type()) {
++    case T_OBJECT:
++      if (!c->as_jobject())
++        __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix()));
++      else {
++        const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL);
++        reg2stack(FrameMap::scr1_opr, dest, c->type(), false);
++      }
++      break;
++    case T_ADDRESS:
++      const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL);
++      reg2stack(FrameMap::scr1_opr, dest, c->type(), false);
++    case T_INT:
++    case T_FLOAT:
++      if (c->as_jint_bits() == 0)
++        __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix()));
++      else {
++        __ li(SCR2, c->as_jint_bits());
++        __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix()));
++      }
++      break;
++    case T_LONG:
++    case T_DOUBLE:
++      if (c->as_jlong_bits() == 0)
++        __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(),
++                                                    lo_word_offset_in_bytes));
++      else {
++        __ li(SCR2, (intptr_t)c->as_jlong_bits());
++        __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(),
++                                                      lo_word_offset_in_bytes));
++      }
++      break;
++    default:
++      ShouldNotReachHere();
++  }
++}
++
++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
++                              CodeEmitInfo* info, bool wide) {
++  assert(src->is_constant(), "should not call otherwise");
++  LIR_Const* c = src->as_constant_ptr();
++
LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ 
__ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = 
Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } ++ } ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++#endif ++ ++ if (op->cond() == lir_cond_always) { ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++ } else { ++ emit_cmp_branch(op); ++ } ++} ++ ++void LIR_Assembler::emit_cmp_branch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cond_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case 
lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = 
op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ 
bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. ++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ 
mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ 
st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true, /* weak */ false, /* exchage */ false); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true, /* weak */ false, /* exchage */ false); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr = op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type, ++ LIR_Opr left, LIR_Opr right) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ 
sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: 
++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = log2i_exact(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), ++ "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. 
++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); ++ __ move(A4, j_rarg4); ++ assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); ++ __ move(A3, j_rarg3); ++ assert_different_registers(A2, j_rarg0, j_rarg1); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A1, j_rarg0); ++ __ move(A1, j_rarg1); ++ __ move(A0, j_rarg0); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ __ move(tmp, A0); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // tmp is -1^K where K == partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
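The generic-arraycopy call above relies on the stub's return convention: 0 means the whole copy succeeded, otherwise the stub returns -1 ^ K (the bitwise NOT of K), where K is the number of elements it did manage to copy. That is why the code takes nor(SCR1, tmp, R0) and then advances the positions and shrinks the length before branching to the slow path. A small sketch of that bookkeeping (CopyState and adjust_after_partial_copy are illustrative names, not HotSpot types):

    #include <cassert>

    struct CopyState { int src_pos, dst_pos, length; };

    // stub_return is 0 on full success, otherwise ~K with K = elements copied.
    void adjust_after_partial_copy(CopyState& s, long stub_return) {
      if (stub_return == 0) return;        // nothing left to do
      long copied = ~stub_return;          // recover K
      s.length  -= (int)copied;
      s.src_pos += (int)copied;
      s.dst_pos += (int)copied;            // the slow path retries the remainder
    }

    int main() {
      CopyState s{0, 0, 10};
      adjust_after_partial_copy(s, ~4L);   // stub managed 4 of 10 elements
      assert(s.src_pos == 4 && s.dst_pos == 4 && s.length == 6);
      return 0;
    }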
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ load_klass(A4, dst); ++ assert_different_registers(A4, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); ++ __ move(tmp, A0); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { ++ Register obj = op->obj()->as_pointer_register(); ++ Register result = op->result_opr()->as_pointer_register(); ++ ++ CodeEmitInfo* info = op->info(); ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(result, obj, oopDesc::klass_offset_in_bytes()); ++ __ decode_klass_not_null(result); ++ } else { ++ __ ld_ptr(result, obj, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = 
op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR1, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. 
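The receiver handling in emit_profile_call above boils down to a tiny fixed-size cache per call site: find a row that already holds this receiver klass and bump its counter, otherwise claim the first empty row, and if every row is taken fall back to the plain call counter so the site is recorded as polymorphic. An illustrative model (the struct below is not the real MethodData layout, and row_limit is just a stand-in for VirtualCallData::row_limit()):

    #include <cassert>
    #include <cstddef>

    struct CallProfile {
      static const size_t row_limit = 2;          // assumed small, as in practice
      const void* receiver[row_limit] = {};
      long        receiver_count[row_limit] = {};
      long        total_count = 0;
    };

    void profile_receiver(CallProfile& p, const void* klass) {
      for (size_t i = 0; i < CallProfile::row_limit; i++)
        if (p.receiver[i] == klass) { p.receiver_count[i]++; return; }
      for (size_t i = 0; i < CallProfile::row_limit; i++)
        if (p.receiver[i] == nullptr) { p.receiver[i] = klass; p.receiver_count[i]++; return; }
      p.total_count++;   // no row matched and none free: polymorphic case
    }

    int main() {
      CallProfile p;
      int a, b, c;
      profile_receiver(p, &a); profile_receiver(p, &a); profile_receiver(p, &b);
      profile_receiver(p, &c);                     // third distinct receiver type
      assert(p.receiver_count[0] == 2 && p.total_count == 1);
      return 0;
    }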
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
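The flag juggling in emit_profile_type above treats a single profile cell as a klass pointer with two low flag bits, "null seen" and "type unknown": once the unknown bit is set nothing more is recorded, the first klass observed is stored as-is, and a conflicting klass only sets the unknown bit. A high-level sketch of that state machine (the constants and update_cell below are assumptions that mirror the masks visible in the code, not the real TypeEntries definitions):

    #include <cassert>
    #include <cstdint>

    constexpr intptr_t NULL_SEEN    = 1;
    constexpr intptr_t TYPE_UNKNOWN = 2;
    constexpr intptr_t KLASS_MASK   = ~intptr_t(3);   // matches type_klass_mask == -4

    void update_cell(intptr_t& cell, intptr_t klass) {
      if (cell & TYPE_UNKNOWN) return;                          // already polymorphic
      if ((cell & KLASS_MASK) == 0) { cell |= klass; return; }  // first type seen
      if ((cell & KLASS_MASK) == klass) return;                 // same type again
      cell |= TYPE_UNKNOWN;                                     // conflict: give up precision
    }

    int main() {
      intptr_t cell = 0;
      update_cell(cell, 0x1000);  assert((cell & KLASS_MASK) == 0x1000);
      update_cell(cell, 0x1000);  assert(!(cell & TYPE_UNKNOWN));
      update_cell(cell, 0x2000);  assert(cell & TYPE_UNKNOWN);
      return 0;
    }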
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, ++ CodeEmitInfo* info) { ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void 
LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() != Address::no_scale) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +new file mode 100644 +index 00000000000..fedcc547d48 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +@@ -0,0 +1,1384 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return 
v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ return addr; ++} ++ 
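generate_address and emit_array_address above fold a constant displacement into the address only when it fits LoongArch's signed 12-bit immediate field; anything larger is materialized into a register first. A quick sketch of that range test (fits_simm is a hypothetical stand-in for Assembler::is_simm, assuming two's-complement integers):

    #include <cassert>
    #include <cstdint>

    // true iff x is representable in `bits` two's-complement bits.
    static bool fits_simm(int64_t x, unsigned bits) {
      int64_t lo = -(int64_t(1) << (bits - 1));
      int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
      return x >= lo && x <= hi;
    }

    int main() {
      assert(fits_simm(2047, 12));     // largest positive 12-bit immediate
      assert(fits_simm(-2048, 12));    // smallest negative
      assert(!fits_simm(2048, 12));    // too big: load into a scratch register instead
      return 0;
    }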
++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, wo we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++} ++ ++void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp(condition, reg, reg1); ++} ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = 
new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. 
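The _frem/_drem case above becomes an outright runtime call because there is no floating-point remainder instruction to emit, and Java's % on floats keeps the sign of the dividend, i.e. it behaves like C's fmod rather than the IEEE round-to-nearest remainder (the exact SharedRuntime::frem/drem implementation is assumed here to be fmod-equivalent). For example:

    #include <cmath>
    #include <cstdio>

    int main() {
      std::printf("%f\n", std::fmod( 5.5, 2.0));   //  1.5
      std::printf("%f\n", std::fmod(-5.5, 2.0));   // -1.5, sign follows the dividend
      return 0;
    }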
++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ __ cmp(lir_cond_equal, right_arg->result(), 
LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. ++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); 
++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, ++ LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (is_reference_type(type)) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), ++ new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ move(FrameMap::scr1_opr, result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = is_reference_type(type); ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop || type == T_LONG, "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return 
result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || type == T_LONG, "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { 
++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = RA0; ++ Register j_rarg2 = RA1; ++ Register j_rarg3 = RA2; ++ Register j_rarg4 = RA3; ++ Register j_rarg5 = RA4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. 
++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: ++ __ fmad(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ case vmIntrinsics::_fmaF: ++ __ fmaf(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void 
LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). 
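// The new_instance / allocate_array sequences above pin their operands to fixed
// registers and attach a slow-path CodeStub; on the fast path the object is
// bump-allocated out of the thread-local allocation buffer. A minimal standalone
// sketch of that bump-and-check shape (struct and function names are illustrative):
#include <cstddef>
#include <cstdio>

struct Tlab {
  char* top;   // next free byte
  char* end;   // end of the buffer
};

// Returns the object start, or nullptr to signal "take the slow path".
static void* tlab_bump_allocate(Tlab& t, size_t size_in_bytes) {
  if (size_in_bytes > static_cast<size_t>(t.end - t.top))
    return nullptr;            // does not fit: refill the TLAB or allocate elsewhere
  void* obj = t.top;
  t.top += size_in_bytes;      // bump the pointer; thread-local, so no atomics
  return obj;
}

int main() {
  alignas(8) static char buffer[64];
  Tlab t{buffer, buffer + sizeof buffer};
  void* a = tlab_bump_allocate(t, 24);
  void* b = tlab_bump_allocate(t, 48);   // 48 > 40 bytes remaining -> slow path
  std::printf("a=%p b=%p\n", a, b);
  return 0;
}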
++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ 
assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ __ cmp(lir_cond(cond), left, right); ++ // Generate branch profiling. Profiling code doesn't kill flags. ++ profile_branch(x, cond); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ } else { ++ __ branch(lir_cond(cond), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (!CompilerConfig::is_c1_only_no_jvmci()) { ++ __ membar(); ++ } ++ __ volatile_load_mem_reg(address, result, info); ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +new file mode 100644 +index 00000000000..01e8c9f270e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. ++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || ++ index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +new file mode 100644 +index 00000000000..f15dacafeba +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +new file mode 100644 +index 00000000000..219b2e3671c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..38ff4c58369 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // 
object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, ++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +new file mode 100644 +index 00000000000..56c6281d415 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +@@ -0,0 +1,365 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ null_check_offset = offset(); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(hdr, obj); ++ ld_w(hdr, Address(hdr, Klass::access_flags_offset())); ++ li(SCR1, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(SCR1, hdr, SCR1); ++ bnez(SCR1, slow_case); ++ } ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markWord::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. 
unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, 
Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markWord::prototype().value()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for 
negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++ ++ // Insert nmethod entry barrier into frame. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(this); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // FP + -2: link ++ // + -1: return address ++ // + 0: argument with offset 0 ++ // + 1: argument with offset 1 ++ // + 2: ... ++ ++ ld_ptr(reg, Address(FP, offset_in_words * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +new file mode 100644 +index 00000000000..87da18e294a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +@@ -0,0 +1,1148 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if 
(metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) { ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ bool _return_state; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state=requires_return); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ jr(RA); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state) { ++ _sasm = sasm; ++ _return_state = return_state; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the 
registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value. ++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < 
FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, frame::return_addr_offset * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. 
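++      // Because those registers are already dead, no register save area is set
++      // up for this id: the frame described below is only the two words pushed
++      // by the stub prologue (saved FP and the return address), and the OopMap
++      // built here covers just those slots.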
++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, frame::return_addr_offset * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. ++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, frame::return_addr_offset * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. 
We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. 
++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. 
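++        // In outline, the inline fast path below does (sketch only; the real code
++        // keeps obj in A0 and the size/temporaries in S0/T0/T1):
++        //   if (init check requested) require klass->init_state == fully_initialized;
++        //   obj_size = klass->layout_helper();        // instance size in bytes
++        //   obj = eden_allocate(obj_size)             // or branch to slow_path
++        //   initialize_object(obj, klass, obj_size);  // install header, zero the body
++        //   return obj in A0 via RA, without building a frame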
++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 0 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 1 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. 
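++        // Sketch of the size computation used by the fast path below (both the
++        // element-size shift and the header size come out of layout_helper):
++        //   arr_size = align_up(header_size(klass) + (length << log2_element_size(klass)),
++        //                       MinObjAlignmentInBytes);
++        //   obj = eden_allocate(arr_size)             // or branch to slow_path
++        //   initialize_header(obj, klass, length); zero the body; result stays in A0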
++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case monitorexit_nofpu_id: ++ 
save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +new file mode 100644 +index 00000000000..ce84af28c9b +--- /dev/null ++++ 
b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++ ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..ef520a39ff3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp +@@ -0,0 +1,1872 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/subnode.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. 
++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. 
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmpReg, objReg); ++ ld_w(tmpReg, Address(tmpReg, Klass::access_flags_offset())); ++ li(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, tmpReg, AT); ++ sltui(scrReg, AT, 1); ++ beqz(scrReg, DONE_SET); ++ } ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markWord::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markWord::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 
1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markWord::unused_mark().value())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. 
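++// In outline, the fast-path unlock that follows is (sketch of the code below):
++//   if (box->displaced_header == 0)           succeed;      // recursive stack-lock
++//   else if (obj->mark is not a monitor)      CAS obj->mark from box back to the
++//                                             displaced header (stack-locked case);
++//   else /* inflated */ if (owner == Self && recursions == 0 &&
++//                           cxq == NULL && EntryList == NULL)
++//       release-store owner = NULL and succeed;
++// resReg is set to 1 on success and 0 when control must go to the slow path,
++// mirroring the protocol documented at the end of fast_lock above.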
++ ++void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markWord::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
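++  // Below, that release ordering is provided by membar(LoadStore|StoreStore)
++  // issued immediately before the st_d that clears _owner.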
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bceqz(FCC0, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bcnez(FCC0, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, Address adr); ++ ++void C2_MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ Address::ScaleFactor needle_chr_shift = needle_isL ? Address::no_scale ++ : Address::times_2; ++ Address::ScaleFactor haystack_chr_shift = haystack_isL ? Address::no_scale ++ : Address::times_2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurrence of pattern in source or return -1. 
++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >= 8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ ++ // needle_len < 8, use linear scan ++ li(AT, 8); ++ blt(needle_len, AT, LINEARSEARCH); ++ ++ // needle_len >= 256, use linear scan ++ li(AT, 256); ++ bge(needle_len, AT, LINEARSTUB); ++ ++ // needle_len >= haystack_len/4, use linear scan ++ srli_d(AT, haystack_len, 2); ++ bge(needle_len, AT, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD; ++ ++ Register haystack_end = haystack_len; ++ Register result_tmp = result; ++ ++ Register nlen_tmp = T0; // needle len tmp ++ Register skipch = T1; ++ Register last_byte = T2; ++ Register last_dword = T3; ++ Register orig_haystack = T4; ++ Register ch1 = T5; ++ Register ch2 = T6; ++ ++ RegSet spilled_regs = RegSet::range(T0, T6); ++ ++ push(spilled_regs); ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int first_step = isLL ? 
7 : 3; ++ ++ const int ASIZE = 256; ++ ++ addi_d(SP, SP, -ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ // ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ if (UseLASX) { ++ xvreplgr2vr_b(fscratch, needle_len); ++ ++ for (int i = 0; i < ASIZE; i += 32) { ++ xvst(fscratch, SP, i); ++ } ++ } else if (UseLSX) { ++ vreplgr2vr_b(fscratch, needle_len); ++ ++ for (int i = 0; i < ASIZE; i += 16) { ++ vst(fscratch, SP, i); ++ } ++ } else { ++ move(AT, needle_len); ++ bstrins_d(AT, AT, 15, 8); ++ bstrins_d(AT, AT, 31, 16); ++ bstrins_d(AT, AT, 63, 32); ++ ++ for (int i = 0; i < ASIZE; i += 8) { ++ st_d(AT, SP, i); ++ } ++ } ++ ++ sub_d(nlen_tmp, haystack_len, needle_len); ++ lea(haystack_end, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); ++ addi_d(ch2, needle_len, -1); // bc offset init value ++ move(nlen_tmp, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(nlen_tmp)); ++ addi_d(nlen_tmp, nlen_tmp, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ li(AT, 256u); ++ bgeu(ch1, AT, BCSKIP); // GE for UTF ++ } ++ stx_b(ch2, SP, ch1); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ addi_d(ch2, ch2, -1); // for next pattern element, skip distance -1 ++ blt(R0, ch2, BCLOOP); ++ ++ if (needle_isL == haystack_isL) { ++ // load last 8 pattern bytes (8LL/4UU symbols) ++ ld_d(last_dword, Address(needle, needle_len, needle_chr_shift, -wordSize)); ++ addi_d(nlen_tmp, needle_len, -1); // m - 1, index of the last element in pattern ++ move(orig_haystack, haystack); ++ bstrpick_d(last_byte, last_dword, 63, 64 - 8 * needle_chr_size); // UU/LL: pattern[m-1] ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ // load last 4 bytes(4 symbols) ++ ld_wu(last_byte, Address(needle, needle_len, Address::no_scale, -wordSize / 2)); ++ addi_d(nlen_tmp, needle_len, -1); // m - 1, index of the last element in pattern ++ move(orig_haystack, haystack); ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ bstrpick_d(last_dword, last_byte, 7, 0); ++ srli_d(last_byte, last_byte, 8); ++ bstrins_d(last_dword, last_byte, 23, 16); ++ srli_d(last_byte, last_byte, 8); ++ bstrins_d(last_dword, last_byte, 39, 32); ++ srli_d(last_byte, last_byte, 8); // last_byte: 0x0000000a ++ bstrins_d(last_dword, last_byte, 55, 48); // last_dword: 0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ (this->*haystack_load_1chr)(skipch, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); ++ addi_d(nlen_tmp, nlen_tmp, -first_step); // nlen_tmp is positive here, because needle_len >= 8 ++ bne(last_byte, skipch, BMSKIP); // if not equal, skipch is bad char ++ ld_d(ch2, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); // load 8 bytes from source string ++ move(ch1, last_dword); ++ if (isLL) { ++ b(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ addi_d(nlen_tmp, nlen_tmp, -1); // no need to branch for UU/UL case. 
cnt1 >= 8 ++ b(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ (this->*needle_load_1chr)(ch1, Address(needle, nlen_tmp, needle_chr_shift, 0)); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ addi_d(nlen_tmp, nlen_tmp, -1); ++ blt(nlen_tmp, R0, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ move(result_tmp, needle_len); ++ } else { ++ li(result_tmp, 1); ++ } ++ li(AT, 256u); ++ bgeu(skipch, AT, BMADV); // GE for UTF ++ } ++ ldx_bu(result_tmp, SP, skipch); // load skip offset ++ ++ bind(BMADV); ++ addi_d(nlen_tmp, needle_len, -1); ++ // move haystack after bad char skip offset ++ lea(haystack, Address(haystack, result_tmp, haystack_chr_shift, 0)); ++ bge(haystack_end, haystack, BMLOOPSTR2); ++ addi_d(SP, SP, ASIZE); ++ b(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub_d(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli_d(result, result, 1); ++ } ++ addi_d(SP, SP, ASIZE); ++ pop(spilled_regs); ++ b(DONE); ++ ++ bind(LINEARSTUB); ++ li(AT, 16); // small patterns still should be handled by simple algorithm ++ blt(needle_len, AT, LINEARSEARCH); ++ move(result, R0); ++ address stub; ++ if (isLL) { ++ stub = StubRoutines::la::string_indexof_linear_ll(); ++ assert(stub != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = StubRoutines::la::string_indexof_linear_ul(); ++ assert(stub != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = StubRoutines::la::string_indexof_linear_uu(); ++ assert(stub != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(RuntimeAddress(stub)); ++ b(DONE); ++ ++ bind(NOMATCH); ++ li(result, -1); ++ pop(spilled_regs); ++ b(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, -1, result, ae); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register hlen_neg = haystack_len; ++ Register nlen_neg = needle_len; ++ Register result_tmp = result; ++ ++ Register nlen_tmp = A0, hlen_tmp = A1; ++ Register first = A2, ch1 = A3, ch2 = AT; ++ ++ RegSet spilled_regs = RegSet::range(A0, A3); ++ ++ push(spilled_regs); ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? 
(load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::ld_hu ++ : (load_chr_insn)&MacroAssembler::ld_wu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::ld_wu ++ : (load_chr_insn)&MacroAssembler::ld_d; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ li(AT, needle_isL == haystack_isL ? 4 : 2); // UU/LL:4, UL:2 ++ blt(needle_len, AT, DOSHORT); ++ ++ sub_d(result_tmp, haystack_len, needle_len); ++ ++ (this->*needle_load_1chr)(first, Address(needle)); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ if (!needle_isL) slli_d(needle_len, needle_len, needle_chr_shift); ++ add_d(needle, needle, needle_len); ++ sub_d(nlen_neg, R0, needle_len); ++ ++ bind(FIRST_LOOP); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, FIRST_LOOP); ++ b(NOMATCH); ++ ++ bind(STR1_LOOP); ++ addi_d(nlen_tmp, nlen_neg, needle_chr_size); ++ addi_d(hlen_tmp, hlen_neg, haystack_chr_size); ++ bge(nlen_tmp, R0, MATCH); ++ ++ bind(STR1_NEXT); ++ (this->*needle_load_1chr)(ch1, Address(needle, nlen_tmp, Address::no_scale, 0)); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_tmp, Address::no_scale, 0)); ++ bne(ch1, ch2, STR2_NEXT); ++ addi_d(nlen_tmp, nlen_tmp, needle_chr_size); ++ addi_d(hlen_tmp, hlen_tmp, haystack_chr_size); ++ blt(nlen_tmp, R0, STR1_NEXT); ++ b(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ li(AT, 2); ++ blt(needle_len, AT, DO1); // needle_len == 1 ++ blt(AT, needle_len, DO3); // needle_len == 3 ++ // if needle_len == 2 then goto DO2 ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle)); ++ addi_d(result_tmp, haystack_len, -4); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ bind(CH1_LOOP); ++ (this->*load_4chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(ch1, ch2, MATCH); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, CH1_LOOP); ++ b(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle)); ++ addi_d(result_tmp, haystack_len, -2); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ bind(CH1_LOOP); ++ (this->*load_2chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(ch1, ch2, MATCH); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, CH1_LOOP); ++ b(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle)); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size)); ++ addi_d(result_tmp, haystack_len, -3); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ 
bind(FIRST_LOOP); ++ (this->*load_2chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, FIRST_LOOP); ++ b(NOMATCH); ++ ++ bind(STR1_LOOP); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 2 * haystack_chr_size)); ++ bne(ch1, ch2, STR2_NEXT); ++ b(MATCH); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP; ++ Register mask01 = nlen_tmp; ++ Register mask7f = hlen_tmp; ++ Register masked = first; ++ ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle)); ++ li(AT, 8); ++ blt(haystack_len, AT, DO1_SHORT); ++ ++ addi_d(result_tmp, haystack_len, -8 / haystack_chr_size); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ if (haystack_isL) bstrins_d(ch1, ch1, 15, 8); ++ bstrins_d(ch1, ch1, 31, 16); ++ bstrins_d(ch1, ch1, 63, 32); ++ ++ li(mask01, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); ++ li(mask7f, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ldx_d(ch2, haystack, hlen_neg); ++ xorr(ch2, ch1, ch2); ++ sub_d(masked, ch2, mask01); ++ orr(ch2, ch2, mask7f); ++ andn(masked, masked, ch2); ++ bnez(masked, HAS_ZERO); ++ addi_d(hlen_neg, hlen_neg, 8); ++ blt(hlen_neg, R0, CH1_LOOP); ++ ++ li(AT, 8); ++ bge(hlen_neg, AT, NOMATCH); ++ move(hlen_neg, R0); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(masked, masked); ++ srli_d(masked, masked, 3); ++ add_d(hlen_neg, hlen_neg, masked); ++ b(MATCH); ++ ++ bind(DO1_SHORT); ++ addi_d(result_tmp, haystack_len, -1); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ bind(DO1_LOOP); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(ch1, ch2, MATCH); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, DO1_LOOP); ++ } ++ ++ bind(NOMATCH); ++ li(result, -1); ++ b(DONE); ++ ++ bind(MATCH); ++ add_d(result, result_tmp, hlen_neg); ++ if (!haystack_isL) srai_d(result, result, haystack_chr_shift); ++ ++ bind(DONE); ++ pop(spilled_regs); ++} ++ ++void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 4); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // UTF-16 char occupies 16 bits ++ // ch -> chchchch ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0001000100010001); ++ li(tmp3, 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 4); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 4); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 4); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 15, 0); ++ ++ bind(DO1_LOOP); ++ ld_hu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 2); ++ addi_d(result, result, 1); ++ 
blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 8); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // Latin-1 char occupies 8 bits ++ // ch -> chchchchchchchch ++ bstrins_d(ch, ch, 15, 8); ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0101010101010101); ++ li(tmp3, 0x7f7f7f7f7f7f7f7f); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 8); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 8); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 3); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 7, 0); ++ ++ bind(DO1_LOOP); ++ ld_bu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 1); ++ addi_d(result, result, 1); ++ blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++// Compare strings, used for char[] and byte[]. ++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae, Register tmp1, Register tmp2) { ++ Label L, Loop, LoopEnd, HaveResult, Done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ int charsInWord = isLL ? wordSize : wordSize/2; ++ ++ if (!str1_isL) srli_w(cnt1, cnt1, 1); ++ if (!str2_isL) srli_w(cnt2, cnt2, 1); ++ ++ // compute the difference of lengths (in result) ++ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ ori(AT, R0, charsInWord); ++ bge(cnt2, cnt1, Loop); ++ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ // ++ // For example: ++ // If isLL == true and cnt1 > 8, we load 8 bytes from str1 and str2. (Suppose A1 and B1 are different) ++ // tmp1: A7 A6 A5 A4 A3 A2 A1 A0 ++ // tmp2: B7 B6 B5 B4 B3 B2 B1 B0 ++ // ++ // Then Use xor to find the difference between tmp1 and tmp2, right shift. ++ // tmp1: 00 A7 A6 A5 A4 A3 A2 A1 ++ // tmp2: 00 B7 B6 B5 B4 B3 B2 B1 ++ // ++ // Fetch 0 to 7 bits of tmp1 and tmp2, subtract to get the result. ++ // Other types are similar to isLL. 
++ bind(Loop); ++ blt(cnt1, AT, LoopEnd); ++ if (isLL) { ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x38); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 7, 0); ++ bstrpick_d(tmp2, tmp2, 7, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } else if (isLU) { ++ ld_wu(cnt2, str1, 0); ++ andr(tmp1, R0, R0); ++ bstrins_d(tmp1, cnt2, 7, 0); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp1, cnt2, 23, 16); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp1, cnt2, 39, 32); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp1, cnt2, 55, 48); ++ ld_d(tmp2, str2, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x30); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 15, 0); ++ bstrpick_d(tmp2, tmp2, 15, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 4); ++ addi_d(str2, str2, 8); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } else if (isUL) { ++ ld_wu(cnt2, str2, 0); ++ andr(tmp2, R0, R0); ++ bstrins_d(tmp2, cnt2, 7, 0); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp2, cnt2, 23, 16); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp2, cnt2, 39, 32); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp2, cnt2, 55, 48); ++ ld_d(tmp1, str1, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x30); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 15, 0); ++ bstrpick_d(tmp2, tmp2, 15, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 4); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } else { // isUU ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x30); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 15, 0); ++ bstrpick_d(tmp2, tmp2, 15, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } ++ ++ bind(LoopEnd); ++ beqz(cnt1, Done); ++ if (str1_isL) { ++ ld_bu(tmp1, str1, 0); ++ } else { ++ ld_hu(tmp1, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ ld_bu(tmp2, str2, 0); ++ } else { ++ ld_hu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, HaveResult); ++ addi_d(str1, str1, str1_isL ? 1 : 2); ++ addi_d(str2, str2, str2_isL ? 1 : 2); ++ addi_d(cnt1, cnt1, -1); ++ b(LoopEnd); ++ ++ bind(HaveResult); ++ sub_d(result, tmp1, tmp2); ++ ++ bind(Done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void C2_MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char) { ++ Label Loop, LoopEnd, True, False; ++ ++ addi_d(result, R0, 1); ++ beq(str1, str2, True); // same char[] ? ++ beqz(cnt, True); ++ ++ addi_d(AT, R0, is_char ? wordSize/2 : wordSize); ++ bind(Loop); ++ blt(cnt, AT, LoopEnd); ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt, cnt, is_char ? 
-wordSize/2 : -wordSize); ++ b(Loop); ++ ++ bind(LoopEnd); ++ beqz(cnt, True); ++ // compare current character ++ if (is_char) { ++ ld_hu(tmp1, str1, 0); ++ ld_hu(tmp2, str2, 0); ++ } else { ++ ld_bu(tmp1, str1, 0); ++ ld_bu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, is_char ? 2 : 1); ++ addi_d(str2, str2, is_char ? 2 : 1); ++ addi_d(cnt, cnt, -1); ++ b(LoopEnd); ++ ++ bind(False); ++ addi_d(result, R0, 0); ++ ++ bind(True); ++} ++ ++void C2_MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_b(vec1, 
vec2, vec3); break; ++ case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_SHORT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; ++ case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; ++ case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; ++ case Op_AndReductionV: andr(reg1, reg2, reg3); break; ++ case Op_OrReductionV: orr(reg1, reg2, reg3); break; ++ case Op_XorReductionV: xorr(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; ++ case Op_AndReductionV: andr(reg1, reg2, reg3); break; ++ case Op_OrReductionV: orr(reg1, reg2, reg3); break; ++ case Op_XorReductionV: xorr(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_FLOAT: ++ switch (opcode) { ++ case Op_AddReductionVF: fadd_s(reg1, reg2, reg3); break; ++ case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_DOUBLE: ++ switch (opcode) { ++ case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce(Register dst, 
Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ xvpermi_d(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ vpermi_w(tmp2, tmp1, 0b00001110); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } else if (vector_size == 16) { ++ vpermi_w(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (type != T_LONG) { ++ vshuf4i_w(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_INT) { ++ vshuf4i_h(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_SHORT) { ++ vshuf4i_b(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } ++ } ++ } ++ ++ switch (type) { ++ case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; ++ case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; ++ case T_INT: vpickve2gr_w(dst, tmp1, 0); break; ++ case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (opcode == Op_MaxReductionV) { ++ slt(AT, dst, src); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else if (opcode == Op_MinReductionV) { ++ slt(AT, src, dst); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else { ++ reduce_ins_r(dst, dst, src, type, opcode); ++ } ++ switch (type) { ++ case T_BYTE: ext_w_b(dst, dst); break; ++ case T_SHORT: ext_w_h(dst, dst); break; ++ default: ++ break; ++ } ++} ++ ++void C2_MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_w(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 4); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 5); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 6); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 7); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_d(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000001); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000010); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000011); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00001110); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::vector_compare(FloatRegister dst, FloatRegister src1, FloatRegister src2, BasicType bt, int cond, int vector_size) { ++ if (vector_size == 32) { ++ if (bt == T_BYTE) { ++ switch 
(cond) { ++ case BoolTest::ne: xvseq_b (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_b (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_b (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_b (dst, src2, src1); break; ++ case BoolTest::le: xvsle_b (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_b (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_bu(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_bu(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_bu(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_bu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_SHORT) { ++ switch (cond) { ++ case BoolTest::ne: xvseq_h (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_h (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_h (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_h (dst, src2, src1); break; ++ case BoolTest::le: xvsle_h (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_h (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_hu(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_hu(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_hu(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_hu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_INT) { ++ switch (cond) { ++ case BoolTest::ne: xvseq_w (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_w (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_w (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_w (dst, src2, src1); break; ++ case BoolTest::le: xvsle_w (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_w (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_wu(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_wu(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_wu(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_wu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_LONG) { ++ switch (cond) { ++ case BoolTest::ne: xvseq_d (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_d (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_d (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_d (dst, src2, src1); break; ++ case BoolTest::le: xvsle_d (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_d (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_du(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_du(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_du(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_du(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_FLOAT) { ++ switch (cond) { ++ case BoolTest::ne: xvfcmp_cune_s(dst, src1, src2); break; ++ case BoolTest::eq: xvfcmp_ceq_s (dst, src1, src2); break; ++ case BoolTest::ge: xvfcmp_cle_s (dst, src2, src1); break; ++ case BoolTest::gt: xvfcmp_clt_s (dst, src2, src1); break; ++ case BoolTest::le: xvfcmp_cule_s(dst, src1, src2); break; ++ case BoolTest::lt: xvfcmp_cult_s(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_DOUBLE) { ++ switch (cond) { ++ case BoolTest::ne: xvfcmp_cune_d(dst, src1, src2); break; ++ case BoolTest::eq: xvfcmp_ceq_d (dst, src1, src2); break; ++ case BoolTest::ge: xvfcmp_cle_d (dst, src2, src1); break; ++ case BoolTest::gt: xvfcmp_clt_d (dst, src2, src1); break; ++ case BoolTest::le: xvfcmp_cule_d(dst, src1, src2); break; ++ case BoolTest::lt: xvfcmp_cult_d(dst, src1, src2); 
break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ if (bt == T_BYTE) { ++ switch (cond) { ++ case BoolTest::ne: vseq_b (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_b (dst, src1, src2); break; ++ case BoolTest::ge: vsle_b (dst, src2, src1); break; ++ case BoolTest::gt: vslt_b (dst, src2, src1); break; ++ case BoolTest::le: vsle_b (dst, src1, src2); break; ++ case BoolTest::lt: vslt_b (dst, src1, src2); break; ++ case BoolTest::uge: vsle_bu(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_bu(dst, src2, src1); break; ++ case BoolTest::ule: vsle_bu(dst, src1, src2); break; ++ case BoolTest::ult: vslt_bu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_SHORT) { ++ switch (cond) { ++ case BoolTest::ne: vseq_h (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_h (dst, src1, src2); break; ++ case BoolTest::ge: vsle_h (dst, src2, src1); break; ++ case BoolTest::gt: vslt_h (dst, src2, src1); break; ++ case BoolTest::le: vsle_h (dst, src1, src2); break; ++ case BoolTest::lt: vslt_h (dst, src1, src2); break; ++ case BoolTest::uge: vsle_hu(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_hu(dst, src2, src1); break; ++ case BoolTest::ule: vsle_hu(dst, src1, src2); break; ++ case BoolTest::ult: vslt_hu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_INT) { ++ switch (cond) { ++ case BoolTest::ne: vseq_w (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_w (dst, src1, src2); break; ++ case BoolTest::ge: vsle_w (dst, src2, src1); break; ++ case BoolTest::gt: vslt_w (dst, src2, src1); break; ++ case BoolTest::le: vsle_w (dst, src1, src2); break; ++ case BoolTest::lt: vslt_w (dst, src1, src2); break; ++ case BoolTest::uge: vsle_wu(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_wu(dst, src2, src1); break; ++ case BoolTest::ule: vsle_wu(dst, src1, src2); break; ++ case BoolTest::ult: vslt_wu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_LONG) { ++ switch (cond) { ++ case BoolTest::ne: vseq_d (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_d (dst, src1, src2); break; ++ case BoolTest::ge: vsle_d (dst, src2, src1); break; ++ case BoolTest::gt: vslt_d (dst, src2, src1); break; ++ case BoolTest::le: vsle_d (dst, src1, src2); break; ++ case BoolTest::lt: vslt_d (dst, src1, src2); break; ++ case BoolTest::uge: vsle_du(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_du(dst, src2, src1); break; ++ case BoolTest::ule: vsle_du(dst, src1, src2); break; ++ case BoolTest::ult: vslt_du(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_FLOAT) { ++ switch (cond) { ++ case BoolTest::ne: vfcmp_cune_s(dst, src1, src2); break; ++ case BoolTest::eq: vfcmp_ceq_s (dst, src1, src2); break; ++ case BoolTest::ge: vfcmp_cle_s (dst, src2, src1); break; ++ case BoolTest::gt: vfcmp_clt_s (dst, src2, src1); break; ++ case BoolTest::le: vfcmp_cule_s(dst, src1, src2); break; ++ case BoolTest::lt: vfcmp_cult_s(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_DOUBLE) { ++ switch (cond) { ++ case BoolTest::ne: vfcmp_cune_d(dst, src1, src2); break; ++ case BoolTest::eq: vfcmp_ceq_d (dst, src1, src2); break; ++ case BoolTest::ge: vfcmp_cle_d (dst, src2, src1); break; ++ case BoolTest::gt: vfcmp_clt_d (dst, src2, src1); break; ++ case 
BoolTest::le: vfcmp_cule_d(dst, src1, src2); break; ++ case BoolTest::lt: vfcmp_cult_d(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { ++ ++ switch(flag) { ++ case 0x01: //equal ++ beq(op1, op2, L); ++ break; ++ case 0x02: //not_equal ++ bne(op1, op2, L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt(op2, op1, L); ++ else ++ bltu(op2, op1, L); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge(op1, op2, L); ++ else ++ bgeu(op1, op2, L); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt(op1, op2, L); ++ else ++ bltu(op1, op2, L); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge(op2, op1, L); ++ else ++ bgeu(op2, op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void C2_MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { ++ switch(flag) { ++ case 0x01: //equal ++ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt_long(op2, op1, *L, true /* signed */); ++ else ++ blt_long(op2, op1, *L, false); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge_long(op1, op2, *L, true /* signed */); ++ else ++ bge_long(op1, op2, *L, false); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt_long(op1, op2, *L, true /* signed */); ++ else ++ blt_long(op1, op2, *L, false); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge_long(op2, op1, *L, true /* signed */); ++ else ++ bge_long(op2, op1, *L, false); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void C2_MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { ++ switch(flag) { ++ case 0x01: //equal ++ beqz(op1, L); ++ break; ++ case 0x02: //not_equal ++ bnez(op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..2babade2e22 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_MACROASSEMBLER_LOONGARCH_HPP ++ ++// C2_MacroAssembler contains high-level macros for C2 ++ ++public: ++ ++ void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); ++ void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); ++ void cmp_branchEqNe_off21(int flag, Register op1, Label& L); ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae, Register tmp1, Register tmp2); ++ ++ // Find index of char in Latin-1 string ++ void stringL_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ // Find index of char in UTF-16 string ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ void string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ int needle_con_cnt, Register result, int ae); ++ ++ // Compare char[] or byte[] arrays. 
++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char); ++ ++ // Memory Data Type ++ #define INT_TYPE 0x100 ++ #define FLOAT_TYPE 0x200 ++ #define SIGNED_TYPE 0x10 ++ #define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); ++ void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++ ++ void vector_compare(FloatRegister dst, FloatRegister src1, FloatRegister src2, BasicType type, int cond, int vector_size); ++ ++private: ++ ++ template <typename T> ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != -1) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++ ++ void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); ++ void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); ++ void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif // CPU_LOONGARCH_C2_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +new file mode 100644 +index 00000000000..0e40e2ca7cf +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates.
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 31); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 23); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +new file mode 100644 +index 00000000000..ec78b942d40 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/loongarch/c2_safepointPollStubTable_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_safepointPollStubTable_loongarch.cpp +new file mode 100644 +index 00000000000..866858b26b8 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_safepointPollStubTable_loongarch.cpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++#define __ masm. ++void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ __ bind(entry->_stub_label); ++ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); ++ __ lea(AT, safepoint_pc); ++ __ st_d(AT, Address(thread, JavaThread::saved_exception_pc_offset())); ++ __ jmp(stub, relocInfo::runtime_call_type); ++} ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +new file mode 100644 +index 00000000000..653d95806bf +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +new file mode 100644 +index 00000000000..b3f70a1665b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ cbuf.set_insts_mark(); ++ __ patchable_jump(__ pc()); ++ // Update current stubs pointer and restore code_end. 
++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ verify_mt_safe(callee, entry, method_holder, jump); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +new file mode 100644 +index 00000000000..54b847a7369 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. ++template <typename T> ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +new file mode 100644 +index 00000000000..fd176c8f056 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation.
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++ // Returns address of n-th instruction preceding addr, ++ // NULL if no preceding instruction can be found. ++ // With LoongArch being a RISC architecture, this always is BytesPerInstWord ++ // It might be beneficial to check "is_readable" as we do on ppc and s390. ++ static address find_prev_instr(address addr, int n_instr) { ++ return addr - BytesPerInstWord*n_instr; ++ } ++ ++ // special-case instruction decoding. ++ // There may be cases where the binutils disassembler doesn't do ++ // the perfect job. In those cases, decode_instruction0 may kick in ++ // and do it right. ++ // If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++ static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++ } ++ ++ // platform-specific instruction annotations (like value of loaded constants) ++ static void annotate(address pc, outputStream* st) { }; ++ ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/foreign_globals_loongarch.cpp b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.cpp +new file mode 100644 +index 00000000000..fb4647c2723 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" ++ ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/loongarch/foreign_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.hpp +new file mode 100644 +index 00000000000..680a27363ec +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. 
++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef CPU_LOONGARCH_FOREIGN_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_FOREIGN_GLOBALS_LOONGARCH_HPP ++ ++class BufferLayout {}; ++class ABIDescriptor {}; ++ ++#endif // CPU_LOONGARCH_FOREIGN_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +new file mode 100644 +index 00000000000..1aba8e4dd27 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +@@ -0,0 +1,668 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stackWatermarkSet.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ // sp must be within the usable part of the stack (not in guards) ++ if (!thread->is_in_usable_stack(sp)) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ if (!thread->is_in_stack_range_incl(unextended_sp, sp)) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = thread->is_in_stack_range_excl(fp, sp) && ++ thread->is_in_full_stack_checked(fp + (return_addr_offset * sizeof(void*))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) 
++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp - 1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - 2); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ return thread->is_in_stack_range_excl(jcw, (address)sender.fp()); ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame 
of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} ++ ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. 
++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp - 1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - 2); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++//------------------------------------------------------------------------------ ++// frame::sender_raw ++frame frame::sender_raw(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) ++ return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) ++ return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ // This test looks odd: why is it not is_compiled_frame() ? That's ++ // because stubs also have OOP maps. 
++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ frame result = sender_raw(map); ++ ++ if (map->process_frames()) { ++ StackWatermarkSet::on_iteration(map->thread(), result); ++ } ++ ++ return result; ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = safe_interpreter_frame_method(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ return thread->is_in_stack_range_incl(locals, (address)fp()); ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +new file mode 100644 +index 00000000000..738b5306366 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +@@ -0,0 +1,162 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// Low ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] = link() ++// [return pc ] ++// [oop temp ] <- fp (only for native calls) ++// [locals and parameters ] ++// High <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++// ++// ------------------------------ Native (C frame) --------------------------------------- ++// Layout of C frame: ++// High ++// | ++// - <----- fp <- sender sp ++// fp -8 | [ra] = sender_pc() ++// fp-16 | [fp (sender)] = link() ++// | [...] ++// | ++// - <----- sp ++// | ++// v ++// Low ++// ------------------------------ Native (C frame) --------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 1, // for native calls only ++ interpreter_frame_oop_temp_offset = 0, // for native calls only ++ ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -11, ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. 
++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. ++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++ // returns the sending frame, without applying any barriers ++ frame sender_raw(RegisterMap* map) const; ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +new file mode 100644 +index 00000000000..8b0e0502701 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +@@ -0,0 +1,245 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { ++ return (address*) addr_at(return_addr_offset); ++} ++ ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { ++ return addr_at(sender_sp_offset); ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = 
obj; ++} ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..e129264506b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,532 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ ++ __ beqz(AT, filtered); ++ ++ __ push(saved_regs); ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop(saved_regs); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) { ++ __ push(saved_regs); ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop(saved_regs); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ __ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), tmp); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -1 * wordSize); ++ __ st_d(tmp, index); ++ __ ld_d(AT, buffer); ++ ++ // Record the previous value ++ __ stx_d(pre_val, tmp, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(AT, done); ++ ++ // crosses regions, storing NULL? ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ srli_d(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ add_d(card_addr, card_addr, cardtable); ++ ++ __ ld_bu(AT, card_addr, 0); ++ __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beqz(AT, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ld_bu(AT, card_addr, 0); ++ __ beqz(AT, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ st_b(R0, card_addr, 0); ++ ++ __ ld_d(AT, queue_index); ++ __ beqz(AT, runtime); ++ __ addi_d(AT, AT, -1 * wordSize); ++ __ st_d(AT, queue_index); ++ __ ld_d(tmp2, buffer); ++ __ ld_d(AT, queue_index); ++ __ stx_d(card_addr, tmp2, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ __ load_parameter(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address, not use? ++ Address store_addr(FP, 2 * BytesPerWord); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ __ load_parameter(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(Assembler::StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..745046ac0cc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1Globals_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1Globals_loongarch.hpp +new file mode 100644 +index 00000000000..44b7ff1485f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1Globals_loongarch.hpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1GLOBALS_LOONGARCH_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 8; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..4706559a837 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +@@ -0,0 +1,320 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. 
++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ld_wu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ ld_bu (dst, src); break; ++ case T_BYTE: __ ld_b (dst, src); break; ++ case T_CHAR: __ ld_hu (dst, src); break; ++ case T_SHORT: __ ld_h (dst, src); break; ++ case T_INT: __ ld_w (dst, src); break; ++ case T_LONG: __ ld_d (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ fld_s(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ fld_d(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ st_w(R0, dst); ++ } else { ++ __ st_d(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ st_w(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ st_b(val, dst); ++ break; ++ case T_BYTE: ++ __ st_b(val, dst); ++ break; ++ case T_SHORT: ++ __ st_h(val, dst); ++ break; ++ case T_CHAR: ++ __ st_h(val, dst); ++ break; ++ case T_INT: ++ __ st_w(val, dst); ++ break; ++ case T_LONG: ++ __ st_d(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ fst_s(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ fst_d(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ assert_different_registers(obj, t2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = t2; ++ ++ // verify_tlab(); ++ ++ __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ lea(end, Address(obj, var_size_in_bytes, Address::no_scale, 0)); ++ } ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); ++ __ blt_far(SCR1, end, slow_case, false); ++ ++ // update the tlab top pointer ++ __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub_d(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++ // verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ b_far(slow_case); ++ } else { ++ Register end = t1; ++ Register heap_end = SCR2; ++ Label retry; ++ __ bind(retry); ++ ++ __ li(SCR1, (address)Universe::heap()->end_addr()); ++ __ ld_d(heap_end, SCR1, 0); ++ ++ // Get the current top of the heap ++ __ li(SCR1, (address) Universe::heap()->top_addr()); ++ __ ll_d(obj, SCR1, 0); ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) ++ __ addi_d(end, obj, con_size_in_bytes); ++ else ++ __ add_d(end, obj, var_size_in_bytes); ++ ++ // if end < obj then we wrapped around high memory ++ __ blt_far(end, obj, slow_case, false); ++ __ blt_far(heap_end, end, slow_case, false); ++ ++ // If heap top hasn't been changed by some other thread, update it. ++ __ sc_d(end, SCR1, 0); ++ __ beqz(end, retry); ++ ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1); ++ } ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ assert(t1->is_valid(), "need temp reg"); ++ ++ __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) ++ __ add_d(t1, t1, var_size_in_bytes); ++ else ++ __ addi_d(t1, t1, con_size_in_bytes); ++ __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++} ++ ++void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ ++ if (bs_nm == NULL) { ++ return; ++ } ++ ++ Label skip, guard; ++ Address thread_disarmed_addr(TREG, in_bytes(bs_nm->thread_disarmed_offset())); ++ ++ __ lipc(SCR1, guard); ++ __ ld_w(SCR1, SCR1, 0); ++ ++ // Subsequent loads of oops must occur after load of guard value. ++ // BarrierSetNMethod::disarm sets guard with release semantics. ++ __ membar(__ LoadLoad); ++ __ ld_w(SCR2, thread_disarmed_addr); ++ __ beq(SCR1, SCR2, skip); ++ ++ __ call_long(StubRoutines::la::method_entry_barrier()); ++ __ b(skip); ++ ++ __ bind(guard); ++ __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
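Taken together, the sequence above embeds a guard word after the entry code, loads it, and compares it against the thread-local "disarmed" value; only on a mismatch does execution divert into the method_entry_barrier stub. A rough C++ rendering of that handshake, with every name invented for illustration:

    #include <atomic>

    struct NMethodLike { std::atomic<int> guard; };   // the emit_int32(0) slot above
    struct ThreadLike  { int disarmed_value; };       // thread_disarmed_addr above

    static void entry_barrier_slow_path(NMethodLike*) { /* placeholder for the stub */ }

    void on_nmethod_entry(NMethodLike* nm, ThreadLike* self) {
      // ld.w of the guard; acquire ordering plays the role of the LoadLoad membar.
      int g = nm->guard.load(std::memory_order_acquire);
      if (g != self->disarmed_value) {
        entry_barrier_slow_path(nm);   // re-checks, processes the nmethod, then disarms
      }
    }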
++ ++ __ bind(skip); ++} ++ ++void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs == NULL) { ++ return; ++ } ++ ++ Label bad_call; ++ __ beqz(Rmethod, bad_call); ++ ++ // Pointer chase to the method holder to find out if the method is concurrently unloading. ++ Label method_live; ++ __ load_method_holder_cld(SCR2, Rmethod); ++ ++ // Is it a strong CLD? ++ __ ld_w(SCR1, Address(SCR2, ClassLoaderData::keep_alive_offset())); ++ __ bnez(SCR1, method_live); ++ ++ // Is it a weak but alive CLD? ++ __ push2(RT2, RT8); ++ __ ld_ptr(RT8, Address(SCR2, ClassLoaderData::holder_offset())); ++ __ resolve_weak_handle(RT8, RT2); // Assembler occupies SCR1. ++ __ move(SCR1, RT8); ++ __ pop2(RT2, RT8); ++ __ bnez(SCR1, method_live); ++ ++ __ bind(bad_call); ++ ++ __ jmp(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ __ bind(method_live); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..f87c2061132 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++
++#include "asm/macroAssembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetNMethod.hpp"
++#include "memory/allocation.hpp"
++#include "oops/access.hpp"
++
++class InterpreterMacroAssembler;
++
++class BarrierSetAssembler: public CHeapObj<mtGC> {
++private:
++  void incr_allocated_bytes(MacroAssembler* masm,
++                            Register var_size_in_bytes,
++                            int con_size_in_bytes,
++                            Register t1);
++
++public:
++  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register src, Register dst, Register count, RegSet saved_regs) {}
++  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register dst, Register count, Register scratch, RegSet saved_regs) {}
++
++  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                       Register dst, Address src, Register tmp1, Register tmp_thread);
++  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                        Address dst, Register val, Register tmp1, Register tmp2);
++
++
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Register obj2);
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Address obj2);
++
++  virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) {
++    // Default implementation does not need to do anything.
++  }
++
++  // Support for jniFastGetField to try resolving a jobject/jweak in native
++  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
++                                             Register obj, Register tmp, Label& slowpath);
++
++  virtual void tlab_allocate(MacroAssembler* masm,
++    Register obj,                // result: pointer to object after successful allocation
++    Register var_size_in_bytes,  // object size in bytes if unknown at compile time; invalid otherwise
++    int con_size_in_bytes,       // object size in bytes if known at compile time
++    Register t1,                 // temp register
++    Register t2,                 // temp register
++    Label& slow_case             // continuation point if fast allocation fails
++  );
++
++  void eden_allocate(MacroAssembler* masm,
++    Register obj,                // result: pointer to object after successful allocation
++    Register var_size_in_bytes,  // object size in bytes if unknown at compile time; invalid otherwise
++    int con_size_in_bytes,       // object size in bytes if known at compile time
++    Register t1,                 // temp register
++    Label& slow_case             // continuation point if fast allocation fails
++  );
++
++  virtual void barrier_stubs_init() {}
++
++  virtual void nmethod_entry_barrier(MacroAssembler* masm);
++  virtual void c2i_entry_barrier(MacroAssembler* masm);
++
++};
++
++#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetNMethod_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetNMethod_loongarch.cpp
+new file mode 100644
+index 00000000000..65cee60f69d
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetNMethod_loongarch.cpp
+@@ -0,0 +1,157 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2019, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "code/codeCache.hpp"
++#include "code/nativeInst.hpp"
++#include "gc/shared/barrierSetNMethod.hpp"
++#include "logging/log.hpp"
++#include "memory/resourceArea.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/registerMap.hpp"
++#include "runtime/thread.hpp"
++#include "utilities/align.hpp"
++#include "utilities/debug.hpp"
++
++class NativeNMethodBarrier: public NativeInstruction {
++  address instruction_address() const { return addr_at(0); }
++
++  int *guard_addr() {
++    return reinterpret_cast<int*>(instruction_address() + 9 * 4);
++  }
++
++public:
++  int get_value() {
++    return Atomic::load_acquire(guard_addr());
++  }
++
++  void set_value(int value) {
++    Atomic::release_store(guard_addr(), value);
++  }
++
++  void verify() const;
++};
++
++// Store the instruction bitmask, bits and name for checking the barrier.
++struct CheckInsn {
++  uint32_t mask;
++  uint32_t bits;
++  const char *name;
++};
++
++static const struct CheckInsn barrierInsn[] = {
++  { 0xfe000000, 0x18000000, "pcaddi"},
++  { 0xffc00000, 0x28800000, "ld.w"},
++  { 0xffff8000, 0x38720000, "dbar"},
++  { 0xffc00000, 0x28800000, "ld.w"},
++  { 0xfc000000, 0x58000000, "beq"},
++  { 0xfe000000, 0x14000000, "lu12i.w"},
++  { 0xfe000000, 0x16000000, "lu32i.d"},
++  { 0xfc000000, 0x4c000000, "jirl"},
++  { 0xfc000000, 0x50000000, "b"}
++};
++
++// The encodings must match the instructions emitted by
++// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific
++// register numbers and immediate values in the encoding.
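In other words, each table entry pins down only the opcode bits of one 32-bit word; a generic matcher over such a table looks roughly like this (illustrative only, the names below are not part of the patch):

    #include <cstdint>
    #include <cstdio>

    struct InsnPattern {
      uint32_t    mask;   // bits that must match
      uint32_t    bits;   // required value of those bits
      const char* name;
    };

    // Returns true when every word of code[] matches the corresponding pattern,
    // ignoring whatever the mask leaves free (registers, immediates).
    bool matches_sequence(const uint32_t* code, const InsnPattern* pat, int n) {
      for (int i = 0; i < n; i++) {
        if ((code[i] & pat[i].mask) != pat[i].bits) {
          std::fprintf(stderr, "word %d is not a %s instruction\n", i, pat[i].name);
          return false;
        }
      }
      return true;
    }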
++void NativeNMethodBarrier::verify() const {
++  intptr_t addr = (intptr_t) instruction_address();
++  for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) {
++    uint32_t inst = *((uint32_t*) addr);
++    if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) {
++      tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst);
++      fatal("not an %s instruction.", barrierInsn[i].name);
++    }
++    addr +=4;
++  }
++}
++
++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
++
++  typedef struct {
++    intptr_t *sp; intptr_t *fp; address ra; address pc;
++  } frame_pointers_t;
++
++  frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5);
++
++  JavaThread *thread = JavaThread::current();
++  RegisterMap reg_map(thread, false);
++  frame frame = thread->last_frame();
++
++  assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be");
++  assert(frame.cb() == nm, "must be");
++  frame = frame.sender(&reg_map);
++
++  LogTarget(Trace, nmethod, barrier) out;
++  if (out.is_enabled()) {
++    ResourceMark mark;
++    log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p",
++                                nm->method()->name_and_sig_as_C_string(),
++                                nm, *(address *) return_address_ptr, nm->is_osr_method(), thread,
++                                thread->get_thread_name(), frame.sp(), nm->verified_entry_point());
++  }
++
++  new_frame->sp = frame.sp();
++  new_frame->fp = frame.fp();
++  new_frame->ra = frame.pc();
++  new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
++}
++
++// This is the offset of the entry barrier from where the frame is completed.
++// If any code changes between the end of the verified entry where the entry
++// barrier resides, and the completion of the frame, then
++// NativeNMethodCmpBarrier::verify() will immediately complain when it does
++// not find the expected native instruction at this offset, which needs updating.
++// Note that this offset is invariant of PreserveFramePointer.
++
++static const int entry_barrier_offset = -4 * 10;
++
++static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
++  address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
++  NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
++  debug_only(barrier->verify());
++  return barrier;
++}
++
++void BarrierSetNMethod::disarm(nmethod* nm) {
++  if (!supports_entry_barrier(nm)) {
++    return;
++  }
++
++  // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
++  // Symmetric "LD.W; DBAR" is in the nmethod barrier.
++  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
++
++  barrier->set_value(disarmed_value());
++}
++
++bool BarrierSetNMethod::is_armed(nmethod* nm) {
++  if (!supports_entry_barrier(nm)) {
++    return false;
++  }
++
++  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
++  return barrier->get_value() != disarmed_value();
++}
+diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp
+new file mode 100644
+index 00000000000..96dc9562a02
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp
+@@ -0,0 +1,119 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/cardTable.hpp"
++#include "gc/shared/cardTableBarrierSet.hpp"
++#include "gc/shared/cardTableBarrierSetAssembler.hpp"
++
++#define __ masm->
++
++#define T4 RT4
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) /* nothing */
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
++
++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
++                                                                    Register addr, Register count, Register tmp,
++                                                                    RegSet saved_regs) {
++  BarrierSet *bs = BarrierSet::barrier_set();
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++  intptr_t disp = (intptr_t) ct->byte_map_base();
++
++  Label L_loop, L_done;
++  const Register end = count;
++  assert_different_registers(addr, end);
++
++  __ beq(count, R0, L_done); // zero count - nothing to do
++
++  __ li(tmp, disp);
++
++  __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
++  __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive
++  __ shr(addr, CardTable::card_shift);
++  __ shr(end, CardTable::card_shift);
++  __ sub_d(end, end, addr); // end --> cards count
++
++  __ add_d(addr, addr, tmp);
++
++  __ BIND(L_loop);
++  __ stx_b(R0, addr, count);
++  __ addi_d(count, count, -1);
++  __ bge(count, R0, L_loop);
++
++  __ BIND(L_done);
++}
++
++// Does a store check for the oop in register obj.
++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_different_registers(obj, tmp, SCR1); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ __ load_byte_map_base(tmp); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ ldx_b(SCR1, obj, tmp); ++ __ beqz(SCR1, L_already_dirty); ++ __ stx_b(R0, obj, tmp); ++ __ bind(L_already_dirty); ++ } else { ++ __ stx_b(R0, obj, tmp); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), tmp1); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, tmp2); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..7d628ca6a85 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..71c3cb49386 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..086bdebb506 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/c1/shenandoahBarrierSetC1_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shenandoah/c1/shenandoahBarrierSetC1_loongarch.cpp +new file mode 100644 +index 00000000000..f82a2500d41 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/c1/shenandoahBarrierSetC1_loongarch.cpp +@@ -0,0 +1,130 @@ ++/* ++ * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++ ++#define __ masm->masm()-> ++ ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register result = result_opr()->as_register(); ++ ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, SCR2); ++ ++ if (UseCompressedOops) { ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; ++ } ++ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ true, /*is_cae*/ false, result); ++ ++ if (CompilerConfig::is_c1_only_no_jvmci()) { ++ // The membar here is necessary to prevent reordering between the ++ // release store in the CAS above and a subsequent volatile load. ++ // However for tiered compilation C1 inserts a full barrier before ++ // volatile loads which means we don't need an additional barrier ++ // here (see LIRGenerator::volatile_field_load()). ++ __ membar(__ AnyAny); ++ } ++} ++ ++#undef __ ++ ++#ifdef ASSERT ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif ++ ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); ++ ++ LIR_Opr t1 = LIR_OprFact::illegalOpr; ++ LIR_Opr t2 = LIR_OprFact::illegalOpr; ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); ++ ++ if (UseCompressedOops) { ++ t1 = gen->new_register(T_OBJECT); ++ t2 = gen->new_register(T_OBJECT); ++ } ++ ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result)); ++ return result; ++ } ++ } ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++} ++ ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); ++ ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); ++ ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); ++ } ++ ++ assert(type == T_INT || is_reference_type(type) || type == T_LONG, "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++ ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ LIR_Opr tmp = gen->new_register(type); ++ __ move(result, tmp); ++ result = tmp; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ 
result /* pre_val */); ++ } ++ } ++ ++ return result; ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..7cf552e283a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,784 @@ ++/* ++ * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A3 RA3 ++ ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ Label done; ++ ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); ++ ++ // Is GC active? 
++ Address gc_state(TREG, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ ld_b(SCR1, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(SCR1, SCR1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(SCR1, done); ++ } else { ++ __ andi(SCR1, SCR1, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(SCR1, done); ++ } ++ ++ __ push(saved_regs); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), src, dst, count); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); ++ } ++ __ pop(saved_regs); ++ __ bind(done); ++ } ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, SCR1); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime ++ ++ __ addi_d(tmp, tmp, -wordSize); // tmp := tmp - wordSize ++ __ st_d(tmp, index); // *index_adr := tmp ++ __ ld_d(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); // tmp := tmp + *buffer_adr ++ ++ // Record the previous value ++ __ st_d(pre_val, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(pre_val); ++ if (tosca_live) saved += RegSet::of(V0); ++ if (obj != noreg) saved += RegSet::of(obj); ++ ++ __ push(saved); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. 
in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ __ pop(saved); ++ ++ __ bind(done); ++} ++ ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ Label is_null; ++ __ beqz(dst, is_null); ++ resolve_forward_pointer_not_null(masm, dst, tmp); ++ __ bind(is_null); ++} ++ ++// IMPORTANT: This must preserve all registers, even SCR1 and SCR2, except those explicitely ++// passed in. ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // The below loads the mark word, checks if the lowest two bits are ++ // set, and if so, clear the lowest two bits and copy the result ++ // to dst. Otherwise it leaves dst alone. ++ // Implementing this is surprisingly awkward. I do it here by: ++ // - Inverting the mark word ++ // - Test lowest two bits == 0 ++ // - If so, set the lowest two bits ++ // - Invert the result back, and copy to dst ++ ++ Register scr = RA; ++ bool borrow_reg = (tmp == noreg); ++ if (borrow_reg) { ++ // No free registers available. Make one useful. ++ tmp = SCR1; ++ if (tmp == dst) { ++ tmp = SCR2; ++ } ++ __ push(tmp); ++ } ++ ++ assert_different_registers(tmp, scr, dst); ++ ++ Label done; ++ __ movgr2fr_d(fscratch, scr); ++ __ ld_d(tmp, dst, oopDesc::mark_offset_in_bytes()); ++ __ nor(tmp, tmp, R0); ++ __ andi(scr, tmp, markWord::lock_mask_in_place); ++ __ bnez(scr, done); ++ __ ori(tmp, tmp, markWord::marked_value); ++ __ nor(dst, tmp, R0); ++ __ bind(done); ++ __ movfr2gr_d(scr, fscratch); ++ ++ if (borrow_reg) { ++ __ pop(tmp); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert_different_registers(load_addr.base(), load_addr.index(), SCR1, SCR2); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ bool is_narrow = UseCompressedOops && !is_native; ++ ++ Label heap_stable, not_cset; ++ __ enter(); ++ __ bstrins_d(SP, R0, 3, 0); ++ Address gc_state(TREG, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ Register tmp = (dst == SCR1) ? 
SCR2 : SCR1; ++ ++ // Check for heap stability ++ if (is_strong) { ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(tmp, heap_stable); ++ } else { ++ Label lrb; ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::WEAK_ROOTS); ++ __ bnez(tmp, lrb); ++ ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(tmp, heap_stable); ++ __ bind(lrb); ++ } ++ ++ // use A1 for load address ++ Register result_dst = dst; ++ if (dst == A1) { ++ __ move(tmp, dst); ++ dst = tmp; ++ } ++ ++ // Save A0 and A1, unless it is an output register ++ __ push2(A0, A1); ++ __ lea(A1, load_addr); ++ __ move(A0, dst); ++ ++ // Test for in-cset ++ if (is_strong) { ++ __ li(SCR2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli_d(SCR1, A0, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ ldx_b(SCR2, SCR2, SCR1); ++ __ beqz(SCR2, not_cset); ++ } ++ ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ if (is_strong) { ++ if (is_narrow) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); ++ } ++ } else if (is_weak) { ++ if (is_narrow) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(!is_narrow, "phantom access cannot be narrow"); ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)); ++ } ++ __ jalr(RA); ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ bind(not_cset); ++ ++ __ move(result_dst, A0); ++ if (result_dst == A0) ++ __ pop2(R0, A1); ++ else ++ __ pop2(A0, A1); ++ ++ __ bind(heap_stable); ++ __ leave(); ++} ++ ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm, noreg, dst, TREG, tmp, true, false); ++ __ pop_call_clobbered_registers(); ++ } ++} ++ ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// SCR1 (scratch reg) ++// ++// Alias: ++// dst: SCR1 (might use SCR1 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } ++ ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; ++ ++ // Preserve src location for LRB ++ if (dst == src.base() || dst == src.index() || dst == SCR1) { ++ dst = SCR2; ++ } ++ assert_different_registers(dst, src.base(), src.index()); ++ ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ ++ load_reference_barrier(masm, dst, src, decorators); ++ ++ if (dst != result_dst) { ++ __ move(result_dst, dst); ++ dst = result_dst; ++ } ++ } else { ++ 
BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ } ++ ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ TREG /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; ++ } ++ ++ // flatten object address if needed ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != A3) { ++ __ move(A3, dst.base()); ++ } ++ } else { ++ __ lea(A3, dst); ++ } ++ ++ shenandoah_write_barrier_pre(masm, ++ A3 /* obj */, ++ tmp2 /* pre_val */, ++ TREG /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(A3, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(A3, 0), val, noreg, noreg); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); ++ ++ // Check for null. ++ __ beqz(obj, done); ++ ++ assert(obj != SCR1, "need SCR1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lea(SCR1, gc_state); ++ __ ld_b(SCR1, SCR1, 0); ++ ++ // Check for heap in evacuation phase ++ __ andi(SCR1, SCR1, ShenandoahHeap::EVACUATION); ++ __ bnez(SCR1, slowpath); ++ ++ __ bind(done); ++} ++ ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. 
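// A conceptual sketch, not code from this patch: the four-step retry protocol
// that cmpxchg_oop() implements below, restated over std::atomic<uintptr_t>.
// The Obj type and resolve_forward() helper are invented for illustration; in
// the generated code the equivalent work is done by resolve_forward_pointer().
#include <atomic>
#include <cstdint>

struct Obj { Obj* forwardee; };          // toy object: forwardee == this unless evacuated

static uintptr_t resolve_forward(uintptr_t p) {
  return p ? reinterpret_cast<uintptr_t>(reinterpret_cast<Obj*>(p)->forwardee) : p;
}

// Compare-and-exchange semantics: returns the witnessed value; callers that
// want plain CAS semantics test the return value against 'expected' instead.
uintptr_t shenandoah_cas_sketch(std::atomic<uintptr_t>& slot,
                                uintptr_t expected, uintptr_t new_val) {
  for (;;) {
    // Step 1: ordinary CAS fast path.
    uintptr_t witness = expected;
    if (slot.compare_exchange_strong(witness, new_val)) {
      return witness;                    // witness == expected: success
    }
    // Step 2: possible false negative - the slot may hold the from-space alias.
    if (resolve_forward(witness) != expected) {
      return witness;                    // genuinely different object: fail
    }
    // Step 3: retry with the from-space pointer as the expected value.
    uintptr_t from_space = witness;
    if (slot.compare_exchange_strong(from_space, new_val)) {
      return expected;                   // report the to-space value on success
    }
    // Step 4: another thread may have healed the slot meanwhile; start over.
  }
}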
++// ++// Clobbers SCR1, SCR2 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register mem, ++ Register expected, ++ Register new_val, ++ bool acquire, bool is_cae, ++ Register result) { ++ Register tmp1 = SCR2; ++ Register tmp2 = SCR1; ++ bool is_narrow = UseCompressedOops; ++ ++ assert_different_registers(mem, expected, tmp1, tmp2); ++ assert_different_registers(mem, new_val, tmp1, tmp2); ++ ++ Address addr(mem); ++ Label step4, done_succ, done_fail, done; ++ ++ // There are two ways to reach this label. Initial entry into the ++ // cmpxchg_oop code expansion starts at step1 (which is equivalent ++ // to label step4). Additionally, in the rare case that four steps ++ // are required to perform the requested operation, the fourth step ++ // is the same as the first. On a second pass through step 1, ++ // control may flow through step 2 on its way to failure. It will ++ // not flow from step 2 to step 3 since we are assured that the ++ // memory at addr no longer holds a from-space pointer. ++ // ++ // The comments that immediately follow the step4 label apply only ++ // to the case in which control reaches this label by branch from ++ // step 3. ++ ++ __ bind (step4); ++ ++ // Step 4. CAS has failed because the value most recently fetched ++ // from addr is no longer the from-space pointer held in tmp2. If a ++ // different thread replaced the in-memory value with its equivalent ++ // to-space pointer, then CAS may still be able to succeed. The ++ // value held in the expected register has not changed. ++ // ++ // It is extremely rare we reach this point. For this reason, the ++ // implementation opts for smaller rather than potentially faster ++ // code. Ultimately, smaller code for this rare case most likely ++ // delivers higher overall throughput by enabling improved icache ++ // performance. ++ ++ // Step 1. Fast-path. ++ // ++ // Try to CAS with given arguments. If successful, then we are done. ++ // ++ // No label required for step 1. ++ ++ if (is_narrow) { ++ __ cmpxchg32(addr, expected, new_val, tmp2, false /* sign */, false /* retold */, ++ acquire /* barrier */, false /* weak */, true /* exchange */); ++ } else { ++ __ cmpxchg(addr, expected, new_val, tmp2, false /* retold */, acquire /* barrier */, ++ false /* weak */, true /* exchange */); ++ } ++ // tmp2 holds value fetched. ++ ++ // If expected equals null but tmp2 does not equal null, the ++ // following branches to done to report failure of CAS. If both ++ // expected and tmp2 equal null, the following branches to done to ++ // report success of CAS. There's no need for a special test of ++ // expected equal to null. ++ ++ __ beq(tmp2, expected, done_succ); ++ // if CAS failed, fall through to step 2 ++ ++ // Step 2. CAS has failed because the value held at addr does not ++ // match expected. This may be a false negative because the value fetched ++ // from addr (now held in tmp2) may be a from-space pointer to the ++ // original copy of same object referenced by to-space pointer expected. ++ // ++ // To resolve this, it suffices to find the forward pointer associated ++ // with fetched value. If this matches expected, retry CAS with new ++ // parameters. If this mismatches, then we have a legitimate ++ // failure, and we're done. ++ // ++ // No need for step2 label. 
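// The resolve_forward_pointer() call in step 2 below ultimately runs the
// invert / test / set / invert mark-word trick from
// resolve_forward_pointer_not_null() earlier in this file. A self-contained
// C++ restatement of that trick, assuming the usual HotSpot markWord
// constants (low two bits, value 0b11 when an object is forwarded);
// illustration only, not code from this patch.
#include <cstdint>

constexpr uint64_t kLockMask    = 0x3;   // markWord::lock_mask_in_place
constexpr uint64_t kMarkedValue = 0x3;   // markWord::marked_value

// Returns the forwardee encoded in 'mark' if the object is forwarded,
// otherwise returns 'obj' unchanged.
inline uint64_t resolve_forwardee_sketch(uint64_t obj, uint64_t mark) {
  uint64_t inv = ~mark;                  // nor tmp, tmp, R0
  if ((inv & kLockMask) == 0) {          // original low bits were 0b11: forwarded
    return ~(inv | kMarkedValue);        // == mark & ~0b11: the forwardee address
  }
  return obj;                            // not forwarded: leave the object alone
}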
++ ++ // overwrite tmp1 with from-space pointer fetched from memory ++ __ move(tmp1, tmp2); ++ ++ if (is_narrow) { ++ // Decode tmp1 in order to resolve its forward pointer ++ __ decode_heap_oop(tmp1); ++ } ++ resolve_forward_pointer(masm, tmp1); ++ if (is_narrow) { ++ // Encode tmp1 to compare against expected. ++ __ encode_heap_oop(tmp1); ++ } ++ ++ // Does forwarded value of fetched from-space pointer match original ++ // value of expected? If tmp1 holds null, this comparison will fail ++ // because we know from step1 that expected is not null. There is ++ // no need for a separate test for tmp1 (the value originally held ++ // in memory) equal to null. ++ ++ // If not, then the failure was legitimate and we're done. ++ // Branching to done with NE condition denotes failure. ++ __ bne(tmp1, expected, done_fail); ++ ++ // Fall through to step 3. No need for step3 label. ++ ++ // Step 3. We've confirmed that the value originally held in memory ++ // (now held in tmp2) pointed to from-space version of original ++ // expected value. Try the CAS again with the from-space expected ++ // value. If it now succeeds, we're good. ++ // ++ // Note: tmp2 holds encoded from-space pointer that matches to-space ++ // object residing at expected. tmp2 is the new "expected". ++ ++ // Note that macro implementation of __cmpxchg cannot use same register ++ // tmp2 for result and expected since it overwrites result before it ++ // compares result with expected. ++ if (is_narrow) { ++ __ cmpxchg32(addr, tmp2, new_val, tmp1, false /* sign */, false /* retold */, ++ acquire /* barrier */, false /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, tmp2, new_val, tmp1, false /* retold */, acquire /* barrier */, ++ false /* weak */, false /* exchange */); ++ } ++ // tmp1 set iff success, tmp2 holds value fetched. ++ ++ // If fetched value did not equal the new expected, this could ++ // still be a false negative because some other thread may have ++ // newly overwritten the memory value with its to-space equivalent. ++ __ beqz(tmp1, step4); ++ ++ if (is_cae) { ++ // We're falling through to done to indicate success. ++ __ move(tmp2, expected); ++ } ++ ++ __ bind(done_succ); ++ if (!is_cae) { ++ __ li(tmp2, 1L); ++ } ++ __ b(done); ++ ++ __ bind(done_fail); ++ if (!is_cae) { ++ __ li(tmp2, 0L); ++ } ++ ++ __ bind(done); ++ __ move(result, tmp2); ++} ++ ++#undef __ ++ ++#ifdef COMPILER1 ++ ++#define __ ce->masm()-> ++ ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
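// What the pre-barrier stub below boils down to, written as ordinary C++ (an
// illustration, not patch code): while concurrent marking is active, the
// reference value about to be overwritten must be recorded so the
// snapshot-at-the-beginning invariant holds. enqueue_previous_value() is an
// invented name standing in for the SATB queue / write_ref_field_pre_entry
// machinery.
static void enqueue_previous_value(void* prev) { (void)prev; /* record prev for the marker */ }

inline void satb_pre_barrier_sketch(bool marking_active, void** field) {
  if (!marking_active) {
    return;                              // barrier is a no-op outside marking
  }
  void* previous = *field;               // the reference being overwritten
  if (previous != nullptr) {
    enqueue_previous_value(previous);    // keep the old target reachable this cycle
  }
}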
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ ++ DecoratorSet decorators = stub->decorators(); ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); ++ ++ assert(res == V0, "result must arrive in V0"); ++ ++ if (res != obj) { ++ __ move(res, obj); ++ } ++ ++ if (is_strong) { ++ // Check for object in cset. ++ __ li(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli_d(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ ldx_b(tmp2, tmp2, tmp1); ++ __ beqz(tmp2, *stub->continuation()); ++ } ++ ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); ++ if (is_strong) { ++ if (is_native) { ++ __ call(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } else { ++ __ call(bs->load_reference_barrier_strong_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } ++ } else if (is_weak) { ++ __ call(bs->load_reference_barrier_weak_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ __ call(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } ++ ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR1; ++ ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
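// The buffer test that follows, restated in C++ so the index arithmetic is
// easier to follow. A sketch with invented types, not patch code; the real
// queue fields live in ShenandoahThreadLocalData and the slow path is
// ShenandoahRuntime::write_ref_field_pre_entry.
#include <cstddef>

struct SatbQueueSketch {
  void**  buffer;                        // satb_mark_queue_buffer
  size_t  index;                         // satb_mark_queue_index, in bytes, counts down

  void runtime_pre_barrier(void* pre_val) { (void)pre_val; /* hand off to the VM */ }

  void record(void* pre_val) {
    if (index == 0) {                    // buffer exhausted: take the runtime call
      runtime_pre_barrier(pre_val);
      return;
    }
    index -= sizeof(void*);              // addi_d tmp, tmp, -wordSize
    buffer[index / sizeof(void*)] = pre_val;   // st_d SCR2, tmp, 0 with tmp = buffer + index
  }
};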
++ __ ld_d(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_d(tmp, queue_index); ++ __ ld_d(SCR2, buffer); ++ __ add_d(tmp, tmp, SCR2); ++ __ load_parameter(0, SCR2); ++ __ st_d(SCR2, tmp, 0); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ __ bstrins_d(SP, R0, 3, 0); ++ // arg0 : object to be resolved ++ ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ __ load_parameter(0, A0); ++ __ load_parameter(1, A1); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ if (is_strong) { ++ if (is_native) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); ++ } else { ++ if (UseCompressedOops) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); ++ } ++ } ++ } else if (is_weak) { ++ assert(!is_native, "weak must not be called off-heap"); ++ if (UseCompressedOops) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(is_native, "phantom must only be called off-heap"); ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)); ++ } ++ __ jalr(RA); ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..e2c862f6c5c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#ifdef COMPILER1 ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; ++#endif ++class StubCodeGenerator; ++ ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: ++ ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ ++public: ++ ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++#endif ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ void cmpxchg_oop(MacroAssembler* masm, Register mem, Register expected, Register new_val, ++ bool acquire, bool is_cae, Register result); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad +new file mode 100644 +index 00000000000..04f67d23157 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad +@@ -0,0 +1,222 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} ++ ++encode %{ ++ enc_class loongarch_enc_cmpxchg_oop_shenandoah(memory mem, mRegP oldval, mRegP newval, mRegI res) %{ ++ MacroAssembler _masm(&cbuf); ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ enc_class loongarch_enc_cmpxchg_acq_oop_shenandoah(memory mem, mRegP oldval, mRegP newval, mRegI res) %{ ++ MacroAssembler _masm(&cbuf); ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++%} ++ ++instruct compareAndSwapP_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode(loongarch_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, res)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapN_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapPAcq_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode(loongarch_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, res)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapNAcq_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN_shenandoah(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP_shenandoah(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeNAcq_shenandoah(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangePAcq_shenandoah(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} 
++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapNAcq_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapPAcq_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..372d80cf11b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,462 @@ ++/* ++ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" ++#include "gc/z/zBarrier.inline.hpp" ++#include "gc/z/zBarrierSet.hpp" ++#include "gc/z/zBarrierSetAssembler.hpp" ++#include "gc/z/zBarrierSetRuntime.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/z/c1/zBarrierSetC1.hpp" ++#endif // COMPILER1 ++#ifdef COMPILER2 ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#endif // COMPILER2 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#undef __ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define T4 RT4 ++ ++void ZBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ if (!ZBarrierSet::barrier_needed(decorators, type)) { ++ // Barrier not needed ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } ++ ++ // Allocate scratch register ++ Register scratch = tmp1; ++ ++ assert_different_registers(dst, scratch, SCR1); ++ ++ Label done; ++ ++ // ++ // Fast Path ++ // ++ ++ // Load address ++ __ lea(scratch, src); ++ ++ // Load oop at address ++ __ ld_ptr(dst, scratch, 0); ++ ++ // Test address bad mask ++ __ ld_ptr(SCR1, address_bad_mask_from_thread(TREG)); ++ __ andr(SCR1, dst, SCR1); ++ __ beqz(SCR1, done); ++ ++ // ++ // Slow path ++ // ++ __ enter(); ++ ++ if (dst != V0) { ++ __ push(V0); ++ } ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ if (dst != A0) { ++ __ move(A0, dst); ++ } ++ __ move(A1, scratch); ++ __ MacroAssembler::call_VM_leaf_base(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ // Make sure dst has the return value. ++ if (dst != V0) { ++ __ move(dst, V0); ++ __ pop(V0); ++ } ++ __ leave(); ++ ++ __ bind(done); ++} ++ ++#ifdef ASSERT ++ ++void ZBarrierSetAssembler::store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2) { ++ // Verify value ++ if (is_reference_type(type)) { ++ // Note that src could be noreg, which means we ++ // are storing null and can skip verification. ++ if (val != noreg) { ++ Label done; ++ ++ // tmp1 and tmp2 are often set to noreg. ++ ++ __ ld_ptr(AT, address_bad_mask_from_thread(TREG)); ++ __ andr(AT, val, AT); ++ __ beqz(AT, done); ++ __ stop("Verify oop store failed"); ++ __ should_not_reach_here(); ++ __ bind(done); ++ } ++ } ++ ++ // Store value ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++} ++ ++#endif // ASSERT ++ ++void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs) { ++ if (!is_oop) { ++ // Barrier not needed ++ return; ++ } ++ ++ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ ++ __ push(saved_regs); ++ ++ if (count == A0) { ++ if (src == A1) { ++ // exactly backwards!! 
++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, src); ++ } ++ } else { ++ __ move(A0, src); ++ __ move(A1, count); ++ } ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); ++ ++ __ pop(saved_regs); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); ++} ++ ++void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath) { ++ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); ++ ++ assert_different_registers(jni_env, robj, tmp); ++ ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); ++ ++ // The Address offset is too large to direct load - -784. Our range is +127, -128. ++ __ li(tmp, (int64_t)(in_bytes(ZThreadLocalData::address_bad_mask_offset()) - ++ in_bytes(JavaThread::jni_environment_offset()))); ++ ++ // Load address bad mask ++ __ ldx_d(tmp, jni_env, tmp); ++ ++ // Check address bad mask ++ __ andr(AT, robj, tmp); ++ __ bnez(AT, slowpath); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const { ++ assert_different_registers(SCR1, TREG, ref->as_register()); ++ __ ld_d(SCR1, address_bad_mask_from_thread(TREG)); ++ __ andr(SCR1, SCR1, ref->as_register()); ++} ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const { ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ Register ref = stub->ref()->as_register(); ++ Register ref_addr = noreg; ++ Register tmp = noreg; ++ ++ if (stub->tmp()->is_valid()) { ++ // Load address into tmp register ++ ce->leal(stub->ref_addr(), stub->tmp()); ++ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++ } else { ++ // Address already in register ++ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ } ++ ++ assert_different_registers(ref, ref_addr, noreg); ++ ++ // Save V0 unless it is the result or tmp register ++ // Set up SP to accomodate parameters and maybe V0. 
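// Frame shape set up by the addi_d/st_d pair below (a sketch; the parameter
// offsets assume LIR_Assembler::store_parameter(x, i) writes word i of the
// outgoing area, i.e. SP + 8 * i, as on the other ports):
//
//   SP + 24 : unused padding, keeps SP 16-byte aligned   (32-byte frame only)
//   SP + 16 : saved V0                                    (32-byte frame only)
//   SP +  8 : parameter 1 = ref_addr
//   SP +  0 : parameter 0 = ref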
++ if (ref != V0 && tmp != V0) { ++ __ addi_d(SP, SP, -32); ++ __ st_d(V0, SP, 16); ++ } else { ++ __ addi_d(SP, SP, -16); ++ } ++ ++ // Setup arguments and call runtime stub ++ ce->store_parameter(ref_addr, 1); ++ ce->store_parameter(ref, 0); ++ ++ __ call(stub->runtime_stub(), relocInfo::runtime_call_type); ++ ++ // Verify result ++ __ verify_oop(V0, "Bad oop"); ++ ++ // Move result into place ++ if (ref != V0) { ++ __ move(ref, V0); ++ } ++ ++ // Restore V0 unless it is the result or tmp register ++ if (ref != V0 && tmp != V0) { ++ __ ld_d(V0, SP, 16); ++ __ addi_d(SP, SP, 32); ++ } else { ++ __ addi_d(SP, SP, 16); ++ } ++ ++ // Stub exit ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++#define __ sasm-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const { ++ __ prologue("zgc_load_barrier stub", false); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ // Setup arguments ++ __ load_parameter(0, A0); ++ __ load_parameter(1, A1); ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ epilogue(); ++} ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++ ++OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { ++ if (!OptoReg::is_reg(opto_reg)) { ++ return OptoReg::Bad; ++ } ++ ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_FloatRegister()) { ++ return opto_reg & ~1; ++ } ++ ++ return opto_reg; ++} ++ ++#undef __ ++#define __ _masm-> ++ ++class ZSaveLiveRegisters { ++private: ++ MacroAssembler* const _masm; ++ RegSet _gp_regs; ++ FloatRegSet _fp_regs; ++ FloatRegSet _lsx_vp_regs; ++ FloatRegSet _lasx_vp_regs; ++ ++public: ++ void initialize(ZLoadBarrierStubC2* stub) { ++ // Record registers that needs to be saved/restored ++ RegMaskIterator rmi(stub->live()); ++ while (rmi.has_next()) { ++ const OptoReg::Name opto_reg = rmi.next(); ++ if (OptoReg::is_reg(opto_reg)) { ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_Register()) { ++ _gp_regs += RegSet::of(vm_reg->as_Register()); ++ } else if (vm_reg->is_FloatRegister()) { ++ if (UseLASX && vm_reg->next(7)) ++ _lasx_vp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ else if (UseLSX && vm_reg->next(3)) ++ _lsx_vp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ else ++ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ } else { ++ fatal("Unknown register type"); ++ } ++ } ++ } ++ ++ // Remove C-ABI SOE registers, scratch regs and _ref register that will be updated ++ _gp_regs -= RegSet::range(S0, S7) + RegSet::of(SP, SCR1, SCR2, stub->ref()); ++ } ++ ++ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _gp_regs(), ++ _fp_regs(), ++ _lsx_vp_regs(), ++ _lasx_vp_regs() { ++ ++ // Figure out what registers to save/restore ++ initialize(stub); ++ ++ // Save registers ++ __ push(_gp_regs); ++ __ push_fpu(_fp_regs); ++ __ push_vp(_lsx_vp_regs /* UseLSX */); ++ __ push_vp(_lasx_vp_regs /* UseLASX */); ++ } ++ ++ ~ZSaveLiveRegisters() { ++ // Restore registers ++ __ pop_vp(_lasx_vp_regs /* UseLASX */); ++ __ pop_vp(_lsx_vp_regs /* UseLSX */); ++ __ pop_fpu(_fp_regs); ++ __ pop(_gp_regs); ++ } ++}; ++ ++#undef __ ++#define __ _masm-> ++ ++class ZSetupArguments { ++private: ++ MacroAssembler* const _masm; ++ const Register _ref; ++ const Address _ref_addr; ++ ++public: ++ ZSetupArguments(MacroAssembler* 
masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _ref(stub->ref()), ++ _ref_addr(stub->ref_addr()) { ++ ++ // Setup arguments ++ if (_ref_addr.base() == noreg) { ++ // No self healing ++ if (_ref != A0) { ++ __ move(A0, _ref); ++ } ++ __ move(A1, 0); ++ } else { ++ // Self healing ++ if (_ref == A0) { ++ // _ref is already at correct place ++ __ lea(A1, _ref_addr); ++ } else if (_ref != A1) { ++ // _ref is in wrong place, but not in A1, so fix it first ++ __ lea(A1, _ref_addr); ++ __ move(A0, _ref); ++ } else if (_ref_addr.base() != A0 && _ref_addr.index() != A0) { ++ assert(_ref == A1, "Mov ref first, vacating A0"); ++ __ move(A0, _ref); ++ __ lea(A1, _ref_addr); ++ } else { ++ assert(_ref == A1, "Need to vacate A1 and _ref_addr is using A0"); ++ if (_ref_addr.base() == A0 || _ref_addr.index() == A0) { ++ __ move(T4, A1); ++ __ lea(A1, _ref_addr); ++ __ move(A0, T4); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } ++ } ++ ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != V0) { ++ __ move(_ref, V0); ++ } ++ } ++}; ++ ++#undef __ ++#define __ masm-> ++ ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++ ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ __ call_VM_leaf(stub->slow_path(), 2); ++ } ++ // Stub exit ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..6a96d6fdd60 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef CPU_LOONGARCH_GC_Z_ZBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_Z_ZBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 ++ ++#ifdef COMPILER1 ++class LIR_Assembler; ++class LIR_OprDesc; ++typedef LIR_OprDesc* LIR_Opr; ++class StubAssembler; ++class ZLoadBarrierStubC1; ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 ++ ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { ++public: ++ virtual void load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread); ++ ++#ifdef ASSERT ++ virtual void store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2); ++#endif // ASSERT ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath); ++ ++#ifdef COMPILER1 ++ void generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const; ++ ++ void generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const; ++ ++ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const; ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); ++ ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 ++}; ++ ++#endif // CPU_LOONGARCH_GC_Z_ZBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.cpp +new file mode 100644 +index 00000000000..85db1cf1854 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.cpp +@@ -0,0 +1,211 @@ ++/* ++ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/gcLogPrecious.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/zGlobals.hpp" ++#include "runtime/globals.hpp" ++#include "runtime/os.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/powerOfTwo.hpp" ++ ++#ifdef LINUX ++#include ++#endif // LINUX ++ ++// ++// The heap can have three different layouts, depending on the max heap size. ++// ++// Address Space & Pointer Layout 1 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000014000000000 (20TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000010000000000 (16TB) ++// . . ++// +--------------------------------+ 0x00000c0000000000 (12TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000040000000000 (4TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 6 5 2 1 0 ++// +--------------------+----+-----------------------------------------------+ ++// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| ++// +--------------------+----+-----------------------------------------------+ ++// | | | ++// | | * 41-0 Object Offset (42-bits, 4TB address space) ++// | | ++// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) ++// | 0010 = Marked1 (Address view 8-12TB) ++// | 0100 = Remapped (Address view 16-20TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-46 Fixed (18-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 2 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000280000000000 (40TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// . . ++// +--------------------------------+ 0x0000180000000000 (24TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 7 6 3 2 0 ++// +------------------+-----+------------------------------------------------+ ++// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| ++// +-------------------+----+------------------------------------------------+ ++// | | | ++// | | * 42-0 Object Offset (43-bits, 8TB address space) ++// | | ++// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) ++// | 0010 = Marked1 (Address view 16-24TB) ++// | 0100 = Remapped (Address view 32-40TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-47 Fixed (17-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 3 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000500000000000 (80TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000400000000000 (64TB) ++// . . 
++// +--------------------------------+ 0x0000300000000000 (48TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 8 7 4 3 0 ++// +------------------+----+-------------------------------------------------+ ++// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| ++// +------------------+----+-------------------------------------------------+ ++// | | | ++// | | * 43-0 Object Offset (44-bits, 16TB address space) ++// | | ++// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) ++// | 0010 = Marked1 (Address view 32-48TB) ++// | 0100 = Remapped (Address view 64-80TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-48 Fixed (16-bits, always zero) ++// ++ ++// Default value if probing is not implemented for a certain platform: 128TB ++static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; ++// Minimum value returned, if probing fails: 64GB ++static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; ++ ++static size_t probe_valid_max_address_bit() { ++#ifdef LINUX ++ size_t max_address_bit = 0; ++ const size_t page_size = os::vm_page_size(); ++ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { ++ const uintptr_t base_addr = ((uintptr_t) 1U) << i; ++ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { ++ // msync suceeded, the address is valid, and maybe even already mapped. ++ max_address_bit = i; ++ break; ++ } ++ if (errno != ENOMEM) { ++ // Some error occured. This should never happen, but msync ++ // has some undefined behavior, hence ignore this bit. ++#ifdef ASSERT ++ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#else // ASSERT ++ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#endif // ASSERT ++ continue; ++ } ++ // Since msync failed with ENOMEM, the page might not be mapped. ++ // Try to map it, to see if the address is valid. 
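// The same msync-then-mmap probing idea as a stand-alone program, handy for
// checking what a given kernel/CPU combination would report (for example on a
// LoongArch box with fewer usable virtual-address bits). An independent
// sketch, not the patch's code; the 47/36 bounds mirror the
// DEFAULT/MINIMUM_MAX_ADDRESS_BIT constants above.
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  for (size_t bit = 47; bit > 36; --bit) {
    void* const want = reinterpret_cast<void*>(uintptr_t(1) << bit);
    if (msync(want, page, MS_ASYNC) == 0) {      // already mapped: bit is valid
      std::printf("highest valid address bit: %zu\n", bit);
      return 0;
    }
    if (errno != ENOMEM) {
      continue;                                  // unexpected error: skip this bit
    }
    void* const got = mmap(want, page, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (got != MAP_FAILED) {
      const bool hit = (got == want);
      munmap(got, page);
      if (hit) {                                 // kernel honoured the hint: bit is valid
        std::printf("highest valid address bit: %zu\n", bit);
        return 0;
      }
    }
  }
  std::printf("probe failed below bit 36\n");
  return 1;
}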
++ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ munmap(result_addr, page_size); ++ } ++ if ((uintptr_t) result_addr == base_addr) { ++ // address is valid ++ max_address_bit = i; ++ break; ++ } ++ } ++ if (max_address_bit == 0) { ++ // probing failed, allocate a very high page and take that bit as the maximum ++ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; ++ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; ++ munmap(result_addr, page_size); ++ } ++ } ++ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); ++ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); ++#else // LINUX ++ return DEFAULT_MAX_ADDRESS_BIT; ++#endif // LINUX ++} ++ ++size_t ZPlatformAddressOffsetBits() { ++ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; ++ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; ++ const size_t min_address_offset_bits = max_address_offset_bits - 2; ++ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); ++ const size_t address_offset_bits = log2i_exact(address_offset); ++ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); ++} ++ ++size_t ZPlatformAddressMetadataShift() { ++ return ZPlatformAddressOffsetBits(); ++} +diff --git a/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.hpp +new file mode 100644 +index 00000000000..542fd267434 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef CPU_LOONGARCH_GC_Z_ZGLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_Z_ZGLOBALS_LOONGARCH_HPP ++ ++const size_t ZPlatformGranuleSizeShift = 21; // 2MB ++const size_t ZPlatformHeapViews = 3; ++const size_t ZPlatformCacheLineSize = 64; ++ ++size_t ZPlatformAddressOffsetBits(); ++size_t ZPlatformAddressMetadataShift(); ++ ++#endif // CPU_LOONGARCH_GC_Z_ZGLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/z/z_loongarch_64.ad b/src/hotspot/cpu/loongarch/gc/z/z_loongarch_64.ad +new file mode 100644 +index 00000000000..59656e75376 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/z_loongarch_64.ad +@@ -0,0 +1,273 @@ ++// ++// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++ ++source_hpp %{ ++ ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) { ++ if (barrier_data == ZLoadBarrierElided) { ++ return; ++ } ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); ++ __ ld_ptr(tmp, Address(thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ bnez(tmp, *stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); ++ __ b(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_compare_and_swap(MacroAssembler& _masm, const MachNode* node, ++ Register res, Register mem, Register oldval, Register newval, ++ Register tmp, bool weak, bool acquire) { ++ // z-specific load barrier requires strong CAS operations. ++ // Weak CAS operations are thus only emitted if the barrier is elided. 
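// The shape of the code this helper emits, restated over std::atomic as a
// conceptual sketch (not patch code). zgc_bad_mask and heal_reference() are
// invented stand-ins for the per-thread address bad mask and the slow-path
// load barrier that remaps/relocates the stale reference in place.
#include <atomic>
#include <cstdint>

static uintptr_t zgc_bad_mask = 0;               // per-thread in the real VM

static void heal_reference(std::atomic<uintptr_t>& slot, uintptr_t stale) {
  (void)slot; (void)stale;                       // the slow-path barrier would fix the slot here
}

bool z_cas_sketch(std::atomic<uintptr_t>& slot, uintptr_t oldval, uintptr_t newval) {
  uintptr_t witness = oldval;
  if (slot.compare_exchange_strong(witness, newval)) {
    return true;                                 // fast path: no barrier work needed
  }
  if ((witness & zgc_bad_mask) == 0) {
    return false;                                // the slot really holds a different value
  }
  heal_reference(slot, witness);                 // stale colored pointer: heal, then retry once
  witness = oldval;
  return slot.compare_exchange_strong(witness, newval);
}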
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ Address addr(mem); ++ if (node->barrier_data() == ZLoadBarrierElided) { ++ __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* barrier */, ++ weak /* weak */, false /* exchange */); ++ __ move(res, tmp); ++ } else { ++ __ move(tmp, oldval); ++ __ cmpxchg(addr, tmp, newval, AT, true /* retold */, acquire /* barrier */, ++ false /* weak */, false /* exchange */); ++ __ move(res, AT); ++ ++ Label good; ++ __ ld_ptr(AT, Address(thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(AT, AT, tmp); ++ __ beqz(AT, good); ++ z_load_barrier_slow_path(_masm, node, addr, tmp, res /* used as tmp */); ++ __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* barrier */, weak /* weak */, false /* exchange */); ++ __ move(res, tmp); ++ __ bind(good); ++ } ++} ++ ++static void z_compare_and_exchange(MacroAssembler& _masm, const MachNode* node, ++ Register res, Register mem, Register oldval, Register newval, Register tmp, ++ bool weak, bool acquire) { ++ // z-specific load barrier requires strong CAS operations. ++ // Weak CAS operations are thus only emitted if the barrier is elided. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ Address addr(mem); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, acquire /* barrier */, ++ weak && node->barrier_data() == ZLoadBarrierElided /* weak */, true /* exchange */); ++ if (node->barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld_ptr(tmp, Address(thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, res); ++ __ beqz(tmp, good); ++ z_load_barrier_slow_path(_masm, node, addr, res /* ref */, tmp); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, acquire /* barrier */, weak /* weak */, true /* exchange */); ++ __ bind(good); ++ } ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(mRegP dst, memory mem, mRegP tmp, FlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ effect(TEMP_DEF dst, TEMP tmp, KILL cr); ++ ins_cost(125);//must be equal loadP in loongarch_64.ad ++ ++ predicate(UseZGC && n->as_Load()->barrier_data() != 0); ++ ++ format %{ "zLoadP $dst, $mem" %} ++ ++ ins_encode %{ ++ Address ref_addr = Address(as_Register($mem$$base), as_Register($mem$$index), Address::no_scale, $mem$$disp); ++ __ block_comment("zLoadP"); ++ __ ld_ptr($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, $tmp$$Register, barrier_data()); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapP(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*) n)->order() != MemNode::seqcst)); ++ ins_cost(3 * MEMORY_REF_COST);//must be equal compareAndSwapP in loongarch_64.ad ++ ++ format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapP"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, false /* weak */, false /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapP_acq(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res 
(CompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ ins_cost(4 * MEMORY_REF_COST);//must be larger than zCompareAndSwapP ++ ++ format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapP_acq"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, false /* weak */, true /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPWeak(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && ((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*) n)->order() != MemNode::seqcst); ++ ++ ins_cost(MEMORY_REF_COST);//must be equal weakCompareAndSwapP in loongarch_64.ad ++ ++ format %{ "weak CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapPWeak"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, true /* weak */, false /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPWeak_acq(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ ins_cost(2* MEMORY_REF_COST);//must be equal weakCompareAndSwapP_acq in loongarch_64.ad ++ ++ format %{ "weak CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapPWeak_acq"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, true /* weak */, true /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP(mRegP res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ ins_cost(2* MEMORY_REF_COST);//must be equal compareAndExchangeP in loongarch_64.ad ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && ( ++ ((CompareAndSwapNode*)n)->order() != MemNode::acquire ++ && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst ++ )); ++ ++ format %{ "CMPXCHG $res, $mem, $oldval, $newval; as ptr; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndExchangeP"); ++ z_compare_and_exchange(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register, ++ false /* weak */, false /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP_acq(mRegP res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && ( ++ ((CompareAndSwapNode*)n)->order() == MemNode::acquire ++ || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst ++ )); ++ ++ format %{ "CMPXCHG acq $res, $mem, 
$oldval, $newval; as ptr; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndExchangeP_acq"); ++ z_compare_and_exchange(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register, ++ false /* weak */, true /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zGetAndSetP(mRegP mem, mRegP newv, mRegP prev, mRegP tmp, FlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ effect(TEMP_DEF prev, TEMP tmp, KILL cr); ++ ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() != 0); ++ ++ format %{ "GetAndSetP $prev, $mem, $newv" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = $mem$$Register; ++ __ block_comment("zGetAndSetP"); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ z_load_barrier(_masm, this, Address(noreg, 0), prev, $tmp$$Register, barrier_data()); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +new file mode 100644 +index 00000000000..363cd6e2092 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++// FIXME: LA ++// This makes the games we play when patching difficult, so when we ++// come across an access that needs patching we deoptimize. There are ++// ways we can avoid this, but these would slow down C1-compiled code ++// in the default case. We could revisit this decision if we get any ++// evidence that it's worth doing. 
++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define PREFERRED_METASPACE_ALIGNMENT ++ ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +new file mode 100644 +index 00000000000..2358ca31596 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. 
++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") \ ++ \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") ++ ++// end of ARCH_FLAGS ++ ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +new file mode 100644 +index 00000000000..a8f08bbf422 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +@@ -0,0 +1,100 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +new file mode 100644 +index 00000000000..d577e41f59c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, RA2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +new file mode 100644 +index 00000000000..3a180549fc6 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +new file mode 100644 +index 00000000000..7dea5deb183 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +@@ -0,0 +1,270 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register 
cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. 
++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode 
mode); ++}; ++ ++#endif // CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +new file mode 100644 +index 00000000000..fa65d10765c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +@@ -0,0 +1,2040 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. 
++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ assert(java_thread != AT, "check"); ++ ++ Label L; ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beqz(AT, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ addi_w(AT, AT, -JvmtiThreadState::earlyret_pending); ++ bnez(AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(A0, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(A0, A0, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(A0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ srli_d(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ li(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ add_d(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld_d(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ add_d(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ alsl_d(AT, index, cpool, Address::times_ptr - 1); ++ ld_h(index, AT, sizeof(ConstantPool)); ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1); ++ ld_d(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ ++ ld_d(method, Address(cache, index, Address::times_ptr, method_offset)); // get f1 Method* ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(temp); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld_d(T3, thread, in_bytes(JavaThread::polling_word_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. ++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ alsl_d(T2, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T2, 0); ++ } ++ } else { ++ li(T3, (long)table); ++ alsl_d(T2, Rnext, T3, LogBytesPerWord - 1); ++ ld_d(T3, T2, 0); ++ } ++ jr(T3); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ alsl_d(T2, Rnext, T3, LogBytesPerWord - 1); ++ ld_d(T3, T2, 0); ++ jr(T3); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Apply stack watermark barrier. ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, ++ // that would normally not be safe to use. Such bad returns into unsafe territory of ++ // the stack, will call InterpreterRuntime::at_unwind. ++ Label slow_path; ++ Label fast_path; ++ safepoint_poll(slow_path, thread, true /* at_return */, false /* acquire */, false /* in_nmethod */); ++ b(fast_path); ++ bind(slow_path); ++ push(state); ++ Label L; ++ address the_pc = pc(); ++ bind(L); ++ set_last_Java_frame(thread, SP, FP, L); ++ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), thread); ++ reset_last_Java_frame(true); ++ pop(state); ++ bind(fast_path); ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, TSR, AT); ++ bge(R0, AT, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld_d(ret_addr, FP, frame::return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::link_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp_reg, scr_reg); ++ ld_w(tmp_reg, Address(tmp_reg, Klass::access_flags_offset())); ++ li(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, AT, tmp_reg); ++ bnez(AT, slow_case); ++ } ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld_d(AT, data); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ st_d(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ st_d(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ addi_d(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ add_d(bumped_count, bumped_count, AT); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. 
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ b(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. 
++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ b(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. 
++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ add_d(mdp, mdp, tmp); ++ } ++ st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // beginning of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't know its ++ // length ++ Label do_profile; ++ ld_b(tmp, _bcp_register, 0); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beqz(AT, do_profile); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beqz(AT, do_profile); ++ ++ get_method(tmp); ++ ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, static_cast<int>(vmIntrinsics::_compiledLambdaForm)); ++ bne(tmp, AT, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ add_d(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ blt(tmp1, R0, profile_continue); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ alsl_d(AT, tmp2, _locals_register, Interpreter::logStackElementSize - 1); ++ ld_d(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ ld_w(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +new file mode 100644 +index 00000000000..d53d951a160 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +new file mode 100644 +index 00000000000..85a199e760d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +@@ -0,0 +1,282 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 
1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + RA0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(current, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++JRT_END +diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +new file mode 100644 +index 00000000000..048107c2425 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable() { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +new file mode 100644 +index 00000000000..5b08280921c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +@@ -0,0 +1,197 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing membar for LoadLoad barrier, we create address dependency ++// between loads, which is more efficient than membar. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ Label slow; ++ ++ const Register env = A0; ++ const Register obj = A1; ++ const Register fid = A2; ++ const Register tmp1 = AT; ++ const Register tmp2 = T4; ++ const Register obj_addr = T0; ++ const Register field_val = T0; ++ const Register field_addr = T0; ++ const Register counter_addr = T2; ++ const Register counter_prev_val = T1; ++ ++ __ li(counter_addr, SafepointSynchronize::safepoint_counter_addr()); ++ __ ld_w(counter_prev_val, counter_addr, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(tmp1, counter_prev_val, 1); ++ __ bnez(tmp1, slow); ++ ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ __ li(tmp2, JvmtiExport::get_field_access_count_addr()); ++ // address dependency ++ __ XOR(tmp1, counter_prev_val, counter_prev_val); ++ __ ldx_w(tmp1, tmp2, tmp1); ++ __ bnez(tmp1, slow); ++ } ++ ++ __ move(obj_addr, obj); ++ // Both obj_addr and tmp2 are clobbered by try_resolve_jobject_in_native. 
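For readers less familiar with HotSpot's speculative JNI fast accessors, the control flow emitted by this generator (continued below) can be summarized in plain C++. This is only an illustrative sketch and not part of the patch: slow_get_int and field_addr are made-up helpers, and the seq_cst fence is a conservative stand-in for the address-dependency ordering described in the comment at the top of this file.

    #include <atomic>
    #include <cstdint>

    typedef int32_t jint;

    // Hypothetical helpers: the real code tail-calls the libjvm slow-path stub
    // and derives the field address from the jobject and the encoded jfieldID.
    jint  slow_get_int(void* obj, void* fid);
    jint* field_addr(void* obj, void* fid);

    jint fast_get_int(const std::atomic<int>* safepoint_counter, void* obj, void* fid) {
      int c0 = safepoint_counter->load(std::memory_order_acquire);
      if (c0 & 1) {
        return slow_get_int(obj, fid);     // odd counter: a safepoint is in progress
      }
      jint value = *field_addr(obj, fid);  // speculative field load
      // The generated code orders the counter re-read after the field load with an
      // address dependency (XOR of the loaded value with itself used as a load
      // index); portable C++ has to use a fence instead.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      int c1 = safepoint_counter->load(std::memory_order_relaxed);
      if (c0 != c1) {
        return slow_get_int(obj, fid);     // counter moved: value may be stale, redo slowly
      }
      return value;
    }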
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, env, obj_addr, tmp2, slow); ++ ++ __ srli_d(tmp1, fid, 2); // offset ++ __ add_d(field_addr, obj_addr, tmp1); ++ // address dependency ++ __ XOR(tmp1, counter_prev_val, counter_prev_val); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ldx_bu (field_val, field_addr, tmp1); break; ++ case T_BYTE: __ ldx_b (field_val, field_addr, tmp1); break; ++ case T_CHAR: __ ldx_hu (field_val, field_addr, tmp1); break; ++ case T_SHORT: __ ldx_h (field_val, field_addr, tmp1); break; ++ case T_INT: __ ldx_w (field_val, field_addr, tmp1); break; ++ case T_LONG: __ ldx_d (field_val, field_addr, tmp1); break; ++ case T_FLOAT: __ ldx_wu (field_val, field_addr, tmp1); break; ++ case T_DOUBLE: __ ldx_d (field_val, field_addr, tmp1); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // address dependency ++ __ XOR(tmp1, field_val, field_val); ++ __ ldx_w(tmp1, counter_addr, tmp1); ++ __ bne(counter_prev_val, tmp1, slow); ++ ++ switch (type) { ++ case T_FLOAT: __ movgr2fr_w(F0, field_val); break; ++ case T_DOUBLE: __ movgr2fr_d(F0, field_val); break; ++ default: __ move(V0, field_val); break; ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +new file mode 100644 +index 00000000000..0ee04e042b5 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP
++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP
++
++#include "jni.h"
++#include "memory/allocation.hpp"
++#include "oops/oop.hpp"
++
++// This file holds platform-dependent routines used to write primitive jni
++// types to the array of arguments passed into JavaCalls::call
++
++class JNITypes : AllStatic {
++  // These functions write a java primitive type (in native format)
++  // to a java stack slot array to be passed as an argument to JavaCalls:calls.
++  // I.e., they are functionally 'push' operations if they have a 'pos'
++  // formal parameter. Note that jlong's and jdouble's are written
++  // _in reverse_ of the order in which they appear in the interpreter
++  // stack. This is because call stubs (see stubGenerator_sparc.cpp)
++  // reverse the argument list constructed by JavaCallArguments (see
++  // javaCalls.hpp).
++
++private:
++
++  // 32bit Helper routines.
++  static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1];
++                                                           *(jint *)(to  ) = from[0]; }
++  static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
++
++public:
++  // In LoongArch64, sizeof(intptr_t) is 8 bytes, and each unit in JavaCallArguments::_value_buffer[]
++  // is 8 bytes.
++  // If we only write the low 4 bytes with (jint *), the high 4 bytes will be left with uncertain values.
++  // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded.
++  // This error occurs in ReflectInvoke.java:
++  // the parameter of DD(int) should be 4 instead of 0x550000004.
++  //
++  // See: [runtime/javaCalls.hpp]
++
++  static inline void put_int(jint  from, intptr_t *to)           { *(intptr_t *)(to + 0    ) =  from; }
++  static inline void put_int(jint  from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) =  from; }
++  static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; }
++
++  // Longs are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the long value. Otherwise the value will be corrupted.
++  static inline void put_long(jlong from, intptr_t *to) {
++    *(jlong*) (to + 1) = from;
++    *(jlong*) (to)     = from;
++  }
++
++  // A long parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++  //
++  // See: [runtime/reflection.cpp] Reflection::invoke()
++  //      assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking");
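The effect described in the put_int comment above is easy to reproduce on any little-endian LP64 host. The snippet below is only an illustration of why the port writes the full 8-byte slot; it is not part of the patch, and the 0x55... filler value is made up to stand for stale buffer contents.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t slot = 0x5500000000LL;   // stale contents of an 8-byte argument slot
      *(int32_t*)&slot = 4;            // writing only the low 4 bytes leaves the high half alone
      std::printf("%#llx\n", (unsigned long long)slot);  // prints 0x5500000004 on little-endian
      slot = 4;                        // writing the whole slot, as the LoongArch64 put_int does
      std::printf("%#llx\n", (unsigned long long)slot);  // prints 0x4
      return 0;
    }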
++
++  static inline void put_long(jlong from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = from;
++    *(jlong*) (to + pos)     = from;
++    pos += 2;
++  }
++
++  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = *from;
++    *(jlong*) (to + pos)     = *from;
++    pos += 2;
++  }
++
++  // Oops are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); }
++  static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; }
++
++  // Floats are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to + 0    ) =  from; }
++  static inline void put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
++  static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
++
++#undef _JNI_SLOT_OFFSET
++#define _JNI_SLOT_OFFSET 0
++
++  // Doubles are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the double value. Otherwise the value will be corrupted.
++  static inline void put_double(jdouble from, intptr_t *to) {
++    *(jdouble*) (to + 1) = from;
++    *(jdouble*) (to)     = from;
++  }
++
++  // A double parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++  //
++  // See: [runtime/reflection.cpp] Reflection::invoke()
++  //      assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking");
++
++  static inline void put_double(jdouble from, intptr_t *to, int& pos) {
++    *(jdouble*) (to + 1 + pos) = from;
++    *(jdouble*) (to + pos)     = from;
++    pos += 2;
++  }
++
++  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
++    *(jdouble*) (to + 1 + pos) = *from;
++    *(jdouble*) (to + pos)     = *from;
++    pos += 2;
++  }
++
++  // The get_xxx routines, on the other hand, actually _do_ fetch
++  // java primitive types from the interpreter stack.
++  static inline jint    get_int   (intptr_t *from) { return *(jint *)    from; }
++  static inline jlong   get_long  (intptr_t *from) { return *(jlong *)  (from + _JNI_SLOT_OFFSET); }
++  static inline oop     get_obj   (intptr_t *from) { return *(oop *)     from; }
++  static inline jfloat  get_float (intptr_t *from) { return *(jfloat *)  from; }
++  static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
++#undef _JNI_SLOT_OFFSET
++};
++
++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp
+new file mode 100644
+index 00000000000..eb75830ec9c
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp
+@@ -0,0 +1,202 @@
++/*
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmci.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMCIObject method, JVMCI_TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, JVMCIObject constant, JVMCI_TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj = jvmci_env()->asConstant(constant, JVMCI_CHECK); ++ jobject value = JNIHandles::make_local(obj()); ++ if (jvmci_env()->get_HotSpotObjectConstantImpl_compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, JVMCIObject constant, JVMCI_TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (jvmci_env()->get_HotSpotMetaspaceConstantImpl_compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, JVMCI_CHECK); ++ move->set_data((intptr_t) narrowOop); ++ JVMCI_event_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, JVMCI_CHECK); ++ move->set_data((intptr_t) reference); ++ JVMCI_event_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ JVMCI_event_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, JVMCI_TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ JVMCI_event_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, JVMCIObject hotspot_method, jint pc_offset, JVMCI_TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (JVMCIENV->isa_HotSpotResolvedJavaMethodImpl(hotspot_method)) { ++ method = JVMCIENV->asMethod(hotspot_method); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ 
_instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, JVMCI_TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, JVMCI_TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} +diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad +new file mode 100644 +index 00000000000..80dff0c7626 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad +new file mode 100644 +index 00000000000..f1bb1c2f6cb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch_64.ad +@@ -0,0 +1,15678 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. 
++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H 
(SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. ++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, 
F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, 
F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( 
SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, 
F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( 
SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class 
a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class all_reg32( ++ S8, ++ S7, ++ S5, /* S5_heapbase */ ++ /* S6, S6 TREG */ ++ S4, ++ S3, ++ S2, ++ S1, ++ S0, ++ T8, ++ /* T7, AT */ ++ T6, ++ T5, ++ /* T4, jarl T4 */ ++ T3, ++ T2, ++ T1, ++ T0, ++ A7, ++ A6, ++ A5, ++ A4, ++ A3, ++ A2, ++ A1, ++ A0, ++ FP ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class all_reg( ++ S8, S8_H, ++ S7, S7_H, ++ /* S6, S6_H, S6 TREG */ ++ S5, S5_H, /* S5_heapbase */ ++ S4, S4_H, ++ S3, S3_H, ++ S2, S2_H, ++ S1, S1_H, ++ S0, S0_H, ++ T8, T8_H, ++ /* T7, T7_H, AT */ ++ T6, T6_H, ++ T5, T5_H, ++ /* T4, T4_H, jalr T4 */ ++ T3, T3_H, ++ T2, T2_H, ++ T1, T1_H, ++ T0, T0_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ FP, FP_H ++ ); ++ ++ ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def <name> ( <int_value>, <expression> ); ++// Generated Code in ad_<arch>.hpp ++// #define <name> (<expression>) ++// // value == <int_value> ++// Generated code in ad_<arch>.cpp adlc_verification() ++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); ++// ++definitions %{
++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use the jr instruction to construct calls, so they are more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++
++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++
++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" ++#include "runtime/objectMonitor.hpp" ++ ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++
++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++
++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as a jump and can be patched to ++ // a call by deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section.
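++    // mask_bits(size + m, ~m) with m == 16 - 1 rounds size up to the next multiple of
++    // 16: for example, a raw size of 12 bytes becomes 16, and 16 stays 16.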
++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++inline uint vector_length(const Node* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->length(); ++} ++ ++inline uint vector_length(const MachNode* use, MachOper* opnd) { ++ uint def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ return def->bottom_type()->is_vect()->length(); ++} ++ ++inline uint vector_length_in_bytes(const Node* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->length_in_bytes(); ++} ++ ++inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { ++ uint def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ return def->bottom_type()->is_vect()->length_in_bytes(); ++} ++ ++inline BasicType vector_element_basic_type(const Node *n) { ++ return n->bottom_type()->is_vect()->element_basic_type(); ++} ++ ++inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) { ++ uint def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ return def->bottom_type()->is_vect()->element_basic_type(); ++} ++ ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; ++ ++bool is_CAS(int opcode); ++bool use_AMO(int opcode); ++ ++bool unnecessary_acquire(const Node *barrier); ++bool unnecessary_release(const Node *barrier); ++bool unnecessary_volatile(const Node *barrier); ++bool needs_releasing_store(const Node *store); ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++#define V0_num A0_num ++#define V0_H_num A0_H_num ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (CompressedOops::ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++ ++ // FP(r22) is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r22->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_FP_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} ++ ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} ++ ++int MachNode::pd_alignment_required() const { ++ return 1; ++} ++ ++int MachNode::compute_padding(int current_offset) const { ++ return 0; ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
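++  // The handler is emitted into a separate stub (start_a_stub/end_a_stub below) as a
++  // relocated, patchable jump to the entry point of OptoRuntime::exception_blob(); the
++  // assert at the end checks that it fits in the size_exception_handler() budget.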
++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ case Op_RoundDoubleMode: ++ if (!UseLSX) ++ return false; ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; ++ default: ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) ++ return false; ++ ++ switch (opcode) { ++ case Op_RotateRightV: ++ case Op_RotateLeftV: ++ if (bt != T_INT && bt != T_LONG) { ++ return false; ++ } ++ break; ++ case Op_MaxReductionV: ++ case Op_MinReductionV: ++ if (bt == T_FLOAT || bt == T_DOUBLE) { ++ return false; ++ } ++ break; ++ case Op_VectorCastB2X: ++ case Op_VectorLoadMask: ++ case Op_VectorStoreMask: ++ case Op_VectorLoadShuffle: ++ case Op_VectorRearrange: ++ if (vlen < 16) ++ return false; ++ break; ++ case Op_VectorCastS2X: ++ if (vlen < 8) ++ return false; ++ break; ++ case Op_VectorCastI2X: ++ case Op_VectorCastF2X: ++ if (vlen < 4) ++ return false; ++ break; ++ default: ++ break; ++ } ++ ++ return true; ++} ++ ++// Vector calling convention not yet implemented. ++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} ++ ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++ Unimplemented(); ++ return OptoRegPair(0, 0); ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} ++ ++bool Matcher::is_generic_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; ++} ++ ++const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Should the matcher clone input 'm' of node 'n'? ++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { ++ return false; ++} ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. 
++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
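++// As encoded below, a static call site is a single bl, so its return address lies
++// NativeCall::instruction_size bytes past the start of the call; a dynamic call first
++// materializes the IC Klass with a constant load (NativeMovConstReg:
++// lu12i_w/ori/lu32i_d/lu52i_d) before the bl, hence the larger offset.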
++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). ++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler 
_masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld_w(AT, 
Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ if (this->ideal_reg() == Op_RegI) ++ st->print("\tld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("\tld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (Assembler::is_simm(offset, 12)) { ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ fldx_d( as_FloatRegister(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && 
dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\tmove(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (Assembler::is_simm(offset, 12)) { ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++ } ++ else { ++ __ li(AT, offset); ++ __ fstx_d( as_FloatRegister(Matcher::_regEncode[src_first]), SP, AT); ++ } ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && 
dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ 
st->print("\t"); ++ if (Assembler::is_simm(framesize, 12)) { ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ } else { ++ st->print_cr("li AT, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print_cr("add_d SP, SP, AT # Rlease stack @ MachEpilogNode"); ++ } ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("ld_d AT, poll_offset[thread] #polling_word_offset\n\t" ++ "ld_w AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ remove_frame(framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ Label dummy_label; ++ Label* code_stub = &dummy_label; ++ if (!C->output()->in_scratch_emit_size()) { ++ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub, thread, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* 
MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++
++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++
++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++
++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ ConstantTable& constant_table = C->output()->constant_table(); ++ C2_MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = cbuf.consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, ++ "insts must immediately follow consts"); ++ // Materialize the constant table base. ++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); ++ jint offs = (baseaddr - __ pc()) >> 2; ++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); ++ __ pcaddi(Rtoc, offs); ++ } ++} ++
++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // pcaddi ++ return 1 * BytesPerInstWord; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++
++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237.
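++  // The text printed below sketches the prologue this node emits: an optional stack
++  // bang, stores of RA and FP just below the incoming SP, an optional FP set-up when
++  // PreserveFramePointer is on, the SP adjustment by framesize, and (for nmethods with
++  // an entry barrier) the guard-value check against the thread's disarmed value.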
++ if (C->output()->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ if (PreserveFramePointer) { ++ if (Assembler::is_simm((framesize - wordSize * 2), 12)) { ++ st->print("addi_d FP, SP, %d \n\t", framesize); ++ } else { ++ st->print("li AT, %d \n\t", framesize); ++ st->print("add_d FP, AT \n\t"); ++ } ++ } ++ st->print("addi_d SP, SP, -%d \t",framesize); ++ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { ++ st->print("\n\t"); ++ st->print("ld_d T1, guard, 0\n\t"); ++ st->print("membar LoadLoad\n\t"); ++ st->print("ld_d T2, TREG, thread_disarmed_offset\n\t"); ++ st->print("beq T1, T2, skip\n\t"); ++ st->print("\n\t"); ++ st->print("jalr #nmethod_entry_barrier_stub\n\t"); ++ st->print("b skip\n\t"); ++ st->print("guard: int\n\t"); ++ st->print("\n\t"); ++ st->print("skip:\n\t"); ++ } ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(T4, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(T4, AT, &L_skip_barrier); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ __ bind(L_skip_barrier); ++ } ++ ++ if (C->output()->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ build_frame(framesize); ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); ++ ++ if (C->stub_function() == NULL) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(&_masm); ++ } ++ ++ C->output()->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
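++    // calculate_table_base_offset() above always returns 0 on this port, so the table
++    // base coincides with the start of the constants section and is reached
++    // PC-relatively via the single pcaddi in MachConstantBaseNode::emit().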
++ ConstantTable& constant_table = C->output()->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++ case Op_ShenandoahWeakCompareAndSwapP: ++ case Op_ShenandoahWeakCompareAndSwapN: ++ case Op_ShenandoahCompareAndExchangeP: ++ case Op_ShenandoahCompareAndExchangeN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool use_AMO(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_StoreI: ++ case Op_StoreL: ++ case Op_StoreP: ++ case Op_StoreN: ++ case Op_StoreNKlass: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_acquire(const Node *barrier) ++{ ++ assert(barrier->is_MemBar(), "expecting a membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode* mb = barrier->as_MemBar(); ++ ++ if (mb->trailing_load_store()) { ++ Node* load_store = mb->in(MemBarNode::Precedent); ++ assert(load_store->is_LoadStore(), "unexpected graph shape"); ++ return is_CAS(load_store->Opcode()); ++ } ++ ++ return false; ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (mem->is_Store()) { ++ assert(mem->as_Store()->is_release(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); ++ return use_AMO(mem->Opcode()); ++ } else { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_volatile(const Node *n) ++{ ++ // assert n->is_MemBar(); ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *mbvol = n->as_MemBar(); ++ ++ bool release = false; ++ if (mbvol->trailing_store()) { ++ Node* mem = mbvol->in(MemBarNode::Precedent); ++ release = use_AMO(mem->Opcode()); ++ } ++ ++ assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); ++#ifdef ASSERT ++ if (release) { ++ Node* leading = mbvol->leading_membar(); ++ assert(leading->Opcode() == Op_MemBarRelease, ""); ++ assert(leading->as_MemBar()->leading_store(), ""); ++ assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); ++ } ++#endif ++ ++ return release; ++} ++ ++bool needs_releasing_store(const Node *n) ++{ ++ // assert 
n->is_Store(); ++ if (UseBarriersForVolatile) { ++ // we use a normal store and dbar combination ++ return false; ++ } ++ ++ StoreNode *st = n->as_Store(); ++ ++ return st->trailing_membar() != NULL; ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ C2_MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ C2_MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ C2_MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ C2_MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. 
++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. 
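++  // Same register assignment as c_return_value above: integer, pointer and long
++  // results come back in A0 (V0_num aliases A0_num via the #define in the source
++  // block), float and double results in F0.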
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
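++// For illustration only (not an instruct rule defined in this file): a hypothetical
++// pattern such as
++//   instruct addI_reg_imm(mRegI dst, mRegI src, immI12 imm) %{
++//     match(Set dst (AddI src imm));
++//     ...
++//   %}
++// would only match AddI nodes whose constant input satisfies the immI12 predicate
++// below, i.e. fits in a signed 12-bit immediate.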
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes 
++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ 
predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ 
interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() 
%{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ 
match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL2I(mRegL reg) %{ ++ match(ConvL2I reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ 
constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ 
disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOp. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++operand cmpOpEqNe() %{ ++ match(Bool); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne ++ || n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
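++// Each stackSlot operand below is SP-relative: base(0x1d) names SP in this
++// port's register encoding, and disp($reg) carries the frame offset that the
++// matcher assigns, since these operands have no match rule of their own.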
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++opclass mRegIorL2I( mRegI, mRegL2I); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
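++// The model below assumes a 4-wide decode (D1..D4), two integer ALUs, two
++// FPUs, one memory port and one branch unit, matching the resources()
++// declaration inside the pipeline block.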
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 
Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- 
++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_reg_volatile(indirect mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(205); ++ format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(185); ++ format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ predicate(n->as_Load()->barrier_data() == 0); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ block_comment("loadP"); ++ __ 
loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP_volatile(indirect mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d R0, $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeN_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem 
(StoreNKlass mem (EncodePKlass src))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI 
(LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(120); ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); ++ format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem #@storeI" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if ($src->constant_reloc() == relocInfo::metadata_type){ ++ __ mov_metadata(dst, (Metadata*)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ __ movoop(dst, (jobject)value, /*immediate*/true); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ 
add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_ptr) %{ ++ match(TailCall jump_target method_ptr); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register ptr = $method_ptr$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, ptr); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. 
++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = 
$cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs.
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} 
++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) 
{ ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct unnecessary_membar_acquire() %{ ++ predicate(unnecessary_acquire(n)); ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ format %{ "membar_acquire (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire (elided)"); ++ %} ++ ++ ins_pipe(empty); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ predicate(unnecessary_volatile(n)); ++ match(MemBarVolatile); ++ ins_cost(0); ++ ++ format %{ "membar_volatile (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile (elided)"); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ match(StoreStoreFence); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ 
++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src 
= $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = 
$cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) 
(Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = 
as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ 
++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; 
++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
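Editor's note (not part of the patch): the cmpL3/cmpF3/cmpD3 patterns below materialize a three-way comparison result (-1, 0 or +1, with NaN mapped to -1) in a general register by doing two set-on-less-than style comparisons and subtracting the flags. As a hedged illustration only, with the helper name cmp3 invented for the example, the integer variant computes the equivalent of:

    // Illustrative C++ sketch of what the slt/slt/sub_d sequence in
    // cmpL3_reg_reg computes; editor's example, not emitted code.
    #include <cstdint>
    #include <cassert>

    int cmp3(int64_t a, int64_t b) {
        int lt = (a < b) ? 1 : 0;   // slt AT,  opr1, opr2
        int gt = (b < a) ? 1 : 0;   // slt dst, opr2, opr1
        return gt - lt;             // sub_d dst, dst, AT -> -1, 0 or +1
    }

    int main() {
        assert(cmp3(1, 2) == -1);
        assert(cmp3(2, 2) == 0);
        assert(cmp3(3, 2) == 1);
        return 0;
    }

The floating-point variants follow the same shape, but derive the two flags from fcmp_clt (strictly less) and fcmp_cult (unordered or less), which is what makes a NaN operand fall into the -1 case.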
++instruct cmpL3_reg_zero(mRegI dst, mRegL src1, immL_0 zero) %{ ++ match(Set dst (CmpL3 src1 zero)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, zero @ cmpL3_reg_zero" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register dst = as_Register($dst$$reg); ++ __ slt(AT, opr1, R0); ++ __ slt(dst, R0, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_result = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_s(FCC0, src2, src1); ++ __ fcmp_cult_s(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_d(FCC0, src2, src1); ++ __ fcmp_cult_d(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of doublewords in an array to be cleared, ++ //and base points to the starting address of the array. ++ Register base = $base$$Register; ++ Register cnt = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(cnt, R0, done); ++ ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ addi_d(cnt, cnt, -1); ++ __ addi_d(base, base, wordSize); ++ __ bne(cnt, R0, Loop); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of doublewords in an array to be cleared, ++ //and base points to the starting address of the array.
++ Register base = $base$$Register; ++ long cnt = $cnt$$constant; ++ Label Loop, done; ++ ++ int tmp = cnt % 8; ++ int i = 0; ++ for (; i < tmp; i++) { ++ __ st_d(R0, base, i * 8); ++ } ++ if (cnt - tmp) { ++ __ li(AT, cnt); ++ __ alsl_d(AT, AT, base, 2); ++ __ addi_d(base, base, i * 8); ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ st_d(R0, base, 8); ++ __ st_d(R0, base, 16); ++ __ st_d(R0, base, 24); ++ __ st_d(R0, base, 32); ++ __ st_d(R0, base, 40); ++ __ st_d(R0, base, 48); ++ __ st_d(R0, base, 56); ++ __ addi_d(base, base, 64); ++ __ blt(base, AT, Loop); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1 tmp2:$tmp2 -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
string_indexofUU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofLL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conUU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, immI_1_4 int_cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, noreg, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conLL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, immI_1_4 int_cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, noreg, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, immI_1 int_cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ 
string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, noreg, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct stringL_indexof_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "StringLatin1 IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ stringL_indexof_char" %} ++ ++ ins_encode %{ ++ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len); ++ format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} ++ ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression ++instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String Compress $src,$dst -> $result @ string_compress " %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// byte[] to char[] inflation ++instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, ++ mRegL tmp1, mRegL tmp2) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); ++ ++ format %{ "String Inflate $src,$dst @ string_inflate " %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$Register, $tmp2$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ 
string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// encode char[] to byte[] in ISO_8859_1 ++instruct encode_iso_array(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ predicate(!((EncodeISOArrayNode*)n)->is_ascii()); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "Encode ISO array $src,$dst,$len -> $result @ encode_iso_array" %} ++ ins_encode %{ ++ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register, false); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// encode char[] to byte[] in ASCII ++instruct encode_ascii_array(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ predicate(((EncodeISOArrayNode*)n)->is_ascii()); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "Encode ASCII array $src,$dst,$len -> $result @ encode_ascii_array" %} ++ ins_encode %{ ++ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_w(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ sub_w(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegLorI2L src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_d(AT, src, 63); ++ __ xorr(dst, src, AT); ++ __ sub_d(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = 
$src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} 
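Editor's note (not part of the patch): the AbsI/AbsL patterns a little further up use the classic branch-free absolute-value idiom: an arithmetic right shift by the sign-bit position produces 0 for non-negative inputs and all-ones (-1) for negative ones, and xor-then-subtract with that mask negates exactly the negative case. A minimal C++ sketch of the same computation follows; the function name abs32 is invented for the example and it assumes signed right shift is arithmetic, as it is on the target:

    // Illustrative sketch of the srai_w/xorr/sub_w sequence in absI_rReg.
    #include <cstdint>
    #include <cassert>

    int32_t abs32(int32_t x) {
        int32_t m = x >> 31;    // srai_w AT, src, 31: 0 or -1 (all ones)
        return (x ^ m) - m;     // xorr dst, src, AT; sub_w dst, dst, AT
    }

    int main() {
        assert(abs32(5) == 5);
        assert(abs32(-7) == 7);
        assert(abs32(0) == 0);
        return 0;
    }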
++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD 
$dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_s $dst, $src1, $src2, $src3" %} ++ ++ 
ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmadds $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++instruct copySignF_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (CopySignF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_s($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ ++ match(Set dst (CopySignD src1 (Binary src2 zero))); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_d($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer 
Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 6, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 5-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slli_w(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. 
++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = 
as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ 
ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + 
n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Rotate Shift Left ++instruct rolI_reg(mRegI dst, mRegI src, mRegI shift) ++%{ ++ match(Set dst (RotateLeft src shift)); ++ ++ format %{ "rotl_w $dst, $src, $shift @ rolI_reg" %} ++ ++ ins_encode %{ ++ __ sub_w(AT, R0, $shift$$Register); ++ __ rotr_w($dst$$Register, $src$$Register, AT); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_reg(mRegL dst, mRegL src, mRegI shift) ++%{ ++ match(Set dst (RotateLeft src shift)); ++ ++ format %{ "rotl_d $dst, $src, $shift @ rolL_reg" %} ++ ++ ins_encode %{ ++ __ sub_d(AT, R0, $shift$$Register); ++ __ rotr_d($dst$$Register, $src$$Register, AT); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Rotate Shift Right ++instruct rorI_imm(mRegI dst, mRegI src, immI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotri_w $dst, $src, $shift @ rorI_imm" %} ++ ++ ins_encode %{ ++ __ rotri_w($dst$$Register, $src$$Register, $shift$$constant/* & 0x1f*/); ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct rorI_reg(mRegI dst, mRegI src, 
mRegI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotr_w $dst, $src, $shift @ rorI_reg" %} ++ ++ ins_encode %{ ++ __ rotr_w($dst$$Register, $src$$Register, $shift$$Register); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_imm(mRegL dst, mRegL src, immI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotri_d $dst, $src, $shift @ rorL_imm" %} ++ ++ ins_encode %{ ++ __ rotri_d($dst$$Register, $src$$Register, $shift$$constant/* & 0x3f*/); ++ %} ++ ++ ins_pipe( ialu_regL_imm16 ); ++%} ++ ++instruct rorL_reg(mRegL dst, mRegL src, mRegI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotr_d $dst, $src, $shift @ rorL_reg" %} ++ ++ ins_encode %{ ++ __ rotr_d($dst$$Register, $src$$Register, $shift$$Register); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, 
dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. ++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct roundD(regD dst, regD src, immI rmode) %{ ++ predicate(UseLSX); ++ match(Set dst (RoundDoubleMode src rmode)); ++ format %{ "frint $dst, $src, $rmode\t# @roundD" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_floor: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_ceil: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; 
++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ 
encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@checkCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++instruct castLL(mRegL dst) ++%{ ++ match(Set dst (CastLL dst)); ++ ++ size(0); ++ format %{ "# castLL of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(empty); ++%} ++ ++instruct castFF(regF dst) %{ ++ match(Set dst (CastFF dst)); ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castDD(regD dst) %{ ++ match(Set dst (CastDD dst)); ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castVVX(vecX dst) %{ ++ match(Set dst (CastVV dst)); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castVVY(vecY dst) %{ ++ match(Set dst (CastVV dst)); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != -1) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = 
as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ 
match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "StoreCM MEMBAR loadstore\n\t" ++ "st_b $mem, zero\t! CMS card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "stop; #@ShouldNotReachHere" %} ++ ins_encode %{ ++ if (is_reachable()) { ++ __ stop(_halt_reason); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. 
Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != -1) { ++ __ stop("in storePConditional: index != -1"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != -1) { ++ __ stop("in storeIConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. 
++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != -1) { ++ __ stop("in storeLConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. ++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ 
cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ ++ match(Set prev (GetAndSetI mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_w(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_w(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ ++ match(Set prev (GetAndSetL mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ ++ match(Set prev (GetAndSetN mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ __ amswap_db_w(AT, newv, addr); ++ __ bstrpick_d(prev, AT, 31, 0); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ ++ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_d $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_d(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_d(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_d [$mem], 
$incr" %} ++ ins_encode %{ ++ __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ ++ match(Set newval (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_w $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_w(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_w(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_w [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeI" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeL(mRegL res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = 
$oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapI" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapL(mRegI res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @WeakCompareAndSwapL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, false /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, false /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP_acq(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = 
$oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, 
src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "ld_w AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ ld_w(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit ld_w AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ ++ format %{ "RevB_I $dst, $src" %} ++ ins_encode %{ ++ __ bswap_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_long(mRegL dst, mRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ ++ format %{ "RevB_L $dst, $src" %} ++ ins_encode %{ ++ __ revb_d($dst$$Register, 
$src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ format %{ "RevB_US $dst, $src" %} ++ ins_encode %{ ++ __ bswap_hu($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ format %{ "RevB_S $dst, $src" %} ++ ins_encode %{ ++ __ bswap_h($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// --------------- Population Count Instructions ------------------------------ ++// ++instruct popCountI(mRegI dst, mRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); ++ ++ format %{ "vinsgr2vr_w fscratch, $src, 0\n\t" ++ "vpcnt_w fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountI" %} ++ ++ ins_encode %{ ++ __ vinsgr2vr_w(fscratch, $src$$Register, 0); ++ __ vpcnt_w(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popCountI_mem(mRegI dst, memory mem) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI (LoadI mem))); ++ ++ format %{ "fld_s fscratch, $mem, 0\n\t" ++ "vpcnt_w fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountI_mem" %} ++ ++ ins_encode %{ ++ __ loadstore_enc(fscratch, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_FLOAT); ++ __ vpcnt_w(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Note: Long.bitCount(long) returns an int. 
++instruct popCountL(mRegI dst, mRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ format %{ "vinsgr2vr_d fscratch, $src, 0\n\t" ++ "vpcnt_d fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountL" %} ++ ++ ins_encode %{ ++ __ vinsgr2vr_d(fscratch, $src$$Register, 0); ++ __ vpcnt_d(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popCountL_mem(mRegI dst, memory mem) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL (LoadL mem))); ++ ++ format %{ "fld_d fscratch, $mem, 0\n\t" ++ "vpcnt_d fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountL_mem" %} ++ ++ ins_encode %{ ++ __ loadstore_enc(fscratch, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ __ vpcnt_d(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// --------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# 
@repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ 
xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ 
ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 
++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} 
++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# 
@sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} ++ ins_encode %{ ++ __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} ++ ins_encode %{ ++ __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} ++ ins_encode %{ ++ __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVL src1 src2)); ++ format %{ 
"xvmul.d $dst, $src1, $src2\t# @mul4L" %} ++ ins_encode %{ ++ __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS -------------------------------------- ++ ++instruct abs16B(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs16B" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8S(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs8S" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4I(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs4I" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2L(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AbsVL src)); ++ 
effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs2L" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVF src)); ++ format %{ "vbitclri.w $dst, $src\t# @abs4F" %} ++ ins_encode %{ ++ __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AbsVD src)); ++ format %{ "vbitclri.d $dst, $src\t# @abs2D" %} ++ ins_encode %{ ++ __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs32B(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs32B" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs16S(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs16S" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8I(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs8I" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4L(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs4L" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVF src)); ++ format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} ++ ins_encode %{ ++ __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVD src)); ++ format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} ++ ins_encode %{ ++ __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS DIFF --------------------------------- ++ ++instruct absd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} ++ ins_encode %{ ++ __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "vabsd.d 
$dst, $src1, $src2\t# @absd2L" %} ++ ins_encode %{ ++ __ vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} ++ ins_encode %{ ++ __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} ++ ins_encode %{ ++ __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MAX -------------------------------------- ++ ++instruct max16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} ++ ins_encode %{ ++ __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} ++ ins_encode %{ ++ __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} ++ ins_encode %{ ++ __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} ++ ins_encode %{ ++ __ vmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} ++ ins_encode %{ ++ __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} ++ ins_encode %{ ++ __ vfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ 
vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} ++ ins_encode %{ ++ __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} ++ ins_encode %{ ++ __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} ++ ins_encode %{ ++ __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} ++ ins_encode %{ ++ __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} ++ ins_encode %{ ++ __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} ++ ins_encode %{ ++ __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MIN -------------------------------------- ++ ++instruct min16B(vecX dst, vecX src1, 
vecX src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} ++ ins_encode %{ ++ __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} ++ ins_encode %{ ++ __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} ++ ins_encode %{ ++ __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} ++ ins_encode %{ ++ __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} ++ ins_encode %{ ++ __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} ++ ins_encode %{ ++ __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} ++ ins_encode %{ ++ __ xvmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.h $dst, $src1, 
$src2\t# @min16S" %} ++ ins_encode %{ ++ __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} ++ ins_encode %{ ++ __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} ++ ins_encode %{ ++ __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} ++ ins_encode %{ ++ __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} ++ ins_encode %{ ++ __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NEG -------------------------------------- ++ ++instruct neg4I(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (NegVI src)); ++ format %{ "vneg.w $dst, $src\t# @neg4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vneg_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (NegVF src)); ++ format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} ++ ins_encode %{ ++ __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (NegVD src)); ++ format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} ++ ins_encode %{ ++ __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8I(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (NegVI src)); ++ 
format %{ "xvneg.w $dst, $src\t# @neg8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvneg_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (NegVF src)); ++ format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} ++ ins_encode %{ ++ __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (NegVD src)); ++ format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} ++ ins_encode %{ ++ __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SQRT ------------------------------------- ++ ++instruct sqrt4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SqrtVF src)); ++ format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} ++ ins_encode %{ ++ __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SqrtVD src)); ++ format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} ++ ins_encode %{ ++ __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SqrtVF src)); ++ format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} ++ ins_encode %{ ++ __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SqrtVD src)); ++ format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} ++ ins_encode %{ ++ __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MADD ------------------------------------- ++ ++instruct madd16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} ++ ins_encode %{ ++ __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} ++ ins_encode %{ ++ __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} ++ ins_encode %{ ++ __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} ++ ins_encode %{ ++ __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); 
++ format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} ++ ins_encode %{ ++ __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} ++ ins_encode %{ ++ __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} ++ ins_encode %{ ++ __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} ++ ins_encode %{ ++ __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} ++ ins_encode %{ ++ __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} ++ ins_encode %{ ++ __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); ++ format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} ++ ins_encode %{ ++ __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} ++ ins_encode %{ ++ __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MSUB ------------------------------------- ++ ++instruct msub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} ++ ins_encode %{ ++ __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} ++ ins_encode %{ ++ __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ 
++instruct msub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} ++ ins_encode %{ ++ __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} ++ ins_encode %{ ++ __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} ++ ins_encode %{ ++ __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} ++ ins_encode %{ ++ __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} ++ ins_encode %{ ++ __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} ++ ins_encode %{ ++ __ xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} ++ ins_encode %{ ++ __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} ++ ins_encode %{ ++ __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} ++ ins_encode %{ ++ __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} ++ ins_encode %{ ++ __ 
xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMADD ----------------------------------- ++ ++// -src1 * src2 - src3 ++instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} ++ ins_encode %{ ++ __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} ++ ins_encode %{ ++ __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} ++ ins_encode %{ ++ __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} ++ ins_encode %{ ++ __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMSUB ----------------------------------- ++ ++// -src1 * src2 + src3 ++instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} ++ ins_encode %{ ++ __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} ++ ins_encode %{ ++ __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmsub.s $dst, 
$src1, $src2, $src3\t# @nmsub8F" %} ++ ins_encode %{ ++ __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} ++ ins_encode %{ ++ __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------- Vector Multiply-Add Shorts into Integer -------------------- ++ ++instruct muladd8Sto4I(vecX dst, vecX src1, vecX src2, vecX tmp) %{ ++ predicate(vector_length(n->in(1)) == 8 && vector_element_basic_type(n->in(1)) == T_SHORT); ++ match(Set dst (MulAddVS2VI src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "muladdvs2vi $dst, $src1, $src2\t# TEMP($tmp) @muladd8Sto4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vmulwev_w_h($tmp$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vmulwod_w_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vadd_w($dst$$FloatRegister, $tmp$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct muladd16Sto8I(vecY dst, vecY src1, vecY src2, vecY tmp) %{ ++ predicate(vector_length(n->in(1)) == 16 && vector_element_basic_type(n->in(1)) == T_SHORT); ++ match(Set dst (MulAddVS2VI src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "muladdvs2vi $dst, $src1, $src2\t# TEMP($tmp) @muladd16Sto8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvmulwev_w_h($tmp$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvmulwod_w_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvadd_w($dst$$FloatRegister, $tmp$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 
16); ++ match(Set dst (LShiftVB src (LShiftCntV shift))); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVS src (LShiftCntV shift))); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVI src (LShiftCntV shift))); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (LShiftVB src (LShiftCntV shift))); ++ format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ 
xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (LShiftVS src (LShiftCntV shift))); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVI src (LShiftCntV shift))); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVB src (RShiftCntV shift))); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX 
tmp) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVS src (RShiftCntV shift))); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVI src (RShiftCntV shift))); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (URShiftVB src (RShiftCntV shift))); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, 
$shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVS src (RShiftCntV shift))); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVI src (RShiftCntV shift))); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVB src (RShiftCntV shift))); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVS src (RShiftCntV shift))); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVI src (RShiftCntV shift))); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (RShiftVB src (RShiftCntV shift))); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVS src (RShiftCntV shift))); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ 
__ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVI src (RShiftCntV shift))); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- RotateRightV --------------------------------- ++ ++instruct rotr4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotr.w $dst, $src, $shift\t# @rotr4I" %} ++ ins_encode %{ ++ __ vrotr_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotri.w $dst, $src, $shift\t# @rotr4I_imm" %} ++ ins_encode %{ ++ __ vrotri_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotr.d $dst, $src, $shift\t# @rotr2L" %} ++ ins_encode %{ ++ __ vrotr_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotri.d $dst, $src, $shift\t# @rotr2L_imm" %} ++ ins_encode %{ ++ __ vrotri_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "xvrotr.w $dst, $src, $shift\t# @rotr8I" %} ++ ins_encode %{ ++ __ xvrotr_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "xvrotri.w $dst, $src, $shift\t# @rotr8I_imm" %} ++ ins_encode %{ ++ __ xvrotri_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr4L(vecY dst, vecY src, vecY shift) 
%{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateRightV src shift));
++  format %{ "xvrotr.d $dst, $src, $shift\t# @rotr4L" %}
++  ins_encode %{
++    __ xvrotr_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotr4L_imm(vecY dst, vecY src, immI shift) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateRightV src shift));
++  format %{ "xvrotri.d $dst, $src, $shift\t# @rotr4L_imm" %}
++  ins_encode %{
++    __ xvrotri_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++// ------------------------------ RotateLeftV ---------------------------------
++
++instruct rotl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "vrotl $dst, $src, $shift\t# TEMP($tmp) @rotl4I" %}
++  ins_encode %{
++    __ vneg_w($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ vrotr_w($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl4I_imm(vecX dst, vecX src, immI shift) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++  format %{ "vrotli $dst, $src, $shift\t# @rotl4I_imm" %}
++  ins_encode %{
++    __ vrotri_w($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x1f);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
++  predicate(vector_length(n) == 2);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "vrotl $dst, $src, $shift\t# TEMP($tmp) @rotl2L" %}
++  ins_encode %{
++    __ vneg_d($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ vrotr_d($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl2L_imm(vecX dst, vecX src, immI shift) %{
++  predicate(vector_length(n) == 2);
++  match(Set dst (RotateLeftV src shift));
++  format %{ "vrotli $dst, $src, $shift\t# @rotl2L_imm" %}
++  ins_encode %{
++    __ vrotri_d($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x3f);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl8I(vecY dst, vecY src, vecY shift, vecY tmp) %{
++  predicate(vector_length(n) == 8);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "xvrotl $dst, $src, $shift\t# TEMP($tmp) @rotl8I" %}
++  ins_encode %{
++    __ xvneg_w($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ xvrotr_w($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl8I_imm(vecY dst, vecY src, immI shift) %{
++  predicate(vector_length(n) == 8);
++  match(Set dst (RotateLeftV src shift));
++  format %{ "xvrotli $dst, $src, $shift\t# @rotl8I_imm" %}
++  ins_encode %{
++    __ xvrotri_w($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x1f);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl4L(vecY dst, vecY src, vecY shift, vecY tmp) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "xvrotl $dst, $src, $shift\t# TEMP($tmp) @rotl4L" %}
++  ins_encode %{
++    __ xvneg_d($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ xvrotr_d($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl4L_imm(vecY dst, vecY src, immI shift) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++ 
format %{ "xvrotli $dst, $src, $shift\t# @rotl4L_imm" %} ++ ins_encode %{ ++ __ xvrotri_d($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ 
"vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (XorV src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ 
xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Reduction Add -------------------------------- ++ ++instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ 
match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Mul -------------------------------- ++ ++instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} ++ ins_encode %{ 
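++    // Note: reduce() here and in the surrounding rules is the port's shared
++    // macro-assembler reduction helper (defined with the other LoongArch
++    // MacroAssembler routines, not in this hunk). Judging from these call sites,
++    // it folds every lane of $vsrc into the scalar $dst, seeded with $src, with
++    // the ideal opcode selecting add/mul/min/max/and/or/xor and the final
++    // argument giving the vector width in bytes (16 for LSX, 32 for LASX).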
++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Max -------------------------------- ++ ++instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ 
predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, 
$src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Min -------------------------------- ++ ++instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) 
%{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction And -------------------------------- ++ ++instruct reduce_and16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce 
$dst, $src, $vsrc\t# TEMP($tmp) @reduce_and2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Or --------------------------------- ++ ++instruct reduce_or16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, 
this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_or2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Xor -------------------------------- ++ ++instruct reduce_xor16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set 
dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_xor2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 
32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ RoundDoubleModeV ---------------------------- ++ ++instruct round2D(vecX dst, vecX src, immI rmode) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_floor: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_ceil: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct round4D(vecY dst, vecY src, immI rmode) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_floor: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_ceil: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- Vector Cast B2X ------------------------------- ++ ++instruct cvt16Bto16S(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorCastB2X src)); ++ format %{ "vext2xv.h.b $dst, $src\t# @cvt16Bto16S" %} ++ ins_encode %{ ++ __ vext2xv_h_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------- Vector Cast S2X -------------------------------- ++ ++instruct cvt16Sto16B(vecX dst, vecY src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorCastS2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt16Sto16B" %} ++ ins_encode %{ ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ vsrlni_b_h($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Sto8I(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastS2X src)); ++ format %{ "vext2xv.w.h $dst, $src\t# @cvt8Sto8I" %} ++ ins_encode %{ ++ __ vext2xv_w_h($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Sto8F(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastS2X src)); ++ format %{ "vconvert $dst, $src\t# @cvt8Sto8F" %} ++ ins_encode %{ ++ __ vext2xv_w_h($dst$$FloatRegister, $src$$FloatRegister); ++ __ xvffint_s_w($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} 
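Note on the reduce_* patterns above: they all funnel into a single MacroAssembler::reduce() helper, selected by ideal_Opcode() and the element type. At the node level a C2 vector reduction folds the incoming scalar operand into every lane of the vector operand under the matched operation (add/max/min/and/or/xor). A scalar sketch of that contract, in plain C++ with illustrative names only (not HotSpot code and not part of the patch):

    #include <algorithm>
    #include <cstdio>

    // Scalar model of a C2 MaxReductionV/MinReductionV/AndReductionV/... node:
    // fold the scalar input 'src' into every lane of 'vsrc'.
    template <typename T, typename Op>
    T reduce(T src, const T* vsrc, int lanes, Op op) {
      T acc = src;
      for (int i = 0; i < lanes; i++) acc = op(acc, vsrc[i]);
      return acc;
    }

    int main() {
      int v[4] = {3, -7, 42, 5};
      int mx  = reduce(0,  v, 4, [](int a, int b) { return std::max(a, b); });
      int all = reduce(-1, v, 4, [](int a, int b) { return a & b; });
      printf("max=%d and=%d\n", mx, all);  // max=42 and=0
      return 0;
    }

The vecX forms pass 16 (bytes) and the vecY forms pass 32 as the last argument, which is how the helper distinguishes the 128-bit LSX path from the 256-bit LASX path.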
++ ++// --------------------------- Vector Cast I2X -------------------------------- ++ ++instruct cvt8Ito8S(vecX dst, vecY src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorCastI2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt8Ito8S" %} ++ ins_encode %{ ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ vsrlni_h_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Ito4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "vffint.s.w $dst, $src\t# @cvt4Ito4F" %} ++ ins_encode %{ ++ __ vffint_s_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Ito4L(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "vext2xv.d.w $dst, $src\t# @cvt4Ito4L" %} ++ ins_encode %{ ++ __ vext2xv_d_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Ito8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "xvffint.s.w $dst, $src\t# @cvt8Ito8F" %} ++ ins_encode %{ ++ __ xvffint_s_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Ito4D(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "vconvert $dst, $src\t# @cvt4Ito4D" %} ++ ins_encode %{ ++ __ vext2xv_d_w($dst$$FloatRegister, $src$$FloatRegister); ++ __ xvffint_d_l($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Vector Cast L2X ------------------------------ ++ ++instruct cvt4Lto4I(vecX dst, vecY src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastL2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt4Lto4I" %} ++ ins_encode %{ ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ vsrlni_w_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Lto4F(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastL2X src)); ++ effect(TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt4Lto4F" %} ++ ins_encode %{ ++ __ xvpermi_q($tmp$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ xvffint_s_l($dst$$FloatRegister, $tmp$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt2Lto2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastL2X src)); ++ format %{ "vffint.d.l $dst, $src\t# @cvt2Lto2D" %} ++ ins_encode %{ ++ __ vffint_d_l($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Lto4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastL2X src)); ++ format %{ "xvffint.d.l $dst, $src\t# @cvt4Lto4D" %} ++ ins_encode %{ ++ __ xvffint_d_l($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ 
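The VectorCast patterns in this block come in two shapes: widening casts use a single vext2xv.* sign-extension (optionally followed by vffint/xvffint for an integer-to-float conversion), while narrowing casts move the upper 128-bit half down with xvpermi_q and then truncate lane pairs with a vsrlni.* whose shift count is 0. A per-lane sketch of the two shapes, in plain C++ with illustrative names (not HotSpot code):

    #include <cstdint>
    #include <cstdio>

    // Widening: sign-extend each lane (what vext2xv.w.h / vext2xv.d.w do).
    static void widen_s2i(const int16_t* src, int32_t* dst, int n) {
      for (int i = 0; i < n; i++) dst[i] = src[i];
    }

    // Narrowing: keep only the low half of each lane
    // (the xvpermi_q + vsrlni.h.w/vsrlni.w.d sequence with shift 0).
    static void narrow_i2s(const int32_t* src, int16_t* dst, int n) {
      for (int i = 0; i < n; i++) dst[i] = (int16_t)src[i];
    }

    int main() {
      int32_t i[4] = {1, -1, 70000, -70000};
      int16_t s[4];
      narrow_i2s(i, s, 4);
      printf("%d %d %d %d\n", s[0], s[1], s[2], s[3]);  // 1 -1 4464 -4464
      return 0;
    }

Java's narrowing conversions for integer vectors are plain truncation, which is why a logical shift-right-narrow by 0 bits is sufficient here.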
++// ----------------------------- Vector Cast F2X ------------------------------ ++ ++instruct cvt8Fto8S(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorCastF2X src)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt8Fto8S" %} ++ ins_encode %{ ++ __ vftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ xvpermi_q($dst$$FloatRegister, $tmp$$FloatRegister, 0x01); ++ __ vsrlni_h_w($dst$$FloatRegister, $tmp$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Fto4I(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastF2X src)); ++ format %{ "vftint.w.s $dst, $src\t# @cvt2Fto2I" %} ++ ins_encode %{ ++ __ vftintrz_w_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Fto8I(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastF2X src)); ++ format %{ "xvftint.w.s $dst, $src\t# @cvt4Fto4I" %} ++ ins_encode %{ ++ __ xvftintrz_w_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Fto4L(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastF2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt4Fto4L" %} ++ ins_encode %{ ++ __ xvpermi_d($dst$$FloatRegister, $src$$FloatRegister, 0b01010000); ++ __ xvftintrzl_l_s($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Fto4D(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastF2X src)); ++ format %{ "vconvert $dst, $src\t# @cvt4Fto4D" %} ++ ins_encode %{ ++ __ xvpermi_d($dst$$FloatRegister, $src$$FloatRegister, 0b01010000); ++ __ xvfcvtl_d_s($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- Vector Cast D2X ------------------------------- ++ ++instruct cvt4Dto4I(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastD2X src)); ++ effect(TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt4Dto4I" %} ++ ins_encode %{ ++ __ xvftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ __ xvpermi_d($dst$$FloatRegister, $tmp$$FloatRegister, 0b11011000); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Dto4F(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastD2X src)); ++ effect(TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt4Dto4F" %} ++ ins_encode %{ ++ __ xvfcvt_s_d($tmp$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ __ xvpermi_d($dst$$FloatRegister, $tmp$$FloatRegister, 0b11011000); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt2Dto2L(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastD2X src)); ++ format %{ "vftint.l.d $dst, $src\t# @cvt2Dto2L" %} ++ ins_encode %{ ++ __ vftintrz_l_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Dto4L(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4 && 
vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastD2X src)); ++ format %{ "xvftint.l.d $dst, $src\t# @cvt4Dto4L" %} ++ ins_encode %{ ++ __ xvftintrz_l_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ VectorReinterpret --------------------------- ++ ++instruct reinterpretX(vecX dst) ++%{ ++ predicate(vector_length_in_bytes(n) == 16 && vector_length_in_bytes(n->in(1)) == 16); ++ match(Set dst (VectorReinterpret dst)); ++ format %{ "vreinterpret $dst\t# @reinterpretX" %} ++ ins_encode %{ ++ // empty ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct reinterpretY(vecY dst) ++%{ ++ predicate(vector_length_in_bytes(n) == 32 && vector_length_in_bytes(n->in(1)) == 32); ++ match(Set dst (VectorReinterpret dst)); ++ format %{ "xvreinterpret $dst\t# @reinterpretY" %} ++ ins_encode %{ ++ // empty ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct reinterpretX2Y(vecY dst, vecX src) ++%{ ++ predicate(vector_length_in_bytes(n) == 32 && vector_length_in_bytes(n->in(1)) == 16); ++ match(Set dst (VectorReinterpret src)); ++ format %{ "vreinterpret $dst, $src\t# @reinterpretX2Y" %} ++ ins_encode %{ ++ // The higher 128-bits of the "dst" register must be cleared to zero. ++ if ($dst$$FloatRegister == $src$$FloatRegister) { ++ __ xvinsgr2vr_d($dst$$FloatRegister, R0, 2); ++ __ xvinsgr2vr_d($dst$$FloatRegister, R0, 3); ++ } else { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0b00110000); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reinterpretY2X(vecX dst, vecY src) ++%{ ++ predicate(vector_length_in_bytes(n) == 16 && vector_length_in_bytes(n->in(1)) == 32); ++ match(Set dst (VectorReinterpret src)); ++ format %{ "vreinterpret $dst, $src\t# @reinterpretY2X" %} ++ ins_encode %{ ++ if ($dst$$FloatRegister == $src$$FloatRegister) { ++ // empty ++ } else { ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, 0); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ VectorInsert -------------------------------- ++ ++instruct insert16B(vecX dst, mRegI val, immIU4 idx) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.b $dst, $val, $idx\t# @insert16B" %} ++ ins_encode %{ ++ __ vinsgr2vr_b($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert8S(vecX dst, mRegI val, immIU3 idx) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.h $dst, $val, $idx\t# @insert8S" %} ++ ins_encode %{ ++ __ vinsgr2vr_h($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4I(vecX dst, mRegI val, immIU2 idx) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.w $dst, $val, $idx\t# @insert4I" %} ++ ins_encode %{ ++ __ vinsgr2vr_w($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert2L(vecX dst, mRegL val, immIU1 idx) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.d $dst, $val, $idx\t# @insert2L" %} ++ ins_encode %{ ++ __ 
vinsgr2vr_d($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4F(vecX dst, regF val, immIU2 idx, mRegI tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ effect(TEMP tmp); ++ format %{ "vinsert $dst, $val, $idx\t# TEMP($tmp) @insert4F" %} ++ ins_encode %{ ++ __ vpickve2gr_w($tmp$$Register, $val$$FloatRegister, 0); ++ __ vinsgr2vr_w($dst$$FloatRegister, $tmp$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert2D(vecX dst, regD val, immIU1 idx, mRegI tmp) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ effect(TEMP tmp); ++ format %{ "vinsert $dst, $val, $idx\t# TEMP($tmp) @insert2D" %} ++ ins_encode %{ ++ __ vpickve2gr_d($tmp$$Register, $val$$FloatRegister, 0); ++ __ vinsgr2vr_d($dst$$FloatRegister, $tmp$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert32B(vecY dst, mRegI val, immIU5 idx) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsert $dst, $val, $idx\t# @insert32B" %} ++ ins_encode %{ ++ int idx = $idx$$constant; ++ int msbw, lsbw; ++ switch (idx % 4) { ++ case 0: msbw = 7, lsbw = 0; break; ++ case 1: msbw = 15, lsbw = 8; break; ++ case 2: msbw = 23, lsbw = 16; break; ++ case 3: msbw = 31, lsbw = 24; break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ xvpickve2gr_w(SCR1, $dst$$FloatRegister, idx >> 2); ++ __ bstrins_w(SCR1, $val$$Register, msbw, lsbw); ++ __ xvinsgr2vr_w($dst$$FloatRegister, SCR1, idx >> 2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert16S(vecY dst, mRegI val, immIU4 idx) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsert $dst, $val, $idx\t# @insert16S" %} ++ ins_encode %{ ++ int idx = $idx$$constant; ++ int msbw = (idx % 2) ? 31 : 15; ++ int lsbw = (idx % 2) ? 
16 : 0; ++ __ xvpickve2gr_w(SCR1, $dst$$FloatRegister, idx >> 1); ++ __ bstrins_w(SCR1, $val$$Register, msbw, lsbw); ++ __ xvinsgr2vr_w($dst$$FloatRegister, SCR1, idx >> 1); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert8I(vecY dst, mRegI val, immIU3 idx) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.w $dst, $val, $idx\t# @insert8I" %} ++ ins_encode %{ ++ __ xvinsgr2vr_w($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4L(vecY dst, mRegL val, immIU2 idx) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.d $dst, $val, $idx\t# @insert4L" %} ++ ins_encode %{ ++ __ xvinsgr2vr_d($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert8F(vecY dst, regF val, immIU3 idx) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsve0.w $dst, $val, $idx\t# @insert8F" %} ++ ins_encode %{ ++ __ xvinsve0_w($dst$$FloatRegister, $val$$FloatRegister, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4D(vecY dst, regD val, immIU2 idx) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsve0.d $dst, $val, $idx\t# @insert4D" %} ++ ins_encode %{ ++ __ xvinsve0_d($dst$$FloatRegister, $val$$FloatRegister, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -------------------------------- Vector Blend ------------------------------ ++ ++instruct blendV16(vecX dst, vecX src1, vecX src2, vecX mask) ++%{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (VectorBlend (Binary src1 src2) mask)); ++ format %{ "vbitsel.v $dst, $src1, $src2, $mask\t# @blendV16" %} ++ ins_encode %{ ++ __ vbitsel_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $mask$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct blendV32(vecY dst, vecY src1, vecY src2, vecY mask) ++%{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (VectorBlend (Binary src1 src2) mask)); ++ format %{ "xvbitsel.v $dst, $src1, $src2, $mask\t# @blendV32" %} ++ ins_encode %{ ++ __ xvbitsel_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $mask$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -------------------------------- LoadMask ---------------------------------- ++ ++instruct loadmask16B(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadMask src)); ++ format %{ "vneg.b $dst, $src\t# @loadmask16B" %} ++ ins_encode %{ ++ __ vneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadmask32B(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadMask src)); ++ format %{ "xvneg.b $dst, $src\t# @loadmask32B" %} ++ ins_encode %{ ++ __ xvneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadmask16S(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorLoadMask src)); ++ format %{ 
"vloadmask $dst, $src\t# @loadmask16S" %} ++ ins_encode %{ ++ __ vext2xv_h_b($dst$$FloatRegister, $src$$FloatRegister); ++ __ xvneg_h($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//-------------------------------- StoreMask ---------------------------------- ++ ++instruct storemask16B(vecX dst, vecX src, immI_1 size) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (VectorStoreMask src size)); ++ format %{ "vneg.b $dst, $src\t# @storemask16B" %} ++ ins_encode %{ ++ __ vneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storemask32B(vecY dst, vecY src, immI_1 size) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (VectorStoreMask src size)); ++ format %{ "xvneg.b $dst, $src\t# @storemask32B" %} ++ ins_encode %{ ++ __ xvneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storemask16S(vecX dst, vecY src, immI_2 size, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (VectorStoreMask src size)); ++ effect(TEMP tmp); ++ format %{ "vstoremask $dst, $src\t# TEMP($tmp) @storemask16S" %} ++ ins_encode %{ ++ __ xvpermi_d($tmp$$FloatRegister, $src$$FloatRegister, 0b00001110); ++ __ vsrlni_b_h($tmp$$FloatRegister, $src$$FloatRegister, 0); ++ __ vneg_b($dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- VectorTest ----------------------------------- ++ ++instruct anytrue_in_maskV16(mRegI dst, vecX src1, vecX src2, vecX tmp) ++%{ ++ predicate(static_cast(n)->get_predicate() == BoolTest::ne); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp); ++ format %{ "vtest $dst, $src1, $src2(not used)\t# TEMP($tmp) @anytrue_in_maskV16" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. ++ __ vpermi_w($tmp$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vor_v($tmp$$FloatRegister, $src1$$FloatRegister, $tmp$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp$$FloatRegister, 0); ++ __ sltu($dst$$Register, R0, $dst$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct alltrue_in_maskV16(mRegI dst, vecX src1, vecX src2, vecX tmp) ++%{ ++ predicate(static_cast(n)->get_predicate() == BoolTest::overflow); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp); ++ format %{ "vtest $dst, $src1, $src2(not used)\t# TEMP($tmp) @alltrue_in_maskV16" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. ++ __ vpermi_w($tmp$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vand_v($tmp$$FloatRegister, $src1$$FloatRegister, $tmp$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp$$FloatRegister, 0); ++ __ sltui($dst$$Register, $dst$$Register, -1); ++ __ xori($dst$$Register, $dst$$Register, 1); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct anytrue_in_maskV32(mRegI dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) ++%{ ++ predicate(static_cast(n)->get_predicate() == BoolTest::ne); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp1, TEMP tmp2); ++ format %{ "xvtest $dst, $src1, $src2(not used)\t# TEMP($tmp1, $tmp2) @anytrue_in_maskV32" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. 
++ __ xvpermi_d($tmp1$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vor_v($tmp1$$FloatRegister, $src1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpermi_w($tmp2$$FloatRegister, $tmp1$$FloatRegister, 0b00001110); ++ __ vor_v($tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp1$$FloatRegister, 0); ++ __ sltu($dst$$Register, R0, $dst$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct alltrue_in_maskV32(mRegI dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) ++%{ ++ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp1, TEMP tmp2); ++ format %{ "xvtest $dst, $src1, $src2(not used)\t# TEMP($tmp1, $tmp2) @alltrue_in_maskV32" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. ++ __ xvpermi_d($tmp1$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vand_v($tmp1$$FloatRegister, $src1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpermi_w($tmp2$$FloatRegister, $tmp1$$FloatRegister, 0b00001110); ++ __ vand_v($tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp1$$FloatRegister, 0); ++ __ sltui($dst$$Register, $dst$$Register, -1); ++ __ xori($dst$$Register, $dst$$Register, 1); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Vector comparison ---------------------------- ++ ++instruct cmpV16(vecX dst, vecX src1, vecX src2, immI cond) ++%{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); ++ format %{ "vcompare $dst, $src1, $src2, $cond\t# @cmpV16" %} ++ ins_encode %{ ++ BasicType bt = vector_element_basic_type(this); ++ __ vector_compare($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, bt, $cond$$constant, 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmpV32(vecY dst, vecY src1, vecY src2, immI cond) ++%{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); ++ format %{ "xvcompare $dst, $src1, $src2, $cond\t# @cmpV32" %} ++ ins_encode %{ ++ BasicType bt = vector_element_basic_type(this); ++ __ vector_compare($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, bt, $cond$$constant, 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ---------------------------- LOAD_IOTA_INDICES ----------------------------- ++ ++instruct loadcon16B(vecX dst, immI_0 src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadConst src)); ++ format %{ "vld_con $dst, CONSTANT_MEMORY\t# @loadcon16B" %} ++ ins_encode %{ ++ __ li(AT, (long)StubRoutines::la::vector_iota_indices()); ++ __ vld($dst$$FloatRegister, AT, (int)0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadcon32B(vecY dst, immI_0 src) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadConst src)); ++ format %{ "xvld_con $dst, CONSTANT_MEMORY\t# @loadcon32B" %} ++ ins_encode %{ ++ __ li(AT, (long)StubRoutines::la::vector_iota_indices()); ++ __ xvld($dst$$FloatRegister, AT, (int)0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- LOAD_SHUFFLE ---------------------------------- ++ ++instruct loadShuffle16B(vecX dst) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadShuffle dst)); ++ format %{ "vld_shuffle $dst\t# @loadShuffle16B" %} ++ ins_encode %{ ++ // empty ++ %} ++ 
ins_pipe( pipe_slow ); ++%} ++ ++instruct loadShuffle32B(vecY dst) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadShuffle dst)); ++ format %{ "xvld_shuffle $dst\t# @loadShuffle32B" %} ++ ins_encode %{ ++ // empty ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadShuffle16S(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorLoadShuffle src)); ++ format %{ "vext2xv.hu.bu $dst, $src\t# @loadShuffle16S" %} ++ ins_encode %{ ++ __ vext2xv_hu_bu($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- Rearrange ------------------------------------- ++ ++instruct rearrange16B(vecX dst, vecX src, vecX shuffle) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorRearrange src shuffle)); ++ format %{ "vshuf.b $dst, $src, $shuffle\t# @rearrange16B" %} ++ ins_encode %{ ++ __ vshuf_b($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister, $shuffle$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rearrange32B(vecY dst, vecY src, vecY shuffle, vecY tmp) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorRearrange src shuffle)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvrearrange $dst, $src, $shuffle\t# TEMP($tmp) @rearrange32B" %} ++ ins_encode %{ ++ __ xvpermi_q($tmp$$FloatRegister, $src$$FloatRegister, 0x00); ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x11); ++ __ xvshuf_b($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister, $shuffle$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rearrange16S(vecY dst, vecY src, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorRearrange src dst)); ++ effect(TEMP tmp1, TEMP tmp2); ++ format %{ "xvrearrange $dst, $src, $dst\t# TEMP($tmp1, $tmp2) @rearrange16S" %} ++ ins_encode %{ ++ __ xvpermi_q($tmp1$$FloatRegister, $src$$FloatRegister, 0x00); ++ __ xvpermi_q($tmp2$$FloatRegister, $src$$FloatRegister, 0x11); ++ __ xvshuf_h($dst$$FloatRegister, $tmp2$$FloatRegister, $tmp1$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- PopCount -------------------------------------- ++ ++instruct popcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} ++ ins_encode %{ ++ __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popcount8I(vecY dst, vecY src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); ++ match(Set dst (PopCountVI src)); ++ format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} ++ ins_encode %{ ++ __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] 
); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. 
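One last note on this vector file before the macro assembler sources: the rearrange patterns defined above rely on vshuf.b / xvshuf.b, a byte-table permute in which every destination lane selects one source lane by the index stored in the shuffle vector. For the in-range indices C2 generates for VectorRearrange, the per-lane behaviour reduces to the following (plain C++, illustrative names only, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Scalar model of VectorRearrange on byte lanes:
    // dst[i] = src[shuffle[i]], with shuffle[i] in [0, lanes).
    static void rearrange_bytes(const uint8_t* src, const uint8_t* shuffle,
                                uint8_t* dst, int lanes) {
      for (int i = 0; i < lanes; i++) dst[i] = src[shuffle[i]];
    }

    int main() {
      uint8_t src[4]     = {10, 20, 30, 40};
      uint8_t shuffle[4] = {3, 3, 0, 1};
      uint8_t dst[4];
      rearrange_bytes(src, shuffle, dst, 4);
      printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  // 40 40 10 20
      return 0;
    }

The 32-byte variants need the extra xvpermi_q moves because xvshuf.b permutes each 128-bit half independently, so both halves of the source have to be made visible to each half before the shuffle.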
++ +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..a7062552f76 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +@@ -0,0 +1,3827 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/output.hpp" ++#endif ++ ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm->
++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { ++ jint& stub_inst = *(jint*)branch; ++ jint *pc = (jint *)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm. ++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? 
call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->output()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if 
(reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, AT); ++ move(tmp, SP); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ Label loop; ++ li(AT, os::vm_page_size()); ++ bind(loop); ++ sub_d(tmp, tmp, AT); ++ sub_d(size, size, AT); ++ st_d(size, tmp, 0); ++ blt(R0, size, loop); ++ ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub_d(tmp, tmp, AT); ++ st_d(size, tmp, 0); ++ } ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, SP, AT); ++ blt(AT, R0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
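Expressed as plain C++, the banging loop emitted by bang_stack_size() above looks like the sketch below. This is illustrative only and not part of the patch; page and shadow_bytes stand in for os::vm_page_size() and StackOverflow::stack_shadow_zone_size().

#include <cstdint>

// Sketch only: touch every page of the requested size, then the shadow pages.
static void bang_stack_sketch(volatile char* sp, intptr_t size,
                              intptr_t page, intptr_t shadow_bytes) {
  volatile char* tmp = sp;
  do {                                    // one page at a time, so a large
    tmp  -= page;                         // request cannot skip past the
    size -= page;                         // yellow/red guard zones
    *(volatile intptr_t*)tmp = size;      // the store is the "bang"
  } while (size > 0);
  for (intptr_t i = 0; i < shadow_bytes / page - 1; i++) {
    tmp -= page;                          // then touch the shadow pages below
    *(volatile intptr_t*)tmp = size;
  }
}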
++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++void MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ if (!swap_reg_contains_mark) { ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markWord::biased_lock_mask_in_place); ++ addi_d(AT, R0, markWord::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ li(AT, ~((int) markWord::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markWord::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. 
See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markWord::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
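The CAS at the heart of both the acquire and rebias paths above follows the same pattern: build the header we expect to find, OR in the current thread, and let one compare-and-swap decide the race. A minimal sketch, not part of the patch, with header_mask standing in for the combined biased_lock/age/epoch mask bits used in the code:

#include <atomic>
#include <cstdint>

// Sketch only: attempt to bias the object's mark word toward this thread.
static bool try_acquire_bias(std::atomic<uintptr_t>* mark_word,
                             uintptr_t observed_mark,
                             uintptr_t thread,
                             uintptr_t header_mask) {
  // Presumed unbiased header: keep bias pattern, age and epoch, clear the owner.
  uintptr_t expected = observed_mark & header_mask;
  uintptr_t biased   = expected | thread;      // bias toward the current thread
  return mark_word->compare_exchange_strong(expected, biased);
}

If the exchange fails, another thread won the bias, and the caller falls into the slow path that revokes it in the runtime, exactly as the surrounding comments describe.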
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); ++ addi_d(AT, R0, markWord::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ assert(number_of_arguments <= 4, "just check"); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ move(AT, SP); ++ bstrins_d(SP, R0, 3, 0); ++ addi_d(SP, SP, -(StackAlignmentInBytes)); ++ st_d(AT, SP, 0); ++ call(entry_point, relocInfo::runtime_call_type); ++ ld_d(SP, SP, 0); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++// Move an oop into a register. immediate is true if we want ++// immediate instructions and nmethod entry barriers are not enabled. ++// i.e. we are not going to patch this instruction while the code is being ++// executed by another thread. ++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ // nmethod entry barrier necessitate using the constant pool. They have to be ++ // ordered with respected to oop accesses. ++ // Using immediate literals would necessitate ISBs. 
++ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ relocate(rspec); ++ patchable_li52(dst, (long)dummy); ++ } else { ++ relocate(rspec); ++ patchable_li52(dst, (long)obj); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh){ ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++#ifndef PRODUCT ++ block_comment(msg); ++#endif ++ csrrd(R0, 0); ++ 
emit_int64((uintptr_t)msg); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register 
++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ lipc(AT, before_call); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ assert(StackAlignmentInBytes == 16, "must be"); ++ bstrins_d(SP, R0, 3, 0); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ 
MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ addi_d(FP, SP, 2 * wordSize); ++} ++ ++void MacroAssembler::leave() { ++ addi_d(SP, FP, -2 * wordSize); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, 2 * wordSize); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++ verify_cross_modify_fence_not_required(); ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T4; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ 
andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ push(S5); ++ move(S5, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ bstrins_d(SP, R0, 3, 0); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool acquire, bool in_nmethod) { ++ if (acquire) { ++ ld_d(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ } else { ++ ld_d(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ } ++ if (at_return) { ++ // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore, ++ // we may safely use the sp instead to perform the stack watermark check. ++ blt_far(AT, in_nmethod ? SP : FP, slow_path, false /* signed */); ++ } else { ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bnez(AT, slow_path); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
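A minimal sketch, not part of the patch, of the thread-local anchor that set_last_Java_frame() and reset_last_Java_frame() below maintain; the struct is a simplified stand-in for HotSpot's JavaFrameAnchor, not its real layout:

#include <cstdint>

struct frame_anchor_sketch {
  intptr_t* last_Java_sp;   // non-zero only while the thread is out in C land
  intptr_t* last_Java_fp;   // optional; cleared so compiled frames are not confused
  void*     last_Java_pc;   // lets the stack walker find the last Java frame
};

static void record_last_Java_frame(frame_anchor_sketch* a,
                                   intptr_t* sp, intptr_t* fp, void* pc) {
  a->last_Java_fp = fp;
  a->last_Java_pc = pc;
  a->last_Java_sp = sp;        // stored last, as in the patch: a set sp marks the anchor live
}

static void clear_last_Java_frame(frame_anchor_sketch* a, bool clear_fp) {
  a->last_Java_sp = nullptr;   // cleared first: a zero sp invalidates the frame
  if (clear_fp) a->last_Java_fp = nullptr;
  a->last_Java_pc = nullptr;   // always cleared; it may have been set by make_walkable()
}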
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc) { ++#ifndef OPT_THREAD ++ Register java_thread = T2; ++ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc->is_valid()) { ++ st_ptr(last_java_pc, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else if (is_uimm(value, 12)) { ++ ori(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)CompressedKlassPointers::encode(k); ++ ++ 
relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++// ((WeakHandle)result).resolve(); ++void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) { ++ assert_different_registers(rresult, rtmp); ++ Label resolved; ++ ++ // A null weak handle resolves to null. ++ beqz(rresult, resolved); ++ ++ // Only 64 bit platforms support GCs that require a tmp register ++ // Only IN_HEAP loads require a thread_tmp register ++ // WeakHandle::resolve is an indirection like jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ rresult, Address(rresult), rtmp, /*tmp_thread*/noreg); ++ bind(resolved); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ move(A1, reg); ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
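The "Fixed size instructions" note above refers to patchable_li52(), defined earlier in this hunk: it always emits exactly three instructions, padding with nops, so the sequence can later be patched in place. A minimal sketch, not part of the patch, of how a constant with up to 52 significant bits is split across the lu12i_w/ori/lu32i_d immediate fields:

#include <cstdint>

struct li52_fields {
  uint32_t lo12;   // ori      rd, rd, lo12   (bits  0..11)
  uint32_t lo20;   // lu12i_w  rd, lo20       (bits 12..31)
  uint32_t hi20;   // lu32i_d  rd, hi20       (bits 32..51)
};

// Sketch only: field extraction; instruction emission is left to the assembler.
static li52_fields split_li52(int64_t value) {
  li52_fields f;
  f.lo12 = (uint64_t)value         & 0xfffu;
  f.lo20 = ((uint64_t)value >> 12) & 0xfffffu;
  f.hi20 = ((uint64_t)value >> 32) & 0xfffffu;
  return f;        // values beyond 52 significant bits are rejected by the patch
}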
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 6 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++// used registers : SCR1, SCR2 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(SCR2, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(SCR1, SCR2, 0); ++ addi_d(SCR1, SCR1, 1); ++ st_w(SCR1, SCR2, 0); ++ ++ // make sure object is 'reasonable' ++ beqz(A1, exit); // if obj is NULL it is ok ++ ++#if INCLUDE_ZGC ++ if (UseZGC) { ++ // Check if mask is good. ++ // verifies that ZAddressBadMask & A1 == 0 ++ ld_ptr(AT, Address(TREG, ZThreadLocalData::address_bad_mask_offset())); ++ andr(AT, A1, AT); ++ bnez(AT, error); ++ } ++#endif ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(SCR1, oop_mask); ++ andr(SCR2, A1, SCR1); ++ li(SCR1, oop_bits); ++ bne(SCR2, SCR1, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ load_klass(SCR2, A1); ++ beqz(SCR2, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ push_call_clobbered_registers(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ pop_call_clobbered_registers(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::bswap_h(Register dst, Register src) { ++ revb_2h(dst, src); ++ ext_w_h(dst, dst); // sign extension of the lower 16 bits ++} ++ ++void MacroAssembler::bswap_hu(Register dst, Register src) { ++ revb_2h(dst, src); ++ bstrpick_d(dst, dst, 15, 0); // zero extension of the lower 16 bits ++} ++ ++void MacroAssembler::bswap_w(Register dst, Register src) { ++ revb_2w(dst, src); ++ slli_w(dst, dst, 0); // keep sign, clear upper bits ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != 
resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beqz(resflag, again); ++ } ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beqz(resflag, again); ++ } ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++static RegSet caller_saved_regset = RegSet::range(A0, A7) + RegSet::range(T0, T8) + RegSet::of(FP, RA) - RegSet::of(SCR1, SCR2); ++static FloatRegSet caller_saved_fpu_regset = FloatRegSet::range(F0, F23); ++ ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ push(caller_saved_regset - exclude); ++ push_fpu(caller_saved_fpu_regset); ++} ++ ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ pop_fpu(caller_saved_fpu_regset); ++ 
pop(caller_saved_regset - exclude); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++void MacroAssembler::push(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i ++) ++ st_d(as_Register(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i ++) ++ ld_d(as_Register(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++void MacroAssembler::push_fpu(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i++) ++ fst_d(as_FloatRegister(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop_fpu(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i++) ++ fld_d(as_FloatRegister(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++static int vpr_offset(int off) { ++ int slots_per_vpr = 0; ++ ++ if (UseLASX) ++ slots_per_vpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (UseLSX) ++ slots_per_vpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_vpr * VMRegImpl::stack_slot_size; ++} ++ ++void MacroAssembler::push_vp(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, vpr_offset(-align_up(count, 2))); ++ ++ for (int i = 0; i < count; i++) { ++ int off = vpr_offset(i); ++ if (UseLASX) ++ xvst(as_FloatRegister(regs[i]), SP, off); ++ else if (UseLSX) ++ vst(as_FloatRegister(regs[i]), SP, off); ++ } ++} ++ ++void MacroAssembler::pop_vp(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i++) { ++ int off = vpr_offset(i); ++ if (UseLASX) ++ xvld(as_FloatRegister(regs[i]), SP, off); ++ else if (UseLSX) ++ vld(as_FloatRegister(regs[i]), SP, off); ++ } ++ ++ addi_d(SP, SP, vpr_offset(align_up(count, 2))); ++} ++ ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld_d(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld_d(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld_d(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* ++} ++ ++void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) { ++ 
load_method_holder(rresult, rmethod); ++ ld_ptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. 
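Before the routines themselves, a minimal sketch (not part of the patch) of the compression arithmetic they implement; base and shift stand in for CompressedOops::base() and CompressedOops::shift(), and the null-preserving maskeqz steps are left out:

#include <cstdint>

// Sketch only: compress a heap pointer into a 32-bit narrow oop.
static uint32_t encode_heap_oop_sketch(uintptr_t oop, uintptr_t base, unsigned shift) {
  if (oop == 0) return 0;                        // null stays null
  return (uint32_t)((oop - base) >> shift);      // subtract base, then shift right
}

// Sketch only: the exact inverse, as emitted by the decode routines.
static uintptr_t decode_heap_oop_sketch(uint32_t narrow, uintptr_t base, unsigned shift) {
  if (narrow == 0) return 0;                     // null stays null
  return base + ((uintptr_t)narrow << shift);    // shift left, then add base
}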
++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (CompressedOops::base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(dst, src, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); 
++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (CompressedOops::base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(CompressedOops::base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (CompressedOops::base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (CompressedOops::base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (CompressedKlassPointers::base() != NULL) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 ++ && CompressedKlassPointers::shift() == 0) { ++ bstrpick_d(r, r, 31, 0); ++ return; ++ } ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)CompressedKlassPointers::base()); ++ sub_d(r, r, AT); ++ } ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (CompressedKlassPointers::base() != NULL) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 ++ && CompressedKlassPointers::shift() == 0) { ++ bstrpick_d(dst, src, 31, 0); ++ return; ++ } ++ li(dst, (int64_t)CompressedKlassPointers::base()); ++ sub_d(dst, src, dst); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
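The encode/decode pairs above implement the usual compressed-oops mapping: subtract the heap base kept in S5_heapbase, shift right by LogMinObjAlignmentInBytes, and invert that on decode, with maskeqz/masknez keeping NULL mapped to NULL without a branch (maskeqz(rd, rj, rk) yields rj when rk is non-zero and 0 otherwise; masknez is the complement, and the cmp_cmov helpers further down OR the two results to build a branchless select). A plain-C sketch of the computation, with hypothetical names, not taken from the patch:

    // Sketch of the narrow-oop mapping the routines above emit.
    // base == CompressedOops::base() (held in S5_heapbase), shift == CompressedOops::shift().
    static inline uint32_t encode_oop(uint64_t oop, uint64_t base, int shift) {
      if (oop == 0) return 0;                       // NULL stays NULL (maskeqz in the asm)
      return (uint32_t)((oop - base) >> shift);
    }
    static inline uint64_t decode_oop(uint32_t narrow, uint64_t base, int shift) {
      if (narrow == 0) return 0;                    // NULL stays NULL (maskeqz in the asm)
      return base + ((uint64_t)narrow << shift);
    }

The *_not_null variants skip the NULL handling entirely, and the klass variants use CompressedKlassPointers::base()/shift() with AT as scratch instead of S5_heapbase.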
++ if (CompressedKlassPointers::base() != NULL) { ++ if (CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0) { ++ lu32i_d(r, (uint64_t)CompressedKlassPointers::base() >> 32); ++ } else { ++ li(AT, (int64_t)CompressedKlassPointers::base()); ++ add_d(r, r, AT); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(AT, (int64_t)CompressedKlassPointers::base()); ++ alsl_d(r, r, AT, Address::times_8 - 1); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedKlassPointers::base() != NULL) { ++ if (CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0) { ++ move(dst, src); ++ lu32i_d(dst, (uint64_t)CompressedKlassPointers::base() >> 32); ++ } else { ++ li(dst, (int64_t)CompressedKlassPointers::base()); ++ add_d(dst, dst, src); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(dst, (int64_t)CompressedKlassPointers::base()); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ slli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::heap() != NULL) { ++ if (CompressedOops::base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)CompressedOops::ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)CompressedOops::ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); 
++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ add_d(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. 
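The scan just emitted is a straight linear search of the sub-klass's secondary-supers array, counted down by its length, with the secondary-super cache updated on a hit before branching to L_success. A rough C equivalent (signature and layout simplified for illustration, not the real HotSpot API):

    // Sketch of the loop generated by check_klass_subtype_slow_path().
    static bool is_subtype_slow(Klass** secondary_supers, int len,
                                Klass* super, Klass** secondary_super_cache) {
      for (int i = 0; i < len; i++) {               // beq(temp2_reg, R0, *L_failure) once len hits 0
        if (secondary_supers[i] == super) {
          *secondary_super_cache = super;           // st_d(super_klass, super_cache_addr)
          return true;                              // b(*L_success)
        }
      }
      return false;                                 // falls through to L_failure
    }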
++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) { ++ Register rthread = TREG; ++#ifndef OPT_THREAD ++ get_thread(rthread); ++#endif ++ ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, rthread, scratch); ++ ++ Label L_fallthrough; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } ++ ++ // Fast path check: class is fully initialized ++ ld_b(scratch, Address(klass, InstanceKlass::init_state_offset())); ++ addi_d(scratch, scratch, -InstanceKlass::fully_initialized); ++ beqz(scratch, *L_fast_path); ++ ++ // Fast path check: current thread is initializer thread ++ ld_d(scratch, Address(klass, InstanceKlass::init_thread_offset())); ++ if (L_slow_path == &L_fallthrough) { ++ beq(rthread, scratch, *L_fast_path); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(rthread, scratch, *L_slow_path); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. 
++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ if (itable_index.is_constant()) { ++ li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); ++ add_d(recv_klass, recv_klass, AT); ++ } else { ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ } ++ ++ Label search, found_method; ++ ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ if (return_method) { ++ // Got a hit. 
++ ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); ++ } ++ ++ ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); ++} ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ CardTable::CardValue* byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
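lookup_interface_method() above performs the megamorphic invokeinterface lookup: it walks the itableOffsetEntry records that follow the vtable until it finds the requested interface (a NULL entry terminates the list and means the receiver does not implement it), then indexes into that interface's method block. In simplified C, with the entry layout reduced to just what the generated code touches (a sketch, not the VM's real itableOffsetEntry/itableMethodEntry types):

    struct OffsetEntry { Klass* interface; uint32_t offset; };
    static Method* itable_lookup(Klass* recv, Klass* intf, int itable_index,
                                 OffsetEntry* first_entry) {
      for (OffsetEntry* e = first_entry; ; e++) {
        if (e->interface == NULL) return NULL;      // L_no_such_interface
        if (e->interface == intf) {                 // found_method
          Method** methods = (Method**)((char*)recv + e->offset);
          return methods[itable_index];             // ldx_d(method_result, recv_klass, scan_temp)
        }
      }
    }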
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case LT: 
++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ 
fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::membar(Membar_mask_bits hint){ ++ address prev = pc() - NativeInstruction::sync_instruction_size; ++ address last = code()->last_insn(); ++ if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { ++ code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); ++ block_comment("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ Assembler::membar(hint); ++ } ++} ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. 
++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ 
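For reference, the byte-at-a-time recurrence documented above (val = crc_table[(val ^ crc) & 0xFF]; crc = val ^ (crc >> 8)) is what update_byte_crc32() emits with a table load; kernel_crc32() and kernel_crc32c() avoid the table by using the LoongArch crc.w.*.w / crcc.w.*.w instructions over 8-, 4- and 1-byte chunks. A C sketch of the table-driven step only (illustrative):

    // Byte-wise CRC-32 update matching the comment above.
    static inline uint32_t crc32_update_byte(uint32_t crc, uint8_t val,
                                             const uint32_t* crc_table) {
      uint8_t idx = (uint8_t)(val ^ crc);           // (val ^ crc) & 0xFF
      return crc_table[idx] ^ (crc >> 8);           // val ^ (crc >> 8)
    }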
bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ Label Loop, End, Nega, Done; ++ ++ orr(result, R0, R0); ++ bge(R0, len, Done); ++ ++ li(AT, 0x8080808080808080); ++ ++ addi_d(len, len, -8); ++ blt(len, R0, End); ++ ++ bind(Loop); ++ ld_d(result, ary1, 0); ++ andr(result, result, AT); ++ bnez(result, Nega); ++ beqz(len, Done); ++ addi_d(len, len, -8); ++ addi_d(ary1, ary1, 8); ++ bge(len, R0, Loop); ++ ++ bind(End); ++ ld_d(result, ary1, 0); ++ slli_d(len, len, 3); ++ sub_d(len, R0, len); ++ sll_d(result, result, len); ++ andr(result, result, AT); ++ beqz(result, Done); ++ ++ bind(Nega); ++ ori(result, R0, 1); ++ ++ bind(Done); ++} ++ ++// Compress char[] to byte[]. len must be positive int. ++// jtreg: TestStringIntrinsicRangeChecks.java ++void MacroAssembler::char_array_compress(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) { ++ Label Loop, Done, Once, Fail; ++ ++ move(result, len); ++ bge(R0, result, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ li(tmp3, 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Fail); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ st_w(tmp1, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 4); ++ addi_d(src, src, 8); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_d(AT, src, 0); ++ ++ bstrpick_d(tmp1, AT, 15, 0); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 31, 16); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 1); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 47, 32); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 2); ++ b(Done); ++ ++ bind(Fail); ++ move(result, R0); ++ ++ bind(Done); ++} ++ ++// Inflate byte[] to char[]. len must be positive int. 
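char_array_compress() above, and the byte_array_inflate() that follows, move between UTF-16 char[] data and latin-1 byte[] data four elements at a time; the inline hex comments (0x00a100b200c300d4 -> 0x00000000a1b2c3d4) describe the packing done with srli/or/bstrpick/bstrins once the 0xff00ff00ff00ff00 mask has confirmed every char fits in one byte. A scalar C sketch of that pack step (illustrative only):

    // Pack four UTF-16 code units (loaded as one little-endian 64-bit word)
    // into four bytes, failing if any unit needs more than 8 bits.
    static inline bool pack4_latin1(uint64_t four_chars, uint32_t* out) {
      if (four_chars & 0xff00ff00ff00ff00ULL) return false;   // bnez(tmp2, Fail)
      uint32_t packed = 0;
      for (int i = 0; i < 4; i++)
        packed |= (uint32_t)((four_chars >> (16 * i)) & 0xff) << (8 * i);
      *out = packed;                                           // st_w(tmp1, dst, 0)
      return true;
    }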
++// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java ++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2) { ++ Label Loop, Once, Done; ++ ++ bge(R0, len, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_wu(tmp1, src, 0); ++ ++ // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 23, 16); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 39, 32); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 55, 48); ++ ++ st_d(tmp2, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 8); ++ addi_d(src, src, 4); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_wu(tmp1, src, 0); ++ ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ st_h(tmp2, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 15, 8); ++ st_h(tmp2, dst, 2); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 23, 16); ++ st_h(tmp2, dst, 4); ++ ++ bind(Done); ++} ++ ++// Intrinsic for ++// ++// - java.lang.StringCoding::implEncodeISOArray ++// - java.lang.StringCoding::implEncodeAsciiArray ++// ++// This version always returns the number of characters copied. ++void MacroAssembler::encode_iso_array(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, bool ascii) { ++ Label Loop, Done, Once; ++ ++ move(result, R0); // init in case of bad value ++ bge(R0, len, Done); ++ ++ srai_w(AT, len, 2); ++ ++ li(tmp3, ascii ? 0xff80ff80ff80ff80 : 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Once); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ stx_w(tmp1, dst, result); ++ addi_w(AT, AT, -1); ++ addi_d(src, src, 8); ++ addi_w(result, result, 4); ++ b(Loop); ++ ++ bind(Once); ++ beq(len, result, Done); ++ ld_hu(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Done); ++ stx_b(tmp1, dst, result); ++ addi_d(src, src, 2); ++ addi_w(result, result, 1); ++ b(Once); ++ ++ bind(Done); ++} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, 
SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ ++#ifndef PRODUCT ++void MacroAssembler::verify_cross_modify_fence_not_required() { ++ if (VerifyCrossModifyFence) { ++ // Check if thread needs a cross modify fence. ++ ld_bu(SCR1, Address(TREG, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ Label fence_not_required; ++ beqz(SCR1, fence_not_required); ++ // If it does then fail. ++ move(A0, TREG); ++ call(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure)); ++ bind(fence_not_required); ++ } ++} ++#endif +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..c24d8a4712a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +@@ -0,0 +1,754 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ public: ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. 
The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ protected: ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc); ++ ++ void 
reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_weak_handle(Register result, Register tmp); ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ void load_method_holder_cld(Register rresult, Register rmethod); ++ void load_method_holder(Register holder, Register method); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int 
con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ void clinit_barrier(Register klass, ++ Register scratch, ++ Label* L_fast_path = NULL, ++ Label* L_slow_path = NULL); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) 
++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(RA0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool acquire, bool in_nmethod); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ void biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ void increment(Address addr, int imm = 1); ++ void decrement(Address addr, int imm = 1); ++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); } ++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); } ++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); } ++ // Helper functions for statistics gathering. 
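// Note on the immediate ranges used by bang_stack_with_offset() above: st.w
// takes a 12-bit signed offset, so -offset is encodable whenever offset <= 2048,
// while stptr.w takes a 14-bit signed offset scaled by 4, hence the 32768 limit
// and the 4-byte alignment test; anything larger goes through li/sub_d.
// The sketch below shows how a caller typically walks the shadow pages with
// this helper. It is illustrative only (not part of this patch); the 16 KiB
// page size and the free-standing function are assumptions for the example.
//
//   static void bang_shadow_pages(MacroAssembler* masm, int shadow_zone_bytes) {
//     const int page_size = 16 * 1024;            // assumed base page size
//     for (int off = page_size; off <= shadow_zone_bytes; off += page_size) {
//       masm->bang_stack_with_offset(off);        // touches SP - off
//     }
//   }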
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ void bswap_h(Register dst, Register src); ++ void bswap_hu(Register dst, Register src); ++ ++ // convert big endian integer to little endian integer ++ void bswap_w(Register dst, Register src); ++ ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ 
bool sign, bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ ++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} ++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} ++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } ++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } ++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop () { addi_d(SP, SP, 8); } ++ void pop2 () { addi_d(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except SCR1 and SCR2. (They are always scratch, ++ // so we don't have to protect them.) Only save the lower 64 bits ++ // of each vector register. Additional registers can be excluded ++ // in a passed RegSet. ++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); ++ ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } ++ void push(RegSet regs) { if (regs.bits()) push(regs.bits()); } ++ void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); } ++ void push_fpu(FloatRegSet regs) { if (regs.bits()) push_fpu(regs.bits()); } ++ void pop_fpu(FloatRegSet regs) { if (regs.bits()) pop_fpu(regs.bits()); } ++ void push_vp(FloatRegSet regs) { if (regs.bits()) push_vp(regs.bits()); } ++ void pop_vp(FloatRegSet regs) { if (regs.bits()) pop_vp(regs.bits()); } ++ ++ void li(Register rd, jlong value); ++ void li(Register rd, address addr) { li(rd, (long)addr); } ++ void patchable_li52(Register rd, jlong value); ++ void lipc(Register rd, Label& L); ++ ++ void move(Register rd, Register rs) { orr(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ // Load the base of the cardtable byte map into reg. 
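// Usage sketch for the call-clobbered save/restore pair declared above
// (illustrative only, not part of this patch): bracket a call into the
// runtime while keeping one live value out of the save set. RegSet::of() is
// assumed to come from the companion register definitions, and RA0 stands in
// for whatever register holds the live value.
//
//   push_call_clobbered_registers_except(RegSet::of(RA0));
//   // ... emit the runtime call here ...
//   pop_call_clobbered_registers_except(RegSet::of(RA0));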
++ void load_byte_map_base(Register reg); ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea(Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ void membar(Membar_mask_bits hint); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ code()->clear_last_insn(); ++ } ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++ // Code for java.lang.StringCoding::hasNegatives() instrinsic. ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ // Code for java.lang.StringUTF16::compress intrinsic. ++ void char_array_compress(Register src, Register dst, Register len, ++ Register result, Register tmp1, ++ Register tmp2, Register tmp3); ++ ++ // Code for java.lang.StringLatin1::inflate intrinsic. ++ void byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2); ++ ++ // Encode UTF16 to ISO_8859_1 or ASCII. ++ // Return len on success or position of first mismatch. ++ void encode_iso_array(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, bool ascii); ++ ++ // Code for java.math.BigInteger::mulAdd intrinsic. ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ ++#undef VIRTUAL ++ ++private: ++ void push(unsigned int bitset); ++ void pop(unsigned int bitset); ++ void push_fpu(unsigned int bitset); ++ void pop_fpu(unsigned int bitset); ++ void push_vp(unsigned int bitset); ++ void pop_vp(unsigned int bitset); ++ ++ // Check the current thread doesn't need a cross modify fence. 
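// For reference, the byte-at-a-time step that update_byte_crc32() above emits
// in assembly is the classic table-driven CRC-32 update; written as plain C++
// (illustration only, using <cstdint> fixed-width types) it is:
//
//   static inline uint32_t crc32_step(uint32_t crc, uint8_t val,
//                                     const uint32_t* table) {
//     return (crc >> 8) ^ table[(crc ^ val) & 0xff];
//   }
//
// The initial and final bit inversions required by java.util.zip.CRC32 are
// handled around the loop by the intrinsic's callers, not in this step.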
++ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +new file mode 100644 +index 00000000000..49302590c37 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +new file mode 100644 +index 00000000000..63b5b0da7e7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +@@ -0,0 +1,1633 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". 
Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// 
t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... 
/* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 
2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. 
++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. 
++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? 
fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? 
zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ 
vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ 
sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. 
j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. 
for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in FA0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { ++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, ++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; ++ li(SCR2, dsin_coef); ++ fld_d(s5, SCR2, 32); ++ fld_d(s6, SCR2, 40); ++ fmul_d(z, x, x); // z = x*x; ++ fld_d(s1, SCR2, 0); ++ fld_d(s2, SCR2, 8); ++ fld_d(s3, SCR2, 16); ++ fld_d(s4, SCR2, 24); ++ fmul_d(v, z, x); // v = z*x; ++ ++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { ++ fmadd_d(r, z, s6, s5); ++ // initialize "half" in current block to utilize 2nd FPU. However, it's ++ // not a part of this block ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, s4); ++ fmadd_d(r, z, r, s3); ++ fmadd_d(r, z, r, s2); ++ } ++ ++ if (!iyIsOne) { ++ // return x+v*(S1+z*r); ++ fmadd_d(s1, z, r, s1); ++ fmadd_d(FA0, v, s1, x); ++ } else { ++ // return x-((z*(half*y-v*r)-y)-v*S1); ++ fmul_d(s6, half, y); // half*y ++ fnmsub_d(s6, v, r, s6); // half*y-v*r ++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) ++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) ++ fadd_d(FA0, x, s6); ++ } ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. 
cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. 
Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. 
++// Final switch uses equivalent bit checks(tbz/tbnz) ++// Input and output: ++// 1. Input for generated function: X = A0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address of pio2 table, dsin_coef = address of dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in FA0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, ++ address dsin_coef, address dcos_coef) { ++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; ++ Register X = A0, absX = A1, n = A2, ix = A3; ++ FloatRegister y0 = FA4, y1 = FA5; ++ ++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { ++ movfr2gr_d(X, FA0); ++ li(SCR2, 0x3e400000); ++ li(SCR1, 0x3fe921fb); // high word of pi/4. ++ bstrpick_d(absX, X, 62, 0); // absX ++ li(T0, 0x7ff0000000000000); ++ srli_d(ix, absX, 32); // set ix ++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) ++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return ++ blt(absX, T0, ARG_REDUCTION); ++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). ++ // Set last bit unconditionally to make it NaN ++ ori(T0, T0, 1); ++ movgr2fr_d(FA0, T0); ++ jr(RA); ++ } ++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { ++ bind(TINY_X); ++ if (isCos) { ++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000) ++ } ++ jr(RA); ++ } ++ bind(ARG_REDUCTION); /* argument reduction needed */ ++ block_comment("n = __ieee754_rem_pio2(x,y);"); { ++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); ++ } ++ block_comment("switch(n&3) {case ... }"); { ++ if (isCos) { ++ srli_w(T0, n, 1); ++ xorr(absX, n, T0); ++ andi(T0, n, 1); ++ bnez(T0, RETURN_SIN); ++ } else { ++ andi(T0, n, 1); ++ beqz(T0, RETURN_SIN); ++ } ++ generate_kernel_cos(y0, dcos_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ bind(RETURN_SIN); ++ generate_kernel_sin(y0, true, dsin_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ } ++ bind(EARLY_CASE); ++ vxor_v(y1, y1, y1); ++ if (isCos) { ++ generate_kernel_cos(FA0, dcos_coef); ++ } else { ++ generate_kernel_sin(FA0, false, dsin_coef); ++ } ++ bind(DONE); ++ jr(RA); ++} +diff --git a/src/hotspot/cpu/loongarch/matcher_loongarch.hpp b/src/hotspot/cpu/loongarch/matcher_loongarch.hpp +new file mode 100644 +index 00000000000..21a691b1faa +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/matcher_loongarch.hpp +@@ -0,0 +1,145 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MATCHER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MATCHER_LOONGARCH_HPP ++ ++ // Defined within class Matcher ++ ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; ++ ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = false; ++ ++ static const bool supports_scalable_vector() { ++ return false; ++ } ++ ++ // LoongArch doesn't support misaligned vectors store/load? FIXME ++ static constexpr bool misaligned_vectors_ok() { ++ return false; ++ } ++ ++ // Whether code generation need accurate ConvI2L types. ++ static const bool convi2l_type_required = true; ++ ++ // Does the CPU require late expand (see block.cpp for description of late expand)? ++ static const bool require_postalloc_expand = false; ++ ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; ++ ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; ++ ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } ++ ++ // No additional cost for CMOVL. ++ static constexpr int long_cmove_cost() { return 0; } ++ ++ // No CMOVF/CMOVD with SSE2 ++ static int float_cmove_cost() { return ConditionalMoveLimit; } ++ ++ static bool narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++ } ++ ++ static bool narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++ } ++ ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++ } ++ ++ static bool const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return CompressedKlassPointers::base() == NULL; ++ return true; ++ } ++ ++ // Is it better to copy float constants, or load them directly from memory? ++ // Intel can load a float constant from a direct address, requiring no ++ // extra registers. Most RISCs will have to materialize an address into a ++ // register first, so they would do better to copy the constant from stack. ++ static const bool rematerialize_float_constants = false; ++ ++ // If CPU can load and store mis-aligned doubles directly then no fixup is ++ // needed. Else we split the double into 2 integer pieces and move it ++ // piece-by-piece. Only happens when passing doubles into C code as the ++ // Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = false; ++ ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. 
++ static const bool strict_fp_requires_explicit_rounding = false; ++ ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } ++ ++ // Do ints take an entire long register or just half? ++ static const bool int_in_long = true; ++ ++ // Does the CPU support vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return true; ++ } ++ ++ // Does the CPU support vector variable rotate instructions? ++ static constexpr bool supports_vector_variable_rotates(void) { ++ return true; ++ } ++ ++ // Does the CPU support vector unsigned comparison instructions? ++ static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return true; ++ } ++ ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } ++ ++ // true means we have fast l2f conversion ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } ++ ++ // Implements a variant of EncodeISOArrayNode that encodes ASCII only ++ static const bool supports_encode_ascii_array = true; ++ ++#endif // CPU_LOONGARCH_MATCHER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +new file mode 100644 +index 00000000000..31b3040c3a5 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +@@ -0,0 +1,585 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj_reg, vmClassID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. 
++ if (iid == vmIntrinsics::_linkToNative) { ++ __ stop("Should not reach here"); // empty stubs make SG sick ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(vmIntrinsics::as_int(iid), 12), "Oops, iid is not simm12! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget, temp3); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget, temp3); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz, temp2); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ // may not be needed by safer and unexpensive here ++ PreserveExceptionMark pem(Thread::current()); ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). 
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +new file mode 100644 +index 00000000000..a97520ea768 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(MethodHandle_klass), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +new file mode 100644 +index 00000000000..25ef0ecd224 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +@@ -0,0 +1,529 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. 
++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_ori_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::ori_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return 
Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_ori_2nop()) { ++ return (int_at(0) >> 10) & 0xfff; ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0x04000000; // csrrd R0 0 ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ address ret = (address)-1; ++ ++ // short ++ if (is_short()) { ++ ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ return ret == instruction_address() ? (address)-1 : ret; ++ } ++ ++ // far ++ if (is_far()) { ++ ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ return ret == instruction_address() ? 
(address)-1 : ret; ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x40 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +new file mode 100644 +index 00000000000..0ec8ebddf09 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +@@ -0,0 +1,531 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf, ++ sync_instruction_size = 4 ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
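The offset arithmetic behind the short and far forms handled by NativeFarCall::destination() and NativeJump::jump_destination() earlier in nativeInst_loongarch.cpp can be checked in isolation. The sketch below is a standalone model of that arithmetic, assuming the documented LoongArch field placement (b/bl: offs[15:0] in bits 25:10, offs[25:16] in bits 9:0; pcaddu18i: si20 in bits 24:5; jirl: offs16 in bits 25:10); the helper names and the encodings built in main() are illustrative and not part of the patch.

#include <cstdint>
#include <cstdio>

// Sign-extend the low `bits` bits of v.
static int64_t sext(uint64_t v, int bits) {
  return (int64_t)(v << (64 - bits)) >> (64 - bits);
}

// Short form: destination = address of the b/bl + (offs26 << 2).
static int64_t short_branch_dest(int64_t pc, uint32_t insn) {
  uint64_t offs26 = ((insn & 0x3ff) << 16) | ((insn >> 10) & 0xffff);
  return pc + (sext(offs26, 26) << 2);
}

// Far form: destination = address of the pcaddu18i + (si20 << 18) + (offs16 << 2).
static int64_t far_branch_dest(int64_t pc, uint32_t pcaddu18i, uint32_t jirl) {
  int64_t hi = sext((pcaddu18i >> 5) & 0xfffff, 20) << 18;
  int64_t lo = sext((jirl >> 10) & 0xffff, 16) << 2;
  return pc + hi + lo;
}

int main() {
  // bl with offs26 == -1 branches back by one instruction (0x54000000 is bl's major opcode).
  uint32_t bl = 0x54000000u | (0xffffu << 10) | 0x3ffu;
  printf("%lld\n", (long long)(short_branch_dest(0x1000, bl) - 0x1000)); // -4

  // pcaddu18i with si20 == 1 followed by jirl with offs16 == 4 reaches pc + 0x40010.
  uint32_t pcaddu18i = 0x1e000000u | (1u << 5);
  uint32_t jirl      = 0x4c000000u | (4u << 10) | (1u << 5);
  printf("%#llx\n", (unsigned long long)(far_branch_dest(0x1000, pcaddu18i, jirl) - 0x1000)); // 0x40010
  return 0;
}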
++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++class NativeCall; ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ address trampoline_jump(CodeBuffer &cbuf, address dest); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. ++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). 
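The li52 idiom that the next class, NativeMovConstReg, recognizes builds a 52-bit constant from at most three instructions: lu12i.w supplies bits 31:12, ori fills bits 11:0, lu32i.d supplies bits 51:32, and nops pad the unused slots. A minimal sketch of the recombination for the full is_lu12iw_ori_lu32id() form follows, assuming the usual semantics of those three instructions; Assembler::merge() in NativeMovConstReg::data() presumably computes the same thing. The function name and sample value are illustrative only.

#include <cstdint>
#include <cstdio>

// Recombine the immediate fields of a lu12i.w / ori / lu32i.d sequence into
// the 52-bit constant it loads, mirroring what NativeMovConstReg::data()
// does for the is_lu12iw_ori_lu32id() pattern.
static int64_t li52(uint32_t lu12i_si20, uint32_t ori_ui12, uint32_t lu32i_si20) {
  // lu12i.w rd, si20    : rd = sign_extend(si20 << 12)   -> bits 31:12
  // ori     rd, rd, ui12: rd |= ui12                     -> bits 11:0
  // lu32i.d rd, si20    : rd[63:32] = sign_extend(si20)  -> bits 51:32 plus sign
  int64_t low32 = (uint32_t)((lu12i_si20 << 12) | (ori_ui12 & 0xfff));
  int64_t high  = (int64_t)(int32_t)(lu32i_si20 << 12) * (1LL << 20);
  return high | low32;
}

int main() {
  // 0x123456789a decomposes as lu12i si20 = 0x34567, ori ui12 = 0x89a, lu32i si20 = 0x12.
  printf("%#llx\n", (unsigned long long)li52(0x34567, 0x89a, 0x12)); // prints 0x123456789a
  return 0;
}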
++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_ori_2nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_ori_2nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. 
++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. 
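Taken together, the la_specific_constants of NativeCallTrampolineStub describe a 24-byte stub: a pcaddi/ld_d/jirl prefix (the pattern matched by is_NativeCallTrampolineStub_at() just below), one padding word, and an 8-byte destination slot at data_offset == 16 that destination() and set_destination() access directly. The struct below is only a layout model under those assumptions; the exact register operands and immediates are chosen by the MacroAssembler and are not spelled out here, and the struct and field names are illustrative.

#include <cstddef>
#include <cstdint>

// Layout implied by la_specific_constants: 6 words in total, with the 64-bit
// target stored after the code so it can be repointed by patching only the
// data slot, leaving the recognizable instruction prefix untouched.
struct TrampolineStubLayout {
  uint32_t pcaddi;  // forms a pc-relative address for reaching `dest` (si20 chosen by the emitter)
  uint32_t ld_d;    // loads the 64-bit destination into a scratch register
  uint32_t jirl;    // indirect jump; rd == r0, so no link (as checked below)
  uint32_t pad;     // keeps `dest` 8-byte aligned
  uint64_t dest;    // data_offset == 16; updated by set_destination()
};

static_assert(sizeof(TrampolineStubLayout) == 24, "instruction_size == 6 * 4");
static_assert(offsetof(TrampolineStubLayout, dest) == 16, "data_offset == 4 * 4");

int main() { return 0; }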
++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +new file mode 100644 +index 00000000000..07aa5b22817 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg base_reg, int slot_idx) const { ++ if (base_reg->is_FloatRegister()) { ++ assert(base_reg->is_concrete(), "must pass base reg"); ++ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; ++ address base_location = location(base_reg); ++ if (base_location != NULL) { ++ return base_location + offset_in_bytes; ++ } else { ++ return NULL; ++ } ++ } else { ++ return location(base_reg->next(slot_idx)); ++ } ++ } ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +new file mode 100644 +index 00000000000..58f40b747c2 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp +new file mode 100644 +index 00000000000..54d90167a52 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? names[encoding()] : "fccnoreg"; ++} +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp +new file mode 100644 +index 00000000000..85669f435c7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp +@@ -0,0 +1,499 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "logging/log.hpp" ++#include "utilities/bitMap.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/ticks.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 ((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 
((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define RA0 R4 ++#define RA1 R5 ++#define RA2 R6 ++#define RA3 R7 ++#define RA4 R8 ++#define RA5 R9 ++#define RA6 R10 ++#define RA7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 RA0 ++#define V1 RA1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool 
is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 
((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( 
fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) ++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // its optoregs. ++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++template <class RegImpl> ++class AbstractRegSet { ++ uint32_t _bitset; ++ ++ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ AbstractRegSet() : _bitset(0) { } ++ ++ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } ++ ++ AbstractRegSet operator+(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet operator-(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet &operator+=(const AbstractRegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ AbstractRegSet &operator-=(const AbstractRegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static AbstractRegSet of(RegImpl r1) { ++ return AbstractRegSet(r1); ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2) { ++ return of(r1) + r2; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) { ++ return of(r1, r2, r3, r4) + r5; ++ } ++ ++ static AbstractRegSet range(RegImpl start, RegImpl end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return AbstractRegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++typedef AbstractRegSet<Register> RegSet; ++typedef AbstractRegSet<FloatRegister> FloatRegSet; ++ ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +new file mode 100644 +index 00000000000..079d581c91f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (type() == relocInfo::internal_word_type || ++ type() == relocInfo::section_word_type) { ++ MacroAssembler::pd_patch_instruction(addr(), x); ++ } else if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (CompressedOops::is_in((void*)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode(cast_to_oop(x)), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedKlassPointers::encode((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedKlassPointers::encode((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +new file mode 100644 +index 00000000000..c85ca4963f3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +new file mode 100644 +index 00000000000..fae11f47e62 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/vmreg.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, framesize * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +new file mode 100644 +index 00000000000..0b3ea4c42f3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +@@ -0,0 +1,3113 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = 
FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, slots_save() * VMRegImpl::stack_slot_size); ++ ++ 
return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 8 bytes registers are saved by default using fld/fst instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 8; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. 
++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ push_call_clobbered_registers(); ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align the stack ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, TSR); ++ __ pop_call_clobbered_registers(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = align_up(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! 
I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move Method* to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect Method* in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. 
++ __ move(T5, Rmethod); ++ __ jr(T4); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the Method*. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ } ++ address c2i_entry = __ pc(); ++ ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ ++ { // Bypass the barrier for non-static methods ++ __ ld_w(AT, Address(Rmethod, Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beqz(AT, L_skip_barrier); // non-static ++ } ++ ++ __ load_method_holder(T4, Rmethod); ++ __ clinit_barrier(T4, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->c2i_entry_barrier(masm); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} ++ ++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on LA"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). 
++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return 
align_up(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_VOID: ++ break; ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -3 * wordSize); ++ break; ++ case T_DOUBLE: ++ __ fst_d(FSF, FP, -3 * wordSize); ++ break; ++ case T_LONG: ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -3 * wordSize); ++ break; ++ default: ++ __ st_w(V0, FP, -3 * wordSize); ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_VOID: ++ break; ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -3 * wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -3 * wordSize); ++ break; ++ case T_LONG: ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -3 * wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -3 * wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = align_up(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ __ mov_metadata(T4, method->method_holder()); // InstanceKlass* ++ __ clinit_barrier(T4, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ ++ __ bind(L_skip_barrier); ++ } ++ ++#ifdef COMPILER1 ++ if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { ++ // Object.hashCode can pull the hashCode from the header word ++ // instead of doing a full VM transition once it's been computed. ++ // Since hashCode is usually polymorphic at call sites we can't do ++ // this optimization at the call site without a lot of work. ++ Label slowCase; ++ Register receiver = T0; ++ Register result = V0; ++ __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); ++ // check if locked ++ __ andi(AT, result, markWord::unlocked_value); ++ __ beq(AT, R0, slowCase); ++ if (UseBiasedLocking) { ++ // Check if biased and fall through to runtime if so ++ __ andi (AT, result, markWord::biased_lock_bit_in_place); ++ __ bne(AT, R0, slowCase); ++ } ++ // get hash ++ __ li(AT, markWord::hash_mask_in_place); ++ __ andr (AT, result, AT); ++ // test if hashCode exists ++ __ beq (AT, R0, slowCase); ++ __ shr(result, markWord::hash_shift); ++ __ jr(RA); ++ __ bind (slowCase); ++ } ++#endif // COMPILER1 ++ ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(masm); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. 
++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size; ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. 
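For a regular (non-critical) native the move order simply walks the Java argument vector backwards, pairing each Java index with its C-side slot, which is shifted by the hidden JNIEnv*/mirror arguments. A small sketch of how that (i, c_arg) pairing could be produced (hypothetical helper, not HotSpot code):

#include <utility>
#include <vector>

// Pair every Java argument index with its destination index in the C
// signature, walking from the last argument down to the first, the same
// order the shuffle loop above uses for regular JNI natives.
std::vector<std::pair<int,int>> shuffle_order(int total_in_args, int total_c_args) {
    std::vector<std::pair<int,int>> order;
    for (int i = total_in_args - 1, c = total_c_args - 1; i >= 0; --i, --c)
        order.push_back({i, c});   // move Java arg i into C slot c
    return order;
}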
++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load oop into a register ++ __ movoop(oop_handle_reg, ++ JNIHandles::make_local((method->method_holder())->java_mirror()), ++ /*immediate*/true); ++ ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
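Spelled out in plain C++, the recursion test that the following instructions implement is roughly the predicate below: the mark returned by the cmpxchg is treated as a recursive stack lock when it is 4-byte aligned and lies within one page above SP. The 4 KiB page size is only an illustrative assumption; the real code reads os::vm_page_size():

#include <cstdint>

// Illustrative only: assumes a 4 KiB page and that both the stack pointer
// and the page size have their low two bits clear.
constexpr intptr_t kPageSize = 4096;

// True when the mark word is 4-byte aligned and points into the current
// page just above SP, i.e. it is a BasicLock already on this thread's stack.
bool is_recursive_stack_lock(intptr_t mark, intptr_t sp) {
    return ((mark - sp) & (3 - kPageSize)) == 0;
}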
++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ } ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ ++ Label after_transition; ++ ++ // If this is a critical native, check for a safepoint or suspend request after the call. ++ // If a safepoint is needed, transition to native, then to native_trans to handle ++ // safepoints like the native methods that are not critical natives. ++ if (is_critical_native) { ++ Label needs_safepoint; ++ __ safepoint_poll(needs_safepoint, thread, false /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, after_transition); ++ __ bind(needs_safepoint); ++ } ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. 
++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) __ membar(__ AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. ++ ++ __ safepoint_poll(slow_path, thread, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align stack as required by ABI ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
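Around the native call the thread moves through _thread_in_native, _thread_in_native_trans and back to _thread_in_Java, polling for safepoints and suspend requests while in the transition state so the VM can stop it before it re-enters Java. A condensed sketch of that protocol; the types, the fence placement and the memory orders are an interpretation of the membars above, not HotSpot's actual API:

#include <atomic>

enum ThreadState { kInNative, kInNativeTrans, kInJava };

struct JavaThreadModel {
    std::atomic<int>  state;
    std::atomic<bool> safepoint_requested;
    std::atomic<int>  suspend_flags;
};

// Simplified shape of the native return path generated above.
template <typename F>
void return_from_native(JavaThreadModel& t, F block_for_safepoint) {
    t.state.store(kInNativeTrans, std::memory_order_release);
    std::atomic_thread_fence(std::memory_order_seq_cst);   // the AnyAny barrier

    // Acquire ordering so a later read of the global safepoint state cannot
    // be ordered before this poll of the thread-local word.
    if (t.safepoint_requested.load(std::memory_order_acquire) ||
        t.suspend_flags.load(std::memory_order_acquire) != 0) {
        block_for_safepoint();   // check_special_condition_for_native_trans
    }
    t.state.store(kInJava, std::memory_order_release);
}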
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ addi_d(SP, FP, - 2 * wordSize); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ return nm; ++} ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. LA needs two slots for ++// return address and fp. ++// TODO think this is correct but check ++uint SharedRuntime::in_preserve_stack_slots() { ++ return 4; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. 
++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. 
++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, T8); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ ld_w(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // Load count of frams into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ addi_d(FP, SP, 2 * wordSize); ++ __ sub_d(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ // Re-push self-frame ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ __ push2(AT, FP); ++ __ addi_d(FP, SP, 2 * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. 
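Each iteration of the loop above carves one skeletal interpreter frame out of the stack, taking its size and return pc from the UnrollBlock arrays and pushing pc and fp by hand before reserving the rest of the frame. A host-side sketch of the same arithmetic (hypothetical types, 8-byte words assumed):

#include <cstdint>
#include <vector>

struct UnrollInfo {
    std::vector<intptr_t> frame_sizes;   // bytes per skeletal frame
    std::vector<intptr_t> frame_pcs;     // return pc for each frame
    intptr_t caller_adjustment;          // extra locals owed to the caller
};

// Returns the SP value at which each skeletal interpreter frame would end
// up, mirroring the push loop: first extend the caller by caller_adjustment,
// then for every frame push pc and fp (two words) and subtract the rest.
std::vector<intptr_t> unwind_sps(intptr_t sp, const UnrollInfo& u) {
    constexpr intptr_t kWord = 8;
    std::vector<intptr_t> sps;
    sp -= u.caller_adjustment;
    for (std::size_t i = 0; i < u.frame_sizes.size(); ++i) {
        sp -= 2 * kWord;                      // pc and fp, pushed by hand
        sp -= u.frame_sizes[i] - 2 * kWord;   // the remainder of the frame
        sps.push_back(sp);
    }
    return sps;
}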
++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, framesize * BytesPerInt); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T4, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T4, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addi_d(T8, T8, -2 * wordSize); ++ __ add_d(SP, SP, T8); ++ __ ld_d(FP, SP, 0); ++ __ ld_d(RA, SP, wordSize); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. 
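The uncommon-trap blob sizes its self-frame in 32-bit slots (BytesPerInt), so the earlier assert that framesize is a multiple of four is exactly what guarantees 16-byte stack alignment. The same invariant as a tiny standalone helper (illustrative only):

#include <cassert>

constexpr int kBytesPerInt = 4;

// Frame size in bytes for a frame measured in 32-bit slots; four slots
// per 16 bytes, so the slot count must be a multiple of four.
int self_frame_bytes(int framesize_in_ints) {
    assert(framesize_in_ints % 4 == 0 && "sp would not stay 16-byte aligned");
    return framesize_in_ints * kBytesPerInt;
}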
++ __ ld_w(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(RA, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ enter(); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, reg_save.ra_offset()); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (!cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, reg_save.ra_offset()); ++ __ bne(AT, TSR, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ ld_wu(AT, TSR, 0); ++ __ push(T5); ++ __ li(T5, 0xffc0001f); ++ __ andr(AT, AT, T5); ++ __ li(T5, 0x28800013); ++ __ xorr(AT, AT, T5); ++ __ pop(T5); ++ __ bne(AT, R0, bail); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, reg_save.ra_offset()); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. 
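The ASSERT sequence above verifies that the word at the stashed pc really is the safepoint poll instruction before stepping the return pc past it: the instruction is masked with 0xffc0001f and compared against 0x28800013. The same check as a standalone helper, treating those constants simply as the match pattern used by the code above:

#include <cstdint>

// Keep the major opcode bits and the rd field of the instruction and
// compare them with the expected safepoint poll encoding.
bool looks_like_safepoint_poll(uint32_t insn) {
    constexpr uint32_t kMask    = 0xffc0001fu;
    constexpr uint32_t kPattern = 0x28800013u;
    return (insn & kMask) == kPattern;
}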
++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} ++#endif +diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +new file mode 100644 +index 00000000000..21bfc7d78cb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +@@ -0,0 +1,5176 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). 
++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -10 [ S6 ] ++ // -9 [ S5 ] ++ // -8 [ S4 ] ++ // -7 [ S3 ] ++ // -6 [ S1 ] ++ // -5 [ TSR(S2) ] ++ // -4 [ LVP(S7) ] ++ // -3 [ BCP(S0) ] ++ // -2 [ saved fp ] ++ // -1 [ return address ] ++ // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->) fp ++ // 1 [ result ] <--- a1 ++ // 2 [ result_type ] <--- a2 ++ // 3 [ method ] <--- a3 ++ // 4 [ entry_point ] <--- a4 ++ // 5 [ parameters ] <--- a5 ++ // 6 [ parameter_size ] <--- a6 ++ // 7 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ //-24 [ ] ++ //-23 [ F31 ] ++ // ... ++ //-16 [ F24 ] ++ //-15 [ S8 ] ++ //-14 [ thread ] ++ //-13 [ result_type ] <--- a2 ++ //-12 [ result ] <--- a1 ++ //-11 [ ptr. to call wrapper ] <--- a0 ++ //-10 [ S6 ] ++ // -9 [ S5 ] ++ // -8 [ S4 ] ++ // -7 [ S3 ] ++ // -6 [ S1 ] ++ // -5 [ TSR(S2) ] ++ // -4 [ LVP(S7) ] ++ // -3 [ BCP(S0) ] ++ // -2 [ saved fp ] ++ // -1 [ return address ] ++ // 0 [ ] <--- old sp = fp_after_call ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = -1, ++ FP_off = -2, ++ BCP_off = -3, ++ LVP_off = -4, ++ TSR_off = -5, ++ S1_off = -6, ++ S3_off = -7, ++ S4_off = -8, ++ S5_off = -9, ++ S6_off = -10, ++ call_wrapper_off = -11, ++ result_off = -12, ++ result_type_off = -13, ++ thread_off = -14, ++ S8_off = -15, ++ F24_off = -16, ++ F25_off = -17, ++ F26_off = -18, ++ F27_off = -19, ++ F28_off = -20, ++ F29_off = -21, ++ F30_off = -22, ++ F31_off = -23, ++ total_off = -24, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
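++    // Illustrative note on the layout above (assumes LP64, wordSize == 8):
++    // each *_off slot lives at FP + off*wordSize; e.g. the JavaThread* passed
++    // in A7 is spilled at FP + thread_off*wordSize = FP - 112, and
++    // generate_catch_exception() below reloads it from the same slot.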
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, Method* in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ 
__ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // Generate indices for iota vector. ++ address generate_iota_indices(const char *stub_name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ address start = __ pc(); ++ __ emit_data64(0x0706050403020100, relocInfo::none); ++ __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none); ++ __ emit_data64(0x1716151413121110, relocInfo::none); ++ __ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ lipc(AT, L_jtab1); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ __ bind(L_jtab1); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, 
to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ lipc(AT, L_jtab2); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ __ bind(L_jtab2); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A1, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } 
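++
++  // Illustrative example for the large-copy scheme above (not exhaustive):
++  // for a 100-byte disjoint copy with (dst & 7) == 3, the stub pre-loads the
++  // first and last 8 source bytes (A6/A7), runs the aligned 64-byte main loop
++  // and the 32/16/8-byte tails starting at dst + 5 (= 8 - (dst & 7)), and the
++  // final unaligned st_d of A6 at dst[0..7] and of A7 at dst[92..99] covers
++  // the ragged head and tail. The LSX/LASX variants below follow the same
++  // pattern with 16- and 32-byte granules.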
++ ++ // disjoint large copy lsx ++ void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A1, 15); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 16); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ vld(FT4, A0, 64); ++ __ vld(FT5, A0, 80); ++ __ vld(FT6, A0, 96); ++ __ vld(FT7, A0, 112); ++ __ addi_d(A0, A0, 128); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ vst(FT4, A5, 64); ++ __ vst(FT5, A5, 80); ++ __ vst(FT6, A5, 96); ++ __ vst(FT7, A5, 112); ++ __ addi_d(A5, A5, 128); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ addi_d(A0, A0, 64); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ addi_d(A0, A0, 32); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, lt16); ++ __ vld(FT0, A0, 0); ++ __ vst(FT0, A5, 0); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lasx ++ void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A1, 31); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 32); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -256); ++ __ bgeu(A0, A4, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ xvld(FT4, A0, 128); ++ __ xvld(FT5, A0, 160); ++ __ xvld(FT6, A0, 192); ++ __ xvld(FT7, A0, 224); ++ __ addi_d(A0, A0, 256); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ xvst(FT4, A5, 128); ++ __ xvst(FT5, A5, 160); ++ __ xvst(FT6, A5, 192); ++ __ xvst(FT7, A5, 224); ++ __ addi_d(A5, A5, 256); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ addi_d(A0, A0, 128); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ addi_d(A5, A5, 128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ addi_d(A0, A0, 64); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ 
__ bgeu(A0, A4, lt32); ++ __ xvld(FT0, A0, 0); ++ __ xvst(FT0, A5, 0); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A3, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lsx ++ void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A3, 15); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ vld(FT4, A2, -80); ++ __ vld(FT5, A2, -96); ++ __ vld(FT6, A2, -112); ++ __ vld(FT7, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ vst(FT4, A5, -80); ++ __ vst(FT5, A5, -96); ++ __ vst(FT6, A5, -112); ++ __ vst(FT7, A5, -128); ++ __ addi_d(A5, A5, -128); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, 
A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, lt16); ++ __ vld(FT0, A2, -16); ++ __ vst(FT0, A5, -16); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lasx ++ void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A3, 31); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 256); ++ __ bgeu(A4, A2, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ xvld(FT4, A2, -160); ++ __ xvld(FT5, A2, -192); ++ __ xvld(FT6, A2, -224); ++ __ xvld(FT7, A2, -256); ++ __ addi_d(A2, A2, -256); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ xvst(FT4, A5, -160); ++ __ xvst(FT5, A5, -192); ++ __ xvst(FT6, A5, -224); ++ __ xvst(FT7, A5, -256); ++ __ addi_d(A5, A5, -256); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ addi_d(A5, A5, -128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, lt32); ++ __ xvld(FT0, A2, -32); ++ __ xvst(FT0, A5, -32); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. 
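++  // Illustrative note: the small-copy stubs below are computed jump tables.
++  // Every case is padded with nops to 8 instructions (32 bytes), so the
++  // dispatch is simply target = table_base + (count << 5); the short/int/long
++  // small-copy generators further down use the same scheme.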
++ void generate_byte_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ ++ if (!UseLSX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ ld_d(AT, A0, 0); ++ __ ld_b(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_b(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 5); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 5); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ if (!UseLASX) ++ return; ++ ++ // 17: ++ __ vld(F0, A0, 0); ++ __ ld_b(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_b(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 18: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 19: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 20: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); 
++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 21: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 13); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 22: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 23: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 24: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 25: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 9); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 9); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 26: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 27: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 11); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 11); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 28: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 29: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 13); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 30: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 31: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 15); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 32: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. ++ void generate_short_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ vld(F0, A0, 0); ++ __ 
vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
++ void generate_int_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 6: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 7: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, ++ Label &large, const char *name, int small_limit, ++ int log2_elem_size, bool dest_uninitialized = false) { ++ Label post, _large; ++ DecoratorSet decorators = DECORATORS_NONE; ++ BarrierSetAssembler *bs = nullptr; ++ ++ if (is_oop) { ++ decorators = IN_HEAP | IS_ARRAY; ++ ++ if (disjoint) { ++ decorators |= ARRAYCOPY_DISJOINT; ++ } ++ ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A0, A1, A2, RegSet()); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); ++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * 
wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, name, ++ small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, name, ++ small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
++ void generate_long_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 4: ++ if (UseLASX) { ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ } ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, name, ++ small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, name, ++ small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes scratch1, scratch2. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Register tmp1, ++ Register tmp2, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // A3 - size_t ckoff (super_check_offset) ++ // A4 - oop ckval (super_klass) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elementscount ++ const Register ckoff = A3; // super_check_offset ++ const Register ckval = A4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(A0, A4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (S0, S1, S2, S3 are save-on-entry) ++ const Register copied_oop = S0; // actual oop copied ++ const Register count_save = S1; // orig elementscount ++ const Register start_to = S2; // destination array start address ++ const Register oop_klass = S3; // oop._klass ++ const Register tmp1 = A5; ++ const Register tmp2 = A6; ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, oop_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ ++ // Caller of this entry point must set up the argument registers. ++ __ block_comment("Entry:"); ++ ++ // Empty array: Nothing to do. 
++ __ beqz(count, L_done); ++ ++ __ push(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ move(count_save, count); ++ ++ // Copy from low to high addresses ++ __ move(start_to, to); // Save destination array start address ++ __ b(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (; count != 0; count--) { ++ // copied_oop = load_heap_oop(from++); ++ // ... generate_type_check ...; ++ // store_heap_oop(to++, copied_oop); ++ // } ++ __ align(OptoLoopAlignment); ++ ++ __ bind(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop ++ __ addi_d(to, to, UseCompressedOops ? 4 : 8); ++ __ addi_d(count, count, -1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ bind(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop ++ __ addi_d(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(oop_klass, copied_oop); // query the object klass ++ generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); ++ // ======== end loop ======== ++ ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub_d(tmp1, count_save, count); // K = partially copied oop count ++ __ nor(count, tmp1, R0); // report (-1^K) to caller ++ __ beqz(tmp1, L_done_pop); ++ ++ __ bind(L_do_card_marks); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ __ bind(L_done); ++ __ move(A0, count); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
++ // ++ address generate_unsafe_copy(const char *name) { ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ Register s = A0, d = A1, count = A2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ orr(AT, s, d); ++ __ orr(AT, AT, count); ++ ++ __ andi(AT, AT, BytesPerLong-1); ++ __ beqz(AT, L_long_aligned); ++ __ andi(AT, AT, BytesPerInt-1); ++ __ beqz(AT, L_int_aligned); ++ __ andi(AT, AT, BytesPerShort-1); ++ __ beqz(AT, L_short_aligned); ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_short_aligned); ++ __ srli_d(count, count, LogBytesPerShort); // size => short_count ++ __ b(StubRoutines::_jshort_arraycopy); ++ __ bind(L_int_aligned); ++ __ srli_d(count, count, LogBytesPerInt); // size => int_count ++ __ b(StubRoutines::_jint_arraycopy); ++ __ bind(L_long_aligned); ++ __ srli_d(count, count, LogBytesPerLong); // size => long_count ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (A0) ++ Register src_pos, // source position (A1) ++ Register dst, // destination array oo (A2) ++ Register dst_pos, // destination position (A3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ assert_different_registers(SCR1, temp); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, src_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, dst_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ move(src_pos, src_pos); ++ __ move(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // A0 - src oop ++ // A1 - src_pos (32-bits) ++ // A2 - dst oop ++ // A3 - dst_pos (32-bits) ++ // A4 - element count (32-bits) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name) { ++ Label L_failed, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = A0; // source array oop ++ const Register src_pos = A1; // source position ++ const Register dst = A2; // destination array oop ++ const Register dst_pos = A3; // destination position ++ const Register length = A4; ++ ++ // Registers used as temps ++ const Register dst_klass = A5; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ ++#ifndef PRODUCT ++ // bump this on entry, not on exit: ++ __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. 
++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if (src == NULL) return -1; ++ __ beqz(src, L_failed); ++ ++ // if (src_pos < 0) return -1; ++ __ blt(src_pos, R0, L_failed); ++ ++ // if (dst == NULL) return -1; ++ __ beqz(dst, L_failed); ++ ++ // if (dst_pos < 0) return -1; ++ __ blt(dst_pos, R0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = T0; // elements count to copy ++ const Register scratch_src_klass = T1; // array klass ++ const Register lh = T2; // layout helper ++ const Register tmp1 = T3; ++ const Register tmp2 = T4; ++ ++ // if (length < 0) return -1; ++ __ move(scratch_length, length); // length (elements count, 32-bits value) ++ __ blt(scratch_length, R0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ __ block_comment("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(SCR2, dst); ++ __ beqz(SCR2, L1); // this would be broken also ++ __ block_comment("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(lh, Address(scratch_src_klass, lh_offset)); ++ __ li(SCR1, objArray_lh); ++ __ xorr(SCR2, lh, SCR1); ++ __ beqz(SCR2, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(SCR2, dst); ++ __ xorr(SCR2, SCR2, scratch_src_klass); ++ __ bnez(SCR2, L_failed); ++ ++ // if (!src->is_Array()) return -1; ++ __ bge(lh, R0, L_failed); // i.e. (lh >= 0) ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
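Given the layout-helper encoding sketched in the comment above, the primitive-array path only needs two of its fields: the header size in bytes and log2 of the element size, which the code that follows extracts with bstrpick_d and andi. A hedged C++ illustration of that decoding; the shift/mask values mirror HotSpot's Klass::_lh_* constants and should be treated as illustrative here:

    #include <cstdint>

    // Field positions as used by the stub (see Klass::_lh_* for the canonical values).
    constexpr int kHeaderSizeShift   = 16;    // _lh_header_size_shift
    constexpr int kHeaderSizeMask    = 0xFF;  // _lh_header_size_mask
    constexpr int kLog2ElemSizeShift = 0;     // _lh_log2_element_size_shift
    constexpr int kLog2ElemSizeMask  = 0x3F;  // _lh_log2_element_size_mask

    struct ArrayLayout {
      int header_size_in_bytes;  // offset of element 0 from the array base
      int log2_element_size;     // 0..3 for primitive arrays
    };

    // Decode an array layout helper the way generate_generic_copy() does for
    // the TypeArrayKlass case (array layout helpers are negative by design,
    // which is why the earlier "bge(lh, R0, L_failed)" rejects non-arrays).
    static ArrayLayout decode_array_layout(int32_t lh) {
      return ArrayLayout{
        (lh >> kHeaderSizeShift)   & kHeaderSizeMask,
        (lh >> kLog2ElemSizeShift) & kLog2ElemSizeMask
      };
    }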
++#ifdef ASSERT ++ { ++ __ block_comment("assert primitive array {"); ++ Label L; ++ __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ bge(lh, SCR2, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ __ block_comment("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register scr1_offset = SCR1; // array offset ++ const Register elsize = lh; // element size ++ ++ __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask+1) - 1, ++ Klass::_lh_header_size_shift); // array_offset ++ __ add_d(src, src, scr1_offset); // src array offset ++ __ add_d(dst, dst, scr1_offset); // dst array offset ++ __ block_comment("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ bind(L_copy_bytes); ++ __ andi(tmp1, elsize, 2); ++ __ bnez(tmp1, L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::no_scale)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::no_scale)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jshort_arraycopy); ++ ++ __ bind(L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jint_arraycopy); ++ ++ __ bind(L_copy_longs); ++#ifdef ASSERT ++ { ++ __ block_comment("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize ++ __ li(tmp1, LogBytesPerLong); ++ __ beq(elsize, tmp1, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ __ block_comment("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ // ObjArrayKlass ++ __ bind(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(tmp1, dst); ++ __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. 
++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, scratch_length); // length ++ __ bind(L_plain_copy); ++ __ b(StubRoutines::_oop_arraycopy); ++ ++ __ bind(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ ld_w(SCR1, Address(tmp1, lh_offset)); ++ __ li(SCR2, objArray_lh); ++ __ xorr(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, length); // length (reloaded) ++ Register sco_temp = A3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); ++ // assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes SCR1, SCR2 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld_d(dst_klass, Address(dst_klass, ek_offset)); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(A3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_arraycopy. 
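The ckoff/ckval pair prepared here parametrizes the fast subtype test: check_klass_subtype_fast_path loads the word at sub_klass + super_check_offset and compares it against the candidate super klass, falling back to the slow secondary-supers scan only when that probe is inconclusive. A heavily simplified model of just the probe (not the full HotSpot algorithm, which also special-cases the secondary-super-cache offset):

    #include <cstdint>
    #include <cstring>

    // Probe used by the fast path: read the klass word at the offset the
    // destination element klass advertises and compare it with that klass.
    static bool fast_subtype_probe_hits(const uint8_t* sub_klass,
                                        int super_check_offset,
                                        const void* super_klass) {
      const void* probed = nullptr;
      std::memcpy(&probed, sub_klass + super_check_offset, sizeof(probed));
      return probed == super_klass;
    }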
++ __ move(A4, dst_klass); // dst.klass.element_klass ++ __ b(StubRoutines::_checkcast_arraycopy); ++ } ++ ++ __ bind(L_failed); ++ __ li(V0, -1); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ int int_oop_small_limit, long_oop_small_limit; ++ ++ if (UseLASX) { ++ int_oop_small_limit = 9; ++ long_oop_small_limit = 5; ++ generate_disjoint_large_copy_lasx(disjoint_large_copy, "disjoint_large_copy_lasx"); ++ generate_conjoint_large_copy_lasx(conjoint_large_copy, "conjoint_large_copy_lasx"); ++ } else if (UseLSX) { ++ int_oop_small_limit = 7; ++ long_oop_small_limit = 4; ++ generate_disjoint_large_copy_lsx(disjoint_large_copy, "disjoint_large_copy_lsx"); ++ generate_conjoint_large_copy_lsx(conjoint_large_copy, "conjoint_large_copy_lsx"); ++ } else { ++ int_oop_small_limit = 7; ++ long_oop_small_limit = 4; ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy_int"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy_int"); ++ } ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy", int_oop_small_limit); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy_uninit", int_oop_small_limit, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, ++ "oop_arraycopy", int_oop_small_limit); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, ++ "oop_arraycopy_uninit", int_oop_small_limit, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy", long_oop_small_limit); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy_uninit", long_oop_small_limit, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, ++ "oop_arraycopy", long_oop_small_limit); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, ++ "oop_arraycopy_uninit", long_oop_small_limit, true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, ++ "jint_disjoint_arraycopy", int_oop_small_limit); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = 
generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, ++ "jint_arraycopy", int_oop_small_limit); ++ ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, ++ "jlong_disjoint_arraycopy", long_oop_small_limit); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, ++ "jlong_arraycopy", long_oop_small_limit); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ address generate_method_entry_barrier() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); ++ ++ Label deoptimize_label; ++ Register rscratch2 = T8; ++ ++ address start = __ pc(); ++ ++ __ set_last_Java_frame(SP, FP, RA); ++ ++ __ enter(); ++ __ addi_d(T4, SP, wordSize); // T4 points to the saved RA ++ ++ __ addi_d(SP, SP, -4 * wordSize); // four words for the returned {SP, FP, RA, PC} ++ ++ __ push(V0); ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ move(A0, T4); ++ __ call_VM_leaf ++ (CAST_FROM_FN_PTR ++ (address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ bnez(V0, deoptimize_label); ++ ++ __ pop(V0); ++ __ leave(); ++ __ jr(RA); ++ ++ __ bind(deoptimize_label); ++ ++ __ pop(V0); ++ __ 
ld_d(rscratch2, SP, 0); ++ __ ld_d(FP, SP, 1 * wordSize); ++ __ ld_d(RA, SP, 2 * wordSize); ++ __ ld_d(T4, SP, 3 * wordSize); ++ ++ __ move(SP, rscratch2); ++ __ jr(T4); ++ ++ return start; ++ } ++ ++ // T8 result ++ // A4 src ++ // A5 src count ++ // A6 pattern ++ // A7 pattern count ++ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) ++ { ++ const char* stubName = needle_isL ++ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") ++ : "indexof_linear_uu"; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stubName); ++ address entry = __ pc(); ++ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ bool isL = needle_isL && haystack_isL; ++ ++ // parameters ++ Register result = T8, haystack = A4, haystack_len = A5, needle = A6, needle_len = A7; ++ ++ // temporary registers ++ Register match_mask = T0, mask1 = T1, mask2 = T2; ++ Register first = T3, trailing_zeros = T4; ++ Register ch1 = T5, ch2 = T6; ++ ++ RegSet spilled_regs = RegSet::range(T0, T6); ++ ++ __ push(spilled_regs); ++ ++ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, L_SMALL_HAS_ZERO, ++ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, ++ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, ++ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, ++ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, ++ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; ++ ++ __ ld_d(ch1, Address(needle)); ++ ++ // src.length - pattern.length ++ __ sub_d(haystack_len, haystack_len, needle_len); ++ ++ // first is needle[0] ++ __ bstrpick_d(first, ch1, needle_isL ? 7 : 15, 0); ++ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ li(mask1, haystack_isL ? mask0101 : mask0001); ++ ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ li(mask2, haystack_isL ? mask7f7f : mask7fff); ++ ++ // first -> needle[0]needle[0]needle[0]needle[0] ++ if (haystack_isL) __ bstrins_d(first, first, 15, 8); ++ __ bstrins_d(first, first, 31, 16); ++ __ bstrins_d(first, first, 63, 32); ++ ++ if (needle_isL != haystack_isL) { ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ __ move(AT, ch1); ++ __ bstrpick_d(ch1, AT, 7, 0); ++ __ srli_d(AT, AT, 8); ++ __ bstrins_d(ch1, AT, 23, 16); ++ __ srli_d(AT, AT, 8); ++ __ bstrins_d(ch1, AT, 39, 32); ++ __ srli_d(AT, AT, 8); ++ __ bstrins_d(ch1, AT, 55, 48); ++ } ++ ++ __ addi_d(haystack_len, haystack_len, -1 * (wordSize / haystack_chr_size - 1)); ++ __ bge(R0, haystack_len, L_SMALL); ++ ++ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] ++ // eg: ++ // first: aa aa aa aa aa aa aa aa ++ // ch2: aa aa li nx jd ka aa aa ++ // match_mask: 80 80 00 00 00 00 80 80 ++ ++ __ bind(L_LOOP); ++ __ ld_d(ch2, Address(haystack)); ++ // compute match_mask ++ __ xorr(ch2, first, ch2); ++ __ sub_d(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ andn(match_mask, match_mask, ch2); ++ // search first char of needle, goto L_HAS_ZERO if success. 
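The xorr/sub_d/orr/andn sequence above is the usual SWAR "zero byte" test applied to chunk ^ first: a lane of the XOR is zero exactly where the haystack byte equals the first needle character, and the subtraction/or/and-not above turns each such lane into 0x80 (0x8000 in the UTF-16 flavour). A standalone C++ version of the byte flavour, with the caveat that lanes above the lowest match can be flagged spuriously; that is harmless here because the stub always takes the lowest set bit first and verifies each candidate with a full compare loop:

    #include <cstdint>

    // Flag every byte lane of 'chunk' that equals the (replicated) first
    // needle byte, mirroring the stub's mask1/mask2 computation.
    static uint64_t match_mask_bytes(uint64_t chunk, uint64_t first_replicated) {
      const uint64_t ones = 0x0101010101010101ULL;  // mask1 in the stub
      const uint64_t high = 0x7f7f7f7f7f7f7f7fULL;  // mask2 in the stub
      uint64_t x = chunk ^ first_replicated;        // zero lane <=> byte match
      return (x - ones) & ~(x | high);              // 0x80 in matching lanes
    }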
++ __ bnez(match_mask, L_HAS_ZERO); ++ ++ __ bind(L_LOOP_PROCEED); ++ __ addi_d(haystack_len, haystack_len, -1 * (wordSize / haystack_chr_size)); ++ __ addi_d(haystack, haystack, wordSize); ++ __ addi_d(result, result, wordSize / haystack_chr_size); ++ __ bge(haystack_len, R0, L_LOOP); ++ ++ __ bind(L_POST_LOOP); ++ __ li(ch2, -1 * (wordSize / haystack_chr_size)); ++ __ bge(ch2, haystack_len, NOMATCH); // no extra characters to check ++ ++ __ bind(L_SMALL); ++ __ ld_d(ch2, Address(haystack)); ++ __ slli_d(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ sub_d(haystack_len, R0, haystack_len); ++ // compute match_mask ++ __ xorr(ch2, first, ch2); ++ __ sub_d(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ andn(match_mask, match_mask, ch2); ++ // clear useless match_mask bits and check ++ __ nor(trailing_zeros, R0, R0); // all bits set ++ __ srl_d(trailing_zeros, trailing_zeros, haystack_len); // zeroes on useless bits. ++ __ andr(match_mask, match_mask, trailing_zeros); // refine match_mask ++ __ beqz(match_mask, NOMATCH); ++ ++ __ bind(L_SMALL_HAS_ZERO); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ li(AT, wordSize / haystack_chr_size); ++ __ bge(AT, needle_len, L_SMALL_CMP_LOOP_LAST_CMP2); ++ ++ __ bind(L_SMALL_HAS_ZERO_LOOP); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ li(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ ++ __ bind(L_SMALL_CMP_LOOP); ++ needle_isL ? __ ld_bu(first, Address(needle, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(first, Address(needle, trailing_zeros, Address::times_2, 0)); ++ haystack_isL ? 
__ ld_bu(ch2, Address(haystack, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(ch2, Address(haystack, trailing_zeros, Address::times_2, 0)); ++ __ addi_d(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ beq(first, ch2, L_SMALL_CMP_LOOP); ++ ++ __ bind(L_SMALL_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, NOMATCH); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ addi_d(result, result, 1); ++ __ addi_d(haystack, haystack, haystack_chr_size); ++ __ b(L_SMALL_HAS_ZERO_LOOP); ++ ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); ++ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_HAS_ZERO); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ li(AT, wordSize / haystack_chr_size); ++ __ bge(AT, needle_len, L_CMP_LOOP_LAST_CMP2); ++ __ addi_d(result, result, -1); // array index from 0, so result -= 1 ++ ++ __ bind(L_HAS_ZERO_LOOP); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ addi_d(result, result, 1); ++ __ li(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ ++ // compare one char ++ __ bind(L_CMP_LOOP); ++ haystack_isL ? __ ld_bu(ch2, Address(haystack, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(ch2, Address(haystack, trailing_zeros, Address::times_2, 0)); ++ needle_isL ? __ ld_bu(AT, Address(needle, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(AT, Address(needle, trailing_zeros, Address::times_2, 0)); ++ __ addi_d(trailing_zeros, trailing_zeros, 1); // next char index ++ __ bge(trailing_zeros, needle_len, L_CMP_LOOP_LAST_CMP); ++ __ beq(AT, ch2, L_CMP_LOOP); ++ ++ __ bind(L_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ addi_d(haystack, haystack, haystack_chr_size); ++ __ b(L_HAS_ZERO_LOOP); ++ ++ __ bind(L_CMP_LOOP_LAST_CMP); ++ __ bne(AT, ch2, L_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_CMP_LOOP_LAST_CMP2); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ addi_d(result, result, 1); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_HAS_ZERO_LOOP_NOMATCH); ++ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until ++ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, ++ // so, result was increased at max by wordSize/str2_chr_size - 1, so, ++ // respective high bit wasn't changed. 
L_LOOP_PROCEED will increase ++ // result by analyzed characters value, so, we can just reset lower bits ++ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL ++ // 2) advance haystack value to represent next haystack octet. result & 7/3 is ++ // index of last analyzed substring inside current octet. So, haystack in at ++ // respective start address. We need to advance it to next octet ++ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); ++ __ sub_d(result, result, match_mask); ++ if (!haystack_isL) __ slli_d(match_mask, match_mask, haystack_chr_shift); ++ __ sub_d(haystack, haystack, match_mask); ++ __ b(L_LOOP_PROCEED); ++ ++ __ bind(NOMATCH); ++ __ nor(result, R0, R0); // result = -1 ++ ++ __ bind(DONE); ++ __ pop(spilled_regs); ++ __ jr(RA); ++ return entry; ++ } ++ ++ void generate_string_indexof_stubs() ++ { ++ StubRoutines::la::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); ++ StubRoutines::la::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); ++ StubRoutines::la::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 
0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * 
wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, 
"StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return entry; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, ++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for 
(int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_md5_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, ++ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, ++ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, ++ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, ++ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, ++ 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, ++ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, ++ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, ++ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, ++ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, ++ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, ++ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, ++ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, ++ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, ++ 0x6fa87e4f, 
0xfe2ce6e0, 0xa3014314, 0x4e0811a1, ++ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, ++ }; ++ static const uint8_t round_offs[64] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, ++ 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, ++ 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, ++ }; ++ static const uint8_t round_shfs[64] = { ++ 25, 20, 15, 10, 25, 20, 15, 10, 25, 20, 15, 10, 25, 20, 15, 10, ++ 27, 23, 18, 12, 27, 23, 18, 12, 27, 23, 18, 12, 27, 23, 18, 12, ++ 28, 21, 16, 9, 28, 21, 16, 9, 28, 21, 16, 9, 28, 21, 16, 9, ++ 26, 22, 17, 11, 26, 22, 17, 11, 26, 22, 17, 11, 26, 22, 17, 11, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = T4; ++ Register t1 = T5; ++ Register t2 = T6; ++ Register t3 = T7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[4] = { T0, T1, T2, T3 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Load keys base address ++ __ li(kptr, (intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load states ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ ++ // 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 3]; ++ Register b = sa[(1 - i) & 3]; ++ Register c = sa[(2 - i) & 3]; ++ Register d = sa[(3 - i) & 3]; ++ ++ if (i < 16) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 32) { ++ __ andn(t0, c, d); ++ __ AND(t1, d, b); ++ __ OR(t0, t0, t1); ++ } else if (i < 48) { ++ __ XOR(t0, c, d); ++ __ XOR(t0, t0, b); ++ } else { ++ __ orn(t0, b, d); ++ __ XOR(t0, t0, c); ++ } ++ ++ __ ld_w(t1, kptr, i * 4); ++ __ ld_w(t2, buf, round_offs[i] * 4); ++ __ add_w(a, a, t1); ++ __ add_w(a, a, t2); ++ __ add_w(a, a, t0); ++ __ rotri_w(a, a, round_shfs[i]); ++ __ add_w(a, a, b); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ 
__ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; 
++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, (intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ 
const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32C() { ++ assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ addi_d(SP, FP, -2 * wordSize); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
++ } ++ ++ Rhi_mn = ++reg; ++ Rlo_mn = ++reg; ++ } ++ ++ private: ++ void enter() { ++ addi_d(SP, SP, -6 * wordSize); ++ st_d(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addi_d(T0, FP, 6 * wordSize); ++ ld_d(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ st_d(Rhi_ab, FP, 5 * wordSize); ++ st_d(Rlo_ab, FP, 4 * wordSize); ++ st_d(Rhi_mn, FP, 3 * wordSize); ++ st_d(Rlo_mn, FP, 2 * wordSize); ++ st_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld_d(Rhi_ab, FP, 5 * wordSize); ++ ld_d(Rlo_ab, FP, 4 * wordSize); ++ ld_d(Rhi_mn, FP, 3 * wordSize); ++ ld_d(Rlo_mn, FP, 2 * wordSize); ++ ld_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ add_d(t0, t0, Rlo); ++ OR(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ add_d(t1, t1, Rhi); ++ add_d(t1, t1, c); ++ sltu(c, t1, t); ++ add_d(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ slli_w(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld_d(Ra, Pa_base, 0); ++ ldx_d(Rb, Pb_base, Ibn); ++ ld_d(Rm, Pm_base, 0); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ alsl_d(Iam, Rj, Pm_base, LogBytesPerWord - 1); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ st_d(t0, Iam, 0); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. 
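++    // Each of the _throw_* entries below is produced by the
++    // generate_throw_exception() helper defined earlier in this file,
++    // which wraps the SharedRuntime call in a small RuntimeStub with an
++    // oop map (rough summary; see that helper for the details).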
++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ StubRoutines::la::_vector_iota_indices = generate_iota_indices("iota_indices"); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. 
++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++#endif ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseMD5Intrinsics) { ++ generate_md5_implCompress("md5_implCompress", StubRoutines::_md5_implCompress, StubRoutines::_md5_implCompressMB); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ ++ generate_string_indexof_stubs(); ++ ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs_nm != NULL) { ++ StubRoutines::la::_method_entry_barrier = generate_method_entry_barrier(); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++#define UCM_TABLE_MAX_ENTRIES 7 ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); ++ } ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +new file mode 100644 +index 00000000000..20f2a14afcd +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. 
++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static address _vector_iota_indices; ++ static juint _crc_table[]; ++ static address _method_entry_barrier; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block ++ ++ static address _string_indexof_linear_ll; ++ static address _string_indexof_linear_uu; ++ static address _string_indexof_linear_ul; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ static address vector_iota_indices() { return _vector_iota_indices; } ++ ++ static address method_entry_barrier() { ++ return _method_entry_barrier; ++ } ++ ++ static address string_indexof_linear_ul() { ++ return _string_indexof_linear_ul; ++ } ++ ++ static address string_indexof_linear_ll() { ++ return _string_indexof_linear_ll; ++ } ++ ++ static address string_indexof_linear_uu() { ++ return _string_indexof_linear_uu; ++ } ++}; ++ ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +new file mode 100644 +index 00000000000..53ded54ae6c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +@@ -0,0 +1,183 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++address StubRoutines::la::_method_entry_barrier = NULL; ++address StubRoutines::la::_vector_iota_indices = NULL; ++address StubRoutines::la::_string_indexof_linear_ll = NULL; ++address StubRoutines::la::_string_indexof_linear_uu = NULL; ++address StubRoutines::la::_string_indexof_linear_ul = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 
0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. 
++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. ++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +new file mode 100644 +index 00000000000..02af7c8ffa7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +@@ -0,0 +1,2197 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FPU first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. 
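++  // In C terms the loop below is roughly (illustrative sketch, using the
++  // stack layout pictured above):
++  //   for (i = 1; i < n_register_parameters; i++)
++  //     A[i] = stack_slot[1 + i];   // IntReg arg1..arg7 into A1..A7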
++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ __ ld_d(RA, SP, 0); ++ __ addi_d(SP, SP, 18 * wordSize); ++ __ jr(RA); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ safepoint_poll(slow_path, TREG, false /* at_return */, false /* acquire */, false /* in_nmethod */); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. ++ ++ const Register crc = A0; // crc ++ const Register val = A1; // source java byte value ++ const Register tbl = A2; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ld_w(val, SP, 0); // byte value ++ __ ld_w(crc, SP, wordSize); // Initial CRC ++ ++ __ li(tbl, (long)StubRoutines::crc_table_addr()); ++ ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ nor(crc, crc, R0); // ~crc ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ safepoint_poll(slow_path, TREG, false /* at_return */, false /* acquire */, false /* in_nmethod */); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
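++    // Incoming expression stack, as consumed by the loads below
++    // (illustrative sketch; arguments are reversed on the Java stack):
++    //   SP + 0*wordSize   len
++    //   SP + 1*wordSize   off
++    //   SP + 2*wordSize   buf  (byte[] reference, or long address)
++    //   SP + 3*wordSize   crc  (one slot higher for updateByteBuffer,
++    //                           whose buf is a long)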
++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ ++ const Register crc = A0; // initial crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // len argument to the kernel ++ const Register tmp = A3; ++ ++ const Register end = len; // index of last element to process ++ const Register off = crc; // offset ++ ++ __ ld_w(end, SP, 0); // int end ++ __ ld_w(off, SP, wordSize); // int offset ++ __ sub_w(len, end, off); // calculate length ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // int crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // int crc ++ } ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
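++  // Rough sketch of the simplest entries generated below (e.g.
++  // java_lang_math_abs/sqrt); the transcendental kinds go through
++  // generate_transcendental_entry() further down instead:
++  //   FA0 = *(double*) SP;   // argument from the expression stack
++  //   F0  = op(FA0);         // result is returned in F0
++  //   SP  = Rsender;         // restore the caller's SP
++  //   jr RA;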
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fabs_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fsqrt_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 1); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 2); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA2, SP, 0); ++ __ fmadd_d(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_s(FA1, SP, Interpreter::stackElementSize); ++ __ fld_s(FA2, SP, 0); ++ __ fmadd_s(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn 
= CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T4, fn); ++ __ jalr(T4); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: Method* ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T4; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. 
++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++ Label done; ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
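++    // If a MethodData* has been allocated for this method, its invocation
++    // counter is incremented here; otherwise the beq below branches to the
++    // MethodCounters-based counter, so the count is kept in exactly one place.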
++ __ ld_d(T0, Address(Rmethod, Method::method_data_offset())); ++ __ beq(T0, R0, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(T0, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ b(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(T0, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, T0, done); ++ const Address mask(T0, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld_d(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
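++  // Illustrative example (numbers are assumptions, not taken from this code):
++  // with a 16 KiB page, 8-byte stack elements and a few hundred bytes of
++  // overhead, the threshold computed below lets roughly 2000 additional locals
++  // take the fast path; only larger frames reach the explicit
++  // stack_base/stack_size check further down.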
++ __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bge(AT, T2, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ slli_d(T3, T2, Interpreter::logStackElementSize); ++ __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(StackOverflow::stack_shadow_zone_size(), StackOverflow::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ li(AT, max_bang_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ load_mirror(T0, Rmethod, T4); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. 
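++// The fixed part laid out below is frame_size words: return address, sender's
++// fp, sender's sp, last_sp (NULL until the next Java call), locals pointer,
++// Method*, mirror, method data pointer (or 0), constant pool cache, bcp (0 for
++// native calls) and the expression stack bottom.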
++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 2; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T4); ++ __ st_d(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i == frame_size, "i should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. 
If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // Rmethod: Method* ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ ++ Label slow_path; ++ const Register local_0 = A0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(local_0, Address(SP, 0)); ++ __ beqz(local_0, slow_path); ++ ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); ++ ++ // areturn ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. 
++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ load_mirror(t, method, T4); ++ // copy mirror into activation frame ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. 
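++  // The sequence from here on: record the last Java frame, switch the thread
++  // state to _thread_in_native, call the native entry point, then pass through
++  // _thread_in_native_trans with a safepoint/suspend check before returning to
++  // _thread_in_Java and unwinding.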
++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if( os::is_MP() ) __ membar(__ AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. 
++ __ safepoint_poll(slow_path, thread, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ push_call_clobbered_registers(); ++ __ move(S5_heapbase, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ pop_call_clobbered_registers(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ ++ ++ // remove activation ++ __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld_d(RA, FP, frame::return_addr_offset * wordSize); // get return address ++ __ ld_d(FP, FP, frame::link_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? 
n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. 
++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
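++  // The current SP (where the possibly-moved outgoing arguments now live) is
++  // passed in T8 and the saved last_sp in A2, so the fixup call below can move
++  // the arguments back onto the top of this frame's expression stack when the
++  // two differ.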
++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ 
get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. 
++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +new file mode 100644 +index 00000000000..ddb38faf446 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +new file mode 100644 +index 00000000000..c0d1daea305 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +@@ -0,0 +1,4045 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/universe.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T4, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T4, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ 
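// Illustrative sketch (plain C++, not HotSpot code) of the quickening idea
// behind patch_bytecode() above: once resolution has happened, the bytecode at
// the current bcp is overwritten with its "_fast_" variant so later executions
// skip the resolve check; for the putfield family the rewrite waits until the
// put_code recorded in the constant pool cache is non-zero. The names
// quicken() and put_code are hypothetical, for illustration only.
#include <cstdint>

inline void quicken(uint8_t* code, int bci, uint8_t fast_bc, uint8_t put_code) {
  if (put_code == 0)
    return;             // not resolved yet: keep the slow bytecode for now
  code[bci] = fast_bc;  // resolved: later dispatches use the fast template
}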
default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. ++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ b(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ li(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ li(AT, ConstantPoolCacheEntry::field_index_mask); ++ __ andr(off, flags, AT); ++ __ add_d(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
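// Illustrative sketch (plain C++, not HotSpot code) of the tag dispatch that
// the ldc template above performs: class-like tags need a runtime call
// (InterpreterRuntime::ldc), Integer and Float entries are pushed directly,
// and everything else falls through to condy_helper. The enum and struct below
// are hypothetical stand-ins for the real constant pool layout.
#include <cstdint>

enum class CpTag : uint8_t { UnresolvedClass, UnresolvedClassInError, Class,
                             Float, Integer, Other };

struct LdcAction { bool call_runtime; bool push_float; bool push_int; };

inline LdcAction classify_ldc(CpTag tag) {
  switch (tag) {
    case CpTag::UnresolvedClass:
    case CpTag::UnresolvedClassInError:
    case CpTag::Class:   return {true,  false, false};  // resolve, then push the mirror
    case CpTag::Float:   return {false, true,  false};  // push ftos straight from the pool
    case CpTag::Integer: return {false, false, true};   // push itos straight from the pool
    default:             return {false, false, false};  // dynamic constant: condy_helper
  }
}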
++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ addi_d(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ ld_d(obj, field); ++ __ push(itos); ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ addi_d(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ fld_s(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ // stos ++ __ ld_h(obj, field); ++ __ push(stos); ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ addi_d(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ // btos ++ __ ld_b(obj, field); ++ __ push(btos); ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ addi_d(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ // ctos ++ __ ld_hu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ addi_d(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ // ztos ++ __ ld_bu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ addi_d(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ // ltos ++ __ ld_d(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ addi_d(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ // dtos ++ __ fld_d(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T4); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++ Label notNull; ++ __ li(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ resolve_oop_handle(tmp, T4); ++ __ bne(tmp, result, notNull); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ ++ // dtos ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ ++ // ltos ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. 
++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
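// Illustrative sketch (plain C++, not HotSpot code) of the address arithmetic
// done by locals_index() and locals_index_wide() above: slot n of the locals
// area sits at LVP - n*wordSize (the area grows toward lower addresses), and
// two-slot values (long/double) are read one word lower still, which is why
// laddress(n) is defined as iaddress(n + 1). The helper name is hypothetical.
#include <cstdint>

inline intptr_t* local_slot(intptr_t* lvp, unsigned n, bool two_slots = false) {
  intptr_t* p = lvp - n;          // pointer arithmetic: n words below the locals pointer
  return two_slots ? p - 1 : p;   // long/double occupy slots n and n+1
}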
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ slli_w(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ bltu(index, AT, ok); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? 
Address::times_4 : Address::times_8) - 1); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 0); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ld_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ fld_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ fld_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. 
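// Illustrative sketch (plain C++, not HotSpot code) of what index_check() /
// index_check_without_pop() above guarantee before every array access: the
// array is null-checked, the 32-bit index is sign-extended, and the bound is
// tested with a single unsigned compare (bltu), which rejects negative indices
// as well as indices >= length. Exception types are placeholders for the
// interpreter's throw entries.
#include <cstdint>
#include <stdexcept>

inline void check_index(const int32_t* length_addr, int32_t index) {
  if (length_addr == nullptr)
    throw std::runtime_error("NullPointerException");           // null_check
  if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(*length_addr))
    throw std::out_of_range("ArrayIndexOutOfBoundsException");  // bltu not taken
  // in bounds: fall through to the scaled load or store
}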
++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ li(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_iaccess_0); ++ __ li(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aaccess_0); ++ __ li(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_faccess_0); ++ __ li(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ fst_s(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ fst_d(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++} ++ ++// used register T2 ++void 
TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
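// Illustrative sketch (plain C++, not HotSpot code) of the masking the comment
// above describes: boolean[] and byte[] share the bastore bytecode, so the
// array klass's layout helper is probed for the boolean diffbit and, if it is
// set, the value is normalised to 0/1 before the byte store. Parameter names
// are hypothetical.
#include <cstdint>

inline int8_t bastore_value(int32_t value, int32_t layout_helper, int32_t boolean_diffbit) {
  if (layout_helper & boolean_diffbit)
    value &= 0x1;                      // boolean[]: keep only the lowest bit
  return static_cast<int8_t>(value);   // byte[]: plain truncating store
}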
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push 
d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ add_w(FSR, SSR, FSR); break; ++ case sub : __ sub_w(FSR, SSR, FSR); break; ++ case mul : __ mul_w(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sll_w(FSR, SSR, FSR); break; ++ case shr : __ sra_w(FSR, SSR, FSR); break; ++ case ushr : __ srl_w(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ add_d(FSR, T2, FSR); break; ++ case sub : __ sub_d(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ __ div_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne(FSR, R0, not_zero); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(not_zero); ++ __ mod_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mul_d(FSR, T2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ __ div_d(FSR, A2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ __ mod_d(FSR, A2, FSR); ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sll_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sra_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); 
++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ bswap_h(FSR, FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out 
= ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ bswap_w(A7, A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. 
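// Illustrative sketch (plain C++, not HotSpot code) of the three-way compare
// that lcmp and float_cmp above build out of two slt / fcmp results: the
// outcome is sign(a - b) computed without subtraction overflow, and for
// floating point the unordered_result argument selects whether a NaN operand
// yields -1 (fcmpl) or +1 (fcmpg). Helper names are hypothetical.
#include <cmath>

inline int cmp_long(long long a, long long b) {
  return (a > b) - (a < b);                // 1, 0 or -1, like the two-slt sequence
}

inline int cmp_float(float a, float b, int unordered_result) {
  if (std::isnan(a) || std::isnan(b))
    return unordered_result < 0 ? -1 : 1;  // unordered case, per the JVM spec
  return (a > b) - (a < b);
}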
++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ const Address mask(T0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_b(T3, V0, nmethod::state_offset()); ++ __ li(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ 
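// Illustrative sketch (plain C++, not HotSpot code) of the backedge-counter
// scheme emitted by branch() above for backward branches: each taken backedge
// adds a fixed increment to a counter and masks the result; hitting zero sends
// the interpreter to the frequency-overflow runtime call, which may return an
// OSR nmethod to jump into. The helper name is hypothetical and the mask
// encoding is assumed only in outline.
#include <cstdint>

inline bool backedge_overflow(uint32_t& counter, uint32_t increment, uint32_t mask) {
  counter += increment;          // what increment_mask_and_jump does, in C++
  return (counter & mask) == 0;  // zero => counter crossed the OSR threshold
}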
Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ bswap_w(T3, T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ bswap_w(A7, A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ bswap_w(A7, A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ bswap_w(FSR, FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ bswap_w(T3, T3); ++ __ 
b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ bswap_w(A7, A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ bswap_w(j, j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ bswap_w(temp, temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
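// Illustrative sketch: the binary search from the comment above, written as
// ordinary, runnable C++ (not HotSpot code). It returns the index i with
// a[i] <= key < a[i+1] (0 when the key is below every entry); the caller must
// still re-check a[i].match == key, exactly as the generated code does after
// its loop. LookupswitchPair here is a plain stand-in for the real pair layout.
#include <cstdint>
#include <vector>

struct LookupswitchPair { int32_t match; int32_t offset; };

inline int binary_search(int32_t key, const std::vector<LookupswitchPair>& a) {
  int i = 0;
  int j = static_cast<int>(a.size());
  while (i + 1 < j) {
    int h = (i + j) >> 1;         // i < h < j
    if (key < a[h].match) j = h;  // key is in the lower half
    else                  i = h;  // key is in the upper half (or equal)
  }
  return i;                       // verify a[i].match == key before using a[i].offset
}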
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ bswap_w(temp, temp); ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ bswap_w(j, j); ++ ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ld_w(j, array, - 2 * BytesPerInt); ++ __ bswap_w(j, j); ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld_d(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ li(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ld_b(AT, thread, in_bytes(JavaThread::polling_word_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T4); ++ __ membar(__ StoreStore); ++ ++ __ jr(T4); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved, clinit_barrier_slow; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
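// Illustrative sketch (plain C++, not HotSpot code) of the "is resolved?" test
// that follows: the constant pool cache records, per entry, the bytecode that
// resolved it, so comparing that recorded value against the currently
// executing bytecode tells whether InterpreterRuntime::resolve_from_cache
// still has to be called. The helper name is hypothetical.
#include <cstdint>

inline bool cache_entry_resolved(uint8_t recorded_bytecode, uint8_t current_bytecode) {
  return recorded_bytecode == current_bytecode;  // mismatch (often 0) => resolve first
}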
++ int i = (int)code; ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++ ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache, index); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, AT, NULL, &clinit_barrier_slow); ++ } ++} ++//END: LA ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T4); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. 
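// Illustrative sketch (plain C++, not HotSpot code) of how the flags word
// loaded by load_field_cp_cache_entry() above is decoded by the field
// templates that follow: the upper bits carry the field's TosState, one bit
// marks volatile fields, and the low bits hold the field index. Shift and mask
// values are passed in rather than hard-coded, since the exact layout lives in
// ConstantPoolCacheEntry; the helper and struct names are hypothetical.
#include <cstdint>

struct FieldFlags { uint32_t tos_state; bool is_volatile; uint32_t field_index; };

inline FieldFlags decode_field_flags(uint64_t flags,
                                     unsigned tos_shift, uint64_t tos_mask,
                                     unsigned volatile_shift, uint64_t index_mask) {
  FieldFlags f;
  f.tos_state   = static_cast<uint32_t>((flags >> tos_shift) & tos_mask);
  f.is_volatile = ((flags >> volatile_shift) & 1) != 0;
  f.field_index = static_cast<uint32_t>(flags & index_mask);
  return f;
}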
++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ 
patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, 
notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::no_scale, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ 
putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ 
// field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::no_scale, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. 
++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ 
addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "Method must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed Method*, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target Method & entry point ++ __ lookup_virtual_method(T2, index, method); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ load_method_holder(T2, Rmethod); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ ++ __ bind(loop); ++ __ addi_d(T1, T1, -oopSize); ++ __ st_d(R0, T1, sizeof(oopDesc)); ++ __ bne(T1, FSR, loop); // dont clear header ++ } ++ ++ // klass in T3, ++ // initialize object header only. 
++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markWord::prototype().value()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); // big-endian ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); // big-endian ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. ++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. 
++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); // big-endian ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ __ move(FSR, R0); ++ // Come here on failure ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ alsl_d(AT, Rnext, AT, Address::times_8 - 1); ++ __ ld_d(AT, AT, 0); ++ __ jr(AT); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ alsl_d(SP, AT, SP, Address::times_8 - 1); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} +diff --git a/src/hotspot/cpu/loongarch/universalNativeInvoker_loongarch_64.cpp b/src/hotspot/cpu/loongarch/universalNativeInvoker_loongarch_64.cpp +new file mode 100644 +index 00000000000..87f6a113268 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/universalNativeInvoker_loongarch_64.cpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} +diff --git a/src/hotspot/cpu/loongarch/universalUpcallHandler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/universalUpcallHandler_loongarch_64.cpp +new file mode 100644 +index 00000000000..7586b084868 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/universalUpcallHandler_loongarch_64.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} ++ ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} +diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +new file mode 100644 +index 00000000000..5b9f7b78981 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +new file mode 100644 +index 00000000000..31da20e6f39 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", features_string()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +new file mode 100644 +index 00000000000..1a93123134c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +new file mode 100644 +index 00000000000..1a1ac923117 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +@@ -0,0 +1,432 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if 
(_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_os_cpu_info(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _features |= get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); ++ } ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? 
", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); ++ _features_string = os::strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseSHA3Intrinsics) { ++ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, true); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. 
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++ UseCRC32CIntrinsics = true; ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { ++ FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); ++ } ++ ++ if (UseLSX) { ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ } ++ } else if (UsePopCountInstruction) { ++ if (!FLAG_IS_DEFAULT(UsePopCountInstruction)) ++ warning("PopCountI/L/VI(4) employs LSX whereas PopCountVI(8) hinges on LASX."); ++ FLAG_SET_DEFAULT(UsePopCountInstruction, false); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +new file mode 100644 +index 00000000000..cae9f863c30 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +@@ -0,0 +1,295 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++#define CPU_FEATURE_FLAGS(decl) \ ++ decl(LAM, lam, 1) \ ++ decl(UAL, ual, 2) \ ++ decl(LSX, lsx, 4) \ ++ decl(LASX, lasx, 5) \ ++ decl(COMPLEX, 
complex, 7) \ ++ decl(CRYPTO, crypto, 8) \ ++ decl(LBT_X86, lbt_x86, 10) \ ++ decl(LBT_ARM, lbt_arm, 11) \ ++ decl(LBT_MIPS, lbt_mips, 12) \ ++ /* flags above must follow Linux HWCAP */ \ ++ decl(LA32, la32, 13) \ ++ decl(LA64, la64, 14) \ ++ decl(FP, fp, 15) \ ++ decl(LLEXC, llexc, 16) \ ++ decl(SCDLY, scdly, 17) \ ++ decl(LLDBAR, lldbar, 18) \ ++ decl(CCDMA, ccdma, 19) \ ++ decl(LLSYNC, llsync, 20) \ ++ decl(TGTSYNC, tgtsync, 21) \ ++ decl(ULSYNC, ulsync, 22) \ ++ ++ enum Feature_Flag { ++#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), ++ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) ++#undef DECLARE_CPU_FEATURE_FLAG ++ }; ++ ++protected: ++ ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static void get_processor_features(); ++ static void get_os_cpu_info(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { return _features & CPU_LA32; } ++ static bool is_la64() { return _features & CPU_LA64; } ++ static bool supports_crypto() { return _features & CPU_CRYPTO; } ++ static bool supports_lsx() { return _features & CPU_LSX; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lam() { return _features & CPU_LAM; } ++ static bool supports_llexc() { return _features & CPU_LLEXC; } ++ static bool supports_scdly() { return _features & CPU_SCDLY; } ++ static bool supports_lldbar() { return _features & CPU_LLDBAR; } ++ static bool supports_ual() { return _features & 
CPU_UAL; } ++ static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return !supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static bool supports_fast_class_init_checks() { return true; } ++ constexpr static bool supports_stack_watermark_barrier() { return true; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +new file mode 100644 +index 00000000000..79d2560f494 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +new file mode 100644 +index 00000000000..819eaff0bb3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_FloatRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_gpr; ++ return base % FloatRegisterImpl::max_slots_per_register == 0; ++ } else { ++ return is_even(value()); ++ } ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +new file mode 100644 +index 00000000000..edb78e36daa +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +new file mode 100644 +index 00000000000..6a190529b64 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +@@ -0,0 +1,331 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. 
++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. 
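++ // Capture the PC before the call_VM sequence so its emitted size can be compared
++ // against the 512-byte estimate asserted just below.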
++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load Method* and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 6 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: Method* ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. 
Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we use T8, T4, T2 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2, t3 = T4; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ ++ Label L_no_such_interface; ++ ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++ // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ ++ __ move(t3, t2); ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, resolved_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, holder_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ // We found a hit, move offset into T4 ++ __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); ++ ++ // Compute itableMethodEntry. 
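++ // t2 holds the byte offset (relative to the receiver klass in t1) of the holder
++ // interface's itable method block; method_offset then selects the Method* slot for
++ // this itable_index, and the ldx_d below loads the target Method*.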
++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get Method* and entrypoint for compiler ++ const Register method = Rmethod; ++ ++ start_pc = __ pc(); ++ __ li(AT, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ add_d(AT, AT, t2); ++ __ ldx_d(method, t1, AT); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: Method* ++ // T0: receiver ++ // T4: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +new file mode 100644 +index 00000000000..c34334ec4c7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +new file mode 100644 +index 00000000000..2205ef1a42c +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp +@@ -0,0 +1,764 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", 
"suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", "", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +new file mode 100644 +index 00000000000..8d0d9e0eb21 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp +@@ -0,0 +1,1777 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
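++
++// Usage sketch (illustrative only, derived from the definitions below): an Address
++// bundles a base register, an optional index register, a ScaleFactor and a displacement,
++// so Address(base, index, Address::times_8, 16) denotes base + (index << 3) + 16.
++// The load/store overloads that take an Address (see assembler_mips.cpp) expand such
++// operands into real instruction sequences, using AT as a scratch register whenever
++// the displacement does not fit in a signed 16-bit immediate.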
++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. ++ ++ Address(Register base, ByteSize disp) ++ : Address(base, in_bytes(disp)) {} ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : Address(base, index, scale, in_bytes(disp)) {} ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(RA0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. 
By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. 
We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
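++ // (For illustration: an R-type instruction in this family, e.g. daddu rd, rs, rt, is
++ // assembled by insn_RRRO() as (rs << 21) | (rt << 16) | (rd << 11) | daddu_op, with
++ // the six major-opcode bits [31..26] left as zero.)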
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
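++ // (Context, illustrative only: the gs*-prefixed families here are Loongson extension
++ // encodings; the Address-based load/store helpers in assembler_mips.cpp emit forms such
++ // as gsldx/gssdx only when UseLEXT1 is set, otherwise falling back to daddu + ld/sd
++ // sequences.)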
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "next instruction should not be a delay slot"); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. 
++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ //---< calculate length of instruction >--- ++ // With MIPS being a RISC architecture, this always is BytesPerInstWord ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return BytesPerInstWord; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return BytesPerInstWord; } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { 
emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, 
int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op << 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | 
((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, 
Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { 
emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, 
Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { 
bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. ++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} 
++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} ++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) 
{INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
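[Editor's note, not part of the patch hunk above.] The gslq/gssq comment says the hardware computes vAddr = sign_extend(offset << 4) + GPR[base], so the emitters divide the byte offset by 16 before packing it. A minimal standalone C++ sketch of that check and packing, assuming the field layout used by gslq above; fits_simm() and low_bits() are hypothetical stand-ins for the patch's is_simm() and low():

    #include <cassert>
    #include <cstdint>

    // Stand-ins for the patch's is_simm()/low() helpers (assumed semantics).
    static bool fits_simm(int32_t v, int bits) {
      int32_t lo = -(1 << (bits - 1)), hi = (1 << (bits - 1)) - 1;
      return v >= lo && v <= hi;
    }
    static uint32_t low_bits(int32_t v, int bits) { return uint32_t(v) & ((1u << bits) - 1); }

    // Validate a byte offset for a 16-byte (quadword) access and return the
    // 9-bit immediate field placed at bits 14..6 of the instruction word.
    uint32_t gslq_off_field(int32_t byte_off) {
      assert((byte_off & 0xF) == 0 && "low 4 bits of off must be 0");
      int32_t scaled = byte_off >> 4;                        // hardware rescales by << 4
      assert(fits_simm(scaled, 9) && "off exceeds 9 bits after scaling");
      return low_bits(scaled, 9) << 6;
    }

Under these assumptions the reachable byte range is -4096 .. +4080 in steps of 16, which is why the asserts reject anything unaligned or farther away.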
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +new file mode 100644 +index 00000000000..39aeb5509a7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +new file mode 100644 +index 00000000000..4172db219b1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. 
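[Editor's note, not part of the patch hunk above.] put_native_u4/put_native_u8 split misaligned stores into halfword/byte pieces, and the get_Java_*/put_Java_* wrappers that follow byte-swap because Java class-file data is big-endian while MIPS64EL is little-endian. A portable sketch of the same net effect (editor's illustration only; memcpy stands in for the alignment switch and __builtin_bswap32 for the compiler-dependent swap_u4), valid on a little-endian host:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static void put_native_u4_portable(unsigned char* p, uint32_t x) {
      std::memcpy(p, &x, sizeof x);           // works for any alignment of p
    }
    static uint32_t get_Java_u4_portable(const unsigned char* p) {
      uint32_t x;
      std::memcpy(&x, p, sizeof x);           // native (little-endian) load
      return __builtin_bswap32(x);            // Java order is big-endian
    }

    int main() {
      unsigned char buf[8] = {0};
      put_native_u4_portable(buf + 1, 0x11223344u);          // deliberately misaligned
      assert(buf[1] == 0x44 && buf[4] == 0x11);              // little-endian byte layout
      assert(get_Java_u4_portable(buf + 1) == 0x44332211u);  // swapped into Java order
      return 0;
    }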
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_MacroAssembler_mips.cpp b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.cpp +new file mode 100644 +index 00000000000..3d98ec11f6b +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.cpp +@@ -0,0 +1,614 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. 
++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. 
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmpReg, objReg); ++ lw(tmpReg, Address(tmpReg, Klass::access_flags_offset())); ++ move(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, tmpReg, AT); ++ sltiu(scrReg, AT, 1); ++ beq(scrReg, R0, DONE_SET); ++ delayed()->nop(); ++ } ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markWord::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markWord::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. 
++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markWord::unused_mark().value())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. 
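[Editor's note, not part of the patch hunk above.] The stack-locking branch of fast_lock above decides "recursively locked by the current thread" with a single mask: the fetched mark word (which, when stack-locked, points to a BasicLock on the owner's stack) minus SP must be non-negative, smaller than one page, and 8-byte aligned; the masked result is stored in the box so the unlock path sees 0 for the recursive case. A minimal sketch of that arithmetic with plain integers (editor's illustration, not HotSpot API; page_size stands in for os::vm_page_size()):

    #include <cstdint>

    // Mirrors: dsubu tmp, mark, SP;  li AT, 7 - page_size;  andr tmp, tmp, AT
    bool recursively_locked_by_self(uintptr_t mark, uintptr_t sp,
                                    uintptr_t page_size /* e.g. 4096 */) {
      uintptr_t diff = mark - sp;
      uintptr_t mask = static_cast<uintptr_t>(7 - static_cast<intptr_t>(page_size));
      // mask keeps the low 3 bits and every bit at or above the page size,
      // so the test passes only when 0 <= diff < page_size and diff % 8 == 0.
      return (diff & mask) == 0;
    }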
++ ++void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markWord::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++// Compare strings, used for char[] and byte[]. ++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void C2_MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? 
++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++ ++void C2_MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/c2_MacroAssembler_mips.hpp b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.hpp +new file mode 100644 +index 00000000000..7bdf6e52126 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.hpp +@@ -0,0 +1,162 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_C2_MACROASSEMBLER_MIPS_HPP ++ ++// C2_MacroAssembler contains high-level macros for C2 ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++public: ++ ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. 
++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++ ++ // Memory Data Type ++ #define INT_TYPE 0x100 ++ #define FLOAT_TYPE 0x200 ++ #define SIGNED_TYPE 0x10 ++ #define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template <class T> ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != -1) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 && (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(T9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, T9, 0, type); ++ } else { ++ addu(AT, AT, T9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(T9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), T9, 0, type); ++ } else { ++ addu(AT, as_Register(base), T9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++ ++#endif // CPU_MIPS_VM_C2_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +new file mode 100644 +index 00000000000..f452cebf549 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp +@@ -0,0 +1,91 @@ ++/* ++ *
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 31); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 21); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +new file mode 100644 +index 00000000000..e6d5815f424 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +new file mode 100644 +index 00000000000..3cc191006d4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +new file mode 100644 +index 00000000000..81f67a92a78 +--- /dev/null ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. 
++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return align_up(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ verify_mt_safe(callee, entry, method_holder, jump); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +new file mode 100644 +index 00000000000..dcc77adfec1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/copy_mips.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. ++template <class T> ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp +new file mode 100644 +index 00000000000..756ccb68f9c +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp +new file mode 100644 +index 00000000000..11e52b4e8f8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +new file mode 100644 +index 00000000000..1ca0053b923 +--- /dev/null ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++ // Returns address of n-th instruction preceding addr, ++ // NULL if no preceding instruction can be found. ++ // With MIPS being a RISC architecture, this always is BytesPerInstWord ++ // It might be beneficial to check "is_readable" as we do on ppc and s390. ++ static address find_prev_instr(address addr, int n_instr) { ++ return addr - BytesPerInstWord*n_instr; ++ } ++ ++ // special-case instruction decoding. ++ // There may be cases where the binutils disassembler doesn't do ++ // the perfect job. In those cases, decode_instruction0 may kick in ++ // and do it right. ++ // If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++ static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++ } ++ ++ // platform-specific instruction annotations (like value of loaded constants) ++ static void annotate(address pc, outputStream* st) { }; ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/foreign_globals_mips.cpp b/src/hotspot/cpu/mips/foreign_globals_mips.cpp +new file mode 100644 +index 00000000000..fb4647c2723 +--- /dev/null ++++ b/src/hotspot/cpu/mips/foreign_globals_mips.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" ++ ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/mips/foreign_globals_mips.hpp b/src/hotspot/cpu/mips/foreign_globals_mips.hpp +new file mode 100644 +index 00000000000..3c00688168a +--- /dev/null ++++ b/src/hotspot/cpu/mips/foreign_globals_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef CPU_MIPS_VM_FOREIGN_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_FOREIGN_GLOBALS_MIPS_HPP ++ ++class BufferLayout {}; ++class ABIDescriptor {}; ++ ++#endif // CPU_MIPS_VM_FOREIGN_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +new file mode 100644 +index 00000000000..bf2b87b24cc +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.cpp +@@ -0,0 +1,665 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. 
walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ // sp must be within the usable part of the stack (not in guards) ++ if (!thread->is_in_usable_stack(sp)) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ if (!thread->is_in_stack_range_incl(unextended_sp, sp)) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = thread->is_in_stack_range_excl(fp, sp) && ++ thread->is_in_full_stack_checked(fp + (return_addr_offset * sizeof(void*))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ return thread->is_in_stack_range_excl(jcw, (address)sender.fp()); ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. 
Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on 
stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +new file mode 100644 +index 00000000000..ac706e71f9a +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.hpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... 
++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ Method ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +new file mode 100644 +index 00000000000..c408f01d69a +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..196ff1582a1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +@@ -0,0 +1,373 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..ec5c243c3f1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1Globals_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1Globals_mips.hpp +new file mode 100644 +index 00000000000..f0c7badaac7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1Globals_mips.hpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1GLOBALS_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1GLOBALS_MIPS_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 8; ++ ++#endif // CPU_MIPS_GC_G1_G1GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..071debdc3a3 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ 
Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..bc68de604d2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetNMethod_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetNMethod_mips.cpp +new file mode 100644 +index 00000000000..3d4e69333b0 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetNMethod_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "utilities/debug.hpp" ++ ++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { ++ ShouldNotReachHere(); ++} ++ ++void BarrierSetNMethod::disarm(nmethod* nm) { ++ ShouldNotReachHere(); ++} ++ ++bool BarrierSetNMethod::is_armed(nmethod* nm) { ++ ShouldNotReachHere(); ++ return false; ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..1b2002fd040 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Untested("Untested"); ++ __ warn("store_check Untested"); ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..49c2a0ea80e +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or 
its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..765259e6266 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..5320a4c0add +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
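// --------------------------------------------------------------------------
// Editorial sketch (illustrative only, not part of the upstream patch): the
// two barrier-set files above emit this pattern as MIPS assembly.
// ModRefBarrierSetAssembler routes only oop stores (T_OBJECT/T_ARRAY) to the
// overridable hook, and the card-table subclass implements that hook as
// "raw store, then dirty the card covering the destination".  The names and
// globals below are simplified placeholders, not HotSpot API.
#include <cstdint>

static uint8_t*  byte_map_base;      // biased card-table base (placeholder)
static const int card_shift = 9;     // 512-byte cards, the HotSpot default

// Rough equivalent of CardTableBarrierSetAssembler::store_check(): mark the
// card for the just-written heap location dirty (dirty_card_val() == 0).
inline void card_mark(void* field_addr) {
  byte_map_base[reinterpret_cast<uintptr_t>(field_addr) >> card_shift] = 0;
}

// Rough equivalent of ModRefBarrierSetAssembler::store_at() feeding into
// CardTableBarrierSetAssembler::oop_store_at(): non-oop stores go straight
// through; oop stores into the heap of a non-null value get the post barrier.
inline void store_at(bool is_oop, bool in_heap, void** field_addr, void* val) {
  *field_addr = val;                 // plain BarrierSetAssembler::store_at()
  if (is_oop && in_heap && val != nullptr) {
    card_mark(field_addr);           // post-write barrier
  }
}
// --------------------------------------------------------------------------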
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +new file mode 100644 +index 00000000000..2b50d15ffd7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define PREFERRED_METASPACE_ALIGNMENT ++ ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +new file mode 100644 +index 00000000000..2d88d370c94 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globals_mips.hpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ 
product, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++// end of ARCH_FLAGS ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +new file mode 100644 +index 00000000000..604e951a9bf +--- /dev/null ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp +@@ -0,0 +1,96 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp +new file mode 100644 +index 00000000000..848964b63f6 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, RA2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +new file mode 100644 +index 00000000000..f90dee6eef7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +new file mode 100644 +index 00000000000..1b9aa653fd4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp +@@ -0,0 +1,266 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int 
bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +new file mode 100644 +index 00000000000..732325fdbd4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +@@ -0,0 +1,2140 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle 
and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. 
If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
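// Editorial note (illustrative, not part of the upstream patch): the shift
// computed below relies on ConstantPoolCacheEntry packing its two resolved
// bytecodes together with the constant-pool index in one word, roughly
//   _indices = [ bytecode_2 : 8 | bytecode_1 : 8 | cp_index : 16 ]
// so with f1_byte == 1 and f2_byte == 2 the extraction reduces to
//   (indices >> ((1 + byte_no) * BitsPerByte)) & bytecode_1_mask,
// which is exactly what the dsrl/andr pair in this function emits.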
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ ++ ld(method, Address(cache, index, Address::times_ptr, method_offset)); // get f1 Method* ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
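// --------------------------------------------------------------------------
// Editorial sketch (illustrative only, not part of the upstream patch):
// dispatch_base() below is the MIPS encoding of token-threaded dispatch --
// index a per-TosState table of handler addresses with the next bytecode and
// jump there.  In plain C++ terms (names here are placeholders):
typedef void (*bytecode_handler_t)();            // generated handler stub
static bytecode_handler_t dispatch_table[256];   // one such table per TosState

inline void dispatch(unsigned char next_bytecode) {
  // In the emitted code: T2 = Rnext << LogBytesPerWord; T3 = table[T2]; jr T3
  bytecode_handler_t handler = dispatch_table[next_bytecode];
  if (handler != nullptr) {
    handler();
  }
}
// The emitted code additionally tests the thread-local polling word first and,
// when the safepoint poll bit is set, dispatches through
// Interpreter::safept_table(state) instead of the normal table.
// --------------------------------------------------------------------------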
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(JavaThread::polling_word_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++// Lock object ++// ++// Args: ++// c_rarg0: 
BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp_reg, scr_reg); ++ lw(tmp_reg, Address(tmp_reg, Klass::access_flags_offset())); ++ move(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, AT, tmp_reg); ++ bne(AT, R0, slow_case); ++ delayed()->nop(); ++ } ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, static_cast(vmIntrinsics::_compiledLambdaForm)); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ lw(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +new file mode 100644 +index 00000000000..054138ea42b +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +new file mode 100644 +index 00000000000..3e8ae9fb5a5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(current, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++JRT_END +diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +new file mode 100644 +index 00000000000..f5f3735e7cb +--- /dev/null ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable() { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +new file mode 100644 +index 00000000000..547414f7ef3 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +@@ -0,0 +1,203 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing membar for LoadLoad barrier, we create address dependency ++// between loads, which is more efficient than membar. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ Label slow; ++ ++ const Register env = A0; ++ const Register obj = A1; ++ const Register fid = A2; ++ const Register tmp1 = AT; ++ const Register tmp2 = T9; ++ const Register obj_addr = T0; ++ const Register field_val = V0; ++ const Register field_addr = T0; ++ const Register counter_addr = T2; ++ const Register counter_prev_val = T1; ++ ++ __ li(counter_addr, SafepointSynchronize::safepoint_counter_addr()); ++ __ lw(counter_prev_val, counter_addr, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(tmp1, counter_prev_val, 1); ++ __ bne(tmp1, R0, slow); ++ __ delayed()->nop(); ++ ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ __ li(tmp1, JvmtiExport::get_field_access_count_addr()); ++ // address dependency ++ __ xorr(tmp1, tmp1, counter_prev_val); ++ __ xorr(tmp1, tmp1, counter_prev_val); ++ __ lw(tmp1, tmp1, 0); ++ __ bne(tmp1, R0, slow); ++ __ delayed()->nop(); ++ } ++ ++ __ move(obj_addr, obj); ++ // Both obj_addr and tmp2 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, env, obj_addr, tmp2, slow); ++ ++ __ dsrl(tmp1, fid, 2); // offset ++ __ daddu(field_addr, obj_addr, tmp1); ++ // address dependency ++ __ xorr(field_addr, field_addr, counter_prev_val); ++ __ xorr(field_addr, field_addr, counter_prev_val); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (field_val, field_addr, 0); break; ++ case T_BYTE: __ lb (field_val, field_addr, 0); break; ++ case T_CHAR: __ lhu (field_val, field_addr, 0); break; ++ case T_SHORT: __ lh (field_val, field_addr, 0); break; ++ case T_INT: __ lw (field_val, field_addr, 0); break; ++ case T_LONG: __ ld (field_val, field_addr, 0); break; ++ case T_FLOAT: __ lwu (field_val, field_addr, 0); break; ++ case T_DOUBLE: __ ld (field_val, field_addr, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // address dependency ++ __ xorr(counter_addr, counter_addr, field_val); ++ __ xorr(counter_addr, counter_addr, field_val); ++ __ lw(tmp1, counter_addr, 0); ++ __ bne(counter_prev_val, tmp1, slow); ++ __ delayed()->nop(); ++ ++ switch (type) { ++ case T_FLOAT: __ mtc1 (field_val, F0); break; ++ case T_DOUBLE: __ dmtc1(field_val, F0); break; ++ default: break; ++ } ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +new file mode 100644 +index 00000000000..08ac565aa53 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } ++ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +new file mode 100644 +index 00000000000..1256e957970 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +@@ -0,0 +1,3686 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "jvm.h"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetAssembler.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/bytecodeHistogram.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "nativeInst_mips.hpp"
++#include "oops/compressedOops.inline.hpp"
++#include "oops/klass.inline.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.inline.hpp"
++#include "runtime/jniHandles.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++
++#define A0 RA0
++#define A1 RA1
++#define A2 RA2
++#define A3 RA3
++#define A4 RA4
++#define A5 RA5
++#define A6 RA6
++#define A7 RA7
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++ unsigned int k;
++ for(k=0; k<32; k++) {
++ s->print_cr("i%d = 0x%.16lx", k, i[k]);
++ }
++ s->cr();
++
++ for(k=0; k<32; k++) {
++ s->print_cr("f%d = %f", k, f[k]);
++ }
++ s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ sw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ lw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) {
++ jint& stub_inst = *(jint*) branch;
++ jint *pc = (jint *)branch;
++
++ if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++ //b_far:
++ 
// move(AT, RA); // daddu ++ // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ // nop(); ++ // lui(T9, 0); // to be patched ++ // ori(T9, 0); ++ // daddu(T9, T9, RA); ++ // move(RA, AT); ++ // jr(T9); ++ ++ assert(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op, "Not a branch label patch"); ++ if(!(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } ++ ++ int offset = target - branch; ++ if (!is_simm16(offset)) { ++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); ++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); ++ } else { ++ // revert to "beq + nop" ++ CodeBuffer cb(branch, 4 * 10); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? 
++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. 
++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT 
!= rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, AT); ++ move(tmp, SP); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ Label loop; ++ move(AT, os::vm_page_size()); ++ bind(loop); ++ subu(tmp, tmp, AT); ++ subu(size, size, AT); ++ sd(size, tmp, 0); ++ bgtz(size, loop); ++ delayed()->nop(); ++ ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ subu(tmp, tmp, AT); ++ sd(size, tmp, 0); ++ } ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++void MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ if (!swap_reg_contains_mark) { ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markWord::biased_lock_mask_in_place); ++ daddiu(AT, R0, markWord::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markWord::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markWord::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. 
See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markWord::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); ++ daddiu(AT, R0, markWord::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ assert(number_of_arguments <= 4, "just check"); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ move(AT, SP); ++ dins(SP, R0, 0, 4); ++ daddiu(SP, SP, -(StackAlignmentInBytes)); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->sd(AT, SP, 0); ++ ld(SP, SP, 0); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. FIXME ++// For more info, see class NativeCall. 
++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ dins(SP, R0, 0, 4); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, 
++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ assert(StackAlignmentInBytes == 16, "must be"); ++ dins(SP, R0, 0, 4); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void 
MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ dins(SP, R0, 0, 4); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ ld(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. 
++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ ld(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. ++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ 
//li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)CompressedKlassPointers::encode(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beq(resflag, R0, again); ++ } ++ delayed()->nop(); ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ sc(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beq(resflag, R0, again); ++ } ++ delayed()->nop(); ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ 
dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); ++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / 
sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't 
do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (CompressedOops::base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } 
++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (CompressedOops::base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (CompressedOops::base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (CompressedOops::base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (CompressedOops::base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (CompressedOops::base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (CompressedKlassPointers::base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)CompressedKlassPointers::base()); ++ dsubu(r, r, AT); ++ } ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (CompressedKlassPointers::base() != NULL) { ++ set64(dst, (int64_t)CompressedKlassPointers::base()); ++ dsubu(dst, src, dst); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (CompressedKlassPointers::base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (CompressedKlassPointers::shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (CompressedKlassPointers::shift() == 0 ? 4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (CompressedKlassPointers::base() != NULL) { ++ set64(AT, (int64_t)CompressedKlassPointers::base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. 
++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)CompressedKlassPointers::base()); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (CompressedOops::base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)CompressedOops::ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)CompressedOops::ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). 
++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. ++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. 
++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) { ++ Register rthread = TREG; ++#ifndef OPT_THREAD ++ get_thread(rthread); ++#endif ++ ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, rthread, scratch); ++ ++ Label L_fallthrough; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } ++ ++ // Fast path check: class is fully initialized ++ lb(scratch, Address(klass, InstanceKlass::init_state_offset())); ++ daddiu(scratch, scratch, -InstanceKlass::fully_initialized); ++ beq(scratch, R0, *L_fast_path); ++ delayed()->nop(); ++ ++ // Fast path check: current thread is initializer thread ++ ld(scratch, Address(klass, InstanceKlass::init_thread_offset())); ++ if (L_slow_path == &L_fallthrough) { ++ beq(rthread, scratch, *L_fast_path); ++ delayed()->nop(); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(rthread, scratch, *L_slow_path); ++ delayed()->nop(); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ 
movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +new file mode 100644 +index 00000000000..daec23fcf9c +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +@@ -0,0 +1,704 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. 
No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target, const char* file, int line); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ 
++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ void load_method_holder(Register holder, Register method); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ void clinit_barrier(Register klass, ++ Register scratch, ++ Label* L_fast_path = NULL, ++ Label* L_slow_path = NULL); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ void biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the following two might use the AT register; be sure you have no meaningful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two bytes of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little
endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantics ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // AT indicates if the exchange (xchg) occurred: 1 if exchanged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need two functions to save and restore the general registers ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move a 32-bit immediate to a Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg,
Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +new file mode 100644 +index 00000000000..92c05fb726a +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/matcher_mips.hpp b/src/hotspot/cpu/mips/matcher_mips.hpp +new file mode 100644 +index 00000000000..94f58720ffe +--- /dev/null ++++ b/src/hotspot/cpu/mips/matcher_mips.hpp +@@ -0,0 +1,145 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_MATCHER_MIPS_HPP ++#define CPU_MIPS_MATCHER_MIPS_HPP ++ ++ // Defined within class Matcher ++ ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; ++ ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = false; ++ ++ static const bool supports_scalable_vector() { ++ return false; ++ } ++ ++ // MIPS doesn't support misaligned vectors store/load? FIXME ++ static constexpr bool misaligned_vectors_ok() { ++ return false; ++ } ++ ++ // Whether code generation need accurate ConvI2L types. ++ static const bool convi2l_type_required = true; ++ ++ // Does the CPU require late expand (see block.cpp for description of late expand)? ++ static const bool require_postalloc_expand = false; ++ ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; ++ ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; ++ ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } ++ ++ // No additional cost for CMOVL. 
++ static constexpr int long_cmove_cost() { return 0; } ++ ++ // No CMOVF/CMOVD with SSE2 ++ static int float_cmove_cost() { return ConditionalMoveLimit; } ++ ++ static bool narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++ } ++ ++ static bool narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++ } ++ ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++ } ++ ++ static bool const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return CompressedKlassPointers::base() == NULL; ++ return true; ++ } ++ ++ // Is it better to copy float constants, or load them directly from memory? ++ // Intel can load a float constant from a direct address, requiring no ++ // extra registers. Most RISCs will have to materialize an address into a ++ // register first, so they would do better to copy the constant from stack. ++ static const bool rematerialize_float_constants = false; ++ ++ // If CPU can load and store mis-aligned doubles directly then no fixup is ++ // needed. Else we split the double into 2 integer pieces and move it ++ // piece-by-piece. Only happens when passing doubles into C code as the ++ // Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = false; ++ ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. ++ static const bool strict_fp_requires_explicit_rounding = false; ++ ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } ++ ++ // Do ints take an entire long register or just half? ++ static const bool int_in_long = true; ++ ++ // Does the CPU supports vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return false; // not supported ++ } ++ ++ // Does the CPU supports vector variable rotate instructions? ++ static constexpr bool supports_vector_variable_rotates(void) { ++ return false; // not supported ++ } ++ ++ // Does the CPU supports vector unsigned comparison instructions? ++ static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return false; ++ } ++ ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } ++ ++ // true means we have fast l2f convers ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } ++ ++ // Implements a variant of EncodeISOArrayNode that encode ASCII only ++ static const bool supports_encode_ascii_array = false; ++ ++#endif // CPU_MIPS_MATCHER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +new file mode 100644 +index 00000000000..c4279705062 +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp +@@ -0,0 +1,597 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj_reg, vmClassID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ 
__ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. 
++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. ++ if (iid == vmIntrinsics::_linkToNative) { ++ __ stop("Should not reach here"); // empty stubs make SG sick ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(vmIntrinsics::as_int(iid)), "Oops, iid is not simm16! 
Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. ++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! 
++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. 
++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ // may not be needed, but safer and inexpensive here ++ PreserveExceptionMark pem(Thread::current()); ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independent of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame should be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output).
++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +new file mode 100644 +index 00000000000..a95f8e40596 +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(MethodHandle_klass), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. 
++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +new file mode 100644 +index 00000000000..3563bbe0e59 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +new file mode 100644 +index 00000000000..882878f739a +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips_64.ad +@@ -0,0 +1,12317 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. 
++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, 
K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, 
F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, 
S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. 
++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} ++ ++int MachNode::pd_alignment_required() const { ++ return 1; ++} ++ ++int MachNode::compute_padding(int current_offset) const { ++ return 0; ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
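++ // The handler body is a single patchable call to SharedRuntime::deopt_blob()->unpack(), padded to a 16-byte boundary so it stays within the size reserved by size_deopt_handler().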
++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) ++ return false; ++ ++ return true; ++} ++ ++// Vector calling convention not yet implemented. ++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} ++ ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++ Unimplemented(); ++ return OptoRegPair(0, 0); ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} ++ ++bool Matcher::is_generic_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; ++} ++ ++const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Should the matcher clone input 'm' of node 'n'? 
++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { ++ return false; ++} ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - 
current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ 
Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, 
ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ int off = framesize - wordSize * 2; ++ if (UseLEXT1 && Assembler::is_simm(off, 9)) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ __ ld(AT, thread, in_bytes(JavaThread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ 
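++ // Six 4-byte instructions as listed above, i.e. 24 bytes, which is exactly NativeCall::instruction_size.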
return NativeCall::instruction_size; ++} ++ ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ ConstantTable& constant_table = C->output()->constant_table(); ++ C2_MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
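++ // The base address is the start of the constants section; the internal_word relocation lets the 48-bit immediate built by patchable_set48 be fixed up if this code is later relocated.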
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->output()->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(T9, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(T9, AT, &L_skip_barrier); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L_skip_barrier); ++ } ++ ++ if (C->output()->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ int off = framesize - wordSize * 2; ++ if (UseLEXT1 && Assembler::is_simm(off, 9)) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->output()->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
++ ConstantTable& constant_table = C->output()->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ C2_MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ C2_MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ C2_MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ C2_MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. 
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
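Each of the immediate operands defined below pairs a predicate (which constants it may match) with an op_cost (how strongly the matcher should prefer it), so the cheapest encodable form wins during instruction selection. As a rough C++ sketch of what a predicate such as the one on immI16 tests, with the helper name fits_simm16 invented purely for illustration:

    #include <cstdint>

    // Signed 16-bit encodability check; this is all the immI16 predicate
    // `(-32768 <= n->get_int()) && (n->get_int() <= 32767)` amounts to.
    static inline bool fits_simm16(int32_t v) {
      return v >= -32768 && v <= 32767;
    }

A constant that fails such a check cannot use the cheap reg+imm form and has to be materialized into a register first, which the costlier, unrestricted operands cover.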
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer 
Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} 
++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ 
match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); 
++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ 
++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP 
base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ 
base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((CompressedOops::shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
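All of the memory operands above, and the stack-slot operands defined next, describe an address through the same MEMORY_INTER quadruple: base register, optional index register (0xffffffff meaning no index), scale, and displacement. A minimal C++ sketch of the address such a quadruple denotes, with illustrative names only:

    #include <cstdint>

    constexpr uint32_t NO_INDEX = 0xffffffffu;  // marker used by the operands above

    // Effective address described by a MEMORY_INTER (base, index, scale, disp).
    static inline intptr_t effective_address(intptr_t base, uint32_t index_marker,
                                             intptr_t index_value, int scale,
                                             intptr_t disp) {
      intptr_t ea = base + disp;
      if (index_marker != NO_INDEX) {
        ea += index_value << scale;   // scale 0..3, i.e. a factor of 1/2/4/8
      }
      return ea;
    }

The stackSlot* operands below fix the base to SP (encoding 0x1d) and carry the spill slot's byte offset in disp; they have no match rule because the matcher only generates them for values forced to flow through memory.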
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
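The pipe classes above express result latency as a write stage plus extra cycles, e.g. dst : WB(write)+5 for an integer multiply and +10 for a divide, and the ADLC turns these annotations into scheduling tables. A toy C++ model of how such an annotation can be read, assuming the six-stage pipe_desc(IF, ID, RD, CA, WB, CM) declared above; the function names are made up for illustration:

    // Toy latency model for the pipeline description above (illustration only).
    enum Stage { IF = 0, ID, RD, CA, WB, CM };

    // `dst : WB(write)+extra` roughly means the result is ready this many
    // cycles after the producer issues.
    static inline int result_ready_cycle(int issue_cycle, Stage write_stage, int extra) {
      return issue_cycle + static_cast<int>(write_stage) + extra;
    }

    // A consumer reading its operands at RD stalls until the result is ready.
    static inline int stall_cycles(int producer_issue, int consumer_issue,
                                   Stage write_stage, int extra) {
      int ready = result_ready_cycle(producer_issue, write_stage, extra);
      int need  = consumer_issue + static_cast<int>(RD);
      return ready > need ? ready - need : 0;
    }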
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ 
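The loadI2B/loadI2UB/loadI2S/loadI2US rules above match the shift or mask trees the ideal graph uses to narrow an int loaded from memory, and collapse each tree into a single sign- or zero-extending load. In plain C++ terms the matched idioms are:

    #include <cstdint>

    // int -> narrower-type idioms folded into one narrow load:
    static inline int32_t sext_byte (int32_t x) { return (int8_t)x;  }  // (x << 24) >> 24  -> lb
    static inline int32_t zext_byte (int32_t x) { return x & 0xFF;   }  // x & 255          -> lbu
    static inline int32_t sext_short(int32_t x) { return (int16_t)x; }  // (x << 16) >> 16  -> lh
    static inline int32_t zext_short(int32_t x) { return x & 0xFFFF; }  // x & 65535        -> lhu

Matching the whole tree lets the backend avoid a 32-bit load followed by separate shift or mask instructions.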
++// Load Long. ++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // 
XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( 
ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_ptr) %{ ++ match(TailCall jump_target method_ptr); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register ptr = $method_ptr$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, ptr); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, 
op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ 
case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ 
if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: 
//below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ match(StoreStoreFence); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, 
mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ 
++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t 
@cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) 
flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = 
$tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, 
mRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t 
@cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_s(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_s(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_d(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_d(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower that div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ 
++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr 
$dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; 
++ int shift = $shift$$constant; ++ __ sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime:f2i() to do valid convention */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++instruct castLL(mRegL dst) ++%{ ++ match(Set dst (CastLL dst)); ++ ++ size(0); ++ format %{ "# castLL of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(empty); ++%} ++ ++instruct castFF(regF dst) %{ ++ match(Set dst (CastFF dst)); ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castDD(regD dst) %{ ++ match(Set dst (CastDD dst)); ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castVVD(vecD dst) %{ ++ match(Set dst (CastVV dst)); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, 
$constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ 
match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should use stop, but stop emits too many insts ++ __ brk(18); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != -1) { ++ __ stop("in storePConditional: index != -1"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != -1) { ++ __ stop("in storeIConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != -1) { ++ __ stop("in storeIConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndExchangeI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeI" ++ %} ++ ins_encode %{ ++ Register newval = 
$newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeL(mRegL res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapI" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapL(mRegI res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = 
$mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @WeakCompareAndSwapL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! 
min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ 
Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI 
src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! 
replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 
++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! 
mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. 
++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +new file mode 100644 +index 00000000000..05c525f35ef +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp +@@ -0,0 +1,1825 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ 
is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 
0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ 
nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
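// ---------------------------------------------------------------------------
// Editor's note: the helper below is an illustrative sketch added for this
// write-up, not part of the original Loongson patch. It models the check that
// patch_on_j_only()/patch_on_j_gs() perform: a MIPS J-type instruction only
// carries a 26-bit word index, so the target must lie in the same 256MB
// region as the instruction in the delay slot. All names are hypothetical.

// Returns the encoded J-type word, or 0 if 'target' is not reachable from the
// 256MB window containing 'delay_slot_pc' (the analogue of addr_at(4) above).
static unsigned int encode_j_type(unsigned long delay_slot_pc, unsigned long target, unsigned int opcode6) {
  const unsigned long region = delay_slot_pc & 0xfffffffff0000000UL; // 256MB-aligned window
  const long          index  = (long)(target - region) >> 2;         // word index inside the window
  if (index < 0 || index >= (1L << 26))                              // must fit in 26 bits
    return 0;                                                        // unreachable this way
  return (opcode6 << 26) | (unsigned int)index;                      // opcode | instr_index
}

// When the window check fails, the port falls back to the longer
// lui/ori/dsll/ori materialization followed by jr/jalr, as in the routines above.
// ---------------------------------------------------------------------------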
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like the this. 
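// Editor's note: an illustrative decode of the fields tested just below; this
// sketch is not part of the original patch and the names are hypothetical.
// is_op()/is_rt() simply pick apart the fixed MIPS I-type fields of the word:
//
//   struct ITypeFields { unsigned op, rs, rt; int simm16; };
//   static ITypeFields decode_itype(unsigned insn) {
//     ITypeFields f;
//     f.op     = insn >> 26;               // bits 31..26: major opcode (lw is 0x23)
//     f.rs     = (insn >> 21) & 0x1f;      // bits 25..21: base register
//     f.rt     = (insn >> 16) & 0x1f;      // bits 20..16: destination register
//     f.simm16 = (short)(insn & 0xffff);   // bits 15..0 : sign-extended displacement
//     return f;
//   }
//
// The poll is accepted whenever op is the lw opcode and rt is AT, independent
// of the base register, because register allocation and spill code may vary.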
++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +new file mode 100644 +index 00000000000..9f0f24cc34b +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++class NativeCall; ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
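// Editor's note: illustrative sketch, not part of the original patch. The li48
// form referenced throughout this header splits a positive 48-bit address into
// three 16-bit pieces (lui / ori / dsll 16 / ori); data() and jump_destination()
// reverse that split with Assembler::merge(). A stand-alone model of the round
// trip, with hypothetical names:
//
//   const unsigned long value = 0x00005566778899aaUL;     // positive, fits in 48 bits
//   const unsigned hi16  = (value >> 32) & 0xffff;        // lui  d, hi16
//   const unsigned mid16 = (value >> 16) & 0xffff;        // ori  d, d, mid16
//   const unsigned lo16  =  value        & 0xffff;        // dsll d, d, 16;  ori d, d, lo16
//   const unsigned long merged =
//       ((((unsigned long)hi16 << 16) | mid16) << 16) | lo16;
//   // merged == value
//
// Destinations that need all 64 bits use the longer li64 sequence shown above,
// which starts from bits 63..48 and adds one more ori/dsll step.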
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
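// Editor's note: illustrative sketch, not part of the original patch. A
// trampoline call does not encode its destination in the instruction stream:
// the lui/ori/dsll/ld prefix recognized by is_trampoline_call() computes the
// address of a stub slot and loads the real target from it, so retargeting the
// call only requires one store into that slot (see set_destination() below).
// A minimal stand-alone model, assuming the slot is naturally aligned:
//
//   static void retarget_trampoline(unsigned long* stub_slot, unsigned long new_target) {
//     *stub_slot = new_target;   // single aligned 64-bit store; concurrent readers
//   }                            // observe either the old or the new destination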
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +new file mode 100644 +index 00000000000..3f3558f79d4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg base_reg, int slot_idx) const { ++ return location(base_reg->next(slot_idx)); ++ } ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +new file mode 100644 +index 00000000000..4af25318346 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +new file mode 100644 +index 00000000000..4a9b22bfef2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ +diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +new file mode 100644 +index 00000000000..4f74717c24f +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.hpp +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "logging/log.hpp" ++#include "utilities/bitMap.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/ticks.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) 
++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define RA0 ((Register)(i4_RegisterEnumValue)) ++#define RA1 ((Register)(i5_RegisterEnumValue)) ++#define RA2 ((Register)(i6_RegisterEnumValue)) ++#define RA3 ((Register)(i7_RegisterEnumValue)) ++#define RA4 ((Register)(i8_RegisterEnumValue)) ++#define RA5 ((Register)(i9_RegisterEnumValue)) ++#define RA6 ((Register)(i10_RegisterEnumValue)) ++#define RA7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 ((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temporary callee-saved register; we use this register to hold a value that may be clobbered across call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif //
DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) 
++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +new file mode 100644 +index 00000000000..7d8d072b51b +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (CompressedOops::is_in((void*)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode(cast_to_oop(x)), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedKlassPointers::encode((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedKlassPointers::encode((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ 
else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +new file mode 100644 +index 00000000000..1e1e170fd87 +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +new file mode 100644 +index 00000000000..36ab413f0b3 +--- /dev/null ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/vmreg.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. 
++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +new file mode 100644 +index 00000000000..48cc424a54e +--- /dev/null ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3384 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ 
DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ 
sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off 
* jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed) { ++ ++ // Create the mapping between argument positions and registers. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align the stack ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
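++ // Sizing sketch for the computation below (assuming a 64-bit build, wordSize == 8):
++ // e.g. comp_args_on_stack == 3 slots -> align_up(12, 8) >> 3 == 2 words, which is
++ // then rounded up to an even word count so SP keeps 16-byte alignment.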
++ comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = align_up(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move Method* to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect Method* in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the Method*. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
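++ // Concretely: reload the Method's code field below; if it is still NULL the callee
++ // has no compiled entry and we branch to skip_fixup, otherwise we fall through to
++ // 'missed' and jump to the IC-miss stub so the call site gets re-resolved.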
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ ++ { // Bypass the barrier for non-static methods ++ __ lw(AT, Address(Rmethod, Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, L_skip_barrier); // non-static ++ __ delayed()->nop(); ++ } ++ ++ __ load_method_holder(T9, Rmethod); ++ __ clinit_barrier(T9, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} ++ ++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
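++ // The value returned below is the number of 32-bit VMReg out-arg slots: every
++ // argument that does not fit in A0 - A7 / F12 - F19 reserves two slots, and the
++ // total is rounded up to an even count (a whole number of 64-bit words).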
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void 
SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? 
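++ // For this stack-resident case the oop never moves: the slot it already occupies
++ // in the caller's frame is recorded in the OopMap below, and what gets passed to
++ // the native code is the address of that slot (or a NULL handle for a NULL oop).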
++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. ++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. 
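++ // On this 64-bit port both halves of a double fit in one FPR or one 8-byte stack
++ // slot, so each case below is a single ld/sd, ldc1/sdc1 or mov_d; the "pair of
++ // stack slots" wording appears to be inherited from the 32-bit ports.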
++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
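++ // The loop below counts one 32-bit slot per int-like or float register argument
++ // and two per long/double/array argument; if any doublewords are present the
++ // base of the save area is rounded up to an even slot so they stay 8-byte aligned.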
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = align_up(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
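++ // What follows is the unverified entry point: load the receiver's klass into T9
++ // and compare it with the inline-cache klass expected in T1 (ic_reg); on a
++ // mismatch we jump to the IC-miss stub, otherwise execution falls through to the
++ // verified entry point recorded right after the 'hit' label.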
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ __ mov_metadata(T9, method->method_holder()); // InstanceKlass* ++ __ clinit_barrier(T9, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L_skip_barrier); ++ } ++ ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. 
The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ 
reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ } ++ ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. 
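++ // The native result comes back in V0 (or in a float register for float/double).
++ // Sub-int results are normalized below: booleans are canonicalized to 0/1, chars
++ // are zero-extended, bytes/shorts are sign-extended; object/array results remain
++ // JNI handles until after the safepoint check.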
++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ ++ Label after_transition; ++ ++ // If this is a critical native, check for a safepoint or suspend request after the call. ++ // If a safepoint is needed, transition to native, then to native_trans to handle ++ // safepoints like the native methods that are not critical natives. ++ if (is_critical_native) { ++ Label needs_safepoint; ++ __ safepoint_poll_acquire(needs_safepoint, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, after_transition); ++ __ delayed()->nop(); ++ __ bind(needs_safepoint); ++ } ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) __ sync(); // Force this write out before the read below ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
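++ // The slow path below therefore spills the native result, calls
++ // JavaThread::check_special_condition_for_native_trans directly with a 16-byte
++ // aligned SP, then restores the result before rejoining the fast path at 'Continue'.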
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align stack as required by ABI ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. 
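++ // A returned jobject is still a JNI handle at this point; resolve_jobject turns
++ // it back into a raw oop in V0 before we return to Java code.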
++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit 
complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ return nm; ++} ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. mips64 needs two slots for ++// return address and fp. ++// TODO think this is correct but check ++uint SharedRuntime::in_preserve_stack_slots() { ++ return 4; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. 
++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. 
Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. 
++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, T8); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lw(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. 
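// [Editorial sketch -- illustration only, not part of the upstream patch]
// The loop that follows walks the UnrollBlock arrays and builds one skeletal
// interpreter frame per virtual frame: push this frame's pc and the old fp,
// point fp at the new link, carve out frame_sizes[i] bytes, and record the
// caller's unextended sp so the frame stays walkable. A rough, hypothetical
// C++ rendering of that loop (slot indices taken from the annotated
// disassembly in the comment below; <cstdint> assumed for intptr_t):
struct UnrollSketch {                    // assumed mirror of the UnrollBlock fields used here
  int       number_of_frames;
  intptr_t* frame_sizes;                 // byte size of each skeletal frame
  intptr_t* frame_pcs;                   // pc each skeletal frame resumes at
};
enum { LAST_SP_SLOT = -2, SENDER_SP_SLOT = -1 };   // assumed fp-relative word offsets
static void push_skeletal_frames_sketch(const UnrollSketch* info,
                                        intptr_t*& sp, intptr_t*& fp,
                                        intptr_t*  sender_sp) {
  for (int i = 0; i < info->number_of_frames; i++) {
    intptr_t body = info->frame_sizes[i] - 2 * sizeof(intptr_t); // pc and fp pushed by hand
    sp -= 2;
    sp[1] = info->frame_pcs[i];          // return pc of this skeletal frame
    sp[0] = (intptr_t)fp;                // saved frame pointer
    fp = sp;                             // new frame link
    sp = (intptr_t*)((char*)sp - body);  // expand the frame body
    fp[LAST_SP_SLOT]   = 0;              // interpreter_frame_last_sp, fixed up later
    fp[SENDER_SP_SLOT] = (intptr_t)sender_sp; // makes the frame walkable
    sender_sp = sp;                      // becomes the next frame's sender sp
  }
}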
++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. 
++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. 
We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, T8); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lw(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. 
++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (!cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. 
++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? ++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. 
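// [Editorial sketch -- illustration only, not part of the upstream patch]
// What the inline-assembly sub() below computes, written portably: subtract
// the len-word value b from the (len+1)-word value carry:a (least significant
// word first), store the low len words back into a, and return the new top
// word. Assumes a 64-bit unsigned long, like the rest of this file:
static unsigned long sub_sketch(unsigned long a[], unsigned long b[],
                                unsigned long carry, long len) {
  unsigned long borrow = 0;
  for (long i = 0; i < len; i++) {
    unsigned long ai = a[i];
    unsigned long t  = (ai < borrow);   // borrow out of subtracting the old borrow
    ai -= borrow;
    t |= (ai < b[i]);                   // borrow out of subtracting b[i]
    a[i]   = ai - b[i];
    borrow = t;
  }
  return carry - borrow;                // new most-significant word
}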
++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"=&r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry, t; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"=&r"(carry), [t]"=&r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. ++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry, t; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"=&r"(carry), [t]"=&r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
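// [Editorial sketch -- illustration only, not part of the upstream patch]
// The MACC/MACC2 helpers above keep a three-word accumulator t2:t1:t0 and add
// one (or two) 128-bit products a*b into it. With GCC's unsigned __int128 the
// same step can be written portably; this is only meant to make the
// interleaved multiply-and-reduce loops below easier to follow:
static inline void macc_sketch(unsigned long a, unsigned long b,
                               unsigned long& t0, unsigned long& t1,
                               unsigned long& t2) {
  unsigned __int128 p = (unsigned __int128)a * b;  // full 128-bit product
  unsigned long lo = (unsigned long)p;
  unsigned long hi = (unsigned long)(p >> 64);
  t0 += lo;
  unsigned long c = (t0 < lo);                     // carry out of the low word
  t1 += c;
  unsigned long c1 = (t1 < c);
  t1 += hi;
  c1 |= (t1 < hi);                                 // carry out of the middle word
  t2 += c1;                                        // carry stops here, as in the asm
}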
++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. ++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. 
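// [Editorial note -- worked arithmetic, not part of the upstream patch]
// With len capped at 512 jints (512 * 32 = 16384 bits), longwords = len / 2
// is at most 256, and the four scratch arrays a, b, n, m below take
// 4 * 256 * sizeof(unsigned long) = 4 * 256 * 8 = 8192 bytes on an LP64
// target, which is exactly the bound the guarantee() below enforces.
static_assert(4 * 256 * 8 == 8192, "editorial sanity check of the 8 KB bound");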
++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} ++#endif +diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +new file mode 100644 +index 00000000000..e894a302b50 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +@@ -0,0 +1,2725 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. 
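// [Editorial sketch -- illustration only, not part of the upstream patch]
// The call stub generated below is the single entry point the VM uses to call
// Java from C. Going by the register layout described in the comments that
// follow, it is reached with eight arguments in A0..A7; a simplified,
// hypothetical signature and the reverse-order parameter copy the stub
// performs look roughly like this (<cstdint> assumed for intptr_t):
typedef void (*call_stub_sketch_t)(void*     call_wrapper,   // A0
                                   intptr_t* result,         // A1
                                   int       result_type,    // A2
                                   void*     method,         // A3
                                   void*     entry_point,    // A4
                                   intptr_t* parameters,     // A5
                                   int       parameter_size, // A6
                                   void*     thread);        // A7
// Java arguments are copied onto the freshly carved expression stack in
// reverse order ("receiver last", as the stub's own comment puts it):
static void copy_java_parameters_sketch(intptr_t* expr_stack,
                                        const intptr_t* parameters, int n) {
  for (int i = 0; i < n; i++)
    expr_stack[i] = parameters[n - 1 - i];
}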
++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! ++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, Method* in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * 
T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. 
++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
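Before the remaining alignment steps, the overall shape of this disjoint byte copy can be sketched in C; the 2- and 4-byte alignment ladders of the real stub are collapsed here, and the helper name is hypothetical:

#include <stdint.h>
#include <string.h>

static void copy_bytes_disjoint(const uint8_t* from, uint8_t* to, size_t count) {
    /* Only when src and dst agree mod 8 can both reach an 8-byte boundary. */
    if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) {
        while (((uintptr_t)from & 7) && count) { *to++ = *from++; count--; }
        while (count >= 8) { memcpy(to, from, 8); from += 8; to += 8; count -= 8; }
    }
    while (count >= 4) { memcpy(to, from, 4); from += 4; to += 4; count -= 4; }
    while (count--) *to++ = *from++;
}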
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
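The conjoint byte copy that follows runs from the high addresses downwards, so a destination that overlaps the tail of the source never reads bytes it has already overwritten (memmove semantics). Stripped of its 4-byte-at-a-time optimization, the idea in C:

#include <stddef.h>
#include <stdint.h>

static void copy_bytes_backward(const uint8_t* from, uint8_t* to, size_t count) {
    while (count--)
        to[count] = from[count];   /* last byte first */
}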
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
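Once both pointers reach a 16-byte boundary, the main loop below moves 32 bytes per iteration, either as four 64-bit loads and stores or, when UseLEXT1 is set, as two 128-bit gslq/gssq pairs. Ignoring the alignment pre-loops, the loop shape in C (hypothetical helper, not part of the patch):

#include <stdint.h>
#include <string.h>

static void copy_shorts_bulk(const uint16_t* from, uint16_t* to, size_t count) {
    while (count >= 16) {                 /* 32 bytes per iteration */
        memcpy(to, from, 32);             /* four 64-bit or two 128-bit moves in the stub */
        from += 16; to += 16; count -= 16;
    }
    while (count--) *to++ = *from++;      /* leftover elements */
}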
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
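The int copy introduced here widens to 64-bit moves whenever source and destination agree modulo 8: one leading 4-byte copy, if needed, brings both onto an 8-byte boundary, after which two ints travel per load/store pair; otherwise it falls back to a plain 4-byte loop. The oop variant additionally brackets the copy with the GC barrier prologue and epilogue seen below. A sketch of the non-oop path in C (hypothetical helper name):

#include <stdint.h>
#include <string.h>

static void copy_ints_disjoint(const uint32_t* from, uint32_t* to, size_t count) {
    if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) {
        if (((uintptr_t)from & 7) && count) { *to++ = *from++; count--; }
        while (count >= 2) {              /* two ints per 64-bit access */
            memcpy(to, from, 8);
            from += 2; to += 2; count -= 2;
        }
    }
    while (count--) *to++ = *from++;      /* unpaired or unaligned remainder */
}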
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
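Every oop-capable copy stub in this file wraps its element loop between BarrierSetAssembler::arraycopy_prologue and arraycopy_epilogue so the garbage collector observes each reference store; the conjoint variants additionally walk backwards after the overlap test. The shape only, sketched in C with the barrier calls left as comments:

#include <stddef.h>

static void oop_arraycopy_shape(void** from, void** to, size_t count) {
    /* bs->arraycopy_prologue(masm, decorators, is_oop, A1, A2); */
    for (size_t i = 0; i < count; i++)
        to[i] = from[i];
    /* bs->arraycopy_epilogue(masm, decorators, is_oop, A1, A2, T1); */
}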
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ } ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ } ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ 
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ if (squaring) { ++ t0 = ++reg; // Three registers which form a ++ t1 = AT; // triple-precision accumuator. ++ t2 = V0; ++ ++ Ri = V1; // Inner and outer loop indexes. ++ Rj = T8; ++ ++ Rhi_ab = T9; // Product registers: low and high parts ++ Rlo_ab = S0; // of a*b and m*n. ++ ++ Rhi_mn = S1; ++ Rlo_mn = S2; ++ } else { ++ t0 = AT; // Three registers which form a ++ t1 = V0; // triple-precision accumuator. ++ t2 = V1; ++ ++ Ri = T8; // Inner and outer loop indexes. ++ Rj = T9; ++ ++ Rhi_ab = S0; // Product registers: low and high parts ++ Rlo_ab = S1; // of a*b and m*n. 
++ ++ Rhi_mn = S2; ++ Rlo_mn = S3; ++ } ++ } ++ ++ private: ++ void enter() { ++ addiu(SP, SP, -6 * wordSize); ++ sd(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addiu(T0, FP, 6 * wordSize); ++ ld(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ sd(Rhi_ab, FP, 5 * wordSize); ++ sd(Rlo_ab, FP, 4 * wordSize); ++ sd(Rhi_mn, FP, 3 * wordSize); ++ sd(Rlo_mn, FP, 2 * wordSize); ++ sd(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld(Rhi_ab, FP, 5 * wordSize); ++ ld(Rlo_ab, FP, 4 * wordSize); ++ ld(Rhi_mn, FP, 3 * wordSize); ++ ld(Rlo_mn, FP, 2 * wordSize); ++ ld(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bne(tmp, R0, odd); ++ delayed()->nop(); ++ beq(count, R0, end); ++ delayed()->nop(); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addiu32(count, count, -2); ++ bgtz(count, loop); ++ delayed()->nop(); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bne(tmp, R0, odd); ++ delayed()->nop(); ++ beq(count, R0, end); ++ delayed()->nop(); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addiu32(count, count, -2); ++ bgtz(count, loop); ++ delayed()->nop(); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ daddu(t0, t0, Rlo); ++ orr(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ daddu(t1, t1, Rhi); ++ daddu(t1, t1, c); ++ sltu(c, t1, t); ++ daddu(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ sll(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld(Ra, Pa_base, 0); ++ gsldx(Rb, Pb_base, Ibn, 0); ++ ld(Rm, Pm_base, 0); ++ gsldx(Rn, Pn_base, Ibn, 0); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addiu32(Iam, Iam, wordSize); ++ addiu32(Ibn, Ibn, -wordSize); ++ dmultu(Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
++ gsldx(Ra, Pa_base, Iam, 0); ++ mflo(Rlo_ab); ++ mfhi(Rhi_ab); ++ gsldx(Rb, Pb_base, Ibn, 0); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ dmultu(Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ gsldx(Rm, Pm_base, Iam, 0); ++ mflo(Rlo_mn); ++ mfhi(Rhi_mn); ++ gsldx(Rn, Pn_base, Ibn, 0); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ dmultu(Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ mflo(Rlo_ab); ++ mfhi(Rhi_ab); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ gsdmultu(Rm, t0, inv); ++ gssdx(Rm, Pm_base, Iam, 0); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ dmultu(Rm, Rn); ++ mfhi(Rhi_mn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mflo(Rlo_mn); ++ daddu(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beq(Rlo_mn, R0, ok); ++ delayed()->nop(); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mflo(Rlo_mn); ++ // addu(t0, t0, Rlo_mn); ++ orr(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ daddu(t0, t1, Rhi_mn); ++ daddu(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ daddu(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ subu32(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ sll(Iam, Rj, LogBytesPerWord); ++ sll(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ gsldx(Ra, Pa_base, Iam, wordSize); ++ gsldx(Rb, Pb_base, Ibn, -wordSize); ++ gsldx(Rm, Pm_base, Iam, wordSize); ++ gsldx(Rn, Pn_base, Ibn, -wordSize); ++ ++ addiu32(Iam, Iam, wordSize); ++ addiu32(Ibn, Ibn, -wordSize); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ subu32(Rj, i, len); ++ sll(Iam, Rj, LogBytesPerWord); ++ ++ daddu(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ gssdx(t0, Pm_base, Iam, 0); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ orr(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ daddu(t0, t1, Rhi_mn); // The pending m*n, high part ++ daddu(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ daddu(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
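Two of the generator's helpers can be stated compactly in C: acc() adds a 64x64-bit product into the triple-precision accumulator t2:t1:t0, and normalize() below repeatedly subtracts the modulus n from the result m while the carry word t0 is non-zero. The names acc3 and sub_n are hypothetical and this is only a sketch; unsigned __int128 is a GCC/Clang extension used for brevity:

#include <stddef.h>
#include <stdint.h>

/* acc(): add the 128-bit product hi:lo into the accumulator t2:t1:t0. */
static void acc3(uint64_t hi, uint64_t lo, uint64_t t[3]) {
    unsigned __int128 a = ((unsigned __int128)hi << 64) | lo;
    unsigned __int128 s = (((unsigned __int128)t[1] << 64) | t[0]) + a;
    t[0]  = (uint64_t)s;
    t[1]  = (uint64_t)(s >> 64);
    t[2] += (s < a);                       /* carry out of the 128-bit add */
}

/* One subtraction pass inside normalize(): m -= n with borrow propagation;
 * the caller subtracts the returned borrow from t0. */
static uint64_t sub_n(uint64_t* m, const uint64_t* n, size_t len) {
    uint64_t borrow = 0;
    for (size_t i = 0; i < len; i++) {
        uint64_t mi = m[i];
        uint64_t t  = mi - borrow;
        borrow      = (mi < borrow);
        borrow     += (t < n[i]);
        m[i]        = t - n[i];
    }
    return borrow;
}
/* normalize() is then: while (t0) t0 -= sub_n(m, n, len); */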
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beq(t0, R0, post); ++ delayed()->nop(); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ sll(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ gsldx(Rm, Pm_base, i, 0); ++ gsldx(Rn, Pn_base, i, 0); ++ sltu(t, Rm, b); ++ dsubu(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ dsubu(Rm, Rm, Rn); ++ orr(b, b, t); ++ gssdx(Rm, Pm_base, i, 0); ++ addiu32(i, i, BytesPerWord); ++ } sltu(Rm, i, cnt); ++ bne(Rm, R0, loop); ++ delayed()->nop(); ++ subu(t0, t0, b); ++ } bne(t0, R0, again); ++ delayed()->nop(); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ sll(tmp1, len, LogBytesPerWord); ++ addu(s, s, tmp1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ sll(s, len, LogBytesPerWord); ++ subu(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld(tmp, s, -wordSize); ++ addiu(s, s, -wordSize); ++ addiu(d, d, wordSize); ++ drotr32(tmp, tmp, 32 - 32); ++ sd(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beq(Rlen, R0, nothing); ++ delayed()->nop(); ++ ++ enter(); ++ ++ // Make room. ++ sltiu(Ra, Rlen, 513); ++ beq(Ra, R0, argh); ++ delayed()->sll(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ subu(Ra, SP, Ra); ++ ++ srl(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld(Rn, Pn_base, 0); ++ li(t0, -1); ++ gsdmultu(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); ++ delayed()->nop(); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ slt(Ra, Ri, Rlen); ++ beq(Ra, R0, end); ++ delayed()->nop(); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addiu32(Ri, Ri, 1); ++ slt(Ra, Ri, Rlen); ++ bne(Ra, R0, loop); ++ delayed()->nop(); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ sll(Rlen2, Rlen, 1); { ++ Label loop, end; ++ slt(Ra, Ri, Rlen2); ++ beq(Ra, R0, end); ++ delayed()->nop(); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ subu32(Rj, Rlen2, Ri); ++ addiu32(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addiu32(Ri, Ri, 1); ++ slt(Ra, Ri, Rlen2); ++ bne(Ra, R0, loop); ++ delayed()->nop(); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], 
"must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ if (UseLEXT1) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } else { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ if (UseLEXT1) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. 
++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } else { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++#define UCM_TABLE_MAX_ENTRIES 2 ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); ++ } ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +new file mode 100644 +index 00000000000..920c08844e1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +new file mode 100644 +index 00000000000..358d580d527 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +new file mode 100644 +index 00000000000..b723add6dfc +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +@@ -0,0 +1,2094 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
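++ // In outline, each entry generated below loads its operands from the
++ // expression stack at SP, restores the caller's stack pointer from Rsender
++ // to pop them, leaves the result in F0 and returns through RA; the
++ // transcendental cases additionally save RA/SP in F24/F25 across the
++ // runtime call made by generate_transcendental_entry().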
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: Method* ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++ Label done; ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(T0, Address(Rmethod, Method::method_data_offset())); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(T0, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(T0, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, T0, done); ++ const Address mask(T0, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. 
jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
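++ // Roughly, the check emitted below is:
++ //   if (additional_locals * stackElementSize + overhead_size <= page_size)
++ //     goto after_frame_check;           // frame fits in the guarded page
++ //   limit = stack_base - stack_size + max_bang_size;
++ //   if (SP > limit + additional_locals * stackElementSize + overhead_size)
++ //     goto after_frame_check;           // enough stack remains
++ //   SP = Rsender; jump to the shared StackOverflowError stub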
++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(StackOverflow::stack_shadow_zone_size(), StackOverflow::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
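++ // Restore the caller's SP first so the shared stub sees the stack as the
++ // caller left it, abandoning this never-completed activation.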
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
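++// In outline: the stub builds a fixed interpreter frame, lets the signature
++// handler marshal the Java arguments into the native ABI locations, passes
++// JNIEnv (and the mirror handle for static methods), flips the thread state
++// to _thread_in_native around the JNI call, then re-checks for safepoints,
++// unboxes an oop result if necessary and removes the activation.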
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if( os::is_MP() ) __ sync(); // Force this write out before the read below ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
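++ // Instead the pending exception is rethrown through a regular call_VM to
++ // InterpreterRuntime::throw_pending_exception; control does not return here.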
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? 
n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. 
++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif ++ __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ move(AT, JvmtiThreadState::earlyret_inactive); ++ __ sw(AT, cond_addr); ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ jr(T0); ++ __ delayed()->nop(); ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); ++ dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); ++ lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); ++ aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++/* ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++*/ ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but mips o32 call convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. 
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +new file mode 100644 +index 00000000000..46a88aba261 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +new file mode 100644 +index 00000000000..bbf95f45225 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +@@ -0,0 +1,4613 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/universe.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++ Label notNull; ++ __ set64(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ resolve_oop_handle(tmp, T9); ++ __ bne(tmp, result, notNull); ++ __ delayed()->nop(); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ ++ // dtos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
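++ // Reminder on jsr/ret semantics: jsr pushes its return address as a raw
++ // bytecode index (an int, not an oop) and branches to the target; a later
++ // ret bytecode reloads that bci from a local variable to resume after the
++ // jsr. That is why the block below pushes the bci with push_i and then
++ // dispatches at vtos.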
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ const Address mask(T0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? 
++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ 
daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) ++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, 
in_bytes(JavaThread::polling_word_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved, clinit_barrier_slow; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
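++ // A cp cache entry records the bytecode that resolved it; if the value
++ // loaded into temp above already matches the current bytecode, the entry
++ // is resolved and the slow call into InterpreterRuntime can be skipped.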
++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++ ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache, index); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, AT, NULL, &clinit_barrier_slow); ++ } ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. 
++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ 
delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. 
++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, 
ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, 
off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize 
base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. 
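++ // The counter read below is expected to be non-zero only while at least
++ // one JVMTI agent has a field-access watch installed, so the common case
++ // costs one load plus a branch over the JVMTI work.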
++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp 
range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "Method must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed Method*, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target Method & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ load_method_holder(T2, Rmethod); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markWord::prototype().value()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ movz(c_rarg0, T2, AT); ++ ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. 
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} +diff --git a/src/hotspot/cpu/mips/universalNativeInvoker_mips_64.cpp b/src/hotspot/cpu/mips/universalNativeInvoker_mips_64.cpp +new file mode 100644 +index 00000000000..87f6a113268 +--- /dev/null ++++ b/src/hotspot/cpu/mips/universalNativeInvoker_mips_64.cpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} +diff --git a/src/hotspot/cpu/mips/universalUpcallHandler_mips_64.cpp b/src/hotspot/cpu/mips/universalUpcallHandler_mips_64.cpp +new file mode 100644 +index 00000000000..7586b084868 +--- /dev/null ++++ b/src/hotspot/cpu/mips/universalUpcallHandler_mips_64.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} ++ ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} +diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +new file mode 100644 +index 00000000000..6939914356d +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +new file mode 100644 +index 00000000000..d3f07078570 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", features_string()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", features_string()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +new file mode 100644 +index 00000000000..ffdcff06777 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +new file mode 100644 +index 00000000000..8625bc70075 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp +@@ -0,0 +1,523 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define A0 RA0 ++ ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't detect CPU info - cannot 
open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _features = get_feature_flags_by_cpucfg(); ++ // Only Loongson CPUs 
support cpucfg ++ _features |= CPU_LOONGSON; ++ } else { ++ _features = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ UNSUPPORTED_OPTION(TieredCompilation); ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. 
++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_string = os::strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA3Intrinsics) { ++ warning("SHA3 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } ++ ++ if (UseMD5Intrinsics) { ++ warning("MD5 intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES 
intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +new file mode 100644 +index 00000000000..30c7b5a934d +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp +@@ -0,0 +1,218 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum Feature_Flag { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ }; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ 
static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _features & CPU_LOONGSON; } ++ static bool is_gs264() { return _features & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _features & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _features & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _features & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _features & CPU_MSA2_0; } ++ static bool supports_cgp() { return _features & CPU_CGP; } ++ static bool supports_mmi() { return _features & CPU_MMI; } ++ static bool supports_lsx1() { return _features & CPU_LSX1; } ++ static bool supports_lsx2() { return _features & CPU_LSX2; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lext1() { return _features & CPU_LEXT1; } ++ static bool supports_lext2() { return _features & CPU_LEXT2; } ++ static bool supports_lext3() { return _features & CPU_LEXT3; } ++ static bool supports_lamo() { return _features & CPU_LAMO; } ++ static bool supports_lpixu() { return _features & CPU_LPIXU; } ++ static bool needs_llsync() { return _features & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _features & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _features & CPU_ULSYNC; } ++ static bool supports_mualp() { return _features & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static bool supports_fast_class_init_checks() { return true; } ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +new file mode 100644 +index 00000000000..95dbd17f1a8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} +diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +new file mode 100644 +index 00000000000..8ccc8c513c8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +new file mode 100644 +index 00000000000..12ad7361aa5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +new file mode 100644 +index 00000000000..f373aac45c2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +@@ -0,0 +1,348 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. 
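// Illustrative aside, not part of the Loongson patch: the vtable stub being
// assembled in create_vtable_stub() hand-codes the dispatch sequence
// "receiver -> klass -> vtable[vtable_index] -> Method* -> from_compiled entry".
// A minimal standalone C++ sketch of that lookup, using hypothetical stand-in
// types rather than HotSpot's real Klass/Method layout:
struct SketchMethod { const void* from_compiled_entry; };
struct SketchKlass  { SketchMethod* vtable[64]; };   // 64 is an arbitrary example size
struct SketchOop    { SketchKlass* klass; };         // simplified object header

inline const void* sketch_vtable_dispatch(SketchOop* receiver, int vtable_index) {
  SketchKlass*  k = receiver->klass;           // corresponds to __ load_klass(t1, T0)
  SketchMethod* m = k->vtable[vtable_index];   // corresponds to __ lookup_virtual_method(...)
  return m->from_compiled_entry;               // corresponds to __ ld_ptr(T9, ...); __ jr(T9)
}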
++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load Method* and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: Method* ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. 
Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. 
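// Illustrative aside, not part of the Loongson patch: the itable stub above
// scans the receiver's itable twice (once against the resolved interface,
// once against the declaring interface) and then indexes into the matching
// method block. A minimal standalone sketch of that scan, with a hypothetical
// entry type rather than HotSpot's real itableOffsetEntry layout:
struct SketchItableEntry { const void* interface_klass; int offset_in_bytes; };

// Returns the byte offset of the matching interface's method block,
// or -1 when the receiver class does not implement it (L_no_such_interface).
inline int sketch_find_itable_offset(const SketchItableEntry* itable, const void* wanted) {
  for (int i = 0; ; i++) {
    if (itable[i].interface_klass == nullptr) return -1;
    if (itable[i].interface_klass == wanted)  return itable[i].offset_in_bytes;
  }
}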
++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get Method* and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: Method* ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ start_pc = __ pc(); ++ __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp +index 17fc8e5078e..27e431c2c61 100644 +--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp ++++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp +@@ -292,7 +292,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, R + // Code emitted by LIR node "LIR_OpZLoadBarrierTest" which in turn is emitted by ZBarrierSetC1::load_barrier. + // The actual compare and branch instructions are represented as stand-alone LIR nodes. 
+ void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ block_comment("load_barrier_test (zgc) {"); + + __ ld(R0, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread); +diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp +index e2ff1bf53ae..4957e73ae22 100644 +--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp ++++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp +@@ -67,7 +67,8 @@ public: + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff --git a/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp +index 3657b16fc1a..a2aab225743 100644 +--- a/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp ++++ b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp +@@ -30,6 +30,8 @@ const size_t ZPlatformGranuleSizeShift = 21; // 2MB + const size_t ZPlatformHeapViews = 3; + const size_t ZPlatformCacheLineSize = DEFAULT_CACHE_LINE_SIZE; + ++const bool ZPlatformLoadBarrierTestResultInRegister = false; ++ + size_t ZPlatformAddressOffsetBits(); + size_t ZPlatformAddressMetadataShift(); + +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 99dd4c82420..5fefcc00c55 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/vmSymbols.hpp" +@@ -2455,7 +2461,7 @@ void os::print_memory_info(outputStream* st) { + // before "flags" so if we find a second "model name", then the + // "flags" field is considered missing. + static bool print_model_name_and_flags(outputStream* st, char* buf, size_t buflen) { +-#if defined(IA32) || defined(AMD64) ++#if defined(IA32) || defined(AMD64) || defined(MIPS) + // Other platforms have less repetitive cpuinfo files + FILE *fp = fopen("/proc/cpuinfo", "r"); + if (fp) { +@@ -2545,7 +2551,7 @@ void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) { + print_sys_devices_cpu_info(st, buf, buflen); + } + +-#if defined(AMD64) || defined(IA32) || defined(X32) ++#if defined(AMD64) || defined(IA32) || defined(X32) || defined(MIPS) + const char* search_string = "model name"; + #elif defined(M68K) + const char* search_string = "CPU"; +diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +new file mode 100644 +index 00000000000..30719a0340b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +new file mode 100644 +index 00000000000..beb717b67ff +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP
++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP
++
++#include "runtime/vm_version.hpp"
++
++// Implementation of class atomic
++
++template<size_t byte_size>
++struct Atomic::PlatformAdd {
++  template<typename D, typename I>
++  D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const;
++
++  template<typename D, typename I>
++  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
++    return fetch_and_add(dest, add_value, order) + add_value;
++  }
++};
++
++template<>
++template<typename D, typename I>
++inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value,
++                                               atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(I));
++  STATIC_ASSERT(4 == sizeof(D));
++  D old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amadd.w %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amadd_db.w %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++template<typename D, typename I>
++inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value,
++                                               atomic_memory_order order) const {
++  STATIC_ASSERT(8 == sizeof(I));
++  STATIC_ASSERT(8 == sizeof(D));
++  D old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amadd.d %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amadd_db.d %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
++                                             T exchange_value,
++                                             atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  T old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amswap.w %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amswap_db.w %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
++                                             T exchange_value,
++                                             atomic_memory_order order) const {
++  STATIC_ASSERT(8 == sizeof(T));
++  T old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amswap.d %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amswap_db.d %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
++
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
++                                                T compare_value,
++                                                T exchange_value,
++                                                atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  T prev, temp;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "1: ll.w %[prev], %[dest] \n\t"
++      " bne %[prev],
%[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.w %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ "2: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ default: ++ asm volatile ( ++ "1: ll.w %[prev], %[dest] \n\t" ++ " bne %[prev], %[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.w %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ " b 3f \n\t" ++ "2: dbar 0x700 \n\t" ++ "3: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ } ++ ++ return prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T prev, temp; ++ ++ switch (order) { ++ case memory_order_relaxed: ++ asm volatile ( ++ "1: ll.d %[prev], %[dest] \n\t" ++ " bne %[prev], %[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.d %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ "2: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ default: ++ asm volatile ( ++ "1: ll.d %[prev], %[dest] \n\t" ++ " bne %[prev], %[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.d %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ " b 3f \n\t" ++ "2: dbar 0x700 \n\t" ++ "3: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ } ++ ++ return prev; ++} ++ ++template<> ++struct Atomic::PlatformOrderedStore<4, RELEASE_X> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_release); } ++}; ++ ++template<> ++struct Atomic::PlatformOrderedStore<8, RELEASE_X> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_release); } ++}; ++ ++template<> ++struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_conservative); } ++}; ++ ++template<> ++struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_conservative); } ++}; ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 00000000000..c9f675baca4 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP
++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP
++
++#include <byteswap.h>
++
++// Efficient swapping of data bytes from Java byte
++// ordering to native byte ordering and vice versa.
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); }
++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); }
++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); }
++
++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP
+diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp
+new file mode 100644
+index 00000000000..826c1fe39ac
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp
+@@ -0,0 +1,125 @@
++/*
++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/gc/z/zSyscall_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/gc/z/zSyscall_linux_loongarch.hpp +new file mode 100644 +index 00000000000..46d5d5a268b +--- /dev/null ++++ 
b/src/hotspot/os_cpu/linux_loongarch/gc/z/zSyscall_linux_loongarch.hpp
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_GC_Z_ZSYSCALL_LINUX_LOONGARCH_HPP
++#define OS_CPU_LINUX_LOONGARCH_GC_Z_ZSYSCALL_LINUX_LOONGARCH_HPP
++
++#include <sys/syscall.h>
++
++//
++// Support for building on older Linux systems
++//
++
++#ifndef SYS_memfd_create
++#define SYS_memfd_create 279
++#endif
++#ifndef SYS_fallocate
++#define SYS_fallocate 47
++#endif
++
++#endif // OS_CPU_LINUX_LOONGARCH_GC_Z_ZSYSCALL_LINUX_LOONGARCH_HPP
+diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp
+new file mode 100644
+index 00000000000..0b5247aa0b6
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp
+@@ -0,0 +1,43 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP
++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP
++
++// Sets the default values for platform dependent flags used by the runtime system.
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +new file mode 100644 +index 00000000000..ebd73af0c53 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +new file mode 100644 +index 00000000000..6236e741d05 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++#define inlasm_synci() __asm__ __volatile__ ("ibar 0" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++inline void OrderAccess::cross_modify_fence_impl() { inlasm_synci(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +new file mode 100644 +index 00000000000..b32ffe9105e +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +@@ -0,0 +1,529 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "signals_posix.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ address epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = os::Posix::ucontext_get_pc(uc); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ epc = NULL; ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ address epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ if (!is_readable_pointer(epc)) { ++ // Try to recover from calling into bad memory ++ // Assume new frame has not been set up, the same as ++ // compiled frame stack bang ++ return fetch_compiled_frame_from_context(ucVoid); ++ } ++ return frame(sp, fp, epc); ++} ++ ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. 
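// Illustrative aside, not part of the Loongson patch: the register indices used
// in this file follow the LoongArch64 ABI numbering -- $r1 is ra (return
// address), $r3 is sp and $r22 is fp -- which is why REG_SP is 3, REG_FP is 22,
// and the stack-bang recovery below takes the caller pc from __gregs[1].
// A minimal standalone restatement of the same accessors (assuming glibc's
// LoongArch mcontext_t layout with __pc and __gregs[32]):
#include <ucontext.h>

static inline void* la64_ra_of(const ucontext_t* uc) { return (void*)uc->uc_mcontext.__gregs[1];  }
static inline void* la64_sp_of(const ucontext_t* uc) { return (void*)uc->uc_mcontext.__gregs[3];  }
static inline void* la64_fp_of(const ucontext_t* uc) { return (void*)uc->uc_mcontext.__gregs[22]; }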
++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ return frame(sp, fp, pc); ++} ++ ++// By default, gcc always save frame pointer on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->is_in_full_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ MacroAssembler::uses_implicit_null_check(info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { ++ // Pull a pointer to the error message out of the instruction ++ // stream. ++ const uint64_t *detail_msg_ptr ++ = (uint64_t*)(pc + 4/*NativeInstruction::instruction_size*/); ++ const char *detail_msg = (const char *)*detail_msg_ptr; ++ const char *msg = "stop"; ++ if (TraceTraps) { ++ tty->print_cr("trap: %s: (SIGILL)", msg); ++ } ++ ++ // End life with a fatal error, message and detail message and the context. ++ // Note: no need to do any post-processing here (e.g. signal chaining) ++ va_list va_dummy; ++ VMError::report_and_die(thread, uc, nullptr, 0, msg, detail_msg, va_dummy); ++ va_end(va_dummy); ++ ++ ShouldNotReachHere(); ++ } ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. 
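++    // Editorial note (not part of the upstream patch): find_slowcase_pc()
++    // returns the slow-path continuation for a faulting fast-path accessor,
++    // or (address)-1 when pc is not inside one, in which case the fault
++    // falls through to the generic handling below.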
++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Posix::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ return false; ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, 
(intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++} ++ ++void os::print_tos_pc(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t* uc = (const ucontext_t*)context; ++ ++ address sp = (address)os::Linux::ucontext_get_sp(uc); ++ print_tos(st, sp); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::fetch_frame_from_context(uc).pc(); ++ print_instructions(st, pc); ++ st->cr(); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +new file mode 100644 +index 00000000000..fa02f8ba2f9 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 00000000000..cf3a596387c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/safefetch_linux_loongarch64.S b/src/hotspot/os_cpu/linux_loongarch/safefetch_linux_loongarch64.S +new file mode 100644 +index 00000000000..fdc6da358e5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/safefetch_linux_loongarch64.S +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2022 SAP SE. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++ .globl SafeFetchN_impl ++ .globl _SafeFetchN_fault ++ .globl _SafeFetchN_continuation ++ .globl SafeFetch32_impl ++ .globl _SafeFetch32_fault ++ .globl _SafeFetch32_continuation ++ ++ # Support for int SafeFetch32(int* address, int defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetch32_impl: ++_SafeFetch32_fault: ++ ld.w $r4, $r4, 0 ++ jr $r1 ++_SafeFetch32_continuation: ++ or $r4, $r5, $r0 ++ jr $r1 ++ ++ # Support for intptr_t SafeFetchN(intptr_t* address, intptr_t defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetchN_impl: ++_SafeFetchN_fault: ++ ld.d $r4, $r4, 0 ++ jr $r1 ++_SafeFetchN_continuation: ++ or $r4, $r5, $r0 ++ jr $r1 +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +new file mode 100644 +index 00000000000..9204302bca8 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(this)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(this)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +new file mode 100644 +index 00000000000..82fc6fb659f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 00000000000..a39cb79bb1e +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +new file mode 100644 +index 00000000000..3711a7036a1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_LOONGARCH_LAM ++#define HWCAP_LOONGARCH_LAM (1 << 1) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_UAL ++#define HWCAP_LOONGARCH_UAL (1 << 2) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_COMPLEX ++#define HWCAP_LOONGARCH_COMPLEX (1 << 7) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_CRYPTO ++#define HWCAP_LOONGARCH_CRYPTO (1 << 8) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_X86 ++#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_ARM ++#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_MIPS ++#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#endif ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ static_assert(CPU_LAM == HWCAP_LOONGARCH_LAM, "Flag CPU_LAM must follow Linux HWCAP"); ++ static_assert(CPU_UAL == HWCAP_LOONGARCH_UAL, "Flag CPU_UAL must follow Linux HWCAP"); ++ static_assert(CPU_LSX == HWCAP_LOONGARCH_LSX, "Flag CPU_LSX must follow Linux HWCAP"); ++ static_assert(CPU_LASX == HWCAP_LOONGARCH_LASX, "Flag CPU_LASX must follow Linux HWCAP"); ++ static_assert(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX, "Flag CPU_COMPLEX must follow Linux HWCAP"); ++ static_assert(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO, "Flag CPU_CRYPTO must follow Linux HWCAP"); ++ static_assert(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86, "Flag CPU_LBT_X86 must follow Linux HWCAP"); ++ static_assert(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM, "Flag CPU_LBT_ARM must follow Linux HWCAP"); ++ static_assert(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS, "Flag CPU_LBT_MIPS must follow Linux HWCAP"); ++ ++ _features = auxv & ( ++ HWCAP_LOONGARCH_LAM | ++ HWCAP_LOONGARCH_UAL | ++ HWCAP_LOONGARCH_LSX | ++ HWCAP_LOONGARCH_LASX | ++ HWCAP_LOONGARCH_COMPLEX | ++ HWCAP_LOONGARCH_CRYPTO | ++ HWCAP_LOONGARCH_LBT_X86 | ++ HWCAP_LOONGARCH_LBT_ARM | ++ HWCAP_LOONGARCH_LBT_MIPS); ++} +diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +new file mode 100644 +index 00000000000..30719a0340b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +new file mode 100644 +index 00000000000..c82e3ce1ecf +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd { ++ template ++ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++ ++ template ++ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { ++ return add_and_fetch(dest, add_value, order) - add_value; ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, ++ T exchange_value, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++#if 0 ++template<> 
++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. ++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +new file mode 100644 +index 00000000000..5b5cd10aa55 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +new file mode 100644 +index 00000000000..3fd6ef7b36c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +new file mode 100644 +index 00000000000..f1599ac5f17 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +@@ -0,0 
+1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. ++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s +new file mode 100644 +index 00000000000..36c8d810c3c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +new file mode 100644 +index 00000000000..a92bf43bdbb +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++#define inlasm_synci() __asm__ __volatile__ ("synci 0($0)" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++inline void OrderAccess::cross_modify_fence_impl() { inlasm_synci(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +new file mode 100644 +index 00000000000..ff1af7beb68 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +@@ -0,0 +1,817 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "signals_posix.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). 
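++  // Editorial note (not part of the upstream patch): an all-bits-set pointer
++  // can never be handed out by reserve_memory, so it is a safe sentinel here.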
++ ++ return (char*) -1; ++} ++ ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.pc; ++} ++ ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ address epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = os::Posix::ucontext_get_pc(uc); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ epc = NULL; ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ address epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc); ++} ++ ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); ++ return frame(sp, fp, pc); ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! 
++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->is_in_full_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. 
++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ MacroAssembler::uses_implicit_null_check(info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) 
& 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. ++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Posix::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. 
++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++Untested("Unimplemented yet"); ++ bool instr_spans_page_boundary = ++/* ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++*/ ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++Untested("Unimplemented yet"); ++/* ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++*/ ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. ++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Posix::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ return false; ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 
1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++//size_t os::Linux::min_stack_allowed = 96 * K; ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++ ++/* ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++*/ ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", 
AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++} ++ ++void os::print_tos_pc(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t* uc = (const ucontext_t*)context; ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. 
++ address pc = os::Posix::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // MIPS does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +new file mode 100644 +index 00000000000..c07d08156f2 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +new file mode 100644 +index 00000000000..93490345f0b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/safefetch_linux_mips64.S b/src/hotspot/os_cpu/linux_mips/safefetch_linux_mips64.S +new file mode 100644 +index 00000000000..fc6ee6eca65 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/safefetch_linux_mips64.S +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2022 SAP SE. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++ .globl SafeFetchN_impl ++ .globl _SafeFetchN_fault ++ .globl _SafeFetchN_continuation ++ .globl SafeFetch32_impl ++ .globl _SafeFetch32_fault ++ .globl _SafeFetch32_continuation ++ ++ # Support for int SafeFetch32(int* address, int defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetch32_impl: ++_SafeFetch32_fault: ++ lw $2, 0($4) ++ j $31 ++ nop ++_SafeFetch32_continuation: ++ or $2, $5, $0 ++ j $31 ++ nop ++ ++ # Support for intptr_t SafeFetchN(intptr_t* address, intptr_t defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetchN_impl: ++_SafeFetchN_fault: ++ ld $2, 0($4) ++ j $31 ++ nop ++_SafeFetchN_continuation: ++ or $2, $5, $0 ++ j $31 ++ nop +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +new file mode 100644 +index 00000000000..4372eb41e9c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +@@ -0,0 +1,108 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +new file mode 100644 +index 00000000000..c38f6950fd0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +new file mode 100644 +index 00000000000..b7454bf045a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. 
This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +new file mode 100644 +index 00000000000..93e4bea04c6 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" +diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +index 0012152d48d..e3660cab271 100644 +--- a/src/hotspot/share/asm/codeBuffer.cpp ++++ b/src/hotspot/share/asm/codeBuffer.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "code/oopRecorder.inline.hpp" +@@ -330,6 +336,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp +index de173c64af1..df93c01d893 100644 +--- a/src/hotspot/share/c1/c1_Compiler.cpp ++++ b/src/hotspot/share/c1/c1_Compiler.cpp +@@ -43,6 +43,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -212,7 +218,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: +-#if defined(S390) || defined(PPC64) || defined(AARCH64) ++#if defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + #endif +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index 308f3a09c15..53a68cdb2fd 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_CodeStubs.hpp" + #include "c1/c1_InstructionPrinter.hpp" +@@ -190,6 +196,8 @@ void LIR_Op2::verify() const { + case lir_cmove: + #ifdef RISCV + assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#elif defined(LOONGARCH) ++ assert(false, "lir_cmove is LIR_Op4 on LoongArch"); + #endif + case lir_xchg: + break; +@@ -241,7 +249,7 @@ void LIR_Op2::verify() const { + + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block) +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + #else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -254,7 +262,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block) + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) : +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + #else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -267,7 +275,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) : + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock) +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + #else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -512,6 +520,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + #ifdef PPC32 + if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); + if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +@@ -528,7 +537,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(op->as_OpBranch() != NULL, "must be"); + LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); +@@ -625,7 +634,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + // to the result operand, otherwise the backend fails + case lir_cmove: + { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; + +@@ -1095,7 +1104,7 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); + } +@@ -1141,7 +1150,7 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) + , _file(NULL) + , _line(0) + #endif +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + , _cmp_opr1(LIR_OprFact::illegalOpr) + , _cmp_opr2(LIR_OprFact::illegalOpr) + #endif +@@ -1162,7 +1171,7 @@ void LIR_List::set_file_and_line(const char * file, int line) { + } + #endif + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void LIR_List::set_cmp_oprs(LIR_Op* op) { + switch (op->code()) { + case lir_cmp: +@@ -1185,7 +1194,7 @@ void LIR_List::set_cmp_oprs(LIR_Op* op) { + break; + #if INCLUDE_ZGC + case lir_zloadbarrier_test: 
+- _cmp_opr1 = FrameMap::as_opr(t1); ++ _cmp_opr1 = FrameMap::as_opr(RISCV_ONLY(t1) LOONGARCH64_ONLY(SCR1)); + _cmp_opr2 = LIR_OprFact::intConst(0); + break; + #endif +@@ -1924,7 +1933,7 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { + // LIR_OpBranch + void LIR_OpBranch::print_instr(outputStream* out) const { + print_condition(out, cond()); out->print(" "); +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + #endif +@@ -1963,6 +1972,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { + print_bytecode(out, bytecode()); + in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); ++ } + #ifdef PPC32 + if(tmp1()->is_valid()) { + tmp1()->print(out); out->print(" "); +@@ -2014,7 +2026,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { + #else + if (code() == lir_cmove || code() == lir_cmp) { +@@ -2069,7 +2081,7 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + // LIR_Op4 + void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 717404e9726..0fffd4aabfc 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_C1_C1_LIR_HPP + #define SHARE_C1_C1_LIR_HPP + +@@ -869,7 +875,7 @@ class LIR_Op2; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + class LIR_Op4; + #endif + class LIR_OpCall; +@@ -917,7 +923,7 @@ enum LIR_Code { + , lir_null_check + , lir_return + , lir_leal +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + , lir_branch + , lir_cond_float_branch + #endif +@@ -931,7 +937,7 @@ enum LIR_Code { + , lir_load_klass + , end_op1 + , begin_op2 +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + , lir_branch + , lir_cond_float_branch + #endif +@@ -939,7 +945,7 @@ enum LIR_Code { + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + , lir_cmove + #endif + , lir_add +@@ -969,7 +975,7 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + , begin_op4 + , lir_cmove + , end_op4 +@@ -1010,7 +1016,7 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert +-#if defined(RISCV) && defined(INCLUDE_ZGC) ++#if (defined(RISCV) || defined(LOONGARCH)) && defined(INCLUDE_ZGC) + , begin_opZLoadBarrierTest + , lir_zloadbarrier_test + , end_opZLoadBarrierTest +@@ -1151,7 +1157,7 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + virtual LIR_Op4* as_Op4() { return NULL; } + #endif + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } +@@ -1447,15 +1453,18 @@ class LIR_OpConvert: public LIR_Op1 { + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _bytecode(code) +- , _stub(stub) {} ++ , _stub(stub) ++ , _tmp(tmp) {} + + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1610,7 +1619,11 @@ class LIR_Op2: public LIR_Op { + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { +- assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); ++ assert(code == lir_cmp || code == lir_assert ++#if defined(RISCV) || defined(LOONGARCH) ++ || code == lir_branch || code == lir_cond_float_branch ++#endif ++ , "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1642,7 +1655,11 @@ class LIR_Op2: public LIR_Op { + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) { +- assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && ++#if defined(RISCV) || defined(LOONGARCH) ++ code != lir_branch && code != lir_cond_float_branch && ++#endif ++ is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr 
result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1658,7 +1675,11 @@ class LIR_Op2: public LIR_Op { + , _tmp4(tmp4) + , _tmp5(tmp5) + , _condition(lir_cond_unknown) { +- assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && ++#if defined(RISCV) || defined(LOONGARCH) ++ code != lir_branch && code != lir_cond_float_branch && ++#endif ++ is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1670,14 +1691,14 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; + #else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; + #endif + } + void set_condition(LIR_Condition condition) { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; + #else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; +@@ -1695,7 +1716,7 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + class LIR_OpBranch: public LIR_Op2 { + #else + class LIR_OpBranch: public LIR_Op { +@@ -1703,7 +1724,7 @@ class LIR_OpBranch: public LIR_Op { + friend class LIR_OpVisitState; + + private: +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + LIR_Condition _cond; + #endif + Label* _label; +@@ -1713,7 +1734,7 @@ class LIR_OpBranch: public LIR_Op { + + public: + LIR_OpBranch(LIR_Condition cond, Label* lbl) +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) + #else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) +@@ -1730,7 +1751,7 @@ class LIR_OpBranch: public LIR_Op { + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock); + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Condition cond() const { return condition(); } + void set_cond(LIR_Condition cond) { set_condition(cond); } + #else +@@ -1814,7 +1835,7 @@ class LIR_Op3: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: +@@ -2112,7 +2133,7 @@ class LIR_List: public CompilationResourceObj { + const char * _file; + int _line; + #endif +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Opr _cmp_opr1; + LIR_Opr _cmp_opr2; + #endif +@@ -2128,7 +2149,7 @@ class LIR_List: public CompilationResourceObj { + } + #endif // PRODUCT + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + set_cmp_oprs(op); + // lir_cmp set cmp oprs only on riscv + if (op->code() == lir_cmp) return; +@@ -2150,7 +2171,7 @@ class LIR_List: public CompilationResourceObj { + void set_file_and_line(const char * file, 
int line); + #endif + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void set_cmp_oprs(LIR_Op* op); + #endif + +@@ -2246,7 +2267,9 @@ class LIR_List: public CompilationResourceObj { + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + void return_op(LIR_Opr result) { append(new LIR_OpReturn(result)); } + +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); ++ } + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2273,7 +2296,7 @@ class LIR_List: public CompilationResourceObj { + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 989a6f8ad25..e288de2ab8e 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "c1/c1_Compilation.hpp" +@@ -691,7 +697,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); + break; + +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; +@@ -758,7 +764,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + } + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index c82baa15fe7..84c34db4985 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_C1_C1_LIRASSEMBLER_HPP + #define SHARE_C1_C1_LIRASSEMBLER_HPP + +@@ -186,7 +192,7 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void emit_op4(LIR_Op4* op); + #endif + void emit_opBranch(LIR_OpBranch* op); +@@ -222,7 +228,7 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); + #else +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index d3d38d11a90..6947406b2e7 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1240,7 +1246,7 @@ void LinearScan::add_register_hints(LIR_Op* op) { + break; + } + case lir_cmove: { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; + #else +@@ -3151,7 +3157,7 @@ void LinearScan::do_linear_scan() { + } + } + +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + // Disable these optimizations on riscv temporarily, because it does not + // work when the comparison operands are bound to branches or cmoves. + { TIME_LINEAR_SCAN(timer_optimize_lir); +@@ -6385,7 +6391,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Op4* prev_cmove = NULL; + #else + LIR_Op2* prev_cmove = NULL; +@@ -6395,7 +6401,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; + #else +diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +index 51051170794..a6c40704927 100644 +--- a/src/hotspot/share/code/nmethod.cpp ++++ b/src/hotspot/share/code/nmethod.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "asm/assembler.inline.hpp" +@@ -2540,7 +2546,8 @@ void nmethod::verify_scopes() { + //verify_interrupt_point(iter.addr()); + break; + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + address destination = iter.reloc()->value(); + // Right now there is no way to find out which entries support + // an interrupt point. It would be nice if we had this +@@ -3108,7 +3115,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { + return st.as_string(); + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + stringStream st; + st.print("runtime_call"); + CallRelocation* r = (CallRelocation*)iter.reloc(); +diff --git a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp +index 47769c53a5b..ed69d18d759 100644 +--- a/src/hotspot/share/code/relocInfo.cpp ++++ b/src/hotspot/share/code/relocInfo.cpp +@@ -402,6 +402,7 @@ void virtual_call_Relocation::unpack_data() { + _cached_value = x0==0? NULL: address_from_scaled_offset(x0, point); + } + ++#ifndef MIPS64 + void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +@@ -410,6 +411,7 @@ void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; + } ++#endif + + void static_stub_Relocation::pack_data_to(CodeSection* dest) { + short* p = (short*) dest->locs_end(); +@@ -874,7 +876,7 @@ void RelocIterator::print_current() { + break; + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { + CallRelocation* r = (CallRelocation*) reloc(); + tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); +diff --git a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp +index 55d4ac7c62d..b1c34733021 100644 +--- a/src/hotspot/share/code/relocInfo.hpp ++++ b/src/hotspot/share/code/relocInfo.hpp +@@ -266,7 +266,11 @@ class relocInfo { + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#ifndef MIPS64 + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool ++#else ++ internal_pc_type = 14, // tag for internal data ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -300,13 +304,13 @@ class relocInfo { + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ +- visitor(runtime_call_w_cp) \ ++ NOT_MIPS64(visitor(runtime_call_w_cp)) \ + visitor(external_word) \ + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) + + + public: +@@ -1146,6 +1150,16 @@ class runtime_call_Relocation : public CallRelocation { + }; + + ++#ifdef MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ public: ++ address pc() { return 
pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++ ++ internal_pc_Relocation() : Relocation(relocInfo::internal_pc_type) { } ++}; ++#else + class runtime_call_w_cp_Relocation : public CallRelocation { + public: + static RelocationHolder spec() { +@@ -1175,6 +1189,7 @@ class runtime_call_w_cp_Relocation : public CallRelocation { + void pack_data_to(CodeSection * dest); + void unpack_data(); + }; ++#endif + + // Trampoline Relocations. + // A trampoline allows to encode a small branch in the code, even if there +diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp +index d490adb3eef..0b1d7dc0a27 100644 +--- a/src/hotspot/share/code/vtableStubs.cpp ++++ b/src/hotspot/share/code/vtableStubs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/vtableStubs.hpp" + #include "compiler/compileBroker.hpp" +@@ -102,7 +108,11 @@ int VtableStubs::_itab_stub_size = 0; + + #if defined(PRODUCT) + // These values are good for the PRODUCT case (no tracing). ++#if defined MIPS64 || defined LOONGARCH64 ++ static const int first_vtableStub_size = 128; ++#else + static const int first_vtableStub_size = 64; ++#endif + static const int first_itableStub_size = 256; + #else + // These values are good for the non-PRODUCT case (when tracing can be switched on). +@@ -113,6 +123,7 @@ int VtableStubs::_itab_stub_size = 0; + // vtable itable + // aarch64: 460 324 + // arm: ? ? ++ // mips64: 728 328 + // ppc (linux, BE): 404 288 + // ppc (linux, LE): 356 276 + // ppc (AIX): 416 296 +diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp +index f0944108810..a8c1f97a80e 100644 +--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP + #define SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP + +@@ -58,6 +64,9 @@ void G1ParScanThreadState::trim_queue_partially() { + void G1ParScanThreadState::trim_queue() { + trim_queue_to_threshold(0); + assert(_task_queue->overflow_empty(), "invariant"); ++ // Load of _age._fields._top in trim_queue_to_threshold must not pass ++ // the load of _age._fields._top in assert _task_queue->taskqueue_empty(). ++ DEBUG_ONLY(OrderAccess::loadload();) + assert(_task_queue->taskqueue_empty(), "invariant"); + } + +diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +index 7d31ff02e1a..07dac06aecf 100644 +--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ++++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "gc/shared/gcArguments.hpp" + #include "gc/shared/tlab_globals.hpp" +@@ -35,7 +41,7 @@ + #include "utilities/defaultStream.hpp" + + void ShenandoahArguments::initialize() { +-#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64 || defined RISCV64) ++#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64 || defined RISCV64 || defined LOONGARCH64) + vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); + #endif + +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 0e99bf107c1..d5541cf8966 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_LIR.hpp" + #include "c1/c1_LIRGenerator.hpp" +@@ -94,7 +100,7 @@ private: + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), + #else + LIR_Op(), +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +index d66ed24d862..b682bb9d62a 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm_io.h" + #include "classfile/javaClasses.inline.hpp" +@@ -1459,7 +1465,7 @@ JRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* current, Met + // preparing the same method will be sure to see non-null entry & mirror. + JRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + JRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +index c32431784aa..8209c42a1c4 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -135,7 +141,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address); + #endif +diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +index 4e167ff451a..9441bae96c1 100644 +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -110,9 +116,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + + void generate_fixed_frame(bool native_call); + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +-#endif // AARCH64 ++#endif // AARCH64 || MIPS64 || LOONGARCH64 + + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index 597ddb3800f..427a9503eaf 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_JFR_UTILITIES_JFRBIGENDIAN_HPP + #define SHARE_JFR_UTILITIES_JFRBIGENDIAN_HPP + +@@ -102,7 +108,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(ARM) || defined(AARCH64) || defined(RISCV) ++#elif defined(ARM) || defined(AARCH64) || defined(RISCV) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +index 3f57d487bae..3b49daaf96e 100644 +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "code/codeCache.hpp" + #include "compiler/compileBroker.hpp" +@@ -755,6 +761,17 @@ + + #endif + ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define DECLARE_INT_CPU_FEATURE_CONSTANT(id, name, bit) GENERATE_VM_INT_CONSTANT_ENTRY(VM_Version::CPU_##id) ++#define VM_INT_CPU_FEATURE_CONSTANTS CPU_FEATURE_FLAGS(DECLARE_INT_CPU_FEATURE_CONSTANT) ++ ++#endif ++ ++ + #ifdef X86 + + #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ +diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp +index 1e897615eaf..9dc65a72b1e 100644 +--- a/src/hotspot/share/memory/metaspace.cpp ++++ b/src/hotspot/share/memory/metaspace.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "cds/metaspaceShared.hpp" + #include "classfile/classLoaderData.hpp" +@@ -587,12 +593,15 @@ bool Metaspace::class_space_is_initialized() { + // On error, returns an unreserved space. + ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t size) { + +-#if defined(AARCH64) || defined(PPC64) ++#if defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + const size_t alignment = Metaspace::reserve_alignment(); + + // AArch64: Try to align metaspace class space so that we can decode a + // compressed klass with a single MOVK instruction. We can do this iff the + // compressed class base is a multiple of 4G. ++ ++ // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. ++ + // Additionally, above 32G, ensure the lower LogKlassAlignmentInBytes bits + // of the upper 32-bits of the address are zero so we can handle a shift + // when decoding. +@@ -649,16 +658,16 @@ ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t siz + return rs; + } + } +-#endif // defined(AARCH64) || defined(PPC64) ++#endif // defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + // Note: on AARCH64, if the code above does not find any good placement, we + // have no recourse. We return an empty space and the VM will exit. + return ReservedSpace(); + #else + // Default implementation: Just reserve anywhere. + return ReservedSpace(size, Metaspace::reserve_alignment(), os::vm_page_size(), (char*)NULL); +-#endif // AARCH64 ++#endif // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + } + + #endif // _LP64 +diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp +index 8a1ed0d3160..596829c07ca 100644 +--- a/src/hotspot/share/opto/output.cpp ++++ b/src/hotspot/share/opto/output.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. 
These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -1011,6 +1017,27 @@ void PhaseOutput::Process_OopMap_Node(MachNode *mach, int current_offset) { + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = nullptr; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to tha last instruction in safePoint. ++ // In X86 and sparc, their safePoints only contain one instruction. ++ // However, we should add current_offset with the size of safePoint in MIPS. ++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(C->regalloc()) - 4; ++#endif + C->debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1686,6 +1713,22 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + DEBUG_ONLY(uint instr_offset = cb->insts_size()); + n->emit(*cb, C->regalloc()); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime ++ // is not the instruction which access memory. adjust is needed. previous_offset points to the ++ // instruction which access memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of current instruction. ++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +index a3762dc32ff..511bd7e7875 100644 +--- a/src/hotspot/share/opto/type.cpp ++++ b/src/hotspot/share/opto/type.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -78,6 +84,14 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask. ++ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY ++ { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other + { Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask. + { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. +diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +index 369e5c4f5fa..f7ac74999a2 100644 +--- a/src/hotspot/share/runtime/os.cpp ++++ b/src/hotspot/share/runtime/os.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "classfile/javaClasses.hpp" +@@ -1235,7 +1241,8 @@ bool os::is_first_C_frame(frame* fr) { + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; + + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || ++ // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack +diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp +index 9af4b513a99..1a3e9fd0ad5 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.cpp ++++ b/src/hotspot/share/runtime/sharedRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.hpp" + #include "jvm.h" +@@ -3054,7 +3060,7 @@ void AdapterHandlerLibrary::create_native_wrapper(const methodHandle& method) { + CodeBuffer buffer(buf); + struct { double data[20]; } locs_buf; + buffer.insts()->initialize_shared_locs((relocInfo*)&locs_buf, sizeof(locs_buf) / sizeof(relocInfo)); +-#if defined(AARCH64) ++#if defined(AARCH64) || defined(LOONGARCH64) + // On AArch64 with ZGC and nmethod entry barriers, we need all oops to be + // in the constant pool to ensure ordering between the barrier and oops + // accesses. For native_wrappers we need a constant. 
+diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +index 6e3aa30b0b9..8f1d486f5cb 100644 +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++ + #include "precompiled.hpp" + #include "jni.h" + #include "runtime/interfaceSupport.inline.hpp" +@@ -507,6 +514,14 @@ static int __ieee754_rem_pio2(double x, double *y) { + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + ++#if defined(MIPS)|| defined(LOONGARCH) ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp +index d86fce3c8ac..71bfd4dfa19 100644 +--- a/src/hotspot/share/runtime/thread.inline.hpp ++++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_RUNTIME_THREAD_INLINE_HPP + #define SHARE_RUNTIME_THREAD_INLINE_HPP + +@@ -132,7 +138,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { + } + + inline JavaThreadState JavaThread::thread_state() const { +-#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) || defined(LOONGARCH64) + // Use membars when accessing volatile _thread_state. See + // Threads::create_vm() for size checks. + return (JavaThreadState) Atomic::load_acquire((volatile jint*)&_thread_state); +@@ -144,7 +150,7 @@ inline JavaThreadState JavaThread::thread_state() const { + inline void JavaThread::set_thread_state(JavaThreadState s) { + assert(current_or_null() == NULL || current_or_null() == this, + "state change should only be called by the current thread"); +-#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) || defined(LOONGARCH64) + // Use membars when accessing volatile _thread_state. See + // Threads::create_vm() for size checks. + Atomic::release_store((volatile jint*)&_thread_state, (jint)s); +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index 33ecfe089f8..4d024b35735 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_UTILITIES_MACROS_HPP + #define SHARE_UTILITIES_MACROS_HPP + +@@ -488,6 +494,38 @@ + #define NOT_S390(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ ++#if defined(MIPS64) || defined(LOONGARCH64) ++#define LOONGARCH64_AND_MIPS64_ONLY(code) code ++#define NOT_LOONGARCH64_AND_MIPS64(code) ++#else ++#define LOONGARCH64_AND_MIPS64_ONLY(code) ++#define NOT_LOONGARCH64_AND_MIPS64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +@@ -605,16 +643,34 @@ + // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_x86.hpp + // + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) ++#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) ++#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) ++#else + #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) + #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) + #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) + #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) + #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) ++#else + #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) + #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) + #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +index 9accba375a2..200bb1e82f3 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +@@ -23,6 +23,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + #include + #include "libproc.h" + #include "proc_service.h" +@@ -64,6 +71,10 @@ + #include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + class AutoJavaString { + JNIEnv* m_env; + jstring m_str; +@@ -412,7 +423,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? array : 0; + } + +-#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) ++#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) || defined(loongarch64) + extern "C" + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { +@@ -447,6 +458,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #ifdef riscv64 + #define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG + #endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif +@@ -561,6 +575,18 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + #endif /* riscv64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index a69496e77a4..64312b4705d 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,7 +44,7 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) +@@ -46,6 +53,10 @@ + #elif defined(riscv64) + #include + #endif ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index b5fec835a98..d991f29cbb1 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -138,7 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, + errno, strerror(errno)); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index e0e9b4b6727..9af1218ed46 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -38,6 +44,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; + import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -572,6 +580,10 @@ public class HotSpotAgent { + machDesc = new MachineDescriptionAArch64(); + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +new file mode 100644 +index 00000000000..99cea8c7f14 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +new file mode 100644 +index 00000000000..1b49efd2017 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 469bb6e0665..ea3a118de2a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.io.*; +@@ -34,12 +40,16 @@ import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; + import sun.jvm.hotspot.debugger.linux.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -93,7 +103,21 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AMD64ThreadContext.RIP); + if (pc == null) return null; + return LinuxAMD64CFrame.getTopFrame(dbg, pc, context); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("ppc64")) { + PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); + if (sp == null) return null; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +index 69a34fe2afa..c21e0d6a611 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.lang.reflect.*; +@@ -29,6 +35,8 @@ import sun.jvm.hotspot.debugger.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -37,7 +45,11 @@ class LinuxThreadContextFactory { + return new LinuxX86ThreadContext(dbg); + } else if (cpu.equals("amd64")) { + return new LinuxAMD64ThreadContext(dbg); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); ++ } else if (cpu.equals("ppc64")) { + return new LinuxPPC64ThreadContext(dbg); + } else { + try { +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +new file mode 100644 +index 00000000000..0e6caee5a49 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address nextFP; ++ Address nextPC; ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ try { ++ nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextFP == null) { ++ return null; ++ } ++ ++ try { ++ nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextPC == null) { ++ return null; ++ } ++ ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..604642598e0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +new file mode 100644 +index 00000000000..2e3eb564da2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +new file mode 100644 +index 00000000000..98e0f3f0bcf +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..1de3cb1a472 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +new file mode 100644 +index 00000000000..d3479a65ea0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, 
the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +index 7113a3a497b..de47531db7c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.posix.elf; + + import java.io.FileInputStream; +@@ -63,6 +69,8 @@ public interface ELFHeader { + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. */ + public short getFileType(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +new file mode 100644 +index 00000000000..1f60fa6cfb2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. 
++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..ef5597ac4e9 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +new file mode 100644 +index 00000000000..abad1bb38b7 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +new file mode 100644 +index 00000000000..5c1e0be8932 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +new file mode 100644 +index 00000000000..d44223d768a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +new file mode 100644 +index 00000000000..bad478fc5ca +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index 2bd396c8f4f..da89480f72c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.remote; + + import java.rmi.*; +@@ -33,6 +39,8 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. +@@ -71,6 +79,16 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger { + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +new file mode 100644 +index 00000000000..242dd279e1a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..634d5ad049f +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +new file mode 100644 +index 00000000000..4fb9cc7c069 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +new file mode 100644 +index 00000000000..c2f7d841f20 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +new file mode 100644 +index 00000000000..23646905d74 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +new file mode 100644 +index 00000000000..b39b0144901 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index d16ac8aae51..de1e70a7290 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.runtime; + + import java.util.*; +@@ -36,6 +42,8 @@ import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_aarch64.BsdAARCH64JavaThreadPDAccess; +@@ -116,6 +124,10 @@ public class Threads { + access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("riscv64")) { + access = new LinuxRISCV64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +new file mode 100644 +index 00000000000..75d6bf2c642 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +@@ -0,0 +1,135 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +new file mode 100644 +index 00000000000..88223744932 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +@@ -0,0 +1,135 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return null; ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public Address getLastFP(Address addr) { ++ return getLastSP(addr).getAddressAt(0); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +new file mode 100644 +index 00000000000..824270e1329 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**
++ Should be able to be used on all loongarch64 platforms we support
++ (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's
++ "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext;
++ output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the
++ LOONGARCH64Frame is left to the caller, since we may need to subclass
++ LOONGARCH64Frame to support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated EBP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +new file mode 100644 +index 00000000000..576654594d8 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +@@ -0,0 +1,519 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. 
*/ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ private static final int LINK_OFFSET = -2; ++ private static final int RETURN_ADDR_OFFSET = -1; ++ private static final int SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -11; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; ++ } ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. 
++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ return addressOfStackSlot(RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ return addressOfStackSlot(SENDER_SP_OFFSET); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper 
getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +new file mode 100644 +index 00000000000..0ad9573a42d +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +new file mode 100644 +index 00000000000..2cf904d3885 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +new file mode 100644 +index 00000000000..c11458abe2c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**
++ Should be able to be used on all mips64 platforms we support
++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's
++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext;
++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the
++ MIPS64Frame is left to the caller, since we may need to subclass
++ MIPS64Frame to support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated EBP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +new file mode 100644 +index 00000000000..e11d64737dd +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +@@ -0,0 +1,539 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new 
VMReg(5); ++ } else { ++ rbp = new VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 00000000000..8a4a28a6055 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 00000000000..f2da760af4a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index f4cd4873207..6901946e58a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -50,7 +57,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -83,6 +90,12 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 00000000000..1f54e9f3c59 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,142 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static java.util.Collections.emptyMap; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++import java.util.Map; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ private static EnumSet computeFeatures(LoongArch64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. ++ Map constants = config.getStore().getConstants(); ++ return HotSpotJVMCIBackendFactory.convertFeatures(CPUFeature.class, constants, config.vmVersionFeatures, emptyMap()); ++ } ++ ++ private static EnumSet computeFlags(LoongArch64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); ++ ++ if (config.useLSX) { ++ flags.add(LoongArch64.Flag.useLSX); ++ } ++ if (config.useLASX) { ++ flags.add(LoongArch64.Flag.useLASX); ++ } ++ ++ return flags; ++ } ++ ++ private static TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ private static RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { ++ return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "loongarch64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == null; ++ 
LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +new file mode 100644 +index 00000000000..e1a007000d2 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); ++ ++ public static final Register heapBaseRegister = s5; ++ public static final Register TREG = s6; ++ ++ private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ floatParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter 
locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return floatParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentFloat = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentFloat < floatParameterRegisters.size()) { ++ Register register = floatParameterRegisters.get(currentFloat++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } else if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? 
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +new file mode 100644 +index 00000000000..0a2e857204c +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. 
++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/package-info.java +new file mode 100644 +index 00000000000..74c6ca9801f +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64.java +new file mode 100644 +index 00000000000..930b17e820a +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64.java +@@ -0,0 +1,249 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CPUFeatureName; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature implements CPUFeatureName { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */ ++ public enum Flag { ++ useLSX, ++ useLASX ++ } ++ ++ private final EnumSet flags; ++ ++ public LoongArch64(EnumSet features, EnumSet flags) { ++ super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ @Override ++ public EnumSet getFeatures() { ++ return features; ++ } ++ ++ public EnumSet getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return LoongArch64Kind.BYTE; ++ case Short: ++ case Char: ++ return LoongArch64Kind.WORD; ++ case Int: ++ return LoongArch64Kind.DWORD; ++ case Long: ++ case Object: ++ return LoongArch64Kind.QWORD; ++ case Float: ++ return LoongArch64Kind.SINGLE; ++ case Double: ++ return LoongArch64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ LoongArch64Kind kind = (LoongArch64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public LoongArch64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return LoongArch64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return LoongArch64Kind.V256_QWORD; ++ } else { ++ return null; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64Kind.java +new file mode 100644 +index 00000000000..047a1dbbe36 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64Kind.java +@@ -0,0 +1,163 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.loongarch64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum LoongArch64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final LoongArch64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); ++ ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/package-info.java +new file mode 100644 +index 00000000000..6df1b7b3a92 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index ed197695720..62a4ff4be0f 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to + jdk.internal.vm.compiler, +@@ -39,5 +45,6 @@ module jdk.internal.vm.ci { + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory; + } +diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile +index 66dac7130bd..dd2ccd340f2 100644 +--- a/src/utils/hsdis/Makefile ++++ b/src/utils/hsdis/Makefile +@@ -89,6 +89,9 @@ CC = gcc + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +index 4c56daebfb8..92836130408 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +@@ -21,12 +21,18 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management +- * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled ++ * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm/timeout=600 -Xbootclasspath/a:. +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +index 03016ea3dd6..62ce6c1a7a5 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +@@ -21,6 +21,12 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / +@@ -28,7 +34,7 @@ + * java.management + * + * @build jdk.test.whitebox.WhiteBox +- * @requires !(vm.cpu.features ~= ".*aes.*") ++ * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") + * @requires vm.compiler1.enabled | !vm.graal.enabled + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index 468cd83d7a2..40d2b03e301 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.intrinsics.sha.cli.testcases; + + import compiler.intrinsics.sha.cli.DigestOptionsBase; +@@ -32,7 +38,7 @@ import jdk.test.lib.cli.predicate.OrPredicate; + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, RISCV64, PPC, S390x, and X86. ++ * AArch64, RISCV64, PPC, S390x, LoongArch64, and X86. + */ + public class GenericTestCaseForOtherCPU extends + DigestOptionsBase.TestCase { +@@ -44,14 +50,15 @@ public class GenericTestCaseForOtherCPU extends + } + + public GenericTestCaseForOtherCPU(String optionName, boolean checkUseSHA) { +- // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, LoongArch64, and X86. + super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isPPC, ++ new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + + this.checkUseSHA = checkUseSHA; + } +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +index d7ecc7c04ef..0d47a2f3037 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +@@ -20,16 +20,25 @@ + * or visit www.oracle.com if you need additional information or have any + * questions. + */ ++ ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.vm.ci.code.test; + + import jdk.vm.ci.aarch64.AArch64; + import jdk.vm.ci.amd64.AMD64; ++import jdk.vm.ci.loongarch64.LoongArch64; + import jdk.vm.ci.code.Architecture; + import jdk.vm.ci.code.CodeCacheProvider; + import jdk.vm.ci.code.InstalledCode; + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.aarch64.AArch64TestAssembler; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; +@@ -75,6 +84,8 @@ public class CodeInstallationTest { + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof AArch64) { + return new AArch64TestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +index 2e3f90368b1..a07fcc8af94 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.DataPatchTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +index b88832677eb..00860c8a66a 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +index f473d089a54..6ca7b76f1e7 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,8 +39,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +index dce107095d5..d8c855dfb3a 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +@@ -21,10 +21,16 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,8 +39,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +index e5fc53e8013..75494d5ccf1 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +index bfd611312a2..08be94ac132 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +index 1fb0d77eb73..b2d40f70a80 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +@@ -21,10 +21,16 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +new file mode 100644 +index 00000000000..4c76868453a +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void 
emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ 
// Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, 
int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ 
emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} +diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +index 2f2395b77c6..58482edb32e 100644 +--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java ++++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package compiler.lib.ir_framework; + + import compiler.lib.ir_framework.driver.irmatching.IRMatcher; +@@ -58,8 +64,8 @@ public class IRNode { + public static final String ALLOC_ARRAY = "(.*precise klass \\[L.*\\R((.*(?i:mov|mv|xor|nop|spill).*|\\s*|.*LGHI.*)\\R)*.*(?i:call,static).*wrapper for: _new_array_Java" + END; + public static final String ALLOC_ARRAY_OF = COMPOSITE_PREFIX + "(.*precise klass \\[L.*" + IS_REPLACED + ";:.*\\R((.*(?i:mov|mv|xorl|nop|spill).*|\\s*|.*LGHI.*)\\R)*.*(?i:call,static).*wrapper for: _new_array_Java" + END; + +- public static final String CHECKCAST_ARRAY = "(((?i:cmp|CLFI|CLR).*precise klass \\[.*;:|.*(?i:mov|mv|or).*precise klass \\[.*;:.*\\R.*(cmp|CMP|CLR))" + END; +- public static final String CHECKCAST_ARRAY_OF = COMPOSITE_PREFIX + "(((?i:cmp|CLFI|CLR).*precise klass \\[.*" + IS_REPLACED + ";:|.*(?i:mov|mv|or).*precise klass \\[.*" + IS_REPLACED + ";:.*\\R.*(cmp|CMP|CLR))" + END; ++ public static final String CHECKCAST_ARRAY = "(((?i:cmp|CLFI|CLR).*precise klass \\[.*;:|.*(?i:mov|mv|or|li).*precise klass \\[.*;:.*\\R.*(cmp|CMP|CLR))" + END; ++ public static final String CHECKCAST_ARRAY_OF = COMPOSITE_PREFIX + "(((?i:cmp|CLFI|CLR).*precise klass \\[.*" + IS_REPLACED + ";:|.*(?i:mov|mv|or|li).*precise klass \\[.*" + IS_REPLACED + ";:.*\\R.*(cmp|CMP|CLR))" + END; + // Does not work on s390 (a rule containing this regex will be skipped on s390). + public static final String CHECKCAST_ARRAYCOPY = "(.*((?i:call_leaf_nofp,runtime)|CALL,\\s?runtime leaf nofp|BCTRL.*.leaf call).*checkcast_arraycopy.*" + END; + +diff --git a/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java b/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java +index 85fd3fa938d..0655f2b0bd1 100644 +--- a/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java ++++ b/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022 Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + /* + * @test + * @bug 8279822 +@@ -130,7 +136,7 @@ public abstract class TestConstantsInError implements OutputProcessor { + results.shouldMatch("Test_C1/.*::test \\(3 bytes\\)$") + .shouldMatch("Test_C2/.*::test \\(3 bytes\\)$"); + +- if (isC1 && (Platform.isAArch64() || Platform.isRISCV64())) { // no code patching ++ if (isC1 && (Platform.isAArch64() || Platform.isRISCV64() || Platform.isLoongArch64())) { // no code patching + results.shouldMatch("Test_C1/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_C2/.*::test \\(3 bytes\\) made not entrant"); + } else { +@@ -168,7 +174,7 @@ public abstract class TestConstantsInError implements OutputProcessor { + .shouldMatch("Test_MH3/.*::test \\(3 bytes\\)$") + .shouldMatch("Test_MH4/.*::test \\(3 bytes\\)$"); + +- if (isC1 && (Platform.isAArch64() || Platform.isRISCV64())) { // no code patching ++ if (isC1 && (Platform.isAArch64() || Platform.isRISCV64() || Platform.isLoongArch64())) { // no code patching + results.shouldMatch("Test_MH1/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_MH2/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_MH3/.*::test \\(3 bytes\\) made not entrant") +@@ -191,7 +197,7 @@ public abstract class TestConstantsInError implements OutputProcessor { + results.shouldMatch("Test_MT1/.*::test \\(3 bytes\\)$") + .shouldMatch("Test_MT2/.*::test \\(3 bytes\\)$"); + +- if (isC1 && (Platform.isAArch64() || Platform.isRISCV64())) { // no code patching ++ if (isC1 && (Platform.isAArch64() || Platform.isRISCV64() || Platform.isLoongArch64())) { // no code patching + results.shouldMatch("Test_MT1/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_MT2/.*::test \\(3 bytes\\) made not entrant"); + } else { +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index 10d87d51f0f..dbea76741d6 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* @test + * @bug 8167409 + * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") & (vm.flavor != "zero") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") & (vm.flavor != "zero") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index 23c1e6e6acb..2f402d567d9 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + /* @test + * @bug 8167408 + * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") & (vm.flavor != "zero") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") & (vm.flavor != "zero") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 689c7c8cc2f..f734c1baa3f 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.testlibrary.sha.predicate; + + import jdk.test.lib.Platform; +@@ -61,19 +67,22 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier MD5_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", null, null), ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", null, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", null, null), +- new CPUSpecificPredicate("x86.*", null, null)))); ++ new CPUSpecificPredicate("x86.*", null, null))))); + + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +@@ -81,12 +90,14 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 36f74d01b54..035b91b9d8e 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test ReservedStackTest + * +@@ -240,7 +246,8 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86() || Platform.isAArch64() || Platform.isRISCV64())) || ++ Platform.isX86() || Platform.isAArch64() || Platform.isRISCV64() || ++ Platform.isMIPS() || Platform.isLoongArch64())) || + Platform.isOSX(); + } + +diff --git a/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java b/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java +index 26dd3514e8e..2818343ec3d 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java ++++ b/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package ir_framework.tests; + + import compiler.lib.ir_framework.*; +@@ -215,7 +221,7 @@ public class TestIRMatching { + runCheck(BadFailOnConstraint.create(Membar.class, "membar()", 1, "MemBar")); + + String cmp; +- if (Platform.isPPC() || Platform.isX86()) { ++ if (Platform.isPPC() || Platform.isX86() || Platform.isLoongArch64()) { + cmp = "CMP"; + } else if (Platform.isS390x()){ + cmp = "CLFI"; +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index c5166580010..913136a1fd1 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.jfr.event.os; + + import java.util.List; +@@ -52,8 +58,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390", "MIPS", "LoongArch"); + } + } + } +diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java +index a9a8a8178ee..99295d779c5 100644 +--- a/test/jdk/sun/security/pkcs11/PKCS11Test.java ++++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // common infrastructure for SunPKCS11 tests + + import java.io.ByteArrayOutputStream; +@@ -693,6 +699,9 @@ public abstract class PKCS11Test { + "/usr/lib64/" }); + osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); + osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[] {}); + osMap.put("Windows-amd64-64", new String[] {}); +diff --git a/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java b/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java +index c71a6034748..427ebda770f 100644 +--- a/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java +@@ -33,6 +33,12 @@ import java.util.HashSet; + import java.util.List; + import java.util.Set; + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @summary Verify that for each group of mutually exclusive predicates defined +@@ -45,7 +51,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isWindows"), + VM_TYPE("isClient", "isServer", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index a4f2c03d10f..7d3b1a62ecb 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -21,6 +21,12 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package jdk.test.lib; + + import java.io.BufferedReader; +@@ -233,6 +239,14 @@ public class Platform { + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ + public static String getOsArch() { + return osArch; + } diff --git a/java8-openjdk/PKGBUILD b/java8-openjdk/PKGBUILD index d576f6e8dd..cb75a3bda9 100644 --- a/java8-openjdk/PKGBUILD +++ b/java8-openjdk/PKGBUILD @@ -10,7 +10,7 @@ _majorver=8 _minorver=402 _updatever=06 pkgver=${_majorver}.${_minorver}.u${_updatever} -pkgrel=1 +pkgrel=7 arch=('loong64' 'x86_64') url='https://openjdk.java.net/' license=('custom') @@ -31,13 +31,17 @@ makedepends=( ) options=(!lto) source=(https://github.com/openjdk/jdk${_majorver}u/archive/refs/tags/jdk${_majorver}u${_minorver}-b${_updatever}.tar.gz - gcc11.patch) + gcc11.patch + jdk8u382-la64.patch) b2sums=('dee05e214756da4d1dcce0f923a0c10b9e385b5945689039c370ae8ac60f3e1324c629c24d9194f63471430b3c94680f0dcb2c3bdfd13d1e2034673cf9123cae' - '9679e4dfb6027a87376081489c09810812d6849573afac4ea96abe3a3e00ca5b6af7d0ffb010c43b93cfa913f9e97fbb9f11e19fcc86a89b4548442671c32da1') + '9679e4dfb6027a87376081489c09810812d6849573afac4ea96abe3a3e00ca5b6af7d0ffb010c43b93cfa913f9e97fbb9f11e19fcc86a89b4548442671c32da1' + '8010001cc05570986c901353e6e4c52849faf41e879c7356b35d628b84af50fa78a2c3a5476f3c93bc3f49d0de8c0ca21879e779824648cbe5aadd5a6207ab02') +SKIPCONFIG=1 case "${CARCH}" in 'x86_64') _JARCH=amd64 ; _DOC_ARCH=x86_64 ;; 'i686' ) _JARCH=i386 ; _DOC_ARCH=x86 ;; + 'loong64' ) _JARCH=loongarch64 ; _DOC_ARCH=loongarch64 ;; esac _jdkname=openjdk8 @@ -54,6 +58,7 @@ prepare() { # Fix build with C++17 (Fedora) patch -Np1 -i "${srcdir}"/gcc11.patch + patch -Np1 -i "${srcdir}"/jdk8u382-la64.patch } build() { @@ -181,6 +186,8 @@ package_jre8-openjdk-headless() { install -D -m 644 "${pkgdir}${_filepkgpath}" "${pkgdir}/${file}" ln -sf /${file} "${pkgdir}${_filepkgpath}" done + # The built out libjvm.so is error, so copy it from the current system. 
+ cp /usr/lib/jvm/java-8-openjdk/jre/lib/loongarch64/server/libjvm.so ${pkgdir}/usr/lib/jvm/java-8-openjdk/jre/lib/loongarch64/server/libjvm.so } package_jre8-openjdk() { diff --git a/java8-openjdk/jdk8u382-la64.patch b/java8-openjdk/jdk8u382-la64.patch new file mode 100644 index 0000000000..c3bf3c60be --- /dev/null +++ b/java8-openjdk/jdk8u382-la64.patch @@ -0,0 +1,116949 @@ +diff --git a/common/autoconf/build-aux/autoconf-config.guess b/common/autoconf/build-aux/autoconf-config.guess +index 15ee438926..3d7555b52d 100644 +--- a/common/autoconf/build-aux/autoconf-config.guess ++++ b/common/autoconf/build-aux/autoconf-config.guess +@@ -977,6 +977,9 @@ EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; ++ loongarch64:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; +diff --git a/common/autoconf/build-aux/autoconf-config.sub b/common/autoconf/build-aux/autoconf-config.sub +index 1aab2b303e..bd910bddbe 100644 +--- a/common/autoconf/build-aux/autoconf-config.sub ++++ b/common/autoconf/build-aux/autoconf-config.sub +@@ -275,6 +275,7 @@ case $basic_machine in + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ ++ | loongarch | loongarch64 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep \ + | mips | mipsbe | mipseb | mipsel | mipsle \ +diff --git a/common/autoconf/build-aux/config.guess b/common/autoconf/build-aux/config.guess +index 355c91e4eb..d03d029ce3 100644 +--- a/common/autoconf/build-aux/config.guess ++++ b/common/autoconf/build-aux/config.guess +@@ -86,4 +86,15 @@ if [ "x$OUT" = x ]; then + fi + fi + ++# Test and fix little endian MIPS. ++if [ "x$OUT" = x ]; then ++ if [ `uname -s` = Linux ]; then ++ if [ `uname -m` = mipsel ]; then ++ OUT=mipsel-unknown-linux-gnu ++ elif [ `uname -m` = mips64el ]; then ++ OUT=mips64el-unknown-linux-gnu ++ fi ++ fi ++fi ++ + echo $OUT +diff --git a/common/autoconf/configure.ac b/common/autoconf/configure.ac +index 151e5a109f..5072409dd4 100644 +--- a/common/autoconf/configure.ac ++++ b/common/autoconf/configure.ac +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2018. These ++# modifications are Copyright (c) 2018 Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + ############################################################################### + # + # Includes and boilerplate +@@ -186,6 +192,7 @@ FLAGS_SETUP_INIT_FLAGS + # Now we can test some aspects on the target using configure macros. + PLATFORM_SETUP_OPENJDK_TARGET_BITS + PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS ++GET_BUILDER_AND_HOST_DATA + + # Configure flags for the tools + FLAGS_SETUP_COMPILER_FLAGS_FOR_LIBS +diff --git a/common/autoconf/generated-configure.sh b/common/autoconf/generated-configure.sh +index b3c5819161..bae7e64749 100644 +--- a/common/autoconf/generated-configure.sh ++++ b/common/autoconf/generated-configure.sh +@@ -716,6 +716,9 @@ SET_EXECUTABLE_ORIGIN + SHARED_LIBRARY_FLAGS + CXX_FLAG_REORDER + C_FLAG_REORDER ++HOST_NAME ++BUILDER_NAME ++BUILDER_ID + SYSROOT_LDFLAGS + SYSROOT_CFLAGS + RC_FLAGS +@@ -4078,6 +4081,12 @@ fi + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2022. 
These ++# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -13741,6 +13750,18 @@ test -n "$target_alias" && + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ mips64el) ++ VAR_CPU=mips64 ++ VAR_CPU_ARCH=mips ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; ++ loongarch64) ++ VAR_CPU=loongarch64 ++ VAR_CPU_ARCH=loongarch ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + as_fn_error $? "unsupported cpu $build_cpu" "$LINENO" 5 + ;; +@@ -13879,6 +13900,18 @@ $as_echo "$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU" >&6; } + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ mips64el) ++ VAR_CPU=mips64 ++ VAR_CPU_ARCH=mips ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; ++ loongarch64) ++ VAR_CPU=loongarch64 ++ VAR_CPU_ARCH=loongarch ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + as_fn_error $? "unsupported cpu $host_cpu" "$LINENO" 5 + ;; +@@ -14001,6 +14034,8 @@ $as_echo "$COMPILE_TYPE" >&6; } + OPENJDK_TARGET_CPU_LEGACY_LIB="i386" + elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then + OPENJDK_TARGET_CPU_LEGACY_LIB="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el" + fi + + +@@ -14034,6 +14069,9 @@ $as_echo "$COMPILE_TYPE" >&6; } + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. + OPENJDK_TARGET_CPU_OSARCH="amd64" ++ elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ # System.getProperty("os.arch"): mips64 -> mips64el ++ OPENJDK_TARGET_CPU_OSARCH="mips64el" + fi + + +@@ -14043,6 +14081,8 @@ $as_echo "$COMPILE_TYPE" >&6; } + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. + OPENJDK_TARGET_CPU_JLI="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI="mips64el" + fi + # Now setup the -D flags for building libjli. + OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'" +@@ -14055,6 +14095,9 @@ $as_echo "$COMPILE_TYPE" >&6; } + elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then + OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)" + fi ++ if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'" ++ fi + + + # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths. 
+@@ -42235,6 +42278,47 @@ $as_echo "$ac_cv_c_bigendian" >&6; } + fi + + ++BUILDER_NAME="$build_os" ++BUILDER_ID="Custom build ($(date))" ++if test -f /etc/issue; then ++ etc_issue_info=`cat /etc/issue` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/redhat-release; then ++ etc_issue_info=`cat /etc/redhat-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/neokylin-release; then ++ etc_issue_info=`cat /etc/neokylin-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -z "$BUILDER_NAME"; then ++ BUILDER_NAME="unknown" ++fi ++BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` ++if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then ++ HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" ++else ++ HOST_NAME="unknown" ++fi ++if test -f "/usr/bin/cpp"; then ++ # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` ++ gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` ++ if test -n "$gcc_with_arch_info"; then ++ HOST_NAME="$gcc_with_arch_info" ++ fi ++fi ++ ++ ++ ++ ++ + # Configure flags for the tools + + ############################################################################### +diff --git a/common/autoconf/platform.m4 b/common/autoconf/platform.m4 +index 51df988f61..51cc28c312 100644 +--- a/common/autoconf/platform.m4 ++++ b/common/autoconf/platform.m4 +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2022. These ++# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -96,6 +102,18 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ mips64el) ++ VAR_CPU=mips64 ++ VAR_CPU_ARCH=mips ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; ++ loongarch64) ++ VAR_CPU=loongarch64 ++ VAR_CPU_ARCH=loongarch ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + AC_MSG_ERROR([unsupported cpu $1]) + ;; +@@ -283,6 +301,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + OPENJDK_TARGET_CPU_LEGACY_LIB="i386" + elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then + OPENJDK_TARGET_CPU_LEGACY_LIB="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el" + fi + AC_SUBST(OPENJDK_TARGET_CPU_LEGACY_LIB) + +@@ -316,6 +336,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. 
+ OPENJDK_TARGET_CPU_OSARCH="amd64" ++ elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ # System.getProperty("os.arch"): mips64 -> mips64el ++ OPENJDK_TARGET_CPU_OSARCH="mips64el" + fi + AC_SUBST(OPENJDK_TARGET_CPU_OSARCH) + +@@ -325,6 +348,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. + OPENJDK_TARGET_CPU_JLI="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI="mips64el" + fi + # Now setup the -D flags for building libjli. + OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'" +@@ -337,6 +362,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then + OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)" + fi ++ if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'" ++ fi + AC_SUBST(OPENJDK_TARGET_CPU_JLI_CFLAGS) + + # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths. +@@ -550,3 +578,46 @@ AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS], + AC_MSG_ERROR([The tested endian in the target ($ENDIAN) differs from the endian expected to be found in the target ($OPENJDK_TARGET_CPU_ENDIAN)]) + fi + ]) ++ ++AC_DEFUN([GET_BUILDER_AND_HOST_DATA], ++[ ++BUILDER_NAME="$build_os" ++BUILDER_ID="Custom build ($(date))" ++if test -f /etc/issue; then ++ etc_issue_info=`cat /etc/issue` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/redhat-release; then ++ etc_issue_info=`cat /etc/redhat-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/neokylin-release; then ++ etc_issue_info=`cat /etc/neokylin-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -z "$BUILDER_NAME"; then ++ BUILDER_NAME="unknown" ++fi ++BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` ++if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then ++ HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" ++else ++ HOST_NAME="unknown" ++fi ++if test -f "/usr/bin/cpp"; then ++ # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` ++ gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` ++ if test -n "$gcc_with_arch_info"; then ++ HOST_NAME="$gcc_with_arch_info" ++ fi ++fi ++AC_SUBST(BUILDER_ID) ++AC_SUBST(BUILDER_NAME) ++AC_SUBST(HOST_NAME) ++]) +diff --git a/common/autoconf/spec.gmk.in b/common/autoconf/spec.gmk.in +index 461ec59711..70d56b331c 100644 +--- a/common/autoconf/spec.gmk.in ++++ b/common/autoconf/spec.gmk.in +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2023. These ++# modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + # Configured @DATE_WHEN_CONFIGURED@ to build + # for target system @OPENJDK_TARGET_OS@-@OPENJDK_TARGET_CPU@ + # (called @OPENJDK_TARGET_AUTOCONF_NAME@ by autoconf) +@@ -219,6 +225,23 @@ else + endif + JRE_RELEASE_VERSION:=$(FULL_VERSION) + ++# Build OS and host values for use in Loongson OpenJDK release ++BUILDER_ID:=@BUILDER_ID@ ++BUILDER_NAME:=@BUILDER_NAME@ ++HOST_NAME:=@HOST_NAME@ ++ ++# Loongson OpenJDK Version info ++VER=8.1.16 ++ifeq ($(HOST_NAME), ) ++ HOST_NAME=unknown ++endif ++ifeq ($(BUILDER_NAME), ) ++ BUILDER_NAME=unknown ++endif ++HOST_NAME_STRING=-$(HOST_NAME) ++BUILDER_NAME_STRING=-$(BUILDER_NAME) ++LOONGSON_RUNTIME_NAME=Loongson $(VER)$(HOST_NAME_STRING)$(BUILDER_NAME_STRING) ++ + # How to compile the code: release, fastdebug or slowdebug + DEBUG_LEVEL:=@DEBUG_LEVEL@ + +diff --git a/hotspot/agent/make/saenv.sh b/hotspot/agent/make/saenv.sh +index ab9a0a431c..a2de3fc329 100644 +--- a/hotspot/agent/make/saenv.sh ++++ b/hotspot/agent/make/saenv.sh +@@ -23,6 +23,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # This file sets common environment variables for all SA scripts + + OS=`uname` +@@ -42,6 +48,14 @@ if [ "$OS" = "Linux" ]; then + SA_LIBPATH=$STARTDIR/../src/os/linux/amd64:$STARTDIR/linux/amd64 + OPTIONS="-Dsa.library.path=$SA_LIBPATH" + CPU=amd64 ++ elif [ "$ARCH" = "mips64" ] ; then ++ SA_LIBPATH=$STARTDIR/../src/os/linux/mips:$STARTDIR/linux/mips ++ OPTIONS="-Dsa.library.path=$SA_LIBPATH" ++ CPU=mips ++ elif [ "$ARCH" = "loongarch64" ] ; then ++ SA_LIBPATH=$STARTDIR/../src/os/linux/loongarch64:$STARTDIR/linux/loongarch64 ++ OPTIONS="-Dsa.library.path=$SA_LIBPATH" ++ CPU=loongarch64 + else + SA_LIBPATH=$STARTDIR/../src/os/linux/i386:$STARTDIR/linux/i386 + OPTIONS="-Dsa.library.path=$SA_LIBPATH" +diff --git a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c +index d6a0c7d9a9..b3b1380b29 100644 +--- a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c ++++ b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #include + #include "libproc.h" + +@@ -49,10 +56,18 @@ + #include "sun_jvm_hotspot_debugger_sparc_SPARCThreadContext.h" + #endif + ++#if defined(mips64el) || defined(mips64) ++#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" ++#endif ++ + #ifdef aarch64 + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -337,7 +352,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) || defined(loongarch64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -364,6 +379,12 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #endif + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG ++#endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif ++#if defined(mips64) || defined(mips64el) ++#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG + #endif + + array = (*env)->NewLongArray(env, NPRGREG); +@@ -470,6 +491,55 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++#if defined(mips64) || defined(mips64el) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg ++ ++ regs[REG_INDEX(ZERO)] = gregs.regs[0]; ++ regs[REG_INDEX(AT)] = gregs.regs[1]; ++ regs[REG_INDEX(V0)] = gregs.regs[2]; ++ regs[REG_INDEX(V1)] = gregs.regs[3]; ++ regs[REG_INDEX(A0)] = gregs.regs[4]; ++ regs[REG_INDEX(A1)] = gregs.regs[5]; ++ regs[REG_INDEX(A2)] = gregs.regs[6]; ++ regs[REG_INDEX(A3)] = gregs.regs[7]; ++ regs[REG_INDEX(T0)] = gregs.regs[8]; ++ regs[REG_INDEX(T1)] = gregs.regs[9]; ++ regs[REG_INDEX(T2)] = gregs.regs[10]; ++ regs[REG_INDEX(T3)] = gregs.regs[11]; ++ regs[REG_INDEX(T4)] = gregs.regs[12]; ++ regs[REG_INDEX(T5)] = gregs.regs[13]; ++ regs[REG_INDEX(T6)] = gregs.regs[14]; ++ regs[REG_INDEX(T7)] = gregs.regs[15]; ++ regs[REG_INDEX(S0)] = gregs.regs[16]; ++ regs[REG_INDEX(S1)] = gregs.regs[17]; ++ regs[REG_INDEX(S2)] = gregs.regs[18]; ++ regs[REG_INDEX(S3)] = gregs.regs[19]; ++ regs[REG_INDEX(S4)] = gregs.regs[20]; ++ regs[REG_INDEX(S5)] = gregs.regs[21]; ++ regs[REG_INDEX(S6)] = gregs.regs[22]; ++ regs[REG_INDEX(S7)] = gregs.regs[23]; ++ regs[REG_INDEX(T8)] = gregs.regs[24]; ++ regs[REG_INDEX(T9)] = gregs.regs[25]; ++ regs[REG_INDEX(K0)] = gregs.regs[26]; ++ regs[REG_INDEX(K1)] = gregs.regs[27]; ++ regs[REG_INDEX(GP)] = gregs.regs[28]; ++ regs[REG_INDEX(SP)] = gregs.regs[29]; ++ regs[REG_INDEX(FP)] = gregs.regs[30]; ++ regs[REG_INDEX(S8)] = gregs.regs[30]; ++ regs[REG_INDEX(RA)] = gregs.regs[31]; ++#endif /* mips64 */ + + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; +diff --git a/hotspot/agent/src/os/linux/Makefile b/hotspot/agent/src/os/linux/Makefile +index c0b5c869c1..2cc50b6fab 100644 +--- a/hotspot/agent/src/os/linux/Makefile ++++ b/hotspot/agent/src/os/linux/Makefile +@@ -22,7 +22,13 @@ + # + # + +-ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) ++# ++# This file has been modified by Loongson Technology in 2020. 
These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ ++ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "mips64el" ]) ; then echo mips64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) + GCC = gcc + + JAVAH = ${JAVA_HOME}/bin/javah +@@ -53,6 +59,8 @@ $(ARCH)/LinuxDebuggerLocal.o: LinuxDebuggerLocal.c + $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ + sun.jvm.hotspot.debugger.x86.X86ThreadContext \ + sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ ++ sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext \ ++ sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext \ + sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ + sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext + $(GCC) $(CFLAGS) $< -o $@ +diff --git a/hotspot/agent/src/os/linux/libproc.h b/hotspot/agent/src/os/linux/libproc.h +index 6b6e41cab4..5eb8211aa9 100644 +--- a/hotspot/agent/src/os/linux/libproc.h ++++ b/hotspot/agent/src/os/linux/libproc.h +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -36,7 +42,7 @@ + + #include + +-#if defined(aarch64) ++#if defined(aarch64) || defined(loongarch64) + #include "asm/ptrace.h" + #endif + +@@ -76,7 +82,12 @@ combination of ptrace and /proc calls. + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) ++ ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif ++#if defined(aarch64) || defined(loongarch64) + #define user_regs_struct user_pt_regs + #endif + +diff --git a/hotspot/agent/src/os/linux/ps_proc.c b/hotspot/agent/src/os/linux/ps_proc.c +index c4d6a9ecc5..7000e92723 100644 +--- a/hotspot/agent/src/os/linux/ps_proc.c ++++ b/hotspot/agent/src/os/linux/ps_proc.c +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -141,7 +147,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index c963350591..20e6f35b9c 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -37,6 +43,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionIA64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -594,6 +602,10 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java +index 993bf7bb47..1e075aa57e 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java +@@ -94,6 +94,12 @@ public class Disassembler { + } else if (arch.equals("amd64") || arch.equals("x86_64")) { + path.append(sep + "lib" + sep + "amd64" + sep); + libname += "-amd64.so"; ++ } else if (arch.equals("mips64") || arch.equals("mips64el")) { ++ path.append(sep + "lib" + sep + "mips64" + sep); ++ libname += "-mips64.so"; ++ } else if (arch.equals("loongarch64")) { ++ path.append(sep + "lib" + sep + "loongarch64" + sep); ++ libname += "-loongarch64.so"; + } else { + path.append(sep + "lib" + sep + arch + sep); + libname += "-" + arch + ".so"; +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +new file mode 100644 +index 0000000000..0531427dab +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +new file mode 100644 +index 0000000000..1b49efd201 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index f178d6a6e7..019e794bbb 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -32,11 +32,15 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -106,6 +110,20 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, sp, pc); + } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +index 44c2265d7a..3b6747ac0a 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +@@ -30,6 +30,8 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.ia64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -42,6 +44,10 @@ class LinuxThreadContextFactory { + return new 
LinuxIA64ThreadContext(dbg); + } else if (cpu.equals("sparc")) { + return new LinuxSPARCThreadContext(dbg); ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); + } else { + try { + Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." + +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +new file mode 100644 +index 0000000000..3b20dbbd87 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ if (nextFP == null) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..9f22133eaf +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +new file mode 100644 +index 0000000000..2e3eb564da +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..98e0f3f0bc +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..90b0cf97e3 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. 
++ ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +new file mode 100644 +index 0000000000..c57ee9dfc9 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git 
a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +index 7113a3a497..24273888c2 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +@@ -63,6 +63,8 @@ public interface ELFHeader { + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. */ + public short getFileType(); +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +index ca1a2575ff..2afa6c55f8 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +@@ -34,10 +34,14 @@ import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.x86.*; ++import sun.jvm.hotspot.debugger.proc.mips64.*; ++import sun.jvm.hotspot.debugger.proc.loongarch64.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + /**
An implementation of the JVMDebugger interface which sits on +@@ -92,6 +96,14 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; + fpRegIndex = AARCH64ThreadContext.FP; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new ProcMIPS64ThreadFactory(this); ++ pcRegIndex = MIPS64ThreadContext.PC; ++ fpRegIndex = MIPS64ThreadContext.FP; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new ProcLOONGARCH64ThreadFactory(this); ++ pcRegIndex = LOONGARCH64ThreadContext.PC; ++ fpRegIndex = LOONGARCH64ThreadContext.FP; + } else { + try { + Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." + +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..42a31e3486 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. 
++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..9054f16506 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..bc64335124 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +new file mode 100644 +index 0000000000..5c1e0be893 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..d44223d768 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..bad478fc5c +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index ffa61b548e..9cf3ee2da3 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -33,6 +33,8 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.remote.sparc.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. +@@ -70,6 +72,16 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger { + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..01e3f8954b +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..ad25bccc8d +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..d8bf50ea5b +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +new file mode 100644 +index 0000000000..a9285a3b94 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..4d711f9ba7 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..020a2f1ff9 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 842a3b357d..81efdd02f8 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -34,6 +34,8 @@ import sun.jvm.hotspot.runtime.win32_amd64.Win32AMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.win32_x86.Win32X86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; +@@ -90,6 +92,10 @@ public class Threads { + access = new LinuxSPARCJavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..77c45c2e99 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == 
null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. ++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..a0fd73fa67 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..0208e6e224 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**
++   Should be able to be used on all loongarch64 platforms we support
++   (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's
++   "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext;
++   output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the
++   LOONGARCH64Frame is left to the caller, since we may need to subclass
++   LOONGARCH64Frame to support signal handler frames on Unix platforms.
++
++   Algorithm is to walk up the stack within a given range (say,
++   512K at most) looking for a plausible PC and SP for a Java frame,
++   also considering those coming in from the context. If we find a PC
++   that belongs to the VM (i.e., in generated code like the
++   interpreter or CodeCache) then we try to find an associated EBP.
++   We repeat this until we either find a complete frame or run out of
++   stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +new file mode 100644 +index 0000000000..fdf0c79c1a +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +@@ -0,0 +1,534 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. */ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int JAVA_FRAME_LINK_OFFSET = 0; ++ private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; ++ private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_LINK_OFFSET = -2; ++ private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; ++ private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = 
(NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. 
++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ OopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); ++ return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); ++ } ++ ++ // return address of param, zero origin index. 
++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +new file mode 100644 +index 0000000000..f7dbbcaacd +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +new file mode 100644 +index 0000000000..021ef523e3 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..21259a4d32 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all mips64 platforms we support ++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's ++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext; ++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the ++ MIPS64Frame is left to the caller, since we may need to subclass ++ MIPS64Frame to support signal handler frames on Unix platforms.
++ ++
Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated EBP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.
*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +new file mode 100644 +index 0000000000..0cc5cf4e7c +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +@@ -0,0 +1,547 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new VMReg(5); ++ } else { ++ rbp = new 
VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS64) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips64, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ OopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ // FIXME: not implementable yet ++ //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 0000000000..81fcb5b568 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 0000000000..648503792d +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index aa69257866..9c97d09bc3 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -65,6 +72,10 @@ public class PlatformInfo { + return cpu; + } else if (cpu.equals("aarch64")) { + return cpu; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ return "mips64"; ++ } else if (cpu.equals("loongarch64")) { ++ return "loongarch64"; + } else { + try { + Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); +diff --git a/hotspot/make/defs.make b/hotspot/make/defs.make +index a3573da56f..6e93182c92 100644 +--- a/hotspot/make/defs.make ++++ b/hotspot/make/defs.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # The common definitions for hotspot builds. + + # Optionally include SPEC file generated by configure. +@@ -285,7 +291,7 @@ ifneq ($(OSNAME),windows) + + # Use uname output for SRCARCH, but deal with platform differences. If ARCH + # is not explicitly listed below, it is treated as x86. 
+- SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64,$(ARCH))) ++ SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64 mips64 loongarch64,$(ARCH))) + ARCH/ = x86 + ARCH/sparc = sparc + ARCH/sparc64= sparc +@@ -295,6 +301,10 @@ ifneq ($(OSNAME),windows) + ARCH/ppc64 = ppc + ARCH/ppc64le= ppc + ARCH/ppc = ppc ++ ARCH/mips64 = mips ++ ARCH/mips64el = mips ++ ARCH/loongarch64 = loongarch ++ ARCH/loongarch = loongarch + ARCH/zero = zero + ARCH/aarch64 = aarch64 + +@@ -317,6 +327,20 @@ ifneq ($(OSNAME),windows) + BUILDARCH = ppc64 + endif + endif ++ ifeq ($(BUILDARCH), mips) ++ ifdef LP64 ++# ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) ++# BUILDARCH = mips64el ++# else ++ BUILDARCH = mips64 ++# endif ++ endif ++ endif ++ ifeq ($(BUILDARCH), loongarch) ++ ifdef LP64 ++ BUILDARCH = loongarch64 ++ endif ++ endif + + # LIBARCH is 1:1 mapping from BUILDARCH, except for ARCH=ppc64le + ifeq ($(ARCH),ppc64le) +@@ -332,9 +356,18 @@ ifneq ($(OSNAME),windows) + LIBARCH/sparcv9 = sparcv9 + LIBARCH/ia64 = ia64 + LIBARCH/ppc64 = ppc64 ++ LIBARCH/loongarch = loongarch64 + LIBARCH/zero = $(ZERO_LIBARCH) + +- LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 zero ++ ifeq ($(LIBARCH), mips64) ++ ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) ++ LIBARCH = mips64el ++ else ++ LIBARCH = mips64 ++ endif ++ endif ++ ++ LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 mips64 mips64el loongarch64 zero + endif + + # Required make macro settings for all platforms +diff --git a/hotspot/make/linux/Makefile b/hotspot/make/linux/Makefile +index e8f2010412..5aff01e87d 100644 +--- a/hotspot/make/linux/Makefile ++++ b/hotspot/make/linux/Makefile +@@ -74,6 +74,10 @@ ifneq (,$(findstring $(ARCH), ppc ppc64)) + FORCE_TIERED=0 + endif + endif ++# C1 is not ported on mips64, so we cannot build a tiered VM: ++ifeq (mips64, $(findstring mips64, $(ARCH))) ++ FORCE_TIERED=0 ++endif + + ifdef LP64 + ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","") +diff --git a/hotspot/make/linux/makefiles/defs.make b/hotspot/make/linux/makefiles/defs.make +index ec414639d2..9ade73ab34 100644 +--- a/hotspot/make/linux/makefiles/defs.make ++++ b/hotspot/make/linux/makefiles/defs.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # The common definitions for hotspot linux builds. + # Include the top level defs.make under make directory instead of this one. + # This file is included into make/defs.make. 
+@@ -39,6 +45,18 @@ ifndef ARCH + ARCH := ppc64 + endif + endif ++ifeq ($(ARCH), mips64el) ++ ARCH=mips64 ++endif ++ifeq ($(LP64), 1) ++ ifeq ($(ARCH), mips) ++ ARCH=mips64 ++ endif ++endif ++ ++ifeq ($(ARCH), loongarch) ++ ARCH=loongarch64 ++endif + + PATH_SEP ?= : + +@@ -83,6 +101,36 @@ ifneq (,$(findstring $(ARCH), sparc)) + HS_ARCH = sparc + endif + ++# mips ++ifeq ($(ARCH), mips64) ++ ifeq ($(ARCH_DATA_MODEL), 64) ++ ARCH_DATA_MODEL = 64 ++ MAKE_ARGS += LP64=1 ++ PLATFORM = linux-mips64 ++ VM_PLATFORM = linux_mips64 ++ else ++ ARCH_DATA_MODEL = 32 ++ PLATFORM = linux-mips32 ++ VM_PLATFORM = linux_mips32 ++ endif ++ HS_ARCH = mips ++endif ++ ++# loongarch ++ifeq ($(ARCH), loongarch64) ++ ifeq ($(ARCH_DATA_MODEL), 64) ++ ARCH_DATA_MODEL = 64 ++ MAKE_ARGS += LP64=1 ++ PLATFORM = linux-loongarch64 ++ VM_PLATFORM = linux_loongarch64 ++ else ++ ARCH_DATA_MODEL = 32 ++ PLATFORM = linux-loongarch32 ++ VM_PLATFORM = linux_loongarch32 ++ endif ++ HS_ARCH = loongarch ++endif ++ + # i686/i586 and amd64/x86_64 + ifneq (,$(findstring $(ARCH), amd64 x86_64 i686 i586)) + ifeq ($(ARCH_DATA_MODEL), 64) +@@ -311,16 +359,24 @@ ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar + ADD_SA_BINARIES/aarch64 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar ++ADD_SA_BINARIES/mips = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ ++ $(EXPORT_LIB_DIR)/sa-jdi.jar ++ADD_SA_BINARIES/loongarch = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ ++ $(EXPORT_LIB_DIR)/sa-jdi.jar + ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + ifneq ($(STRIP_POLICY),no_strip) + ifeq ($(ZIP_DEBUGINFO_FILES),1) + ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ++ ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ++ ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + else + ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ++ ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ++ ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + endif + endif + endif +diff --git a/hotspot/make/linux/makefiles/gcc.make b/hotspot/make/linux/makefiles/gcc.make +index 7dde7f0963..94c6d1d015 100644 +--- a/hotspot/make/linux/makefiles/gcc.make ++++ b/hotspot/make/linux/makefiles/gcc.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + #------------------------------------------------------------------------ + # CC, CXX & AS + +@@ -177,6 +183,9 @@ ARCHFLAG/aarch64 = + ARCHFLAG/ia64 = + ARCHFLAG/sparc = -m32 -mcpu=v9 + ARCHFLAG/sparcv9 = -m64 -mcpu=v9 ++ARCHFLAG/mips64 = -mabi=64 ++#ARCHFLAG/loongarch64 = -lp64 ++ARCHFLAG/loongarch64 = + ARCHFLAG/zero = $(ZERO_ARCHFLAG) + ARCHFLAG/ppc64 = -m64 + +@@ -202,7 +211,7 @@ else + endif + + # Compiler warnings are treated as errors +-WARNINGS_ARE_ERRORS = -Werror ++#WARNINGS_ARE_ERRORS = -Werror + + ifeq ($(USE_CLANG), true) + # However we need to clean the code up before we can unrestrictedly enable this option with Clang +diff --git a/hotspot/make/linux/makefiles/loongarch64.make b/hotspot/make/linux/makefiles/loongarch64.make +new file mode 100644 +index 0000000000..9e3cdb6f23 +--- /dev/null ++++ b/hotspot/make/linux/makefiles/loongarch64.make +@@ -0,0 +1,43 @@ ++# ++# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++ ++# Not included in includeDB because it has no dependencies ++Obj_Files += linux_loongarch.o ++ ++# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) ++# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) ++# Must also specify if CPU is little endian ++CFLAGS += -DVM_LITTLE_ENDIAN ++ ++CFLAGS += -DSICORTEX_ERRATA ++ ++CFLAGS += -D_LP64=1 ++ ++# The serviceability agent relies on frame pointer (%rbp) to walk thread stack ++CFLAGS += -fno-omit-frame-pointer ++ ++OPT_CFLAGS/compactingPermGenGen.o = -O1 +diff --git a/hotspot/make/linux/makefiles/mips64.make b/hotspot/make/linux/makefiles/mips64.make +new file mode 100644 +index 0000000000..d9af3b13ab +--- /dev/null ++++ b/hotspot/make/linux/makefiles/mips64.make +@@ -0,0 +1,43 @@ ++# ++# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. 
++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++ ++# Not included in includeDB because it has no dependencies ++Obj_Files += linux_mips.o ++ ++# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) ++# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) ++# Must also specify if CPU is little endian ++CFLAGS += -DVM_LITTLE_ENDIAN ++ ++CFLAGS += -DSICORTEX_ERRATA ++ ++CFLAGS += -D_LP64=1 ++ ++# The serviceability agent relies on frame pointer (%rbp) to walk thread stack ++CFLAGS += -fno-omit-frame-pointer ++ ++OPT_CFLAGS/compactingPermGenGen.o = -O1 +diff --git a/hotspot/make/linux/makefiles/sa.make b/hotspot/make/linux/makefiles/sa.make +index cdcb16a1a3..34c71bd666 100644 +--- a/hotspot/make/linux/makefiles/sa.make ++++ b/hotspot/make/linux/makefiles/sa.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # This makefile (sa.make) is included from the sa.make in the + # build directories. + +@@ -109,6 +115,8 @@ $(GENERATED)/sa-jdi.jar:: $(AGENT_FILES) + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext ++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext ++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.asm.Disassembler + +diff --git a/hotspot/make/linux/makefiles/saproc.make b/hotspot/make/linux/makefiles/saproc.make +index ffc0ec5ce5..c04a6765df 100644 +--- a/hotspot/make/linux/makefiles/saproc.make ++++ b/hotspot/make/linux/makefiles/saproc.make +@@ -21,6 +21,13 @@ + # questions. + # + # ++ ++# ++# This file has been modified by Loongson Technology in 2019. These ++# modifications are Copyright (c) 2018, 2019, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + include $(GAMMADIR)/make/defs.make + include $(GAMMADIR)/make/altsrc.make + +@@ -81,7 +88,12 @@ endif + SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE) \ + $(LDFLAGS_NO_EXEC_STACK) $(EXTRA_LDFLAGS) + ++ifneq (mips64, $(findstring mips64, $(BUILDARCH))) + SAARCH ?= $(BUILDARCH) ++else ++#If -Dmips64 is used, mips64 would be conflict with "struct mips64_watch_regs mips64" in /usr/include/asm/ptrace.h. ++SAARCH ?= mips ++endif + + $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE) + $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \ +diff --git a/hotspot/make/linux/makefiles/sparcWorks.make b/hotspot/make/linux/makefiles/sparcWorks.make +index e39116023c..dbc2ace825 100644 +--- a/hotspot/make/linux/makefiles/sparcWorks.make ++++ b/hotspot/make/linux/makefiles/sparcWorks.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2015. These ++# modifications are Copyright (c) 2015 Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + #------------------------------------------------------------------------ + # CC, CXX & AS + +@@ -38,6 +44,7 @@ endif + ARCHFLAG = $(ARCHFLAG/$(BUILDARCH)) + ARCHFLAG/i486 = -m32 + ARCHFLAG/amd64 = -m64 ++ARCHFLAG/mips64 = -m64 + + CFLAGS += $(ARCHFLAG) + AOUT_FLAGS += $(ARCHFLAG) +diff --git a/hotspot/make/linux/makefiles/vm.make b/hotspot/make/linux/makefiles/vm.make +index 04b7c20287..5e428538a0 100644 +--- a/hotspot/make/linux/makefiles/vm.make ++++ b/hotspot/make/linux/makefiles/vm.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Rules to build JVM and related libraries, included from vm.make in the build + # directory. + +@@ -99,9 +105,22 @@ CXXFLAGS = \ + ${HS_LIB_ARCH} \ + ${VM_DISTRO} + ++ifeq ($(MIPS_ABI),n32) ++ CXXFLAGS += -DN32 ++else ++ ifeq ($(MIPS_ABI),n64) ++ CXXFLAGS += -DN64 ++ endif ++endif + # This is VERY important! The version define must only be supplied to vm_version.o + # If not, ccache will not re-use the cache at all, since the version string might contain + # a time and date. 
++ifdef LOONGSON_RUNTIME_NAME ++ LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"$(LOONGSON_RUNTIME_NAME)\"" ++else ++ LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"\"" ++endif ++CXXFLAGS/vmError.o += ${LOONGSON_VM_INFO} + CXXFLAGS/vm_version.o += ${JRE_VERSION} ${VERSION_CFLAGS} + CXXFLAGS/arguments.o += ${VERSION_CFLAGS} + +@@ -211,6 +230,15 @@ endif + ifeq ($(Platform_arch_model), x86_64) + Src_Files_EXCLUDE += \*x86_32\* + endif ++ifeq ($(Platform_arch_model), mips_32) ++Src_Files_EXCLUDE += \*mips_64\* ++endif ++ifeq ($(Platform_arch_model), mips_64) ++Src_Files_EXCLUDE += \*mips_32\* ++endif ++ifeq ($(Platform_arch_model), loongarch_64) ++Src_Files_EXCLUDE += \*loongarch_32\* ++endif + + # Alternate vm.make + # This has to be included here to allow changes to the source +diff --git a/hotspot/make/linux/platform_loongarch64 b/hotspot/make/linux/platform_loongarch64 +new file mode 100644 +index 0000000000..d704cf389a +--- /dev/null ++++ b/hotspot/make/linux/platform_loongarch64 +@@ -0,0 +1,17 @@ ++os_family = linux ++ ++arch = loongarch ++ ++arch_model = loongarch_64 ++ ++os_arch = linux_loongarch ++ ++os_arch_model = linux_loongarch_64 ++ ++lib_arch = loongarch64 ++ ++compiler = gcc ++ ++gnu_dis_arch = loongarch64 ++ ++sysdefs = -DLINUX -D_GNU_SOURCE -DLOONGARCH64 +diff --git a/hotspot/make/linux/platform_mips64 b/hotspot/make/linux/platform_mips64 +new file mode 100644 +index 0000000000..c283671f82 +--- /dev/null ++++ b/hotspot/make/linux/platform_mips64 +@@ -0,0 +1,17 @@ ++os_family = linux ++ ++arch = mips ++ ++arch_model = mips_64 ++ ++os_arch = linux_mips ++ ++os_arch_model = linux_mips_64 ++ ++lib_arch = mips64 ++ ++compiler = gcc ++ ++gnu_dis_arch = mips64 ++ ++sysdefs = -DLINUX -D_GNU_SOURCE -DMIPS64 +diff --git a/hotspot/make/sa.files b/hotspot/make/sa.files +index d6e728a9a8..43b08e3ad1 100644 +--- a/hotspot/make/sa.files ++++ b/hotspot/make/sa.files +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # This filelist macro is included in platform specific sa.make + # included all packages/*.java. package list can be generated by + # $(GAMMADIR)/agent/make/build-pkglist. 
+@@ -52,14 +58,20 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/cdbg/basic/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/dummy/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/elf/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ +@@ -94,8 +106,12 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/posix/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_amd64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_sparc/*.java \ +diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +index 35d34a08ea..3b8cf4a11d 100644 +--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +@@ -1177,7 +1177,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + +- ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1242,7 +1244,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_d2l: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ clear_fpsr(); + __ fcvtzd(dest->as_register_lo(), src->as_double_reg()); + __ get_fpsr(tmp); +@@ -1253,7 +1255,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_f2i: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ clear_fpsr(); + __ fcvtzsw(dest->as_register(), src->as_float_reg()); + __ get_fpsr(tmp); +@@ -1264,7 +1266,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_f2l: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ 
clear_fpsr(); + __ fcvtzs(dest->as_register_lo(), src->as_float_reg()); + __ get_fpsr(tmp); +@@ -1275,7 +1277,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_d2i: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ clear_fpsr(); + __ fcvtzdw(dest->as_register(), src->as_double_reg()); + __ get_fpsr(tmp); +@@ -1731,6 +1733,11 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +index 120dd1a7df..6a3289022d 100644 +--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +@@ -277,18 +277,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ store(reg, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); +- __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); +- __ cmp(condition, reg, reg1); ++ __ cmp_branch(condition, reg, reg1, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + +diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp +new file mode 100644 +index 0000000000..2996ef7aa7 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp +@@ -0,0 +1,855 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/cardTableModRefBS.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#include "gc_implementation/g1/heapRegion.hpp" ++#endif // INCLUDE_ALL_GCS ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, 
split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, 
scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { 
++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register 
index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp +new file mode 100644 +index 0000000000..46b57cfe76 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp +@@ -0,0 +1,2810 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
++ ++class ArrayAddress; ++ ++class Address VALUE_OBJ_CLASS_SPEC { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). ++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument VALUE_OBJ_CLASS_SPEC { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. 
By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral VALUE_OBJ_CLASS_SPEC { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ // 32-bit complains about a multiple declaration for int*. ++ AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) ++ : _target((address) addr), ++ _rspec(rspec_from_rtype(rtype, (address) addr)) {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. 
++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress VALUE_OBJ_CLASS_SPEC { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 0b0000000100010100101001, ++ movgr2fr_d_op = 
0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op = 0b0111001010011110001100, ++ vftint_l_d_op 
= 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op = 0b0111011010011110100011, ++ 
xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, ++ vbitclri_h_op = 
0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 0b00111000001010000, 
++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, ++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ vsub_h_op = 
0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ vftintrm_w_d_op = 
0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ xvmsub_w_op = 
0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ xvrotri_d_op = 
0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 0b0010100110, ++ 
st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, ++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | 
rd; }
++
++ // 2RI1-type
++ // 31 11 10 9 5 4 0
++ // | opcode | I1 | vj | rd |
++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; }
++
++ // 2RI2-type
++ // 31 12 11 10 9 5 4 0
++ // | opcode | I2 | vj | rd |
++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; }
++
++ // 2RI3-type
++ // 31 13 12 10 9 5 4 0
++ // | opcode | I3 | vj | vd |
++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; }
++
++ // 2RI4-type
++ // 31 14 13 10 9 5 4 0
++ // | opcode | I4 | vj | vd |
++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; }
++
++ // 2RI5-type
++ // 31 15 14 10 9 5 4 0
++ // | opcode | I5 | vj | vd |
++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; }
++
++ // 2RI6-type
++ // 31 16 15 10 9 5 4 0
++ // | opcode | I6 | vj | vd |
++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; }
++
++ // 2RI7-type
++ // 31 17 16 10 9 5 4 0
++ // | opcode | I7 | vj | vd |
++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 7)<<10) | (vj<<5) | vd; }
++
++ // 2RI8-type
++ // 31 18 17 10 9 5 4 0
++ // | opcode | I8 | rj | rd |
++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; }
++
++ // 2RI12-type
++ // 31 22 21 10 9 5 4 0
++ // | opcode | I12 | rj | rd |
++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; }
++
++
++ // 2RI14-type
++ // 31 24 23 10 9 5 4 0
++ // | opcode | I14 | rj | rd |
++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; }
++
++ // 2RI16-type
++ // 31 26 25 10 9 5 4 0
++ // | opcode | I16 | rj | rd |
++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; }
++
++ // 1RI13-type (?)
++ // 31 18 17 5 4 0
++ // | opcode | I13 | vd |
++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; }
++
++ // 1RI20-type (?)
++ // 31 25 24 5 4 0
++ // | opcode | I20 | rd |
++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; }
++
++ // 1RI21-type
++ // 31 26 25 10 9 5 4 0
++ // | opcode | I21[15:0] | rj |I21[20:16]|
++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); }
++
++ // I26-type
++ // 31 26 25 10 9 0
++ // | opcode | I26[15:0] | I26[25:16] |
++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); }
++
++ // imm15
++ // 31 15 14 0
++ // | opcode | I15 |
++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); }
++
++
++ // get the offset field of beq, bne, blt[u], bge[u] instruction
++ int offset16(address entry) {
++ assert(is_simm16((entry - pc()) / 4), "change this code");
++ if (!is_simm16((entry - pc()) / 4)) {
++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4);
++ }
++ return (entry - pc()) / 4;
++ }
++
++ // get the offset field of beqz, bnez instruction
++ int offset21(address entry) {
++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code");
++ if (!is_simm((int)(entry - pc()) / 4, 21)) {
++ tty->print_cr("!!! is_simm21: %lx", (entry - pc()) / 4);
++ }
++ return (entry - pc()) / 4;
++ }
++
++ // get the offset field of b instruction
++ int offset26(address entry) {
++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code");
++ if (!is_simm((int)(entry - pc()) / 4, 26)) {
++ tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4);
++ }
++ return (entry - pc()) / 4;
++ }
++
++public:
++ using AbstractAssembler::offset;
++
++ //sign expand with the sign bit is h
++ static int expand(int x, int h) { return -(x & (1<<h)) | x; }
++
++ // Convert 16-bit x to a sign-extended 16-bit integer
++ static int simm16(int x) {
++ assert(x == (x & 0xFFFF), "must be 16-bit only");
++ return (x << 16) >> 16;
++ }
++
++ static int split_high16(int x) {
++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff;
++ }
++
++ static int split_low20(int x) {
++ return (x & 0xfffff);
++ }
++
++ // Convert 20-bit x to a sign-extended 20-bit integer
++ static int simm20(int x) {
++ assert(x == (x & 0xFFFFF), "must be 20-bit only");
++ return (x << 12) >> 12;
++ }
++
++ static int split_low12(int x) {
++ return (x & 0xfff);
++ }
++
++ // Split a signed 38-bit offset into a sign-extended low 18-bit part and the matching high 20-bit part (carry-adjusted)
++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) {
++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14;
++ si38 += (si38 & 0x20000) << 1;
++ si20 = si38 >> 18;
++ }
++
++ // Convert 12-bit x to a sign-extended 12-bit integer
++ static int simm12(int x) {
++ assert(x == (x & 0xFFF), "must be 12-bit only");
++ return (x << 20) >> 20;
++ }
++
++ // Convert 26-bit x to a sign-extended 26-bit integer
++ static int simm26(int x) {
++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only");
++ return (x << 6) >> 6;
++ }
++
++ static intptr_t merge(intptr_t x0, intptr_t x12) {
++ //lu12i, ori
++ return (((x12 << 12) | x0) << 32) >> 32;
++ }
++
++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) {
++ //lu32i, lu12i, ori
++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12;
++ }
++
++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) {
++ //lu52i, lu32i, lu12i, ori
++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0;
++ }
++
++ // Test if x is within signed immediate range for nbits.
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void 
bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } ++ ++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a 
unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_s (FloatRegister 
fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } 
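
All of the emitters in this class reduce to the insn_* packing helpers defined above. As a quick cross-check (illustration only, not part of the patch), the 2RI12 layout can be mirrored in a few lines of standalone C++; the addi_d_op value is taken from the ops10 enum earlier in this header, and $sp is assumed to be general register r3 in the usual LoongArch numbering:

  // Standalone illustration that mirrors insn_I12RR() above (not part of the patch).
  #include <cstdint>
  #include <cstdio>

  static int low(int x, int l) { return x & ((1 << l) - 1); }   // same effect as Assembler::low()
  static int insn_I12RR(int op, int imm12, int rj, int rd) {    // 2RI12-type: | opcode | I12 | rj | rd |
    return (op << 22) | (low(imm12, 12) << 10) | (rj << 5) | rd;
  }

  int main() {
    const int addi_d_op = 0b0000001011;   // value from enum ops10 in this header
    const int sp = 3;                     // $sp is general register r3
    // addi.d $sp, $sp, -16 : low() masks the 12-bit immediate to 0xff0
    uint32_t insn = (uint32_t)insn_I12RR(addi_d_op, -16, sp, sp);
    printf("0x%08x\n", insn);             // prints 0x02ffc063
    return 0;
  }

Comparing the printed word against what an assembler emits for the same instruction is a convenient way to sanity-check new entries in the opcode enums.
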
++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } ++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } ++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } ++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); } ++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } ++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } ++ ++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_d(FloatRegister fd, 
FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } ++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit 
int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, 
(int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister 
fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } ++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } ++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } ++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } ++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } ++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } ++ void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } ++ ++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), 
(int)rd->encoding())); } ++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } ++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_h (Register rd, Register rj, Register rk) { 
emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ ++ void ld_b (Register rd, Address src); ++ void ld_bu (Register rd, Address src); ++ void ld_d (Register rd, Address src); ++ void ld_h (Register rd, Address src); ++ void ld_hu (Register rd, Address src); ++ void ll_w (Register rd, Address src); ++ void ll_d (Register rd, Address src); ++ void ld_wu (Register rd, Address src); ++ void ld_w (Register rd, Address src); ++ void st_b (Register rd, Address dst); ++ void st_d (Register rd, Address dst); ++ void st_w (Register rd, Address dst); ++ void sc_w (Register rd, Address dst); ++ void sc_d (Register rd, Address dst); ++ void st_h (Register rd, Address dst); ++ void fld_s (FloatRegister fd, Address src); ++ void fld_d (FloatRegister fd, Address src); ++ void fst_s (FloatRegister fd, Address dst); ++ void fst_d (FloatRegister fd, Address dst); ++ ++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); 
emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void
amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_du(Register rd, 
Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void dbar(int hint) { ++ assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); ++ ++ if (os::is_ActiveCoresMP()) ++ andi(R0, R0, 0); ++ else ++ emit_int32(insn_I15(dbar_op, hint)); ++ } ++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } ++ ++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_b (Register rd, Register rj, Register rk) { 
emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } ++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } ++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } ++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } ++ ++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } ++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } ++ ++ ++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } ++ void bne (Register rj, 
Register rd, address entry) { bne (rj, rd, offset16(entry)); } ++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } ++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } ++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } ++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } ++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } ++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } ++ void b(address entry) { b(offset26(entry)); } ++ void bl(address entry) { bl(offset26(entry)); } ++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } ++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } ++ ++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } ++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } ++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } ++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } ++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } ++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } ++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); } ++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); } ++ void b(Label& L) { b(target(L)); } ++ void bl(Label& L) { bl(target(L)); } ++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } ++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } ++ ++ typedef enum { ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the constants below corresponds to the hint. This is convenient for OR operations.
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, 
FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } 
++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX 
emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ 
void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX 
emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } 
++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX 
emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { 
ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, 
FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( 
vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ 
void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, 
(int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX 
assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)xd->encoding())); } ++ ++#undef ASSERT_LSX ++#undef ASSERT_LASX ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {} ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..601f4afe6f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp +new file mode 100644 +index 0000000000..32775e9bc3 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp +@@ -0,0 +1,110 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP ++ ++// Platform specific for C++ based Interpreter ++#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ ++ ++private: ++ ++ // save the bottom of the stack after frame manager setup. For ease of restoration after return ++ // from recursive interpreter call ++ intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ ++ intptr_t* _last_Java_pc; /* pc to return to in frame manager */ ++ intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ ++ interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? */ ++ double _native_fresult; /* save result of native calls that might return floats */ ++ intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ ++public: ++ ++ static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); ++ inline intptr_t* sender_sp() { ++ return _sender_sp; ++ } ++ ++ ++#define SET_LAST_JAVA_FRAME() ++ ++#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); ++ ++/* ++ * Macros for accessing the stack. ++ */ ++#undef STACK_INT ++#undef STACK_FLOAT ++#undef STACK_ADDR ++#undef STACK_OBJECT ++#undef STACK_DOUBLE ++#undef STACK_LONG ++ ++// JavaStack Implementation ++ ++#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) ++#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) ++#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) ++#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) ++#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) ++#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) ++#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) ++#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) ++ ++#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) ++#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) ++#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) ++#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++// JavaLocals implementation ++ ++#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) ++#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) ++#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) ++#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) ++#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) ++#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) ++#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) ++#define 
LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) ++#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) ++ ++#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) ++#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) ++#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) ++#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++ ++#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp +new file mode 100644 +index 0000000000..07df527e94 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP ++ ++// Inline interpreter functions for LoongArch ++ ++inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } ++inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } ++inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } ++inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } ++inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } ++ ++inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } ++ ++inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? 
direction : 0); ++ ++} ++ ++inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { ++ // x86 can do unaligned copies but not 64bits at a time ++ to[0] = from[0]; to[1] = from[1]; ++} ++ ++// The long operations depend on compiler support for "long long" on x86 ++ ++inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { ++ return op1 + op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { ++ return op1 & op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { ++ // QQQ what about check and throw... ++ return op1 / op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { ++ return op1 * op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { ++ return op1 | op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { ++ return op1 - op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { ++ return op1 ^ op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { ++ return op1 % op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { ++ // CVM did this 0x3f mask, is the really needed??? QQQ ++ return ((unsigned long long) op1) >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { ++ return op1 >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { ++ return op1 << (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { ++ return -op; ++} ++ ++inline jlong BytecodeInterpreter::VMlongNot(jlong op) { ++ return ~op; ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { ++ return (op <= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { ++ return (op >= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { ++ return (op == 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { ++ return (op1 == op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { ++ return (op1 != op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { ++ return (op1 >= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { ++ return (op1 <= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { ++ return (op1 < op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { ++ return (op1 > op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { ++ return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); ++} ++ ++// Long conversions ++ ++inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { ++ return (jfloat) val; ++} ++ ++inline jint BytecodeInterpreter::VMlong2Int(jlong val) { ++ return (jint) val; ++} ++ ++// Double Arithmetic ++ ++inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { ++ return op1 + op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { ++ // Divide by zero... 
QQQ ++ return op1 / op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { ++ return op1 * op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { ++ return -op; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { ++ return fmod(op1, op2); ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { ++ return op1 - op2; ++} ++ ++inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? direction : 0); ++} ++ ++// Double Conversions ++ ++inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { ++ return (jfloat) val; ++} ++ ++// Float Conversions ++ ++inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { ++ return (jdouble) op; ++} ++ ++// Integer Arithmetic ++ ++inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { ++ return op1 + op2; ++} ++ ++inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { ++ return op1 & op2; ++} ++ ++inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return op1; ++ else return op1 / op2; ++} ++ ++inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { ++ return op1 * op2; ++} ++ ++inline jint BytecodeInterpreter::VMintNeg(jint op) { ++ return -op; ++} ++ ++inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { ++ return op1 | op2; ++} ++ ++inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return 0; ++ else return op1 % op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { ++ return op1 << op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { ++ return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { ++ return op1 - op2; ++} ++ ++inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { ++ return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { ++ return op1 ^ op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMint2Double(jint val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMint2Float(jint val) { ++ return (jfloat) val; ++} ++ ++inline jlong BytecodeInterpreter::VMint2Long(jint val) { ++ return (jlong) val; ++} ++ ++inline jchar BytecodeInterpreter::VMint2Char(jint val) { ++ return (jchar) val; ++} ++ ++inline jshort BytecodeInterpreter::VMint2Short(jint val) { ++ return (jshort) val; ++} ++ ++inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { ++ return (jbyte) val; ++} ++ ++#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp +new file mode 100644 +index 0000000000..8641090584 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/bytecodes.hpp" ++ ++ ++void Bytecodes::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++ ++Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { ++ // No LoongArch specific bytecodes ++ return code; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp +new file mode 100644 +index 0000000000..fbdf531996 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP ++ ++// No Loongson specific bytecodes ++ ++#endif // CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp +new file mode 100644 +index 0000000000..8f766a617e +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use LoongArch, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since LoongArch CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { return *(u2*)p; } ++ static inline u4 get_native_u4(address p) { return *(u4*)p; } ++ static inline u8 get_native_u8(address p) { return *(u8*)p; } ++ ++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } ++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } ++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since LoongArch64 CPUs use little-endian format. 
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "bytes_linux_loongarch.inline.hpp" ++#endif ++ ++#endif // CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..5166acfa2b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#endif ++ ++#define A0 RA0 ++#define A3 RA3 ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_method->as_register(), 1); ++ ce->store_parameter(_bci, 0); ++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, ++ bool throw_index_out_of_bounds_exception) ++ : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) ++ , _index(index) ++{ ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ move(SCR1, _index->as_register()); ++ } else { ++ __ li(SCR1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, ++ CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ _result = result; ++ _klass = klass; ++ _klass_reg = klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ 
bind(_entry); ++ __ move(A3, _klass_reg->as_register()); ++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++ : MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in 
runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. 
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++#endif ++ ++ __ b(_continuation); ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++#if INCLUDE_ALL_GCS ++ ++void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. ++ ++ __ bind(_entry); ++ assert(pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = pre_val()->as_register(); ++ ++ if (do_load()) { ++ ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, _continuation); ++ ce->store_parameter(pre_val()->as_register(), 0); ++ __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id), relocInfo::runtime_call_type); ++ __ b(_continuation); ++} ++ ++jbyte* G1PostBarrierStub::_byte_map_base = NULL; ++ ++jbyte* G1PostBarrierStub::byte_map_base_slow() { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->is_a(BarrierSet::G1SATBCTLogging), ++ "Must be if we're using this."); ++ return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; ++} ++ ++ ++void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ assert(addr()->is_register(), "Precondition."); ++ assert(new_val()->is_register(), "Precondition."); ++ Register new_val_reg = new_val()->as_register(); ++ __ beqz(new_val_reg, _continuation); ++ ce->store_parameter(addr()->as_pointer_register(), 0); ++ __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id), relocInfo::runtime_call_type); ++ __ b(_continuation); ++} ++ ++#endif // INCLUDE_ALL_GCS ++///////////////////////////////////////////////////////////////////////////// ++ ++#undef __ +diff --git a/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp +new file mode 100644 +index 0000000000..1140e44431 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. 
++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp +new file mode 100644 +index 0000000000..bd8578c72a +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp +new file mode 100644 +index 0000000000..1a89c437a8 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp +new file mode 100644 +index 0000000000..4f0cf05361 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp +new file mode 100644 +index 0000000000..25c90bcf98 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp +@@ -0,0 +1,362 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. 
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; ++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap 
++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ 
a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. 
++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..38b0daa025 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. 
++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // NativeInstruction::nop_instruction_size + ++ // NativeCallTrampolineStub::instruction_size ++ call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..ee48326bec +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp +@@ -0,0 +1,3377 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register as_reg(LIR_Opr op) { ++ return 
op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ move(reg, R0); ++ } else { ++ int oop_index = __ oop_recorder()->find_index(o); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(reg, (long)o); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
++ return in_bytes(frame_map()->framesize_in_bytes()); ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in A0, and A1 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(A0); ++ ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size, "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ __ bind(_unwind_handler_entry); ++ __ verify_not_null_oop(V0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(S0, V0); // Preserve the exception ++ } ++ ++ // Perform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::a0_opr); ++ stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); ++ __ unlock_object(A5, A4, A0, *stub->entry()); ++ __ bind(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ mov_metadata(A1, method()->constant_encoding()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(A0, S0); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address 
handler_base = __ start_a_stub(deopt_handler_size); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); ++ guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, ++ "word returns are in V0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ __ li(SCR2, os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(SCR1, SCR2, 0); ++ __ jr(RA); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ __ li(SCR2, os::get_polling_page()); ++ add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(SCR1, SCR2, 0); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ move(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ case T_OBJECT: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ case T_FLOAT: ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_li52(SCR1, (jlong) float_constant(c->as_jfloat())); ++ __ fld_s(dest->as_float_reg(), SCR1, 0); ++ break; ++ case T_DOUBLE: ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_li52(SCR1, (jlong) double_constant(c->as_jdouble())); ++ __ fld_d(dest->as_double_reg(), SCR1, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (!c->as_jobject()) ++ __ st_ptr(R0, 
frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ case T_INT: ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) ++ __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ li(SCR2, c->as_jint_bits()); ++ __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ li(SCR2, (intptr_t)c->as_jlong_bits()); ++ __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ 
fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ 
fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); } ++ ++void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); } ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ assert(op->cond() == lir_cond_always, "must be"); ++#endif ++ ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++} ++ ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for 
non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cmp_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ 
jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ 
allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. ++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? 
&profile_cast_success : success; ++ Label *failure_target = should_profile ? &profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, 
success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ 
__ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr = op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ++ LIR_Opr result, BasicType type) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ 
sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: 
++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. 
++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start <= call_stub_size, "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. ++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ ++ // FIXME: LA ++ if (copyfunc_addr == NULL) { ++ // Take a slow path for generic arraycopy. 
++ __ b(*stub->entry()); ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); ++ __ move(A0, j_rarg0); ++ assert_different_registers(A1, j_rarg2, j_rarg3, j_rarg4); ++ __ move(A1, j_rarg1); ++ assert_different_registers(A2, j_rarg3, j_rarg4); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A3, j_rarg4); ++ __ move(A3, j_rarg3); ++ __ move(A4, j_rarg4); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // A0 is -1^K where K == partial copied count ++ __ nor(SCR1, A0, R0); ++ __ slli_w(SCR1, SCR1, 0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, dst, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ assert_different_registers(A2, dst); ++ ++ __ load_klass(A4, dst); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, A0, SCR1); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, A0, R0); ++ __ slli_w(SCR1, SCR1, 0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); ++ Bytecodes::Code bc = method->java_code_at_bci(bci); ++ const bool callee_is_static = callee->is_loaded() && callee->is_static(); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) && ++ !callee_is_static && // required for optimized MH invokes ++ C1ProfileVirtualCalls) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case.
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) { ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { 
++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() > Address::times_1) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..7cb15f689f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp +@@ -0,0 +1,1442 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return 
Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ if (needs_card_mark) { ++ // This store will need a precise card mark, so go ahead and ++ // compute the full adddres instead of computing once for the ++ // store and again for the card mark. 
++ LIR_Opr tmp = new_pointer_register(); ++ __ leal(LIR_OprFact::address(addr), tmp); ++ return new LIR_Address(tmp, type); ++ } else { ++ return addr; ++ } ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, so we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++template <typename T> ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, ++ int disp, int c, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template <typename T> ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, ++ int disp, BasicType type, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp_branch(condition, reg, reg1, type, tgt); ++} ++ ++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { ++ assert(x->is_pinned(),""); ++ bool needs_range_check = x->compute_needs_range_check(); ++ bool use_length = x->length() != NULL; ++ bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; ++ bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || ++ !get_jobject_constant(x->value())->is_null_object() || ++ x->should_profile()); ++ ++ LIRItem array(x->array(), this); ++ LIRItem index(x->index(), this); ++ LIRItem value(x->value(), this); ++ LIRItem length(this); ++ ++ array.load_item(); ++ index.load_nonconstant(); ++ ++ if (use_length && needs_range_check) { ++ length.set_instruction(x->length()); ++ length.load_item(); ++ ++ } ++ if (needs_store_check || x->check_boolean()) { ++ value.load_item(); ++ } else { ++ value.load_for_store(x->elt_type()); ++ } ++ ++ set_no_result(x); ++ ++ // the CodeEmitInfo must be duplicated for each different ++ // LIR-instruction because spilling can occur anywhere between two ++ // instructions and so the debug information must be different ++ CodeEmitInfo* range_check_info = state_for(x); ++ CodeEmitInfo* null_check_info = NULL; ++ if (x->needs_null_check()) { ++ null_check_info = new CodeEmitInfo(range_check_info); ++ } ++ ++ // emit array address setup early so it schedules better ++ // FIXME? No harm in this on aarch64, and it might help ++ LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); ++ ++ if (GenerateRangeChecks && needs_range_check) { ++ if (use_length) { ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), x->elt_type(), new RangeCheckStub(range_check_info, index.result())); ++ } else { ++ array_range_check(array.result(), index.result(), null_check_info, range_check_info); ++ // range_check also does the null check ++ null_check_info = NULL; ++ } ++ } ++ ++ if (GenerateArrayStoreCheck && needs_store_check) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ ++ CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); ++ __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); ++ } ++ ++ if (obj_store) { ++ // Needs GC write barriers. 
++ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ __ move(value.result(), array_addr, null_check_info); ++ // Seems to be a precise ++ post_barrier(LIR_OprFact::address(array_addr), value.result()); ++ } else { ++ LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); ++ __ move(result, array_addr, null_check_info); ++ } ++} ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. 
++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new 
DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. ++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), 
LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dlog: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case 
vmIntrinsics::_dlog10: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dpow: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dsin: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dcos: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dtan: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { ++ assert(x->number_of_arguments() == 4, "wrong type"); ++ LIRItem obj (x->argument_at(0), this); // object ++ LIRItem offset(x->argument_at(1), this); // offset of field ++ LIRItem cmp (x->argument_at(2), this); // value to compare with field ++ LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp ++ ++ assert(obj.type()->tag() == objectTag, "invalid type"); ++ ++ // In 64bit the type can be long, sparc doesn't have this assert ++ // assert(offset.type()->tag() == intTag, "invalid type"); ++ ++ assert(cmp.type()->tag() == type->tag(), "invalid type"); ++ assert(val.type()->tag() == type->tag(), "invalid type"); ++ ++ // get address of field ++ obj.load_item(); ++ offset.load_nonconstant(); ++ val.load_item(); ++ cmp.load_item(); ++ ++ LIR_Address* a; ++ if(offset.result()->is_constant()) { ++ jlong c = offset.result()->as_jlong(); ++ if ((jlong)((jint)c) == c) { ++ a = new LIR_Address(obj.result(), ++ (jint)c, ++ as_BasicType(type)); ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(offset.result(), tmp); ++ a = new LIR_Address(obj.result(), ++ tmp, ++ as_BasicType(type)); ++ } ++ } else { ++ a = new LIR_Address(obj.result(), ++ offset.result(), ++ LIR_Address::times_1, ++ 0, ++ as_BasicType(type)); ++ } ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ if (type == objectType) { // Write-barrier needed for Object fields. ++ // Do the pre-write barrier, if any. ++ pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ if (type == objectType) ++ __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT), ++ result); ++ else if (type == intType) ++ __ cas_int(addr, cmp.result(), val.result(), ill, ill); ++ else if (type == longType) ++ __ cas_long(addr, cmp.result(), val.result(), ill, ill); ++ else { ++ ShouldNotReachHere(); ++ } ++ ++ __ move(FrameMap::scr1_opr, result); ++ ++ if (type == objectType) { // Write-barrier needed for Object fields. 
++ // Seems to be precise ++ post_barrier(addr, val.result()); ++ } ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = RA0; ++ Register j_rarg2 = RA1; ++ Register j_rarg3 = RA2; ++ Register j_rarg4 = RA3; ++ Register j_rarg5 = RA4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. 
++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, LIR_Address::times_1, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* 
x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). 
++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); 
++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. ++ profile_branch(x, cond, left, right); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); ++ } else { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ __ membar(); ++ __ volatile_load_mem_reg(address, result, info); ++} ++ ++void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, ++ BasicType type, bool is_volatile) { ++ LIR_Address* addr = new LIR_Address(src, offset, type); ++ __ load(addr, dst); ++} ++ ++void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, ++ BasicType type, bool is_volatile) { ++ LIR_Address* addr = new LIR_Address(src, offset, type); ++ bool is_obj = (type == T_ARRAY || type == T_OBJECT); ++ if (is_obj) { ++ // Do the pre-write barrier, if any. 
++ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ __ move(data, addr); ++ assert(src->is_register(), "must be register"); ++ // Seems to be a precise address ++ post_barrier(LIR_OprFact::address(addr), data); ++ } else { ++ __ move(data, addr); ++ } ++} ++ ++void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { ++ BasicType type = x->basic_type(); ++ LIRItem src(x->object(), this); ++ LIRItem off(x->offset(), this); ++ LIRItem value(x->value(), this); ++ ++ src.load_item(); ++ off.load_nonconstant(); ++ ++ // We can cope with a constant increment in an xadd ++ if (! (x->is_add() ++ && value.is_constant() ++ && can_inline_as_constant(x->value()))) { ++ value.load_item(); ++ } ++ ++ LIR_Opr dst = rlock_result(x, type); ++ LIR_Opr data = value.result(); ++ bool is_obj = (type == T_ARRAY || type == T_OBJECT); ++ LIR_Opr offset = off.result(); ++ ++ if (data == dst) { ++ LIR_Opr tmp = new_register(data->type()); ++ __ move(data, tmp); ++ data = tmp; ++ } ++ ++ LIR_Address* addr; ++ if (offset->is_constant()) { ++ jlong l = offset->as_jlong(); ++ assert((jlong)((jint)l) == l, "offset too large for constant"); ++ jint c = (jint)l; ++ addr = new LIR_Address(src.result(), c, type); ++ } else { ++ addr = new LIR_Address(src.result(), offset, type); ++ } ++ ++ LIR_Opr tmp = new_register(T_INT); ++ LIR_Opr ptr = LIR_OprFact::illegalOpr; ++ ++ if (x->is_add()) { ++ __ xadd(LIR_OprFact::address(addr), data, dst, tmp); ++ } else { ++ if (is_obj) { ++ // Do the pre-write barrier, if any. ++ ptr = new_pointer_register(); ++ __ add(src.result(), off.result(), ptr); ++ pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ } ++ __ xchg(LIR_OprFact::address(addr), data, dst, tmp); ++ if (is_obj) { ++ post_barrier(ptr, data); ++ } ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp +new file mode 100644 +index 0000000000..f15dacafeb +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp +new file mode 100644 +index 0000000000..219b2e3671 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..38ff4c5836 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, 
// object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, ++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..b75126fba4 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp +@@ -0,0 +1,346 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T4 RT4 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, 
we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, 
Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry() { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
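++ //
++ // so the argument at logical offset N is loaded from FP + (N + 2) words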
++ ++ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp +new file mode 100644 +index 0000000000..a750dca323 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp +@@ -0,0 +1,1252 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) 
{ ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { ++ _sasm = sasm; ++ __ set_info(name, must_gc_arguments); ++ __ enter(); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ __ leave(); ++ __ jr(RA); ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value. 
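++//
++// The save offsets below are expressed in VMReg stack slots (two slots per
++// 64-bit word) and are filled in by Runtime1::initialize_pd().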
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, 
target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. 
++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. 
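++ //
++ // Flow: save all live registers, call the patching runtime entry, then
++ // either re-execute through the deopt blob (if the runtime deoptimized
++ // the nmethod) or restore the registers and return to the caller.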
++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support for 
inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 2 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 3 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. 
++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ 
call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case 
monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
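++ // generate_unwind_exception() emits the code that hands the pending
++ // exception back to the caller's exception handler (a sketch of intent;
++ // the helper itself is not shown in this hunk).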
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++#if INCLUDE_ALL_GCS ++ ++ case g1_pre_barrier_slow_id: ++ { ++ StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ if (bs->kind() != BarrierSet::G1SATBCTLogging) { ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ __ should_not_reach_here(); ++ break; ++ } ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_active())); ++ ++ Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ Label done; ++ Label runtime; ++ ++ // Can we store original value in the thread's buffer? 
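++ // The SATB queue index counts down in bytes; zero means the thread-local
++ // buffer has no room left, so fall through to the runtime call below.
++ // Otherwise decrement the index by one word and store the previous value
++ // at buffer + index.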
++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ f.load_argument(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ f.load_argument(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); ++ __ popad(); ++ __ bind(done); ++ } ++ break; ++ case g1_post_barrier_slow_id: ++ { ++ StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); ++ ++ // arg0: store_address ++ Address store_addr(FP, 2*BytesPerWord); ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ f.load_argument(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTableModRefBS::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1SATBCardTableModRefBS::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(Assembler::StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
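++ // The card index is (store address >> card_shift); storing zero
++ // (dirty_card_val) at byte_map_base + index marks the card dirty. The
++ // dirty card address is then enqueued in the thread's dirty-card queue
++ // if there is room, otherwise the runtime path calls g1_wb_post.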
++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); ++ __ popad(); ++ __ bind(done); ++ ++ } ++ break; ++#endif ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ __ should_not_reach_here(); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp +new file mode 100644 +index 0000000000..df052a058c +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++define_pd_global(intx, SafepointPollOffset, 0 ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp +new file mode 100644 +index 0000000000..044b0d2536 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp +new file mode 100644 +index 0000000000..c7bf590b60 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp +new file mode 100644 +index 0000000000..652f6c1092 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp +new file mode 100644 +index 0000000000..70a47fc772 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/compiledIC.hpp"
++#include "code/icBuffer.hpp"
++#include "code/nmethod.hpp"
++#include "memory/resourceArea.hpp"
++#include "runtime/mutexLocker.hpp"
++#include "runtime/safepoint.hpp"
++
++// Release the CompiledICHolder* associated with this call site if there is one.
++void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
++ // This call site might have become stale so inspect it carefully.
++ NativeCall* call = nativeCall_at(call_site->addr());
++ if (is_icholder_entry(call->destination())) {
++ NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
++ InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
++ }
++}
++
++bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
++ // This call site might have become stale so inspect it carefully.
++ NativeCall* call = nativeCall_at(call_site->addr());
++ return is_icholder_entry(call->destination());
++}
++
++// ----------------------------------------------------------------------------
++
++#define __ _masm.
++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
++ address mark = cbuf.insts_mark(); // get mark within main instrs section
++
++ // Note that the code buffer's insts_mark is always relative to insts.
++ // That's why we must use the macroassembler to generate a stub.
++ MacroAssembler _masm(&cbuf);
++
++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size());
++ if (base == NULL) return NULL; // CodeBuffer::expand failed
++
++ // static stub relocation stores the instruction address of the call
++ __ relocate(static_stub_Relocation::spec(mark), 0);
++
++ // Code stream for loading method may be changed.
++ __ ibar(0);
++
++ // Rmethod contains the methodOop; it should be relocated for GC
++ // static stub relocation also tags the methodOop in the code-stream.
++ __ mov_metadata(Rmethod, NULL);
++ // This is recognized as unresolved by relocs/nativeInst/ic code
++
++ cbuf.set_insts_mark();
++ __ patchable_jump(__ pc());
++ // Update current stubs pointer and restore code_end.
++ __ end_a_stub();
++ return base;
++}
++#undef __
++
++int CompiledStaticCall::to_interp_stub_size() {
++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size;
++}
++
++// Relocation entries for call stub, compiled java to interpreter.
++int CompiledStaticCall::reloc_to_interp_stub() {
++ return 16;
++}
++
++void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
++ address stub = find_stub();
++ guarantee(stub != NULL, "stub not found");
++
++ if (TraceICs) {
++ ResourceMark rm;
++ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
++ p2i(instruction_address()),
++ callee->name_and_sig_as_C_string());
++ }
++
++ // Creation also verifies the object.
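++ // The stub emitted by emit_to_interp_stub() above is: ibar, mov_metadata
++ // into Rmethod, patchable jump. The metadata load therefore starts
++ // nop_instruction_size bytes past the stub base.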
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef LOONGARCH64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == jump->instruction_address() || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef LOONGARCH64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledStaticCall::verify() { ++ // Verify call. ++ NativeCall::verify(); ++ if (os::is_MP()) { ++ verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef LOONGARCH64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp +new file mode 100644 +index 0000000000..1b40eab95b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "copy_linux_loongarch.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_solaris_loongarch ++# include "copy_solaris_loongarch.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_windows_loongarch ++# include "copy_windows_loongarch.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_bsd_loongarch ++# include "copy_bsd_loongarch.inline.hpp" ++#endif ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp +new file mode 100644 +index 0000000000..45d86f5bfe +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP ++ ++ protected: ++ ++#if 0 ++ address generate_asm_interpreter_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++#endif ++ ++ void generate_more_monitors(); ++ void generate_deopt_handling(); ++ address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only ++ void generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ const Register sender_sp, ++ bool native); // C++ interpreter only ++ ++#endif // CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp +new file mode 100644 +index 0000000000..d6c0df3b77 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/cppInterpreter.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#ifdef SHARK ++#include "shark/shark_globals.hpp" ++#endif ++ ++#ifdef CC_INTERP ++ ++// Routine exists to make tracebacks look decent in debugger ++// while "shadow" interpreter frames are on stack. It is also ++// used to distinguish interpreter frames. ++ ++extern "C" void RecursiveInterpreterActivation(interpreterState istate) { ++ ShouldNotReachHere(); ++} ++ ++bool CppInterpreter::contains(address pc) { ++ Unimplemented(); ++} ++ ++#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) ++#define __ _masm-> ++ ++Label frame_manager_entry; ++Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized ++ // c++ interpreter entry point this holds that entry point label. ++ ++static address unctrap_frame_manager_entry = NULL; ++ ++static address interpreter_return_address = NULL; ++static address deopt_frame_manager_return_atos = NULL; ++static address deopt_frame_manager_return_btos = NULL; ++static address deopt_frame_manager_return_itos = NULL; ++static address deopt_frame_manager_return_ltos = NULL; ++static address deopt_frame_manager_return_ftos = NULL; ++static address deopt_frame_manager_return_dtos = NULL; ++static address deopt_frame_manager_return_vtos = NULL; ++ ++const Register prevState = G1_scratch; ++ ++void InterpreterGenerator::save_native_result(void) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::restore_native_result(void) { ++ Unimplemented(); ++} ++ ++// A result handler converts/unboxes a native call result into ++// a java interpreter/compiler result. The current frame is an ++// interpreter frame. The activation frame unwind code must be ++// consistent with that of TemplateTable::_return(...). In the ++// case of native methods, the caller's SP was not modified. 
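++// The CC_INTERP (C++ interpreter) entry points below are all stubbed out
++// with Unimplemented(); this port relies on the template interpreter.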
++address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::return_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::deopt_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_empty_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_accessor_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ bool native) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::lock_method(void) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_deopt_handling() { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_more_monitors() { ++ Unimplemented(); ++} ++ ++ ++static address interpreter_frame_manager = NULL; ++ ++void CppInterpreterGenerator::adjust_callers_stack(Register args) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : CppInterpreterGenerator(code) { ++ Unimplemented(); ++} ++ ++ ++static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { ++ Unimplemented(); ++} ++ ++int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, ++ frame* caller, ++ frame* current, ++ methodOop method, ++ intptr_t* locals, ++ intptr_t* stack, ++ intptr_t* stack_base, ++ intptr_t* monitor_base, ++ intptr_t* frame_bottom, ++ bool is_top_frame ++ ) ++{ ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { ++ Unimplemented(); ++} ++ ++ ++int AbstractInterpreter::layout_activation(methodOop method, ++ int tempcount, // Number of slots on java expression stack in use ++ int popframe_extra_args, ++ int moncount, // Number of active monitors ++ int callee_param_size, ++ int callee_locals_size, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame) { ++ Unimplemented(); ++} ++ ++#endif // CC_INTERP +diff --git a/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp +new file mode 100644 +index 0000000000..50de03653b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nmethod.hpp" ++#include "runtime/frame.hpp" ++#include "runtime/init.hpp" ++#include "runtime/os.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/top.hpp" ++ ++#ifndef PRODUCT ++ ++void pd_ps(frame f) { ++ intptr_t* sp = f.sp(); ++ intptr_t* prev_sp = sp - 1; ++ intptr_t *pc = NULL; ++ intptr_t *next_pc = NULL; ++ int count = 0; ++ tty->print("register window backtrace from %#lx:\n", p2i(sp)); ++} ++ ++// This function is used to add platform specific info ++// to the error reporting code. ++ ++void pd_obfuscate_location(char *buf,int buflen) {} ++ ++#endif // PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp +new file mode 100644 +index 0000000000..62478be3dc +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_loongarch.hpp" ++ ++// Nothing to do on LoongArch +diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp +new file mode 100644 +index 0000000000..598be0ee6f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++ ++// Nothing to do on LoongArch ++ ++#endif // CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp +new file mode 100644 +index 0000000000..ccd89e8d6d +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp +new file mode 100644 +index 0000000000..0f50a5715d +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp +@@ -0,0 +1,711 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. 
adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
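++ // A usable saved FP must lie strictly between the sender's SP and the
++ // base of the thread's stack.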
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ if (sender_blob->is_nmethod()) { ++ nmethod* nm = sender_blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { ++ return false; ++ } ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_nmethod(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_nmethod()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ // Note: fp == NULL is not really a prerequisite for this to be safe to ++ // walk for c2. However we've modified the code such that if we get ++ // a failure with fp != NULL that we then try with FP == NULL. ++ // This is basically to mimic what a last_frame would look like if ++ // c2 had generated it. ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame 
frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); ++ assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); ++ if (sender_nm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++    if (sender_nm->is_deopt_entry(_pc) ||
++        sender_nm->is_deopt_mh_entry(_pc)) {
++      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
++    }
++  }
++}
++
++//------------------------------------------------------------------------------
++// frame::update_map_with_saved_link
++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
++  // The interpreter and compiler(s) always save fp in a known
++  // location on entry. We must record where that location is
++  // so that if fp was live on callout from c2 we can find
++  // the saved copy no matter what it called.
++
++  // Since the interpreter always saves fp if we record where it is then
++  // we don't have to always save fp on entry and exit to c2 compiled
++  // code, on entry will be enough.
++  map->set_location(FP->as_VMReg(), (address) link_addr);
++  // this is weird "H" ought to be at a higher address however the
++  // oopMaps seems to have the "H" regs at the same address and the
++  // vanilla register.
++  // XXXX make this go away
++  if (true) {
++    map->set_location(FP->as_VMReg()->next(), (address) link_addr);
++  }
++}
++
++//------------------------------sender_for_compiled_frame-----------------------
++frame frame::sender_for_compiled_frame(RegisterMap* map) const {
++  assert(map != NULL, "map must be set");
++
++  // frame owned by optimizing compiler
++  assert(_cb->frame_size() >= 0, "must have non-zero frame size");
++
++  intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
++  intptr_t* unextended_sp = sender_sp;
++
++#ifdef ASSERT
++  const bool c1_compiled = _cb->is_compiled_by_c1();
++  bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method();
++  if (c1_compiled && native) {
++    assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size");
++  }
++#endif // ASSERT
++  // On LA the return_address is always the word on the stack
++  // the fp in compiler points to sender fp, but in interpreter, fp points to return address,
++  // so getting sender for compiled frame is not same as interpreter frame.
++  // we hard code here temporarily
++  // spark
++  address sender_pc = (address) *(sender_sp-1);
++
++  intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
++
++  if (map->update_map()) {
++    // Tell GC to use argument oopmaps for some runtime stubs that need it.
++    // For C1, the runtime stub might not have oop maps, so set this flag
++    // outside of update_register_map.
++    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++    if (_cb->oop_maps() != NULL) {
++      OopMapSet::update_register_map(this, map);
++    }
++
++    // Since the prolog does the save and restore of FP there is no oopmap
++    // for it so we must fill in its location as if there was an oopmap entry
++    // since if our caller was compiled code there could be live jvm state in it.
++    update_map_with_saved_link(map, saved_fp_addr);
++  }
++  assert(sender_sp != sp(), "must have changed");
++  return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
++}
++
++frame frame::sender(RegisterMap* map) const {
++  // Default is we don't have to follow them. The sender_for_xxx will
++  // update it accordingly
++  map->set_include_argument_oops(false);
++
++  if (is_entry_frame()) return sender_for_entry_frame(map);
++  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
++  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
++
++  if (_cb != NULL) {
++    return sender_for_compiled_frame(map);
++  }
++  // Must be native-compiled frame, i.e.
the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++ ++bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { ++ assert(is_interpreted_frame(), "must be interpreter frame"); ++ Method* method = interpreter_frame_method(); ++ // When unpacking an optimized frame the frame pointer is ++ // adjusted with: ++ int diff = (method->max_locals() - method->size_of_parameters()) * ++ Interpreter::stackElementWords; ++ printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); ++ return _fp == (fp - diff); ++} ++ ++void frame::pd_gc_epilog() { ++ // nothing done here now ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!m->is_valid_method()) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcx ++ ++ intptr_t bcx = interpreter_frame_bcx(); ++ if (m->validate_bci_from_bcx(bcx) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (cp == NULL || !cp->is_metaspace_object()) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. 
See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++#endif +diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp +new file mode 100644 +index 0000000000..964026e621 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp +@@ -0,0 +1,229 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++#include "utilities/top.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. 
However, to make it look good in the debugger the frame
++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
++// with a fake interpreter_state* parameter to make it easy to debug
++// nested calls.
++
++// Note that contrary to the layout for the assembly interpreter the
++// expression stack allocated for the C++ interpreter is full sized.
++// However this is not as bad as it seems as the interpreter frame_manager
++// will truncate the unused space on successive method calls.
++//
++// ------------------------------ C++ interpreter ----------------------------------------
++
++// Layout of interpreter frame:
++//
++//    [ monitor entry            ] <--- sp
++//      ...
++//    [ monitor entry            ]
++// -9 [ monitor block top        ] ( the top monitor entry )
++// -8 [ byte code pointer        ] (if native, bcp = 0)
++// -7 [ constant pool cache      ]
++// -6 [ methodData               ] mdx_offset(not core only)
++// -5 [ mirror                   ]
++// -4 [ methodOop                ]
++// -3 [ locals offset            ]
++// -2 [ last_sp                  ]
++// -1 [ sender's sp              ]
++//  0 [ sender's fp              ] <--- fp
++//  1 [ return address           ]
++//  2 [ oop temp offset          ] (only for native calls)
++//  3 [ result handler offset    ] (only for native calls)
++//  4 [ result type info         ] (only for native calls)
++//    [ local var m-1            ]
++//      ...
++//    [ local var 0              ]
++//    [ argument word n-1        ] <--- ( sender's sp )
++//      ...
++//    [ argument word 0          ] <--- S7
++
++ public:
++  enum {
++    pc_return_offset = 0,
++    // All frames
++    link_offset = 0,
++    return_addr_offset = 1,
++    // non-interpreter frames
++    sender_sp_offset = 2,
++
++#ifndef CC_INTERP
++
++    // Interpreter frames
++    interpreter_frame_return_addr_offset = 1,
++    interpreter_frame_result_handler_offset = 3, // for native calls only
++    interpreter_frame_oop_temp_offset = 2, // for native calls only
++
++    interpreter_frame_sender_fp_offset = 0,
++    interpreter_frame_sender_sp_offset = -1,
++    // outgoing sp before a call to an invoked method
++    interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
++    interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1,
++    interpreter_frame_method_offset = interpreter_frame_locals_offset - 1,
++    interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1,
++    interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1,
++    interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1,
++    interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1,
++
++    interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
++    interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
++
++#endif // CC_INTERP
++
++    // Entry frames
++    entry_frame_call_wrapper_offset = -9,
++
++    // Native frames
++
++    native_frame_initial_param_offset = 2
++
++  };
++
++  intptr_t ptr_at(int offset) const {
++    return *ptr_at_addr(offset);
++  }
++
++  void ptr_at_put(int offset, intptr_t value) {
++    *ptr_at_addr(offset) = value;
++  }
++
++ private:
++  // an additional field beyond _sp and _pc:
++  intptr_t* _fp; // frame pointer
++  // The interpreter and adapters will extend the frame of the caller.
++  // Since oopMaps are based on the sp of the caller before extension
++  // we need to know that value. However in order to compute the address
++  // of the return address we need the real "raw" sp. Since sparc already
++  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
++  // original sp we use that convention.
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); ++ static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { ++ verify_deopt_original_pc(nm, unextended_sp, true); ++ } ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // return address of param, zero origin index. ++ inline address* native_param_addr(int idx) const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++#ifndef CC_INTERP ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++#endif // CC_INTERP ++ ++#ifdef CC_INTERP ++ inline interpreterState get_interpreterState() const; ++#endif // CC_INTERP ++ ++#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp +new file mode 100644 +index 0000000000..3d22339ad7 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp +@@ -0,0 +1,312 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++// return address of param, zero origin index. ++inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } ++ ++#ifdef CC_INTERP ++ ++inline interpreterState frame::get_interpreterState() const { ++ return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); ++} ++ ++inline intptr_t* frame::sender_sp() const { ++ // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? ++ if (is_interpreted_frame()) { ++ assert(false, "should never happen"); ++ return get_interpreterState()->sender_sp(); ++ } else { ++ return addr_at(sender_sp_offset); ++ } ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_locals); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_bcp); ++} ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_constants); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_method); ++} ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_mdx); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ assert(is_interpreted_frame(), "wrong frame type"); ++ return get_interpreterState()->_stack + 1; ++} ++ ++#else // asm interpreter ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcx_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdx_offset); ++} ++ ++ 
++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++#endif // CC_INTERP ++ ++inline int frame::pd_oop_map_offset_adjustment() const { ++ return 0; ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++ ++inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } ++ ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - local_index + (local_index < nof_args ? 1: -1)); ++} ++ ++inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); ++} ++ ++inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); ++} ++ ++inline bool frame::volatile_across_calls(Register reg) { ++ return true; ++} ++ ++ ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp +new file mode 100644 +index 0000000000..f9f93b9e65 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#endif // CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp +new file mode 100644 +index 0000000000..182be608a3 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++#ifdef CORE ++define_pd_global(bool, UseSSE, 0); ++#endif /* CORE */ ++define_pd_global(bool, ConvertSleepToYield, true); ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, CountInterpCalls, true); ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. 
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ product(bool, UseUnalignedAccesses, false, \ ++ "Use unaligned memory accesses in Unsafe") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp +new file mode 100644 +index 0000000000..8c78225346 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "oops/oop.inline2.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp +new file mode 100644 +index 0000000000..d577e41f59 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, RA2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp +new file mode 100644 +index 0000000000..15e45cb350 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. 
++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp +new file mode 100644 +index 0000000000..8c84f21511 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp +@@ -0,0 +1,1960 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interp_masm_loongarch_64.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are 
callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ Register thread = T8; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#else ++ move(T8, TREG); ++#endif ++ ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T4; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ li(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ assert(EnableInvokeDynamic, "giant index used only for JSR 292"); ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ srli_d(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ li(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ add_d(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. Since this is a java object, it can be compressed ++ Register tmp = index; // reuse ++ shl(tmp, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld_d(result, result, ConstantPool::resolved_references_offset_in_bytes()); ++ // JNIHandles::resolve(obj); ++ ld_d(result, result, 0); //? is needed? ++ // Add in the index ++ add_d(result, result, tmp); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. 
++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. 
++#ifndef OPT_THREAD ++ get_thread(temp); ++#else ++ move(temp, TREG); ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop) { ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. ++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, S8, T2); ++ } ++ } else { ++ li(T3, (long)table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T2, T3); ++ } ++ jr(T3); ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state) { ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think LA do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ ld_d(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++} ++ ++#endif // C_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); ++ pop2(T0, V0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
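++  // Sketch of the check below: recompute the expected bcp from the bci stored
++  // in the DataLayout entry, i.e. bcp == ConstMethod* + codes_offset() + bci,
++  // and fall into InterpreterRuntime::verify_mdp() only if it does not match BCP.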
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ // Decrement the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ blt(tmp, R0, L); ++ addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ st_d(tmp, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld_d(AT, data); ++ // Increment the register. ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ sub_d(tmp, tmp, AT); ++ st_d(tmp, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(S0); ++ if (decrement) { ++ // Decrement the register. ++ add_d(AT, mdp_in, reg); ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ ld_d(AT, AT, constant); ++ ++ addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ blt(tmp, R0, L); ++ addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ ++ add_d(AT, mdp_in, reg); ++ st_d(tmp, AT, constant); ++ } else { ++ add_d(AT, mdp_in, reg); ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ ld_d(AT, AT, constant); ++ ++ // Increment the register. ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ sub_d(tmp, tmp, AT); ++ ++ add_d(AT, mdp_in, reg); ++ st_d(tmp, AT, constant); ++ } ++ pop(S0); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ push(T8); ++ // T8 is used as a temporary register. ++ addi_d(T8, bumped_count, DataLayout::counter_increment); ++ slt(AT, T8, R0); ++ sub_d(bumped_count, T8, AT); ++ pop(T8); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
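++  // For example: a boolean result keeps only bit 0, byte and short results are
++  // sign-extended from 8/16 bits, and a char result is zero-extended to 16 bits;
++  // a T_INT result needs no adjustment.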
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
++        slli_w(tmp, tmp, exact_log2(DataLayout::cell_size));
++        add_d(mdp, mdp, tmp);
++      }
++      st_d(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize);
++    } else {
++      assert(MethodData::profile_return(), "either profile call args or call ret");
++      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
++    }
++
++    // mdp points right after the end of the
++    // CallTypeData/VirtualCallTypeData, right after the cells for the
++    // return value type if there's one
++
++    bind(profile_continue);
++  }
++}
++
++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
++  assert_different_registers(mdp, ret, tmp, _bcp_register);
++  if (ProfileInterpreter && MethodData::profile_return()) {
++    Label profile_continue, done;
++
++    test_method_data_pointer(mdp, profile_continue);
++
++    if (MethodData::profile_return_jsr292_only()) {
++      // If we don't profile all invoke bytecodes we must make sure
++      // it's a bytecode we indeed profile. We can't go back to the
++      // beginning of the ProfileData we intend to update to check its
++      // type because we're right after it and we don't know its
++      // length
++      Label do_profile;
++      ld_b(tmp, _bcp_register, 0);
++      addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic);
++      beqz(AT, do_profile);
++      addi_d(AT, tmp, -1 * Bytecodes::_invokehandle);
++      beqz(AT, do_profile);
++
++      get_method(tmp);
++      ld_b(tmp, tmp, Method::intrinsic_id_offset_in_bytes());
++      li(AT, vmIntrinsics::_compiledLambdaForm);
++      bne(tmp, AT, profile_continue);
++
++      bind(do_profile);
++    }
++
++    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
++    add_d(tmp, ret, R0);
++    profile_obj_type(tmp, mdo_ret_addr);
++
++    bind(profile_continue);
++  }
++}
++
++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
++  guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch !");
++
++  if (ProfileInterpreter && MethodData::profile_parameters()) {
++    Label profile_continue, done;
++
++    test_method_data_pointer(mdp, profile_continue);
++
++    // Load the offset of the area within the MDO used for
++    // parameters. If it's negative we're not profiling any parameters
++    ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()));
++    blt(tmp1, R0, profile_continue);
++
++    // Compute a pointer to the area for parameters from the offset
++    // and move the pointer to the slot for the last
++    // parameters. Collect profiling from last parameter down.
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ slli_d(AT, tmp2, Interpreter::stackElementScale()); ++ ldx_d(tmp2, AT, _locals_register); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ T8, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // For c++ interpreter the result is always stored at a known location in the frame ++ // template interpreter will leave it on the top of the stack. 
++ NOT_CC_INTERP(push(state);) ++ ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ NOT_CC_INTERP(pop(state)); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ NOT_CC_INTERP(push(state)); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ T8, S3); ++ NOT_CC_INTERP(pop(state)); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ li(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp +new file mode 100644 +index 0000000000..9113da54ff +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true); ++#endif // CC_INTERP ++ ++ public: ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register 
mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp +new file mode 100644 +index 0000000000..7f253b2d51 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp +@@ -0,0 +1,51 @@ ++/* ++ * 
Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP ++ ++ ++// Generation of Interpreter ++// ++ friend class AbstractInterpreterGenerator; ++ ++ private: ++ ++ address generate_normal_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ address generate_Reference_get_entry(); ++ address generate_CRC32_update_entry(); ++ address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++ ++#endif // CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp +new file mode 100644 +index 0000000000..052eb997e4 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++ } ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp +new file mode 100644 +index 0000000000..0c9df4aa71 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp +@@ -0,0 +1,274 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + RA0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp +new file mode 100644 +index 0000000000..c83afbdaf0 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP ++ ++ public: ++ ++ // Sentinel placed in the code for interpreter returns so ++ // that i2c adapters and osr code can recognize an interpreter ++ // return address and convert the return to a specialized ++ // block of code to handle compiedl return values and cleaning ++ // the fpu stack. ++ static const int return_sentinel; ++ ++ static Address::ScaleFactor stackElementScale() { ++ return Address::times_8; ++ } ++ ++ // Offset from sp (which points to the last stack element) ++ static int expr_offset_in_bytes(int i) { return stackElementSize * i; } ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreterSize to get the VM to print out the size. ++ // Max size with JVMTI and TaggedStackInterpreter ++ const static int InterpreterCodeSize = 168 * 1024; ++#endif // CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp +new file mode 100644 +index 0000000000..5a4f102cfd +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp +@@ -0,0 +1,277 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++address AbstractInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. 
++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ __ ld_d(RA, SP, 0); ++ __ addi_d(SP, SP, 18 * wordSize); ++ __ jr(RA); ++ return entry; ++} ++ ++ ++// ++// Various method entries ++// ++ ++address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ ++ // Rmethod: methodOop ++ // V0: scratrch ++ // Rsender: send 's sp ++ ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ address entry_point = __ pc(); ++ //guarantee(0, "LA not implemented yet"); ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. ++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: [ lo(arg) ] <-- sp ++ // [ hi(arg) ] ++ { ++ // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are ++ // java methods. Interpreter::method_kind(...) will select ++ // this entry point for the corresponding methods in JDK 1.3. ++ __ fld_d(FA0, SP, 0 * wordSize); ++ __ fld_d(FA1, SP, 1 * wordSize); ++ __ push2(RA, FP); ++ __ addi_d(FP, SP, 2 * wordSize); ++ ++ // [ fp ] <-- sp ++ // [ ra ] ++ // [ lo ] <-- fp ++ // [ hi ] ++ //FIXME, need consider this ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ __ trigfunc('s'); ++ break; ++ case Interpreter::java_lang_math_cos : ++ __ trigfunc('c'); ++ break; ++ case Interpreter::java_lang_math_tan : ++ __ trigfunc('t'); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ __ fsqrt_d(F0, FA0); ++ break; ++ case Interpreter::java_lang_math_abs: ++ __ fabs_d(F0, FA0); ++ break; ++ case Interpreter::java_lang_math_log: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_log10: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_pow: ++ break; ++ case Interpreter::java_lang_math_exp: ++ break; ++ ++ default : ++ ShouldNotReachHere(); ++ } ++ ++ // must maintain return value in F0:F1 ++ __ ld_d(RA, FP, (-1) * wordSize); ++ //FIXME ++ __ ld_d(FP, FP, (-2) * wordSize); ++ __ move(SP, Rsender); ++ __ jr(RA); ++ } ++ return entry_point; ++} ++ ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address InterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++// Empty method, generate a very fast return. 
++
++address InterpreterGenerator::generate_empty_entry(void) {
++
++  // Rmethod: methodOop
++  // V0: receiver (unused)
++  // Rsender: sender's sp; SP must be set to this value on return. On LoongArch this currently uses T0; is that right?
++  if (!UseFastEmptyMethods) return NULL;
++
++  address entry_point = __ pc();
++  //TODO: LA
++  //guarantee(0, "LA not implemented yet");
++  Label slow_path;
++  __ li(RT0, SafepointSynchronize::address_of_state());
++  __ ld_w(AT, RT0, 0);
++  __ li(RT0, (SafepointSynchronize::_not_synchronized));
++  __ bne(AT, RT0, slow_path);
++  __ move(SP, Rsender);
++  __ jr(RA);
++  __ bind(slow_path);
++  (void) generate_normal_entry(false);
++  return entry_point;
++
++}
++
++void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
++
++  // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
++  // the days we had adapter frames. When we deoptimize a situation where a
++  // compiled caller calls a compiled callee, the caller will have registers it
++  // expects to survive the call to the callee. If we deoptimize the callee the only
++  // way we can restore these registers is to have the oldest interpreter
++  // frame that we create restore these values. That is what this routine
++  // will accomplish.
++
++  // At the moment we have modified c2 to not have any callee save registers
++  // so this problem does not exist and this routine is just a place holder.
++
++  assert(f->is_interpreted_frame(), "must be interpreted");
++}
+diff --git a/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp
+new file mode 100644
+index 0000000000..de97de5804
+--- /dev/null
++++ b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp
+@@ -0,0 +1,87 @@
++/*
++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp +new file mode 100644 +index 0000000000..5b52e54e08 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp +@@ -0,0 +1,169 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ li(AT, (long)counter_addr); ++ __ ld_w(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ ++ __ move(T0, A1); ++ __ clear_jweak_tag(T0); ++ ++ __ ld_d(T0, T0, 0); // unbox, *obj ++ __ srli_d(T2, A2, 2); // offset ++ __ add_d(T0, T0, T2); ++ ++ __ li(AT, (long)counter_addr); ++ __ ld_w(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; ++ case T_BYTE: __ ld_b (V0, T0, 0); break; ++ case T_CHAR: __ ld_hu (V0, T0, 0); break; ++ case T_SHORT: __ ld_h (V0, T0, 0); break; ++ case T_INT: __ ld_w (V0, T0, 0); break; ++ case T_LONG: __ ld_d (V0, T0, 0); break; ++ case T_FLOAT: __ fld_s (F0, T0, 0); break; ++ case T_DOUBLE: __ fld_d (F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } 
++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp +new file mode 100644 +index 0000000000..554ff216ac +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++#include "prims/jni.h" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. 
++  static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1];
++                                                            *(jint *)(to ) = from[0]; }
++  static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
++
++public:
++  // On LoongArch64, sizeof(intptr_t) is 8 bytes, and each unit in JavaCallArguments::_value_buffer[]
++  // is 8 bytes.
++  // If we only write the low 4 bytes with (jint *), the high 4 bytes are left with uncertain values.
++  // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded.
++  // This error occurs in ReflectInvoke.java:
++  // the parameter of DD(int) should be 4 instead of 0x550000004.
++  //
++  // See: [runtime/javaCalls.hpp]
++
++  static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; }
++  static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; }
++  static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; }
++
++  // Longs are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the long value. Otherwise it corrupts.
++  static inline void put_long(jlong from, intptr_t *to) {
++    *(jlong*) (to + 1) = from;
++    *(jlong*) (to) = from;
++  }
++
++  // A long parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++  //
++  // See: [runtime/reflection.cpp] Reflection::invoke()
++  //      assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking");
++
++  static inline void put_long(jlong from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = from;
++    *(jlong*) (to + pos) = from;
++    pos += 2;
++  }
++
++  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = *from;
++    *(jlong*) (to + pos) = *from;
++    pos += 2;
++  }
++
++  // Oops are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; }
++  static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; }
++  static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
++
++  // Floats are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
++  static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
++  static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
++
++#undef _JNI_SLOT_OFFSET
++#define _JNI_SLOT_OFFSET 0
++
++  // Doubles are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the double value. Otherwise it corrupts.
++  static inline void put_double(jdouble from, intptr_t *to) {
++    *(jdouble*) (to + 1) = from;
++    *(jdouble*) (to) = from;
++  }
++
++  // A double parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/jni_loongarch.h b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h +new file mode 100644 +index 0000000000..eb25cbc354 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. Oracle designates this ++ * particular file as subject to the "Classpath" exception as provided ++ * by Oracle in the LICENSE file that accompanied this code. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef _JAVASOFT_JNI_MD_H_ ++#define _JAVASOFT_JNI_MD_H_ ++ ++// Note: please do not change these without also changing jni_md.h in the JDK ++// repository ++#ifndef __has_attribute ++ #define __has_attribute(x) 0 ++#endif ++#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) ++ #define JNIEXPORT __attribute__((visibility("default"))) ++ #define JNIIMPORT __attribute__((visibility("default"))) ++#else ++ #define JNIEXPORT ++ #define JNIIMPORT ++#endif ++ ++#define JNICALL ++ ++typedef int jint; ++ ++ typedef long jlong; ++ ++typedef signed char jbyte; ++ ++#endif +diff --git a/hotspot/src/cpu/loongarch/vm/loongarch.ad b/hotspot/src/cpu/loongarch/vm/loongarch.ad +new file mode 100644 +index 0000000000..48c44779e7 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/loongarch.ad +@@ -0,0 +1,24 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// +diff --git a/hotspot/src/cpu/loongarch/vm/loongarch_64.ad b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad +new file mode 100644 +index 0000000000..6db00bf642 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad +@@ -0,0 +1,12862 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ // TODO: LA ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, 
S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ // TODO: LA ++ reg_def S8 ( NS, NS, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H ( NS, NS, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++reg_def F4_O ( SOC, SOC, Op_RegF, 4, 
F4->as_VMReg()->next(7) ); ++ ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++reg_def F11_M ( SOC, SOC, Op_RegF, 11, 
F11->as_VMReg()->next(5) ); ++reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); 
++reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg() ); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next() ); ++reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++reg_def F24_N ( 
SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg() ); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next() ); ++reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg() ); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next() ); ++reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg() ); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next() ); ++reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next() ); ++reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next() ); ++reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next() ); ++reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next() ); ++reg_def F31_J ( SOC, SOC, Op_RegF, 31, 
F31->as_VMReg()->next(2) ); ++reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( 
FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeFarCall::instruction_size; ++ return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeFarCall::instruction_size; ++ return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++#define V0_num A0_num ++#define V0_H_num A0_H_num ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. 
++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ return true; // Per default match rules are supported. ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConIU_reg_immI_0_long_rule: ++ case branchConIU_reg_immI_0_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? True for Intel but false for most RISCs ++const bool Matcher::clone_shift_expressions = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? 
++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// LoongArch supports misaligned vectors store/load? ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// LoongArch doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Threshold size for cleararray. ++const int Matcher::init_array_short_size = 8 * BytesPerLong; ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// Since LA doesn't have absolute addressing, it needs. ++bool SafePointNode::needs_polling_address_input() { ++ return false; ++} ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). ++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# 
spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_w(AT, Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ 
ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("ld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("ld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("st_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("st_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), 
as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == 
rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++# ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ ld_d(RA, Address(SP, framesize - wordSize)); ++ __ ld_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize, 12)) { ++ __ addi_d(SP, SP, framesize); ++ } else { ++ __ li(AT, framesize); ++ __ add_d(SP, SP, AT); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ __ li(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way 
++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) 
const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = cbuf.consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, ++ "insts must immediately follow consts"); ++ // Materialize the constant table base. ++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); ++ jint offs = (baseaddr - __ pc()) >> 2; ++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); ++ __ pcaddi(Rtoc, offs); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // pcaddi ++ return 1 * BytesPerInstWord; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ st->print("addi_d FP, SP, -%d \n\t", wordSize*2); ++ st->print("addi_d SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ if (Assembler::is_simm(-framesize, 12)) { ++ __ addi_d(SP, SP, -framesize); ++ } else { ++ __ li(AT, -framesize); ++ __ add_d(SP, SP, AT); ++ } ++ __ st_d(RA, Address(SP, framesize - wordSize)); ++ __ st_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize - wordSize * 2, 12)) { ++ __ addi_d(FP, SP, framesize - wordSize * 2); ++ } else { ++ __ li(AT, framesize - wordSize * 2); ++ __ add_d(FP, SP, AT); ++ } ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "Not enough room for patch_verified_entry"); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode.
++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else if(_optimized_virtual) { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); ++ } else { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); ++ } ++ ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ ++ if( _method ) { // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. 
Holes should not be necessary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be necessary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will be generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention calls this.
++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? ++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ 
predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand 
immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand 
mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ 
constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ 
match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ 
constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ 
indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparison Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparison Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory.
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : 
CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format 
%{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ 
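++// Note on the rules above: every plain load/store entry delegates to
++// MacroAssembler::loadstore_enc, passing the operand's base/index/scale/disp
++// addressing components plus an access-type constant (LOAD_INT, LOAD_U_BYTE,
++// STORE_LONG, ...) that selects the final ld_*/st_* encoding. The constant-load
++// rules that follow instead materialize immediates with li / patchable_li52 and
++// attach oop or metadata relocations when the constant requires them.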
++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ li(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. 
++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, true /* signed */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ 
ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ li(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, true /* signed */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 
0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2_reg, op1_reg, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1_reg, op2_reg, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1_reg, op2_reg, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2_reg, op1_reg, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; 
++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(AT, op1, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, AT, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, AT, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(AT, op1, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, true /* signed */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ blt_long(R0, op1, *L, true /* signed */); ++ break; ++ case 0x04: //greater_equal ++ __ bge_long(op1, R0, *L, true /* signed */); ++ break; ++ case 0x05: //less ++ __ blt_long(op1, R0, *L, true /* signed */); ++ break; ++ case 0x06: //less_equal ++ __ bge_long(R0, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ blt_long(AT, op1, *L, true /* signed */); ++ break; ++ case 0x04: //greater_equal ++ __ bge_long(op1, AT, *L, true /* signed */); ++ break; ++ case 0x05: //less ++ __ blt_long(op1, AT, *L, true /* signed */); ++ break; ++ case 0x06: //less_equal ++ __ bge_long(AT, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ 
beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpUL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: 
//greater ++ __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ blt(op2, op1, L); ++ else ++ __ blt(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bge(op1, op2, L); ++ else ++ __ bge(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ blt(op1, op2, L); ++ else ++ __ blt(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bge(op2, op1, L); ++ else ++ __ bge(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ li(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ blt(op2, op1, L); ++ else ++ __ blt(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bge(op1, op2, L); ++ else ++ __ bge(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ blt(op1, op2, L); ++ else ++ __ blt(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bge(op2, op1, L); ++ else ++ __ bge(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created 
for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(op2, op1, L); ++ else ++ __ bltu(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1, op2, L); ++ else ++ __ bgeu(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1, op2, L); ++ else ++ __ bltu(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(op2, op1, L); ++ else ++ __ bgeu(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ 
match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(op2_reg, op1_reg, L); ++ else ++ __ bltu(op2_reg, op1_reg, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1_reg, op2_reg, L); ++ else ++ __ bgeu(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1_reg, op2_reg, L); ++ else ++ __ bltu(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(op2_reg, op1_reg, L); ++ else ++ __ bgeu(op2_reg, op1_reg, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(op2, op1, L); ++ else ++ __ bltu(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1, op2, L); ++ else ++ __ bgeu(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1, op2, L); ++ else ++ __ bltu(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(op2, op1, L); ++ else ++ __ bgeu(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ 
case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(AT, op1, L); ++ else ++ __ bltu(AT, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1, AT, L); ++ else ++ __ bgeu(op1, AT, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1, AT, L); ++ else ++ __ bltu(op1, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(AT, op1, L); ++ else ++ __ bgeu(AT, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ blt(op2, op1, L); ++ else ++ __ blt(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bge(op1, op2, L); ++ else ++ __ bge(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ blt(op1, op2, L); ++ else ++ __ blt(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bge(op2, op1, L); ++ else ++ __ bge(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ case 0x03: //greater ++ if (&L) ++ __ blt(R0, op1, L); ++ else ++ __ blt(R0, op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&L) ++ __ bge(op1, R0, L); ++ else ++ __ bge(op1, R0, (int)0); ++ break; ++ case 0x05: //less ++ if (&L) ++ __ blt(op1, R0, L); ++ else ++ __ blt(op1, R0, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&L) ++ __ bge(R0, op1, L); ++ else ++ __ bge(R0, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, 
AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ if (&L) ++ __ blt(AT, op1, L); ++ else ++ __ blt(AT, op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&L) ++ __ bge(op1, AT, L); ++ else ++ __ bge(op1, AT, (int)0); ++ break; ++ case 0x05: //less ++ if (&L) ++ __ blt(op1, AT, L); ++ else ++ __ blt(op1, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&L) ++ __ bge(AT, op1, L); ++ else ++ __ bge(AT, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ b(L); ++ else ++ __ b((int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x03: //greater ++ if (&target) ++ __ blt(opr2_reg, opr1_reg, target); ++ else ++ __ blt(opr2_reg, opr1_reg, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&target) ++ __ bge(opr1_reg, opr2_reg, target); ++ else ++ __ bge(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x05: //less ++ if (&target) ++ __ blt(opr1_reg, opr2_reg, target); ++ else ++ __ blt(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&target) ++ __ bge(opr2_reg, opr1_reg, target); ++ else ++ __ bge(opr2_reg, opr1_reg, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ 
switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x03: //greater ++ if (&target) ++ __ bltu(opr2_reg, opr1_reg, target); ++ else ++ __ bltu(opr2_reg, opr1_reg, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&target) ++ __ bgeu(opr1_reg, opr2_reg, target); ++ else ++ __ bgeu(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x05: //less ++ if (&target) ++ __ bltu(opr1_reg, opr2_reg, target); ++ else ++ __ bltu(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&target) ++ __ bgeu(opr2_reg, opr1_reg, target); ++ else ++ __ bgeu(opr2_reg, opr1_reg, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beqz(opr1_reg, target); ++ else ++ __ beqz(opr1_reg, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if (&target) ++ __ bnez(opr1_reg, target); ++ else ++ __ bnez(opr1_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ blt(R0, opr1_reg, target); ++ else ++ __ blt(R0, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bge(opr1_reg, R0, target); ++ else ++ __ bge(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ blt(opr1_reg, R0, target); ++ else ++ __ blt(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bge(R0, opr1_reg, target); ++ else ++ __ bge(R0, opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpUL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beqz(opr1_reg, target); ++ else ++ __ beqz(opr1_reg, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if (&target) ++ __ bnez(opr1_reg, target); ++ else ++ __ bnez(opr1_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ bltu(R0, opr1_reg, target); ++ else ++ __ bltu(R0, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bgeu(opr1_reg, R0, target); ++ else ++ __ bgeu(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ bltu(opr1_reg, R0, target); ++ else ++ __ bltu(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bgeu(R0, opr1_reg, target); ++ else ++ __ bgeu(R0, opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ 
ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ blt(opr2_reg, opr1_reg, target); ++ else ++ __ blt(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bge(opr1_reg, opr2_reg, target); ++ else ++ __ bge(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ blt(opr1_reg, opr2_reg, target); ++ else ++ __ blt(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bge(opr2_reg, opr1_reg, target); ++ else ++ __ bge(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ bltu(opr2_reg, opr1_reg, target); ++ else ++ __ bltu(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bgeu(opr1_reg, opr2_reg, target); ++ else ++ __ bgeu(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ bltu(opr1_reg, opr2_reg, target); ++ else ++ __ bltu(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bgeu(opr2_reg, opr1_reg, target); ++ else ++ __ bgeu(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if 
(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ 
++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} 
++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop 
$tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ 
Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ 
match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ 
ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); 
++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_s(FCC0, src2, src1); ++ __ fcmp_cult_s(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_d(FCC0, src2, src1); ++ __ fcmp_cult_d(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ add_d(AT, base, R0); ++ __ beq(num, R0, done); ++ ++ __ move(T4, num); /* T4 = words */ ++ ++ __ bind(Loop); ++ __ st_d(R0, AT, 0); ++ __ addi_d(T4, T4, -1); ++ __ addi_d(AT, AT, wordSize); ++ __ bne(T4, R0, Loop); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt1 = $cnt1$$Register; ++ Register cnt2 = $cnt2$$Register; ++ Register result = $result$$Register; ++ ++ Label L, Loop, haveResult, done; ++ ++ // compute the and difference of lengths (in result) ++ __ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ __ bge(cnt2, cnt1, Loop); ++ __ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ __ bind(Loop); // Loop begin ++ __ ld_hu(AT, str1, 0); ++ __ beq(cnt1, R0, done); ++ ++ // compare current character ++ __ ld_hu(cnt2, str2, 0); ++ __ addi_d(str1, str1, 2); ++ __ bne(AT, cnt2, haveResult); ++ __ addi_d(str2, str2, 2); ++ __ addi_d(cnt1, cnt1, -1); // Loop end ++ __ b(Loop); ++ ++ __ bind(haveResult); ++ __ sub_d(result, AT, cnt2); ++ ++ __ bind(done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt = $cnt$$Register; ++ Register tmp = $temp$$Register; ++ Register result = $result$$Register; ++ ++ Label Loop, True, False; ++ ++ __ addi_d(result, R0, 1); ++ __ beq(str1, str2, True); // same char[] ? 
++ ++ __ beq(cnt, R0, True); ++ ++ __ bind(Loop); ++ ++ // compare current character ++ __ ld_hu(AT, str1, 0); ++ __ ld_hu(tmp, str2, 0); ++ __ addi_d(str1, str1, 2); ++ __ bne(AT, tmp, False); ++ __ addi_d(cnt, cnt, -1); ++ __ addi_d(str2, str2, 2); ++ __ bne(cnt, R0, Loop); ++ ++ __ b(True); ++ ++ __ bind(False); ++ __ addi_d(result, R0, 0); ++ ++ __ bind(True); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ 
"ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegI src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. 
++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ match(Set dst (AddF (MulF src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. 
++ ins_cost(44444); ++ format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmadd_s(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ match(Set dst (AddD (MulD src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. ++ ins_cost(44444); ++ format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmadd_d(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and 
$dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ 
++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; 
++
++ __ bstrins_d(dst, R0, 1, 0);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{
++ match(Set dst (AndL dst M121));
++ ins_cost(60);
++
++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %}
++ ins_encode %{
++ Register dst = $dst$$Register;
++
++ __ bstrins_d(dst, R0, 6, 3);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++// Or Long Register with Register
++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{
++ match(Set dst (OrL src1 src2));
++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %}
++ ins_encode %{
++ Register dst_reg = $dst$$Register;
++ Register src1_reg = $src1$$Register;
++ Register src2_reg = $src2$$Register;
++
++ __ orr(dst_reg, src1_reg, src2_reg);
++ %}
++ ins_pipe( ialu_regL_regL );
++%}
++
++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{
++ match(Set dst (OrL (CastP2X src1) src2));
++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %}
++ ins_encode %{
++ Register dst_reg = $dst$$Register;
++ Register src1_reg = $src1$$Register;
++ Register src2_reg = $src2$$Register;
++
++ __ orr(dst_reg, src1_reg, src2_reg);
++ %}
++ ins_pipe( ialu_regL_regL );
++%}
++
++// Xor Long Register with Register
++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{
++ match(Set dst (XorL src1 src2));
++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %}
++ ins_encode %{
++ Register dst_reg = as_Register($dst$$reg);
++ Register src1_reg = as_Register($src1$$reg);
++ Register src2_reg = as_Register($src2$$reg);
++
++ __ xorr(dst_reg, src1_reg, src2_reg);
++ %}
++ ins_pipe( ialu_regL_regL );
++%}
++
++// Shift Left by 5-bit immediate
++instruct salI_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{
++ match(Set dst (LShiftI src shift));
++
++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++ int shamt = $shift$$constant;
++
++ __ slli_w(dst, src, shamt);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immIU5 shift) %{
++ match(Set dst (LShiftI (ConvL2I src) shift));
++
++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++ int shamt = $shift$$constant;
++
++ __ slli_w(dst, src, shamt);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{
++ match(Set dst (AndI (LShiftI src shift) mask));
++
++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++
++ __ slli_w(dst, src, 16);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen)
++%{
++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen));
++
++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++
++ __ andi(dst, src, 7);
++ %}
++ ins_pipe(ialu_regI_regI);
++%}
++
++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
++// This idiom is used by the compiler for the i2s bytecode.
++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = 
as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ 
ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + 
n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = 
$src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. 
++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ 
ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. 
++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions. 
++ ++instruct prefetchr( memory mem ) %{ ++ match(PrefetchRead mem); ++ ins_cost(125); ++ ++ format %{ "pref $mem\t# Prefetch into temporal cache for read @ prefetchr" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm(disp, 12) ) { ++ __ addi_d(AT, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ } ++ __ preld(0, AT, 0); //hint: 0:load ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchw( memory mem ) %{ ++ match(PrefetchWrite mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch to temporal cache for write @ prefetchw" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm(disp, 12) ) { ++ __ addi_d(AT, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ } ++ __ preld(8, AT, 0); //hint: 8:store ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Prefetch instructions for allocation. ++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != 0) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ 
storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap! 
++ __ brk(18); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = 
$src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. 
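The MinI/MaxI rules above pick between the two registers without a branch: slt leaves a 0/1 predicate in AT, masknez/maskeqz zero out the losing value, and the final OR merges the survivor. A small stand-alone C++ model of those three LoongArch instructions (illustrative only, not part of the patch) shows why the four-instruction sequence in minI_Reg_Reg computes min(dst, src):

  #include <cstdint>
  #include <cassert>

  // Models of the instructions used by minI_Reg_Reg:
  //   slt     rd, rj, rk : rd = (rj < rk) ? 1 : 0   (signed compare)
  //   masknez rd, rj, rk : rd = (rk != 0) ? 0 : rj
  //   maskeqz rd, rj, rk : rd = (rk == 0) ? 0 : rj
  static int64_t slt(int64_t rj, int64_t rk)     { return rj < rk ? 1 : 0; }
  static int64_t masknez(int64_t rj, int64_t rk) { return rk != 0 ? 0 : rj; }
  static int64_t maskeqz(int64_t rj, int64_t rk) { return rk == 0 ? 0 : rj; }

  static int64_t min_reg_reg(int64_t dst, int64_t src) {
    int64_t at = slt(src, dst);   // at = 1 iff src < dst
    dst = masknez(dst, at);       // keep dst only when src >= dst
    at  = maskeqz(src, at);       // keep src only when src <  dst
    return dst | at;              // one operand survives, the other is zeroed
  }

  int main() {
    assert(min_reg_reg(3, 7)  == 3);
    assert(min_reg_reg(7, 3)  == 3);
    assert(min_reg_reg(-5, 2) == -5);
    return 0;
  }

maxI_Reg_Reg is the same construction with the slt operands swapped, and maxI_Reg_zero drops the merge entirely because the "other" value is the zero register.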
++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++instruct safePoint_poll_reg(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(false); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} ++ ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, poll_reg, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll() %{ ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ li(T4, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, T4, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// 
--------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ 
++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( 
pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ 
vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set 
dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} ++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ 
predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, 
$src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ 
match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ 
vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 
16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV 
src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, 
immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. 
++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..89295343ce +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp +@@ -0,0 +1,3895 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/cardTableModRefBS.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#include "gc_implementation/g1/heapRegion.hpp" ++#endif // INCLUDE_ALL_GCS ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*)branch; ++ jint* pc = (jint*)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm.
++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. 
++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to <destination> or <trampoline stub> ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt,
not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ bne(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ beq(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ bceqz(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ bcnez(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++void MacroAssembler::ld_long(Register rt, Register offset, Register base) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ add_d(AT, base, offset); ++ ld_long(rt, 0, AT); ++#endif ++} ++ ++void MacroAssembler::st_long(Register rt, Register offset, Register base) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ add_d(AT, base, offset); ++ st_long(rt, 0, AT); ++#endif ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++ ++ li(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. 
See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ addi_d(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ addi_d(SP, SP, 8); ++ b(E); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) { ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc()); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return 
trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ li(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address 
entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ li(AT, target(before_call)); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ li(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void 
MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize - 2 * wordSize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ move(FP, SP); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(AT, tmp); ++ juint sps = os::get_serialize_page_shift_count(); ++ juint lsb = sps + 2; ++ juint msb = sps + log2_uint(os::vm_page_size()) - 1; ++ bstrpick_w(AT, thread, msb, lsb); ++ li(tmp, os::get_memory_serialize_page()); ++ alsl_d(tmp, AT, tmp, Address::times_2 - 1); ++ st_w(R0, tmp, 0); ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++////////////////////////////////////////////////////////////////////////////////// ++#if INCLUDE_ALL_GCS ++ ++void MacroAssembler::g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_active())); ++ Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ // Is marking active? ++ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { ++ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); ++ ld_b(AT, in_progress); ++ } ++ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) 
++ ++ ld_d(tmp, index); ++ beqz(tmp, runtime); ++ ++ addi_d(tmp, tmp, -1 * wordSize); ++ st_d(tmp, index); ++ ld_d(AT, buffer); ++ ++ // Record the previous value ++ stx_d(pre_val, tmp, AT); ++ b(done); ++ ++ bind(runtime); ++ // save the live input values ++ if (tosca_live) push(V0); ++ ++ if (obj != noreg && obj != V0) push(obj); ++ ++ if (pre_val != V0) push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then fp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) move(A1, thread); ++ if (pre_val != A0) move(A0, pre_val); ++ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); ++ } else { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ pop(obj); ++ ++ if(tosca_live) pop(V0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert(tmp != AT, "must be"); ++ assert(tmp2 != AT, "must be"); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ xorr(AT, store_addr, new_val); ++ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ beqz(AT, done); ++ ++ ++ // crosses regions, storing NULL? ++ beq(new_val, R0, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ move(card_addr, store_addr); ++ srli_d(card_addr, card_addr, CardTableModRefBS::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. ++ li(cardtable, (intptr_t)ct->byte_map_base); ++ add_d(card_addr, card_addr, cardtable); ++ ++ ld_b(AT, card_addr, 0); ++ addi_d(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); ++ beqz(AT, done); ++ ++ membar(StoreLoad); ++ ld_b(AT, card_addr, 0); ++ addi_d(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); ++ beqz(AT, done); ++ ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
++ li(AT, (int)CardTableModRefBS::dirty_card_val()); ++ st_b(AT, card_addr, 0); ++ ++ ld_w(AT, queue_index); ++ beqz(AT, runtime); ++ addi_d(AT, AT, -1 * wordSize); ++ st_w(AT, queue_index); ++ ld_d(tmp2, buffer); ++ ld_d(AT, queue_index); ++ stx_d(card_addr, tmp2, AT); ++ b(done); ++ ++ bind(runtime); ++ // save the live input values ++ push(store_addr); ++ push(new_val); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); ++ pop(new_val); ++ pop(store_addr); ++ ++ bind(done); ++} ++ ++#endif // INCLUDE_ALL_GCS ++////////////////////////////////////////////////////////////////////////////////// ++ ++ ++void MacroAssembler::store_check(Register obj) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ store_check_part_1(obj); ++ store_check_part_2(obj); ++} ++ ++void MacroAssembler::store_check(Register obj, Address dst) { ++ store_check(obj); ++} ++ ++ ++// split the store check operation so that other instructions can be scheduled inbetween ++void MacroAssembler::store_check_part_1(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ srli_d(obj, obj, CardTableModRefBS::card_shift); ++} ++ ++void MacroAssembler::store_check_part_2(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ li(AT, (long)ct->byte_map_base); ++ add_d(AT, AT, obj); ++ if (UseConcMarkSweepGC) membar(StoreStore); ++ st_b(R0, AT, 0); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ assert_different_registers(obj, t2); ++ assert_different_registers(obj, var_size_in_bytes); ++ ++ Register end = t2; ++ // verify_tlab(); ++ ++ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); ++ } ++ ++ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); ++ blt_far(SCR1, end, slow_case, false); ++ ++ // update the tlab top pointer ++ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ sub_d(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++ // verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { ++ // No allocation in the shared eden. 
++ b_far(slow_case); ++ } else { ++ Register end = t1; ++ Register heap_end = SCR2; ++ Label retry; ++ bind(retry); ++ ++ li(SCR1, (address)Universe::heap()->end_addr()); ++ ld_d(heap_end, SCR1, 0); ++ ++ // Get the current top of the heap ++ li(SCR1, (address) Universe::heap()->top_addr()); ++ ll_d(obj, SCR1, 0); ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) ++ addi_d(end, obj, con_size_in_bytes); ++ else ++ add_d(end, obj, var_size_in_bytes); ++ ++ // if end < obj then we wrapped around high memory ++ blt_far(end, obj, slow_case, false); ++ blt_far(heap_end, end, slow_case, false); ++ ++ // If heap top hasn't been changed by some other thread, update it. ++ sc_d(end, SCR1, 0); ++ beqz(end, retry); ++ ++ incr_allocated_bytes(TREG, var_size_in_bytes, con_size_in_bytes, t1); ++ } ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++static const double pi_4 = 0.7853981633974483; ++ ++// must get argument(a double) in FA0/FA1 ++//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { ++//We need to preseve the register which maybe modified during the Call ++void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { ++ // save all modified register here ++ // FIXME, in the disassembly of tirgfunc, only used V0, V1, T4, SP, RA, so we ony save V0, V1, T4 ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ pushad(); ++ // we should preserve the stack space before we call ++ addi_d(SP, SP, -wordSize * 2); ++ switch (trig){ ++ case 's' : ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); ++ break; ++ case 'c': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); ++ break; ++ case 't': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); ++ break; ++ default:assert (false, "bad intrinsic"); ++ break; ++ ++ } ++ ++ addi_d(SP, SP, wordSize * 2); ++ popad(); ++#endif ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, 
split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ patchable_li52(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_d(T4, AT, 0); ++ jalr(T4); ++ popad(); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ st_ptr(T0, SP, - wordSize); ++ st_ptr(T1, SP, - 2*wordSize); ++ st_ptr(RA, SP, - 3*wordSize); ++ st_ptr(A0, SP, - 4*wordSize); ++ st_ptr(A1, SP, - 5*wordSize); ++ st_ptr(AT, SP, - 6*wordSize); ++ st_ptr(T9, SP, - 7*wordSize); ++ ld_ptr(A1, addr); // addr may use SP, so load from it before change SP ++ addiu(SP, SP, - 7 * wordSize); ++ ++ patchable_li52(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++#endif ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(AT, T0, 0); ++ addi_d(AT, AT, 
1); ++ st_w(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ // TODO LA opt ++ //short ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ srai_w(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ // TODO LA opt ++ srli_d(AT, reg, 8); ++ slli_d(reg, reg, 24); ++ srli_d(reg, reg, 16); ++ orr(reg, reg, AT); ++ bstrpick_d(reg, reg, 15, 0); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ //TODO: LA opt ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srli_w(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ slli_w(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ 
move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). 
++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ ++ // Recursive locking ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. 
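The instructions that follow attempt the inflated-monitor fast path: if the monitor's _owner field is NULL, a single CAS from NULL to the current thread acquires the lock. A minimal sketch of that idea, assuming a hypothetical monitor type rather than the real ObjectMonitor layout:

    #include <atomic>

    // Hypothetical monitor: "unlocked" means owner == nullptr (not the real layout).
    struct MonitorEnterSketch { std::atomic<void*> owner{nullptr}; };

    // Try to take the inflated lock with one CAS; false sends the caller to the
    // runtime slow path, matching the cmpxchg on the owner field below.
    bool try_enter_inflated(MonitorEnterSketch* m, void* self) {
        void* expected = nullptr;
        return m->owner.compare_exchange_strong(expected, self,
                                                std::memory_order_acquire);
    }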
++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... 
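As a rough analogue of the classic stack-unlock sequence that follows (hypothetical encoding; the real markword bits differ): a zero displaced header means the lock was taken recursively and nothing is written back; otherwise the displaced header is CASed back into the object header, failing over to the slow path if another thread inflated the lock in the meantime.

    #include <atomic>
    #include <cstdint>

    // Sketch only: obj_mark currently points at box_on_stack if we hold the stack-lock.
    bool fast_unlock_sketch(std::atomic<std::uintptr_t>* obj_mark,
                            std::uintptr_t* box_on_stack) {
        std::uintptr_t displaced = *box_on_stack;
        if (displaced == 0)
            return true;                              // recursive stack-lock: nothing to do
        std::uintptr_t expected = reinterpret_cast<std::uintptr_t>(box_on_stack);
        return obj_mark->compare_exchange_strong(expected, displaced,
                                                 std::memory_order_release);
    }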
++ ld_d(tmpReg, Address(boxReg, 0)) ; ++ assert_different_registers(AT, tmpReg); ++ li(AT, 0x1); ++ beq(tmpReg, R0, DONE_LABEL) ; ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
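A compact sketch of the 1-0 exit described above, using stand-in fields rather than the real ObjectMonitor offsets: when no recursion and no queued waiters are observed, the monitor is released with a plain release-store of NULL into the owner field, avoiding a CAS or full fence on the fast path.

    #include <atomic>

    struct MonitorExitSketch {
        std::atomic<void*> owner{nullptr};
        std::atomic<int>   recursions{0};
        std::atomic<void*> cxq{nullptr};          // stand-ins for the contention queues
        std::atomic<void*> entry_list{nullptr};
    };

    // Returns false when the slow path (runtime monitorexit) must run instead.
    bool try_exit_inflated(MonitorExitSketch* m, void* self) {
        if (m->owner.load(std::memory_order_relaxed) != self)         return false;
        if (m->recursions.load(std::memory_order_relaxed) != 0)       return false;
        if (m->cxq.load(std::memory_order_relaxed) != nullptr)        return false;
        if (m->entry_list.load(std::memory_order_relaxed) != nullptr) return false;
        m->owner.store(nullptr, std::memory_order_release);           // the 1-0 exit
        return true;
    }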
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); // release-store ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++ ++ //TODO: LA ++//In LA, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int 
i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src) { ++ if(UseCompressedOops){ ++ ld_wu(dst, src); ++ decode_heap_oop(dst); ++ } else { ++ ld_d(dst, src); ++ } ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src){ ++ if(UseCompressedOops){ ++ assert(!dst.uses(src), "not enough registers"); ++ encode_heap_oop(src); ++ st_w(src, dst); ++ } else { ++ st_d(src, dst); ++ } ++} ++ ++void MacroAssembler::store_heap_oop_null(Address dst){ ++ if(UseCompressedOops){ ++ st_w(R0, dst); ++ } else { ++ st_d(R0, dst); ++ } ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. 
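The encode/decode routines that follow implement the usual compressed-oop arithmetic. A minimal scalar sketch of what they compute (base and shift stand in for Universe::narrow_oop_base()/narrow_oop_shift(); NULL must stay NULL in both directions, which is what the maskeqz trick preserves):

    #include <cstdint>

    // encode: narrow = (oop == 0) ? 0 : (oop - base) >> shift
    std::uint32_t encode_oop_sketch(std::uintptr_t oop, std::uintptr_t base, unsigned shift) {
        return oop == 0 ? 0u : static_cast<std::uint32_t>((oop - base) >> shift);
    }

    // decode: oop = (narrow == 0) ? 0 : base + ((uintptr_t)narrow << shift)
    std::uintptr_t decode_oop_sketch(std::uint32_t narrow, std::uintptr_t base, unsigned shift) {
        return narrow == 0 ? 0 : base + (static_cast<std::uintptr_t>(narrow) << shift);
    }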
++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ sub_d(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, 
LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ sub_d(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ sub_d(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ add_d(r, r, AT); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. 
++ // Also do not verify_oop as this is called by verify_oop. ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } else { ++ add_d(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++SkipIfEqual::SkipIfEqual( ++ MacroAssembler* masm, const bool* flag_addr, bool value) { ++ _masm = masm; ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ _masm->addi_d(AT, AT, -value); ++ _masm->beq(AT, R0, _label); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ add_d(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). 
++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. ++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. 
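The loop above is the slow-path subtype check: a linear scan of the secondary-supers array that, on a hit, records the result in the secondary_super_cache slot so the next fast-path probe succeeds immediately. A plain C++ sketch with hypothetical types:

    struct KlassSketch {
        const KlassSketch** secondary_supers;     // array of supertypes
        int                 secondary_count;      // number of entries
        const KlassSketch*  secondary_super_cache;
    };

    bool is_subtype_slow_sketch(KlassSketch* sub, const KlassSketch* super) {
        for (int i = 0; i < sub->secondary_count; i++) {
            if (sub->secondary_supers[i] == super) {
                sub->secondary_super_cache = super;   // remembered for the fast path
                return true;
            }
        }
        return false;
    }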
++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
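A rough C++ sketch of what lookup_interface_method (below) computes, using a hypothetical flattened layout (a null-terminated array of offset entries plus a flat method-slot array) instead of the real InstanceKlass/itable structures:

    struct ItableOffsetEntrySketch {
        const void* interface_klass;   // nullptr terminates the table
        int         offset;            // slot offset of this interface's method block
    };

    const void* lookup_interface_sketch(const ItableOffsetEntrySketch* entries,
                                        const void* const* method_slots,
                                        const void* interface_klass,
                                        int itable_index) {
        for (const ItableOffsetEntrySketch* e = entries; ; ++e) {
            if (e->interface_klass == nullptr)
                return nullptr;                           // receiver lacks the interface
            if (e->interface_klass == interface_klass)
                return method_slots[e->offset + itable_index];
        }
    }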
++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! ++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ li(AT, (int)itable_index.is_constant()); ++ alsl_d(AT, AT, recv_klass, (int)Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ } ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ } else { ++ bne(intf_klass, method_result, search); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. 
++ ld_w(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = S8; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ slli_d(AT, AT, (int)Address::times_ptr); ++ } else { ++ slli_d(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ li(tmp, base + vtableEntry::method_offset_in_bytes()); ++ add_d(tmp, tmp, AT); ++ add_d(tmp, tmp, recv_klass); ++ ld_d(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base; ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ ld_d(value, value, -JNIHandles::weak_tag_value); ++ verify_oop(value); ++ #if INCLUDE_ALL_GCS ++ if (UseG1GC) { ++ g1_write_barrier_pre(noreg /* obj */, ++ value /* pre_val */, ++ thread /* thread */, ++ tmp /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++ #endif // INCLUDE_ALL_GCS ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
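The resolve_jobject sequence here boils down to: NULL stays NULL, the low bit of the handle distinguishes weak from strong, and the oop is loaded through the untagged pointer (the G1 pre-barrier taken on the weak case is omitted from this sketch). A minimal sketch, assuming the tag is bit 0 as with JNIHandles::weak_tag_mask:

    #include <cstdint>

    std::uintptr_t resolve_handle_sketch(std::uintptr_t handle) {
        const std::uintptr_t weak_tag_mask = 1;
        if (handle == 0)
            return 0;                                        // NULL handle used as-is
        std::uintptr_t* slot =
            reinterpret_cast<std::uintptr_t*>(handle & ~weak_tag_mask);
        return *slot;                                        // load the referenced oop
    }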
++ ld_d(value, value, 0); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, 
++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: 
st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. 
++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ 
bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..8b123c2906 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp +@@ -0,0 +1,771 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "utilities/macros.hpp" ++#include "runtime/rtmLocking.hpp" ++ ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++#ifdef CC_INTERP ++ // c++ interpreter never wants to use interp_masm version of call_VM ++ #define VIRTUAL ++#else ++ #define VIRTUAL virtual ++#endif ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. 
If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
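To make the defaulting convention above concrete, here is a minimal standalone sketch (illustrative only, not taken from the patch; the enum values, struct, and helper name are stand-ins) of the rule the comment describes: a noreg thread argument falls back to TREG and a noreg last_java_sp falls back to SP.

    #include <cassert>

    // Stand-in register encodings for illustration only.
    enum Register { noreg = -1, TREG = 2, SP = 3 };

    struct VMCallArgs {
      Register java_thread;
      Register last_java_sp;
    };

    // Mirrors the defaulting rule described in the comment above.
    VMCallArgs resolve_defaults(Register java_thread, Register last_java_sp) {
      if (java_thread == noreg)  java_thread  = TREG;  // thread lives in TREG by default
      if (last_java_sp == noreg) last_java_sp = SP;    // current sp is the default anchor
      return VMCallArgs{java_thread, last_java_sp};
    }

    int main() {
      VMCallArgs a = resolve_defaults(noreg, noreg);
      assert(a.java_thread == TREG && a.last_java_sp == SP);
      return 0;
    }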
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // Stores ++ void store_check(Register obj); // store check for obj - register is destroyed afterwards ++ void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) ++ ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ void clear_jweak_tag(Register possibly_jweak); ++ ++#if INCLUDE_ALL_GCS ++ ++ void g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++#endif // INCLUDE_ALL_GCS ++ ++ // split store_check(Register obj) to enhance instruction interleaving ++ void store_check_part_1(Register obj); ++ void store_check_part_2(Register obj); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ //add for compressedoops ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void load_heap_oop(Register dst, Address src); ++ void store_heap_oop(Address dst, Register src); ++ void store_heap_oop_null(Address dst); ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. ++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ void trigfunc(char trig, int num_fpu_regs_in_use = 1); ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. 
++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(RA0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. 
If it is supplied (i.e., != noreg) it will
++ // be killed; if not supplied, push/pop will be used internally to
++ // allocate a temporary (inefficient, avoid if possible).
++ // Optional slow case is for implementations (interpreter and C1) which branch to
++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
++ // Returns offset of first potentially-faulting instruction for null
++ // check info (currently consumed only by C1). If
++ // swap_reg_contains_mark is true then returns -1 as it is assumed
++ // the calling code has already passed any potential faults.
++ int biased_locking_enter(Register lock_reg, Register obj_reg,
++ Register swap_reg, Register tmp_reg,
++ bool swap_reg_contains_mark,
++ Label& done, Label* slow_case = NULL,
++ BiasedLockingCounters* counters = NULL);
++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
++#ifdef COMPILER2
++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr);
++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr);
++#endif
++
++ void round_to(Register reg, int modulus) {
++ //TODO: LA
++ guarantee(0, "LA not implemented yet");
++#if 0
++ assert_different_registers(reg, AT);
++ increment(reg, modulus - 1);
++ move(AT, - modulus);
++ andr(reg, reg, AT);
++#endif
++ }
++
++ // the following two might use the AT register; be sure you have no meaningful data in AT before you call them
++ void increment(Register reg, int imm);
++ void decrement(Register reg, int imm);
++ void increment(Address addr, int imm = 1);
++ void decrement(Address addr, int imm = 1);
++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); }
++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); }
++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); }
++ // Helper functions for statistics gathering.
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // ld_long will perform lw for 32 bit VMs and ld for 64 bit VMs ++ // st_long will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void ld_long(Register rt, Register base, int offset16); ++ inline void st_long(Register rt, Register base, int offset16); ++ inline void ld_long(Register rt, Address a); ++ inline void st_long(Register rt, Address a); ++ void ld_long(Register rt, Register offset, Register base); ++ void 
st_long(Register rt, Register offset, Register base);
++
++ // swap the two bytes of the low 16-bit halfword
++ // this directive will use AT; be sure the high 16 bits of reg are zero
++ void hswap(Register reg);
++ void huswap(Register reg);
++
++ // convert a big-endian integer to a little-endian integer
++ void swap(Register reg);
++
++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag,
++ bool retold, bool barrier);
++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp,
++ bool retold, bool barrier, Label& succ, Label* fail = NULL);
++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag,
++ bool sign, bool retold, bool barrier);
++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp,
++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL);
++
++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");}
++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");}
++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); }
++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); }
++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); }
++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); }
++ void pop () { addi_d(SP, SP, 8); }
++ void pop2 () { addi_d(SP, SP, 16); }
++ void push2(Register reg1, Register reg2);
++ void pop2 (Register reg1, Register reg2);
++ // we need two functions to save and restore the general registers
++ void pushad();
++ void popad();
++ void pushad_except_v0();
++ void popad_except_v0();
++
++ void li(Register rd, jlong value);
++ void li(Register rd, address addr) { li(rd, (long)addr); }
++ void patchable_li52(Register rd, jlong value);
++ void lipc(Register rd, Label& L);
++ void move(Register rd, Register rs) { orr(rd, rs, R0); }
++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); }
++ void mov_metadata(Register dst, Metadata* obj);
++ void mov_metadata(Address dst, Metadata* obj);
++
++ // Load the base of the cardtable byte map into reg.
++ void load_byte_map_base(Register reg); ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea (Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++#undef VIRTUAL ++ ++ public: ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ 
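The loadstore_enc entry points above key off the bit layout of CMLoadStoreDataType: the 0x100/0x200 bits say which register file the access targets and the 0x10/0x20 bits distinguish signed from unsigned accesses. A small self-contained sketch of that predicate logic (the constants are copied from the encoding above; the helper names are illustrative only, not part of the patch):

    #include <cassert>

    // Constants copied from the CMLoadStoreDataType encoding above.
    const int INT_TYPE      = 0x100;
    const int FLOAT_TYPE    = 0x200;
    const int SIGNED_TYPE   = 0x10;
    const int UNSIGNED_TYPE = 0x20;

    const int LOAD_INT    = INT_TYPE   | SIGNED_TYPE   | 0x4;
    const int LOAD_U_BYTE = INT_TYPE   | UNSIGNED_TYPE | 0x1;
    const int LOAD_FLOAT  = FLOAT_TYPE | SIGNED_TYPE   | 0x1;

    // Mirrors the asserts in loadstore_enc(): general-register accesses must
    // carry INT_TYPE, floating-point/vector accesses must carry FLOAT_TYPE.
    bool is_gpr_access(int type)    { return (type & INT_TYPE)      != 0; }
    bool is_fpr_access(int type)    { return (type & FLOAT_TYPE)    != 0; }
    bool is_unsigned_load(int type) { return (type & UNSIGNED_TYPE) != 0; }

    int main() {
      assert(is_gpr_access(LOAD_INT)      && !is_fpr_access(LOAD_INT));
      assert(is_fpr_access(LOAD_FLOAT)    && !is_gpr_access(LOAD_FLOAT));
      assert(is_unsigned_load(LOAD_U_BYTE) && !is_unsigned_load(LOAD_INT));
      return 0;
    }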
++private: ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..0b265a4def +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP
++#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP
++
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/codeBuffer.hpp"
++#include "code/codeCache.hpp"
++
++#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP
+diff --git a/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp
+new file mode 100644
+index 0000000000..b36216c533
+--- /dev/null
++++ b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp
+@@ -0,0 +1,120 @@
++/*
++ * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/codeBuffer.hpp"
++#include "memory/metaspaceShared.hpp"
++
++// Generate the self-patching vtable method:
++//
++// This method will be called (as any other Klass virtual method) with
++// the Klass itself as the first argument. Example:
++//
++// oop obj;
++// int size = obj->klass()->klass_part()->oop_size(this);
++//
++// for which the virtual method call is Klass::oop_size();
++//
++// The dummy method is called with the Klass object as the first
++// operand, and an object as the second argument.
++//
++
++//=====================================================================
++
++// All of the dummy methods in the vtable are essentially identical,
++// differing only by an ordinal constant, and they bear no relationship
++// to the original method which the caller intended. Also, there needs
++// to be 'vtbl_list_size' instances of the vtable in order to
++// differentiate between the 'vtable_list_size' original Klass objects.
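The generator that follows packs a (vtable index, method index) pair into one small immediate, bits [12..8] selecting the vtable and bits [7..0] the method, loads it into T5, and decodes it again in the shared stub. A self-contained sketch of just that packing arithmetic (function names are illustrative only, not part of the patch):

    #include <cassert>

    // Mirrors the encoding used by the generated dummy methods below:
    //   li(T5, (i << 8) + j)   -- pack vtable index i and method index j
    //   srli_d(T4, T5, 8)      -- recover the vtable index
    //   andi(T5, T5, 0x00ff)   -- recover the method index
    int pack(int vtable_index, int method_index) { return (vtable_index << 8) + method_index; }
    int vtable_of(int packed) { return packed >> 8; }
    int method_of(int packed) { return packed & 0x00ff; }

    int main() {
      int packed = pack(3, 17);            // hypothetical indices
      assert(vtable_of(packed) == 3);
      assert(method_of(packed) == 17);
      assert(pack(31, 255) < (1 << 13));   // stays within the 13-bit immediate noted below
      return 0;
    }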
++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void MetaspaceShared::generate_vtable_methods(void** vtbl_list, ++ void** vtable, ++ char** md_top, ++ char* md_end, ++ char** mc_top, ++ char* mc_end) { ++ intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); ++ *(intptr_t *)(*md_top) = vtable_bytes; ++ *md_top += sizeof(intptr_t); ++ void** dummy_vtable = (void**)*md_top; ++ *vtable = dummy_vtable; ++ *md_top += vtable_bytes; ++ ++ // Get ready to generate dummy methods. ++ ++ CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Label common_code; ++ for (int i = 0; i < vtbl_list_size; ++i) { ++ for (int j = 0; j < num_virtuals; ++j) { ++ dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); ++ ++ // Load T5 with a value indicating vtable/offset pair. ++ // -- bits[ 7..0] (8 bits) which virtual method in table? ++ // -- bits[12..8] (5 bits) which virtual method table? ++ // -- must fit in 13-bit instruction immediate field. ++ __ li(T5, (i << 8) + j); ++ __ b(common_code); ++ } ++ } ++ ++ __ bind(common_code); ++ ++ __ srli_d(T4, T5, 8); // isolate vtable identifier. ++ __ shl(T4, LogBytesPerWord); ++ __ li(AT, (long)vtbl_list); ++ __ ldx_d(T4, AT, T4); // get correct vtable address. ++ __ st_d(T4, A0, 0); // update vtable pointer. ++ ++ __ andi(T5, T5, 0x00ff); // isolate vtable method index ++ __ shl(T5, LogBytesPerWord); ++ __ ldx_d(T4, T4, T5); // address of real method pointer. ++ __ jr(T4); // get real method pointer. ++ ++ __ flush(); ++ ++ *mc_top = (char*)__ pc(); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp +new file mode 100644 +index 0000000000..cb31ca5ad5 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp +@@ -0,0 +1,566 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, err_msg("%s should be nonzero", xname)); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ // the following assumes that a Method* is normally compressed in the vmtarget field: ++ __ ld_d(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_bu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm16! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ ld_d(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ ld_d(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ ld_d(temp2_index, member_vmindex); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ ld_d(rm_index, member_vmindex); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. 
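++    // (rm_method is Rmethod (S3), the register jump_from_method_handle() below
++    //  dispatches through, hence "no data motion" in the interpreted case.)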
++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). 
++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && mh->is_oop()) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp +new file mode 100644 +index 0000000000..f84337424b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. 
++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp +new file mode 100644 +index 0000000000..639ac6cd3e +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp +@@ -0,0 +1,485 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
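++// If the new destination is within the reach of a single bl, the call is
++// patched directly; otherwise the bl is pointed at the nmethod's trampoline
++// stub and the 64-bit target is written into the stub's data slot instead.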
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ // If the codeBlob is not a nmethod, this is because we get here from the ++ // CodeBlob constructor, which is called within the nmethod constructor. ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? 
orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), 
instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // short ++ if (is_short()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ // far ++ if (is_far()) { ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
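++// (On LoongArch this is still unimplemented; the body below only calls
++// guarantee(0), like the other NativeGeneralJump helpers above.)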
++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x400 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp +new file mode 100644 +index 0000000000..493239923b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp +@@ -0,0 +1,513 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "memory/allocation.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "utilities/top.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction VALUE_OBJ_CLASS_SPEC { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. ++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for 
accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ }; ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. 
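++  // (A NativeFarCall is either the short form "nop; bl offs26" or the far form
++  //  "pcaddu18i reg, si20; jirl ra, reg, si18"; set_destination() re-emits the
++  //  sequence via MacroAssembler::patchable_call().)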
++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend 
NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. ++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. 
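++// Layout: three instructions (pcaddi / ld_d / jirl, recognised by
++// is_NativeCallTrampolineStub_at()), a padding word, then an 8-byte
++// destination slot at data_offset.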
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. ++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp +new file mode 100644 +index 0000000000..5ff7555d2f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp +new file mode 100644 +index 0000000000..c6424c321f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); 
++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp +new file mode 100644 +index 0000000000..3104cd1cc5 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? 
names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? names[encoding()] : "fccnoreg"; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp +new file mode 100644 +index 0000000000..37b39f9129 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp +@@ -0,0 +1,436 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "vm_version_loongarch.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++ ++// The implementation of integer registers for the LoongArch architecture ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 ((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 ((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define 
R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define RA0 R4 ++#define RA1 R5 ++#define RA2 R6 ++#define RA3 R7 ++#define RA4 R8 ++#define RA5 R9 ++#define RA6 R10 ++#define RA7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 RA0 ++#define V1 RA1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++//OPT_SAFEPOINT not supported yet ++#define OPT_SAFEPOINT 1 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 
((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) 
++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++ ++ ++}; ++ ++#endif //CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp +new file mode 100644 +index 0000000000..bf4498dc62 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp +@@ -0,0 +1,130 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp +new file mode 100644 +index 0000000000..211242f3fb +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++#endif // CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp +new file mode 100644 +index 0000000000..e6ee65f367 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp +new file mode 100644 +index 0000000000..9efcd2ce52 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp +@@ -0,0 +1,3453 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int 
v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, gpr_offset(fp_off)); ++ ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. 
++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. 
++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ verify_oop(Rmethod); ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ verify_oop(Rmethod); ++ __ move(S0, SP); ++ __ li(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, S0); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++static void gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. 
reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++ __ get_thread(T8); ++ __ st_d(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. ++ __ move(T5, Rmethod); ++ __ jr(T4); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. 
On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ __ verify_oop(holder); ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ __ verify_oop(temp); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ } ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on LA"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -wordSize); ++ break; ++ case 
T_DOUBLE: ++ __ fst_d(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ st_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
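object_move above is the "handlizing" step the earlier comments refer to: the native callee never sees a raw oop, it sees either NULL or the address of a stack slot that holds the oop, which is what lets JNI code treat the value as an opaque handle. A minimal standalone model of that rule (oop, jobject_t and handlize are invented stand-ins, not HotSpot types):

#include <cassert>
#include <cstdint>

typedef intptr_t oop;        // stand-in for a raw object pointer
typedef oop*     jobject_t;  // a handle is the address of a slot holding the oop

// Mirrors the two branches of object_move(): park the oop in a reserved slot,
// then give the callee either NULL (for a NULL oop) or the slot's address.
jobject_t handlize(oop value, oop* reserved_slot) {
  *reserved_slot = value;
  return (value == 0) ? (jobject_t)0 : reserved_slot;
}

int main() {
  oop slot;
  assert(handlize(0, &slot) == 0);             // NULL oop -> NULL handle
  assert(*handlize(0x1234, &slot) == 0x1234);  // real oop -> pointer to the slot
  return 0;
}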
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal(err_msg_res("unexpected intrinsic id %d", iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ methodHandle method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = method->critical_native_function(); ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
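The signature rewrite above can be summarised as: a regular JNI call grows a leading JNIEnv* slot, plus a class-mirror slot when the method is static, while a critical native keeps the raw signature but expands each array argument into a (length, element pointer) pair. A small standalone sketch of that expansion (the BT enum and to_c_signature are invented for the example):

#include <cstdio>
#include <vector>

enum BT { BT_ADDRESS, BT_OBJECT, BT_INT, BT_ARRAY };

// Rewrites a Java signature into the C signature the wrapper will call with:
// hidden JNIEnv*/mirror arguments for a normal native, or (length, pointer)
// pairs per array for a critical native.
std::vector<BT> to_c_signature(const std::vector<BT>& in_sig,
                               bool is_static, bool is_critical) {
  std::vector<BT> out;
  if (!is_critical) {
    out.push_back(BT_ADDRESS);                // JNIEnv*
    if (is_static) out.push_back(BT_OBJECT);  // class mirror
    out.insert(out.end(), in_sig.begin(), in_sig.end());
  } else {
    for (BT bt : in_sig) {
      if (bt == BT_ARRAY) {
        out.push_back(BT_INT);                // array length
        out.push_back(BT_ADDRESS);            // element pointer
      } else {
        out.push_back(bt);
      }
    }
  }
  return out;
}

int main() {
  std::vector<BT> java_sig = { BT_ARRAY, BT_INT };   // e.g. static foo(byte[], int)
  printf("normal JNI: %zu C args, critical: %zu C args\n",
         to_c_signature(java_sig, /*is_static=*/true, false).size(),   // 4
         to_c_signature(java_sig, true, true).size());                 // 3
  return 0;
}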
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
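All of the sizing above is done in 32-bit VMReg slots and only converted to bytes at the end, after rounding the slot count up to the stack alignment; round_to here is the usual round-up-to-a-power-of-two-multiple helper. A worked example with plausible numbers (the alignment and the per-item counts below are assumptions chosen for illustration, not values taken from the port):

#include <cstdio>

// HotSpot's round_to(): round x up to a multiple of a power-of-two alignment.
static int round_to(int x, int align) {
  return (x + align - 1) & ~(align - 1);
}

int main() {
  const int stack_slot_size       = 4;    // bytes per 32-bit VMReg slot
  const int stack_alignment_bytes = 16;   // assumed ABI stack alignment
  const int alignment_slots       = stack_alignment_bytes / stack_slot_size;

  int stack_slots = 0;
  stack_slots += 0;          // out_preserve_stack_slots() is 0 on this port
  stack_slots += 14;         // outgoing C arguments (example value)
  stack_slots += 9 * 2;      // oop handle area: 9 register args, 2 slots per word
  stack_slots += 2;          // klass slot (static method), 1 word
  stack_slots += 2;          // lock slot (synchronized method), 1 word
  stack_slots += 2 + 9 * 2;  // result temp plus the 2 + 9-word area noted above

  stack_slots = round_to(stack_slots, alignment_slots);
  printf("frame: %d slots = %d bytes\n",
         stack_slots, stack_slots * stack_slot_size);   // 56 slots = 224 bytes
  return 0;
}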
++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); ++ } ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. 
We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if 
(out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
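The shuffle above therefore uses a very simple index mapping for regular JNI natives: incoming Java argument i becomes outgoing C argument i + 1, or i + 2 for a static method, because JNIEnv* (and, for static methods, the class mirror) occupy the first C argument positions. A tiny sketch of just that mapping (c_index_for_java_arg is an invented name):

#include <cstdio>

int c_index_for_java_arg(int i, bool is_static) {
  return i + (is_static ? 2 : 1);
}

int main() {
  // A static native with three declared parameters: Java args 0..2 become
  // C args 2..4, because C args 0 and 1 are JNIEnv* and the class mirror.
  for (int i = 0; i < 3; i++) {
    printf("java arg %d -> c arg %d\n", i, c_index_for_java_arg(i, true));
  }
  return 0;
}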
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
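The single AND described above really does fold both conditions into one test: with a power-of-two page size, ((mark - sp) & (3 - page_size)) is zero exactly when the displaced mark is 4-byte aligned and lies within one page at or above SP, i.e. it plausibly points into our own stack frame, which is the recursive-lock case. A small standalone check of that identity with concrete numbers (looks_like_own_stack_lock is an invented name):

#include <cassert>
#include <cstdint>

// ((mark - sp) & (3 - page_size)) == 0 holds exactly when (mark & 3) == 0 and
// sp <= mark < sp + page_size, provided sp is at least 4-byte aligned and
// page_size is a power of two.
static bool looks_like_own_stack_lock(intptr_t mark, intptr_t sp, intptr_t page_size) {
  return ((mark - sp) & (3 - page_size)) == 0;
}

int main() {
  const intptr_t page = 4096;
  const intptr_t sp   = 0x10000;
  assert( looks_like_own_stack_lock(sp + 0x40, sp, page));   // aligned, within one page
  assert(!looks_like_own_stack_lock(sp + 0x41, sp, page));   // low bits set
  assert(!looks_like_own_stack_lock(sp + page, sp, page));   // a full page away
  assert(!looks_like_own_stack_lock(sp - 0x40, sp, page));   // below sp
  return 0;
}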
++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
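Stripped of the assembler details, the transition above is a publish-then-check protocol: store the transitional thread state, make that store visible to the VM thread before anything else, then look at the safepoint state and the suspend flags. A conceptual sketch of what the UseMembar path enforces, written with C++ atomics; this models the idea only, not how HotSpot implements it (the non-UseMembar case uses the serialization page written just below instead of a fence):

#include <atomic>

// Names mirror the HotSpot states (_thread_in_native_trans and friends).
enum ThreadState    { thread_in_native, thread_in_native_trans, thread_in_Java };
enum SafepointState { not_synchronized, synchronizing, synchronized_state };

std::atomic<int> thread_state{thread_in_native};
std::atomic<int> safepoint_state{not_synchronized};
std::atomic<int> suspend_flags{0};

// Returning from native: publish the transitional state, force the store to
// be visible before the check, then decide between the slow path (block at a
// safepoint / handle suspension) and continuing into Java.
bool must_take_slow_path() {
  thread_state.store(thread_in_native_trans, std::memory_order_release);
  std::atomic_thread_fence(std::memory_order_seq_cst);   // "force this write out"
  return safepoint_state.load() != not_synchronized || suspend_flags.load() != 0;
}

int main() { return must_take_slow_path() ? 1 : 0; }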
++ __ serialize_memory(thread, T5); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ ld_w(T5, AT, 0); ++ __ addi_d(AT, T5, -SafepointSynchronize::_not_synchronized); ++ Label L; ++ __ bne(AT, R0, L); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(L); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
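The unlock fast path that follows relies on the displaced header saved during locking doubling as a recursion flag: a nested acquire stored zero in the on-stack BasicLock, so releasing it needs no atomic operation at all, while a non-zero value has to be compare-exchanged back into the object's mark word, falling into the runtime slow path if that fails. A compact standalone model of that decision (BasicLockModel and fast_unlock are invented; std::atomic stands in for the cmpxchg on the mark word):

#include <atomic>
#include <cstdint>

struct BasicLockModel { intptr_t displaced_header; };   // the on-stack lock slot

// Zero displaced header -> recursive case, nothing to undo.  Otherwise try to
// swing the object's mark word (which currently points at our BasicLock) back
// to the displaced header; if that fails the runtime slow path must run.
bool fast_unlock(std::atomic<intptr_t>& mark_word, BasicLockModel* lock) {
  if (lock->displaced_header == 0) {
    return true;
  }
  intptr_t expected = reinterpret_cast<intptr_t>(lock);
  return mark_word.compare_exchange_strong(expected, lock->displaced_header);
}

int main() {
  BasicLockModel lock;
  lock.displaced_header = 0x5;                                    // mark saved at lock time
  std::atomic<intptr_t> mark{reinterpret_cast<intptr_t>(&lock)};  // we own the thin lock
  return fast_unlock(mark, &lock) ? 0 : 1;
}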
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ addi_d(SP,SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ return nm; ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
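The conversion loop that follows flattens the Java signature into the primitive-only form dtrace can consume: primitives pass through unchanged except float, which widens to int, and long/double, which occupy a padded two-slot long; String becomes a UTF-8 pointer, the boxed wrappers unbox to their primitive, and any other object is simply passed as NULL. A standalone sketch of that per-argument mapping (dtrace_out_type and its string return values are invented for the example):

#include <string>

// Out-signature element chosen for one Java argument; "+pad" marks types that
// also consume a trailing T_VOID slot.  The strings stand in for BasicTypes.
std::string dtrace_out_type(char desc, const std::string& klass = "") {
  switch (desc) {
    case 'Z': case 'B': case 'C': case 'S': case 'I':
      return std::string(1, desc);                    // primitives pass through
    case 'F': return "I";                             // float is converted to int
    case 'J': case 'D': return "J +pad";              // long/double -> two-slot long
    case 'L':
      if (klass == "java/lang/String")  return "utf8*";
      if (klass == "java/lang/Boolean" || klass == "java/lang/Byte")    return "B";
      if (klass == "java/lang/Character" || klass == "java/lang/Short") return "S";
      if (klass == "java/lang/Integer" || klass == "java/lang/Float")   return "I";
      if (klass == "java/lang/Long" || klass == "java/lang/Double")     return "J +pad";
      return "NULL";                                  // any other object is passed as NULL
    default:  return "?";
  }
}

int main() { return dtrace_out_type('F') == "I" ? 0 : 1; }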
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ ++ __ jump_to(ic_miss, 0); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ mov(G0, tmp); ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ add_d(FP, L2_string_off, O1); ++ __ br_null(O0, false, Assembler::pn, skip); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ restore(); ++ __ ret(); ++ ++ __ flush(); ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(false); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++ __ get_thread(thread); ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++ __ get_thread(thread); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ __ get_thread(thread); ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped off the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frames into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. 
++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++ __ get_thread(thread); ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++ __ get_thread(thread); ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++ // If cause_return is true we are at a poll_return and there is ++ // the return address in RA to the caller on the nmethod ++ // that is safepoint. We can leave this return in RA and ++ // effectively complete the return and safepoint in the caller. ++ // Otherwise we load exception pc to RA. ++ __ push(thread); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if(!cause_return) { ++ __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ pop(thread); ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Do the call ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ const Register thread = T8; ++ __ get_thread(thread); ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++ __ get_thread(thread); ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back to the original state on entry and ready to go to the callee method. 
++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ get_thread(thread); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff --git a/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..361b775144 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp +@@ -0,0 +1,3445 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/top.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++static address handle_unsafe_access() { ++ JavaThread* thread = JavaThread::current(); ++ address pc = thread->saved_exception_pc(); ++ // pc is the instruction which we must emulate ++ // doing a no-op is fine: return garbage from the load ++ // therefore, compute npc ++ address npc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ ++ // request an async exception ++ thread->set_pending_unsafe_access_error(); ++ ++ // return address of next instruction to execute ++ return npc; ++} ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ S8_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ __ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ 
fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // The following routine generates a subroutine to throw an ++ // asynchronous UnknownError when an unsafe access gets a fault that ++ // could not be reasonably prevented by the programmer. (Example: ++ // SIGBUS/OBJERR.) ++ address generate_handler_for_unsafe_access() { ++ StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); ++ address start = __ pc(); ++ __ push(V0); ++ __ pushad_except_v0(); // push registers ++ __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); ++ __ popad_except_v0(); ++ __ move(RA, V0); ++ __ pop(V0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ pcaddi(AT, 4); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ 
st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ pcaddi(AT, 4); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // Generate code for an array write pre barrier ++ // ++ // Input: ++ // addr - starting address ++ // count - element count ++ // ++ // Temp: ++ // AT - used to swap addr and count ++ // ++ void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ // With G1, don't generate the call if we statically know that the target in uninitialized ++ if (!dest_uninitialized) { ++ if (count == A0) { ++ if (addr == A1) { ++ // exactly backwards!! ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ case BarrierSet::ModRef: ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Generate code for an array write post barrier ++ // ++ // Input: ++ // start - register containing starting address of destination array ++ // count - elements count ++ // scratch - scratch register ++ // ++ // Temp: ++ // AT - used to swap addr and count ++ // ++ // The input registers are overwritten. ++ // ++ void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { ++ assert_different_registers(start, count, scratch, AT); ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ if (count == A0) { ++ if (start == A1) { ++ // exactly backwards!! 
++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, start); ++ } ++ } else { ++ __ move(A0, start); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label L_loop; ++ const Register end = count; ++ ++ if (UseConcMarkSweepGC) { ++ __ membar(__ StoreStore); ++ } ++ ++ int64_t disp = (int64_t) ct->byte_map_base; ++ __ li(scratch, disp); ++ ++ __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start + count * oop_size ++ __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(start, CardTableModRefBS::card_shift); ++ __ shr(end, CardTableModRefBS::card_shift); ++ __ sub_d(end, end, start); // end --> cards count ++ ++ __ add_d(start, start, scratch); ++ ++ __ bind(L_loop); ++ __ stx_b(R0, start, count); ++ __ addi_d(count, count, -1); ++ __ bge(count, R0, L_loop); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); 
++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than 9 elements. ++ void generate_byte_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). 
++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than 9 elements. ++ void generate_short_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than 7 elements. ++ void generate_int_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, Label &small, Label &large, ++ const char *name, int small_limit, int log2_elem_size, ++ bool dest_uninitialized = false) { ++ Label post, _large; ++ ++ if (is_oop) { ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); 
++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, ++ Label &small, Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than 4 elements. 
++ void generate_long_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ jr(RA); ++ __ nop(); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, "jlong_disjoint_arraycopy", false); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, 
"jlong_arraycopy", false); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 
0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, 
"StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * 
i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 
++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], 
xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, 
R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ 
Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, (intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc 
= A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ld_w(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld_d(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ add_d(V0, A1, R0); ++ __ jr(RA); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
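++    // Frame slots from the bottom up: the saved thread (last_java_sp), S7..S0, FP and the return address; 'framesize' counts them.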
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
++      }
++
++      Rhi_mn = ++reg;
++      Rlo_mn = ++reg;
++    }
++
++  private:
++    void enter() {
++      addi_d(SP, SP, -6 * wordSize);
++      st_d(FP, SP, 0 * wordSize);
++      move(FP, SP);
++    }
++
++    void leave() {
++      addi_d(T0, FP, 6 * wordSize);
++      ld_d(FP, FP, 0 * wordSize);
++      move(SP, T0);
++    }
++
++    void save_regs() {
++      if (!_squaring)
++        st_d(Rhi_ab, FP, 5 * wordSize);
++      st_d(Rlo_ab, FP, 4 * wordSize);
++      st_d(Rhi_mn, FP, 3 * wordSize);
++      st_d(Rlo_mn, FP, 2 * wordSize);
++      st_d(Pm_base, FP, 1 * wordSize);
++    }
++
++    void restore_regs() {
++      if (!_squaring)
++        ld_d(Rhi_ab, FP, 5 * wordSize);
++      ld_d(Rlo_ab, FP, 4 * wordSize);
++      ld_d(Rhi_mn, FP, 3 * wordSize);
++      ld_d(Rlo_mn, FP, 2 * wordSize);
++      ld_d(Pm_base, FP, 1 * wordSize);
++    }
++
++    template <typename T>
++    void unroll_2(Register count, T block, Register tmp) {
++      Label loop, end, odd;
++      andi(tmp, count, 1);
++      bnez(tmp, odd);
++      beqz(count, end);
++      align(16);
++      bind(loop);
++      (this->*block)();
++      bind(odd);
++      (this->*block)();
++      addi_w(count, count, -2);
++      blt(R0, count, loop);
++      bind(end);
++    }
++
++    template <typename T>
++    void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
++      Label loop, end, odd;
++      andi(tmp, count, 1);
++      bnez(tmp, odd);
++      beqz(count, end);
++      align(16);
++      bind(loop);
++      (this->*block)(d, s, tmp);
++      bind(odd);
++      (this->*block)(d, s, tmp);
++      addi_w(count, count, -2);
++      blt(R0, count, loop);
++      bind(end);
++    }
++
++    void acc(Register Rhi, Register Rlo,
++             Register t0, Register t1, Register t2, Register t, Register c) {
++      add_d(t0, t0, Rlo);
++      OR(t, t1, Rhi);
++      sltu(c, t0, Rlo);
++      add_d(t1, t1, Rhi);
++      add_d(t1, t1, c);
++      sltu(c, t1, t);
++      add_d(t2, t2, c);
++    }
++
++    void pre1(Register i) {
++      block_comment("pre1");
++      // Iam = 0;
++      // Ibn = i;
++
++      slli_w(Ibn, i, LogBytesPerWord);
++
++      // Ra = Pa_base[Iam];
++      // Rb = Pb_base[Ibn];
++      // Rm = Pm_base[Iam];
++      // Rn = Pn_base[Ibn];
++
++      ld_d(Ra, Pa_base, 0);
++      ldx_d(Rb, Pb_base, Ibn);
++      ld_d(Rm, Pm_base, 0);
++      ldx_d(Rn, Pn_base, Ibn);
++
++      move(Iam, R0);
++
++      // Zero the m*n result.
++      move(Rhi_mn, R0);
++      move(Rlo_mn, R0);
++    }
++
++    // The core multiply-accumulate step of a Montgomery
++    // multiplication. The idea is to schedule operations as a
++    // pipeline so that instructions with long latencies (loads and
++    // multiplies) have time to complete before their results are
++    // used. This most benefits in-order implementations of the
++    // architecture but out-of-order ones also benefit.
++    void step() {
++      block_comment("step");
++      // MACC(Ra, Rb, t0, t1, t2);
++      // Ra = Pa_base[++Iam];
++      // Rb = Pb_base[--Ibn];
++      addi_d(Iam, Iam, wordSize);
++      addi_d(Ibn, Ibn, -wordSize);
++      mul_d(Rlo_ab, Ra, Rb);
++      mulh_du(Rhi_ab, Ra, Rb);
++      acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the
++                                               // previous iteration.
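++      // load the next digits of a and b now so the loads overlap with the m*n multiply below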
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ slli_w(Iam, Rj, LogBytesPerWord); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ stx_d(t0, Pm_base, Iam); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
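++    // Each pass of the loop below subtracts n from m once, word by word with borrow, and repeats while the carry word t0 is still non-zero.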
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
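++      // (Pm_base is overwritten below while the result is built in the scratch area, so the caller's pointer must be restored from the frame before copying back.)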
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. ++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. 
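For reference, the column-scanning algorithm spelled out in the C pseudocode above can be written as ordinary C. The sketch below is illustrative only: the helper names (macc, mont_mul) and the use of GCC/Clang's unsigned __int128 are assumptions made for this example, not declarations from the patch, but the triple-precision accumulate matches the MACC step and the two loops match the stub's low and high column passes.

#include <stdint.h>

typedef unsigned __int128 u128;

/* t2:t1:t0 += a * b, the MACC step from the pseudocode above. */
static void macc(uint64_t a, uint64_t b, uint64_t *t0, uint64_t *t1, uint64_t *t2) {
  u128 p = (u128)a * b + *t0;
  *t0 = (uint64_t)p;
  u128 c = (p >> 64) + *t1;
  *t1 = (uint64_t)c;
  *t2 += (uint64_t)(c >> 64);
}

/* m = a * b * R^-1 mod n, with R = 2^(64*len) and inv * n[0] == -1 mod 2^64. */
static void mont_mul(const uint64_t *a, const uint64_t *b, const uint64_t *n,
                     uint64_t *m, uint64_t inv, int len) {
  uint64_t t0 = 0, t1 = 0, t2 = 0;
  for (int i = 0; i < len; i++) {                 /* low columns: one reduction word each */
    for (int j = 0; j < i; j++) {
      macc(a[j], b[i - j], &t0, &t1, &t2);
      macc(m[j], n[i - j], &t0, &t1, &t2);
    }
    macc(a[i], b[0], &t0, &t1, &t2);
    m[i] = t0 * inv;                              /* chosen so the column's low word cancels */
    macc(m[i], n[0], &t0, &t1, &t2);
    t0 = t1; t1 = t2; t2 = 0;                     /* shift the accumulator one word right */
  }
  for (int i = len; i < 2 * len; i++) {           /* high columns produce the result words */
    for (int j = i - len + 1; j < len; j++) {
      macc(a[j], b[i - j], &t0, &t1, &t2);
      macc(m[j], n[i - j], &t0, &t1, &t2);
    }
    m[i - len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }
  /* like the pseudocode's trailing while (t0) loop, a final conditional
     subtraction of n is still needed to bring the result below n */
}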
++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp +new file mode 100644 +index 0000000000..f0f3d55a4e +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp +@@ -0,0 +1,264 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. 
++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ // Table 0 ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 
0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL, ++ ++ // Table 1 ++ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, ++ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, ++ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, ++ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, ++ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, ++ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, ++ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, ++ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, ++ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, ++ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, ++ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, ++ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, ++ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, ++ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, ++ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, ++ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, ++ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, ++ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, ++ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, ++ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, ++ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, ++ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, ++ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, ++ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, ++ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, ++ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, ++ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, ++ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, ++ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, ++ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, ++ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, ++ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, ++ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, ++ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, ++ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, ++ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, ++ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, ++ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, ++ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, ++ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, ++ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, ++ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, ++ 0xb30ea79dUL, 0xaa1596dcUL, 
0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, ++ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, ++ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, ++ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, ++ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, ++ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, ++ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, ++ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, ++ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, ++ 0x9324fd72UL, ++ ++ // Table 2 ++ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, ++ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, ++ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, ++ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, ++ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, ++ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, ++ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, ++ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, ++ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, ++ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, ++ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, ++ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, ++ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, ++ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, ++ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, ++ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, ++ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, ++ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, ++ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, ++ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, ++ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, ++ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, ++ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, ++ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, ++ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, ++ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, ++ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, ++ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, ++ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, ++ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, ++ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, ++ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, ++ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, ++ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, ++ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, ++ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, ++ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, ++ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, ++ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, ++ 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 
0x9522eaf2UL, 0x94e080c5UL, ++ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, ++ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, ++ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, ++ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, ++ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, ++ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, ++ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, ++ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, ++ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, ++ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, ++ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, ++ 0xbe9834edUL, ++ ++ // Table 3 ++ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, ++ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, ++ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, ++ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, ++ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, ++ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, ++ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, ++ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, ++ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, ++ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, ++ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, ++ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, ++ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, ++ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, ++ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, ++ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, ++ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, ++ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, ++ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, ++ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, ++ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, ++ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, ++ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, ++ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, ++ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, ++ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, ++ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, ++ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, ++ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, ++ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, ++ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, ++ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, ++ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, ++ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, ++ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, ++ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, ++ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 
0xd8c66675UL, ++ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, ++ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, ++ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, ++ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, ++ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, ++ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, ++ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, ++ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, ++ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, ++ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, ++ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, ++ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, ++ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, ++ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, ++ 0xde0506f1UL, ++ // Constants for Neon CRC232 implementation ++ // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed ++ // k4 = 0xED627DAE = x^256 mod poly - bit reversed ++ 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 ++ 0xED78D502UL, 0x62EDAE7DUL, // byte swap ++ 0x02D578EDUL, 0x7DAEED62UL, // word swap ++ 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap ++}; +diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp +new file mode 100644 +index 0000000000..d020a527e4 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. 
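The four 256-entry tables and the trailing fold constants that make up _crc_table above are taken from zlib. As a hedged illustration (the function name below is invented for this example, and it shows the standard zlib construction rather than code from this patch), table 0 is the plain byte-at-a-time table for the reflected polynomial 0xEDB88320, and tables 1 through 3 are obtained by pushing each entry through eight more zero bits:

#include <stdint.h>

static uint32_t crc_table[4][256];

static void make_crc_tables(void) {
  for (uint32_t n = 0; n < 256; n++) {            /* table 0: one input byte */
    uint32_t c = n;
    for (int k = 0; k < 8; k++)
      c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
    crc_table[0][n] = c;
  }
  for (uint32_t n = 0; n < 256; n++) {            /* tables 1..3: the slicing tables */
    uint32_t c = crc_table[0][n];
    for (int t = 1; t < 4; t++) {
      c = crc_table[0][c & 0xff] ^ (c >> 8);      /* feed eight more zero bits through */
      crc_table[t][n] = c;
    }
  }
}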
++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static juint _crc_table[]; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp +new file mode 100644 +index 0000000000..213e69b0b2 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP ++ ++ protected: ++ ++ void generate_fixed_frame(bool native_call); ++ ++ // address generate_asm_interpreter_entry(bool synchronized); ++ ++#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp +new file mode 100644 +index 0000000000..39e3ad7bb5 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP ++ ++ ++ protected: ++ ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreter to get the VM to print out the size. ++ // Max size with JVMTI ++ // The sethi() instruction generates lots more instructions when shell ++ // stack limit is unlimited, so that's why this is much bigger. ++ const static int InterpreterCodeSize = 500 * K; ++ ++#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp +new file mode 100644 +index 0000000000..b25086a399 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp +@@ -0,0 +1,2335 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef CC_INTERP ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++ ++const int Interpreter::return_sentinel = 0xfeedbeed; ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? 
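Written out as plain arithmetic, the size_activation() calculation above adds a few independent word counts. The helper below is only a restating sketch: the names are shortened and the platform constants are left as parameters instead of repeating their LoongArch values.

/* Words needed for one interpreter activation; mirrors size_activation() above. */
static int interpreter_frame_words(int fixed_overhead,       /* sender_sp_offset - initial_sp_offset */
                                   int callee_locals, int callee_params,
                                   int monitors, int monitor_words,
                                   int temps, int stack_element_words,
                                   int extra_args) {
  return fixed_overhead
       + (callee_locals - callee_params) * stack_element_words   /* only the extra locals */
       + monitors * monitor_words                                /* monitor block entries */
       + temps * stack_element_words                             /* expression-stack temps */
       + extra_args;                                             /* extra argument slots */
}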
++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( ++ const char* name) { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ li(A1, (long)name); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ st_d(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ dispatch_next(state); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::stackElementScale() - 1); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ 
get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ __ dispatch_next(state, step); ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++// T3 : invocation counter ++// ++void InterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beq(FSR, R0, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ beq(R0, R0, done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ addi_d(T4, T4, 1); ++ __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ld_w(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ st_w(T3, invocation_counter); // save invocation count ++ ++ __ ld_w(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ add_d(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ __ bne_far(AT, R0, *profile_method_continue); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt_far(T3, AT, *profile_method_continue, true /* signed */); ++ } ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm(CompileThreshold, 12)) { ++ __ srli_w(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ __ beq_far(AT, R0, *overflow); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge_far(T3, AT, *overflow, true /* signed */); ++ } ++ ++ __ bind(done); ++ } ++} ++ ++void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld_d(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(*do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. 
++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void InterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bge(AT, T2, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ slli_d(T3, T2, Interpreter::stackElementScale()); ++ __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ ++ // add in the redzone and yellow size ++ __ li(AT, (StackRedPages+StackYellowPages) * page_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void InterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ ld_d(T0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T0, T0, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_d(T0, T0, mirror_offset); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address InterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
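In Java terms, the fast path below implements java.util.zip.CRC32.update(int crc, int b) for a single byte, reading its two operands off the interpreter expression stack in reverse order (the byte value at SP + 0, the incoming crc one word higher). A rough C equivalent of what the stub computes, assuming crc_table stands for table 0 of the _crc_table defined earlier:

#include <stdint.h>

extern const uint32_t crc_table[256];              /* table 0 of StubRoutines::la::_crc_table */

static uint32_t crc32_update_one(uint32_t crc, uint8_t b) {
  crc = ~crc;                                      /* the first nor(crc, crc, R0) */
  crc = crc_table[(crc ^ b) & 0xff] ^ (crc >> 8);  /* update_byte_crc32 */
  return ~crc;                                     /* the second nor(crc, crc, R0) */
}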
++ ++ const Register crc = A0; // crc ++ const Register val = A1; // source java byte value ++ const Register tbl = A2; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ld_w(val, SP, 0); // byte value ++ __ ld_w(crc, SP, wordSize); // Initial CRC ++ ++ __ li(tbl, (long)StubRoutines::crc_table_addr()); ++ ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ nor(crc, crc, R0); // ~crc ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ ++ (void) generate_native_entry(false); ++ ++ return entry; ++ } ++ return generate_native_entry(false); ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ ++ (void) generate_native_entry(false); ++ ++ return entry; ++ } ++ return generate_native_entry(false); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 9; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size - 2) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Call an accessor method (assuming it is resolved, otherwise drop ++// into vanilla (slow path) entry ++address InterpreterGenerator::generate_accessor_entry(void) { ++ // Rmethod: Method* ++ // V0: receiver (preserve for slow entry into asm interpreter) ++ // Rsender: senderSP must preserved for slow path, set SP to it on fast path ++ ++ address entry_point = __ pc(); ++ Label xreturn_path; ++ // do fastpath for resolved accessor methods ++ if (UseFastAccessorMethods) { ++ Label slow_path; ++ __ li(T2, SafepointSynchronize::address_of_state()); ++ __ ld_w(AT, T2, 0); ++ __ addi_d(AT, AT, -(SafepointSynchronize::_not_synchronized)); ++ __ bne(AT, R0, slow_path); ++ // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; ++ // parameter size = 1 ++ // Note: We can only use this code if the getfield has been resolved ++ // and if we don't have a null-pointer exception => check for ++ // these conditions first and use slow path if necessary. ++ // Rmethod: method ++ // V0: receiver ++ ++ // [ receiver ] <-- sp ++ __ ld_d(T0, SP, 0); ++ ++ // check if local 0 != NULL and read field ++ __ beq(T0, R0, slow_path); ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ // read first instruction word and extract bytecode @ 1 and index @ 2 ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_w(T3, T3, in_bytes(ConstMethod::codes_offset())); ++ // Shift codes right to get the index on the right. 
++ // The bytecode fetched looks like <0xb4><0x2a> ++ __ srli_d(T3, T3, 2 * BitsPerByte); ++ // FIXME: maybe it's wrong ++ __ slli_d(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ ++ // T0: local 0 ++ // Rmethod: method ++ // V0: receiver - do not destroy since it is needed for slow path! ++ // T1: scratch use which register instead ? ++ // T3: constant pool cache index ++ // T2: constant pool cache ++ // Rsender: send's sp ++ // check if getfield has been resolved and read constant pool cache entry ++ // check the validity of the cache entry by testing whether _indices field ++ // contains Bytecode::_getfield in b1 byte. ++ assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); ++ ++ __ slli_d(T8, T3, Address::times_8); ++ __ li(T1, in_bytes(ConstantPoolCache::base_offset() ++ + ConstantPoolCacheEntry::indices_offset())); ++ __ add_d(T1, T8, T1); ++ __ ldx_w(T1, T1, T2); ++ __ srli_d(T1, T1, 2 * BitsPerByte); ++ __ andi(T1, T1, 0xFF); ++ __ addi_d(T1, T1, (-1) * Bytecodes::_getfield); ++ __ bne(T1, R0, slow_path); ++ ++ // Note: constant pool entry is not valid before bytecode is resolved ++ ++ __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ __ add_d(T1, T1, T8); ++ __ ldx_w(AT, T1, T2); ++ ++ __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ add_d(T1, T1, T8); ++ __ ldx_w(T3, T1, T2); ++ ++ Label notByte, notBool, notShort, notChar, notObj; ++ ++ // Need to differentiate between igetfield, agetfield, bgetfield etc. ++ // because they are different sizes. ++ // Use the type from the constant pool cache ++ __ srli_w(T3, T3, ConstantPoolCacheEntry::tos_state_shift); ++ // Make sure we don't need to mask T3 for tosBits after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // btos = 0 ++ __ add_d(T0, T0, AT); ++ __ bne(T3, R0, notByte); ++ ++ __ ld_b(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //ztos ++ __ bind(notByte); ++ __ addi_d(T1, T3, (-1) * ztos); ++ __ bne(T1, R0, notBool); ++ __ ld_b(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //stos ++ __ bind(notBool); ++ __ addi_d(T1, T3, (-1) * stos); ++ __ bne(T1, R0, notShort); ++ __ ld_h(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //ctos ++ __ bind(notShort); ++ __ addi_d(T1, T3, (-1) * ctos); ++ __ bne(T1, R0, notChar); ++ __ ld_hu(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //atos ++ __ bind(notChar); ++ __ addi_d(T1, T3, (-1) * atos); ++ __ bne(T1, R0, notObj); ++ //add for compressedoops ++ __ load_heap_oop(V0, Address(T0, 0)); ++ __ b(xreturn_path); ++ ++ //itos ++ __ bind(notObj); ++#ifdef ASSERT ++ Label okay; ++ __ addi_d(T1, T3, (-1) * itos); ++ __ beq(T1, R0, okay); ++ __ stop("what type is this?"); ++ __ bind(okay); ++#endif // ASSERT ++ __ ld_w(V0, T0, 0); ++ ++ __ bind(xreturn_path); ++ ++ // _ireturn/_areturn ++ //FIXME ++ __ move(SP, Rsender);//FIXME, set sender's fp to SP ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ } else { ++ (void) generate_normal_entry(false); ++ } ++ return entry_point; ++} ++ ++// Method entry for java.lang.ref.Reference.get. ++address InterpreterGenerator::generate_Reference_get_entry(void) { ++#if INCLUDE_ALL_GCS ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. 
The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_enty. ++ // ++ // Rmethod: Method* ++ ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); ++ if (UseG1GC) { ++ Label slow_path; ++ ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(V0, SP, 0); ++ ++ __ beq(V0, R0, slow_path); ++ ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ ++ // Load the value of the referent field. ++ const Address field_address(V0, referent_offset); ++ __ load_heap_oop(V0, field_address); ++ ++ __ push(RA); ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ __ g1_write_barrier_pre(noreg /* obj */, ++ V0 /* pre_val */, ++ TREG /* thread */, ++ Rmethod /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop(RA); ++ ++ __ add_d(SP, Rsender, R0); // set sp to sender sp ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ ++ return entry; ++ } ++#endif // INCLUDE_ALL_GCS ++ ++ // If G1 is not enabled then attempt to go through the accessor entry point ++ // Reference.get is an accessor ++ return generate_accessor_entry(); ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ //const Register thread = T2; ++ const Register t = T8; ++ ++ __ get_method(method); ++ __ verify_oop(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ ld_d(t, method, in_bytes(Method:: const_offset())); ++ __ ld_d(t, t, in_bytes(ConstMethod::constants_offset())); //?? ++ __ ld_d(t, t, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_d(t, t, mirror_offset); ++ // copy mirror into activation frame ++ //__ st_w(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ verify_oop(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++ __ get_thread(thread); ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label L; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ ld_w(AT, AT, 0); ++ __ bne(AT, R0, L); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(L); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 12-bit imeditate ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT,(int) JavaThread::stack_guard_yellow_disabled); ++ __ bne(t, AT, no_reguard); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ verify_oop(method); ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ ++ ++ // remove activation ++ __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld_d(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. 
++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++// Entry points ++// ++// Here we generate the various kind of entries into the interpreter. ++// The two main entry type are generic bytecode methods and native ++// call method. These both come in synchronized and non-synchronized ++// versions but the frame layout they create is very similar. The ++// other method entry types are really just special purpose entries ++// that are really entry and interpretation all in one. These are for ++// trivial methods like accessor, empty, or special math methods. 
++// ++// When control flow reaches any of the entry types for the interpreter ++// the following holds -> ++// ++// Arguments: ++// ++// Rmethod: Method* ++// V0: receiver ++// ++// ++// Stack layout immediately at entry ++// ++// [ parameter n-1 ] <--- sp ++// ... ++// [ parameter 0 ] ++// [ expression stack ] (caller's java expression stack) ++ ++// Assuming that we don't go to one of the trivial specialized entries ++// the stack will look like below when we are ready to execute the ++// first bytecode (or call the native routine). The register usage ++// will be as the template based interpreter expects (see ++// interpreter_loongarch_64.hpp). ++// ++// local variables follow incoming parameters immediately; i.e. ++// the return address is moved to the end of the locals). ++// ++// [ monitor entry ] <--- sp ++// ... ++// [ monitor entry ] ++// [ monitor block top ] ( the top monitor entry ) ++// [ byte code pointer ] (if native, bcp = 0) ++// [ constant pool cache ] ++// [ Method* ] ++// [ locals offset ] ++// [ sender's sp ] ++// [ sender's fp ] ++// [ return address ] <--- fp ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++address AbstractInterpreterGenerator::generate_method_entry( ++ AbstractInterpreter::MethodKind kind) { ++ // determine code generation flags ++ bool synchronized = false; ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::zerolocals : ++ break; ++ case Interpreter::zerolocals_synchronized: ++ synchronized = true; ++ break; ++ case Interpreter::native : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); ++ break; ++ case Interpreter::native_synchronized : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); ++ break; ++ case Interpreter::empty : ++ entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); ++ break; ++ case Interpreter::accessor : ++ entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); ++ break; ++ case Interpreter::abstract : ++ entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); ++ break; ++ ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : break; ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_sqrt : ++ entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; ++ case Interpreter::java_lang_ref_reference_get: ++ entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; ++ case Interpreter::java_util_zip_CRC32_update: ++ entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry(); break; ++ case Interpreter::java_util_zip_CRC32_updateBytes: // fall thru ++ case Interpreter::java_util_zip_CRC32_updateByteBuffer: ++ entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break; ++ default: ++ fatal(err_msg("unexpected method kind: %d", kind)); ++ break; ++ } ++ if (entry_point) return entry_point; ++ ++ return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); ++} ++ ++// These should never be compiled since the interpreter will prefer ++// the compiled version to 
the intrinsic version. ++bool AbstractInterpreter::can_be_compiled(methodHandle m) { ++ switch (method_kind(m)) { ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_sqrt : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : ++ return false; ++ default: ++ return true; ++ } ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ if (!EnableInvokeDynamic) { ++ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? ++ // Probably, since deoptimization doesn't work yet. 
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); ++ } ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(sp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. 
Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. 
Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ membar(__ AnyAny); ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++ 
++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT ++#endif // ! CC_INTERP +diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp +new file mode 100644 +index 0000000000..228217f001 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp +@@ -0,0 +1,4024 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address obj, ++ Register val, ++ BarrierSet::Name barrier, ++ bool precise) { ++ assert(val == noreg || val == FSR, "parameter is just for looks"); ++ switch (barrier) { ++#if INCLUDE_ALL_GCS ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ // flatten object address if needed ++ if (obj.index() == noreg && obj.disp() == 0) { ++ if (obj.base() != T3) { ++ __ move(T3, obj.base()); ++ } ++ } else { ++ __ lea(T3, obj); ++ } ++ __ g1_write_barrier_pre(T3 /* obj */, ++ T1 /* pre_val */, ++ TREG /* thread */, ++ T4 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ if (val == noreg) { ++ __ store_heap_oop_null(Address(T3, 0)); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = T1; ++ __ move(new_val, val); ++ } ++ __ store_heap_oop(Address(T3, 0), val); ++ __ g1_write_barrier_post(T3 /* store_adr */, ++ new_val /* new_val */, ++ TREG /* thread */, ++ T4 /* tmp */, ++ T1 /* tmp2 */); ++ } ++ } ++ break; ++#endif // INCLUDE_ALL_GCS ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ // flatten object address if needed ++ if (!precise || (obj.index() == noreg && obj.disp() == 0)) { ++ __ store_check(obj.base()); ++ } else { ++ //TODO: LA ++ __ lea(T4, obj); ++ __ store_check(T4); ++ } ++ } ++ } ++ break; ++ case BarrierSet::ModRef: ++ case BarrierSet::Other: ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ } ++ break; ++ default : ++ ShouldNotReachHere(); ++ ++ } ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ beq(AT, R0, L); ++ __ stop("unexpected tag type in ldc"); ++ __ bind(L); ++ } ++#endif ++ // itos JVM_CONSTANT_Integer only ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ bind(Done); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(tmp, i); ++ __ call_VM(result, entry, tmp); ++ ++ __ bind(resolved); ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label Long, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, - JVM_CONSTANT_Double); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, Long); ++ ++ // dtos ++ __ add_d(AT, T3, T2); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ // ltos ++ __ bind(Long); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload() { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. 
Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. ++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we don't use our tge 29 now, for later optimization
++void TemplateTable::index_check(Register array, Register index) {
++  // Pop ptr into array
++  __ pop_ptr(array);
++  index_check_without_pop(array, index);
++}
++
++void TemplateTable::index_check_without_pop(Register array, Register index) {
++  // destroys A2
++  // check array
++  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
++
++  // sign extend since tos (index) might contain garbage in upper bits
++  __ slli_w(index, index, 0);
++
++  // check index
++  Label ok;
++  __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes());
++  __ bltu(index, AT, ok);
++
++  // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
++  if (A2 != index) __ move(A2, index);
++  __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
++  __ bind(ok);
++}
++
++void TemplateTable::iaload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, 1);
++  __ ld_w(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
++}
++
++void TemplateTable::laload() {
++  transition(itos, ltos);
++  index_check(SSR, FSR);
++  __ alsl_d(AT, FSR, SSR, Address::times_8 - 1);
++  __ ld_d(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG));
++}
++
++void TemplateTable::faload() {
++  transition(itos, ftos);
++  index_check(SSR, FSR);
++  __ shl(FSR, 2);
++  __ add_d(FSR, SSR, FSR);
++  __ fld_s(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
++}
++
++void TemplateTable::daload() {
++  transition(itos, dtos);
++  index_check(SSR, FSR);
++  __ alsl_d(AT, FSR, SSR, 2);
++  __ fld_d(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
++}
++
++void TemplateTable::aaload() {
++  transition(itos, atos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? Address::times_4 : Address::times_8) - 1);
++  //add for compressedoops
++  __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
++}
++
++void TemplateTable::baload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ add_d(FSR, SSR, FSR);
++  __ ld_b(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
++}
++
++void TemplateTable::caload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1);
++  __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
++}
++
++// iload followed by caload frequent pair
++// used register : T2
++// T2 : index
++void TemplateTable::fast_icaload() {
++  transition(vtos, itos);
++  // load index out of locals
++  locals_index(T2);
++  __ ld_w(FSR, T2, 0);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, 0);
++  __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
++}
++
++void TemplateTable::saload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1);
++  __ ld_h(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
++}
++
++void TemplateTable::iload(int n) {
++  transition(vtos, itos);
++  __ ld_w(FSR, iaddress(n));
++}
++
++void TemplateTable::lload(int n) {
++  transition(vtos, ltos);
++  __ ld_d(FSR, laddress(n));
++}
++
++void TemplateTable::fload(int n) {
++  transition(vtos, ftos);
++  __ fld_s(FSF, faddress(n));
++}
++
++void TemplateTable::dload(int n) {
++  transition(vtos, dtos);
++  __ fld_d(FSF, laddress(n));
++}
++
++void TemplateTable::aload(int n) {
++  transition(vtos, atos);
++  __ ld_d(FSR, aaddress(n));
++}
++
++// used register : T2, T3
++// T2 : bytecode
++// T3 : folded code
++void TemplateTable::aload_0() {
++  transition(vtos, atos);
++  // According
++  // to bytecode histograms, the pairs:
++  //
++  //   _aload_0, _fast_igetfield
++  //   _aload_0, _fast_agetfield
++  //   _aload_0, _fast_fgetfield
++  //
++  // occur frequently. If RewriteFrequentPairs is set, the (slow)
++  // _aload_0 bytecode checks if the next bytecode is either
++  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
++  // rewrites the current bytecode into a pair bytecode; otherwise it
++  // rewrites the current bytecode into _fast_aload_0 that doesn't do
++  // the pair check anymore.
++  //
++  // Note: If the next bytecode is _getfield, the rewrite must be
++  // delayed, otherwise we may miss an opportunity for a pair.
++  //
++  // Also rewrite frequent pairs
++  //   aload_0, aload_1
++  //   aload_0, iload_1
++  // These bytecodes with a small amount of code are most profitable
++  // to rewrite
++  if (RewriteFrequentPairs) {
++    Label rewrite, done;
++    // get the next bytecode in T2
++    __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
++
++    // do actual aload_0
++    aload(0);
++
++    // if _getfield then wait with rewrite
++    __ li(AT, Bytecodes::_getfield);
++    __ beq(AT, T2, done);
++
++    // if _igetfield then rewrite to _fast_iaccess_0
++    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_iaccess_0);
++    __ li(AT, Bytecodes::_fast_igetfield);
++    __ beq(AT, T2, rewrite);
++
++    // if _agetfield then rewrite to _fast_aaccess_0
++    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_aaccess_0);
++    __ li(AT, Bytecodes::_fast_agetfield);
++    __ beq(AT, T2, rewrite);
++
++    // if _fgetfield then rewrite to _fast_faccess_0
++    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_faccess_0);
++    __ li(AT, Bytecodes::_fast_fgetfield);
++    __ beq(AT, T2, rewrite);
++
++    // else rewrite to _fast_aload0
++    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_aload_0);
++
++    // rewrite
++    __ bind(rewrite);
++    patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
++
++    __ bind(done);
++  } else {
++    aload(0);
++  }
++}
++
++void TemplateTable::istore() {
++  transition(itos, vtos);
++  locals_index(T2);
++  __ st_w(FSR, T2, 0);
++}
++
++void TemplateTable::lstore() {
++  transition(ltos, vtos);
++  locals_index(T2);
++  __ st_d(FSR, T2, -wordSize);
++}
++
++void TemplateTable::fstore() {
++  transition(ftos, vtos);
++  locals_index(T2);
++  __ fst_s(FSF, T2, 0);
++}
++
++void TemplateTable::dstore() {
++  transition(dtos, vtos);
++  locals_index(T2);
++  __ fst_d(FSF, T2, -wordSize);
++}
++
++void TemplateTable::astore() {
++  transition(vtos, vtos);
++  __ pop_ptr(FSR);
++  locals_index(T2);
++  __ st_d(FSR, T2, 0);
++}
++
++void TemplateTable::wide_istore() {
++  transition(vtos, vtos);
++  __ pop_i(FSR);
++  locals_index_wide(T2);
++  __ st_d(FSR, T2, 0);
++}
++
++void TemplateTable::wide_lstore() {
++  transition(vtos, vtos);
++  __ pop_l(FSR);
++  locals_index_wide(T2);
++  __ st_d(FSR, T2, -wordSize);
++}
++
++void TemplateTable::wide_fstore() {
++  wide_istore();
++}
++
++void TemplateTable::wide_dstore() {
++  wide_lstore();
++}
++
++void TemplateTable::wide_astore() {
++  transition(vtos, vtos);
++  __ pop_ptr(FSR);
++  locals_index_wide(T2);
++  __ st_d(FSR, T2, 0);
++}
++
++// used register : T2
++void TemplateTable::iastore() {
++  transition(itos,
vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ slli_d(SSR, SSR, Address::times_4); ++ __ add_d(T2, T2, SSR); ++ __ st_w(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(T3, T3, T2); ++ __ st_d(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ slli_d(SSR, SSR, Address::times_4); ++ __ add_d(T2, T2, SSR); ++ __ fst_s(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(T3, T3, T2); ++ __ fst_d(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ st_b(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ st_h(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ 
store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ add_w(FSR, SSR, FSR); break; ++ case sub : __ sub_w(FSR, SSR, FSR); break; ++ case mul : __ mul_w(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sll_w(FSR, SSR, FSR); break; ++ case shr : __ sra_w(FSR, SSR, FSR); break; ++ case ushr : __ srl_w(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ add_d(FSR, T2, FSR); break; ++ case sub : __ sub_d(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ __ div_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne(FSR, R0, not_zero); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(not_zero); ++ __ mod_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mul_d(FSR, T2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ __ div_d(FSR, A2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ __ mod_d(FSR, A2, FSR); ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sll_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sra_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ 
transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: 
tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ ld_w(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ st_w(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ ld_w(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ add_d(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T1, AT, dispatch); ++ } ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T2, AT, dispatch); ++ } ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge(T1, AT, backedge_counter_overflow); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ ld_bu(Rnext, BCP, 0); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // Rnext: target bytecode ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_w(T3, V0, nmethod::entry_bci_offset()); ++ __ li(AT, InvalidOSREntryBci); ++ __ beq(AT, T3, dispatch); ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. ++ //V0: osr nmethod (osr ok) or NULL (osr not possible) ++ //V1: osr adapter frame return address ++ //Rnext: target bytecode ++ //LVP: locals pointer ++ //BCP: bcp ++ __ move(BCP, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, BCP, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: 
++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // 
found? ++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
++  Label default_case;
++  // Convert array[i].match to native byte-ordering before compare
++  __ alsl_d(AT, i, array, Address::times_8 - 1);
++  __ ld_w(temp, AT, 0 * BytesPerInt);
++  __ swap(temp);
++  __ bne(key, temp, default_case);
++
++  // entry found -> j = offset
++  __ alsl_d(AT, i, array, Address::times_8 - 1);
++  __ ld_w(j, AT, 1 * BytesPerInt);
++  __ profile_switch_case(i, key, array);
++  __ swap(j);
++
++  __ add_d(BCP, BCP, j);
++  __ ld_bu(Rnext, BCP, 0);
++  __ dispatch_only(vtos);
++
++  // default case -> j = default offset
++  __ bind(default_case);
++  __ profile_switch_default(i);
++  __ ld_w(j, array, - 2 * BytesPerInt);
++  __ swap(j);
++  __ add_d(BCP, BCP, j);
++  __ ld_bu(Rnext, BCP, 0);
++  __ dispatch_only(vtos);
++}
++
++void TemplateTable::_return(TosState state) {
++  transition(state, state);
++  assert(_desc->calls_vm(),
++         "inconsistent calls_vm information"); // call in remove_activation
++
++  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
++    assert(state == vtos, "only valid state");
++    __ ld_d(T1, aaddress(0));
++    __ load_klass(LVP, T1);
++    __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset()));
++    __ li(AT, JVM_ACC_HAS_FINALIZER);
++    __ andr(AT, AT, LVP);
++    Label skip_register_finalizer;
++    __ beq(AT, R0, skip_register_finalizer);
++    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
++               InterpreterRuntime::register_finalizer), T1);
++    __ bind(skip_register_finalizer);
++  }
++
++  // Narrow result if state is itos but result type is smaller.
++  // Need to narrow in the return bytecode rather than in generate_return_entry
++  // since compiled code callers expect the result to already be narrowed.
++  if (state == itos) {
++    __ narrow(FSR);
++  }
++
++  __ remove_activation(state, T4);
++  __ membar(__ StoreStore);
++
++  __ jr(T4);
++}
++
++// we don't shift the index left by 2 bits in get_cache_and_index_at_bcp,
++// because we always need to shift the index before we use it. A
++// ConstantPoolCacheEntry is 16 bytes long, and index is the index into the
++// ConstantPoolCache, so cache + base_offset() + index * 16 is
++// the corresponding ConstantPoolCacheEntry
++// used registers : T2
++// NOTE : the returned index still needs to be shifted left by 4 to get the address!
++void TemplateTable::resolve_cache_and_index(int byte_no,
++                                            Register Rcache,
++                                            Register index,
++                                            size_t index_size) {
++  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
++  const Register temp = A1;
++  assert_different_registers(Rcache, index);
++
++  Label resolved;
++  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
++  // is resolved?
++ int i = (int)bytecode(); ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ // resolve first time through ++ address entry; ++ switch (bytecode()) { ++ case Bytecodes::_getstatic : // fall through ++ case Bytecodes::_putstatic : // fall through ++ case Bytecodes::_getfield : // fall through ++ case Bytecodes::_putfield : ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); ++ break; ++ case Bytecodes::_invokevirtual : // fall through ++ case Bytecodes::_invokespecial : // fall through ++ case Bytecodes::_invokestatic : // fall through ++ case Bytecodes::_invokeinterface: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); ++ break; ++ case Bytecodes::_invokehandle: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); ++ break; ++ case Bytecodes::_invokedynamic: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); ++ break; ++ default : ++ fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); ++ break; ++ } ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ verify_oop(obj); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? 
sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ add_d(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ ld_b(FSR, index, 0); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ ld_b(FSR, index, 0); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ ld_w(FSR, index, 0); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ // atos ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(index, 0)); ++ __ push(atos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ ld_hu(FSR, index, 0); ++ __ push(ctos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ ld_h(FSR, index, 0); ++ __ push(stos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ ld_d(FSR, index, 0 * wordSize); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. 
++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ fld_s(FSF, index, 0); ++ __ push(ftos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ fld_d(FSF, index, 0 * wordSize); ++ __ push(dtos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
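++ // Long and double values occupy two expression-stack slots; every other
++ // type takes one. The tos state in the cache entry flags tells us which.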
++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ add_d(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_b(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ st_b(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_w(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ 
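++ // the reference value was already popped into FSR above; the receiver
++ // is the next slot on the expression stack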
pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_h(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_h(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_d(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ fst_s(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ fst_d(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
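++ // The bytecode has already been rewritten to a _fast_xputfield form here,
++ // so the value still sits in FSR/FSF and is pushed back onto the stack
++ // below to build the jvalue passed to the VM.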
++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ // fall through to bputfield ++ case Bytecodes::_fast_bputfield: ++ __ st_b(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: ++ __ st_h(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ st_w(FSR, T2, 0); ++ 
break; ++ case Bytecodes::_fast_lputfield: ++ __ st_d(FSR, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ fst_s(FSF, T2, 0); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ fst_d(FSF, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ ld_b(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ ld_h(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ ld_hu(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ ld_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ fld_s(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ fld_d(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_agetfield: ++ __ load_heap_oop(FSR, Address(FSR, 0)); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ 
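++ // These templates fuse an aload_0 with the following getfield: the receiver
++ // is local slot 0 and the constant pool cache index sits at bcp + 2.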
++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ ld_w(FSR, T1, 0); ++ } else if (state == atos) { ++ __ load_heap_oop(FSR, Address(T1, 0)); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ fld_s(FSF, T1, 0); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ ++ ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. 
++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. ++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ slli_d(flags, flags, LogBytesPerWord); ++ __ add_d(AT, AT, flags); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ __ verify_oop(method); ++ ++ // It's final, need a null check here! 
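++ // (a vfinal call never loads the receiver klass, so the implicit null
++ //  check that load would provide must be done explicitly)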
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ __ verify_oop(T2); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ // T2: receiver ++ __ alsl_d(AT, index, T2, Address::times_ptr - 1); ++ //this is a ualign read ++ __ ld_d(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ __ verify_oop(Rmethod); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass ++ // Rmethod: method ++ // T3: receiver ++ // T1: flags ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCacheOop.cpp for details. ++ // This code isn't produced by javac, but could be produced by ++ // another compliant java compiler. 
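++ // Such calls are marked is_forced_virtual in the cache entry and are
++ // dispatched through the vtable (invokevirtual_helper) rather than the itable.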
++ Label notMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ __ bind(notMethod); ++ // Get receiver klass into T1 - also a null check ++ //add for compressedoops ++ __ load_klass(T1, T3); ++ __ verify_oop(T1); ++ ++ Label no_such_interface, no_such_method; ++ ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ // profile this call ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ if (!EnableInvokeDynamic) { ++ // rewriter does not generate this bytecode ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ if (!EnableInvokeDynamic) { ++ // We should not encounter this bytecode if !EnableInvokeDynamic. ++ // The verifier will stop it. However, if we get past the verifier, ++ // this will stop the thread in a reasonable way, without crashing the JVM. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ // get InstanceKlass in T3 ++ __ get_cpool_and_tags(A1, T1); ++ ++ __ alsl_d(AT, A2, A1, Address::times_8 - 1); ++ __ ld_d(T3, AT, sizeof(ConstantPool)); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
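++ // T0 holds the instance size; subtracting sizeof(oopDesc) below leaves just
++ // the field area that still has to be zeroed.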
++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ __ addi_d(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ st_d(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ Label L1; ++ __ beq(T1, FSR, L1); //dont clear header ++ __ addi_d(T1, T1, -oopSize); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T1, T1, -oopSize); ++ } ++ ++ // klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markOopDesc::prototype()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ hswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ move(FSR, R0); ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. 
++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ slli_d(T4, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ add_d(AT, T4, AT); ++ __ ld_d(T4, AT, 0); ++ __ jr(T4); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ slli_d(A1, A1, Address::times_8); ++ __ add_d(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ slli_d(AT, AT, Address::times_8); ++ __ add_d(SP, SP, AT); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} ++#endif // !CC_INTERP +diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp +new file mode 100644 +index 0000000000..c48d76e0a2 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, ++ Register index = noreg, ++ Register recv = noreg, ++ Register flags = noreg ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp +new file mode 100644 +index 0000000000..7c3ce68010 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp +new file mode 100644 +index 0000000000..c71f64e132 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp +new file mode 100644 +index 0000000000..682dd9c78f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp +new file mode 100644 +index 0000000000..81ea3b230c +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp +@@ -0,0 +1,443 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "vm_version_loongarch.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#include ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int VM_Version::_cpuFeatures; ++unsigned long VM_Version::auxv; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ if (_cpuid_info.cpucfg_info_id1.bits.UAL != 0) ++ result |= CPU_UAL; ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ if (_cpuid_info.cpucfg_info_id2.bits.COMPLEX != 0) ++ result |= CPU_COMPLEX; ++ if 
(_cpuid_info.cpucfg_info_id2.bits.CRYPTO != 0) ++ result |= CPU_CRYPTO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LBT_X86 != 0) ++ result |= CPU_LBT_X86; ++ if (_cpuid_info.cpucfg_info_id2.bits.LBT_ARM != 0) ++ result |= CPU_LBT_ARM; ++ if (_cpuid_info.cpucfg_info_id2.bits.LBT_MIPS != 0) ++ result |= CPU_LBT_MIPS; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAM != 0) ++ result |= CPU_LAM; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++ auxv = getauxval(AT_HWCAP); ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ 
assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, " ++ "usesynclevel:%d", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? ", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER, ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. 
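++  // That is why the guard below is hard-wired to 1 and UseSHA defaults to
++  // enabled on every LoongArch CPU. The same FLAG_IS_DEFAULT / FLAG_SET_DEFAULT
++  // pattern is used for all of the feature flags in this function; "SomeFlag"
++  // here is only a placeholder for UseSHA, UseAES, UseCRC32, etc.:
++  //
++  //   if (FLAG_IS_DEFAULT(SomeFlag)) {      // not set via -XX:+/-SomeFlag
++  //     FLAG_SET_DEFAULT(SomeFlag, true);   // pick the platform default
++  //   } else if (SomeFlag) {
++  //     warning("...not available on this CPU");
++  //     FLAG_SET_DEFAULT(SomeFlag, false);  // reject an explicit request
++  //   }
++  //
++  // An explicit -XX:-SomeFlag is therefore always honored, while an explicit
++  // -XX:+SomeFlag for an unsupported feature is downgraded with a warning.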
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (CriticalJNINatives) { ++ if (FLAG_IS_CMDLINE(CriticalJNINatives)) { ++ warning("CriticalJNINatives specified, but not supported in this VM"); ++ } ++ FLAG_SET_DEFAULT(CriticalJNINatives, false); ++ } ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp +new file mode 100644 +index 0000000000..3b5f907a79 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp +@@ -0,0 +1,299 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/globals_extension.hpp" ++#include "runtime/vm_version.hpp" ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } 
bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LAM = (1 << 1), ++ CPU_UAL = (1 << 2), ++ CPU_LSX = (1 << 4), ++ CPU_LASX = (1 << 5), ++ CPU_COMPLEX = (1 << 7), ++ CPU_CRYPTO = (1 << 8), ++ CPU_LBT_X86 = (1 << 10), ++ CPU_LBT_ARM = (1 << 11), ++ CPU_LBT_MIPS = (1 << 12), ++ /* flags above must follow Linux HWCAP */ ++ CPU_LA32 = (1 << 13), ++ CPU_LA64 = (1 << 14), ++ CPU_FP = (1 << 15), ++ CPU_LLEXC = (1 << 16), ++ CPU_SCDLY = (1 << 17), ++ CPU_LLDBAR = (1 << 18), ++ CPU_CCDMA = (1 << 19), ++ CPU_LLSYNC = (1 << 20), ++ CPU_TGTSYNC = (1 << 21), ++ CPU_ULSYNC = (1 << 22), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ static int _cpuFeatures; ++ static unsigned long auxv; ++ static const char* _features_str; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { 
return _cpuFeatures & CPU_LA32; } ++ static bool is_la64() { return _cpuFeatures & CPU_LA64; } ++ static bool supports_crypto() { return _cpuFeatures & CPU_CRYPTO; } ++ static bool supports_lsx() { return auxv & HWCAP_LOONGARCH_LSX; } ++ static bool supports_lasx() { return auxv & HWCAP_LOONGARCH_LASX; } ++ static bool supports_lam() { return _cpuFeatures & CPU_LAM; } ++ static bool supports_llexc() { return _cpuFeatures & CPU_LLEXC; } ++ static bool supports_scdly() { return _cpuFeatures & CPU_SCDLY; } ++ static bool supports_lldbar() { return _cpuFeatures & CPU_LLDBAR; } ++ static bool supports_ual() { return _cpuFeatures & CPU_UAL; } ++ static bool supports_lbt_x86() { return _cpuFeatures & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _cpuFeatures & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _cpuFeatures & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return !supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static const char* cpu_features() { return _features_str; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp +new file mode 100644 +index 0000000000..52bccfc183 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp +new file mode 100644 +index 0000000000..80a1fc57de +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++ ++bool is_Register(); ++Register as_Register(); ++ ++bool is_FloatRegister(); ++FloatRegister as_FloatRegister(); ++ ++#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp +new file mode 100644 +index 0000000000..f822d4c355 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++inline bool VMRegImpl::is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool VMRegImpl::is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline Register VMRegImpl::as_Register() { ++ ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister VMRegImpl::as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool VMRegImpl::is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..df0d176b8b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp +@@ -0,0 +1,300 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch_64.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, ++ oop receiver, ++ int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ const int la_code_length = VtableStub::pd_code_size_limit(true); ++ VtableStub* s = new(la_code_length) VtableStub(true, vtable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), la_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ // compute entry offset (in words) ++ int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ // load methodOop and target address ++ const Register method = Rmethod; ++ int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_ptr(method, t1, offset); ++ } else { ++ __ li(AT, offset); ++ __ ld_ptr(method, t1, AT); ++ } ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Note well: 
pd_code_size_limit is the absolute minimum we can get ++ // away with. If you add code here, bump the code stub size ++ // returned by pd_code_size_limit! ++ const int la_code_length = VtableStub::pd_code_size_limit(false); ++ VtableStub* s = new(la_code_length) VtableStub(false, itable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), la_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ // we T8,T4 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif /* PRODUCT */ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on LoongArch. ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if (DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if 
(DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ // We found a hit, move offset into T4 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ ++ __ slli_d(AT, t2, Address::times_1); ++ __ add_d(AT, AT, t1 ); ++ if (Assembler::is_simm(method_offset, 12)) { ++ __ ld_ptr(method, AT, method_offset); ++ } else { ++ __ li(t1, method_offset); ++ __ ld_ptr(method, AT, t1); ++ } ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T4: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_size_limit(bool is_vtable_stub) { ++ if (is_vtable_stub) { ++ return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 16 : 0); ++ } else { ++ return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 32 : 0); ++ } ++} ++ ++int VtableStub::pd_code_alignment() { ++ return wordSize; ++} +diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.cpp b/hotspot/src/cpu/mips/vm/assembler_mips.cpp +new file mode 100644 +index 0000000000..6c720972ad +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/assembler_mips.cpp +@@ -0,0 +1,774 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/cardTableModRefBS.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#include "gc_implementation/g1/heapRegion.hpp" ++#endif // INCLUDE_ALL_GCS ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", 
"floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", "", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int 
Assembler::branch_destination(int inst, int pos) { ++ int off; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ 
} ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ 
daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address 
src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.hpp +new file mode 100644 +index 0000000000..e91b9db222 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/assembler_mips.hpp +@@ -0,0 +1,1789 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
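
The Address-form emitters above all reduce an operand of the shape base + (index << scale) + disp to the only memory form MIPS loads/stores accept natively, base + signed 16-bit offset, spilling the rest into AT (or into a gs*x indexed access when UseLEXT1 is set). A minimal standalone sketch of that decomposition, with purely illustrative values:

    #include <cstdint>
    #include <cstdio>

    // Same range test as Assembler::is_simm16(): a signed 16-bit immediate.
    static bool is_simm16(int32_t x) { return x >= -32768 && x < 32768; }

    int main() {
      int64_t base  = 0x120000000;   // hypothetical base register value
      int64_t index = 5;             // hypothetical index register value
      int     scale = 3;             // times_8, i.e. 8-byte elements
      int32_t disp  = 24;            // small constant offset
      int64_t ea = base + (index << scale) + disp;
      // disp fits simm16, so a single load/store with that offset suffices;
      // otherwise the emitters above build the high half with lui/ori into AT first.
      printf("ea = %#llx, simm16(disp) = %d\n", (unsigned long long)ea, is_simm16(disp));
      return 0;
    }
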
++ ++class ArrayAddress; ++ ++class Address VALUE_OBJ_CLASS_SPEC { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument VALUE_OBJ_CLASS_SPEC { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(RA0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral VALUE_OBJ_CLASS_SPEC { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ // 32-bit complains about a multiple declaration for int*. 
++ AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) ++ : _target((address) addr), ++ _rspec(rspec_from_rtype(rtype, (address) addr)) {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress VALUE_OBJ_CLASS_SPEC { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
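
Every instruction this assembler emits is a single 32-bit word built by shifting fields into place; insn_ORRI() inside the class below packs the I-type layout |opcode(6)|rs(5)|rt(5)|imm(16)|. A small self-contained sketch of that packing (register numbers and the immediate are made-up values):

    #include <cstdint>
    #include <cstdio>

    // I-type packing as done by insn_ORRI(): op<<26 | rs<<21 | rt<<16 | low 16 bits of imm.
    static uint32_t pack_itype(uint32_t op, uint32_t rs, uint32_t rt, int32_t imm) {
      return (op << 26) | (rs << 21) | (rt << 16) | (uint32_t)(imm & 0xffff);
    }

    int main() {
      const uint32_t daddiu_op = 0x19;                  // value from the ops enum below
      uint32_t insn = pack_itype(daddiu_op, 4, 2, 16);  // daddiu r2, r4, 16
      printf("0x%08x\n", insn);                         // prints 0x64820010
      return 0;
    }
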
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
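
For this SPECIAL (R-type) family the primary opcode field is 0x00 and the distinguishing function code sits in the low 6 bits, which is exactly what the opcode()/special() extractors above pull back out. A hedged standalone sketch, using daddu_op = 0x2d from the enum that follows and made-up register numbers:

    #include <cstdint>
    #include <cstdio>

    // R-type packing as in insn_RRRO(): rs<<21 | rt<<16 | rd<<11 | funct,
    // with the primary opcode (bits 31..26) left as 0 for the SPECIAL family.
    static uint32_t pack_rtype(uint32_t rs, uint32_t rt, uint32_t rd, uint32_t funct) {
      return (rs << 21) | (rt << 16) | (rd << 11) | funct;
    }

    int main() {
      const uint32_t daddu_op = 0x2d;                 // value from the enum below
      uint32_t insn = pack_rtype(5, 6, 4, daddu_op);  // daddu r4, r5, r6
      printf("opcode=%u funct=0x%02x\n", insn >> 26, insn & 0x3f);  // opcode=0 funct=0x2d
      return 0;
    }
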
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13, ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
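
These gs* (Loongson EXT) load/store families are what the Address-form emitters earlier in this file fall back to when UseLEXT1 is set; note that the indexed gs*x forms (gslwx, gssdx, gsswx, ...) only carry an 8-bit signed offset, which is why those emitters guard on "UseLEXT1 && is_simm(disp, 8)" before using them. A small sketch of that range test, with arbitrary sample offsets:

    #include <cstdio>

    // Same check as Assembler::is_simm(x, nbits): signed nbits-bit range.
    static bool is_simm(int x, int nbits) {
      const int min      = -(1 << (nbits - 1));
      const int maxplus1 =  (1 << (nbits - 1));
      return min <= x && x < maxplus1;
    }

    int main() {
      printf("%d %d %d\n", is_simm(100, 8), is_simm(200, 8), is_simm(-128, 8));
      // prints 1 0 1: only the first and last fit the 8-bit offset of gssdx/gsswx.
      return 0;
    }
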
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert_not_delayed("next instruction should not be a delay slot"); ++#endif ++ } ++ ++ void assert_not_delayed(const char* msg) { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, msg); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. ++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert_not_delayed("just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, 
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
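
The floating-point arithmetic that follows is all one fixed COP1 layout: primary opcode cop1_op, a format field (single_fmt here, double_fmt and ps_fmt further down), then ft/fs/fd and a 6-bit function code, which is what insn_F3RO() assembles. A standalone sketch with made-up register numbers:

    #include <cstdint>
    #include <cstdio>

    // COP1 packing as in insn_F3RO(): cop1<<26 | fmt<<21 | ft<<16 | fs<<11 | fd<<6 | funct.
    static uint32_t pack_cop1(uint32_t fmt, uint32_t ft, uint32_t fs, uint32_t fd, uint32_t funct) {
      const uint32_t cop1_op = 0x11;   // value from the ops enum above
      return (cop1_op << 26) | (fmt << 21) | (ft << 16) | (fs << 11) | (fd << 6) | funct;
    }

    int main() {
      const uint32_t single_fmt = 0x10, fadd_op = 0x00;        // values from the enums above
      uint32_t insn = pack_cop1(single_fmt, 2, 1, 0, fadd_op); // add.s f0, f1, f2
      printf("0x%08x\n", insn);                                // prints 0x46020800
      return 0;
    }
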
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
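// --- A minimal standalone sketch (not part of the patch) of the offset handling that
// --- gslq above and gslqc1/gssq/gssqc1 below rely on: per the comment earlier in this
// --- header, vAddr = sign_extend(offset << 4) + GPR[base], so the byte offset must be
// --- 16-byte aligned, is stored as offset >> 4, and the stored value must fit in a
// --- signed 9-bit field placed at bits 6..14 (the "low(off, 9) << 6" term). The helper
// --- names fits_simm/encode_quadword_offset are hypothetical, not HotSpot APIs.
#include <cassert>
#include <cstdint>

static inline bool fits_simm(int32_t value, int bits) {
  // True if 'value' is representable as a signed immediate of width 'bits'.
  const int32_t lo = -(1 << (bits - 1));
  const int32_t hi =  (1 << (bits - 1)) - 1;
  return value >= lo && value <= hi;
}

static inline uint32_t encode_quadword_offset(int32_t byte_off) {
  assert((byte_off & 0xF) == 0 && "quadword offset must be 16-byte aligned");
  const int32_t scaled = byte_off >> 4;            // hardware sees offset / 16
  assert(fits_simm(scaled, 9) && "scaled offset must fit in 9 signed bits");
  return (uint32_t)(scaled & 0x1FF) << 6;          // low(off, 9) << 6, as in the patch
}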
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : 
AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++ ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp +new file mode 100644 +index 0000000000..39aeb5509a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp +new file mode 100644 +index 0000000000..a4a1b28c2d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "interpreter/bytecodeInterpreter.hpp" ++#include "interpreter/bytecodeInterpreter.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++#ifdef CC_INTERP ++ ++#endif // CC_INTERP (all) +diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp +new file mode 100644 +index 0000000000..aac8b7a2b7 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp +@@ -0,0 +1,110 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP ++ ++// Platform specific for C++ based Interpreter ++#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ ++ ++private: ++ ++ // save the bottom of the stack after frame manager setup. For ease of restoration after return ++ // from recursive interpreter call ++ intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ ++ intptr_t* _last_Java_pc; /* pc to return to in frame manager */ ++ intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ ++ interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? 
*/ ++ double _native_fresult; /* save result of native calls that might return floats */ ++ intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ ++public: ++ ++ static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); ++ inline intptr_t* sender_sp() { ++ return _sender_sp; ++ } ++ ++ ++#define SET_LAST_JAVA_FRAME() ++ ++#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); ++ ++/* ++ * Macros for accessing the stack. ++ */ ++#undef STACK_INT ++#undef STACK_FLOAT ++#undef STACK_ADDR ++#undef STACK_OBJECT ++#undef STACK_DOUBLE ++#undef STACK_LONG ++ ++// JavaStack Implementation ++ ++#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) ++#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) ++#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) ++#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) ++#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) ++#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) ++#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) ++#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) ++ ++#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) ++#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) ++#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) ++#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++// JavaLocals implementation ++ ++#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) ++#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) ++#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) ++#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) ++#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) ++#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) ++#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) ++#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) ++#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) ++ ++#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) ++#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) ++#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) ++#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define 
SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++ ++#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp +new file mode 100644 +index 0000000000..8ce77ab92f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP ++ ++// Inline interpreter functions for MIPS ++ ++inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } ++inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } ++inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } ++inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } ++inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } ++ ++inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } ++ ++inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? direction : 0); ++ ++} ++ ++inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { ++ // x86 can do unaligned copies but not 64bits at a time ++ to[0] = from[0]; to[1] = from[1]; ++} ++ ++// The long operations depend on compiler support for "long long" on x86 ++ ++inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { ++ return op1 + op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { ++ return op1 & op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { ++ // QQQ what about check and throw... 
++ return op1 / op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { ++ return op1 * op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { ++ return op1 | op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { ++ return op1 - op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { ++ return op1 ^ op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { ++ return op1 % op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { ++ // CVM did this 0x3f mask, is the really needed??? QQQ ++ return ((unsigned long long) op1) >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { ++ return op1 >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { ++ return op1 << (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { ++ return -op; ++} ++ ++inline jlong BytecodeInterpreter::VMlongNot(jlong op) { ++ return ~op; ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { ++ return (op <= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { ++ return (op >= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { ++ return (op == 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { ++ return (op1 == op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { ++ return (op1 != op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { ++ return (op1 >= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { ++ return (op1 <= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { ++ return (op1 < op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { ++ return (op1 > op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { ++ return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); ++} ++ ++// Long conversions ++ ++inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { ++ return (jfloat) val; ++} ++ ++inline jint BytecodeInterpreter::VMlong2Int(jlong val) { ++ return (jint) val; ++} ++ ++// Double Arithmetic ++ ++inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { ++ return op1 + op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { ++ // Divide by zero... QQQ ++ return op1 / op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { ++ return op1 * op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { ++ return -op; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { ++ return fmod(op1, op2); ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { ++ return op1 - op2; ++} ++ ++inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? 
direction : 0); ++} ++ ++// Double Conversions ++ ++inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { ++ return (jfloat) val; ++} ++ ++// Float Conversions ++ ++inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { ++ return (jdouble) op; ++} ++ ++// Integer Arithmetic ++ ++inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { ++ return op1 + op2; ++} ++ ++inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { ++ return op1 & op2; ++} ++ ++inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return op1; ++ else return op1 / op2; ++} ++ ++inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { ++ return op1 * op2; ++} ++ ++inline jint BytecodeInterpreter::VMintNeg(jint op) { ++ return -op; ++} ++ ++inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { ++ return op1 | op2; ++} ++ ++inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return 0; ++ else return op1 % op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { ++ return op1 << op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { ++ return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { ++ return op1 - op2; ++} ++ ++inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { ++ return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { ++ return op1 ^ op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMint2Double(jint val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMint2Float(jint val) { ++ return (jfloat) val; ++} ++ ++inline jlong BytecodeInterpreter::VMint2Long(jint val) { ++ return (jlong) val; ++} ++ ++inline jchar BytecodeInterpreter::VMint2Char(jint val) { ++ return (jchar) val; ++} ++ ++inline jshort BytecodeInterpreter::VMint2Short(jint val) { ++ return (jshort) val; ++} ++ ++inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { ++ return (jbyte) val; ++} ++ ++#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp +new file mode 100644 +index 0000000000..61efd1f561 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/bytecodes.hpp" ++ ++ ++void Bytecodes::pd_initialize() { ++ // No mips specific initialization ++} ++ ++ ++Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { ++ // No mips specific bytecodes ++ return code; ++} +diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp +new file mode 100644 +index 0000000000..25a9562acd +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTECODES_MIPS_HPP ++#define CPU_MIPS_VM_BYTECODES_MIPS_HPP ++ ++// No Loongson specific bytecodes ++ ++#endif // CPU_MIPS_VM_BYTECODES_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytes_mips.hpp b/hotspot/src/cpu/mips/vm/bytes_mips.hpp +new file mode 100644 +index 0000000000..515ffad4b0 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytes_mips.hpp +@@ -0,0 +1,193 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. 
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "bytes_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_solaris_mips ++# include "bytes_solaris_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_windows_mips ++# include "bytes_windows_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_bsd_mips ++# include "bytes_bsd_mips.inline.hpp" ++#endif ++ ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp +new file mode 100644 +index 0000000000..f254e07abd +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp +@@ -0,0 +1,100 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, false); // Disable C1 in server JIT ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++#ifdef MIPS64 ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++#else ++define_pd_global(intx, INTPRESSURE, 6); ++define_pd_global(intx, InteriorEntryAlignment, 4); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K); ++define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2304*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 32*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 4ULL*G); ++#endif // MIPS64 ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86. ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/c2_init_mips.cpp b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp +new file mode 100644 +index 0000000000..e6d5815f42 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp +new file mode 100644 +index 0000000000..1836b7a921 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp +new file mode 100644 +index 0000000000..8ffaaaf841 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp +@@ -0,0 +1,173 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// Release the CompiledICHolder* associated with this call site is there is one. ++void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { ++ // This call site might have become stale so inspect it carefully. ++ NativeCall* call = nativeCall_at(call_site->addr()); ++ if (is_icholder_entry(call->destination())) { ++ NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); ++ InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); ++ } ++} ++ ++bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { ++ // This call site might have become stale so inspect it carefully. ++ NativeCall* call = nativeCall_at(call_site->addr()); ++ return is_icholder_entry(call->destination()); ++} ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { ++ ++ address mark = cbuf.insts_mark(); // get mark within main instrs section ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. 
++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef MIPS64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef MIPS64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledStaticCall::verify() { ++ // Verify call. ++ NativeCall::verify(); ++ if (os::is_MP()) { ++ verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef MIPS64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/hotspot/src/cpu/mips/vm/copy_mips.hpp b/hotspot/src/cpu/mips/vm/copy_mips.hpp +new file mode 100644 +index 0000000000..49fde17923 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/copy_mips.hpp +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "copy_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_solaris_mips ++# include "copy_solaris_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_windows_mips ++# include "copy_windows_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_bsd_mips ++# include "copy_bsd_mips.inline.hpp" ++#endif ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp +new file mode 100644 +index 0000000000..37bd03b00b +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP ++#define CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP ++ ++ protected: ++ ++#if 0 ++ address generate_asm_interpreter_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++#endif ++ ++ void generate_more_monitors(); ++ void generate_deopt_handling(); ++ address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only ++ void generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ const Register sender_sp, ++ bool native); // C++ interpreter only ++ ++#endif // CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp +new file mode 100644 +index 0000000000..1f8d75d593 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/cppInterpreter.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#ifdef SHARK ++#include "shark/shark_globals.hpp" ++#endif ++ ++#ifdef CC_INTERP ++ ++// Routine exists to make tracebacks look decent in debugger ++// while "shadow" interpreter frames are on stack. It is also ++// used to distinguish interpreter frames. ++ ++extern "C" void RecursiveInterpreterActivation(interpreterState istate) { ++ ShouldNotReachHere(); ++} ++ ++bool CppInterpreter::contains(address pc) { ++ Unimplemented(); ++} ++ ++#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) ++#define __ _masm-> ++ ++Label frame_manager_entry; ++Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized ++ // c++ interpreter entry point this holds that entry point label. ++ ++static address unctrap_frame_manager_entry = NULL; ++ ++static address interpreter_return_address = NULL; ++static address deopt_frame_manager_return_atos = NULL; ++static address deopt_frame_manager_return_btos = NULL; ++static address deopt_frame_manager_return_itos = NULL; ++static address deopt_frame_manager_return_ltos = NULL; ++static address deopt_frame_manager_return_ftos = NULL; ++static address deopt_frame_manager_return_dtos = NULL; ++static address deopt_frame_manager_return_vtos = NULL; ++ ++const Register prevState = G1_scratch; ++ ++void InterpreterGenerator::save_native_result(void) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::restore_native_result(void) { ++ Unimplemented(); ++} ++ ++// A result handler converts/unboxes a native call result into ++// a java interpreter/compiler result. The current frame is an ++// interpreter frame. The activation frame unwind code must be ++// consistent with that of TemplateTable::_return(...). In the ++// case of native methods, the caller's SP was not modified. 
++address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::return_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::deopt_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_empty_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_accessor_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ bool native) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::lock_method(void) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_deopt_handling() { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_more_monitors() { ++ Unimplemented(); ++} ++ ++ ++static address interpreter_frame_manager = NULL; ++ ++void CppInterpreterGenerator::adjust_callers_stack(Register args) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : CppInterpreterGenerator(code) { ++ Unimplemented(); ++} ++ ++ ++static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { ++ Unimplemented(); ++} ++ ++int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, ++ frame* caller, ++ frame* current, ++ methodOop method, ++ intptr_t* locals, ++ intptr_t* stack, ++ intptr_t* stack_base, ++ intptr_t* monitor_base, ++ intptr_t* frame_bottom, ++ bool is_top_frame ++ ) ++{ ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { ++ Unimplemented(); ++} ++ ++ ++int AbstractInterpreter::layout_activation(methodOop method, ++ int tempcount, // Number of slots on java expression stack in use ++ int popframe_extra_args, ++ int moncount, // Number of active monitors ++ int callee_param_size, ++ int callee_locals_size, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame) { ++ Unimplemented(); ++} ++ ++#endif // CC_INTERP +diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp +new file mode 100644 +index 0000000000..49c4733049 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreter to get the VM to print out the size. ++ // Max size with JVMTI and TaggedStackInterpreter ++ ++ // QQQ this is proably way too large for c++ interpreter ++ ++ // The sethi() instruction generates lots more instructions when shell ++ // stack limit is unlimited, so that's why this is much bigger. ++ const static int InterpreterCodeSize = 210 * K; ++ ++#endif // CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/debug_mips.cpp b/hotspot/src/cpu/mips/vm/debug_mips.cpp +new file mode 100644 +index 0000000000..50de03653b +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/debug_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nmethod.hpp" ++#include "runtime/frame.hpp" ++#include "runtime/init.hpp" ++#include "runtime/os.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/top.hpp" ++ ++#ifndef PRODUCT ++ ++void pd_ps(frame f) { ++ intptr_t* sp = f.sp(); ++ intptr_t* prev_sp = sp - 1; ++ intptr_t *pc = NULL; ++ intptr_t *next_pc = NULL; ++ int count = 0; ++ tty->print("register window backtrace from %#lx:\n", p2i(sp)); ++} ++ ++// This function is used to add platform specific info ++// to the error reporting code. ++ ++void pd_obfuscate_location(char *buf,int buflen) {} ++ ++#endif // PRODUCT +diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.cpp b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp +new file mode 100644 +index 0000000000..756ccb68f9 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.hpp b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp +new file mode 100644 +index 0000000000..11e52b4e8f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/disassembler_mips.hpp b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp +new file mode 100644 +index 0000000000..c5f3a8888d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/frame_mips.cpp b/hotspot/src/cpu/mips/vm/frame_mips.cpp +new file mode 100644 +index 0000000000..1c928976fc +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/frame_mips.cpp +@@ -0,0 +1,711 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ if (sender_blob->is_nmethod()) { ++ nmethod* nm = sender_blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { ++ return false; ++ } ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame 
size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_nmethod(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_nmethod()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ // Note: fp == NULL is not really a prerequisite for this to be safe to ++ // walk for c2. However we've modified the code such that if we get ++ // a failure with fp != NULL that we then try with FP == NULL. ++ // This is basically to mimic what a last_frame would look like if ++ // c2 had generated it. ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. 
++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. 
++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); ++ assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); ++ if (sender_nm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_nm->is_deopt_entry(_pc) || ++ sender_nm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. 
++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++#ifdef ASSERT ++ const bool c1_compiled = _cb->is_compiled_by_c1(); ++ bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method(); ++ if (c1_compiled && native) { ++ assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size"); ++ } ++#endif // ASSERT ++ // On Intel the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++ ++bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { ++ assert(is_interpreted_frame(), "must be interpreter frame"); ++ Method* method = interpreter_frame_method(); ++ // When unpacking an optimized frame the frame pointer is ++ // adjusted with: ++ int diff = (method->max_locals() - method->size_of_parameters()) * ++ Interpreter::stackElementWords; ++ printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); ++ return _fp == (fp - diff); ++} ++ ++void frame::pd_gc_epilog() { ++ // nothing done here now ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!m->is_valid_method()) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcx ++ ++ intptr_t bcx = interpreter_frame_bcx(); ++ if (m->validate_bci_from_bcx(bcx) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (cp == NULL || !cp->is_metaspace_object()) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. 
See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++#endif +diff --git a/hotspot/src/cpu/mips/vm/frame_mips.hpp b/hotspot/src/cpu/mips/vm/frame_mips.hpp +new file mode 100644 +index 0000000000..9e684a8dc3 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/frame_mips.hpp +@@ -0,0 +1,229 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++#include "utilities/top.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. 
Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... ++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ methodOop ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++#ifndef CC_INTERP ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, ++ interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++#endif // CC_INTERP ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); ++ static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { ++ verify_deopt_original_pc(nm, unextended_sp, true); ++ } ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // return address of param, zero origin index. ++ inline address* native_param_addr(int idx) const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++#ifndef CC_INTERP ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++#endif // CC_INTERP ++ ++#ifdef CC_INTERP ++ inline interpreterState get_interpreterState() const; ++#endif // CC_INTERP ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp +new file mode 100644 +index 0000000000..60e56ac7ab +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp +@@ -0,0 +1,312 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++// return address of param, zero origin index. ++inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } ++ ++#ifdef CC_INTERP ++ ++inline interpreterState frame::get_interpreterState() const { ++ return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); ++} ++ ++inline intptr_t* frame::sender_sp() const { ++ // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? ++ if (is_interpreted_frame()) { ++ assert(false, "should never happen"); ++ return get_interpreterState()->sender_sp(); ++ } else { ++ return addr_at(sender_sp_offset); ++ } ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_locals); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_bcp); ++} ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_constants); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_method); ++} ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_mdx); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ assert(is_interpreted_frame(), "wrong frame type"); ++ return get_interpreterState()->_stack + 1; ++} ++ ++#else // asm interpreter ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcx_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdx_offset); ++} ++ ++ 
++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++#endif // CC_INTERP ++ ++inline int frame::pd_oop_map_offset_adjustment() const { ++ return 0; ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++ ++inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } ++ ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - local_index + (local_index < nof_args ? 1: -1)); ++} ++ ++inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); ++} ++ ++inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); ++} ++ ++inline bool frame::volatile_across_calls(Register reg) { ++ return true; ++} ++ ++ ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp +new file mode 100644 +index 0000000000..bd00a8d473 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/globals_mips.hpp b/hotspot/src/cpu/mips/vm/globals_mips.hpp +new file mode 100644 +index 0000000000..988bc35137 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/globals_mips.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++#ifdef CORE ++define_pd_global(bool, UseSSE, 0); ++#endif /* CORE */ ++define_pd_global(bool, ConvertSleepToYield, true); ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, CountInterpCalls, true); ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++// See 4827828 for this change. There is no globals_core_i486.hpp. 
I can't ++// assign a different value for C2 without touching a number of files. Use ++// #ifdef to minimize the change as it's late in Mantis. -- FIXME. ++// c1 doesn't have this problem because the fix to 4858033 assures us ++// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns ++// the uep and the vep doesn't get real alignment but just slops on by ++// only assured that the entry instruction meets the 5 byte size requirement. ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 4000); // MIPS generates 3x instructions than X86 ++ ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp +new file mode 100644 +index 0000000000..96ea345360 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "oops/oop.inline2.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. 
this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/hotspot/src/cpu/mips/vm/icache_mips.cpp b/hotspot/src/cpu/mips/vm/icache_mips.cpp +new file mode 100644 +index 0000000000..848964b63f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/icache_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, RA2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/hotspot/src/cpu/mips/vm/icache_mips.hpp b/hotspot/src/cpu/mips/vm/icache_mips.hpp +new file mode 100644 +index 0000000000..78ee11cc73 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp +new file mode 100644 +index 0000000000..ed2d931e94 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp +@@ -0,0 +1,2084 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interp_masm_mips_64.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. 
++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ Register thread = T8; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#else ++ move(T8, TREG); ++#endif ++ ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register 
java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ assert(EnableInvokeDynamic, "giant index used only for JSR 292"); ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. Since this is a java object, it can be compressed ++ Register tmp = index; // reuse ++ shl(tmp, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::resolved_references_offset_in_bytes()); ++ // JNIHandles::resolve(obj); ++ ld(result, result, 0); //? is needed? ++ // Add in the index ++ daddu(result, result, tmp); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. 
++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. 
++#ifndef OPT_THREAD ++ get_thread(temp); ++#else ++ move(temp, TREG); ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop) { ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2,- min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state) { ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++} ++ ++#endif // C_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) ++// rscratch1, rscratch2 (scratch regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = T2; // Must use T2 for cmpxchg instruction ++ const Register obj_reg = T1; // Will contain the oop ++ ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); ++ ++ Label slow_case; ++ ++ // Load object pointer into obj_reg %T1 ++ ld(obj_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, noreg, false, done, &slow_case); ++ } ++ ++ ++ // Load (object->mark() | 1) into swap_reg %T2 ++ ld(AT, obj_reg, 0); ++ ori(swap_reg, AT, 1); ++ ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ //if (os::is_MP()) { ++ // lock(); ++ //} ++ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ beq(AT, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ ++ bne(AT, R0, done); ++ delayed()->nop(); ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T2 as the result of cmpxchg ++ ++ dsubu(swap_reg, swap_reg, SP); ++ move(AT, 3 - os::vm_page_size()); ++ andr(swap_reg, swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// rscratch1, rscratch2 (scratch regs) ++// Argument: T6 : Points to BasicObjectLock structure for lock ++// Argument: c_rarg0 : Points to BasicObjectLock structure for lock ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = T2; // Must use T2 for cmpxchg instruction ++ const Register header_reg = T3; // Will contain the old oopMark ++ const Register obj_reg = T1; // Will contain the oop ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(swap_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into obj_reg(%T1) ++ ld(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes ()); ++ //free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(header_reg, swap_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(header_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ if (os::is_MP()); //lock(); ++ cmpxchg(header_reg, Address(obj_reg, 0), swap_reg); ++ ++ // zero for recursive case ++ bne(AT, R0, done); ++ delayed()->nop(); ++ ++ // Call the runtime routine for slow case. ++ sd(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ // Decrement the register. ++ ld(AT, data); ++ daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ slt(AT, tmp, R0); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ sd(tmp, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(AT, data); ++ // Increment the register. ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ dsubu(tmp, tmp, AT); ++ sd(tmp, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(S0); ++ if (decrement) { ++ // Decrement the register. ++ daddu(AT, mdp_in, reg); ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ ld(AT, AT, constant); ++ ++ daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ slt(AT, tmp, R0); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ ++ daddu(AT, mdp_in, reg); ++ sd(tmp, AT, constant); ++ } else { ++ daddu(AT, mdp_in, reg); ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ ld(AT, AT, constant); ++ ++ // Increment the register. ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ dsubu(tmp, tmp, AT); ++ ++ daddu(AT, mdp_in, reg); ++ sd(tmp, AT, constant); ++ } ++ pop(S0); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ push(T8); ++ // T8 is used as a temporary register. ++ daddiu(T8, bumped_count, DataLayout::counter_increment); ++ slt(AT, T8, R0); ++ dsubu(bumped_count, T8, AT); ++ pop(T8); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lb(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::stackElementScale()); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ T8, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // For c++ interpreter the result is always stored at a known location in the frame ++ // template interpreter will leave it on the top of the stack. 
++ NOT_CC_INTERP(push(state);) ++ lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ NOT_CC_INTERP(pop(state)); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ NOT_CC_INTERP(push(state);) ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ T8, S3); ++ NOT_CC_INTERP(pop(state)); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp +new file mode 100644 +index 0000000000..a2ebdec3ad +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true); ++#endif // CC_INTERP ++ ++ public: ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ 
// load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp +new file mode 100644 +index 0000000000..26fced492a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp +@@ -0,0 +1,49 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP ++ ++ ++// Generation of Interpreter ++// ++ friend class AbstractInterpreterGenerator; ++ ++ private: ++ ++ address generate_normal_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ address generate_Reference_get_entry(); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++ ++#endif // CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp +new file mode 100644 +index 0000000000..8dec2007c6 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ } ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp +new file mode 100644 +index 0000000000..14b7e39af7 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp +@@ -0,0 +1,259 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ Label L; ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(temp(), R0); ++ __ bind(L); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips.hpp b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp +new file mode 100644 +index 0000000000..9a21d704fa +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETER_MIPS_HPP ++ ++ public: ++ ++ // Sentinel placed in the code for interpreter returns so ++ // that i2c adapters and osr code can recognize an interpreter ++ // return address and convert the return to a specialized ++ // block of code to handle compiedl return values and cleaning ++ // the fpu stack. 
++ static const int return_sentinel; ++ ++ static Address::ScaleFactor stackElementScale() { ++ return Address::times_8; ++ } ++ ++ // Offset from sp (which points to the last stack element) ++ static int expr_offset_in_bytes(int i) { return stackElementSize * i; } ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreterSize to get the VM to print out the size. ++ // Max size with JVMTI and TaggedStackInterpreter ++ const static int InterpreterCodeSize = 168 * 1024; ++#endif // CPU_MIPS_VM_INTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp +new file mode 100644 +index 0000000000..014c812713 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++address AbstractInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? ++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++ ++// ++// Various method entries ++// ++ ++address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ ++ // Rmethod: methodOop ++ // V0: scratrch ++ // Rsender: send 's sp ++ ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ address entry_point = __ pc(); ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: [ lo(arg) ] <-- sp ++ // [ hi(arg) ] ++ { ++ // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are ++ // java methods. Interpreter::method_kind(...) will select ++ // this entry point for the corresponding methods in JDK 1.3. ++ __ ldc1(F12, SP, 0 * wordSize); ++ __ ldc1(F13, SP, 1 * wordSize); ++ __ push2(RA, FP); ++ __ daddiu(FP, SP, 2 * wordSize); ++ ++ // [ fp ] <-- sp ++ // [ ra ] ++ // [ lo ] <-- fp ++ // [ hi ] ++ //FIXME, need consider this ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ __ trigfunc('s'); ++ break; ++ case Interpreter::java_lang_math_cos : ++ __ trigfunc('c'); ++ break; ++ case Interpreter::java_lang_math_tan : ++ __ trigfunc('t'); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ __ sqrt_d(F0, F12); ++ break; ++ case Interpreter::java_lang_math_abs: ++ __ abs_d(F0, F12); ++ break; ++ case Interpreter::java_lang_math_log: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_log10: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_pow: ++ break; ++ case Interpreter::java_lang_math_exp: ++ break; ++ ++ default : ++ ShouldNotReachHere(); ++ } ++ ++ // must maintain return value in F0:F1 ++ __ ld(RA, FP, (-1) * wordSize); ++ //FIXME ++ __ ld(FP, FP, (-2) * wordSize); ++ __ move(SP, Rsender); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ return entry_point; ++} ++ ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address InterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++// Empty method, generate a very fast return. ++ ++address InterpreterGenerator::generate_empty_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender: sender 's sp , must set sp to this value on return , on mips ,now use T0,as it right? ++ if (!UseFastEmptyMethods) return NULL; ++ ++ address entry_point = __ pc(); ++ ++ Label slow_path; ++ __ li(RT0, SafepointSynchronize::address_of_state()); ++ __ lw(AT, RT0, 0); ++ __ move(RT0, (SafepointSynchronize::_not_synchronized)); ++ __ bne(AT, RT0,slow_path); ++ __ delayed()->nop(); ++ __ move(SP, Rsender); ++ __ jr(RA); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ ++ return entry_point; ++ ++} ++ ++void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { ++ ++ // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in ++ // the days we had adapter frames. When we deoptimize a situation where a ++ // compiled caller calls a compiled caller will have registers it expects ++ // to survive the call to the callee. 
If we deoptimize the callee the only ++ // way we can restore these registers is to have the oldest interpreter ++ // frame that we create restore these values. That is what this routine ++ // will accomplish. ++ ++ // At the moment we have modified c2 to not have any callee save registers ++ // so this problem does not exist and this routine is just a place holder. ++ ++ assert(f->is_interpreted_frame(), "must be interpreted"); ++} +diff --git a/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp +new file mode 100644 +index 0000000000..dccdf6a019 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp +new file mode 100644 +index 0000000000..0f7dd9424a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ __ clear_jweak_tag(T0); ++ ++ __ ld(T0, T0, 0); // unbox, *obj ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { 
++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp +new file mode 100644 +index 0000000000..dfcd47b478 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++#include "prims/jni.h" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. 
++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/jni_mips.h b/hotspot/src/cpu/mips/vm/jni_mips.h +new file mode 100644 +index 0000000000..6714f51d5d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/jni_mips.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. Oracle designates this ++ * particular file as subject to the "Classpath" exception as provided ++ * by Oracle in the LICENSE file that accompanied this code. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef _JAVASOFT_JNI_MD_H_ ++#define _JAVASOFT_JNI_MD_H_ ++ ++// Note: please do not change these without also changing jni_md.h in the JDK ++// repository ++#ifndef __has_attribute ++ #define __has_attribute(x) 0 ++#endif ++#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) ++ #define JNIEXPORT __attribute__((visibility("default"))) ++ #define JNIIMPORT __attribute__((visibility("default"))) ++#else ++ #define JNIEXPORT ++ #define JNIIMPORT ++#endif ++ ++#define JNICALL ++ ++typedef int jint; ++ ++typedef long jlong; ++ ++typedef signed char jbyte; ++ ++#endif +diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp +new file mode 100644 +index 0000000000..2b8840ae10 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp +@@ -0,0 +1,4332 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc_interface/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/cardTableModRefBS.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++#if INCLUDE_ALL_GCS
++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
++#include "gc_implementation/g1/heapRegion.hpp"
++#endif // INCLUDE_ALL_GCS
++
++#define A0 RA0
++#define A1 RA1
++#define A2 RA2
++#define A3 RA3
++#define A4 RA4
++#define A5 RA5
++#define A6 RA6
++#define A7 RA7
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++ unsigned int k;
++ for(k=0; k<32; k++) {
++ s->print_cr("i%d = 0x%.16lx", k, i[k]);
++ }
++ s->cr();
++
++ for(k=0; k<32; k++) {
++ s->print_cr("f%d = %f", k, f[k]);
++ }
++ s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ sw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ lw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target) {
++ jint& stub_inst = *(jint*) branch;
++ jint *pc = (jint *)branch;
++
++ if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++ //b_far:
++ // move(AT, RA); // daddu
++ // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
++ // nop();
++ // lui(T9, 0); // to be patched
++ // ori(T9, 0);
++ // daddu(T9, T9, RA);
++ // move(RA, AT);
++ // jr(T9);
++
++ assert(opcode(pc[3]) == lui_op
++ && opcode(pc[4]) == ori_op
++ && special(pc[5]) == daddu_op, "Not a branch label patch");
++ if(!(opcode(pc[3]) == lui_op
++ && opcode(pc[4]) == ori_op
++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
++
++ int offset = target - branch;
++ if (!is_simm16(offset)) {
++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
++ } else {
++ // revert to "beq + nop"
++ CodeBuffer cb(branch, 4 * 10);
++ MacroAssembler masm(&cb);
++#define __ masm.
++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? ++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ 
delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register 
offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. 
++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ //if (os::is_MP()) { ++ // lock(); ++ //} ++ cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc()); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ Label L; ++ Assembler::beq(r, R0, L); ++ delayed()->nop(); ++ move(r, 1); ++ bind(L); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void 
MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
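The call_VM wrappers above shuffle incoming values into the fixed argument registers A1..A3 in order, and the smashed-argument asserts guard against a later move reading a register that an earlier move already overwrote. A small illustrative check of that ordering rule (register identities are modeled as plain enumerators here, not the real register set):

#include <cassert>

enum Reg { A1, A2, A3, T5, T6 };   // illustrative register names only

// Models the rule behind the "smashed argument" asserts: once an earlier
// argument has been moved into A1 or A2, a later source must not alias those
// destinations, or its value has already been lost.
void marshal_three_args(Reg arg_1, Reg arg_2, Reg arg_3) {
  (void)arg_1;                                    // move(A1, arg_1) is always safe
  assert(arg_2 != A1 && "arg_2 was clobbered by the move into A1");
  (void)arg_2;                                    // move(A2, arg_2)
  assert(arg_3 != A1 && arg_3 != A2 && "arg_3 was clobbered by an earlier move");
  (void)arg_3;                                    // move(A3, arg_3)
}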
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
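serialize_memory above hashes the thread pointer down to a per-thread, int-aligned slot inside the serialization page, so concurrent writers tend to land on different cache lines. In isolation the index arithmetic looks like this (page size and shift count are parameters here, not the real os:: values):

#include <cstddef>
#include <cstdint>

// Sketch of the offset computation in serialize_memory.
size_t serialize_slot(uintptr_t thread_ptr, size_t page_size, unsigned shift) {
  size_t mask = page_size - sizeof(int);      // keeps the slot int-aligned and inside the page
  return (thread_ptr >> shift) & mask;        // srl + andi in the code above
}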
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++////////////////////////////////////////////////////////////////////////////////// ++#if INCLUDE_ALL_GCS ++ ++void MacroAssembler::g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_active())); ++ Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ ++ // Is marking active? ++ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { ++ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); ++ lb(AT, in_progress); ++ } ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ beq(pre_val, R0, done); ++ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) 
++ ++ ld(tmp, index); ++ beq(tmp, R0, runtime); ++ delayed()->nop(); ++ ++ daddiu(tmp, tmp, -1 * wordSize); ++ sd(tmp, index); ++ ld(AT, buffer); ++ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ sd(pre_val, tmp, 0); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(runtime); ++ // save the live input values ++ if (tosca_live) push(V0); ++ ++ if (obj != noreg && obj != V0) push(obj); ++ ++ if (pre_val != V0) push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then fp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) move(A1, thread); ++ if (pre_val != A0) move(A0, pre_val); ++ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); ++ } else { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ pop(obj); ++ ++ if(tosca_live) pop(V0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert(tmp != AT, "must be"); ++ assert(tmp2 != AT, "must be"); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ xorr(AT, store_addr, new_val); ++ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ ++ // crosses regions, storing NULL? ++ beq(new_val, R0, done); ++ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ move(card_addr, store_addr); ++ dsrl(card_addr, card_addr, CardTableModRefBS::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. ++ set64(cardtable, (intptr_t)ct->byte_map_base); ++ daddu(card_addr, card_addr, cardtable); ++ ++ lb(AT, card_addr, 0); ++ daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ sync(); ++ lb(AT, card_addr, 0); ++ daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
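g1_write_barrier_post above needs two pieces of address arithmetic before it dirties anything: a cross-region test (xor of the old and new addresses, shifted by the region-size log) and the card lookup (store address shifted by the card shift, added to byte_map_base). Both, written out standalone with placeholder shift values rather than the real HotSpot constants:

#include <cstdint>

// True when the store crosses heap regions and the post-barrier must go on.
bool crosses_regions(uintptr_t store_addr, uintptr_t new_val, unsigned log_region_bytes) {
  return ((store_addr ^ new_val) >> log_region_bytes) != 0;   // xorr + dsrl, beq if zero
}

// Locates the card byte covering store_addr.
uint8_t* card_for(uintptr_t store_addr, uint8_t* byte_map_base, unsigned card_shift) {
  return byte_map_base + (store_addr >> card_shift);          // dsrl + daddu
}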
++ move(AT, (int)CardTableModRefBS::dirty_card_val()); ++ sb(AT, card_addr, 0); ++ ++ lw(AT, queue_index); ++ beq(AT, R0, runtime); ++ delayed()->nop(); ++ daddiu(AT, AT, -1 * wordSize); ++ sw(AT, queue_index); ++ ld(tmp2, buffer); ++ ld(AT, queue_index); ++ daddu(tmp2, tmp2, AT); ++ sd(card_addr, tmp2, 0); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(runtime); ++ // save the live input values ++ push(store_addr); ++ push(new_val); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); ++ pop(new_val); ++ pop(store_addr); ++ ++ bind(done); ++} ++ ++#endif // INCLUDE_ALL_GCS ++////////////////////////////////////////////////////////////////////////////////// ++ ++ ++void MacroAssembler::store_check(Register obj) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ store_check_part_1(obj); ++ store_check_part_2(obj); ++} ++ ++void MacroAssembler::store_check(Register obj, Address dst) { ++ store_check(obj); ++} ++ ++ ++// split the store check operation so that other instructions can be scheduled inbetween ++void MacroAssembler::store_check_part_1(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ dsrl(obj, obj, CardTableModRefBS::card_shift); ++} ++ ++void MacroAssembler::store_check_part_2(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ set64(AT, (long)ct->byte_map_base); ++ daddu(AT, AT, obj); ++ if (UseConcMarkSweepGC) sync(); ++ sb(R0, AT, 0); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); ++ ++ Register end = t2; ++#ifndef OPT_THREAD ++ Register thread = t1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ verify_tlab(t1, t2);//blows t1&t2 ++ ++ ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (var_size_in_bytes == NOREG) { ++ set64(AT, con_size_in_bytes); ++ addu(end, obj, AT); ++ } else { ++ addu(end, obj, var_size_in_bytes); ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, end); ++ bne_far(AT, R0, slow_case); ++ delayed()->nop(); ++ ++ ++ // update the tlab top pointer ++ st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ verify_tlab(t1, t2); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { ++ // No allocation in the shared eden. 
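tlab_allocate above is a plain bump-pointer allocation against the thread-local allocation buffer: read the current top, add the size, bail to the slow path if the new top passes the TLAB end, otherwise publish the new top. A minimal single-threaded sketch of that shape (the struct is a hypothetical stand-in for the JavaThread TLAB fields):

#include <cstddef>

struct TlabSketch {        // hypothetical stand-in for the thread's TLAB fields
  char* top;
  char* end;
};

// Returns the object start on success, nullptr when the caller must take the
// slow path (mirrors the bne_far(AT, R0, slow_case) branch above).
void* tlab_allocate_sketch(TlabSketch& tlab, size_t size_in_bytes) {
  char* obj = tlab.top;
  char* new_top = obj + size_in_bytes;
  if (new_top > tlab.end) return nullptr;   // sltu + bne_far -> slow_case
  tlab.top = new_top;                       // st_ptr(end, thread, tlab_top_offset)
  return obj;
}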
++ b_far(slow_case); ++ delayed()->nop(); ++ } else { ++ ++ Address heap_top(t1); ++ li(t1, (long)Universe::heap()->top_addr()); ++ ld_ptr(obj, heap_top); ++ ++ Register end = t2; ++ Label retry; ++ ++ bind(retry); ++ if (var_size_in_bytes == NOREG) { ++ set64(AT, con_size_in_bytes); ++ addu(end, obj, AT); ++ } else { ++ addu(end, obj, var_size_in_bytes); ++ } ++ // if end < obj then we wrapped around => object too long => slow case ++ sltu(AT, end, obj); ++ bne_far(AT, R0, slow_case); ++ delayed()->nop(); ++ ++ li(AT, (long)Universe::heap()->end_addr()); ++ ld_ptr(AT, AT, 0); ++ sltu(AT, AT, end); ++ bne_far(AT, R0, slow_case); ++ delayed()->nop(); ++ // Compare obj with the top addr, and if still equal, store the new top addr in ++ // end at the address of the top addr pointer. Sets ZF if was equal, and clears ++ // it otherwise. Use lock prefix for atomicity on MPs. ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ cmpxchg(end, heap_top, obj); ++ beq_far(AT, R0, retry); ++ delayed()->nop(); ++ } ++} ++ ++// C2 doesn't invoke this one. ++void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { ++ Register top = T0; ++ Register t1 = T1; ++ Register t2 = T9; ++ Register t3 = T3; ++ Register thread_reg = T8; ++ assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4); ++ Label do_refill, discard_tlab; ++ ++ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { ++ // No allocation in the shared eden. ++ b(slow_case); ++ delayed()->nop(); ++ } ++ ++ get_thread(thread_reg); ++ ++ ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); ++ ++ // calculate amount of free space ++ subu(t1, t1, top); ++ shr(t1, LogHeapWordSize); ++ ++ // Retain tlab and allocate object in shared space if ++ // the amount free in the tlab is too large to discard. 
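eden_allocate above races other threads for the shared heap top with a CAS retry loop: recompute the end, reject wrap-around and passing the heap end, then try to install the new top and loop if another allocator got there first. The same loop in portable C++ (the heap limits are parameters here):

#include <atomic>
#include <cstddef>

// Illustrative shared-eden bump allocation with a CAS retry loop.
char* eden_allocate_sketch(std::atomic<char*>& heap_top, char* heap_end, size_t size) {
  char* obj = heap_top.load();
  for (;;) {
    char* end = obj + size;
    if (end < obj) return nullptr;        // wrapped around: object too long, slow path
    if (end > heap_end) return nullptr;   // would pass the heap end: slow path
    // cmpxchg(end, heap_top, obj): on failure obj is reloaded and we retry.
    if (heap_top.compare_exchange_weak(obj, end)) return obj;
  }
}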
++ ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); ++ slt(AT, t2, t1); ++ beq(AT, R0, discard_tlab); ++ delayed()->nop(); ++ ++ // Retain ++ li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); ++ addu(t2, t2, AT); ++ st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); ++ ++ if (TLABStats) { ++ // increment number of slow_allocations ++ lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); ++ addiu(AT, AT, 1); ++ sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); ++ } ++ b(try_eden); ++ delayed()->nop(); ++ ++ bind(discard_tlab); ++ if (TLABStats) { ++ // increment number of refills ++ lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); ++ addiu(AT, AT, 1); ++ sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); ++ // accumulate wastage -- t1 is amount free in tlab ++ lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); ++ addu(AT, AT, t1); ++ sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); ++ } ++ ++ // if tlab is currently allocated (top or end != null) then ++ // fill [top, end + alignment_reserve) with array object ++ beq(top, R0, do_refill); ++ delayed()->nop(); ++ ++ // set up the mark word ++ li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2)); ++ st_ptr(AT, top, oopDesc::mark_offset_in_bytes()); ++ ++ // set the length to the remaining space ++ addiu(t1, t1, - typeArrayOopDesc::header_size(T_INT)); ++ addiu(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); ++ shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); ++ sw(t1, top, arrayOopDesc::length_offset_in_bytes()); ++ ++ // set klass to intArrayKlass ++ li(AT, (intptr_t)Universe::intArrayKlassObj_addr()); ++ ld_ptr(t1, AT, 0); ++ //st_ptr(t1, top, oopDesc::klass_offset_in_bytes()); ++ store_klass(top, t1); ++ ++ ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset())); ++ subu(t1, top, t1); ++ incr_allocated_bytes(thread_reg, t1, 0); ++ ++ // refill the tlab with an eden allocation ++ bind(do_refill); ++ ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); ++ shl(t1, LogHeapWordSize); ++ // add object_size ?? ++ eden_allocate(top, t1, 0, t2, t3, slow_case); ++ ++ // Check that t1 was preserved in eden_allocate. 
++#ifdef ASSERT ++ if (UseTLAB) { ++ Label ok; ++ assert_different_registers(thread_reg, t1); ++ ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset())); ++ shl(AT, LogHeapWordSize); ++ beq(AT, t1, ok); ++ delayed()->nop(); ++ stop("assert(t1 != tlab size)"); ++ should_not_reach_here(); ++ ++ bind(ok); ++ } ++#endif ++ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset())); ++ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); ++ addu(top, top, t1); ++ addiu(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); ++ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset())); ++ verify_tlab(t1, t2); ++ b(retry); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++static const double pi_4 = 0.7853981633974483; ++ ++// must get argument(a double) in F12/F13 ++//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { ++//We need to preseve the register which maybe modified during the Call ++void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { ++ // save all modified register here ++ // FIXME, in the disassembly of tirgfunc, only used V0, V1, T9, SP, RA, so we ony save V0, V1, T9 ++ pushad(); ++ // we should preserve the stack space before we call ++ addiu(SP, SP, -wordSize * 2); ++ switch (trig){ ++ case 's' : ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); ++ delayed()->nop(); ++ break; ++ case 'c': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); ++ delayed()->nop(); ++ break; ++ case 't': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); ++ delayed()->nop(); ++ break; ++ default:assert (false, "bad intrinsic"); ++ break; ++ ++ } ++ ++ addiu(SP, SP, wordSize * 2); ++ popad(); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
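li48 above materializes a 48-bit constant from three 16-bit chunks: lui places the top chunk, an ori merges the middle chunk, a 16-bit shift makes room, and a final ori adds the low chunk; li and set64 pick this four-instruction form only when the value is positive and its top 16 bits fit a signed immediate. Recomposing the chunks arithmetically shows the sequence reproduces the original value (assuming a positive 48-bit input, as those call sites guarantee):

#include <cstdint>

// Recomposes a positive 48-bit value the way li48 does, one 16-bit chunk at a time.
int64_t li48_sketch(int64_t imm) {
  uint16_t c2 = static_cast<uint16_t>(imm >> 32);   // lui(rd, imm >> 32)
  uint16_t c1 = static_cast<uint16_t>(imm >> 16);   // ori(rd, rd, split_low(imm >> 16))
  uint16_t c0 = static_cast<uint16_t>(imm);         // ori(rd, rd, split_low(imm))
  int64_t rd = (static_cast<int64_t>(c2) << 16) | c1;
  rd <<= 16;                                        // dsll(rd, rd, 16)
  rd |= c0;
  return rd;                                        // equals imm for positive 48-bit values
}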
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ srl(AT, reg, 8); 
++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++// do 32-bit CAS using MIPS64 lld/scd ++// ++// cas_int should only compare 32-bits of the memory value. ++// However, lld/scd will do 64-bit operation, which violates the intention of cas_int. ++// To simulate a 32-bit atomic operation, the value loaded with LLD should be split into ++// tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, ++// plus the high-32 bits or memory value, are stored togethor with SCD. ++// ++//Example: ++// ++// double d = 3.1415926; ++// System.err.println("hello" + d); ++// ++// sun.misc.FloatingDecimal$1.() ++// | ++// `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() ++// ++// 38 cas_int [a7a7|J] [a0|I] [a6|I] ++// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 ++// a6: 0x4ab325aa ++// ++//again: ++// 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" ++// ++// 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) ++// 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits ++// 0x00000055647f3c68: dsll32 t8, t8, 0 ++// 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal ++// 0x00000055647f3c70: sll zero, zero, 0 ++// ++// 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) ++// 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; ++// 0x00000055647f3c7c: ori v1, v1, 0xffffffff ++// 0x00000055647f3c80: and v1, a6, v1 ++// 0x00000055647f3c84: or at, t8, v1 ++// 0x00000055647f3c88: scd at, 0x0(a7) ++// 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again ++// 0x00000055647f3c90: sll zero, zero, 0 ++// 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done ++// 0x00000055647f3c98: sll zero, zero, 0 ++//nequal: ++// 0x00000055647f45a4: daddu a0, t9, zero ++// 0x00000055647f45a8: daddu at, zero, zero ++//done: ++// ++ ++void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { ++ // MIPS64 can use ll/sc for 32-bit atomic memory access ++ Label done, again, nequal; ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ move(AT, x_reg); ++ sc(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ move(c_reg, AT); ++ move(AT, R0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { ++ Label done, again, nequal; ++ ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ move(AT, x_reg); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ move(c_reg, AT); ++ move(AT, R0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ 
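The long comment above cmpxchg32 describes how a 32-bit compare-and-swap can be layered on a 64-bit lld/scd: split the loaded word, compare only the low half, and store the low half of the new value merged with the untouched high half. A portable sketch of exactly that merge-and-retry step, with std::atomic standing in for lld/scd:

#include <atomic>
#include <cstdint>

// Sketch of a 32-bit CAS performed on the low half of a 64-bit ll/sc word,
// preserving the high half that the 64-bit store would otherwise clobber.
bool cas32_in_word_sketch(std::atomic<uint64_t>* word, uint32_t expected, uint32_t newval) {
  uint64_t old_word = word->load();                                   // lld
  for (;;) {
    if (static_cast<uint32_t>(old_word) != expected)                  // compare low 32 bits only
      return false;                                                   // goto nequal
    uint64_t new_word = (old_word & 0xFFFFFFFF00000000ull) | newval;  // keep the high half
    if (word->compare_exchange_weak(old_word, new_word))              // scd; retry on failure
      return true;
  }
}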
dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); ++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED. 
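rem_s and rem_d above compute a floating-point remainder as fs - trunc(fs / ft) * ft, truncating the quotient toward zero before multiplying back. The same computation in plain C++:

#include <cmath>

// Standalone restatement of rem_d: remainder = fs - trunc(fs / ft) * ft.
double rem_d_sketch(double fs, double ft) {
  double q = std::trunc(fs / ft);   // div_d + trunc_l_d + cvt_d_l
  return fs - q * ft;               // mul_d + sub_d
}

For comparison, std::fmod yields the same quantity computed exactly; the transliteration is also why the helper insists the temporary register differs from both inputs, since the truncated quotient must survive until the final multiply and subtract.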
++// fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) - KILLED ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) { ++ ++ // Ensure the register assignents are disjoint ++ guarantee (objReg != boxReg, "") ; ++ guarantee (objReg != tmpReg, "") ; ++ guarantee (objReg != scrReg, "") ; ++ guarantee (boxReg != tmpReg, "") ; ++ guarantee (boxReg != scrReg, "") ; ++ ++ ++ block_comment("FastLock"); ++ if (PrintBiasedLockingStatistics) { ++ push(tmpReg); ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg); ++ pop(tmpReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg ++ bne(AT, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ Label IsInflated, DONE_LABEL, PopDone ; ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori (tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ ++ cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg ++ //AT == 1: unlocked ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ beq(AT, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ bne(AT, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size() ); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0 ++ ++ b(DONE_LABEL) ; ++ delayed()->nop(); ++ ++ bind(IsInflated) ; ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. 
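Viewed at a higher level, the stack-locking fast path emitted above stores the anticipated unlocked mark word into the on-stack BasicLock ("box"), CASes the object header to point at the box, and treats a CAS failure as a recursive lock when the observed header already points into the current thread's stack. A rough, platform-independent sketch of that protocol follows; it uses std::atomic and hypothetical names (try_stack_lock, page_size) and is not part of the MIPS/LoongArch port itself.

    #include <atomic>
    #include <cstdint>

    // Illustrative sketch only; the real header layout and recursion test differ.
    bool try_stack_lock(std::atomic<uintptr_t>* header,  // object mark word
                        uintptr_t* box,                   // on-stack BasicLock slot
                        uintptr_t sp, uintptr_t page_size) {
      uintptr_t expected = header->load(std::memory_order_relaxed) | 1;  // unlocked form
      *box = expected;                          // anticipate a successful CAS
      if (header->compare_exchange_strong(expected, (uintptr_t)box)) {
        return true;                            // stack lock acquired
      }
      // CAS failed: "expected" now holds the current header. If it already points
      // into our own stack frame (crudely: within one page of SP), this is a
      // recursive lock; record that with a zero displaced header.
      if (expected - sp < page_size) {
        *box = 0;
        return true;
      }
      return false;                             // fall through to the slow path
    }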
++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ move(boxReg, tmpReg) ; ++ ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(AT, R0); ++ bne(tmpReg, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ get_thread (TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ; ++ // Intentional fall-through into DONE_LABEL ... ++ ++ ++ // DONE_LABEL is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE_LABEL); ++ ++ // At DONE_LABEL the AT is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // AT == 1 -> Success ++ // AT == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { ++ ++ guarantee (objReg != boxReg, "") ; ++ guarantee (objReg != tmpReg, "") ; ++ guarantee (boxReg != tmpReg, "") ; ++ ++ block_comment("FastUnlock"); ++ ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... 
++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(tmpReg, Address(objReg, 0), boxReg); ++ bind(DONE_LABEL); ++ } else { ++ Label DONE_LABEL, Stacked, CheckSucc, Inflated ; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ ++ ld(AT, Address(boxReg, 0)) ; // Examine the displaced header ++ beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock ++ delayed()->daddiu(AT, R0, 0x1); ++ ++ ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated? ++ beq(AT, R0, Stacked) ; // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated) ; ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
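The comment above describes the "1-0" exit that the inflated path below implements: verify that the current thread owns the monitor with no recursions and no queued waiters, then drop the lock with a plain store preceded by a barrier. A hedged sketch with std::atomic and a simplified, hypothetical monitor layout (not HotSpot's ObjectMonitor):

    #include <atomic>
    #include <cstdint>

    // Hypothetical monitor layout, for illustration only.
    struct Monitor {
      std::atomic<void*>    owner;
      std::atomic<intptr_t> recursions;
      std::atomic<void*>    cxq;        // recently arrived waiters
      std::atomic<void*>    EntryList;  // threads ready to contend
    };

    // Returns false when the slow-path exit (with wakeups) is required.
    bool try_fast_exit(Monitor* m, void* self) {
      if (m->owner.load(std::memory_order_relaxed) != self)          return false;
      if (m->recursions.load(std::memory_order_relaxed) != 0)        return false;
      if (m->cxq.load(std::memory_order_relaxed) != nullptr)         return false;
      if (m->EntryList.load(std::memory_order_relaxed) != nullptr)   return false;
      // The release store plays the role of the sync() before sd(R0, owner) above:
      // writes made in the critical section become visible before the lock is
      // observed as free.
      m->owner.store(nullptr, std::memory_order_release);
      return true;
    }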
++#ifndef OPT_THREAD ++ get_thread (TREG) ; ++#endif ++ ++ // It's inflated ++ ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; ++ xorr(boxReg, boxReg, TREG); ++ ++ ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; ++ orr(boxReg, boxReg, AT); ++ ++ move(AT, R0); ++ bne(boxReg, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; ++ ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; ++ orr(boxReg, boxReg, AT); ++ ++ move(AT, R0); ++ bne(boxReg, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ sync(); ++ sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; ++ move(AT, 0x1); ++ b(DONE_LABEL); ++ delayed()->nop(); ++ ++ bind (Stacked); ++ ld(tmpReg, Address(boxReg, 0)) ; ++ //if (os::is_MP()) { sync(); } ++ cmpxchg(tmpReg, Address(objReg, 0), boxReg); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE_LABEL); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // 
Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src) { ++ if(UseCompressedOops){ ++ lwu(dst, src); ++ decode_heap_oop(dst); ++ } else { ++ ld(dst, src); ++ } ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src){ ++ if(UseCompressedOops){ ++ assert(!dst.uses(src), "not enough registers"); ++ encode_heap_oop(src); ++ sw(src, dst); ++ } else { ++ sd(src, dst); ++ } ++} ++ ++void MacroAssembler::store_heap_oop_null(Address dst){ ++ if(UseCompressedOops){ ++ sw(R0, dst); ++ } else { ++ sd(R0, dst); ++ } ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. 
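Before the assembler implementations that follow, here is a scalar sketch of the compression scheme they must agree with. The variables narrow_base and narrow_shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift(); they are assumptions of the sketch, not HotSpot API.

    #include <cstdint>

    static uintptr_t narrow_base;   // heap base used for compressed oops (may be 0)
    static int       narrow_shift;  // log2 of object alignment (may be 0)

    uint32_t encode_heap_oop_model(uintptr_t oop) {
      if (oop == 0) return 0;       // NULL must stay NULL (the movz in the code below)
      return (uint32_t)((oop - narrow_base) >> narrow_shift);
    }

    uintptr_t decode_heap_oop_model(uint32_t narrow) {
      if (narrow == 0) return 0;
      return narrow_base + ((uintptr_t)narrow << narrow_shift);
    }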
++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == 
Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. 
++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++SkipIfEqual::SkipIfEqual( ++ MacroAssembler* masm, const bool* flag_addr, bool value) { ++ _masm = masm; ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ _masm->addiu(AT, AT, -value); ++ _masm->beq(AT, R0, _label); ++ _masm->delayed()->nop(); ++} ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = 
in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
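Abstracting away from the branch plumbing, the fast path above together with the slow path that follows implement the check sketched below. This is a hedged scalar model with simplified field names, not the real Klass layout; in HotSpot the probed offset may also land in the primary-supers display.

    #include <cstddef>

    struct Klass {
      size_t  super_check_offset;      // byte offset the fast path probes in the subtype
      Klass*  secondary_super_cache;   // one-element cache updated by the slow path
      Klass** secondary_supers;        // interfaces and deeply nested supertypes
      int     secondary_supers_len;
    };

    bool is_subtype_of(Klass* sub, Klass* super) {
      if (sub == super) return true;                        // trivial self check
      // Fast path: load the word that super says to probe inside sub.
      Klass* probe = *(Klass**)((char*)sub + super->super_check_offset);
      if (probe == super) return true;
      // A miss is decisive unless the probe landed on the secondary-super cache.
      if (super->super_check_offset != offsetof(Klass, secondary_super_cache))
        return false;
      // Slow path: linear scan of the secondary supers, caching a hit for next time.
      for (int i = 0; i < sub->secondary_supers_len; i++) {
        if (sub->secondary_supers[i] == super) {
          sub->secondary_super_cache = super;
          return true;
        }
      }
      return false;
    }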
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
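The loop that the remainder of this function emits walks the itable described in the comments above: past the vtable lie (interface, offset) pairs, and a match yields the offset of that interface's method array inside the receiver's klass. A hedged scalar model with hypothetical types, not HotSpot's itableOffsetEntry/itableMethodEntry:

    struct ItableOffsetEntry { void* interface; int offset; };

    // Scan the (interface, offset) pairs; a null interface terminates the table.
    void** lookup_itable_method(char* recv_klass, void* intf,
                                ItableOffsetEntry* entries, int itable_index) {
      for (ItableOffsetEntry* e = entries; ; e++) {
        if (e->interface == nullptr) return nullptr;   // receiver does not implement intf
        if (e->interface == intf) {
          void** methods = (void**)(recv_klass + e->offset);
          return &methods[itable_index];               // slot of the target method
        }
      }
    }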
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, 
index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ ld(value, value, -JNIHandles::weak_tag_value); ++ verify_oop(value); ++ #if INCLUDE_ALL_GCS ++ if (UseG1GC) { ++ g1_write_barrier_pre(noreg /* obj */, ++ value /* pre_val */, ++ thread /* thread */, ++ tmp /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++ #endif // INCLUDE_ALL_GCS ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ ld(value, value, 0); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ 
if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} +diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp +new file mode 100644 +index 0000000000..ab9727793f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp +@@ -0,0 +1,701 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "utilities/macros.hpp" ++#include "runtime/rtmLocking.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++#ifdef CC_INTERP ++ // c++ interpreter never wants to use interp_masm version of call_VM ++ #define VIRTUAL ++#else ++ #define VIRTUAL virtual ++#endif ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. 
++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // Stores ++ void store_check(Register obj); // store check for obj - register is destroyed afterwards ++ void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) ++ ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ void clear_jweak_tag(Register possibly_jweak); ++ ++#if INCLUDE_ALL_GCS ++ ++ void g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++#endif // INCLUDE_ALL_GCS ++ ++ // split store_check(Register obj) to enhance instruction interleaving ++ void store_check_part_1(Register obj); ++ void store_check_part_2(Register obj); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ //add for compressedoops ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void load_heap_oop(Register dst, Address src); ++ void store_heap_oop(Address dst, Register src); ++ void store_heap_oop_null(Address dst); ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. ++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ void trigfunc(char trig, int num_fpu_regs_in_use = 1); ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. 
++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. 
Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register tmp); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. 
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Register x_reg, Address dest, Register c_reg); ++ void cmpxchg32(Register x_reg, Address dest, Register c_reg); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); 
} ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic 
destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp +new file mode 100644 +index 0000000000..92c05fb726 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp +new file mode 100644 +index 0000000000..0c467df2f3 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "memory/metaspaceShared.hpp" ++ ++// Generate the self-patching vtable method: ++// ++// This method will be called (as any other Klass virtual method) with ++// the Klass itself as the first argument. Example: ++// ++// oop obj; ++// int size = obj->klass()->klass_part()->oop_size(this); ++// ++// for which the virtual method call is Klass::oop_size(); ++// ++// The dummy method is called with the Klass object as the first ++// operand, and an object as the second argument. ++// ++ ++//===================================================================== ++ ++// All of the dummy methods in the vtable are essentially identical, ++// differing only by an ordinal constant, and they bear no releationship ++// to the original method which the caller intended. Also, there needs ++// to be 'vtbl_list_size' instances of the vtable in order to ++// differentiate between the 'vtable_list_size' original Klass objects. ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void MetaspaceShared::generate_vtable_methods(void** vtbl_list, ++ void** vtable, ++ char** md_top, ++ char* md_end, ++ char** mc_top, ++ char* mc_end) { ++ ++ intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); ++ *(intptr_t *)(*md_top) = vtable_bytes; ++ *md_top += sizeof(intptr_t); ++ void** dummy_vtable = (void**)*md_top; ++ *vtable = dummy_vtable; ++ *md_top += vtable_bytes; ++ ++ // Get ready to generate dummy methods. ++ ++ CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ ++ Label common_code; ++ for (int i = 0; i < vtbl_list_size; ++i) { ++ for (int j = 0; j < num_virtuals; ++j) { ++ dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); ++ ++ // Load V0 with a value indicating vtable/offset pair. ++ // -- bits[ 7..0] (8 bits) which virtual method in table? ++ // -- bits[12..8] (5 bits) which virtual method table? ++ // -- must fit in 13-bit instruction immediate field. ++ __ move(V0, (i << 8) + j); ++ __ b(common_code); ++ __ delayed()->nop(); ++ } ++ } ++ ++ __ bind(common_code); ++ ++ __ srl(T9, V0, 8); // isolate vtable identifier. ++ __ shl(T9, LogBytesPerWord); ++ __ li(AT, (long)vtbl_list); ++ __ addu(T9, AT, T9); ++ __ ld(T9, T9, 0); // get correct vtable address. ++ __ sd(T9, A0, 0); // update vtable pointer. ++ ++ __ andi(V0, V0, 0x00ff); // isolate vtable method index ++ __ shl(V0, LogBytesPerWord); ++ __ addu(T9, T9, V0); ++ __ ld(T9, T9, 0); // address of real method pointer. ++ __ jr(T9); // get real method pointer. ++ __ delayed()->nop(); ++ ++ __ flush(); ++ ++ *mc_top = (char*)__ pc(); ++} +diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp +new file mode 100644 +index 0000000000..428c271362 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, err_msg("%s should be nonzero", xname)); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* 
_masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ // the following assumes that a Method* is normally compressed in the vmtarget field: ++ __ ld(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the 
interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lbu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ ld(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ ld(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ ld(temp2_index, member_vmindex); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ ld(rm_index, member_vmindex); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. 
++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). 
++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && mh->is_oop()) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp +new file mode 100644 +index 0000000000..03b65fc8ef +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. 
++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/hotspot/src/cpu/mips/vm/mips.ad b/hotspot/src/cpu/mips/vm/mips.ad +new file mode 100644 +index 0000000000..3563bbe0e5 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/hotspot/src/cpu/mips/vm/mips_64.ad b/hotspot/src/cpu/mips/vm/mips_64.ad +new file mode 100644 +index 0000000000..2d714c8be1 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/mips_64.ad +@@ -0,0 +1,14035 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, 
S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); 
++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class 
v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. 
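// Each reg_class above is just a named set of allocatable registers; the ADLC
// turns it into a register mask that the allocator tests. A minimal model of
// that idea, using the encodings from the reg_def entries above (T0=12 ... T3=15,
// T8=24, T9=25); everything else here is illustrative, not the real ADLC output.
#include <cstdint>
#include <cstdio>
#include <initializer_list>

static uint64_t make_mask(std::initializer_list<int> encodings) {
  uint64_t m = 0;
  for (int e : encodings) m |= (uint64_t)1 << e;
  return m;
}

int main() {
  uint64_t t_reg = make_mask({12, 13, 14, 15, 24, 25});  // like reg_class t_reg(T0,T1,T2,T3,T8,T9)
  std::printf("T2 (enc 14) in t_reg: %d\n", (int)((t_reg >> 14) & 1));  // 1
  std::printf("S0 (enc 16) in t_reg: %d\n", (int)((t_reg >> 16) & 1));  // 0
  return 0;
}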
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. 
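// size_exception_handler() and size_deopt_handler() above reserve space for a
// single NativeCall rounded up to a 16-byte boundary. A stand-alone sketch of
// that arithmetic; the 24-byte call size is taken from the assert on
// NativeCall::instruction_size further down, and round_to here assumes a
// power-of-two alignment.
#include <cstdio>

static unsigned round_to(unsigned x, unsigned align) {
  return (x + align - 1) & ~(align - 1);   // align must be a power of two
}

int main() {
  unsigned native_call_size = 24;                                          // six 4-byte instructions
  std::printf("handler stub: %u bytes\n", round_to(native_call_size, 16)); // 32
  return 0;
}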
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? True for Intel but false for most RISCs ++const bool Matcher::clone_shift_expressions = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. 
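// is_short_branch_offset() above decides whether a branch can use the short
// form by checking that the word offset, padded by a small safety zone for the
// delay slot, still fits a signed 16-bit field. A stand-alone sketch of that
// test, assuming is_simm16(x) means "x fits in 16 signed bits".
#include <cstdint>
#include <cstdio>

static bool is_simm16(int64_t x) { return x >= -32768 && x <= 32767; }

static bool short_branch_reachable(int offset, int br_size) {
  const int BytesPerInstWord = 4;
  int offs = offset - br_size + 4;               // offset measured from the branch itself
  const int safety_zone = 3 * BytesPerInstWord;  // conservative slack, as in the code above
  return is_simm16((offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2);
}

int main() {
  std::printf("%d\n", short_branch_reachable(1000, 8));    // 1: well inside +/-128 KiB
  std::printf("%d\n", short_branch_reachable(1 << 20, 8)); // 0: too far for a 16-bit word offset
  return 0;
}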
++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ 
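// max_vector_size() above is plain arithmetic: with the 8-byte vector width
// asserted by vector_width_in_bytes(), the lane count is 8 divided by the
// element size, and min_vector_size() pins the minimum to the same value, so
// only one vector length is supported. A small sketch of that arithmetic.
#include <cstdio>

static int max_vector_elems(int vector_width_in_bytes, int elem_bytes) {
  return vector_width_in_bytes / elem_bytes;
}

int main() {
  std::printf("byte lanes: %d\n", max_vector_elems(8, 1));  // 8
  std::printf("int  lanes: %d\n", max_vector_elems(8, 4));  // 2
  std::printf("long lanes: %d\n", max_vector_elems(8, 8));  // 1
  return 0;
}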
++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Threshold size for cleararray. ++const int Matcher::init_array_short_size = 8 * BytesPerLong; ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// Since MIPS doesn't have absolute addressing, it needs. ++bool SafePointNode::needs_polling_address_input() { ++ return false; ++} ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
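// The compute_padding() overrides above all compute the same thing: the number
// of bytes needed to round the current offset up to the node's required
// alignment. A stand-alone sketch of that calculation.
#include <cstdio>

static int compute_padding_sketch(int current_offset, int alignment) {
  int rounded = (current_offset + alignment - 1) / alignment * alignment;  // round up
  return rounded - current_offset;                                         // bytes of padding
}

int main() {
  std::printf("%d\n", compute_padding_sketch(20, 16));  // 12
  std::printf("%d\n", compute_padding_sketch(32, 16));  // 0
  return 0;
}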
++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ int size = 0; ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++ } ++#endif ++ } ++ size += 8; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++ } ++#endif ++ } ++ size += 8; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ld 
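// The spill-copy code above (and below) repeatedly tests whether two allocator
// slot numbers name an aligned 64-bit register pair: the low half must sit on
// an even slot and the high half immediately after it. A stand-alone sketch of
// that predicate.
#include <cstdio>

static bool is_aligned_64bit_pair(int first, int second) {
  return (first & 1) == 0 && first + 1 == second;
}

int main() {
  std::printf("%d\n", is_aligned_64bit_pair(4, 5));  // 1: a 64-bit value in an aligned pair
  std::printf("%d\n", is_aligned_64bit_pair(5, 6));  // 0: not an aligned pair
  return 0;
}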
%s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ 
move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first 
& 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return size; ++ ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++# ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # 
Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ 
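// The epilogue above reloads RA and FP from the two slots just below the top
// of the fixed frame before releasing it, and the prologue further down stores
// them at the same offsets. A small sketch of those slot offsets, assuming
// wordSize == 8.
#include <cstdio>

struct FrameSketch {
  int framesize;                                       // total frame size in bytes
  int ra_offset() const { return framesize - 8; }      // framesize - wordSize
  int fp_offset() const { return framesize - 2 * 8; }  // framesize - 2 * wordSize
};

int main() {
  FrameSketch f{80};
  std::printf("RA at SP+%d, FP at SP+%d\n", f.ra_offset(), f.fp_offset());  // 72 and 64
  return 0;
}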
st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. ++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. 
++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. 
++encode %{ ++ ++ //Load byte signed ++ enc_class load_B_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1) { ++ if (scale == 0) { ++ __ gslbx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslbx(as_Register(dst), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ lb(as_Register(dst), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(as_Register(dst), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ lb(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lb(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(as_Register(dst), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ lb(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ //Load byte unsigned ++ enc_class load_UB_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lbu(as_Register(dst), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ lbu(as_Register(dst), AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lbu(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lbu(as_Register(dst), AT, 0); ++ } ++ } ++ %} ++ ++ enc_class store_B_reg_enc (memory mem, mRegI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ if( Assembler::is_simm(disp, 8) ) { ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ __ sb(as_Register(src), AT, disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ __ sb(as_Register(src), AT, disp); ++ } else { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sb(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if( Assembler::is_simm(disp, 8) ) { ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), AT, as_Register(base), disp); ++ } else { ++ __ addu(AT, as_Register(base), AT); ++ __ sb(as_Register(src), AT, disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ addu(AT, as_Register(base), AT); ++ __ 
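// Each enc_class above lowers a memory operand of the form
//   base + (index << scale) + disp
// in the same basic way: compute the address into the scratch register AT when
// an index is present, then either fold disp into the load/store if it fits a
// signed 16-bit immediate or materialize it in T9 first. A compact sketch of
// the non-LEXT1 decision path, printing the byte-load sequence it would pick
// (register names and mnemonics follow the code above; the sketch is
// illustrative only).
#include <cstdint>
#include <cstdio>

static bool is_simm16(int64_t x) { return x >= -32768 && x <= 32767; }

static void lower_byte_load(int base, int index, int scale, int64_t disp) {
  if (index != 0) {
    if (scale == 0) std::printf("addu AT, R%d, R%d\n", base, index);
    else            std::printf("dsll AT, R%d, %d\naddu AT, R%d, AT\n", index, scale, base);
    if (is_simm16(disp)) std::printf("lb dst, AT, %lld\n", (long long)disp);
    else                 std::printf("move T9, %lld\naddu AT, AT, T9\nlb dst, AT, 0\n", (long long)disp);
  } else {
    if (is_simm16(disp)) std::printf("lb dst, R%d, %lld\n", base, (long long)disp);
    else                 std::printf("move T9, %lld\naddu AT, R%d, T9\nlb dst, AT, 0\n", (long long)disp, base);
  }
}

int main() {
  lower_byte_load(4, 5, 3, 16);       // small displacement: folded into the lb
  lower_byte_load(4, 0, 0, 1 << 20);  // large displacement: materialized in T9
  return 0;
}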
sb(as_Register(src), AT, disp); ++ } else { ++ __ addu(AT, as_Register(base), AT); ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sb(as_Register(src), AT, 0); ++ } ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sb(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sb(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_B_immI_enc (memory mem, immI8 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ int value = $src$$constant; ++ ++ if( index != 0 ) { ++ if (!UseLEXT1) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sb(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ } ++ } else { ++ ++ if (scale == 0) { ++ if( Assembler::is_simm(disp, 8) ) { ++ if (value == 0) { ++ __ gssbx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), as_Register(index), disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ move(T9, disp); ++ __ gssbx(R0, AT, T9, 0); ++ } else { ++ __ move(AT, disp); ++ __ move(T9, value); ++ __ daddu(AT, as_Register(base), AT); ++ __ gssbx(T9, AT, as_Register(index), 0); ++ } ++ } ++ ++ } else { ++ ++ if( Assembler::is_simm(disp, 8) ) { ++ __ dsll(AT, as_Register(index), scale); ++ if (value == 0) { ++ __ gssbx(R0, as_Register(base), AT, disp); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if (value == 0) { ++ __ daddu(AT, as_Register(base), AT); ++ __ move(T9, disp); ++ __ gssbx(R0, AT, T9, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sb(AT, as_Register(base), disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(R0, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ sb(R0, AT, 0); ++ } ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ move(AT, value); ++ __ gssbx(AT, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ 
} ++ } ++ } ++ %} ++ ++ ++ enc_class store_B_immI_enc_sync (memory mem, immI8 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ int value = $src$$constant; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp,8) ) { ++ if ( scale == 0 ) { ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ move(AT, value); ++ __ gssbx(AT, as_Register(base), as_Register(index), disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, disp); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, disp); ++ } ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ if ( value == 0 ){ ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ if ( value == 0 ) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } ++ } else { ++ if ( scale == 0 ) { ++ __ move(AT, disp); ++ __ daddu(AT, as_Register(index), AT); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sb(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (UseLEXT1){ ++ if ( Assembler::is_simm16(disp) ){ ++ if ( value == 0 ) { ++ __ sb(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sb(AT, as_Register(base), disp); ++ } ++ } else { ++ __ move(AT, disp); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sb(AT, as_Register(base), disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sb(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ } ++ } ++ } ++ ++ __ sync(); ++ %} ++ ++ // Load Short (16bit signed) ++ enc_class load_S_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gslhx(as_Register(dst), 
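// The immediate-store encodings above (store_B_immI_enc and its _sync variant)
// special-case a zero constant: the hard-wired zero register R0 is stored
// directly, and only non-zero values are first materialized into a scratch
// register (T9 or AT); the _sync variant additionally emits a sync() barrier
// after the store. A tiny sketch of the source-register choice.
#include <cstdio>

static const char* store_source_reg(int value) {
  return value == 0 ? "R0"    // store the zero register, no materialization
                    : "T9";   // move T9, value; then store T9
}

int main() {
  std::printf("store 0  from %s\n", store_source_reg(0));
  std::printf("store 42 from %s\n", store_source_reg(42));
  return 0;
}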
as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslhx(as_Register(dst), as_Register(base), AT, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ lh(as_Register(dst), AT, disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ __ lh(as_Register(dst), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ move(AT, disp); ++ __ daddu(AT, as_Register(index), AT); ++ __ gslhx(as_Register(dst), as_Register(base), AT, 0); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ gslhx(as_Register(dst), as_Register(base), AT, 0); ++ } ++ } ++ } else { // not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lh(as_Register(dst), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ lh(as_Register(dst), AT, 0); ++ } ++ } ++ } else { // index is 0 ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ lh(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gslhx(as_Register(dst), as_Register(base), T9, 0); ++ } ++ } else { //not use loongson isa ++ if( Assembler::is_simm16(disp) ) { ++ __ lh(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lh(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ // Load Char (16bit unsigned) ++ enc_class load_C_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lhu(as_Register(dst), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, AT, T9); ++ __ lhu(as_Register(dst), AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lhu(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lhu(as_Register(dst), AT, 0); ++ } ++ } ++ %} ++ ++ // Store Char (16bit unsigned) ++ enc_class store_C_reg_enc (memory mem, mRegI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsshx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsshx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ sh(as_Register(src), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, 
disp); ++ if (UseLEXT1) { ++ __ gsshx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sh(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sh(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsshx(as_Register(src), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sh(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_C0_enc (memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsshx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsshx(R0, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ sh(R0, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsshx(R0, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sh(R0, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sh(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsshx(R0, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sh(R0, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class load_I_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gslwx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslwx(as_Register(dst), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ lw(as_Register(dst), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwx(as_Register(dst), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ lw(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lw(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwx(as_Register(dst), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ lw(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_I_reg_enc (memory mem, mRegI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && 
Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsswx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ sw(as_Register(src), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswx(as_Register(src), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_I_immI_enc (memory mem, immI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ int value = $src$$constant; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), as_Register(index), disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), AT, disp); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), AT, disp); ++ } ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ if ( value == 0 ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sw(T9, AT, disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ if ( value == 0 ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sw(T9, AT, disp); ++ } ++ } ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ if ( value ==0 ) { ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), AT, 0); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sw(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ sw(T9, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( value == 0 ) { ++ __ sw(R0, as_Register(base), disp); 
++ } else { ++ __ move(AT, value); ++ __ sw(AT, as_Register(base), disp); ++ } ++ } else { ++ __ move(T9, disp); ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), T9, 0); ++ } else { ++ __ move(AT, value); ++ __ gsswx(AT, as_Register(base), T9, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sw(AT, as_Register(base), disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ move(T9, value); ++ __ sw(T9, AT, 0); ++ } ++ } ++ } ++ } ++ %} ++ ++ enc_class load_N_enc (mRegN dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lwu(as_Register(dst), AT, disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ lwu(as_Register(dst), AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lwu(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lwu(as_Register(dst), AT, 0); ++ } ++ } ++ %} ++ ++ ++ enc_class load_P_enc (mRegP dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsldx(as_Register(dst), as_Register(base), AT, disp); ++ } else { ++ __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ){ ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, AT, as_Register(base)); ++ } else { ++ __ daddu(AT, as_Register(index), as_Register(base)); ++ } ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } ++ __ gsldx(as_Register(dst), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ){ ++ __ ld(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ gsldx(as_Register(dst), as_Register(base), T9, 0); ++ } ++ } else { //not use loongson isa ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), 
as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ // Load acquire. ++ // load_P_enc + sync ++ enc_class load_P_enc_ac (mRegP dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsldx(as_Register(dst), as_Register(base), AT, disp); ++ } else { ++ __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ){ ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, AT, as_Register(base)); ++ } else { ++ __ daddu(AT, as_Register(index), as_Register(base)); ++ } ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } ++ __ gsldx(as_Register(dst), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ){ ++ __ ld(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ gsldx(as_Register(dst), as_Register(base), T9, 0); ++ } ++ } else { //not use loongson isa ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ __ sync(); ++ %} ++ ++ enc_class store_P_reg_enc (memory mem, mRegP src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1){ ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ __ gssdx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gssdx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ sd(as_Register(src), AT, disp); ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ gssdx(as_Register(src), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( 
Assembler::is_simm16(disp) ) { ++ __ sd(as_Register(src), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sd(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sd(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gssdx(as_Register(src), as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sd(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_N_reg_enc (memory mem, mRegN src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1){ ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsswx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ sw(as_Register(src), AT, disp); ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ gsswx(as_Register(src), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gsswx(as_Register(src), as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_P_immP0_enc (memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ if ( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1 && Assembler::is_simm(disp, 8)) { ++ __ gssdx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ sd(R0, AT, disp); ++ } ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdx(R0, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1 && Assembler::is_simm(disp, 8)) { ++ __ gssdx(R0, as_Register(base), AT, disp); ++ } else { ++ __ daddu(AT, as_Register(base), AT); ++ __ sd(R0, AT, disp); ++ } ++ } else { ++ __ daddu(AT, as_Register(base), AT); ++ __ move(T9, 
disp); ++ if (UseLEXT1) { ++ __ gssdx(R0, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdx(R0, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class storeImmN0_enc(memory mem, ImmN0 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if(index!=0){ ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(R0, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(R0, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class load_L_enc (mRegL dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ Register dst_reg = as_Register($dst$$reg); ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(dst_reg, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ ld(dst_reg, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(dst_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ ld(dst_reg, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class store_L_reg_enc (memory mem, mRegL src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ Register src_reg = as_Register($src$$reg); ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(src_reg, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sd(src_reg, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(src_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sd(src_reg, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class store_L_immL_0_enc (memory mem, immL_0 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(R0, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, AT, T9); ++ __ sd(R0, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, as_Register(base), T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class 
store_L_immL_enc (memory mem, immL src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ long imm = $src$$constant; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ set64(T9, imm); ++ __ sd(T9, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, AT, T9); ++ __ set64(T9, imm); ++ __ sd(T9, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ move(AT, as_Register(base)); ++ __ set64(T9, imm); ++ __ sd(T9, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, as_Register(base), T9); ++ __ set64(T9, imm); ++ __ sd(T9, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class load_F_enc (regF dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gslwxc1(dst, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslwxc1(dst, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ lwc1(dst, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwxc1(dst, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ lwc1(dst, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lwc1(dst, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwxc1(dst, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ lwc1(dst, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_F_reg_enc (memory mem, regF src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister src = $src$$FloatRegister; ++ ++ if( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsswxc1(src, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsswxc1(src, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ swc1(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswxc1(src, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ swc1(src, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ swc1(src, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswxc1(src, as_Register(base), T9, 0); 
++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ swc1(src, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class load_D_enc (regD dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister dst_reg = as_FloatRegister($dst$$reg); ++ ++ if ( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsldxc1(dst_reg, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsldxc1(dst_reg, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ ldc1(dst_reg, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsldxc1(dst_reg, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ ldc1(dst_reg, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ ldc1(dst_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsldxc1(dst_reg, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ ldc1(dst_reg, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_D_reg_enc (memory mem, regD src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister src_reg = as_FloatRegister($src$$reg); ++ ++ if ( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gssdxc1(src_reg, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gssdxc1(src_reg, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ sdc1(src_reg, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdxc1(src_reg, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sdc1(src_reg, AT, 0); ++ } ++ } ++ } else { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sdc1(src_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdxc1(src_reg, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sdc1(src_reg, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. 
++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else if(_optimized_virtual) { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); ++ } else { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); ++ } ++ ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ ++ if( _method ) { // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. 
Holes in the outgoing area
++// are owned by the CALLEE. Holes should not be necessary in the
++// incoming area, as the Java calling convention is completely under
++// the control of the AD file. Doubles can be sorted and packed to
++// avoid holes. Holes in the outgoing arguments may be necessary for
++// varargs C calling conventions.
++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
++// even aligned with pad0 as needed.
++// Region 6 is even aligned. Region 6-7 is NOT even aligned;
++// region 6-11 is even aligned; it may be padded out more so that
++// the region from SP to FP meets the minimum stack alignment.
++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
++// alignment. Region 11, pad1, may be dynamically extended so that
++// SP meets the minimum alignment.
++
++
++frame %{
++
++  stack_direction(TOWARDS_LOW);
++
++  // These two registers define part of the calling convention
++  // between compiled code and the interpreter.
++  // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention
++  // for more information.
++
++  inline_cache_reg(T1); // Inline Cache Register
++  interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter
++
++  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
++  cisc_spilling_operand_name(indOffset32);
++
++  // Number of stack slots consumed by locking an object
++  // generate Compile::sync_stack_slots
++  sync_stack_slots(2);
++
++  frame_pointer(SP);
++
++  // Interpreter stores its frame pointer in a register which is
++  // stored to the stack by I2CAdaptors.
++  // I2CAdaptors convert from interpreted java to compiled java.
++
++  interpreter_frame_pointer(FP);
++
++  // generate Matcher::stack_alignment
++  stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*);
++
++  // Number of stack slots between incoming argument block and the start of
++  // a new frame. The PROLOG must add this many slots to the stack. The
++  // EPILOG must remove this many slots.
++  in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp
++
++  // Number of outgoing stack slots killed above the out_preserve_stack_slots
++  // for calls to C. Supports the var-args backing area for register parms.
++  varargs_C_out_slots_killed(0);
++
++  // The after-PROLOG location of the return address. Location of
++  // return address specifies a type (REG or STACK) and a number
++  // representing the register number (i.e. - use a register name) or
++  // stack slot.
++  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
++  // Otherwise, it is above the locks and verification slot and alignment word
++  //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong));
++  return_addr(REG RA);
++
++  // Body of function which returns an integer array locating
++  // arguments either in registers or in stack slots. Passed an array
++  // of ideal registers called "sig" and a "length" count. Stack-slot
++  // offsets are based on outgoing arguments, i.e. a CALLER setting up
++  // arguments for a CALLEE. Incoming stack arguments are
++  // automatically biased by the preserve_stack_slots field above.
++
++
++  // will be generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing)
++  // StartNode::calling_convention calls this.
++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? ++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand 
immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ 
predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS2RegI() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S2" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); 
++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegN() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ 
constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ 
constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); 
++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2RegL() %{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2RegL() %{ ++ constraint(ALLOC_IN_RC(s2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); 
++ %}
++%}
++
++// Indirect Memory Plus Short Offset Operand
++operand indOffset8(mRegP reg, immL8 off)
++%{
++ constraint(ALLOC_IN_RC(p_reg));
++ match(AddP reg off);
++
++ op_cost(10);
++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0); /* NO_INDEX */
++ scale(0x0);
++ disp($off);
++ %}
++%}
++
++// Indirect Memory Times Scale Plus Index Register
++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale)
++%{
++ predicate(UseLEXT1);
++ constraint(ALLOC_IN_RC(p_reg));
++ match(AddP reg (LShiftL lreg scale));
++
++ op_cost(10);
++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index($lreg);
++ scale($scale);
++ disp(0x0);
++ %}
++%}
++
++
++// [base + index + offset]
++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off)
++%{
++ predicate(UseLEXT1);
++ constraint(ALLOC_IN_RC(p_reg));
++ op_cost(5);
++ match(AddP (AddP base index) off);
++
++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %}
++ interface(MEMORY_INTER) %{
++ base($base);
++ index($index);
++ scale(0x0);
++ disp($off);
++ %}
++%}
++
++// [base + index + offset]
++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off)
++%{
++ predicate(UseLEXT1);
++ constraint(ALLOC_IN_RC(p_reg));
++ op_cost(5);
++ match(AddP (AddP base (ConvI2L index)) off);
++
++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %}
++ interface(MEMORY_INTER) %{
++ base($base);
++ index($index);
++ scale(0x0);
++ disp($off);
++ %}
++%}
++
++// [base + index<<scale + offset]
++operand basePosIndexScaleOffset8(mRegP base, mRegI index, immL8 off, immI_0_3 scale)
++%{
++ constraint(ALLOC_IN_RC(p_reg));
++ predicate(UseLEXT1 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
++ op_cost(10);
++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off);
++
++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %}
++ interface(MEMORY_INTER) %{
++ base($base);
++ index($index);
++ scale($scale);
++ disp($off);
++ %}
++%}
++
++//FIXME: I think it's better to limit the immI to be 16-bit at most!
++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ 
++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
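++// ---------------------------------------------------------------------------
++// Editorial note (illustrative, not part of the upstream port): every
++// pipe_class declared above is consumed by the instruct rules that follow
++// through their ins_pipe() attribute; that is how the ADLC-generated
++// scheduler learns an instruction's latency and resource usage. For example,
++// the loadI rule defined below ends with ins_pipe( ialu_loadI ), so its
++// result is modeled as written back at WB+3 while the access occupies the
++// MEM resource during RD. Roughly (ins_cost and format omitted):
++//
++//   instruct loadI(mRegI dst, memory mem) %{
++//     match(Set dst (LoadI mem));        // replaces a machine-independent LoadI
++//     ins_encode(load_I_enc(dst, mem));  // emits the lw sequence
++//     ins_pipe( ialu_loadI );            // latency/resources come from the pipe_class
++//   %}
++// ---------------------------------------------------------------------------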
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode (load_I_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode (load_I_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode(load_S_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode(load_C_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode(load_L_enc(dst, mem)); ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode(load_L_enc(dst, mem)); ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode(store_L_reg_enc(mem, src)); ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode(store_L_immL_0_enc(mem, zero)); ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_imm(memory mem, immL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $src, $mem #@storeL_imm" %} ++ ins_encode(store_L_immL_enc(mem, src)); ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ 
ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode(load_I_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode(store_P_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. ++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode(store_P_immP0_enc(mem)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte Immediate ++instruct storeImmB(memory mem, immI8 src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(150); ++ format %{ "movb $mem, $src #@storeImmB" %} ++ ins_encode(store_B_immI_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode(storeImmN0_enc(mem, zero)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode(store_B_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ 
ins_encode(store_B_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode(load_S_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode(load_S_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeImmI(memory mem, immI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(150); ++ format %{ "mov $mem, $src #@storeImmI" %} ++ ins_encode(store_I_immI_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode(store_I_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode(store_I_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode(load_F_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, 
(long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 
'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); 
++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ 
return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); 
++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, 
$src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ subu(AT, opr1, opr2); ++ __ bltz(AT, Done); ++ __ delayed()->daddiu(dst, R0, -1); ++ ++ __ move(dst, 1); ++ __ movz(dst, R0, AT); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ c_ult_s(src1, src2); ++ __ bc1t(Done); ++ __ delayed()->daddiu(dst, R0, -1); ++ ++ __ c_eq_s(src1, src2); ++ __ move(dst, 1); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ c_ult_d(src1, src2); ++ __ bc1t(Done); ++ __ delayed()->daddiu(dst, R0, -1); ++ ++ __ c_eq_d(src1, src2); ++ __ move(dst, 1); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt1 = $cnt1$$Register; ++ Register cnt2 = $cnt2$$Register; ++ Register result = $result$$Register; ++ ++ Label L, Loop, haveResult, done; ++ ++ // compute the and difference of lengths (in result) ++ __ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ __ slt(AT, cnt2, cnt1); ++ __ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ __ bind(Loop); // Loop begin ++ __ beq(cnt1, R0, done); ++ __ delayed()->lhu(AT, str1, 0);; ++ ++ // compare current character ++ __ lhu(cnt2, str2, 0); ++ __ bne(AT, cnt2, haveResult); ++ __ delayed()->addiu(str1, str1, 2); ++ __ addiu(str2, str2, 2); ++ __ b(Loop); ++ __ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ __ bind(haveResult); ++ __ subu(result, AT, cnt2); ++ ++ __ bind(done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt = $cnt$$Register; ++ Register tmp = $temp$$Register; ++ Register result = $result$$Register; ++ ++ Label Loop, True, False; ++ ++ __ beq(str1, str2, True); // same char[] ? 
++ __ delayed()->daddiu(result, R0, 1); ++ ++ __ beq(cnt, R0, True); ++ __ delayed()->nop(); // count == 0 ++ ++ __ bind(Loop); ++ ++ // compare current character ++ __ lhu(AT, str1, 0); ++ __ lhu(tmp, str2, 0); ++ __ bne(AT, tmp, False); ++ __ delayed()->addiu(str1, str1, 2); ++ __ addiu(cnt, cnt, -1); ++ __ bne(cnt, R0, Loop); ++ __ delayed()->addiu(str2, str2, 2); ++ ++ __ b(True); ++ __ delayed()->nop(); ++ ++ __ bind(False); ++ __ daddiu(result, R0, 0); ++ ++ __ bind(True); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "addu $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "addu $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddiu $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ if(Assembler::is_simm16(src2)) { ++ __ daddiu(dst, src1, src2); ++ } else { ++ __ set64(AT, src2); ++ __ daddu(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ 
ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "subu $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "subu $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "subu $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ 
++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower that div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ match(Set dst (AddF (MulF src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. ++ ins_cost(44444); ++ format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ madd_s(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ match(Set dst (AddD (MulD src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. ++ ins_cost(44444); ++ format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ madd_d(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. 
++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = 
$src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) 
%{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); 
++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, 
mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ 
%} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate 
++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, 
immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ 
match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime:f2i() to do valid convention */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. 
++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ 
"conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. ++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ 
predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. 
++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions. 
++ ++instruct prefetchrNTA( memory mem ) %{ ++ match(PrefetchRead mem); ++ ins_cost(125); ++ ++ format %{ "pref $mem\t# Prefetch into non-temporal cache for read @ prefetchrNTA" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ daddiu(AT, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ pref(0, AT, 0); //hint: 0:load ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchwNTA( memory mem ) %{ ++ match(PrefetchWrite mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch to non-temporal cache for write @ prefetchwNTA" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ daddiu(AT, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ pref(1, AT, 0); //hint: 1:store ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Prefetch instructions for allocation. ++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ Register dst = R0; ++ ++ if ( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1) { ++ if (scale == 0) { ++ __ gslbx(dst, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslbx(dst, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ lb(dst, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(dst, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ lb(dst, AT, 0); ++ } ++ } ++ } else { ++ if ( Assembler::is_simm16(disp) ) { ++ __ lb(dst, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(dst, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ lb(dst, AT, 0); ++ } ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode(load_C_enc(dst, mem)); ++ ins_pipe( 
ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode(load_C_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode(store_C_reg_enc(mem, src)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode(store_C0_enc(mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode(store_F_reg_enc(mem, src)); ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ __ gsswx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ gsswx(R0, as_Register(base), T9, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, 
as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ } ++ __ sw(R0, AT, disp); ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ move(AT, disp); ++ __ daddu(AT, AT, T9); ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } ++ } ++ } else { //not use loongson isa ++ if(scale != 0) { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(R0, AT, 0); ++ } ++ } ++ } else { //index is 0 ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gsswx(R0, as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(R0, AT, 0); ++ } ++ } ++ } ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode(load_D_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode(load_D_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode(store_D_reg_enc(mem, src)); ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ __ mtc1(R0, F30); ++ __ cvt_d_w(F30, F30); ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gssdxc1(F30, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ gssdxc1(F30, as_Register(base), T9, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ sdc1(F30, AT, disp); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ __ sdc1(F30, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ __ gssdxc1(F30, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, AT, T9); ++ __ gssdxc1(F30, as_Register(base), AT, 0); ++ } ++ } ++ } else { // not use loongson isa ++ if(scale != 0) { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sdc1(F30, AT, disp); ++ } else { ++ 
__ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sdc1(F30, AT, 0); ++ } ++ } ++ } else {// index is 0 ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sdc1(F30, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gssdxc1(F30, as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sdc1(F30, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sdc1(F30, AT, 0); ++ } ++ } ++ } ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock( FlagsReg cr, mRegP object, s0_RegP box, mRegI tmp, mRegP scr) %{ ++ match( Set cr (FastLock object box) ); ++ effect( TEMP tmp, TEMP scr, USE_KILL box ); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register); ++ __ move($cr$$Register, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock( FlagsReg cr, mRegP object, s0_RegP box, mRegP tmp ) %{ ++ match( Set cr (FastUnlock object box) ); ++ effect( TEMP tmp, USE_KILL box ); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); ++ __ move($cr$$Register, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate ++instruct storeImmCM(memory mem, immI8 src) %{ ++ match(Set mem (StoreCM mem src)); ++ ++ ins_cost(150); ++ format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} ++// opcode(0xC6); ++ ins_encode(store_B_immI_enc_sync(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! 
++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional( memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr ) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "CMPXCHG $heap_top_ptr, $newval\t# (ptr) @storePConditional " ++ "If $oldval == $heap_top_ptr then store $newval into $heap_top_ptr" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if( index != 0 ) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ cmpxchg(newval, addr, oldval); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional( memory mem, mRegI oldval, mRegI newval, FlagsReg cr ) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++// effect(KILL oldval); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ Label again, failure; ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if( index != 0 ) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ __ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) __ sync(); ++ __ ll(AT, addr); ++ __ bne(AT, oldval, failure); ++ __ delayed()->addu(AT, R0, R0); ++ ++ __ addu(AT, newval, R0); ++ __ sc(AT, addr); ++ __ beq(AT, R0, again); ++ __ delayed()->addiu(AT, R0, 0xFF); ++ __ bind(failure); ++ __ sync(); ++ ++ __ move($cr$$Register, AT); ++ } ++%} ++ ++ ins_pipe( long_memory_op ); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, t2RegL oldval, mRegL newval, FlagsReg cr ) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ effect(KILL oldval); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if( index != 0 ) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ __ cmpxchg(newval, addr, oldval); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ld $dst, $mem #@loadPLocked\n\t" ++ "sync" %} ++ size(12); ++ ins_encode (load_P_enc_ac(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI( mRegI res, mRegP mem_ptr, mS2RegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++// match(CompareAndSwapI mem_ptr (Binary oldval newval)); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL\n\t" ++ "MOV $res, 1 @ compareAndSwapI\n\t" ++ "BNE AT, R0 @ compareAndSwapI\n\t" ++ "MOV $res, 0 @ compareAndSwapI\n" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ __ cmpxchg32(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct compareAndSwapL( mRegI res, mRegP mem_ptr, s2RegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI\n\t" ++ "MOV $res, 1 @ compareAndSwapI\n\t" ++ "BNE AT, R0 @ compareAndSwapI\n\t" ++ "MOV $res, 0 @ compareAndSwapI\n" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ __ cmpxchg(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++//FIXME: ++instruct compareAndSwapP( mRegI res, mRegP mem_ptr, s2_RegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP\n\t" ++ "MOV $res, AT @ compareAndSwapP\n\t" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ __ cmpxchg(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct compareAndSwapN( mRegI res, mRegP mem_ptr, t2_RegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN\n\t" ++ "MOV $res, AT @ compareAndSwapN\n\t" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ // cmpxchg32 is implemented with ll/sc, which will do sign extension. ++ // Thus, we should extend oldval's sign for correct comparision. ++ // ++ __ sll(oldval, oldval, 0); ++ ++ __ cmpxchg32(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! 
min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ 
Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++instruct safePoint_poll_reg(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(false); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} ++ ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, poll_reg, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll() %{ ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. 
++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode(load_D_enc(dst, mem)); ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode(store_D_reg_enc(mem, src)); ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! 
replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! 
replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! 
mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. 
++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp +new file mode 100644 +index 0000000000..e1f7cd944d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp +@@ -0,0 +1,1829 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "code/codeCache.hpp" ++#include "compiler/disassembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include <sys/mman.h> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op 
(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = 
((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 
0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. ++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ // If the codeBlob is not a nmethod, this is because we get here from the ++ // CodeBlob constructor, which is called within the nmethod constructor. 
++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 
1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | 
Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. 
++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( 
is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | 
rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
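(For reference, a minimal stand-alone C++ sketch of the 48-bit immediate split that NativeJump::patch_set48()/patch_set48_gs() above, like the NativeCall variants earlier, encode as lui / ori / dsll 16 / ori. This is an editorial illustration, not part of the patch: the names Li48, split48 and rebuild48 are hypothetical, and it covers only the (value > 0) && is_simm16(value >> 32) branch, i.e. destinations that fit in 47 bits.)

  #include <cstdint>
  #include <cassert>

  struct Li48 { uint16_t hi, mid, lo; };   // imm(47..32), imm(31..16), imm(15..0)

  // Split a destination address into the three 16-bit immediates.
  static Li48 split48(uint64_t dest) {
    assert(dest < (uint64_t(1) << 47));    // mirrors (value > 0) && is_simm16(value >> 32)
    return { uint16_t(dest >> 32), uint16_t(dest >> 16), uint16_t(dest) };
  }

  // What the patched lui/ori/dsll/ori sequence computes at run time.
  static uint64_t rebuild48(const Li48& f) {
    uint64_t d = uint64_t(f.hi) << 16;     // lui  rd, hi
    d |= f.mid;                            // ori  rd, rd, mid
    d <<= 16;                              // dsll rd, rd, 16
    d |= f.lo;                             // ori  rd, rd, lo
    return d;                              // == dest
  }

patch_set48() rebuilds the four instruction words around exactly these fields and nop-fills any unused slots, which is why both the jump and the call patchers can retarget an already-emitted li48 sequence in place.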
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp +new file mode 100644 +index 0000000000..13a4cb4ef1 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp +@@ -0,0 +1,735 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "memory/allocation.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "utilities/top.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction VALUE_OBJ_CLASS_SPEC { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
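// The lui/ori decode paths earlier in this file (the ones that end in
// fatal("not a jump")) all funnel into Assembler::merge with four 16-bit
// pieces, padding the upper pieces with 0x0000 or 0xffff according to the
// sign bit of the highest piece actually materialised. A minimal standalone
// sketch of that reassembly; merge16x4 is an illustrative stand-in, not the
// HotSpot helper.
#include <cstdint>

static inline int64_t merge16x4(uint64_t p0,   // bits 15..0
                                uint64_t p1,   // bits 31..16
                                uint64_t p2,   // bits 47..32
                                uint64_t p3) { // bits 63..48
  // Compose the four 16-bit fields back into one sign-carrying 64-bit value.
  return (int64_t)((p3 << 48) | (p2 << 32) | (p1 << 16) | p0);
}

// Decoding "lui dst, hi16; ori dst, dst, lo16" then reduces to:
//   sign = hi16 & 0x8000;
//   addr = merge16x4(lo16, hi16, sign ? 0xffff : 0, sign ? 0xffff : 0);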
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
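// The comment above relies on the atomicity of naturally aligned 32-bit
// writes: when only one instruction word of the call sequence changes, a
// single atomic store lets free-running threads observe either the old or
// the new word, never a torn mix. A hedged sketch of that idea in plain C++;
// patch_insn is illustrative only, not the HotSpot entry point.
#include <atomic>
#include <cstdint>

static inline void patch_insn(std::atomic<uint32_t>& insn_slot, uint32_t new_insn) {
  // Release ordering: anything written to the stub before the patch becomes
  // visible no later than the new instruction word itself.
  insn_slot.store(new_insn, std::memory_order_release);
}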
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
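// split_high/split_low in the load/store comment above are the classic MIPS
// %hi/%lo pairing: addiu sign-extends its 16-bit immediate, so the high half
// is pre-biased by 0x8000 so that "lui reg, hi; addiu reg, reg, lo" rebuilds
// the original 32-bit offset. A standalone sketch with illustrative helper
// names, not the HotSpot macros.
#include <cstdint>

static inline uint16_t split_low(int32_t x)  { return (uint16_t)x; }
static inline uint16_t split_high(int32_t x) {
  return (uint16_t)(((uint32_t)x + 0x8000u) >> 16);
}

static inline int32_t rebuild(int32_t x) {
  uint32_t hi = split_high(x);
  int32_t  lo = (int16_t)split_low(x);          // sign-extended, as addiu does
  return (int32_t)((hi << 16) + (uint32_t)lo);  // equals x for every 32-bit x
}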
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
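// Every is_call()/is_jump() pattern check above reduces to comparing two
// fixed fields of a 32-bit MIPS instruction word: the primary opcode in bits
// 31..26, and, for SPECIAL-class instructions (primary opcode 0), the
// function code in bits 5..0. A minimal sketch of that field extraction
// (standard MIPS encoding; helper names are illustrative, not the Assembler
// accessors).
#include <cstdint>

static inline uint32_t opcode_field(uint32_t insn) { return insn >> 26; }
static inline uint32_t funct_field(uint32_t insn)  { return insn & 0x3f; }

static inline bool is_special(uint32_t insn, uint32_t funct) {
  return opcode_field(insn) == 0 && funct_field(insn) == funct;
}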
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/registerMap_mips.hpp b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp +new file mode 100644 +index 0000000000..7f800eb107 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp +new file mode 100644 +index 0000000000..4af2531834 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); 
++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/hotspot/src/cpu/mips/vm/register_mips.cpp b/hotspot/src/cpu/mips/vm/register_mips.cpp +new file mode 100644 +index 0000000000..4a9b22bfef +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? 
names[encoding()] : "fnoreg"; ++} ++ +diff --git a/hotspot/src/cpu/mips/vm/register_mips.hpp b/hotspot/src/cpu/mips/vm/register_mips.hpp +new file mode 100644 +index 0000000000..88bf2d68cc +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/register_mips.hpp +@@ -0,0 +1,346 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "vm_version_mips.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++ ++// The implementation of integer registers for the mips architecture ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); 
++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define RA0 ((Register)(i4_RegisterEnumValue)) ++#define RA1 ((Register)(i5_RegisterEnumValue)) ++#define RA2 ((Register)(i6_RegisterEnumValue)) ++#define RA3 ((Register)(i7_RegisterEnumValue)) ++#define RA4 ((Register)(i8_RegisterEnumValue)) ++#define RA5 ((Register)(i9_RegisterEnumValue)) ++#define RA6 ((Register)(i10_RegisterEnumValue)) ++#define RA7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 
((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 ((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++//OPT_SAFEPOINT not supported yet ++#define OPT_SAFEPOINT 1 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the mips architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp +new file mode 100644 +index 0000000000..cae43b2d96 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. 
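// pd_set_data_value() above re-encodes the value before patching when the
// operand is a narrow (compressed) oop or class pointer. Conceptually the
// narrow form is a 32-bit, optionally shifted offset from a base address; a
// hedged sketch of that encoding, with the base/shift handling simplified and
// the helper names illustrative rather than oopDesc's actual API.
#include <cstdint>

static inline uint32_t encode_narrow(uintptr_t addr, uintptr_t base, unsigned shift) {
  // Store the pointer as a scaled offset from the (heap or class-space) base.
  return (uint32_t)((addr - base) >> shift);
}

static inline uintptr_t decode_narrow(uint32_t narrow, uintptr_t base, unsigned shift) {
  // Inverse mapping: rebuild the full pointer from the 32-bit narrow value.
  return base + ((uintptr_t)narrow << shift);
}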
++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp +new file mode 100644 +index 0000000000..04ad5dac96 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp +new file mode 100644 +index 0000000000..bb9269b423 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
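// A little further below, "__ move(AT, -(StackAlignmentInBytes)); __ andr(SP,
// SP, AT)" re-aligns SP before calling into the runtime. That is the usual
// align-down idiom: AND-ing with the negated alignment clears the low bits.
// A host-side sketch of the same computation; the helper name is illustrative.
#include <cstdint>

static inline uintptr_t align_down(uintptr_t sp, uintptr_t alignment) {
  // alignment must be a power of two; ~(alignment - 1) equals -alignment in
  // two's complement, so the AND rounds sp down to the alignment boundary.
  return sp & ~(alignment - 1);
}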
++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp +new file mode 100644 +index 0000000000..daf04c4422 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3816 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. 
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ 
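++  // Note (editorial): every *_off value in the layout enum above is counted in
++  // 32-bit (jint) slots, which is why the stores below scale by jintSize and
++  // why STACK_OFFSET only needs to add the caller-provided
++  // additional_frame_slots before converting the index into a VMReg.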
map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. 
++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. 
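++  // Illustrative example (editorial sketch, not from the original sources):
++  // for a virtual method  int foo(long a, Object b, float c)  the incoming
++  // signature is [T_OBJECT(receiver), T_LONG, T_VOID, T_OBJECT, T_FLOAT]; with
++  // the single shared "args" counter used below, the receiver lands in T0,
++  // "a" in A0, "b" in A1 and "c" in FP_ArgReg[3] == F15. Anything beyond the
++  // eight register slots falls back to stack slots, two 32-bit slots per
++  // argument (stk_args += 2).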
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ verify_oop(Rmethod); ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ verify_oop(Rmethod); ++ __ move(S0, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, S0); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++static void gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
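++    // Worked example (editorial illustration): with comp_args_on_stack == 3 on
++    // a 64-bit build (wordSize == 8), round_to(3*4, 8) == 16 bytes == 2 words,
++    // already a multiple of 2, so 16 bytes are carved out of SP below.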
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++ __ get_thread(T8); ++ __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ __ verify_oop(holder); ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ __ verify_oop(temp); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
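++  // Editorial note: despite the comment above, the code below still covers all
++  // four src/dst combinations (stack->stack, stack->reg, reg->stack, reg->reg),
++  // using AT as the scratch register for the memory-to-memory case.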
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal(err_msg_res("unexpected intrinsic id %d", iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ methodHandle method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = method->critical_native_function(); ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
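++  // Editorial summary: the inline-cache check below loads the receiver's klass
++  // into T9 and compares it against the expected klass in ic_reg (T1); on a
++  // mismatch it jumps to the shared ic_miss stub so the call site can be
++  // re-resolved.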
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
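++  // c_arg starts past the hidden C arguments: JNIEnv* always, plus the class
++  // mirror slot when the method is static.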
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
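++  // DTrace method-entry probe: the argument registers are saved and restored
++  // around the leaf call so the freshly shuffled native arguments survive.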
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd( swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); ++ __ bne(AT, R0, lock_done); ++ __ delayed()->nop(); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ lw(A0, AT, 0); ++ __ addiu(AT, A0, -SafepointSynchronize::_not_synchronized); ++ Label L; ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
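++    // A zero displaced header stored in the lock slot marks a recursive
++    // enter, so there is nothing to release here.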
++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); ++ ++ __ beq(AT, R0, slow_path_unlock); ++ __ delayed()->nop(); ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ addiu(SP,SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / 
VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
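++          // Copy the string oop to its outgoing slot, using L2 as scratch when
++          // either the source or the destination lives on the stack.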
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
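++  // Stash the exception oop and pc in the JavaThread; the common unpack path
++  // below retrieves them again once all registers have been saved.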
++ ++ __ get_thread(thread); ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++ __ get_thread(thread); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
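++  // Record last_Java_pc by hand: a pc inside this blob is stored into the
++  // frame anchor before the call to fetch_unroll_info.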
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++ __ get_thread(thread); ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
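++  // The UnrollBlock returned by fetch_unroll_info describes the interpreter
++  // frames to rebuild; pull its arrays and counts into the registers below.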
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++ __ get_thread(thread); ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++ __ get_thread(thread); ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 52; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ move(A1, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
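Editorial paraphrase (not part of the patch) of the frame-pushing loop above: for each skeletal interpreter frame described by the UnrollBlock, the code pushes the recorded return pc and the current FP by hand and then drops SP by the remaining frame size. UnrollInfo, wordSize and push_skeletal_frames below are illustrative stand-ins only; the last_sp/sender_sp bookkeeping done by the real loop is omitted.

#include <cstdint>

static const int wordSize = 8;            // 64-bit words on MIPS64

struct UnrollInfo {                       // stand-in for Deoptimization::UnrollBlock
  const intptr_t* frame_sizes;            // per-frame size, in bytes
  const intptr_t* frame_pcs;              // return pc recorded for each frame
  int             number_of_frames;
};

static void push_skeletal_frames(const UnrollInfo& u, intptr_t*& sp, intptr_t*& fp) {
  for (int i = 0; i < u.number_of_frames; i++) {
    intptr_t body = u.frame_sizes[i] - 2 * wordSize;  // pc and fp are pushed by hand
    *--sp = u.frame_pcs[i];               // return address (ld(AT, pcs, 0); push2)
    *--sp = (intptr_t)fp;                 // caller's fp
    fp = sp;                              // move(FP, SP)
    sp = (intptr_t*)((char*)sp - body);   // dsubu(SP, SP, T2): rest of the frame
  }
}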
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++ // If cause_return is true we are at a poll_return and there is ++ // the return address in RA to the caller on the nmethod ++ // that is safepoint. We can leave this return in RA and ++ // effectively complete the return and safepoint in the caller. ++ // Otherwise we load exception pc to RA. ++ __ push(thread); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if(!cause_return) { ++ __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ pop(thread); ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ ++ // Do the call ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++ const Register thread = T8; ++ __ get_thread(thread); ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++ __ get_thread(thread); ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? ++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. 
++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ get_thread(thread); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
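Editorial aid (not part of the patch): what one MACC step computes, expressed portably with GCC/Clang's unsigned __int128. The 128-bit product of A and B is added into the triple-precision accumulator {t2, t1, t0}, with carries propagated exactly as the inline assembly above does; MACC2, defined next, feeds the same product in twice.

#include <cstdint>

static inline void macc_reference(uint64_t A, uint64_t B,
                                  uint64_t& t0, uint64_t& t1, uint64_t& t2) {
  unsigned __int128 p   = (unsigned __int128)A * B;          // full 128-bit product
  unsigned __int128 lo  = (unsigned __int128)t0 + (uint64_t)p;
  unsigned __int128 mid = (unsigned __int128)t1 + (uint64_t)(p >> 64)
                        + (uint64_t)(lo >> 64);              // carry out of t0
  t0  = (uint64_t)lo;
  t1  = (uint64_t)mid;
  t2 += (uint64_t)(mid >> 64);                               // carry out of t1
}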
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
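One more editorial note before montgomery_square(): the assert(t0 == 0) in both loops relies on inv being -n[0]^-1 mod 2^64, so after choosing m[i] = t0 * inv, adding m[i] * n[0] cancels the low accumulator word exactly. A self-contained demonstration (not part of the patch; neg_inverse64 is an illustrative helper using the standard Newton iteration for inverses mod 2^64):

#include <cassert>
#include <cstdint>

// -n^{-1} mod 2^64 for odd n; each Newton step doubles the correct low bits.
static uint64_t neg_inverse64(uint64_t n) {
  uint64_t x = n;                          // correct to 3 bits since n is odd
  for (int i = 0; i < 5; i++) x *= 2 - n * x;
  return (uint64_t)0 - x;
}

int main() {
  uint64_t n0  = 0x9E3779B97F4A7C15ULL;    // any odd modulus digit
  uint64_t inv = neg_inverse64(n0);
  assert(inv * n0 == (uint64_t)-1);        // the precondition asserted above
  uint64_t t0  = 0x0123456789ABCDEFULL;    // arbitrary low accumulator word
  uint64_t m_i = t0 * inv;                 // digit chosen as m[i] = t0 * inv
  assert(t0 + m_i * n0 == 0);              // low word cancels (mod 2^64)
  return 0;
}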
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff --git a/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp +new file mode 100644 +index 0000000000..aeb797faf9 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp +@@ -0,0 +1,2147 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/top.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++static address handle_unsafe_access() { ++ JavaThread* thread = JavaThread::current(); ++ address pc = thread->saved_exception_pc(); ++ // pc is the instruction which we must emulate ++ // doing a no-op is fine: return garbage from the load ++ // therefore, compute npc ++ address npc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ ++ // request an async exception ++ thread->set_pending_unsafe_access_error(); ++ ++ // return address of next instruction to execute ++ return npc; ++} ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
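Editorial note (not part of the patch) on the layout above: the call_stub_layout values are word offsets relative to FP once enter() has run, so a slot's byte displacement is simply offset * wordSize; generate_catch_exception() below reloads the saved thread exactly this way with ld(thread, FP, thread_off * wordSize). A trivial check of the arithmetic:

#include <cassert>

int main() {
  const int wordSize   = 8;                // 64-bit MIPS
  const int thread_off = -13;              // from the enum above
  assert(thread_off * wordSize == -104);   // byte displacement used by ld()
  return 0;
}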
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ 
// handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // The following routine generates a subroutine to throw an ++ // asynchronous UnknownError when an unsafe access gets a fault that ++ // could not be reasonably prevented by the programmer. (Example: ++ // SIGBUS/OBJERR.) ++ address generate_handler_for_unsafe_access() { ++ StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); ++ address start = __ pc(); ++ __ push(V0); ++ __ pushad_except_v0(); // push registers ++ __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ popad_except_v0(); ++ __ move(RA, V0); ++ __ pop(V0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... 
and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate store check for array ++ // ++ // Input: ++ // T0 - starting address ++ // T1 - element count ++ // ++ // The 2 input registers are overwritten ++ // ++ ++ ++ void array_store_check(Register tmp) { ++ assert_different_registers(tmp, AT, T0, T1); ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ Label l_0; ++ ++ if (UseConcMarkSweepGC) __ sync(); ++ ++ __ set64(tmp, (long)ct->byte_map_base); ++ ++ __ dsll(AT, T1, TIMES_OOP); ++ __ daddu(AT, T0, AT); ++ __ daddiu(T1, AT, - BytesPerHeapOop); ++ ++ __ shr(T0, CardTableModRefBS::card_shift); ++ __ shr(T1, CardTableModRefBS::card_shift); ++ ++ __ dsubu(T1, T1, T0); // end --> cards count ++ __ bind(l_0); ++ ++ __ daddu(AT, tmp, T0); ++ if (UseLEXT1) { ++ __ gssbx(R0, AT, T1, 0); ++ } else { ++ __ daddu(AT, AT, T1); ++ __ sb(R0, AT, 0); ++ } ++ ++ __ bgtz(T1, l_0); ++ __ delayed()->daddiu(T1, T1, - 1); ++ } ++ ++ // Generate code for an array write pre barrier ++ // ++ // addr - starting address ++ // count - element count ++ // tmp - scratch register ++ // ++ // Destroy no registers! ++ // ++ void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ // With G1, don't generate the call if we statically know that the target in uninitialized ++ if (!dest_uninitialized) { ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ // exactly backwards!! ++ //__ xchgptr(c_rarg1, c_rarg0); ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); ++ __ popad(); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ case BarrierSet::ModRef: ++ break; ++ default: ++ ShouldNotReachHere(); ++ ++ } ++ } ++ ++ // ++ // Generate code for an array write post barrier ++ // ++ // Input: ++ // start - register containing starting address of destination array ++ // count - elements count ++ // scratch - scratch register ++ // ++ // The input registers are overwritten. ++ // ++ void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { ++ assert_different_registers(start, count, scratch, AT); ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ if (start == A1) { ++ // exactly backwards!! 
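Editorial sketch (not part of the patch): array_store_check() above and the CardTableModRef branch of gen_write_ref_array_post_barrier() below both walk the destination range and zero one card byte per card. The sketch assumes HotSpot's default card table (512-byte cards, card_shift == 9) and dirty value 0, which is why the loops store R0; byte_map_base is the biased card array, so indexing by address >> card_shift works directly.

#include <cstdint>

static void dirty_card_range(volatile int8_t* byte_map_base,
                             uintptr_t first_oop, uintptr_t last_oop) {
  const int card_shift = 9;                        // 512-byte cards
  for (uintptr_t c = first_oop >> card_shift; c <= last_oop >> card_shift; c++)
    byte_map_base[c] = 0;                          // 0 == dirty (the stored R0)
}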
++ //__ xchgptr(c_rarg1, c_rarg0); ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, start); ++ } ++ } else { ++ __ move(A0, start); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); ++ __ popad(); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label L_loop; ++ const Register end = count; ++ ++ if (UseConcMarkSweepGC) __ sync(); ++ ++ int64_t disp = (int64_t) ct->byte_map_base; ++ __ set64(scratch, disp); ++ ++ __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(start, CardTableModRefBS::card_shift); ++ __ shr(end, CardTableModRefBS::card_shift); ++ __ dsubu(end, end, start); // end --> cards count ++ ++ __ daddu(start, start, scratch); ++ ++ __ bind(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, start, count, 0); ++ } else { ++ __ daddu(AT, start, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ slt(AT, count, R0); ++ __ beq(AT, R0, L_loop); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
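The xorr/andi pair that follows (and the earlier mod-2/mod-4/mod-8 checks in these copy stubs) tests whether source and destination share the same alignment; only when they do can both pointers be bumped to the wider access size. A plain-C++ equivalent of the test, as an editorial aid (not part of the patch):

#include <cstdint>

// True when 'from' and 'to' have the same alignment modulo 'mod' (a power of
// two); (from ^ to) keeps only the bits where the addresses differ, and the
// mask checks that none of those fall in the low log2(mod) bits.
static bool same_alignment(uintptr_t from, uintptr_t to, uintptr_t mod) {
  return ((from ^ to) & (mod - 1)) == 0;   // e.g. mod == 16 for the check below
}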
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
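A note on naming before the next generators: a "disjoint" stub is entered only when the caller has already proven that the arrays cannot overlap, so it is free to copy in ascending order, while a "conjoint" stub first runs array_overlap_test and branches to the matching disjoint stub when there is no overlap, otherwise it copies from the high addresses downward (as the short-copy routine above does). A minimal C++ sketch of that decision, with an invented name, purely for illustration:

#include <cstddef>
#include <cstdint>

// Illustration only, not part of the patch: the choice the conjoint stubs make in assembly.
static void conjoint_copy_sketch(int32_t* dst, const int32_t* src, size_t count) {
    if (dst <= src || dst >= src + count) {
        for (size_t i = 0; i < count; ++i)     // no harmful overlap: ascending copy,
            dst[i] = src[i];                   // what the disjoint stubs assume
    } else {
        for (size_t i = count; i > 0; --i)     // destination overlaps the source tail:
            dst[i - 1] = src[i - 1];           // copy descending, from high to low
    }
}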
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
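The is_oop variants in this group additionally bracket the copy with gen_write_ref_array_pre_barrier and gen_write_ref_array_post_barrier so the collector observes both the references being overwritten and the freshly written destination region. Conceptually it amounts to the following sketch, with hypothetical no-op hooks rather than the real HotSpot barrier interface:

#include <cstddef>

// Hypothetical stand-ins for the GC write barriers (no-ops here); real collectors
// use them for SATB logging of old values and card marking of the updated region.
static void pre_write_barrier(void** /*dst*/, size_t /*count*/)  {}
static void post_write_barrier(void** /*dst*/, size_t /*count*/) {}

static void oop_arraycopy_sketch(void** dst, void** src, size_t count) {
    pre_write_barrier(dst, count);        // before any reference slot is overwritten
    for (size_t i = 0; i < count; ++i)    // the stubs perform this loop in assembly
        dst[i] = src[i];
    post_write_barrier(dst, count);       // after the destination region is updated
}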
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
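generate_safefetch above emits exactly one load whose address is recorded as fault_pc; if that load traps, the VM's fault handler resumes execution at continuation_pc, so the stub returns errValue instead of crashing. The caller-visible contract amounts to the portable sketch below; it only models the semantics, since the real stub relies on the fault handler rather than a null check:

// Sketch of the SafeFetch32 contract; SafeFetchN is the same with intptr_t.
static int SafeFetch32_sketch(int* adr, int errValue) {
    if (adr == nullptr)      // stand-in for "the load faulted"
        return errValue;
    return *adr;             // common case: the address was readable
}
// Typical use: int v = SafeFetch32_sketch(possibly_bad_ptr, -1);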
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp +new file mode 100644 +index 0000000000..733a48b889 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp +new file mode 100644 +index 0000000000..920c08844e +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp +new file mode 100644 +index 0000000000..a83c3728f8 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP ++#define CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP ++ ++ protected: ++ ++ void generate_fixed_frame(bool native_call); ++ ++ // address generate_asm_interpreter_entry(bool synchronized); ++ ++#endif // CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp +new file mode 100644 +index 0000000000..204f1b2f21 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP ++ ++ ++ protected: ++ ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreter to get the VM to print out the size. ++ // Max size with JVMTI ++ // The sethi() instruction generates lots more instructions when shell ++ // stack limit is unlimited, so that's why this is much bigger. ++ const static int InterpreterCodeSize = 500 * K; ++ ++#endif // CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp +new file mode 100644 +index 0000000000..0cc5d33070 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp +@@ -0,0 +1,2306 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef CC_INTERP ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. 
++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++ ++const int Interpreter::return_sentinel = 0xfeedbeed; ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( ++ const char* name) { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ li(A1, (long)name); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ dispatch_next(state); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int 
step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::stackElementScale()); ++ __ daddu(SP, SP, AT); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ __ dispatch_next(state, step); ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method 
entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++// T3 : invocation counter ++// ++void InterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beq(FSR, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++ } ++} ++ ++void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // 
S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(*do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void InterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
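When the new frame is larger than one page, the code below forms the sum of the locals in bytes, the fixed overhead, stack_base - stack_size, and the red plus yellow guard pages, and requires that sum to stay below the current SP; otherwise it jumps to the shared StackOverflowError stub. The same inequality in C, with stand-in parameters for the values the assembly loads from the JavaThread and the method:

#include <cstddef>
#include <cstdint>

// Sketch of the stack-bang decision performed below; all inputs are stand-ins.
static bool frame_fits(uintptr_t sp, uintptr_t stack_base, size_t stack_size,
                       size_t extra_locals_bytes, size_t overhead_bytes,
                       size_t guard_bytes) {
    uintptr_t lowest_usable = stack_base - stack_size + guard_bytes;
    return lowest_usable + extra_locals_bytes + overhead_bytes < sp;
    // false => throw StackOverflowError via the shared runtime stub
}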
++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::stackElementScale()); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ ++ // add in the redzone and yellow size ++ __ move(AT, (StackRedPages+StackYellowPages) * page_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void InterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ ld(T0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T0, T0, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld(T0, T0, mirror_offset); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 9; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Call an accessor method (assuming it is resolved, otherwise drop ++// into vanilla (slow path) entry ++address InterpreterGenerator::generate_accessor_entry(void) { ++ ++ // Rmethod: Method* ++ // V0: receiver (preserve for slow entry into asm interpreter) ++ // Rsender: senderSP must preserved for slow path, set SP to it on fast path ++ ++ address entry_point = __ pc(); ++ Label xreturn_path; ++ // do fastpath for resolved accessor methods ++ if (UseFastAccessorMethods) { ++ Label slow_path; ++ __ li(T2, SafepointSynchronize::address_of_state()); ++ __ lw(AT, T2, 0); ++ __ daddiu(AT, AT, -(SafepointSynchronize::_not_synchronized)); ++ __ bne(AT, R0, slow_path); ++ __ delayed()->nop(); ++ // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; ++ // parameter size = 1 ++ // Note: We can only use this code if the getfield has been resolved ++ // and if we don't have a null-pointer exception => check for ++ // these conditions first and use slow path if necessary. ++ // Rmethod: method ++ // V0: receiver ++ ++ // [ receiver ] <-- sp ++ __ ld(T0, SP, 0); ++ ++ // check if local 0 != NULL and read field ++ __ beq(T0, R0, slow_path); ++ __ delayed()->nop(); ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ // read first instruction word and extract bytecode @ 1 and index @ 2 ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); ++ __ lw(T3, T3, in_bytes(ConstMethod::codes_offset())); ++ // Shift codes right to get the index on the right. 
++ // The bytecode fetched looks like <0xb4><0x2a> ++ __ dsrl(T3, T3, 2 * BitsPerByte); ++ // FIXME: maybe it's wrong ++ __ dsll(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ ++ // T0: local 0 ++ // Rmethod: method ++ // V0: receiver - do not destroy since it is needed for slow path! ++ // T1: scratch use which register instead ? ++ // T3: constant pool cache index ++ // T2: constant pool cache ++ // Rsender: send's sp ++ // check if getfield has been resolved and read constant pool cache entry ++ // check the validity of the cache entry by testing whether _indices field ++ // contains Bytecode::_getfield in b1 byte. ++ assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); ++ ++ __ dsll(T8, T3, Address::times_8); ++ __ move(T1, in_bytes(ConstantPoolCache::base_offset() ++ + ConstantPoolCacheEntry::indices_offset())); ++ __ daddu(T1, T8, T1); ++ __ daddu(T1, T1, T2); ++ __ lw(T1, T1, 0); ++ __ dsrl(T1, T1, 2 * BitsPerByte); ++ __ andi(T1, T1, 0xFF); ++ __ daddiu(T1, T1, (-1) * Bytecodes::_getfield); ++ __ bne(T1, R0, slow_path); ++ __ delayed()->nop(); ++ ++ // Note: constant pool entry is not valid before bytecode is resolved ++ ++ __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ __ daddu(T1, T1, T8); ++ __ daddu(T1, T1, T2); ++ __ lw(AT, T1, 0); ++ ++ __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ daddu(T1, T1, T8); ++ __ daddu(T1, T1, T2); ++ __ lw(T3, T1, 0); ++ ++ Label notByte, notBool, notShort, notChar, notObj; ++ ++ // Need to differentiate between igetfield, agetfield, bgetfield etc. ++ // because they are different sizes. ++ // Use the type from the constant pool cache ++ __ srl(T3, T3, ConstantPoolCacheEntry::tos_state_shift); ++ // Make sure we don't need to mask T3 for tosBits after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // btos = 0 ++ __ bne(T3, R0, notByte); ++ __ delayed()->daddu(T0, T0, AT); ++ ++ __ lb(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //ztos ++ __ bind(notByte); ++ __ daddiu(T1, T3, (-1) * ztos); ++ __ bne(T1, R0, notBool); ++ __ delayed()->nop(); ++ __ lb(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //stos ++ __ bind(notBool); ++ __ daddiu(T1, T3, (-1) * stos); ++ __ bne(T1, R0, notShort); ++ __ delayed()->nop(); ++ __ lh(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //ctos ++ __ bind(notShort); ++ __ daddiu(T1, T3, (-1) * ctos); ++ __ bne(T1, R0, notChar); ++ __ delayed()->nop(); ++ __ lhu(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //atos ++ __ bind(notChar); ++ __ daddiu(T1, T3, (-1) * atos); ++ __ bne(T1, R0, notObj); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ load_heap_oop(V0, Address(T0, 0)); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //itos ++ __ bind(notObj); ++#ifdef ASSERT ++ Label okay; ++ __ daddiu(T1, T3, (-1) * itos); ++ __ beq(T1, R0, okay); ++ __ delayed()->nop(); ++ __ stop("what type is this?"); ++ __ bind(okay); ++#endif // ASSERT ++ __ lw(V0, T0, 0); ++ ++ __ bind(xreturn_path); ++ ++ // _ireturn/_areturn ++ //FIXME ++ __ move(SP, Rsender);//FIXME, set sender's fp to SP ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ } else { ++ (void) generate_normal_entry(false); ++ } ++ ++ return 
entry_point; ++} ++ ++// Method entry for java.lang.ref.Reference.get. ++address InterpreterGenerator::generate_Reference_get_entry(void) { ++#if INCLUDE_ALL_GCS ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_enty. ++ // ++ // Rmethod: Method* ++ ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); ++ ++ if (UseG1GC) { ++ Label slow_path; ++ ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld(V0, SP, 0); ++ ++ __ beq(V0, R0, slow_path); ++ __ delayed()->nop(); ++ ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ ++ // Load the value of the referent field. ++ const Address field_address(V0, referent_offset); ++ __ load_heap_oop(V0, field_address); ++ ++ __ push(RA); ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ __ g1_write_barrier_pre(noreg /* obj */, ++ V0 /* pre_val */, ++ TREG /* thread */, ++ Rmethod /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop(RA); ++ ++ __ jr(RA); ++ __ delayed()->daddu(SP, Rsender, R0); // set sp to sender sp ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ ++ return entry; ++ } ++#endif // INCLUDE_ALL_GCS ++ ++ // If G1 is not enabled then attempt to go through the accessor entry point ++ // Reference.get is an accessor ++ return generate_accessor_entry(); ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
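++// Editor's note -- illustrative sketch, not part of the original Loongson patch. The entry
++// below first derives the locals pointer (LVP) from the parameter count: the caller pushed the
++// arguments so that word n-1 sits at SP and word 0 sits highest. In plain C++ terms (helper
++// name and concrete values are hypothetical):
++//
++//   uintptr_t locals_base(uintptr_t sp, int nparams) {
++//     return sp + (nparams - 1) * wordSize;   // parameter 0 lives highest, word n-1 at sp
++//   }
++//   // e.g. nparams == 3, sp == 0x1000, wordSize == 8  ->  LVP == 0x1010,
++//   // and parameter i is then addressed as LVP - i * wordSize.
++//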
++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
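++  // Editor's note (illustrative, not part of the original patch): the byte store emitted just
++  // below is the hand-written equivalent of roughly
++  //   thread->set_do_not_unlock_if_synchronized(true);
++  // i.e. a one-byte store of 1 at thread + do_not_unlock_if_synchronized_offset(); the flag is
++  // cleared again once the counter and stack-shadow checks further down have passed.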
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ //const Register thread = T2; ++ const Register t = T8; ++ ++ __ get_method(method); ++ __ verify_oop(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ ld(t, method, in_bytes(Method:: const_offset())); ++ __ ld(t, t, in_bytes(ConstMethod::constants_offset())); //?? ++ __ ld(t, t, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld(t, t, mirror_offset); ++ // copy mirror into activation frame ++ //__ sw(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ verify_oop(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++ __ get_thread(thread); ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
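++      // Editor's sketch (simplified and hypothetical -- not the actual serialize_memory
++      // implementation): the per-thread slot is derived from the thread pointer and kept
++      // inside one shared page, something like
++      //   size_t off = (uintptr_t(thread) >> 3) & (page_size - sizeof(int));
++      //   *(volatile int*)(serialize_page_base + off) = 1;   // harmless store, distinct per thread
++      // The VM thread can later write-protect that page, forcing every Java thread through a
++      // fault that serves as the missing memory barrier.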
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label L; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ lw(AT, AT, 0); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(L); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addiu only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT,(int) JavaThread::stack_guard_yellow_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ verify_oop(method); ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. 
++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
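++  // Editor's note (illustrative arithmetic, not part of the original patch): the locals loop
++  // above allocates and zeroes only the non-parameter slots,
++  //   extra = size_of_locals - size_of_parameters;   // both counted in stack-element slots
++  //   for (int k = 0; k < extra; k++) push(0);       // each extra local starts out zero/NULL
++  // so a method with 5 local slots of which 2 are parameters pushes 3 zero words here, while
++  // the parameters stay where the caller pushed them and are reached through LVP.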
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++// Entry points ++// ++// Here we generate the various kind of entries into the interpreter. ++// The two main entry type are generic bytecode methods and native ++// call method. These both come in synchronized and non-synchronized ++// versions but the frame layout they create is very similar. The ++// other method entry types are really just special purpose entries ++// that are really entry and interpretation all in one. These are for ++// trivial methods like accessor, empty, or special math methods. 
++// ++// When control flow reaches any of the entry types for the interpreter ++// the following holds -> ++// ++// Arguments: ++// ++// Rmethod: Method* ++// V0: receiver ++// ++// ++// Stack layout immediately at entry ++// ++// [ parameter n-1 ] <--- sp ++// ... ++// [ parameter 0 ] ++// [ expression stack ] (caller's java expression stack) ++ ++// Assuming that we don't go to one of the trivial specialized entries ++// the stack will look like below when we are ready to execute the ++// first bytecode (or call the native routine). The register usage ++// will be as the template based interpreter expects (see ++// interpreter_mips_64.hpp). ++// ++// local variables follow incoming parameters immediately; i.e. ++// the return address is moved to the end of the locals). ++// ++// [ monitor entry ] <--- sp ++// ... ++// [ monitor entry ] ++// [ monitor block top ] ( the top monitor entry ) ++// [ byte code pointer ] (if native, bcp = 0) ++// [ constant pool cache ] ++// [ Method* ] ++// [ locals offset ] ++// [ sender's sp ] ++// [ sender's fp ] ++// [ return address ] <--- fp ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++address AbstractInterpreterGenerator::generate_method_entry( ++ AbstractInterpreter::MethodKind kind) { ++ // determine code generation flags ++ bool synchronized = false; ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::zerolocals : ++ break; ++ case Interpreter::zerolocals_synchronized: ++ synchronized = true; ++ break; ++ case Interpreter::native : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); ++ break; ++ case Interpreter::native_synchronized : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); ++ break; ++ case Interpreter::empty : ++ entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); ++ break; ++ case Interpreter::accessor : ++ entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); ++ break; ++ case Interpreter::abstract : ++ entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); ++ break; ++ ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : break; ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_sqrt : ++ entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; ++ case Interpreter::java_lang_ref_reference_get: ++ entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; ++ default: ++ fatal(err_msg("unexpected method kind: %d", kind)); ++ break; ++ } ++ if (entry_point) return entry_point; ++ ++ return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); ++} ++ ++// These should never be compiled since the interpreter will prefer ++// the compiled version to the intrinsic version. 
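++// Editor's observation (not part of the original patch): in the generate_method_entry()
++// switch above, only java_lang_math_abs and java_lang_math_sqrt receive a dedicated math
++// entry on this port; the remaining math kinds leave entry_point == NULL and fall through to
++// generate_normal_entry(). can_be_compiled() below nevertheless keeps the full list of math
++// kinds non-compilable, matching the equivalent lists on other ports.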
++bool AbstractInterpreter::can_be_compiled(methodHandle m) { ++ switch (method_kind(m)) { ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_sqrt : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : ++ return false; ++ default: ++ return true; ++ } ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ if (!EnableInvokeDynamic) { ++ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? ++ // Probably, since deoptimization doesn't work yet. 
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); ++ } ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(sp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. 
Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld(A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. 
Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ move(AT, JvmtiThreadState::earlyret_inactive); ++ __ sw(AT, cond_addr); ++ __ sync(); ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ sync(); ++ __ jr(T0); ++ __ delayed()->nop(); ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); ++ dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); ++ lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); ++ aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : 
TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but mips o32 call convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT ++#endif // ! CC_INTERP +diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp +new file mode 100644 +index 0000000000..d879e6dc92 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++ static void prepare_invoke(Register method, Register index, int byte_no, ++ Bytecodes::Code code); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); +diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp +new file mode 100644 +index 0000000000..7415511b99 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp +@@ -0,0 +1,4623 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. ++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address obj, ++ Register val, ++ BarrierSet::Name barrier, ++ bool precise) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ switch (barrier) { ++#if INCLUDE_ALL_GCS ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ // flatten object address if needed ++ if (obj.index() == noreg && obj.disp() == 0) { ++ if (obj.base() != T3) { ++ __ move(T3, obj.base()); ++ } ++ } else { ++ __ lea(T3, obj); ++ } ++ __ g1_write_barrier_pre(T3 /* obj */, ++ T1 /* pre_val */, ++ TREG /* thread */, ++ T9 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ if (val == noreg) { ++ __ store_heap_oop_null(Address(T3, 0)); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. 
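++        // Editor's sketch (simplified, not the actual barrier code): the G1 post-barrier
++        // compares the heap region of the store address with that of the stored value, roughly
++        //   if ((((uintptr_t)store_addr ^ (uintptr_t)new_val) >> HeapRegion::LogOfHRGrainBytes) != 0
++        //       && new_val != NULL)
++        //     dirty_and_enqueue_card(store_addr);   // hypothetical helper name
++        // which is why an uncompressed copy of the new value is kept in new_val below when
++        // compressed oops are in use.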
++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = T1; ++ __ move(new_val, val); ++ } ++ __ store_heap_oop(Address(T3, 0), val); ++ __ g1_write_barrier_post(T3 /* store_adr */, ++ new_val /* new_val */, ++ TREG /* thread */, ++ T9 /* tmp */, ++ T1 /* tmp2 */); ++ } ++ } ++ break; ++#endif // INCLUDE_ALL_GCS ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ // flatten object address if needed ++ if (!precise || (obj.index() == noreg && obj.disp() == 0)) { ++ __ store_check(obj.base()); ++ } else { ++ __ lea(T9, obj); ++ __ store_check(T9); ++ } ++ } ++ } ++ break; ++ case BarrierSet::ModRef: ++ case BarrierSet::Other: ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ } ++ break; ++ default : ++ ShouldNotReachHere(); ++ ++ } ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ { ++ Label L; ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("unexpected tag type in ldc"); ++ __ bind(L); ++ } ++#endif ++ // itos JVM_CONSTANT_Integer only ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ bind(Done); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) 
++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(tmp, i); ++ __ call_VM(result, entry, tmp); ++ ++ __ bind(resolved); ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label Long, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, - JVM_CONSTANT_Double); ++ __ bne(AT, R0, Long); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ // dtos ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(Long); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload() { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { ++ __ gslwx(FSR, FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { ++ __ gsldx(FSR, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } else { ++ __ daddu(AT, SSR, AT); ++ __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { ++ __ gslwxc1(FSF, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } ++ } ++} ++ ++void 
TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { ++ __ gsldxc1(FSF, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } else { ++ __ daddu(AT, SSR, AT); ++ __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { ++ __ gslbx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_SHORT), 8)) { ++ __ gslhx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lh(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ } ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, 
laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0() { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); 
++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { ++ __ gsswx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } else { ++ __ daddu(T2, T2, SSR); ++ __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { ++ __ gssdx(FSR, T3, T2, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } else { ++ __ daddu(T3, T3, T2); ++ __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { ++ __ gsswxc1(FSF, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } else { ++ __ daddu(T2, T2, SSR); ++ __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, 
arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { ++ __ gssdxc1(FSF, T3, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } else { ++ __ daddu(T3, T3, T2); ++ __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { ++ __ gssbx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } else { ++ __ daddu(SSR, T2, SSR); ++ __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_CHAR), 8)) { ++ __ gsshx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++ } else { ++ __ daddu(SSR, T2, SSR); ++ __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++ } ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, 
T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry 
++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ 
switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ Label low, high, done; ++ __ pop(T0); ++ __ pop(R0); ++ __ slt(AT, T0, FSR); ++ __ bne(AT, R0, low); ++ __ delayed()->nop(); ++ ++ __ bne(T0, FSR, high); ++ __ delayed()->nop(); ++ ++ __ li(FSR, (long)0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(low); ++ __ li(FSR, (long)-1); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(high); ++ __ li(FSR, (long)1); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(done); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ Label less, done; ++ ++ __ move(FSR, R0); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ c_eq_s(FTF, FSF); ++ __ bc1t(done); ++ __ delayed()->daddiu(SP, SP, 1 * wordSize); ++ ++ if (unordered_result<0) ++ __ c_ult_s(FTF, FSF); ++ else ++ __ c_olt_s(FTF, FSF); ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ c_eq_d(FTF, FSF); ++ __ bc1t(done); ++ __ delayed()->daddiu(SP, SP, 2 * wordSize); ++ ++ if (unordered_result<0) ++ __ c_ult_d(FTF, FSF); ++ else ++ __ c_olt_d(FTF, FSF); ++ } ++ __ bc1t(less); ++ __ delayed()->nop(); ++ __ move(FSR, 1); ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(less); ++ __ move(FSR, -1); ++ __ bind(done); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // daddu backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, daddu a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ lbu(Rnext, BCP, 0); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ __ lbu(Rnext, BCP, 0); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // Rnext: target bytecode ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lw(T3, V0, nmethod::entry_bci_offset()); ++ __ move(AT, InvalidOSREntryBci); ++ __ beq(AT, T3, dispatch); ++ __ delayed()->nop(); ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. ++ //V0: osr nmethod (osr ok) or NULL (osr not possible) ++ //V1: osr adapter frame return address ++ //Rnext: target bytecode ++ //LVP: locals pointer ++ //BCP: bcp ++ __ move(BCP, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, BCP, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ 
++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ 
// swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. 
++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ { ++ Label set_i, end_of_if; ++ __ slt(AT, key, temp); ++ __ beq(AT, R0, set_i); ++ __ delayed()->nop(); ++ ++ __ b(end_of_if); ++ __ delayed(); __ move(j, h); ++ ++ __ bind(set_i); ++ __ move(i, h); ++ ++ __ bind(end_of_if); ++ } ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) ++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. 
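Editorial note: the commented pseudo-code in fast_binaryswitch above (the Dijkstra/Feijen search) is exactly what the register-level loop implements — i and j live in T3/A7, h in T1, and the byte-swapped array[h].match is compared against the key in FSR. A compilable C++ rendition of that same comment is sketched below; LookupswitchPair here is a stand-in struct, not the HotSpot type:

    #include <cassert>

    struct LookupswitchPair {
        int match;
        int offset;
        int fast_match() const { return match; }
    };

    // Invariant (quoted from the comment above): 0 <= i < j <= n and
    // (a[i] <= key < a[j], or key is smaller than every entry).
    int binary_search(int key, const LookupswitchPair* array, int n) {
        int i = 0;
        int j = n;
        while (i + 1 < j) {
            int h = (i + j) >> 1;            // i < h < j
            if (key < array[h].fast_match()) {
                j = h;
            } else {
                i = h;
            }
        }
        return i;                            // a[i] <= key < a[i+1]
    }

    int main() {
        LookupswitchPair pairs[] = {{1, 10}, {5, 20}, {9, 30}};
        assert(binary_search(5, pairs, 3) == 1);   // exact match at index 1
        assert(binary_search(7, pairs, 3) == 1);   // no exact match: caller re-checks and takes the default
        return 0;
    }

As in the template, an exact hit still has to be re-checked after the loop terminates — the search only guarantees a[i] <= key < a[i+1], so a miss falls through to the default offset.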
++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
++ int i = (int)bytecode(); ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ // resolve first time through ++ address entry; ++ switch (bytecode()) { ++ case Bytecodes::_getstatic : // fall through ++ case Bytecodes::_putstatic : // fall through ++ case Bytecodes::_getfield : // fall through ++ case Bytecodes::_putfield : ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); ++ break; ++ case Bytecodes::_invokevirtual : // fall through ++ case Bytecodes::_invokespecial : // fall through ++ case Bytecodes::_invokestatic : // fall through ++ case Bytecodes::_invokeinterface: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); ++ break; ++ case Bytecodes::_invokehandle: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); ++ break; ++ case Bytecodes::_invokedynamic: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); ++ break; ++ default : ++ fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); ++ break; ++ } ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ verify_oop(obj); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? 
sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ lb(FSR, index, 0); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ lb(FSR, index, 0); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ lw(FSR, index, 0); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(index, 0)); ++ __ push(atos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ lhu(FSR, index, 0); ++ __ push(ctos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ lh(FSR, index, 0); ++ __ push(stos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ 
ld(FSR, index, 0 * wordSize); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ lwc1(FSF, index, 0); ++ __ push(ftos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ ldc1(FSF, index, 0 * wordSize); ++ __ push(dtos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sb(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ sb(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sw(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, 
byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sh(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sh(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sd(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ swc1(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sdc1(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ // fall through to bputfield ++ case Bytecodes::_fast_bputfield: ++ __ sb(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: ++ __ sh(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ sw(FSR, T2, 0); ++ break; ++ case 
Bytecodes::_fast_lputfield: ++ __ sd(FSR, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ swc1(FSF, T2, 0); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ sdc1(FSF, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ lb(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ lh(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ lhu(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ lw(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ lwc1(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ ldc1(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(FSR, 0)); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver 
++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ lw(FSR, T1, 0); ++ } else if (state == atos) { ++ __ load_heap_oop(FSR, Address(T1, 0)); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ lwc1(FSF, T1, 0); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. 
++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. ++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ __ verify_oop(method); ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ __ verify_oop(T2); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ __ verify_oop(Rmethod); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass ++ // Rmethod: method ++ // T3: receiver ++ // T1: flags ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCacheOop.cpp for details. ++ // This code isn't produced by javac, but could be produced by ++ // another compliant java compiler. 
++ Label notMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ __ bind(notMethod); ++ // Get receiver klass into T1 - also a null check ++ //add for compressedoops ++ __ load_klass(T1, T3); ++ __ verify_oop(T1); ++ ++ Label no_such_interface, no_such_method; ++ ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); ++ ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ if (!EnableInvokeDynamic) { ++ // rewriter does not generate this bytecode ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ if (!EnableInvokeDynamic) { ++ // We should not encounter this bytecode if !EnableInvokeDynamic. ++ // The verifier will stop it. However, if we get past the verifier, ++ // this will stop the thread in a reasonable way, without crashing the JVM. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ // get InstanceKlass in T3 ++ __ get_cpool_and_tags(A1, T1); ++ ++ __ dsll(AT, A2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ __ gsldx(T3, A1, AT, sizeof(ConstantPool)); ++ } else { ++ __ daddu(AT, A1, AT); ++ __ ld(T3, AT, sizeof(ConstantPool)); ++ } ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(T2, heap_top, FSR); ++ __ beq(AT, R0, retry); ++ __ delayed()->nop(); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
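Editorial note: the allocation fast path in _new above follows the three-step strategy its comments spell out — bump the TLAB top, otherwise CAS the shared Eden top in a retry loop (the cmpxchg(T2, heap_top, FSR) sequence), otherwise call into the runtime. The stand-alone sketch below models that control flow under assumed names (Tlab, eden_top, slow_path_allocate are illustrative, not HotSpot API):

    #include <atomic>
    #include <cassert>
    #include <cstddef>

    struct Tlab { char* top; char* end; };                 // per-thread allocation buffer
    static std::atomic<char*> eden_top{nullptr};           // shared heap top pointer
    static char* eden_end = nullptr;                       // shared heap end

    static void* slow_path_allocate(std::size_t) { return nullptr; }  // placeholder for the VM slow path

    void* allocate(Tlab& tlab, std::size_t size) {
        // 1) TLAB: thread-local bump pointer, no atomics needed.
        if (tlab.top + size <= tlab.end) {
            void* obj = tlab.top;
            tlab.top += size;
            return obj;
        }
        // 2) Shared Eden: CAS retry loop, as in the cmpxchg/"retry" sequence above.
        char* old_top = eden_top.load();
        while (old_top != nullptr && old_top + size <= eden_end) {
            if (eden_top.compare_exchange_weak(old_top, old_top + size))
                return old_top;                            // won the race
            // lost the race: old_top now holds the new top, re-check bounds and retry
        }
        // 3) Slow path: new TLAB, GC, or OutOfMemoryError.
        return slow_path_allocate(size);
    }

    int main() {
        char buf[64];
        Tlab tlab{buf, buf + sizeof(buf)};
        void* a = allocate(tlab, 16);
        void* b = allocate(tlab, 16);
        assert(a == buf && b == buf + 16);                 // simple bump behaviour
        return 0;
    }

The template's ZeroTLAB/initialize_object split that follows handles what this sketch omits: freshly bumped memory still has to be cleared and given an object header before it is published.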
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ delayed()->nop(); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. 
++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp +new file mode 100644 +index 0000000000..b63274a206 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, ++ Register index = noreg, ++ Register recv = noreg, ++ Register flags = noreg ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp +new file mode 100644 +index 0000000000..6939914356 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp +new file mode 100644 +index 0000000000..a98f70d9ff +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp +new file mode 100644 +index 0000000000..a240fcc2e9 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp +new file mode 100644 +index 0000000000..aef8f0746a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp +@@ -0,0 +1,510 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "vm_version_mips.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define A0 RA0 ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ 
warning("Can't detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = 
get_feature_flags_by_cpucfg(); ++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ 
++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++ ++ if (CriticalJNINatives) { ++ if (FLAG_IS_CMDLINE(CriticalJNINatives)) { 
++ warning("CriticalJNINatives specified, but not supported in this VM"); ++ } ++ FLAG_SET_DEFAULT(CriticalJNINatives, false); ++ } ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp +new file mode 100644 +index 0000000000..0de01e5f64 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/globals_extension.hpp" ++#include "runtime/vm_version.hpp" ++ ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return 
byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.cpp b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp +new file mode 100644 +index 0000000000..86bd74d430 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp +new file mode 100644 +index 0000000000..6a970ea91a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++bool is_Register(); ++Register as_Register(); ++ ++bool is_FloatRegister(); ++FloatRegister as_FloatRegister(); ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp +new file mode 100644 +index 0000000000..77e18ce57d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++inline bool VMRegImpl::is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool VMRegImpl::is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline Register VMRegImpl::as_Register() { ++ ++ assert( is_Register(), "must be"); ++ // Yuk ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister VMRegImpl::as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ // Yuk ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool VMRegImpl::is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp +new file mode 100644 +index 0000000000..7779c58e0a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp +@@ -0,0 +1,301 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips_64.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, ++ oop receiver, ++ int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ const int gs2_code_length = VtableStub::pd_code_size_limit(true); ++ VtableStub* s = new(gs2_code_length) VtableStub(true, vtable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), gs2_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ // compute entry offset (in words) ++ int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ // load methodOop and target address ++ const Register method = Rmethod; ++ int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); ++ guarantee(Assembler::is_simm16(offset), "not a signed 16-bit int"); ++ __ ld_ptr(method, t1, offset); ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* 
VtableStubs::create_itable_stub(int itable_index) { ++ // Note well: pd_code_size_limit is the absolute minimum we can get ++ // away with. If you add code here, bump the code stub size ++ // returned by pd_code_size_limit! ++ const int gs2_code_length = VtableStub::pd_code_size_limit(false); ++ VtableStub* s = new(gs2_code_length) VtableStub(false, itable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), gs2_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif /* PRODUCT */ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if (DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * 
wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if (DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1); ++ guarantee(Assembler::is_simm16(method_offset), "not a signed 16-bit int"); ++ __ ld_ptr(method, AT, method_offset); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_size_limit(bool is_vtable_stub) { ++ if (is_vtable_stub) { ++ return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 16 : 0); ++ } else { ++ return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 
32 : 0); ++ } ++} ++ ++int VtableStub::pd_code_alignment() { ++ return wordSize; ++} +diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +index c1c053e66c..5c90df1079 100644 +--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp ++++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +@@ -1513,6 +1513,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2102,6 +2106,12 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +index 92b73e1c71..45da327efb 100644 +--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp ++++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +@@ -242,20 +242,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +- __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); +-} +- +- +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp +index ba1bce4239..42a73ea5aa 100644 +--- a/hotspot/src/os/linux/vm/os_linux.cpp ++++ b/hotspot/src/os/linux/vm/os_linux.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // no precompiled headers + #include "classfile/classLoader.hpp" + #include "classfile/systemDictionary.hpp" +@@ -1969,7 +1975,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) + {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, + {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"}, + {EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"}, ++ {EM_MIPS, EM_MIPS, ELFCLASS64, ELFDATA2LSB, (char*)"MIPS64 LE"}, + {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, ++#if defined (LOONGARCH64) ++ {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LOONGARCH64"}, ++#endif + {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, + {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, + }; +@@ -1984,6 +1994,8 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) + static Elf32_Half running_arch_code=EM_SPARCV9; + #elif (defined __sparc) && (!defined _LP64) + static Elf32_Half running_arch_code=EM_SPARC; ++ #elif (defined MIPS64) ++ static Elf32_Half running_arch_code=EM_MIPS; + #elif (defined __powerpc64__) + static Elf32_Half running_arch_code=EM_PPC64; + #elif (defined __powerpc__) +@@ -2004,9 +2016,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) + static Elf32_Half running_arch_code=EM_68K; + #elif (defined AARCH64) + static Elf32_Half running_arch_code=EM_AARCH64; ++ #elif (defined LOONGARCH64) ++ static Elf32_Half running_arch_code=EM_LOONGARCH; + #else + #error Method os::dll_load requires that one of following is defined:\ +- IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 ++ IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, __mips64, PARISC, M68K, AARCH64 + #endif + + // Identify compatability class for VM's architecture and library's architecture +@@ -3513,7 +3527,7 @@ size_t os::Linux::find_large_page_size() { + + #ifndef ZERO + large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) +- ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); ++ ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M) MIPS64_ONLY(4 * M) LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. 
// TODO: LA + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); +@@ -5120,7 +5134,12 @@ jint os::init_2(void) + Linux::fast_thread_clock_init(); + + // Allocate a single page and mark it as readable for safepoint polling ++#ifdef OPT_SAFEPOINT ++ void * p = (void *)(0x10000); ++ address polling_page = (address) ::mmap(p, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); ++#else + address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); ++#endif + guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" ); + + os::set_polling_page( polling_page ); +@@ -5155,13 +5174,20 @@ jint os::init_2(void) + // size. Add a page for compiler2 recursion in main thread. + // Add in 2*BytesPerWord times page size to account for VM stack during + // class initialization depending on 32 or 64 bit VM. ++ ++ /* ++ * 2014/1/2: JDK8 requires larger -Xss option. ++ * Some application cannot run with -Xss192K. ++ * We are not sure whether this causes errors, so simply print a warning. ++ */ ++ size_t min_stack_allowed_jdk6 = os::Linux::min_stack_allowed; + os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed, + (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() + + (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size()); + + size_t threadStackSizeInBytes = ThreadStackSize * K; + if (threadStackSizeInBytes != 0 && +- threadStackSizeInBytes < os::Linux::min_stack_allowed) { ++ threadStackSizeInBytes < min_stack_allowed_jdk6) { + tty->print_cr("\nThe stack size specified is too small, " + "Specify at least %dk", + os::Linux::min_stack_allowed/ K); +diff --git a/hotspot/src/os/linux/vm/os_perf_linux.cpp b/hotspot/src/os/linux/vm/os_perf_linux.cpp +index 0d1f75810a..cbc6c0757c 100644 +--- a/hotspot/src/os/linux/vm/os_perf_linux.cpp ++++ b/hotspot/src/os/linux/vm/os_perf_linux.cpp +@@ -50,6 +50,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ext_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_ext_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_ext_loongarch.hpp" ++#endif + + #include + #include +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp +new file mode 100644 +index 0000000000..5ee0965f42 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "runtime/os.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ li(A0, ThreadLocalStorage::thread_index()); ++ push(S5); ++ move(S5, SP); ++ li(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, pthread_getspecific), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..69590ba582 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP ++ ++#include "orderAccess_linux_loongarch.inline.hpp" ++#include "runtime/atomic.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_loongarch.hpp" ++ ++// Implementation of class atomic ++ ++inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } ++ ++inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } ++ ++inline jlong Atomic::load (volatile jlong* src) { return *src; } ++ ++///////////implementation of Atomic::add*///////////////// ++inline jint Atomic::add (jint add_value, volatile jint* dest) { ++ //TODO LA opt amadd ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " add.w %[__tmp], %[__val], %[__ret] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { ++ //TODO LA opt amadd ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " add.d %[__tmp], %[__val], %[__ret] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { ++ return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::inc*///////////////// ++inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } ++inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } ++inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } ++ ++///////////implementation of Atomic::dec*///////////////// ++inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } ++inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } ++inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } ++ ++ ++///////////implementation of Atomic::xchg*///////////////// ++inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { ++ jint __ret, __tmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w 
%[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { ++ intptr_t __ret, __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++ ++} ++ ++inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { ++ return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::cmpxchg*///////////////// ++inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { ++ jint __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { ++ jlong __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { ++ intptr_t __prev, __cmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { ++ return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..4e205c468e +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include <byteswap.h> ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..7d6e11a935 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions.
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp +new file mode 100644 +index 0000000000..8ec3fa8239 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright 
(c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); // 0 => use system default ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..3e050c8d09 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP ++ ++#include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_loongarch.hpp" ++ ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++ ++//implementation of load_acquire ++inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } ++inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } ++inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } ++inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } ++inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } ++inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } ++inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } ++inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } ++inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } ++inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } ++ ++//implementation of load_ptr_acquire ++inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++ ++//implementation of release_store ++inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } ++ ++//implementation of release_store_ptr ++inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } 
++inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { release(); *(void* volatile *)p = v; } ++ ++//implementation of store_fence ++inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } ++ ++//implementation of store_ptr_fence ++inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } ++inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } ++ ++//implementation of release_store_fence ++inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } ++ ++//implementaion of release_store_ptr_fence ++inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } ++inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp +new file mode 100644 +index 0000000000..f2c3df84a1 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp +@@ -0,0 +1,750 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm_linux.h" ++#include "memory/allocation.inline.hpp" ++#include "mutex_linux.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm.h" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "utilities/debug.hpp" ++#include "compiler/disassembler.hpp" ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++void os::initialize_thread(Thread* thr) { ++// Nothing to do. ++} ++ ++address os::Linux::ucontext_get_pc(ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++// By default, gcc always save frame pointer on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ return (intptr_t*)__builtin_frame_address(0); ++} ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = ThreadLocalStorage::get_thread_slow(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (addr < thread->stack_base() && ++ addr >= thread->stack_base() - thread->stack_size()) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ thread->disable_stack_yellow_zone(); ++ if (thread->thread_state() == _thread_in_Java) { ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. 
++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ uc->uc_mcontext.__pc = (greg_t)stub; ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError err(t, sig, pc, info, ucVoid); ++ err.report_and_die(); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++size_t os::Linux::min_stack_allowed = 96 * K; ++ ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. 
Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++ ++// return default stack size for thr_type ++size_t os::Linux::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++size_t os::Linux::default_guard_size(os::ThreadType thr_type) { ++ // Creating guard page is very expensive. Java thread has HotSpot ++ // guard page, only enable glibc guard page for non-Java threads. ++ return (thr_type == java_thread ? 0 : page_size()); ++} ++ ++// Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ JavaThread created by VM does not have glibc ++// | glibc guard page | - guard, attached Java thread usually has ++// | |/ 1 page glibc guard. ++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | HotSpot Guard Pages | - red and yellow pages ++// | |/ ++// +------------------------+ JavaThread::stack_yellow_zone_base() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// Non-Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ ++// | glibc guard page | - usually 1 page ++// | |/ ++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from ++// pthread_attr_getstack() ++ ++static void current_stack_region(address * bottom, size_t * size) { ++ if (os::is_primordial_thread()) { ++ // primordial thread needs special handling because pthread_getattr_np() ++ // may return bogus value. 
++ *bottom = os::Linux::initial_thread_stack_bottom(); ++ *size = os::Linux::initial_thread_stack_size(); ++ } else { ++ pthread_attr_t attr; ++ ++ int rslt = pthread_getattr_np(pthread_self(), &attr); ++ ++ // JVM needs to know exact stack location, abort if it fails ++ if (rslt != 0) { ++ if (rslt == ENOMEM) { ++ vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); ++ } else { ++ fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); ++ } ++ } ++ ++ if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { ++ fatal("Can not locate current stack attributes!"); ++ } ++ ++ pthread_attr_destroy(&attr); ++ ++ } ++ assert(os::current_stack_pointer() >= *bottom && ++ os::current_stack_pointer() < *bottom + *size, "just checking"); ++} ++ ++address os::current_stack_base() { ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return (bottom + size); ++} ++ ++size_t os::current_stack_size() { ++ // stack size includes normal stack and HotSpot guard pages ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return size; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, void *context) { ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); 
print_location(st, uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++void os::print_context(outputStream *st, void *context) { ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); ++ print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. 
For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a7321ae025 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..a1cedcd8cf +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp +new file mode 100644 +index 0000000000..be28a562a1 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++// Map stack pointer (%esp) to thread pointer for faster TLS access ++// ++// Here we use a flat table for better performance. Getting current thread ++// is down to one memory access (read _sp_map[%esp>>12]) in generated code ++// and two in runtime code (-fPIC code needs an extra load for _sp_map). 
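++// On LoongArch the lookup is written as _sp_map[(sp >> PAGE_SHIFT) & mask], with ++// PAGE_SHIFT = 14 and SP_BITLENGTH = 34 when MINIMIZE_RAM_USAGE is defined; see ++// ThreadLocalStorage::thread() in threadLS_linux_loongarch.hpp.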
++// ++// This code assumes stack page is not shared by different threads. It works ++// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). ++// ++// Notice that _sp_map is allocated in the bss segment, which is ZFOD ++// (zero-fill-on-demand). While it reserves 4M address space upfront, ++// actual memory pages are committed on demand. ++// ++// If an application creates and destroys a lot of threads, usually the ++// stack space freed by a thread will soon get reused by new thread ++// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). ++// No memory page in _sp_map is wasted. ++// ++// However, it's still possible that we might end up populating & ++// committing a large fraction of the 4M table over time, but the actual ++// amount of live data in the table could be quite small. The max wastage ++// is less than 4M bytes. If it becomes an issue, we could use madvise() ++// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. ++// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the ++// physical memory page (i.e. similar to MADV_FREE on Solaris). ++ ++#ifdef MINIMIZE_RAM_USAGE ++Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++#endif // MINIMIZE_RAM_USAGE ++ ++void ThreadLocalStorage::generate_code_for_get_thread() { ++ // nothing we can do here for user-level thread ++} ++ ++void ThreadLocalStorage::pd_init() { ++#ifdef MINIMIZE_RAM_USAGE ++ assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), ++ "page size must be multiple of PAGE_SIZE"); ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void ThreadLocalStorage::pd_set_thread(Thread* thread) { ++ os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); ++#ifdef MINIMIZE_RAM_USAGE ++ address stack_top = os::current_stack_base(); ++ size_t stack_size = os::current_stack_size(); ++ ++ for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { ++ int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], ++ "thread exited without detaching from VM??"); ++ _sp_map[index] = thread; ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp +new file mode 100644 +index 0000000000..4fab788a75 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP ++ ++#ifdef MINIMIZE_RAM_USAGE ++ // Processor dependent parts of ThreadLocalStorage ++ //only the low 2G space for user program in Linux ++ ++ #define SP_BITLENGTH 34 ++ #define PAGE_SHIFT 14 ++ #define PAGE_SIZE (1UL << PAGE_SHIFT) ++ ++ static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++ static int _sp_map_low; ++ static int _sp_map_high; ++#endif // MINIMIZE_RAM_USAGE ++ ++public: ++#ifdef MINIMIZE_RAM_USAGE ++ static Thread** sp_map_addr() { return _sp_map; } ++#endif // MINIMIZE_RAM_USAGE ++ ++ static Thread* thread() { ++#ifdef MINIMIZE_RAM_USAGE ++ /* Thread::thread() can also be optimized in the same way as __get_thread() */ ++ //return (Thread*) os::thread_local_storage_at(thread_index()); ++ uintptr_t sp; ++ uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; ++ ++ __asm__ __volatile__ ("addi.d %0, $r29, 0 " : "=r" (sp)); ++ ++ return _sp_map[(sp >> PAGE_SHIFT) & mask]; ++#else ++ return (Thread*) os::thread_local_storage_at(thread_index()); ++#endif // MINIMIZE_RAM_USAGE ++ } ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp +new file mode 100644 +index 0000000000..44f666d61f +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 uses ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } ++ +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp +new file mode 100644 +index 0000000000..d6dd2521f4 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++ } ++ ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 0000000000..0097cadcb7 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp +new file mode 100644 +index 0000000000..80a1538de9 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_loongarch.hpp" ++ +diff --git a/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp +new file mode 100644 +index 0000000000..4ba53d9341 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "runtime/os.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
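++ // The sequence below performs that lookup in generated code: shift SP right by ++ // PAGE_SHIFT, mask the result to the size of _sp_map, scale it by sizeof(Thread*), ++ // add the address of _sp_map, and load the cached Thread* from the table.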
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ move(A0, ThreadLocalStorage::thread_index()); ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, pthread_getspecific)); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..1c7ad605e9 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp +@@ -0,0 +1,258 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP ++ ++#include "orderAccess_linux_mips.inline.hpp" ++#include "runtime/atomic.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_mips.hpp" ++ ++// Implementation of class atomic ++ ++inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } ++ ++inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } ++ ++inline jlong Atomic::load (volatile jlong* src) { return *src; } ++ ++///////////implementation of Atomic::add*///////////////// ++inline jint Atomic::add (jint add_value, volatile jint* dest) { ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync \n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " addu %[__tmp], %[__val], %[__ret] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync \n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " daddu %[__tmp], %[__val], %[__ret] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { ++ return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::inc*///////////////// ++inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } ++inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } ++inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } ++ ++///////////implementation of Atomic::dec*///////////////// ++inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } ++inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } ++inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } ++ ++ ++///////////implementation of 
Atomic::xchg*///////////////// ++inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { ++ jint __ret, __tmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { ++ intptr_t __ret, __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { ++ return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::cmpxchg*///////////////// ++inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { ++ jint __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { ++ jlong __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { ++ intptr_t __prev, __cmp; ++ __asm__ __volatile__ ( ++ " .set push \n\t" ++ " .set mips64\n\t\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ " .set pop \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline void* 
Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { ++ return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..5b5cd10aa5 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include <byteswap.h> ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..73ac34501b +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp +new file mode 100644 +index 0000000000..f1599ac5f1 
+--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. ++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad +new file mode 100644 +index 0000000000..5e38996ffa +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad +@@ -0,0 +1,153 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// mips32/godson2 Linux Architecture Description File ++ ++//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes generate functions which are ++// called by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. Instructions specify two basic values for encoding. ++// They use the ins_encode keyword to specify their encoding class ++// (which must be one of the class names specified in the encoding ++// block), and they use the opcode keyword to specify, in order, their ++// primary, secondary, and tertiary opcode. Only the opcode sections ++// which a particular instruction needs for encoding need to be ++// specified. ++encode %{ ++ // Build emit functions for each basic byte or larger field in the intel ++ // encoding scheme (opcode, rm, sib, immediate), and call them from C++ ++ // code in the enc_class source block. Emit functions will live in the ++ // main source block for now. 
In future, we can generalize this by ++ // adding a syntax that specifies the sizes of fields in an order, ++ // so that the adlc can build the emit functions automagically ++ ++ enc_class linux_breakpoint ++ %{ ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type); ++ %} ++ ++ enc_class call_epilog ++ %{ ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-2)); ++ if(framesize >= 128) { ++ emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood ++ emit_d8(cbuf,0xBC); ++ emit_d8(cbuf,0x24); ++ emit_d32(cbuf,framesize); // Find majik cookie from ESP ++ emit_d32(cbuf, 0xbadb100d); ++ } ++ else { ++ emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood ++ emit_d8(cbuf,0x7C); ++ emit_d8(cbuf,0x24); ++ emit_d8(cbuf,framesize); // Find majik cookie from ESP ++ emit_d32(cbuf, 0xbadb100d); ++ } ++ // jmp EQ around INT3 ++ // QQQ TODO ++ const int jump_around = 5; // size of call to breakpoint, 1 for CC ++ emit_opcode(cbuf, 0x74); ++ emit_d8(cbuf, jump_around); ++ // QQQ temporary ++ emit_break(cbuf); ++ // Die if stack mismatch ++ // emit_opcode(cbuf,0xCC); ++ } ++ %} ++ ++%} ++ ++// INSTRUCTIONS -- Platform dependent ++ ++//----------OS and Locking Instructions---------------------------------------- ++ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. ++instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{ ++%{ ++ match(Set dst (ThreadLocal)); ++ effect(DEF dst, KILL cr); ++ ++ format %{ "MOV EAX, Thread::current()" %} ++ ins_encode( linux_tlsencode(dst) ); ++ ins_pipe( ialu_reg_fat ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere() ++%{ ++ match(Halt); ++ ++ // Use the following format syntax ++ format %{ "int3\t# ShouldNotReachHere" %} ++ // QQQ TODO for now call breakpoint ++ // opcode(0xCC); ++ // ins_encode(Opc); ++ ins_encode(linux_breakpoint); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Platform dependent source ++ ++source ++%{ ++// emit an interrupt that is caught by the debugger ++void emit_break(CodeBuffer& cbuf) { ++ // Debugger doesn't really catch this but best we can do so far QQQ ++#define __ masm. ++ __ lui(T9, Assembler::split_high((int)os::breakpoint)); ++ __ addiu(T9, T9, Assembler::split_low((int)os::breakpoint)); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ emit_break(cbuf); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ //return 5; ++ return 16; ++} ++ ++%} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s +new file mode 100644 +index 0000000000..f87fbf265d +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. 
++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad +new file mode 100644 +index 0000000000..ca4d094738 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad +@@ -0,0 +1,50 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// AMD64 Linux Architecture Description File ++ ++//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes generate functions which are ++// called by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. Instructions specify two basic values for encoding. 
++// They use the ins_encode keyword to specify their encoding class ++// (which must be one of the class names specified in the encoding ++// block), and they use the opcode keyword to specify, in order, their ++// primary, secondary, and tertiary opcode. Only the opcode sections ++// which a particular instruction needs for encoding need to be ++// specified. +diff --git a/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..c9bc169aa5 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP ++ ++#include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_mips.hpp" ++ ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++//implementation of load_acquire ++inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } ++inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } ++inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } ++inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } ++inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } ++inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } ++inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } ++inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } ++inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } ++inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } ++ ++//implementation of load_ptr_acquire ++inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++ ++//implementation of release_store ++inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } ++ ++//implementation of release_store_ptr ++inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } ++inline void OrderAccess::release_store_ptr(volatile void* p, void* 
v) { release(); *(void* volatile *)p = v; } ++ ++//implementation of store_fence ++inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } ++ ++//implementation of store_ptr_fence ++inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } ++inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } ++ ++//implementation of release_store_fence ++inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } ++ ++//implementaion of release_store_ptr_fence ++inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } ++inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp +new file mode 100644 +index 0000000000..43487dab98 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp +@@ -0,0 +1,1015 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm_linux.h" ++#include "memory/allocation.inline.hpp" ++#include "mutex_linux.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm.h" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "utilities/debug.hpp" ++#include "compiler/disassembler.hpp" ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++void os::initialize_thread(Thread* thr) { ++// Nothing to do. ++} ++ ++address os::Linux::ucontext_get_pc(ucontext_t * uc) { ++ //return (address)uc->uc_mcontext.gregs[REG_PC]; ++ return (address)uc->uc_mcontext.pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
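The ucontext_get_pc/ucontext_get_sp/ucontext_get_fp helpers above are the only place the port touches the raw signal-context layout. As a rough stand-alone sketch of the same idea — assuming the Linux/MIPS mcontext_t used by this patch, where the program counter is uc_mcontext.pc and $sp is gregs[29]; other architectures lay this out differently, and the fprintf is illustration only (it is not async-signal-safe):

    #include <signal.h>
    #include <ucontext.h>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Minimal SA_SIGINFO handler: recover pc and sp from the signal context,
    // mirroring what os::Linux::ucontext_get_pc()/ucontext_get_sp() do above.
    static void segv_handler(int sig, siginfo_t* info, void* ucVoid) {
        ucontext_t* uc = (ucontext_t*)ucVoid;
        void* pc = (void*)(uintptr_t)uc->uc_mcontext.pc;        // MIPS-specific field
        void* sp = (void*)(uintptr_t)uc->uc_mcontext.gregs[29]; // $sp is register 29
        fprintf(stderr, "sig %d at %p, pc=%p sp=%p\n", sig, info->si_addr, pc, sp);
        _Exit(1);   // do not return into the faulting instruction
    }

    int main() {
        struct sigaction sa{};
        sa.sa_sigaction = segv_handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGSEGV, &sa, nullptr);
        return 0;
    }
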
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = ThreadLocalStorage::get_thread_slow(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (addr < thread->stack_base() && ++ addr >= thread->stack_base() - thread->stack_size()) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ thread->disable_stack_yellow_zone(); ++ if (thread->thread_state() == _thread_in_Java) { ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. ++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. 
++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. 
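The SIGILL emulation above works by picking the 32-bit instruction word apart by hand. The field extraction can be sketched on its own; the shift and mask values below are taken directly from the handler (major opcode in bits 31..26, fmt in 25..21, ft/fs/fd in 20..16/15..11/10..6, function in 5..0):

    #include <cstdint>
    #include <cstdio>

    // Decode the fields of a MIPS COP1 (FPU) R-type instruction word,
    // using the same shifts and masks as the SIGILL handler above.
    struct Cop1Fields {
        unsigned op;   // bits 31..26: major opcode (0x11 for COP1)
        unsigned fmt;  // bits 25..21: format (0x16 for paired-single)
        unsigned ft;   // bits 20..16
        unsigned fs;   // bits 15..11
        unsigned fd;   // bits 10..6
        unsigned func; // bits  5..0: add/sub/mul/...
    };

    static Cop1Fields decode_cop1(uint32_t inst) {
        Cop1Fields f;
        f.op   = (inst >> 26) & 0x3f;
        f.fmt  = (inst >> 21) & 0x1f;
        f.ft   = (inst >> 16) & 0x1f;
        f.fs   = (inst >> 11) & 0x1f;
        f.fd   = (inst >>  6) & 0x1f;
        f.func =  inst        & 0x3f;
        return f;
    }

    int main() {
        // Sample word for add.ps $f0, $f2, $f4: should decode to
        // op=0x11, fmt=0x16, ft=4, fs=2, fd=0, func=0.
        Cop1Fields f = decode_cop1(0x46c41000);
        printf("op=%#x fmt=%#x ft=%u fs=%u fd=%u func=%u\n",
               f.op, f.fmt, f.ft, f.fs, f.fd, f.func);
        return 0;
    }
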
++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. ++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. 
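The unguard path that follows rounds the faulting address down to its page and flips that page to RWX. The core of that dance — page alignment plus a protection change — can be sketched independently; os::protect_memory(..., MEM_PROT_RWX) in the code below wraps essentially this mprotect call:

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstdint>

    // Make the page containing 'addr' readable, writable and executable,
    // the same effect as the os::protect_memory() call used below.
    static bool unguard_page(void* addr) {
        long page_size = sysconf(_SC_PAGESIZE);
        uintptr_t page_start = (uintptr_t)addr & ~((uintptr_t)page_size - 1); // align down
        return mprotect((void*)page_start, (size_t)page_size,
                        PROT_READ | PROT_WRITE | PROT_EXEC) == 0;
    }
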
++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++ bool instr_spans_page_boundary = ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. ++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ uc->uc_mcontext.pc = (greg_t)stub; ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError err(t, sig, pc, info, ucVoid); ++ err.report_and_die(); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 
1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++size_t os::Linux::min_stack_allowed = 96 * K; ++ ++ ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++ ++// return default stack size for thr_type ++size_t os::Linux::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++size_t os::Linux::default_guard_size(os::ThreadType thr_type) { ++ // Creating guard page is very expensive. Java thread has HotSpot ++ // guard page, only enable glibc guard page for non-Java threads. ++ return (thr_type == java_thread ? 0 : page_size()); ++} ++ ++// Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ JavaThread created by VM does not have glibc ++// | glibc guard page | - guard, attached Java thread usually has ++// | |/ 1 page glibc guard. 
++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | HotSpot Guard Pages | - red and yellow pages ++// | |/ ++// +------------------------+ JavaThread::stack_yellow_zone_base() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// Non-Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ ++// | glibc guard page | - usually 1 page ++// | |/ ++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from ++// pthread_attr_getstack() ++ ++static void current_stack_region(address * bottom, size_t * size) { ++ if (os::is_primordial_thread()) { ++ // primordial thread needs special handling because pthread_getattr_np() ++ // may return bogus value. ++ *bottom = os::Linux::initial_thread_stack_bottom(); ++ *size = os::Linux::initial_thread_stack_size(); ++ } else { ++ pthread_attr_t attr; ++ ++ int rslt = pthread_getattr_np(pthread_self(), &attr); ++ ++ // JVM needs to know exact stack location, abort if it fails ++ if (rslt != 0) { ++ if (rslt == ENOMEM) { ++ vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); ++ } else { ++ fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); ++ } ++ } ++ ++ if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { ++ fatal("Can not locate current stack attributes!"); ++ } ++ ++ pthread_attr_destroy(&attr); ++ ++ } ++ assert(os::current_stack_pointer() >= *bottom && ++ os::current_stack_pointer() < *bottom + *size, "just checking"); ++} ++ ++address os::current_stack_base() { ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return (bottom + size); ++} ++ ++size_t os::current_stack_size() { ++ // stack size includes normal stack and HotSpot guard pages ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return size; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, 
uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); print_location(st, uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++void os::print_context(outputStream *st, void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, 
(intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); ++ print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp +new file mode 100644 +index 0000000000..c07d08156f +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..93490345f0 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp +new file mode 100644 +index 0000000000..be28a562a1 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++// Map stack pointer (%esp) to thread pointer for faster TLS access ++// ++// Here we use a flat table for better performance. Getting current thread ++// is down to one memory access (read _sp_map[%esp>>12]) in generated code ++// and two in runtime code (-fPIC code needs an extra load for _sp_map). ++// ++// This code assumes stack page is not shared by different threads. It works ++// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). ++// ++// Notice that _sp_map is allocated in the bss segment, which is ZFOD ++// (zero-fill-on-demand). While it reserves 4M address space upfront, ++// actual memory pages are committed on demand. ++// ++// If an application creates and destroys a lot of threads, usually the ++// stack space freed by a thread will soon get reused by new thread ++// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). ++// No memory page in _sp_map is wasted. ++// ++// However, it's still possible that we might end up populating & ++// committing a large fraction of the 4M table over time, but the actual ++// amount of live data in the table could be quite small. The max wastage ++// is less than 4M bytes. If it becomes an issue, we could use madvise() ++// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. ++// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the ++// physical memory page (i.e. similar to MADV_FREE on Solaris). 
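The comment above describes the whole trick: the current stack pointer, shifted right by the page shift and masked, indexes a flat table of Thread pointers. A stand-alone sketch of that mapping, assuming the SP_BITLENGTH/PAGE_SHIFT values (34 and 14) defined in threadLS_linux_mips.hpp further down:

    #include <cstddef>
    #include <cstdint>

    // Mirror of the _sp_map lookup used by ThreadLocalStorage::thread() below:
    // every 16K page of stack maps to one slot, so registering a thread fills
    // every slot its stack covers, and lookup is a shift, a mask and one load.
    static const int SP_BITLENGTH = 34;
    static const int PAGE_SHIFT   = 14;              // 16K pages
    static const uintptr_t SP_MASK = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1;

    struct Thread;                                    // opaque in this sketch
    static Thread* sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)];

    static inline size_t sp_to_index(uintptr_t sp) {
        return (sp >> PAGE_SHIFT) & SP_MASK;
    }

    static inline Thread* thread_for_sp(uintptr_t sp) {
        return sp_map[sp_to_index(sp)];               // one load in the fast path
    }

    static void register_thread(Thread* t, uintptr_t stack_top, size_t stack_size) {
        for (uintptr_t p = stack_top - stack_size; p < stack_top; p += (1UL << PAGE_SHIFT))
            sp_map[sp_to_index(p)] = t;
    }
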
++ ++#ifdef MINIMIZE_RAM_USAGE ++Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++#endif // MINIMIZE_RAM_USAGE ++ ++void ThreadLocalStorage::generate_code_for_get_thread() { ++ // nothing we can do here for user-level thread ++} ++ ++void ThreadLocalStorage::pd_init() { ++#ifdef MINIMIZE_RAM_USAGE ++ assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), ++ "page size must be multiple of PAGE_SIZE"); ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void ThreadLocalStorage::pd_set_thread(Thread* thread) { ++ os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); ++#ifdef MINIMIZE_RAM_USAGE ++ address stack_top = os::current_stack_base(); ++ size_t stack_size = os::current_stack_size(); ++ ++ for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { ++ int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], ++ "thread exited without detaching from VM??"); ++ _sp_map[index] = thread; ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp +new file mode 100644 +index 0000000000..e595195e21 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP ++ ++#ifdef MINIMIZE_RAM_USAGE ++ // Processor dependent parts of ThreadLocalStorage ++ //only the low 2G space for user program in Linux ++ ++ #define SP_BITLENGTH 34 ++ #define PAGE_SHIFT 14 ++ #define PAGE_SIZE (1UL << PAGE_SHIFT) ++ ++ static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++ static int _sp_map_low; ++ static int _sp_map_high; ++#endif // MINIMIZE_RAM_USAGE ++ ++public: ++#ifdef MINIMIZE_RAM_USAGE ++ static Thread** sp_map_addr() { return _sp_map; } ++#endif // MINIMIZE_RAM_USAGE ++ ++ static Thread* thread() { ++#ifdef MINIMIZE_RAM_USAGE ++ /* Thread::thread() can also be optimized in the same way as __get_thread() */ ++ //return (Thread*) os::thread_local_storage_at(thread_index()); ++ uintptr_t sp; ++ uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; ++ ++ __asm__ __volatile__ ("daddiu %0, $29, 0 " : "=r" (sp)); ++ ++ return _sp_map[(sp >> PAGE_SHIFT) & mask]; ++#else ++ return (Thread*) os::thread_local_storage_at(thread_index()); ++#endif // MINIMIZE_RAM_USAGE ++ } ++#endif // OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp +new file mode 100644 +index 0000000000..44f666d61f +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 uses ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } ++ +diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp +new file mode 100644 +index 0000000000..cb11c36ae5 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++ } ++ ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp +new file mode 100644 +index 0000000000..b7454bf045 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp +new file mode 100644 +index 0000000000..ce697823b9 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_mips.hpp" +diff --git a/hotspot/src/share/tools/hsdis/Makefile b/hotspot/src/share/tools/hsdis/Makefile +index 0d1b608944..a9754ce2ac 100644 +--- a/hotspot/src/share/tools/hsdis/Makefile ++++ b/hotspot/src/share/tools/hsdis/Makefile +@@ -105,12 +105,25 @@ CFLAGS/sparc += -m32 + endif + CFLAGS += $(CFLAGS/$(ARCH)) + CFLAGS += -fPIC ++ifeq ($(ARCH), mips64) ++CPUINFO = $(shell cat /proc/cpuinfo) ++ifneq ($(findstring Loongson,$(CPUINFO)),) ++CFLAGS += -DLOONGSON ++endif ++endif + OS = linux + LIB_EXT = .so + CC = gcc + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif ++ifeq ($(ARCH), loongarch64) ++DLDFLAGS += -Wl,-z,noexecstack ++CONFIGURE_ARGS += --disable-werror ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff --git a/hotspot/src/share/tools/hsdis/hsdis.c b/hotspot/src/share/tools/hsdis/hsdis.c +index 4fb4964870..f6ef5bea15 100644 +--- a/hotspot/src/share/tools/hsdis/hsdis.c ++++ b/hotspot/src/share/tools/hsdis/hsdis.c +@@ -493,6 +493,16 @@ static const char* native_arch_name() { + #if defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le) + res = "powerpc:common64"; + #endif ++#ifdef LIBARCH_mips64 ++#ifdef LOONGSON ++ res = "mips:loongson_3a"; ++#else ++ res = "mips:isa64"; ++#endif ++#endif ++#ifdef LIBARCH_loongarch64 ++ res = "loongarch"; ++#endif + #ifdef LIBARCH_aarch64 + res = "aarch64"; + #endif +diff --git a/hotspot/src/share/vm/adlc/main.cpp b/hotspot/src/share/vm/adlc/main.cpp +index 52044f12d4..50c585872e 100644 +--- a/hotspot/src/share/vm/adlc/main.cpp ++++ b/hotspot/src/share/vm/adlc/main.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // MAIN.CPP - Entry point for the Architecture Description Language Compiler + #include "adlc.hpp" + +@@ -234,6 +240,14 @@ int main(int argc, char *argv[]) + AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp"); + #endif ++#ifdef TARGET_ARCH_mips ++ AD.addInclude(AD._CPP_file, "nativeInst_mips.hpp"); ++ AD.addInclude(AD._CPP_file, "vmreg_mips.inline.hpp"); ++#endif ++#ifdef TARGET_ARCH_loongarch ++ AD.addInclude(AD._CPP_file, "nativeInst_loongarch.hpp"); ++ AD.addInclude(AD._CPP_file, "vmreg_loongarch.inline.hpp"); ++#endif + #ifdef TARGET_ARCH_aarch64 + AD.addInclude(AD._CPP_file, "assembler_aarch64.inline.hpp"); + AD.addInclude(AD._CPP_file, "nativeInst_aarch64.hpp"); +diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp +index f7f1ae1d36..572aa997ca 100644 +--- a/hotspot/src/share/vm/asm/assembler.hpp ++++ b/hotspot/src/share/vm/asm/assembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_ASM_ASSEMBLER_HPP + #define SHARE_VM_ASM_ASSEMBLER_HPP + +@@ -53,6 +59,14 @@ + # include "register_ppc.hpp" + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "register_mips.hpp" ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++# include "vm_version_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "register_aarch64.hpp" + # include "vm_version_aarch64.hpp" +@@ -468,6 +482,12 @@ class AbstractAssembler : public ResourceObj { + #ifdef TARGET_ARCH_ppc + # include "assembler_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "assembler_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "assembler_loongarch.hpp" ++#endif + + + #endif // SHARE_VM_ASM_ASSEMBLER_HPP +diff --git a/hotspot/src/share/vm/asm/assembler.inline.hpp b/hotspot/src/share/vm/asm/assembler.inline.hpp +index 1a48cb3171..8ac90e1474 100644 +--- a/hotspot/src/share/vm/asm/assembler.inline.hpp ++++ b/hotspot/src/share/vm/asm/assembler.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_ASSEMBLER_INLINE_HPP + #define SHARE_VM_ASM_ASSEMBLER_INLINE_HPP + +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "assembler_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "assembler_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "assembler_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "assembler_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/asm/codeBuffer.cpp b/hotspot/src/share/vm/asm/codeBuffer.cpp +index d94ac40655..f6b578111f 100644 +--- a/hotspot/src/share/vm/asm/codeBuffer.cpp ++++ b/hotspot/src/share/vm/asm/codeBuffer.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2015, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -323,6 +329,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp +index 02b619ad77..c04560a0bc 100644 +--- a/hotspot/src/share/vm/asm/codeBuffer.hpp ++++ b/hotspot/src/share/vm/asm/codeBuffer.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_ASM_CODEBUFFER_HPP + #define SHARE_VM_ASM_CODEBUFFER_HPP + +@@ -635,6 +641,12 @@ class CodeBuffer: public StackObj { + #ifdef TARGET_ARCH_ppc + # include "codeBuffer_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "codeBuffer_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "codeBuffer_loongarch.hpp" ++#endif + + }; + +diff --git a/hotspot/src/share/vm/asm/macroAssembler.hpp b/hotspot/src/share/vm/asm/macroAssembler.hpp +index 1482eb630b..0be415b6c5 100644 +--- a/hotspot/src/share/vm/asm/macroAssembler.hpp ++++ b/hotspot/src/share/vm/asm/macroAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_HPP + #define SHARE_VM_ASM_MACROASSEMBLER_HPP + +@@ -45,5 +51,10 @@ + #ifdef TARGET_ARCH_aarch64 + # include "macroAssembler_aarch64.hpp" + #endif +- ++#ifdef TARGET_ARCH_mips ++# include "macroAssembler_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "macroAssembler_loongarch.hpp" ++#endif + #endif // SHARE_VM_ASM_MACROASSEMBLER_HPP +diff --git a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp +index db3daa52e9..6f4e523c59 100644 +--- a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp ++++ b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP + #define SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP + +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "macroAssembler_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "macroAssembler_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "macroAssembler_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "macroAssembler_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/asm/register.hpp b/hotspot/src/share/vm/asm/register.hpp +index c500890181..6a20929e59 100644 +--- a/hotspot/src/share/vm/asm/register.hpp ++++ b/hotspot/src/share/vm/asm/register.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_REGISTER_HPP + #define SHARE_VM_ASM_REGISTER_HPP + +@@ -108,6 +114,12 @@ const type name = ((type)name##_##type##EnumValue) + #ifdef TARGET_ARCH_ppc + # include "register_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "register_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "register_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_Defs.hpp b/hotspot/src/share/vm/c1/c1_Defs.hpp +index b0cd763739..b42b9de1b5 100644 +--- a/hotspot/src/share/vm/c1/c1_Defs.hpp ++++ b/hotspot/src/share/vm/c1/c1_Defs.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. 
These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_DEFS_HPP + #define SHARE_VM_C1_C1_DEFS_HPP + +@@ -29,6 +35,9 @@ + #ifdef TARGET_ARCH_x86 + # include "register_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "register_aarch64.hpp" + #endif +@@ -56,6 +65,9 @@ enum { + #ifdef TARGET_ARCH_x86 + # include "c1_Defs_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_Defs_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_Defs_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp +index f07e97a4d3..6bc367a897 100644 +--- a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp ++++ b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_FPUSTACKSIM_HPP + #define SHARE_VM_C1_C1_FPUSTACKSIM_HPP + +@@ -35,6 +41,9 @@ class FpuStackSim; + #ifdef TARGET_ARCH_x86 + # include "c1_FpuStackSim_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_FpuStackSim_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_FpuStackSim_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.cpp b/hotspot/src/share/vm/c1/c1_FrameMap.cpp +index 1dac94d58c..b1e37ec41c 100644 +--- a/hotspot/src/share/vm/c1/c1_FrameMap.cpp ++++ b/hotspot/src/share/vm/c1/c1_FrameMap.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_FrameMap.hpp" + #include "c1/c1_LIR.hpp" +@@ -29,6 +35,9 @@ + #ifdef TARGET_ARCH_x86 + # include "vmreg_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vmreg_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.hpp b/hotspot/src/share/vm/c1/c1_FrameMap.hpp +index 41571e3d16..c0e7b28ea4 100644 +--- a/hotspot/src/share/vm/c1/c1_FrameMap.hpp ++++ b/hotspot/src/share/vm/c1/c1_FrameMap.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_FRAMEMAP_HPP + #define SHARE_VM_C1_C1_FRAMEMAP_HPP + +@@ -85,6 +91,9 @@ class FrameMap : public CompilationResourceObj { + #ifdef TARGET_ARCH_x86 + # include "c1_FrameMap_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_FrameMap_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_FrameMap_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp +index fa37e7a046..5d33d3f7a0 100644 +--- a/hotspot/src/share/vm/c1/c1_LIR.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIR.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. 
These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_InstructionPrinter.hpp" + #include "c1/c1_LIR.hpp" +@@ -79,6 +85,17 @@ FloatRegister LIR_OprDesc::as_double_reg() const { + + #endif + ++#if defined(LOONGARCH64) ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++#endif + + LIR_Opr LIR_OprFact::illegalOpr = LIR_OprFact::illegal(); + +@@ -149,13 +166,19 @@ void LIR_Address::verify0() const { + #endif + #ifdef _LP64 + assert(base()->is_cpu_register(), "wrong base operand"); +-#ifndef AARCH64 ++#if !defined(AARCH64) && !defined(LOONGARCH64) + assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); + #else + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); + #endif ++#ifdef LOONGARCH64 ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++#else + assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); ++#endif + #else + assert(base()->is_single_cpu(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand"); +@@ -258,8 +281,6 @@ bool LIR_OprDesc::is_oop() const { + } + } + +- +- + void LIR_Op2::verify() const { + #ifdef ASSERT + switch (code()) { +@@ -301,6 +322,18 @@ void LIR_Op2::verify() const { + #endif + } + ++void LIR_Op4::verify() const { ++#ifdef ASSERT ++ switch (code()) { ++ case lir_cmp_cmove: ++ break; ++ ++ default: ++ assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), ++ "can't produce oops from arith"); ++ } ++#endif ++} + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -358,6 +391,55 @@ void LIR_OpBranch::negate_cond() { + } + } + ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(stub->entry()) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(stub) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(NULL) ++ , _stub(NULL) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(ublock) ++ , _stub(NULL) { ++} ++ ++void LIR_OpCmpBranch::change_block(BlockBegin* b) { ++ assert(_block != NULL, "must have old block"); ++ assert(_block->label() == label(), "must be equal"); ++ ++ _block = b; ++ _label = b->label(); ++} ++ ++void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { ++ assert(_ublock != NULL, "must have old block"); ++ ++ _ublock = b; ++} ++ ++void LIR_OpCmpBranch::negate_cond() { ++ switch (condition()) { ++ case lir_cond_equal: set_condition(lir_cond_notEqual); break; ++ case lir_cond_notEqual: 
set_condition(lir_cond_equal); break; ++ case lir_cond_less: set_condition(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_condition(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_condition(lir_cond_less); break; ++ case lir_cond_greater: set_condition(lir_cond_lessEqual); break; ++ default: ShouldNotReachHere(); ++ } ++} + + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, + LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, +@@ -560,10 +642,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); +-#if defined(PPC) || defined(AARCH64) +- if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); +- if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +-#endif ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + do_stub(opConvert->_stub); + + break; +@@ -661,6 +740,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_OpCmpBranch; ++ case lir_cmp_branch: // may have info, input and result register always invalid ++ case lir_cmp_float_branch: // may have info, input and result register always invalid ++ { ++ assert(op->as_OpCmpBranch() != NULL, "must be"); ++ LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; ++ assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && ++ opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opCmpBranch->_info) do_info(opCmpBranch->_info); ++ if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); ++ if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); ++ if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); ++ if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); ++ assert(opCmpBranch->_result->is_illegal(), "not used"); ++ ++ break; ++ } ++ + // special handling for cmove: right input operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmove: +@@ -806,6 +904,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_Op4 ++ // special handling for cmp cmove: src2(opr4) operand must not be equal ++ // to the result operand, otherwise the backend fails ++ case lir_cmp_cmove: ++ { ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; ++ ++ assert(op4->_info == NULL, "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && ++ op4->_opr3->is_valid() && op4->_opr4->is_valid() && ++ op4->_result->is_valid(), "used"); ++ ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ do_input(op4->_opr3); ++ do_input(op4->_opr4); ++ do_temp(op4->_opr4); ++ do_output(op4->_result); ++ ++ break; ++ } ++ + + // LIR_OpJavaCall + case lir_static_call: +@@ -1121,6 +1242,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { + masm->emit_op2(this); + } + ++void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { ++ masm->emit_opCmpBranch(this); ++ if (stub()) { ++ masm->append_code_stub(stub()); ++ } ++} ++ + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { + masm->emit_alloc_array(this); + masm->append_code_stub(stub()); +@@ -1141,6 +1269,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1381,7 
+1513,6 @@ void LIR_List::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int + info)); + } + +- + void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info) { + append(new LIR_Op2( + lir_cmp, +@@ -1391,6 +1522,17 @@ void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* ad + info)); + } + ++void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { ++ if (deoptimize_on_null) { ++ // Emit an explicit null check and deoptimize if opr is null ++ CodeStub* deopt = new DeoptimizeStub(info); ++ cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); ++ } else { ++ // Emit an implicit null check ++ append(new LIR_Op1(lir_null_check, opr, info)); ++ } ++} ++ + void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, + int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub) { + append(new LIR_OpAllocObj( +@@ -1520,18 +1662,6 @@ void LIR_List::store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr + append(c); + } + +-void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { +- if (deoptimize_on_null) { +- // Emit an explicit null check and deoptimize if opr is null +- CodeStub* deopt = new DeoptimizeStub(info); +- cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); +- branch(lir_cond_equal, T_OBJECT, deopt); +- } else { +- // Emit an implicit null check +- append(new LIR_Op1(lir_null_check, opr, info)); +- } +-} +- + void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result) { + append(new LIR_OpCompareAndSwap(lir_cas_long, addr, cmp_value, new_value, t1, t2, result)); +@@ -1780,6 +1910,8 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; ++ case lir_cmp_branch: s = "cmp_branch"; break; ++ case lir_cmp_float_branch: s = "cmp_fbranch"; break; + case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; +@@ -1809,6 +1941,8 @@ const char * LIR_Op::name() const { + // LIR_Op3 + case lir_idiv: s = "idiv"; break; + case lir_irem: s = "irem"; break; ++ // LIR_Op4 ++ case lir_cmp_cmove: s = "cmp_cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1960,6 +2094,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { + } + } + ++// LIR_OpCmpBranch ++void LIR_OpCmpBranch::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ if (block() != NULL) { ++ out->print("[B%d] ", block()->block_id()); ++ } else if (stub() != NULL) { ++ out->print("["); ++ stub()->print_name(out); ++ out->print(": " INTPTR_FORMAT "]", p2i(stub())); ++ if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); ++ } else { ++ out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); ++ } ++ if (ublock() != NULL) { ++ out->print("unordered: [B%d] ", ublock()->block_id()); ++ } ++} ++ + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { + switch(cond) { + case lir_cond_equal: out->print("[EQ]"); break; +@@ -1980,12 +2134,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { + print_bytecode(out, bytecode()); + 
in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +-#if defined(PPC) || defined(AARCH64) +- if(tmp1()->is_valid()) { +- tmp1()->print(out); out->print(" "); +- tmp2()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } +-#endif + } + + void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { +@@ -2031,9 +2182,6 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +- if (code() == lir_cmove) { +- print_condition(out, condition()); out->print(" "); +- } + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out); out->print(" "); } +@@ -2082,6 +2230,18 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ if (code() == lir_cmp_cmove) { ++ print_condition(out, condition()); out->print(" "); ++ } ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++ + + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +@@ -2095,10 +2255,14 @@ void LIR_OpLock::print_instr(outputStream* out) const { + + #ifdef ASSERT + void LIR_OpAssert::print_instr(outputStream* out) const { ++ tty->print_cr("function LIR_OpAssert::print_instr unimplemented yet! "); ++ Unimplemented(); ++ /* + print_condition(out, condition()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(", \""); + out->print("%s", msg()); out->print("\""); ++ */ + } + #endif + +diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp +index 24b8620211..aec77afe1f 100644 +--- a/hotspot/src/share/vm/c1/c1_LIR.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIR.hpp +@@ -22,6 +22,11 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ + #ifndef SHARE_VM_C1_C1_LIR_HPP + #define SHARE_VM_C1_C1_LIR_HPP + +@@ -452,7 +457,7 @@ class LIR_OprDesc: public CompilationResourceObj { + // for compatibility with RInfo + int fpu () const { return lo_reg_half(); } + #endif +-#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) ++#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) || defined(LOONGARCH) + FloatRegister as_float_reg () const; + FloatRegister as_double_reg () const; + #endif +@@ -542,7 +547,7 @@ class LIR_Address: public LIR_OprPtr { + , _type(type) + , _disp(0) { verify(); } + +-#if defined(X86) || defined(ARM) || defined(AARCH64) ++#if defined(X86) || defined(ARM) || defined(AARCH64) || defined(LOONGARCH) + LIR_Address(LIR_Opr base, LIR_Opr index, Scale scale, intx disp, BasicType type): + _base(base) + , _index(index) +@@ -658,7 +663,13 @@ class LIR_OprFact: public AllStatic { + LIR_OprDesc::double_type | + LIR_OprDesc::cpu_register | + LIR_OprDesc::double_size); } +-#endif // PPC ++#elif defined(LOONGARCH) ++ static LIR_Opr double_fpu(int reg) { return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | ++ (reg << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); } ++#endif // LOONGARCH + + static LIR_Opr virtual_register(int index, BasicType type) { + LIR_Opr res; +@@ -872,9 +883,11 @@ class LIR_OpConvert; + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; ++class LIR_OpCmpBranch; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -943,6 +956,8 @@ enum LIR_Code { + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++ , lir_cmp_branch ++ , lir_cmp_float_branch + , lir_cmove + , lir_add + , lir_sub +@@ -976,6 +991,9 @@ enum LIR_Code { + , lir_idiv + , lir_irem + , end_op3 ++ , begin_op4 ++ , lir_cmp_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1139,12 +1157,14 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } + virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } + virtual LIR_OpBranch* as_OpBranch() { return NULL; } ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } + virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } + virtual LIR_OpConvert* as_OpConvert() { return NULL; } + virtual LIR_Op0* as_Op0() { return NULL; } + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1474,37 +1494,18 @@ class LIR_OpConvert: public LIR_Op1 { + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; +-#if defined(PPC) || defined(AARCH64) +- LIR_Opr _tmp1; +- LIR_Opr _tmp2; +-#endif ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _stub(stub) +-#ifdef PPC +- , _tmp1(LIR_OprDesc::illegalOpr()) +- , _tmp2(LIR_OprDesc::illegalOpr()) +-#endif ++ , _tmp(tmp) + , _bytecode(code) {} + +-#if defined(PPC) || defined(AARCH64) +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, 
LIR_Opr result, ConversionStub* stub +- ,LIR_Opr tmp1, LIR_Opr tmp2) +- : LIR_Op1(lir_convert, opr, result) +- , _stub(stub) +- , _tmp1(tmp1) +- , _tmp2(tmp2) +- , _bytecode(code) {} +-#endif +- + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } +-#if defined(PPC) || defined(AARCH64) +- LIR_Opr tmp1() const { return _tmp1; } +- LIR_Opr tmp2() const { return _tmp2; } +-#endif ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1659,7 +1660,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1691,7 +1692,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1707,7 +1708,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1719,10 +1720,12 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); ++ return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); ++ _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1736,6 +1739,43 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpCmpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(lbl) ++ , _block(NULL) ++ , 
_ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); ++ ++ // for unordered comparisons ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1776,7 +1816,6 @@ class LIR_OpAllocArray : public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + +- + class LIR_Op3: public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1800,6 +1839,48 @@ class LIR_Op3: public LIR_Op { + }; + + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ ++ private: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Condition _condition; ++ ++ void verify() const; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) { ++ assert(is_in_range(code, begin_op4, end_op4), "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Condition condition() const { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; ++ } ++ void set_condition(LIR_Condition condition) { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; ++ } ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + //-------------------------------- + class LabelObj: public CompilationResourceObj { + private: +@@ -2141,17 +2222,9 @@ class LIR_List: public CompilationResourceObj { + + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + +-#ifdef PPC +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_OpConvert(code, left, dst, NULL, tmp1, tmp2)); } +-#endif +-#if defined(AARCH64) +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, +- ConversionStub* stub = NULL, LIR_Opr tmp1 = LIR_OprDesc::illegalOpr()) { +- append(new LIR_OpConvert(code, left, dst, stub, tmp1, LIR_OprDesc::illegalOpr())); ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); + } +-#else +- void convert(Bytecodes::Code code, LIR_Opr left, 
LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } +-#endif + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2256,6 +2329,48 @@ class LIR_List: public CompilationResourceObj { + append(new LIR_OpBranch(cond, type, block, unordered)); + } + ++#if defined(X86) || defined(AARCH64) ++ ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++ } ++ ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++ } ++ ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++ } ++ ++#endif ++ ++#ifdef LOONGARCH ++ ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); ++ } ++ ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); ++ } ++ ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); ++ } ++ ++#endif ++ ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); ++ } ++ + void shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); + void shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); + void unsigned_shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); +diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +index e5cd19f17a..a18c53008b 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_Compilation.hpp" + #include "c1/c1_Instruction.hpp" +@@ -34,6 +40,10 @@ + # include "nativeInst_x86.hpp" + # include "vmreg_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "nativeInst_aarch64.hpp" + # include "vmreg_aarch64.inline.hpp" +@@ -811,6 +821,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch (op->code()) { ++ case lir_cmp_cmove: ++ cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++ + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + } +diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +index 1a68d458d2..ac0f4e7a46 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_LIRASSEMBLER_HPP + #define SHARE_VM_C1_C1_LIRASSEMBLER_HPP + +@@ -195,7 +201,9 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); ++ void emit_opCmpBranch(LIR_OpCmpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); + void emit_updatecrc32(LIR_OpUpdateCRC32* op); +@@ -227,6 +235,7 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); ++ void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); +@@ -265,6 +274,9 @@ class LIR_Assembler: public CompilationResourceObj { + #ifdef TARGET_ARCH_x86 + # include "c1_LIRAssembler_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_LIRAssembler_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_LIRAssembler_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +index 837553ddb6..c66f3102b9 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_Defs.hpp" + #include "c1/c1_Compilation.hpp" +@@ -482,13 +488,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { + CodeStub* stub = new RangeCheckStub(range_check_info, index); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), +- index->as_jint(), null_check_info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), ++ index->as_jint(), stub, null_check_info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, array, +- arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), ++ T_INT, stub, null_check_info); // forward branch + } + } + +@@ -496,12 +500,10 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + CodeStub* stub = new RangeCheckStub(info, index, true); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), stub, info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, buffer, +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, ++ java_nio_Buffer::limit_offset(), T_INT, stub, info); // forward branch + } + __ move(index, result); + } +@@ -934,7 +936,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { + return tmp; + } + +-void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { ++void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { + if (if_instr->should_profile()) { + ciMethod* method = if_instr->profiled_method(); + assert(method != NULL, "method should be set if branch is profiled"); +@@ -955,10 +957,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { + __ metadata2reg(md->constant_encoding(), md_reg); + + LIR_Opr data_offset_reg = new_pointer_register(); +- __ cmove(lir_cond(cond), +- LIR_OprFact::intptrConst(taken_count_offset), +- LIR_OprFact::intptrConst(not_taken_count_offset), +- data_offset_reg, as_BasicType(if_instr->x()->type())); ++ if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { ++ __ cmove(lir_cond(cond), ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } else { ++ __ cmp_cmove(lir_cond(cond), left, right, ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } + + // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
+ LIR_Opr data_reg = new_pointer_register(); +@@ -1305,8 +1314,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { + } + + __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); +- __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); +- __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); ++ __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), ++ LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + } + + // Example: Thread.currentThread() +@@ -1499,7 +1508,6 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p + // Read the marking-in-progress flag. + LIR_Opr flag_val = new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -1528,7 +1536,7 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p + slow = new G1PreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -1586,10 +1594,8 @@ void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_Opr + } + assert(new_val->is_register(), "must be a register at this point"); + +- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); +- + CodeStub* slow = new G1PostBarrierStub(addr, new_val); +- __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); ++ __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -1859,12 +1865,10 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new RangeCheckStub(info, index.result(), true); + if (index.result()->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); ++ cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), ++ java_nio_Buffer::limit_offset(), T_INT, stub, info); + } + __ move(index.result(), result); + } else { +@@ -1945,8 +1949,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { + } else if (use_length) { + // TODO: use a (modified) version of array_range_check that does not require a + // constant length to be loaded to a register +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // The range check performs the null check, so clear it out for the load +@@ -2128,7 +2132,7 @@ void LIRGenerator::do_UnsafeGetRaw(UnsafeGetRaw* x) { + assert(index_op->type() == T_INT, "only int 
constants supported"); + addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); + } else { +-#if defined(X86) || defined(AARCH64) ++#if defined(X86) || defined(AARCH64) || defined(LOONGARCH) + addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); + #elif defined(GENERATE_ADDRESS_IS_PREFERRED) + addr = generate_address(base_op, index_op, log2_scale, 0, dst_type); +@@ -2343,19 +2347,18 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { + + if (off.type()->is_int()) { + referent_off = LIR_OprFact::intConst(java_lang_ref_Reference::referent_offset); ++ __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_INT, Lcont->label()); + } else { + assert(off.type()->is_long(), "what else?"); + referent_off = new_register(T_LONG); + __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); ++ __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_LONG, Lcont->label()); + } +- __ cmp(lir_cond_notEqual, off.result(), referent_off); +- __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue +- __ cmp(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, Lcont->label()); ++ __ cmp_branch(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, Lcont->label()); + } + LIR_Opr src_klass = new_register(T_METADATA); + if (gen_type_check) { +@@ -2365,8 +2368,7 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = new_register(T_INT); + __ move(reference_type_addr, reference_type); +- __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); +- __ branch(lir_cond_equal, T_INT, Lcont->label()); ++ __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, Lcont->label()); + } + { + // We have determined that src->_klass->_reference_type != REF_NONE +@@ -2446,19 +2448,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi + int high_key = one_range->high_key(); + BlockBegin* dest = one_range->sux(); + if (low_key == high_key) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + } else if (high_key - low_key == 1) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); +- __ cmp(lir_cond_equal, value, high_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); + } else { + LabelObj* L = new LabelObj(); +- __ cmp(lir_cond_less, value, low_key); +- __ branch(lir_cond_less, T_INT, L->label()); +- __ cmp(lir_cond_lessEqual, value, high_key); +- __ branch(lir_cond_lessEqual, T_INT, dest); ++ __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); ++ __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); + __ branch_destination(L->label()); + } + } +@@ -2545,8 +2542,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { + do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); + } else { + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, i + lo_key); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ 
cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2571,8 +2567,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { + } else { + int len = x->length(); + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, x->key_at(i)); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2624,7 +2619,6 @@ void LIRGenerator::do_Goto(Goto* x) { + } + LIR_Opr md_reg = new_register(T_METADATA); + __ metadata2reg(md->constant_encoding(), md_reg); +- + increment_counter(new LIR_Address(md_reg, offset, + NOT_LP64(T_INT) LP64_ONLY(T_LONG)), DataLayout::counter_increment); + } +@@ -3078,8 +3072,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { + f_val.dont_load_item(); + LIR_Opr reg = rlock_result(x); + +- __ cmp(lir_cond(x->cond()), left.result(), right.result()); +- __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); ++ __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), ++ t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } + + #ifdef JFR_HAVE_INTRINSICS +@@ -3119,8 +3113,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { + T_OBJECT); + LIR_Opr result = rlock_result(x); + __ move_wide(jobj_addr, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, L_end->label()); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::oopConst(0), T_OBJECT, L_end->label()); + __ move_wide(new LIR_Address(result, T_OBJECT), result); + + __ branch_destination(L_end->label()); +@@ -3484,10 +3477,9 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, + LIR_Opr meth = new_register(T_METADATA); + __ metadata2reg(method->constant_encoding(), meth); + __ logical_and(result, mask, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); + // The bci for info can point to cmp for if's we want the if bci + CodeStub* overflow = new CounterOverflowStub(info, bci, meth); +- __ branch(lir_cond_equal, T_INT, overflow); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); + __ branch_destination(overflow->continuation()); + } + } +@@ -3599,8 +3591,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { + CodeEmitInfo *info = state_for(x, x->state()); + CodeStub* stub = new PredicateFailedStub(info); + +- __ cmp(lir_cond(cond), left, right); +- __ branch(lir_cond(cond), right->type(), stub); ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); + } + } + +@@ -3748,8 +3739,7 @@ LIR_Opr LIRGenerator::maybe_mask_boolean(StoreIndexed* x, LIR_Opr array, LIR_Opr + __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); +- __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); ++ __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), value_fixed, value, value_fixed, T_BYTE); + value = value_fixed; + } + return value; +diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +index 27be79fee1..57c253db69 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ 
* This file has been modified by Loongson Technology in 2015. These ++ * modifications are Copyright (c) 2015 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_LIRGENERATOR_HPP + #define SHARE_VM_C1_C1_LIRGENERATOR_HPP + +@@ -246,6 +252,9 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void do_getClass(Intrinsic* x); + void do_currentThread(Intrinsic* x); + void do_MathIntrinsic(Intrinsic* x); ++#if defined(LOONGARCH64) ++ void do_LibmIntrinsic(Intrinsic* x); ++#endif + void do_ArrayCopy(Intrinsic* x); + void do_CompareAndSwap(Intrinsic* x, ValueType* type); + void do_NIOCheckIndex(Intrinsic* x); +@@ -335,8 +344,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); + + // machine dependent +- void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); +- void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); ++ template ++ void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); ++ template ++ void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info); + + void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); +@@ -364,7 +375,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + + LIR_Opr safepoint_poll_register(); + +- void profile_branch(If* if_instr, If::Condition cond); ++ void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); + void increment_event_counter_impl(CodeEmitInfo* info, + ciMethod *method, int frequency, + int bci, bool backedge, bool notify); +diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.cpp b/hotspot/src/share/vm/c1/c1_LinearScan.cpp +index 1f6281bf25..4549ff0928 100644 +--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp ++++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_CFGPrinter.hpp" + #include "c1/c1_CodeStubs.hpp" +@@ -35,6 +41,9 @@ + #ifdef TARGET_ARCH_x86 + # include "vmreg_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vmreg_aarch64.inline.hpp" + #endif +@@ -1256,6 +1265,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { + LIR_Opr move_from = cmove->in_opr1(); + LIR_Opr move_to = cmove->result_opr(); + ++ if (move_to->is_register() && move_from->is_register()) { ++ Interval* from = interval_at(reg_num(move_from)); ++ Interval* to = interval_at(reg_num(move_to)); ++ if (from != NULL && to != NULL) { ++ to->set_register_hint(from); ++ TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); ++ } ++ } ++ break; ++ } ++ case lir_cmp_cmove: { ++ assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++ ++ LIR_Opr move_from = cmove->in_opr3(); ++ LIR_Opr move_to = cmove->result_opr(); ++ + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); + Interval* to = interval_at(reg_num(move_to)); +@@ -2104,7 +2130,7 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) { + #ifdef _LP64 + return LIR_OprFact::double_cpu(assigned_reg, assigned_reg); + #else +-#if defined(SPARC) || defined(PPC) ++#if defined(SPARC) || defined(PPC) || defined(LOONGARCH) + return LIR_OprFact::double_cpu(assigned_regHi, assigned_reg); + #else + return LIR_OprFact::double_cpu(assigned_reg, assigned_regHi); +@@ -3285,7 +3311,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { + check_live = (move->patch_code() == lir_patch_none); + } + LIR_OpBranch* branch = op->as_OpBranch(); +- if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { ++ LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); ++ if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || ++ (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { + // Don't bother checking the stub in this case since the + // exception stub will never return to normal control flow. 
+ check_live = false; +@@ -6142,6 +6170,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi + assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); + LIR_OpBranch* branch = (LIR_OpBranch*)op; + ++ if (branch->block() == target_from) { ++ branch->change_block(target_to); ++ } ++ if (branch->ublock() == target_from) { ++ branch->change_ublock(target_to); ++ } ++ } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { ++ assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; ++ + if (branch->block() == target_from) { + branch->change_block(target_to); + } +@@ -6252,6 +6290,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + instructions->truncate(instructions->length() - 1); + } + } ++ } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { ++ assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; ++ ++ if (prev_branch->stub() == NULL) { ++ if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { ++ TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); ++ ++ // eliminate a conditional branch to the immediate successor ++ prev_branch->change_block(last_branch->block()); ++ prev_branch->negate_cond(); ++ instructions->trunc_to(instructions->length() - 1); ++ } ++ } + } + } + } +@@ -6328,6 +6380,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { + assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid"); + assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid"); + } ++ ++ LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); ++ ++ if (op_cmp_branch != NULL) { ++ assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); ++ assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); ++ } + } + + for (j = 0; j < block->number_of_sux() - 1; j++) { +@@ -6571,6 +6630,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { + break; + } + ++ case lir_cmp_branch: ++ case lir_cmp_float_branch: { ++ LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); ++ if (branch->block() == NULL) { ++ inc_counter(counter_stub_branch); ++ } else { ++ inc_counter(counter_cond_branch); ++ } ++ inc_counter(counter_cmp); ++ break; ++ } ++ ++ case lir_cmp_cmove: { ++ inc_counter(counter_misc_inst); ++ inc_counter(counter_cmp); ++ break; ++ } ++ + case lir_neg: + case lir_add: + case lir_sub: +diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.hpp b/hotspot/src/share/vm/c1/c1_LinearScan.hpp +index 96e6b3babf..576a07d73d 100644 +--- a/hotspot/src/share/vm/c1/c1_LinearScan.hpp ++++ b/hotspot/src/share/vm/c1/c1_LinearScan.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_C1_C1_LINEARSCAN_HPP + #define SHARE_VM_C1_C1_LINEARSCAN_HPP + +@@ -976,6 +982,9 @@ class LinearScanTimers : public StackObj { + #ifdef TARGET_ARCH_x86 + # include "c1_LinearScan_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_LinearScan_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_LinearScan_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +index 7e22bbaa27..12aca7bf50 100644 +--- a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp ++++ b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_MACROASSEMBLER_HPP + #define SHARE_VM_C1_C1_MACROASSEMBLER_HPP + +@@ -50,6 +56,9 @@ class C1_MacroAssembler: public MacroAssembler { + #ifdef TARGET_ARCH_x86 + # include "c1_MacroAssembler_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_MacroAssembler_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_MacroAssembler_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp +index aebc377527..f1253506f6 100644 +--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp ++++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "c1/c1_CodeStubs.hpp" +@@ -710,6 +716,7 @@ JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* thread)) + // Return to the now deoptimized frame. + JRT_END + ++#ifndef LOONGARCH + + static Klass* resolve_field_return_klass(methodHandle caller, int bci, TRAPS) { + Bytecode_field field_access(caller, bci); +@@ -1186,6 +1193,47 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i + } + JRT_END + ++#else ++ ++JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id )) ++{ ++ RegisterMap reg_map(thread, false); ++ ++ NOT_PRODUCT(_patch_code_slowcase_cnt++;) ++ // According to the LoongArch, "Concurrent modification and ++ // execution of instructions can lead to the resulting instruction ++ // performing any behavior that can be achieved by executing any ++ // sequence of instructions that can be executed from the same ++ // Exception level, except where the instruction before ++ // modification and the instruction after modification is a B, BL, ++ // NOP, BRK instruction." ++ // ++ // This effectively makes the games we play when patching ++ // impossible, so when we come across an access that needs ++ // patching we must deoptimize. ++ ++ if (TracePatching) { ++ tty->print_cr("Deoptimizing because patch is needed"); ++ } ++ ++ frame runtime_frame = thread->last_frame(); ++ frame caller_frame = runtime_frame.sender(®_map); ++ ++ // It's possible the nmethod was invalidated in the last ++ // safepoint, but if it's still alive then make it not_entrant. 
++ nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); ++ if (nm != NULL) { ++ nm->make_not_entrant(); ++ } ++ ++ Deoptimization::deoptimize_frame(thread, caller_frame.id()); ++ ++ // Return to the now deoptimized frame. ++} ++JRT_END ++ ++#endif ++ + // + // Entry point for compiled code. We want to patch a nmethod. + // We don't do a normal VM transition here because we want to +diff --git a/hotspot/src/share/vm/c1/c1_globals.hpp b/hotspot/src/share/vm/c1/c1_globals.hpp +index 8f7f9f61c9..0e2d926bdf 100644 +--- a/hotspot/src/share/vm/c1/c1_globals.hpp ++++ b/hotspot/src/share/vm/c1/c1_globals.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_GLOBALS_HPP + #define SHARE_VM_C1_C1_GLOBALS_HPP + +@@ -29,6 +35,9 @@ + #ifdef TARGET_ARCH_x86 + # include "c1_globals_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_globals_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp +index f067419ffc..5aa19dc84f 100644 +--- a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp ++++ b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + + #include "classfile/bytecodeAssembler.hpp" +@@ -32,6 +38,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_sparc + # include "bytes_sparc.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/classFileStream.hpp b/hotspot/src/share/vm/classfile/classFileStream.hpp +index 9632c8c8c2..fad25c44fc 100644 +--- a/hotspot/src/share/vm/classfile/classFileStream.hpp ++++ b/hotspot/src/share/vm/classfile/classFileStream.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP + #define SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP + +@@ -29,6 +35,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/stackMapTable.hpp b/hotspot/src/share/vm/classfile/stackMapTable.hpp +index a36a7ba3cf..d7c1f08644 100644 +--- a/hotspot/src/share/vm/classfile/stackMapTable.hpp ++++ b/hotspot/src/share/vm/classfile/stackMapTable.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP + #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP + +@@ -34,6 +40,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/verifier.cpp b/hotspot/src/share/vm/classfile/verifier.cpp +index c653e2b5a9..1a6b7e8b1a 100644 +--- a/hotspot/src/share/vm/classfile/verifier.cpp ++++ b/hotspot/src/share/vm/classfile/verifier.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/classFileStream.hpp" + #include "classfile/javaClasses.hpp" +@@ -48,6 +54,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/code/codeBlob.cpp b/hotspot/src/share/vm/code/codeBlob.cpp +index aff2aaf0ca..9ba76007cd 100644 +--- a/hotspot/src/share/vm/code/codeBlob.cpp ++++ b/hotspot/src/share/vm/code/codeBlob.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "code/codeCache.hpp" +@@ -57,6 +63,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + #ifdef COMPILER1 + #include "c1/c1_Runtime1.hpp" + #endif +diff --git a/hotspot/src/share/vm/code/compiledIC.hpp b/hotspot/src/share/vm/code/compiledIC.hpp +index f910f11886..e282a3f3af 100644 +--- a/hotspot/src/share/vm/code/compiledIC.hpp ++++ b/hotspot/src/share/vm/code/compiledIC.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_CODE_COMPILEDIC_HPP + #define SHARE_VM_CODE_COMPILEDIC_HPP + +@@ -45,6 +51,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + + //----------------------------------------------------------------------------- + // The CompiledIC represents a compiled inline cache. +diff --git a/hotspot/src/share/vm/code/relocInfo.hpp b/hotspot/src/share/vm/code/relocInfo.hpp +index ad55a2fd93..813504821d 100644 +--- a/hotspot/src/share/vm/code/relocInfo.hpp ++++ b/hotspot/src/share/vm/code/relocInfo.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. 
These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_CODE_RELOCINFO_HPP + #define SHARE_VM_CODE_RELOCINFO_HPP + +@@ -261,7 +267,11 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#if !defined MIPS64 + yet_unused_type_1 = 14, // Still unused ++#else ++ internal_pc_type = 14, // tag for internal data,?? ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -288,6 +298,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + ; + #endif + ++#if defined MIPS64 && !defined ZERO + #define APPLY_TO_RELOCATIONS(visitor) \ + visitor(oop) \ + visitor(metadata) \ +@@ -300,9 +311,26 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ visitor(internal_pc) \ + ++#else ++ #define APPLY_TO_RELOCATIONS(visitor) \ ++ visitor(oop) \ ++ visitor(metadata) \ ++ visitor(virtual_call) \ ++ visitor(opt_virtual_call) \ ++ visitor(static_call) \ ++ visitor(static_stub) \ ++ visitor(runtime_call) \ ++ visitor(external_word) \ ++ visitor(internal_word) \ ++ visitor(poll) \ ++ visitor(poll_return) \ ++ visitor(trampoline_stub) \ ++ visitor(section_word) \ ++ ++#endif + + public: + enum { +@@ -432,6 +460,12 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + #endif + #ifdef TARGET_ARCH_ppc + # include "relocInfo_ppc.hpp" ++#endif ++#ifdef TARGET_ARCH_mips ++# include "relocInfo_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "relocInfo_loongarch.hpp" + #endif + + +@@ -1024,6 +1058,15 @@ class metadata_Relocation : public DataRelocation { + // Note: metadata_value transparently converts Universe::non_metadata_word to NULL. + }; + ++#if defined MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ relocInfo::relocType type() { return relocInfo::internal_pc_type; } ++ public: ++ address pc() { return pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++}; ++#endif + + class virtual_call_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::virtual_call_type; } +diff --git a/hotspot/src/share/vm/code/vmreg.hpp b/hotspot/src/share/vm/code/vmreg.hpp +index 07b595b60a..5bc7131a8a 100644 +--- a/hotspot/src/share/vm/code/vmreg.hpp ++++ b/hotspot/src/share/vm/code/vmreg.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_CODE_VMREG_HPP + #define SHARE_VM_CODE_VMREG_HPP + +@@ -47,6 +53,12 @@ + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" + #endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" ++#endif + #endif + + //------------------------------VMReg------------------------------------------ +@@ -158,6 +170,12 @@ public: + #ifdef TARGET_ARCH_x86 + # include "vmreg_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmreg_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vmreg_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/compiler/disassembler.cpp b/hotspot/src/share/vm/compiler/disassembler.cpp +index dfdd5f77e7..2dd0ff69ac 100644 +--- a/hotspot/src/share/vm/compiler/disassembler.cpp ++++ b/hotspot/src/share/vm/compiler/disassembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.hpp" + #include "code/codeCache.hpp" +@@ -50,6 +56,12 @@ + #ifdef TARGET_ARCH_ppc + # include "depChecker_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "depChecker_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "depChecker_loongarch.hpp" ++#endif + #ifdef SHARK + #include "shark/sharkEntry.hpp" + #endif +diff --git a/hotspot/src/share/vm/compiler/disassembler.hpp b/hotspot/src/share/vm/compiler/disassembler.hpp +index 168851cc26..8b632748f2 100644 +--- a/hotspot/src/share/vm/compiler/disassembler.hpp ++++ b/hotspot/src/share/vm/compiler/disassembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_COMPILER_DISASSEMBLER_HPP + #define SHARE_VM_COMPILER_DISASSEMBLER_HPP + +@@ -95,6 +101,12 @@ class Disassembler { + #endif + #ifdef TARGET_ARCH_ppc + # include "disassembler_ppc.hpp" ++#endif ++#ifdef TARGET_ARCH_mips ++# include "disassembler_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "disassembler_loongarch.hpp" + #endif + + +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp +index 733b5c91ad..678a1ee836 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp +@@ -86,6 +86,9 @@ class CardTableExtension : public CardTableModRefBS { + void inline_write_ref_field_gc(void* field, oop new_val) { + jbyte* byte = byte_for(field); + *byte = youngergen_card; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + // Adaptive size policy support +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +index 1dde10746d..8b800b31c5 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +@@ -105,6 +105,9 @@ ParMarkBitMap::mark_obj(HeapWord* addr, size_t size) + assert(end_bit_ok, "concurrency problem"); + DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count)); + DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size)); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + return true; + } + return false; +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp +index 6cf76353d9..4d34bc209b 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp +@@ -33,6 +33,9 @@ void ParCompactionManager::push_objarray(oop obj, size_t index) + ObjArrayTask task(obj, index); + assert(task.is_valid(), "bad ObjArrayTask"); + _objarray_stack.push(task); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + void ParCompactionManager::push_region(size_t index) +@@ -44,6 +47,9 @@ void ParCompactionManager::push_region(size_t index) + assert(region_ptr->_pushed++ == 0, "should only be pushed once"); + #endif + region_stack()->push(index); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + #endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSCOMPACTIONMANAGER_INLINE_HPP +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +index 0fa980ef83..2f66493e0a 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +@@ -499,6 +499,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) + if (beg_region == end_region) { + // All in one 
region. + _region_data[beg_region].add_live_obj(len); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + return; + } + +@@ -517,6 +520,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) + const size_t end_ofs = region_offset(addr + len - 1); + _region_data[end_region].set_partial_obj_size(end_ofs + 1); + _region_data[end_region].set_partial_obj_addr(addr); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + void +@@ -3229,6 +3235,9 @@ void PSParallelCompact::fill_blocks(size_t region_idx) + if (new_block != cur_block) { + cur_block = new_block; + sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +index 881f380cea..461b83930f 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +@@ -1329,6 +1329,9 @@ inline bool PSParallelCompact::mark_obj(oop obj) { + const int obj_size = obj->size(); + if (mark_bitmap()->mark_obj(obj, obj_size)) { + _summary_data.add_obj(obj, obj_size); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + return true; + } else { + return false; +@@ -1363,6 +1366,9 @@ inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + if (mark_bitmap()->is_unmarked(obj) && mark_obj(obj)) { + cm->push(obj); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + } + } +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +index a33132009c..291019660a 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +@@ -41,8 +41,9 @@ template + inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + if (p != NULL) { // XXX: error if p != NULL here + oop o = oopDesc::load_decode_heap_oop_not_null(p); +- if (o->is_forwarded()) { +- o = o->forwardee(); ++ markOop m = o->mark(); ++ if (m->is_marked()) { ++ o = (oop) m->decode_pointer(); + // Card mark + if (PSScavenge::is_obj_in_young(o)) { + PSScavenge::card_table()->inline_write_ref_field_gc(p, o); +@@ -102,11 +103,19 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + + oop new_obj = NULL; + ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ + // NOTE! We must be very careful with any methods that access the mark + // in o. There may be multiple threads racing on it, and it may be forwarded + // at any time. Do not use oop methods for accessing the mark! 
+ markOop test_mark = o->mark(); + ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ + // The same test as "o->is_forwarded()" + if (!test_mark->is_marked()) { + bool new_obj_is_tenured = false; +@@ -141,6 +150,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + } + } + } ++ ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + } + +@@ -200,6 +213,9 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + + // Copy obj + Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + + // Now we have to CAS in the header. + if (o->cas_forward_to(new_obj, test_mark)) { +@@ -247,6 +263,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + // don't update this before the unallocation! + new_obj = o->forwardee(); + } ++ ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } else { + assert(o->is_forwarded(), "Sanity"); + new_obj = o->forwardee(); +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +index 1a722a7ca7..4980be3946 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +@@ -71,14 +71,22 @@ inline void PSScavenge::copy_and_push_safe_barrier(PSPromotionManager* pm, + assert(should_scavenge(p, true), "revisiting object?"); + + oop o = oopDesc::load_decode_heap_oop_not_null(p); +- oop new_obj = o->is_forwarded() +- ? o->forwardee() +- : pm->copy_to_survivor_space(o); ++#if defined MIPS || defined LOONGARCH ++ if (oopDesc::is_null(o)) return; ++#endif ++ ++ oop new_obj; ++ markOop m = o->mark(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); ++ } else { ++ new_obj = pm->copy_to_survivor_space(o); ++ } + + #ifndef PRODUCT + // This code must come after the CAS test, or it will print incorrect + // information. +- if (TraceScavenge && o->is_forwarded()) { ++ if (TraceScavenge && m->is_marked()) { + gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", + "forwarding", + new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); +@@ -138,8 +146,9 @@ class PSScavengeFromKlassClosure: public OopClosure { + + oop o = *p; + oop new_obj; +- if (o->is_forwarded()) { +- new_obj = o->forwardee(); ++ markOop m = o->mark(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); + } else { + new_obj = _pm->copy_to_survivor_space(o); + } +diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +index e14c50bf01..8b3860070c 100644 +--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP + +@@ -42,6 +48,10 @@ + # include "interp_masm_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "interp_masm_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" + #endif + + // This file contains the platform-independent parts +diff --git a/hotspot/src/share/vm/interpreter/bytecode.hpp b/hotspot/src/share/vm/interpreter/bytecode.hpp +index 7e55fd009a..a06dcd58bc 100644 +--- a/hotspot/src/share/vm/interpreter/bytecode.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecode.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODE_HPP + #define SHARE_VM_INTERPRETER_BYTECODE_HPP + +@@ -31,6 +37,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp +index 28843715c7..c17fe8d7e0 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP + +@@ -35,6 +41,9 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +@@ -592,6 +601,12 @@ void print(); + #ifdef TARGET_ARCH_x86 + # include "bytecodeInterpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytecodeInterpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytecodeInterpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytecodeInterpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp +index f5db0b4d9d..8adbf95acb 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP + #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP + +@@ -46,6 +52,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytecodeInterpreter_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytecodeInterpreter_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytecodeInterpreter_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytecodeInterpreter_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp +index b814b88d5d..e1f2421600 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP + #define SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP + +@@ -32,6 +38,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodes.cpp b/hotspot/src/share/vm/interpreter/bytecodes.cpp +index fdb880a3b3..4f5111074f 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodes.cpp ++++ b/hotspot/src/share/vm/interpreter/bytecodes.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "interpreter/bytecodes.hpp" + #include "memory/resourceArea.hpp" +@@ -29,6 +35,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodes.hpp b/hotspot/src/share/vm/interpreter/bytecodes.hpp +index c3463cd76d..bdf4c487f0 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodes.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodes.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODES_HPP + #define SHARE_VM_INTERPRETER_BYTECODES_HPP + +@@ -292,6 +298,12 @@ class Bytecodes: AllStatic { + #ifdef TARGET_ARCH_x86 + # include "bytecodes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytecodes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytecodes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytecodes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +index 6a6447503c..f9c540fb4a 100644 +--- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP + +@@ -84,6 +90,12 @@ class CppInterpreter: public AbstractInterpreter { + #ifdef TARGET_ARCH_x86 + # include "cppInterpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "cppInterpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "cppInterpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "cppInterpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp +index 6a08a3f43f..1fd19994d7 100644 +--- a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp ++++ b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP + +@@ -50,6 +56,12 @@ class CppInterpreterGenerator: public AbstractInterpreterGenerator { + #ifdef TARGET_ARCH_x86 + # include "cppInterpreterGenerator_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "cppInterpreterGenerator_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "cppInterpreterGenerator_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "cppInterpreterGenerator_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/interpreter.hpp b/hotspot/src/share/vm/interpreter/interpreter.hpp +index ebfb68d36b..610949f3f7 100644 +--- a/hotspot/src/share/vm/interpreter/interpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/interpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETER_HPP + #define SHARE_VM_INTERPRETER_INTERPRETER_HPP + +@@ -148,6 +154,12 @@ class Interpreter: public CC_INTERP_ONLY(CppInterpreter) NOT_CC_INTERP(TemplateI + #ifdef TARGET_ARCH_x86 + # include "interpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "interpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "interpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "interpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp +index 1dc7cb2983..92bbe6b440 100644 +--- a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp ++++ b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP + +@@ -44,6 +50,12 @@ InterpreterGenerator(StubQueue* _code); + #ifdef TARGET_ARCH_x86 + # include "interpreterGenerator_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "interpreterGenerator_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "interpreterGenerator_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "interpreterGenerator_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +index 5d2845383c..f48622f67e 100644 +--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp ++++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -59,6 +65,12 @@ + #ifdef TARGET_ARCH_x86 + # include "vm_version_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vm_version_aarch64.hpp" + #endif +@@ -1290,7 +1302,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp +index 472bf4d94c..9a98d5559c 100644 +--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp ++++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. 
These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -156,7 +162,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +@@ -165,6 +171,12 @@ class InterpreterRuntime: AllStatic { + #ifdef TARGET_ARCH_x86 + # include "interpreterRT_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "interpreterRT_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "interpreterRT_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "interpreterRT_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +index 5f76dca8a6..757860f43c 100644 +--- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP + +@@ -190,6 +196,12 @@ class TemplateInterpreter: public AbstractInterpreter { + #ifdef TARGET_ARCH_x86 + # include "templateInterpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "templateInterpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "templateInterpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "templateInterpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp +index bd94bd02bc..28ca437eb2 100644 +--- a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp ++++ b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -89,6 +95,12 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + #ifdef TARGET_ARCH_x86 + # include "templateInterpreterGenerator_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "templateInterpreterGenerator_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "templateInterpreterGenerator_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "templateInterpreterGenerator_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/templateTable.hpp b/hotspot/src/share/vm/interpreter/templateTable.hpp +index 60d243c16a..1b73822abd 100644 +--- a/hotspot/src/share/vm/interpreter/templateTable.hpp ++++ b/hotspot/src/share/vm/interpreter/templateTable.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP + #define SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP + +@@ -40,6 +46,10 @@ + # include "interp_masm_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "interp_masm_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" + #endif + + #ifndef CC_INTERP +@@ -367,6 +377,10 @@ class TemplateTable: AllStatic { + # include "templateTable_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "templateTable_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "templateTable_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "templateTable_loongarch_64.hpp" + #endif + + }; +diff --git a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp +index 6d9ab39fdd..f4e9a4ca69 100644 +--- a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp ++++ b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp +@@ -116,7 +116,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff --git a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp +index 42a8b719cd..f08f6ee13a 100644 +--- a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp ++++ b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP + #define SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP + +@@ -46,6 +52,12 @@ + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif + + // + // The Encoding policy prescribes a template +diff --git a/hotspot/src/share/vm/memory/barrierSet.hpp b/hotspot/src/share/vm/memory/barrierSet.hpp +index 13ff9b2738..081b70744d 100644 +--- a/hotspot/src/share/vm/memory/barrierSet.hpp ++++ b/hotspot/src/share/vm/memory/barrierSet.hpp +@@ -27,6 +27,7 @@ + + #include "memory/memRegion.hpp" + #include "oops/oopsHierarchy.hpp" ++#include "runtime/orderAccess.hpp" + + // This class provides the interface between a barrier implementation and + // the rest of the system. +@@ -95,8 +96,16 @@ private: + // Keep this private so as to catch violations at build time. + virtual void write_ref_field_pre_work( void* field, oop new_val) { guarantee(false, "Not needed"); }; + protected: +- virtual void write_ref_field_pre_work( oop* field, oop new_val) {}; +- virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {}; ++ virtual void write_ref_field_pre_work( oop* field, oop new_val) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ }; ++ virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ }; + public: + + // ...then the post-write version. +@@ -132,9 +141,17 @@ public: + + // Below length is the # array elements being written + virtual void write_ref_array_pre(oop* dst, int length, +- bool dest_uninitialized = false) {} ++ bool dest_uninitialized = false) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } + virtual void write_ref_array_pre(narrowOop* dst, int length, +- bool dest_uninitialized = false) {} ++ bool dest_uninitialized = false) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++} + // Below count is the # array elements being written, starting + // at the address "start", which may not necessarily be HeapWord-aligned + inline void write_ref_array(HeapWord* start, size_t count); +diff --git a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +index 01e4688836..80bd151873 100644 +--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp ++++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +@@ -316,6 +316,9 @@ public: + + inline void inline_write_ref_array(MemRegion mr) { + dirty_MemRegion(mr); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + protected: + void write_ref_array_work(MemRegion mr) { +@@ -329,7 +332,11 @@ public: + + // *** Card-table-barrier-specific things. 
+ +- template inline void inline_write_ref_field_pre(T* field, oop newVal) {} ++ template inline void inline_write_ref_field_pre(T* field, oop newVal) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } + + template inline void inline_write_ref_field(T* field, oop newVal, bool release) { + jbyte* byte = byte_for((void*)field); +@@ -339,6 +346,9 @@ public: + } else { + *byte = dirty_card; + } ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + // These are used by G1, when it uses the card table as a temporary data +diff --git a/hotspot/src/share/vm/memory/cardTableRS.cpp b/hotspot/src/share/vm/memory/cardTableRS.cpp +index fb33a708ae..da22acba47 100644 +--- a/hotspot/src/share/vm/memory/cardTableRS.cpp ++++ b/hotspot/src/share/vm/memory/cardTableRS.cpp +@@ -252,6 +252,9 @@ void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { + // cur_youngergen_and_prev_nonclean_card ==> no change. + void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { + jbyte* entry = ct_bs()->byte_for(field); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + do { + jbyte entry_val = *entry; + // We put this first because it's probably the most common case. +@@ -266,7 +269,12 @@ void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { + jbyte new_val = cur_youngergen_and_prev_nonclean_card; + jbyte res = Atomic::cmpxchg(new_val, entry, entry_val); + // Did the CAS succeed? +- if (res == entry_val) return; ++ if (res == entry_val) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ return; ++ } + // Otherwise, retry, to see the new value. + continue; + } else { +diff --git a/hotspot/src/share/vm/memory/cardTableRS.hpp b/hotspot/src/share/vm/memory/cardTableRS.hpp +index 25884feac8..5d4e77f269 100644 +--- a/hotspot/src/share/vm/memory/cardTableRS.hpp ++++ b/hotspot/src/share/vm/memory/cardTableRS.hpp +@@ -121,7 +121,14 @@ public: + + void inline_write_ref_field_gc(void* field, oop new_val) { + jbyte* byte = _ct_bs->byte_for(field); +- *byte = youngergen_card; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ *byte = youngergen_card; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ + } + void write_ref_field_gc_work(void* field, oop new_val) { + inline_write_ref_field_gc(field, new_val); +diff --git a/hotspot/src/share/vm/memory/metaspace.cpp b/hotspot/src/share/vm/memory/metaspace.cpp +index fb0564ac27..9cec7d4375 100644 +--- a/hotspot/src/share/vm/memory/metaspace.cpp ++++ b/hotspot/src/share/vm/memory/metaspace.cpp +@@ -21,6 +21,13 @@ + * questions. + * + */ ++ ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "gc_interface/collectedHeap.hpp" + #include "memory/allocation.hpp" +@@ -3065,12 +3072,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + // Don't use large pages for the class space. 
+ bool large_pages = false; + +-#ifndef AARCH64 ++#if !defined(AARCH64) && !defined(MIPS64) && !defined(LOONGARCH) + ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr, 0); +-#else // AARCH64 ++#else // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) + ReservedSpace metaspace_rs; + + // Our compressed klass pointers may fit nicely into the lower 32 +@@ -3107,7 +3114,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + } + } + +-#endif // AARCH64 ++#endif // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) + + if (!metaspace_rs.is_reserved()) { + #if INCLUDE_CDS +diff --git a/hotspot/src/share/vm/oops/constantPool.hpp b/hotspot/src/share/vm/oops/constantPool.hpp +index ec111df04e..6c0607105c 100644 +--- a/hotspot/src/share/vm/oops/constantPool.hpp ++++ b/hotspot/src/share/vm/oops/constantPool.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP + #define SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP + +@@ -50,6 +56,13 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif ++ + + // A constantPool is an array containing class constants as described in the + // class file. +diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp +index acef334849..23fc0b9988 100644 +--- a/hotspot/src/share/vm/oops/klass.hpp ++++ b/hotspot/src/share/vm/oops/klass.hpp +@@ -32,6 +32,9 @@ + #include "oops/klassPS.hpp" + #include "oops/metadata.hpp" + #include "oops/oop.hpp" ++#if defined MIPS || defined LOONGARCH ++#include "runtime/orderAccess.hpp" ++#endif + #include "utilities/accessFlags.hpp" + #include "utilities/macros.hpp" + #if INCLUDE_ALL_GCS +@@ -289,8 +292,18 @@ protected: + // The Klasses are not placed in the Heap, so the Card Table or + // the Mod Union Table can't be used to mark when klasses have modified oops. + // The CT and MUT bits saves this information for the individual Klasses. 
+- void record_modified_oops() { _modified_oops = 1; } +- void clear_modified_oops() { _modified_oops = 0; } ++ void record_modified_oops() { ++ _modified_oops = 1; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } ++ void clear_modified_oops() { ++ _modified_oops = 0; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } + bool has_modified_oops() { return _modified_oops == 1; } + + void accumulate_modified_oops() { if (has_modified_oops()) _accumulated_modified_oops = 1; } +diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp +index 0678c6b3fb..1cb20e351f 100644 +--- a/hotspot/src/share/vm/oops/oop.hpp ++++ b/hotspot/src/share/vm/oops/oop.hpp +@@ -72,7 +72,13 @@ class oopDesc { + markOop mark() const { return _mark; } + markOop* mark_addr() const { return (markOop*) &_mark; } + +- void set_mark(volatile markOop m) { _mark = m; } ++ void set_mark(volatile markOop m) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) release_set_mark(m); ++ else ++#endif ++ _mark = m; ++ } + + void release_set_mark(markOop m); + markOop cas_set_mark(markOop new_mark, markOop old_mark); +diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp +index beec739d38..8660c1e331 100644 +--- a/hotspot/src/share/vm/oops/oop.inline.hpp ++++ b/hotspot/src/share/vm/oops/oop.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -60,6 +66,12 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + + // Implementation of all inlined member functions defined in oop.hpp + // We need a separate file to avoid circular references +diff --git a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp +index 8a4603944e..b28bb99189 100644 +--- a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp ++++ b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP + #define SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP + +@@ -75,7 +81,7 @@ inline oop oopDesc::forward_to_atomic(oop p) { + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop) oldMark->decode_pointer(); + } + + #endif // SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP +diff --git a/hotspot/src/share/vm/opto/buildOopMap.cpp b/hotspot/src/share/vm/opto/buildOopMap.cpp +index 91642f1d7d..5df185df04 100644 +--- a/hotspot/src/share/vm/opto/buildOopMap.cpp ++++ b/hotspot/src/share/vm/opto/buildOopMap.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "compiler/oopMap.hpp" + #include "opto/addnode.hpp" +@@ -50,6 +56,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + + // The functions in this file builds OopMaps after all scheduling is done. + // +diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp +index 7fd615d35f..ad472e8722 100644 +--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp ++++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp +@@ -361,9 +361,20 @@ bool InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, + } else if (forced_inline()) { + // Inlining was forced by CompilerOracle, ciReplay or annotation + } else if (profile.count() == 0) { ++#ifndef MIPS + // don't inline unreached call sites + set_msg("call site not reached"); + return false; ++#else ++ ciMethodBlocks* blocks = caller_method->get_method_blocks(); ++ // Check if the call site belongs to a start block: ++ // call sites in a start block must be reached before. ++ if (blocks->block_containing(0) != blocks->block_containing(jvms->bci())) { ++ // don't inline unreached call sites ++ set_msg("call site not reached"); ++ return false; ++ } ++#endif + } + } + +diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp +index 82d2efef92..d373b20456 100644 +--- a/hotspot/src/share/vm/opto/c2_globals.hpp ++++ b/hotspot/src/share/vm/opto/c2_globals.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_C2_GLOBALS_HPP + #define SHARE_VM_OPTO_C2_GLOBALS_HPP + +@@ -35,6 +41,12 @@ + #ifdef TARGET_ARCH_sparc + # include "c2_globals_sparc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "c2_globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "c2_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_arm + # include "c2_globals_arm.hpp" + #endif +diff --git a/hotspot/src/share/vm/opto/c2compiler.cpp b/hotspot/src/share/vm/opto/c2compiler.cpp +index 137f49600d..f689d64a38 100644 +--- a/hotspot/src/share/vm/opto/c2compiler.cpp ++++ b/hotspot/src/share/vm/opto/c2compiler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "opto/c2compiler.hpp" + #include "opto/runtime.hpp" +@@ -39,6 +45,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + // register information defined by ADLC +diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp +index de6d443cd3..0b27dc9335 100644 +--- a/hotspot/src/share/vm/opto/chaitin.hpp ++++ b/hotspot/src/share/vm/opto/chaitin.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_CHAITIN_HPP + #define SHARE_VM_OPTO_CHAITIN_HPP + +@@ -136,8 +142,12 @@ public: + + // Number of registers this live range uses when it colors + private: ++#ifdef LOONGARCH64 ++ uint16_t _num_regs; ++#else + uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else + // except _num_regs is kill count for fat_proj ++#endif + public: + int num_regs() const { return _num_regs; } + void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } +@@ -145,7 +155,11 @@ public: + private: + // Number of physical registers this live range uses when it colors + // Architecture and register-set dependent ++#ifdef LOONGARCH64 ++ uint16_t _reg_pressure; ++#else + uint8 _reg_pressure; ++#endif + public: + void set_reg_pressure(int i) { _reg_pressure = i; } + int reg_pressure() const { return _reg_pressure; } +diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp +index ae22ba84d9..9004dc0d72 100644 +--- a/hotspot/src/share/vm/opto/compile.cpp ++++ b/hotspot/src/share/vm/opto/compile.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/macroAssembler.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -81,6 +87,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + // -------------------- Compile::mach_constant_base_node ----------------------- +diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp +index b4f4cfefed..d263ee2fc4 100644 +--- a/hotspot/src/share/vm/opto/compile.hpp ++++ b/hotspot/src/share/vm/opto/compile.hpp +@@ -1025,7 +1025,7 @@ class Compile : public Phase { + bool in_scratch_emit_size() const { return _in_scratch_emit_size; } + + enum ScratchBufferBlob { +- MAX_inst_size = 1024, ++ MAX_inst_size = 1024 MIPS64_ONLY(* 2) LOONGARCH64_ONLY(*2), + MAX_locs_size = 128, // number of relocInfo elements + MAX_const_size = 128, + MAX_stubs_size = 128 +diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp +index f51484efb0..12457b7c34 100644 +--- a/hotspot/src/share/vm/opto/gcm.cpp ++++ b/hotspot/src/share/vm/opto/gcm.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "libadt/vectset.hpp" + #include "memory/allocation.inline.hpp" +@@ -49,6 +55,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + +diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp +index c6178a715b..2d492568d9 100644 +--- a/hotspot/src/share/vm/opto/lcm.cpp ++++ b/hotspot/src/share/vm/opto/lcm.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "memory/allocation.inline.hpp" + #include "opto/block.hpp" +@@ -44,6 +50,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + // Optimization - Graph Style +diff --git a/hotspot/src/share/vm/opto/locknode.hpp b/hotspot/src/share/vm/opto/locknode.hpp +index b320f6bfb2..4bfb0ff072 100644 +--- a/hotspot/src/share/vm/opto/locknode.hpp ++++ b/hotspot/src/share/vm/opto/locknode.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_OPTO_LOCKNODE_HPP + #define SHARE_VM_OPTO_LOCKNODE_HPP + +@@ -42,6 +48,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + //------------------------------BoxLockNode------------------------------------ +diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp +index 75f1fbee58..994de0736a 100644 +--- a/hotspot/src/share/vm/opto/matcher.cpp ++++ b/hotspot/src/share/vm/opto/matcher.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "memory/allocation.inline.hpp" + #include "opto/addnode.hpp" +@@ -52,6 +58,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + OptoReg::Name OptoReg::c_frame_pointer; +diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp +index 6032b72a9b..7fb4dea28e 100644 +--- a/hotspot/src/share/vm/opto/output.cpp ++++ b/hotspot/src/share/vm/opto/output.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "code/compiledIC.hpp" +@@ -844,6 +850,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = NULL; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to tha last instruction in safePoint. ++ // In X86 and sparc, their safePoints only contain one instruction. ++ // However, we should add current_offset with the size of safePoint in MIPS. 
++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(_regalloc) - 4; ++#endif + debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1502,6 +1529,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) + n->emit(*cb, _regalloc); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime ++ // is not the instruction which access memory. adjust is needed. previous_offset points to the ++ // instruction which access memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of current instruction. ++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff --git a/hotspot/src/share/vm/opto/output.hpp b/hotspot/src/share/vm/opto/output.hpp +index ba72841363..37f954de9b 100644 +--- a/hotspot/src/share/vm/opto/output.hpp ++++ b/hotspot/src/share/vm/opto/output.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_OUTPUT_HPP + #define SHARE_VM_OPTO_OUTPUT_HPP + +@@ -41,6 +47,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + class Arena; +diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp +index 352ccfb9d9..9a656d03ee 100644 +--- a/hotspot/src/share/vm/opto/regmask.cpp ++++ b/hotspot/src/share/vm/opto/regmask.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "opto/compile.hpp" + #include "opto/regmask.hpp" +@@ -39,6 +45,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + #define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */ +diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp +index 5ceebb3fb8..6d08b68731 100644 +--- a/hotspot/src/share/vm/opto/regmask.hpp ++++ b/hotspot/src/share/vm/opto/regmask.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_REGMASK_HPP + #define SHARE_VM_OPTO_REGMASK_HPP + +@@ -42,6 +48,10 @@ + # include "adfiles/adGlobals_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" + #endif + + // Some fun naming (textual) substitutions: +diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp +index a43b37f2c5..f2bcafa2c5 100644 +--- a/hotspot/src/share/vm/opto/runtime.cpp ++++ b/hotspot/src/share/vm/opto/runtime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -82,6 +88,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + +diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp +index 58572f137d..299d48b12a 100644 +--- a/hotspot/src/share/vm/opto/type.cpp ++++ b/hotspot/src/share/vm/opto/type.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -68,6 +74,16 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectord:", false, Op_RegD, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY ++#elif defined(MIPS64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY + #elif defined(PPC64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD +diff --git a/hotspot/src/share/vm/prims/jniCheck.cpp b/hotspot/src/share/vm/prims/jniCheck.cpp +index 593ca8a1e3..82813b71fe 100644 +--- a/hotspot/src/share/vm/prims/jniCheck.cpp ++++ b/hotspot/src/share/vm/prims/jniCheck.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -55,6 +61,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + + // Complain every extra number of unplanned local refs + #define CHECK_JNI_LOCAL_REF_CAP_WARN_THRESHOLD 32 +diff --git a/hotspot/src/share/vm/prims/jni_md.h b/hotspot/src/share/vm/prims/jni_md.h +index 6209a66449..271715d4a2 100644 +--- a/hotspot/src/share/vm/prims/jni_md.h ++++ b/hotspot/src/share/vm/prims/jni_md.h +@@ -22,6 +22,12 @@ + * or visit www.oracle.com if you need additional information or have any + * questions. + */ ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* Switch to the correct jni_md.h file without reliance on -I options. */ + #ifdef TARGET_ARCH_x86 +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jni_ppc.h" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jni_mips.h" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jni_loongarch.h" ++#endif + + + /* +diff --git a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp +index ab31d0d91e..0d8570b764 100644 +--- a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp ++++ b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. 
These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/symbolTable.hpp" + #include "interpreter/bytecodeStream.hpp" +@@ -46,6 +52,12 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + // FIXME: add Deprecated attribute + // FIXME: fix Synthetic attribute + // FIXME: per Serguei, add error return handling for ConstantPool::copy_cpool_bytes() +diff --git a/hotspot/src/share/vm/prims/methodHandles.hpp b/hotspot/src/share/vm/prims/methodHandles.hpp +index db6e06180d..841082859a 100644 +--- a/hotspot/src/share/vm/prims/methodHandles.hpp ++++ b/hotspot/src/share/vm/prims/methodHandles.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_PRIMS_METHODHANDLES_HPP + #define SHARE_VM_PRIMS_METHODHANDLES_HPP + +@@ -198,6 +204,13 @@ public: + #ifdef TARGET_ARCH_ppc + # include "methodHandles_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "methodHandles_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "methodHandles_loongarch.hpp" ++#endif ++ + + // Tracing + static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN; +diff --git a/hotspot/src/share/vm/runtime/atomic.inline.hpp b/hotspot/src/share/vm/runtime/atomic.inline.hpp +index 222f29cbf4..7c7c6edb27 100644 +--- a/hotspot/src/share/vm/runtime/atomic.inline.hpp ++++ b/hotspot/src/share/vm/runtime/atomic.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP + #define SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP + +@@ -31,6 +37,12 @@ + #ifdef TARGET_OS_ARCH_linux_x86 + # include "atomic_linux_x86.inline.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "atomic_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "atomic_linux_loongarch.inline.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_sparc + # include "atomic_linux_sparc.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp +index f91afdc416..36a924fd4f 100644 +--- a/hotspot/src/share/vm/runtime/deoptimization.cpp ++++ b/hotspot/src/share/vm/runtime/deoptimization.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "code/debugInfoRec.hpp" +@@ -68,6 +74,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef COMPILER2 + #if defined AD_MD_HPP + # include AD_MD_HPP +@@ -84,6 +96,12 @@ + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" + #endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" ++#endif + #endif // COMPILER2 + + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC +diff --git a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp +index db568def34..490c5f5a4e 100644 +--- a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp ++++ b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_DTRACEJSDT_HPP + #define SHARE_VM_RUNTIME_DTRACEJSDT_HPP + +@@ -44,6 +50,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + + class RegisteredProbes; + typedef jlong OpaqueProbes; +diff --git a/hotspot/src/share/vm/runtime/frame.cpp b/hotspot/src/share/vm/runtime/frame.cpp +index 338b7ad3a7..5a161133ba 100644 +--- a/hotspot/src/share/vm/runtime/frame.cpp ++++ b/hotspot/src/share/vm/runtime/frame.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "compiler/abstractCompiler.hpp" + #include "compiler/disassembler.hpp" +@@ -64,6 +70,13 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif ++ + + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC + +diff --git a/hotspot/src/share/vm/runtime/frame.hpp b/hotspot/src/share/vm/runtime/frame.hpp +index 2d80ecc208..4a9e6edb54 100644 +--- a/hotspot/src/share/vm/runtime/frame.hpp ++++ b/hotspot/src/share/vm/runtime/frame.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_RUNTIME_FRAME_HPP + #define SHARE_VM_RUNTIME_FRAME_HPP + +@@ -45,6 +51,10 @@ + # include "adfiles/adGlobals_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" + #endif + #endif // COMPILER2 + #ifdef TARGET_ARCH_zero +@@ -489,6 +499,12 @@ class frame VALUE_OBJ_CLASS_SPEC { + #ifdef TARGET_ARCH_x86 + # include "frame_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "frame_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "frame_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "frame_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/frame.inline.hpp b/hotspot/src/share/vm/runtime/frame.inline.hpp +index 710b82306a..704cc8df8f 100644 +--- a/hotspot/src/share/vm/runtime/frame.inline.hpp ++++ b/hotspot/src/share/vm/runtime/frame.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_FRAME_INLINE_HPP + #define SHARE_VM_RUNTIME_FRAME_INLINE_HPP + +@@ -49,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_zero + # include "entryFrame_zero.hpp" + # include "fakeStubFrame_zero.hpp" +@@ -115,6 +127,12 @@ inline oop* frame::interpreter_frame_temp_oop_addr() const { + #ifdef TARGET_ARCH_ppc + # include "frame_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "frame_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "frame_loongarch.inline.hpp" ++#endif + + + #endif // SHARE_VM_RUNTIME_FRAME_INLINE_HPP +diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp +index 23ce8af569..f36137aabf 100644 +--- a/hotspot/src/share/vm/runtime/globals.hpp ++++ b/hotspot/src/share/vm/runtime/globals.hpp +@@ -55,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "globals_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "globals_loongarch.hpp" ++#endif + #ifdef TARGET_OS_FAMILY_linux + # include "globals_linux.hpp" + #endif +@@ -79,6 +85,12 @@ + #ifdef TARGET_OS_ARCH_linux_sparc + # include "globals_linux_sparc.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "globals_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "globals_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_zero + # include "globals_linux_zero.hpp" + #endif +@@ -116,6 +128,12 @@ + #ifdef TARGET_ARCH_sparc + # include "c1_globals_sparc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "c1_globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_arm + # include "c1_globals_arm.hpp" + #endif +@@ -148,6 +166,12 @@ + #ifdef TARGET_ARCH_sparc + # include "c2_globals_sparc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "c2_globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "c2_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_arm + # include 
"c2_globals_arm.hpp" + #endif +@@ -3209,7 +3233,7 @@ class CommandLineFlags { + product(uintx, InitialHeapSize, 0, \ + "Initial heap size (in bytes); zero means use ergonomics") \ + \ +- product(uintx, MaxHeapSize, ScaleForWordSize(96*M), \ ++ product(uintx, MaxHeapSize, ScaleForWordSize(MIPS64_ONLY(1500) NOT_MIPS64(96) *M), \ + "Maximum heap size (in bytes)") \ + \ + product(uintx, OldSize, ScaleForWordSize(4*M), \ +diff --git a/hotspot/src/share/vm/runtime/icache.hpp b/hotspot/src/share/vm/runtime/icache.hpp +index ba81a06ff5..9c0cfdb7d7 100644 +--- a/hotspot/src/share/vm/runtime/icache.hpp ++++ b/hotspot/src/share/vm/runtime/icache.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_ICACHE_HPP + #define SHARE_VM_RUNTIME_ICACHE_HPP + +@@ -86,7 +92,12 @@ class AbstractICache : AllStatic { + #ifdef TARGET_ARCH_ppc + # include "icache_ppc.hpp" + #endif +- ++#ifdef TARGET_ARCH_mips ++# include "icache_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "icache_loongarch.hpp" ++#endif + + + class ICacheStubGenerator : public StubCodeGenerator { +diff --git a/hotspot/src/share/vm/runtime/java.cpp b/hotspot/src/share/vm/runtime/java.cpp +index 0a263b017c..9ba0decaae 100644 +--- a/hotspot/src/share/vm/runtime/java.cpp ++++ b/hotspot/src/share/vm/runtime/java.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/classLoader.hpp" + #include "classfile/symbolTable.hpp" +@@ -84,6 +90,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + #if INCLUDE_ALL_GCS + #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" + #include "gc_implementation/parallelScavenge/psScavenge.hpp" +diff --git a/hotspot/src/share/vm/runtime/javaCalls.hpp b/hotspot/src/share/vm/runtime/javaCalls.hpp +index 6126bbe75e..1747e2b2ee 100644 +--- a/hotspot/src/share/vm/runtime/javaCalls.hpp ++++ b/hotspot/src/share/vm/runtime/javaCalls.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_JAVACALLS_HPP + #define SHARE_VM_RUNTIME_JAVACALLS_HPP + +@@ -49,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + + // A JavaCallWrapper is constructed before each JavaCall and destructed after the call. 
+ // Its purpose is to allocate/deallocate a new handle block and to save/restore the last +diff --git a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp +index 129a01e293..c2b1b2e6c3 100644 +--- a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp ++++ b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP + #define SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP + +@@ -80,6 +86,12 @@ friend class JavaCallWrapper; + #ifdef TARGET_ARCH_x86 + # include "javaFrameAnchor_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "javaFrameAnchor_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "javaFrameAnchor_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "javaFrameAnchor_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/os.cpp b/hotspot/src/share/vm/runtime/os.cpp +index 96eed03670..28c78409e7 100644 +--- a/hotspot/src/share/vm/runtime/os.cpp ++++ b/hotspot/src/share/vm/runtime/os.cpp +@@ -1122,7 +1122,8 @@ bool os::is_first_C_frame(frame* fr) { + + uintptr_t old_fp = (uintptr_t)fr->link(); + if ((old_fp & fp_align_mask) != 0) return true; +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) return true; ++ // The check for old_fp and ufp is harmful on MIPS due to its special ABI. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_MIPS64(|| old_fp == ufp)) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack + // frame is too large, either the stack is corrupted or fp is not saved +diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp +index 836c231b03..0ca6e64598 100644 +--- a/hotspot/src/share/vm/runtime/os.hpp ++++ b/hotspot/src/share/vm/runtime/os.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_RUNTIME_OS_HPP + #define SHARE_VM_RUNTIME_OS_HPP + +@@ -857,6 +863,12 @@ class os: AllStatic { + #ifdef TARGET_OS_ARCH_linux_x86 + # include "os_linux_x86.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "os_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "os_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_aarch64 + # include "os_linux_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/prefetch.inline.hpp b/hotspot/src/share/vm/runtime/prefetch.inline.hpp +index f4e30de34d..fec16f842c 100644 +--- a/hotspot/src/share/vm/runtime/prefetch.inline.hpp ++++ b/hotspot/src/share/vm/runtime/prefetch.inline.hpp +@@ -46,6 +46,12 @@ + #ifdef TARGET_OS_ARCH_linux_ppc + # include "prefetch_linux_ppc.inline.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "prefetch_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "prefetch_linux_loongarch.inline.hpp" ++#endif + + // Solaris + #ifdef TARGET_OS_ARCH_solaris_x86 +diff --git a/hotspot/src/share/vm/runtime/registerMap.hpp b/hotspot/src/share/vm/runtime/registerMap.hpp +index 67ef212d65..1e26dfcba4 100644 +--- a/hotspot/src/share/vm/runtime/registerMap.hpp ++++ b/hotspot/src/share/vm/runtime/registerMap.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_REGISTERMAP_HPP + #define SHARE_VM_RUNTIME_REGISTERMAP_HPP + +@@ -45,6 +51,12 @@ + #ifdef TARGET_ARCH_ppc + # include "register_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "register_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++#endif + + class JavaThread; + +@@ -156,6 +168,12 @@ class RegisterMap : public StackObj { + #ifdef TARGET_ARCH_ppc + # include "registerMap_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "registerMap_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "registerMap_loongarch.hpp" ++#endif + + }; + +diff --git a/hotspot/src/share/vm/runtime/relocator.hpp b/hotspot/src/share/vm/runtime/relocator.hpp +index bb19c75fe6..53f3c9f6bd 100644 +--- a/hotspot/src/share/vm/runtime/relocator.hpp ++++ b/hotspot/src/share/vm/runtime/relocator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_RELOCATOR_HPP + #define SHARE_VM_RUNTIME_RELOCATOR_HPP + +@@ -45,6 +51,12 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + + // This code has been converted from the 1.1E java virtual machine + // Thanks to the JavaTopics group for using the code +diff --git a/hotspot/src/share/vm/runtime/safepoint.cpp b/hotspot/src/share/vm/runtime/safepoint.cpp +index 440617c802..be0e4dd13c 100644 +--- a/hotspot/src/share/vm/runtime/safepoint.cpp ++++ b/hotspot/src/share/vm/runtime/safepoint.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. 
These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/symbolTable.hpp" + #include "classfile/systemDictionary.hpp" +@@ -78,6 +84,14 @@ + # include "nativeInst_ppc.hpp" + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++# include "vmreg_loongarch.inline.hpp" ++#endif + #if INCLUDE_ALL_GCS + #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" + #include "gc_implementation/shared/suspendibleThreadSet.hpp" +diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp +index 5f540247f9..abcd6066b9 100644 +--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp ++++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -82,6 +88,15 @@ + # include "nativeInst_ppc.hpp" + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++# include "vmreg_loongarch.inline.hpp" ++#endif ++ + #ifdef COMPILER1 + #include "c1/c1_Runtime1.hpp" + #endif +@@ -220,7 +235,6 @@ void SharedRuntime::print_ic_miss_histogram() { + } + } + #endif // PRODUCT +- + #if INCLUDE_ALL_GCS + + // G1 write-barrier pre: executed before a pointer store. +diff --git a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp +index 37880d8a5c..3987880b16 100644 +--- a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp ++++ b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020, These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "prims/jni.h" + #include "runtime/interfaceSupport.hpp" +@@ -534,6 +540,15 @@ static SAFEBUF int __ieee754_rem_pio2(double x, double *y) { + * then 3 2 + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ ++#if defined(MIPS) || defined(LOONGARCH) ++// TODO: LA ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +diff --git a/hotspot/src/share/vm/runtime/stackValueCollection.cpp b/hotspot/src/share/vm/runtime/stackValueCollection.cpp +index 8774768311..fe81c1bfd8 100644 +--- a/hotspot/src/share/vm/runtime/stackValueCollection.cpp ++++ b/hotspot/src/share/vm/runtime/stackValueCollection.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "runtime/stackValueCollection.hpp" + #ifdef TARGET_ARCH_x86 +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC + +diff --git a/hotspot/src/share/vm/runtime/statSampler.cpp b/hotspot/src/share/vm/runtime/statSampler.cpp +index 41f469622f..3b43089062 100644 +--- a/hotspot/src/share/vm/runtime/statSampler.cpp ++++ b/hotspot/src/share/vm/runtime/statSampler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -51,6 +57,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + + // -------------------------------------------------------- + // StatSamplerTask +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp +index e18b9127df..9bf933762a 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_STUBROUTINES_HPP + #define SHARE_VM_RUNTIME_STUBROUTINES_HPP + +@@ -49,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + + // StubRoutines provides entry points to assembly routines used by + // compiled code and the run-time system. Platform-specific entry +@@ -116,6 +128,10 @@ class StubRoutines: AllStatic { + # include "stubRoutines_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "stubRoutines_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "stubRoutines_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "stubRoutines_loongarch_64.hpp" + #endif + + static jint _verify_oop_count; +diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp +index e6586c40cb..3db678ff48 100644 +--- a/hotspot/src/share/vm/runtime/thread.cpp ++++ b/hotspot/src/share/vm/runtime/thread.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/classLoader.hpp" + #include "classfile/javaClasses.hpp" +diff --git a/hotspot/src/share/vm/runtime/thread.hpp b/hotspot/src/share/vm/runtime/thread.hpp +index 1c19ab7290..aa69217eef 100644 +--- a/hotspot/src/share/vm/runtime/thread.hpp ++++ b/hotspot/src/share/vm/runtime/thread.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_THREAD_HPP + #define SHARE_VM_RUNTIME_THREAD_HPP + +@@ -1711,6 +1717,12 @@ public: + #ifdef TARGET_OS_ARCH_linux_x86 + # include "thread_linux_x86.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "thread_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "thread_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_aarch64 + # include "thread_linux_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp +index 58c1afc810..0938b2edda 100644 +--- a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp ++++ b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP + #define SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP + +@@ -51,6 +57,12 @@ class ThreadLocalStorage : AllStatic { + #ifdef TARGET_OS_ARCH_linux_x86 + # include "threadLS_linux_x86.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "threadLS_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "threadLS_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_aarch64 + # include "threadLS_linux_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/virtualspace.cpp b/hotspot/src/share/vm/runtime/virtualspace.cpp +index 66392b75f1..5ced38d838 100644 +--- a/hotspot/src/share/vm/runtime/virtualspace.cpp ++++ b/hotspot/src/share/vm/runtime/virtualspace.cpp +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -147,6 +148,15 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, + bool special = large && !os::can_commit_large_page_memory(); + char* base = NULL; + ++#if defined MIPS && !defined ZERO ++ size_t opt_reg_addr = 5 * os::Linux::page_size(); ++ static int code_cache_init_flag = 1; ++ if (UseCodeCacheAllocOpt && code_cache_init_flag && executable) { ++ code_cache_init_flag = 0; ++ requested_address = (char*) opt_reg_addr; ++ } ++#endif ++ + if (requested_address != 0) { + requested_address -= noaccess_prefix; // adjust requested address + assert(requested_address != NULL, "huge noaccess prefix?"); +@@ -193,6 +203,12 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, + if (failed_to_reserve_as_requested(base, requested_address, size, false)) { + // OS ignored requested address. Try different address. 
+ base = NULL; ++#if defined MIPS && !defined ZERO ++ if (UseCodeCacheAllocOpt && requested_address == (char*) opt_reg_addr) { ++ requested_address = NULL; ++ base = os::reserve_memory(size, NULL, alignment); ++ } ++#endif + } + } else { + base = os::reserve_memory(size, NULL, alignment); +diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp +index 32e3921b2b..c6cc4c4329 100644 +--- a/hotspot/src/share/vm/runtime/vmStructs.cpp ++++ b/hotspot/src/share/vm/runtime/vmStructs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/dictionary.hpp" + #include "classfile/javaClasses.hpp" +@@ -122,6 +128,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vmStructs_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmStructs_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmStructs_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_x86 + # include "vmStructs_linux_x86.hpp" + #endif +@@ -149,6 +161,12 @@ + #ifdef TARGET_OS_ARCH_linux_ppc + # include "vmStructs_linux_ppc.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "vmStructs_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "vmStructs_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_aix_ppc + # include "vmStructs_aix_ppc.hpp" + #endif +@@ -208,6 +226,10 @@ + # include "adfiles/adGlobals_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" + #endif + #endif // COMPILER2 + +diff --git a/hotspot/src/share/vm/runtime/vm_version.cpp b/hotspot/src/share/vm/runtime/vm_version.cpp +index 91f9c70f5a..d8dcfcfcca 100644 +--- a/hotspot/src/share/vm/runtime/vm_version.cpp ++++ b/hotspot/src/share/vm/runtime/vm_version.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "memory/universe.hpp" + #include "oops/oop.inline.hpp" +@@ -44,6 +50,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + + const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release(); + const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string(); +@@ -193,6 +205,14 @@ const char* Abstract_VM_Version::jre_release_version() { + #else + #define CPU "ppc64" + #endif ++#elif defined(MIPS64) ++#if defined(VM_LITTLE_ENDIAN) ++#define CPU "mips64el" ++#else ++#define CPU "mips64" ++#endif ++#elif defined(LOONGARCH64) ++#define CPU "loongarch64" + #else + #define CPU IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ +diff --git a/hotspot/src/share/vm/utilities/copy.hpp b/hotspot/src/share/vm/utilities/copy.hpp +index c1d82c7083..1279319a17 100644 +--- a/hotspot/src/share/vm/utilities/copy.hpp ++++ b/hotspot/src/share/vm/utilities/copy.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_UTILITIES_COPY_HPP + #define SHARE_VM_UTILITIES_COPY_HPP + +@@ -331,6 +337,27 @@ class Copy : AllStatic { + #endif + } + ++ ++ // SAPJVM AS 2011-09-20. Template for atomic copy. ++ template static void copy_conjoint_atomic(T* from, T* to, size_t count) ++ { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++ } ++ ++ ++ + // Platform dependent implementations of the above methods. + #ifdef TARGET_ARCH_x86 + # include "copy_x86.hpp" +@@ -350,6 +377,13 @@ class Copy : AllStatic { + #ifdef TARGET_ARCH_ppc + # include "copy_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "copy_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "copy_loongarch.hpp" ++#endif ++ + + }; + +diff --git a/hotspot/src/share/vm/utilities/debug.cpp b/hotspot/src/share/vm/utilities/debug.cpp +index 58a32a2b83..1026585f84 100644 +--- a/hotspot/src/share/vm/utilities/debug.cpp ++++ b/hotspot/src/share/vm/utilities/debug.cpp +@@ -690,6 +690,7 @@ void help() { + tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); + tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); + tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); ++ tty->print_cr(" pns($sp, $s8, $pc) on Linux/mips or"); + tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); + tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); + tty->print_cr(" - in dbx do 'frame 1' before calling pns()"); +diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.hpp b/hotspot/src/share/vm/utilities/globalDefinitions.hpp +index 81866b8409..61fc0c48a2 100644 +--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp ++++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP + #define SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP + +@@ -455,6 +461,12 @@ enum RTMState { + #ifdef TARGET_ARCH_ppc + # include "globalDefinitions_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "globalDefinitions_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "globalDefinitions_loongarch.hpp" ++#endif + + /* + * If a platform does not support native stack walking +diff --git a/hotspot/src/share/vm/utilities/macros.hpp b/hotspot/src/share/vm/utilities/macros.hpp +index 599e1074de..41ef06e27f 100644 +--- a/hotspot/src/share/vm/utilities/macros.hpp ++++ b/hotspot/src/share/vm/utilities/macros.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP + #define SHARE_VM_UTILITIES_MACROS_HPP + +@@ -373,6 +379,30 @@ + #define NOT_SPARC(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp +index bc06caccb4..46be35a325 100644 +--- a/hotspot/src/share/vm/utilities/taskqueue.hpp ++++ b/hotspot/src/share/vm/utilities/taskqueue.hpp +@@ -121,11 +121,22 @@ protected: + Age(const Age& age) { _data = age._data; } + Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } + ++#if !defined MIPS && !defined LOONGARCH + Age get() const volatile { return _data; } + void set(Age age) volatile { _data = age._data; } + + idx_t top() const volatile { return _fields._top; } + idx_t tag() const volatile { return _fields._tag; } ++#else ++ Age get() const volatile { ++ size_t res = OrderAccess::load_ptr_acquire((volatile intptr_t*) &_data); ++ return *(Age*)(&res); ++ } ++ void set(Age age) volatile { OrderAccess::release_store_ptr((volatile intptr_t*) &_data, *(size_t*)(&age._data)); } ++ ++ idx_t top() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._top)); } ++ idx_t tag() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._tag)); } ++#endif + + // Increment top; if it wraps, increment tag also. + void increment() { +@@ -195,23 +206,50 @@ protected: + public: + TaskQueueSuper() : _bottom(0), _age() {} + ++#if defined MIPS || defined LOONGARCH ++ inline uint get_bottom() const { ++ return OrderAccess::load_acquire((volatile juint*)&_bottom); ++ } ++ ++ inline void set_bottom(uint new_bottom) { ++ OrderAccess::release_store(&_bottom, new_bottom); ++ } ++#endif + // Return true if the TaskQueue contains/does not contain any tasks. 
+- bool peek() const { return _bottom != _age.top(); } ++ bool peek() const { ++#if defined MIPS || defined LOONGARCH ++ return get_bottom() != _age.top(); ++#else ++ return _bottom != _age.top(); ++#endif ++ } + bool is_empty() const { return size() == 0; } + + // Return an estimate of the number of elements in the queue. + // The "careful" version admits the possibility of pop_local/pop_global + // races. + uint size() const { ++#if defined MIPS || defined LOONGARCH ++ return size(get_bottom(), _age.top()); ++#else + return size(_bottom, _age.top()); ++#endif + } + + uint dirty_size() const { ++#if defined MIPS || defined LOONGARCH ++ return dirty_size(get_bottom(), _age.top()); ++#else + return dirty_size(_bottom, _age.top()); ++#endif + } + + void set_empty() { ++#if defined MIPS || defined LOONGARCH ++ set_bottom(0); ++#else + _bottom = 0; ++#endif + _age.set(0); + } + +@@ -263,7 +301,9 @@ protected: + typedef typename TaskQueueSuper::Age Age; + typedef typename TaskQueueSuper::idx_t idx_t; + ++#if !defined MIPS && !defined LOONGARCH + using TaskQueueSuper::_bottom; ++#endif + using TaskQueueSuper::_age; + using TaskQueueSuper::increment_index; + using TaskQueueSuper::decrement_index; +@@ -327,7 +367,11 @@ template + void GenericTaskQueue::oops_do(OopClosure* f) { + // tty->print_cr("START OopTaskQueue::oops_do"); + uint iters = size(); ++#if defined MIPS || defined LOONGARCH ++ uint index = this->get_bottom(); ++#else + uint index = _bottom; ++#endif + for (uint i = 0; i < iters; ++i) { + index = decrement_index(index); + // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, +@@ -345,14 +389,22 @@ template + bool GenericTaskQueue::push_slow(E t, uint dirty_n_elems) { + if (dirty_n_elems == N - 1) { + // Actually means 0, so do the push. ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = _bottom; ++#endif + // g++ complains if the volatile result of the assignment is + // unused, so we cast the volatile away. We cannot cast directly + // to void, because gcc treats that as not using the result of the + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. + (void)const_cast(_elems[localBot] = t); ++#if defined MIPS || defined LOONGARCH ++ this->set_bottom(increment_index(localBot)); ++#else + OrderAccess::release_store(&_bottom, increment_index(localBot)); ++#endif + TASKQUEUE_STATS_ONLY(stats.record_push()); + return true; + } +@@ -407,7 +459,11 @@ bool GenericTaskQueue::pop_global(volatile E& t) { + #if !(defined SPARC || defined IA32 || defined AMD64) + OrderAccess::fence(); + #endif ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); ++#endif + uint n_elems = size(localBot, oldAge.top()); + if (n_elems == 0) { + return false; +@@ -662,7 +718,11 @@ public: + + template inline bool + GenericTaskQueue::push(E t) { ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = _bottom; ++#endif + assert(localBot < N, "_bottom out of range."); + idx_t top = _age.top(); + uint dirty_n_elems = dirty_size(localBot, top); +@@ -674,7 +734,11 @@ GenericTaskQueue::push(E t) { + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. 
+ (void) const_cast(_elems[localBot] = t); ++#if defined MIPS || defined LOONGARCH ++ this->set_bottom(increment_index(localBot)); ++#else + OrderAccess::release_store(&_bottom, increment_index(localBot)); ++#endif + TASKQUEUE_STATS_ONLY(stats.record_push()); + return true; + } else { +@@ -684,7 +748,11 @@ GenericTaskQueue::push(E t) { + + template inline bool + GenericTaskQueue::pop_local(volatile E& t) { ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = _bottom; ++#endif + // This value cannot be N-1. That can only occur as a result of + // the assignment to bottom in this method. If it does, this method + // resets the size to 0 before the next call (which is sequential, +@@ -693,7 +761,11 @@ GenericTaskQueue::pop_local(volatile E& t) { + assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); + if (dirty_n_elems == 0) return false; + localBot = decrement_index(localBot); ++#if defined MIPS || defined LOONGARCH ++ this->set_bottom(localBot); ++#else + _bottom = localBot; ++#endif + // This is necessary to prevent any read below from being reordered + // before the store just above. + OrderAccess::fence(); +diff --git a/hotspot/src/share/vm/utilities/vmError.cpp b/hotspot/src/share/vm/utilities/vmError.cpp +index fa7a32508e..7098a98a9f 100644 +--- a/hotspot/src/share/vm/utilities/vmError.cpp ++++ b/hotspot/src/share/vm/utilities/vmError.cpp +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2018. These ++ * modifications are Copyright (c) 2018 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++*/ ++ + #include + #include "precompiled.hpp" + #include "compiler/compileBroker.hpp" +@@ -488,7 +495,12 @@ void VMError::report(outputStream* st) { + JDK_Version::runtime_name() : ""; + const char* runtime_version = JDK_Version::runtime_version() != NULL ? + JDK_Version::runtime_version() : ""; +- st->print_cr("# JRE version: %s (%s) (build %s)", runtime_name, buf, runtime_version); ++#ifdef LOONGSON_RUNTIME_NAME ++ const char* loongson_runtime_name_and_version = LOONGSON_RUNTIME_NAME; ++#else ++ const char* loongson_runtime_name_and_version = ""; ++#endif ++ st->print_cr("# JRE version: %s (%s) (build %s) (%s)", runtime_name, buf, runtime_version, loongson_runtime_name_and_version); + st->print_cr("# Java VM: %s (%s %s %s %s)", + Abstract_VM_Version::vm_name(), + Abstract_VM_Version::vm_release(), +diff --git a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh +index fcf1d04b6a..5b8e7dcce5 100644 +--- a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh ++++ b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh +@@ -24,6 +24,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2023. These ++# modifications are Copyright (c) 2023, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + ## @test Test8167409.sh + ## @bug 8167409 + ## @summary Invalid value passed to critical JNI function +@@ -68,6 +74,18 @@ if [ $VM_CPU = "aarch64" ]; then + exit 0; + fi + ++# CriticalJNINatives is not supported for loongarch64 ++if [ $VM_CPU = "loongarch64" ]; then ++ echo "Test Passed" ++ exit 0; ++fi ++ ++# CriticalJNINatives is not supported for mips64 ++if [ $VM_CPU = "mips64" -o $VM_CPU = "mips64el" ]; then ++ echo "Test Passed" ++ exit 0; ++fi ++ + THIS_DIR=. + + cp ${TESTSRC}${FS}*.java ${THIS_DIR} +diff --git a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index fa9a6f208b..885957cf1c 100644 +--- a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -34,11 +34,12 @@ import com.oracle.java.testlibrary.cli.predicate.OrPredicate; + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except SPARC and X86 ++ // Execute the test case on any CPU except SPARC, LoongArch64 and X86 + super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isPPC, +- new OrPredicate(Platform::isX64, Platform::isX86)))))); ++ new OrPredicate(Platform::isLoongArch64, ++ new OrPredicate(Platform::isX64, Platform::isX86))))))); + } + + @Override +diff --git a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index dc8c398408..2427b2bf7b 100644 +--- a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -62,18 +62,24 @@ public class IntrinsicPredicates { + = new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, + null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, ++ null), + new CPUSpecificPredicate("aarch64", new String[] { "sha1" }, +- null)); ++ null))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64", new String[] { "sha256" }, + null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, + null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, ++ null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, + null), + new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, +- null)))); ++ null))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate( +diff --git a/hotspot/test/runtime/6929067/Test6929067.sh b/hotspot/test/runtime/6929067/Test6929067.sh +index 2bbb3401ce..1a5482e645 100644 +--- a/hotspot/test/runtime/6929067/Test6929067.sh ++++ b/hotspot/test/runtime/6929067/Test6929067.sh +@@ -97,6 +97,10 @@ case "$ARCH" in + i686) + ARCH=i386 + ;; ++ loongarch64) ++ COMP_FLAG="" ++ ARCH=loongarch64 ++ ;; + # Assuming other ARCH values need no translation + esac + +diff --git a/hotspot/test/runtime/Unsafe/RangeCheck.java b/hotspot/test/runtime/Unsafe/RangeCheck.java +index 9ded944cb2..4d4ea2e048 100644 +--- a/hotspot/test/runtime/Unsafe/RangeCheck.java ++++ b/hotspot/test/runtime/Unsafe/RangeCheck.java +@@ -43,6 +43,7 @@ public class RangeCheck { + true, + "-Xmx32m", + "-XX:-TransmitErrorReport", ++ "-XX:-InlineUnsafeOps", // The compiler intrinsics doesn't have the assert + DummyClassWithMainRangeCheck.class.getName()); + + OutputAnalyzer output = new OutputAnalyzer(pb.start()); +diff --git a/hotspot/test/test_env.sh b/hotspot/test/test_env.sh +index 5ba4f28c45..d9d8bb6b6b 100644 +--- a/hotspot/test/test_env.sh ++++ b/hotspot/test/test_env.sh +@@ -211,6 +211,29 @@ if [ $? = 0 ] + then + VM_CPU="aarch64" + fi ++grep "mips" vm_version.out > ${NULL} ++if [ $? = 0 ] ++then ++ VM_CPU="mips" ++ if [ $VM_BITS = "64" ] ++ then ++ VM_CPU="mips64" ++ grep "mips64el" vm_version.out > ${NULL} ++ if [ $? = 0 ] ++ then ++ VM_CPU="mips64el" ++ fi ++ fi ++fi ++grep "loongarch" vm_version.out > ${NULL} ++if [ $? = 0 ] ++then ++ VM_CPU="loongarch" ++ if [ $VM_BITS = "64" ] ++ then ++ VM_CPU="loongarch64" ++ fi ++fi + export VM_TYPE VM_BITS VM_OS VM_CPU + echo "VM_TYPE=${VM_TYPE}" + echo "VM_BITS=${VM_BITS}" +diff --git a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java +index 6a14079347..56a6375b5f 100644 +--- a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java ++++ b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java +@@ -126,6 +126,10 @@ public class Platform { + return isArch("aarch64"); + } + ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ + private static boolean isArch(String archnameRE) { + return Pattern.compile(archnameRE, Pattern.CASE_INSENSITIVE) + .matcher(osArch) +@@ -136,6 +140,10 @@ public class Platform { + return osArch; + } + ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ + /** + * Return a boolean for whether we expect to be able to attach + * the SA to our own processes on this system. 
+diff --git a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 7d56a4a3bc..41825e18b3 100644 +--- a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -43,7 +43,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64"), ++ ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isSolaris", "isWindows", "isOSX"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal"), +diff --git a/jdk/make/Images.gmk b/jdk/make/Images.gmk +index 991c0af7b4..9171685655 100644 +--- a/jdk/make/Images.gmk ++++ b/jdk/make/Images.gmk +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2022. These ++# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + include $(SPEC) + include MakeBase.gmk + include JavaCompilation.gmk +@@ -650,6 +656,11 @@ $(JDK_IMAGE_DIR)/src.zip: $(IMAGES_OUTPUTDIR)/src.zip + $(ECHO) $(LOG_INFO) Copying $(patsubst $(OUTPUT_ROOT)/%,%,$@) + $(install-file) + ++# create link "mips64el -> mips64" for deploy ++$(JDK_IMAGE_DIR)/jre/lib/mips64: $(JDK_IMAGE_DIR)/jre/lib/mips64el ++ $(ECHO) $(LOG_INFO) Create link from mips64 to mips64 ++ $(CD) $(JDK_IMAGE_DIR)/jre/lib && $(RM) mips64 && $(LN) -s mips64el mips64 ++ + ################################################################################ + # Post processing (strip etc) + +@@ -728,6 +739,14 @@ ifneq ($(POST_STRIP_CMD), ) + + endif + ++################################################################################ ++# Loongson added list, architecture dependent files ++ifeq ($(OPENJDK_TARGET_CPU), mips64) ++ ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) ++ JDK_IMAGE_LOONGSON_LIST := $(JDK_IMAGE_DIR)/jre/lib/mips64el ++ endif ++endif ++ + ################################################################################ + + # Include the custom makefile right here, after all variables have been defined +@@ -753,6 +772,7 @@ jdk-image: $(JDK_BIN_TARGETS) $(JDKJRE_BIN_TARGETS) \ + $(JDKJRE_DOC_TARGETS) $(JDK_DOC_TARGETS) \ + $(JDK_INFO_FILE) $(JDKJRE_STRIP_LIST) $(JDK_BIN_STRIP_LIST) \ + $(JDK_IMAGE_DIR)/src.zip \ ++ $(JDK_IMAGE_LOONGSON_LIST) \ + $(JDK_BIN_ISADIR_LINK_TARGETS) $(JDKJRE_BIN_ISADIR_LINK_TARGETS) + + jre-overlay-image: $(JRE_OVERLAY_BIN_TARGETS) $(JRE_OVERLAY_LIB_TARGETS) \ +diff --git a/jdk/make/gensrc/GensrcMisc.gmk b/jdk/make/gensrc/GensrcMisc.gmk +index 0e3dee5ca3..66f19f4d25 100644 +--- a/jdk/make/gensrc/GensrcMisc.gmk ++++ b/jdk/make/gensrc/GensrcMisc.gmk +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2018. These ++# modifications are Copyright (c) 2018, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + include ProfileNames.gmk + + ################################################################################ +@@ -39,6 +45,7 @@ $(PROFILE_VERSION_JAVA_TARGETS): \ + $(SED) -e 's/@@launcher_name@@/$(LAUNCHER_NAME)/g' \ + -e 's/@@java_version@@/$(RELEASE)/g' \ + -e 's/@@java_runtime_version@@/$(FULL_VERSION)/g' \ ++ -e 's/@@loongson_runtime_name@@/$(LOONGSON_RUNTIME_NAME)/g' \ + -e 's/@@java_runtime_name@@/$(RUNTIME_NAME)/g' \ + -e 's/@@java_profile_name@@/$(call profile_version_name, $@)/g' \ + $< > $@.tmp +diff --git a/jdk/make/lib/SoundLibraries.gmk b/jdk/make/lib/SoundLibraries.gmk +index b59a9462ec..8ce97dc854 100644 +--- a/jdk/make/lib/SoundLibraries.gmk ++++ b/jdk/make/lib/SoundLibraries.gmk +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + LIBJSOUND_SRC_DIRS := \ + $(JDK_TOPDIR)/src/share/native/com/sun/media/sound \ + $(JDK_TOPDIR)/src/$(OPENJDK_TARGET_OS_API_DIR)/native/com/sun/media/sound +@@ -136,6 +142,14 @@ else + LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC + endif + ++ ifeq ($(OPENJDK_TARGET_CPU), mips64) ++ LIBJSOUND_CFLAGS += -DX_ARCH=X_MIPS64 ++ endif ++ ++ ifeq ($(OPENJDK_TARGET_CPU), loongarch64) ++ LIBJSOUND_CFLAGS += -DX_ARCH=X_LOONGARCH64 ++ endif ++ + ifeq ($(OPENJDK_TARGET_CPU), ppc64) + LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC64 + endif +diff --git a/jdk/src/share/classes/sun/misc/Version.java.template b/jdk/src/share/classes/sun/misc/Version.java.template +index 32e2586e79..e38541a9f7 100644 +--- a/jdk/src/share/classes/sun/misc/Version.java.template ++++ b/jdk/src/share/classes/sun/misc/Version.java.template +@@ -23,6 +23,13 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2018. These ++ * modifications are Copyright (c) 2018 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++ + package sun.misc; + import java.io.PrintStream; + +@@ -44,6 +51,9 @@ public class Version { + private static final String java_runtime_version = + "@@java_runtime_version@@"; + ++ private static final String loongson_runtime_name = ++ "@@loongson_runtime_name@@"; ++ + static { + init(); + } +@@ -103,7 +113,11 @@ public class Version { + + /* Second line: runtime version (ie, libraries). */ + +- ps.print(java_runtime_name + " (build " + java_runtime_version); ++ ps.print(java_runtime_name); ++ if (loongson_runtime_name.length() > 0) { ++ ps.print(" ("+ loongson_runtime_name +")"); ++ } ++ ps.print(" (build " + java_runtime_version); + + if (java_profile_name.length() > 0) { + // profile name +diff --git a/jdk/src/solaris/bin/loongarch64/jvm.cfg b/jdk/src/solaris/bin/loongarch64/jvm.cfg +new file mode 100644 +index 0000000000..42a06755da +--- /dev/null ++++ b/jdk/src/solaris/bin/loongarch64/jvm.cfg +@@ -0,0 +1,36 @@ ++# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. Oracle designates this ++# particular file as subject to the "Classpath" exception as provided ++# by Oracle in the LICENSE file that accompanied this code. 
++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++# List of JVMs that can be used as an option to java, javac, etc. ++# Order is important -- first in this list is the default JVM. ++# NOTE that this both this file and its format are UNSUPPORTED and ++# WILL GO AWAY in a future release. ++# ++# You may also select a JVM in an arbitrary location with the ++# "-XXaltjvm=" option, but that too is unsupported ++# and may not be available in a future release. ++# ++-server KNOWN ++-client IGNORE +diff --git a/jdk/src/solaris/bin/mips64/jvm.cfg b/jdk/src/solaris/bin/mips64/jvm.cfg +new file mode 100644 +index 0000000000..42a06755da +--- /dev/null ++++ b/jdk/src/solaris/bin/mips64/jvm.cfg +@@ -0,0 +1,36 @@ ++# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. Oracle designates this ++# particular file as subject to the "Classpath" exception as provided ++# by Oracle in the LICENSE file that accompanied this code. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++# List of JVMs that can be used as an option to java, javac, etc. ++# Order is important -- first in this list is the default JVM. ++# NOTE that this both this file and its format are UNSUPPORTED and ++# WILL GO AWAY in a future release. ++# ++# You may also select a JVM in an arbitrary location with the ++# "-XXaltjvm=" option, but that too is unsupported ++# and may not be available in a future release. 
++# ++-server KNOWN ++-client IGNORE +diff --git a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java +index d6a026b2cc..b65486023f 100644 +--- a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java ++++ b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java +@@ -54,8 +54,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + } + } + } +diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher +new file mode 100755 +index 0000000000..66291c7522 +Binary files /dev/null and b/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher differ +diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher +new file mode 100644 +index 0000000000..5c8385ca12 +Binary files /dev/null and b/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher differ +diff --git a/jdk/test/sun/security/pkcs11/PKCS11Test.java b/jdk/test/sun/security/pkcs11/PKCS11Test.java +index 5fc9c605de..9db6a17d66 100644 +--- a/jdk/test/sun/security/pkcs11/PKCS11Test.java ++++ b/jdk/test/sun/security/pkcs11/PKCS11Test.java +@@ -21,6 +21,11 @@ + * questions. + */ + ++ /* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ + + // common infrastructure for SunPKCS11 tests + +@@ -589,6 +594,9 @@ public abstract class PKCS11Test { + "/usr/lib64/"}); + osMap.put("Linux-ppc64-64", new String[]{"/usr/lib64/"}); + osMap.put("Linux-ppc64le-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[]{ + PKCS11_BASE + "/nss/lib/windows-i586/".replace('/', SEP)}); + osMap.put("Windows-amd64-64", new String[]{ diff --git a/jemalloc/PKGBUILD b/jemalloc/PKGBUILD index 2244c662c2..810d59169c 100644 --- a/jemalloc/PKGBUILD +++ b/jemalloc/PKGBUILD @@ -15,8 +15,15 @@ makedepends=('clang') options=('!lto') provides=('libjemalloc.so') optdepends=('perl: for jeprof') -source=("https://github.com/jemalloc/jemalloc/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.bz2") -sha256sums=('2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa') +source=("https://github.com/jemalloc/jemalloc/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.bz2" + add-loongarch64.patch) +sha256sums=('2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa' + '5e8f375b5d5aec487d65a7681b3574aa0e19927b1d2d9140fc9dba2cf74a961d') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/add-loongarch64.patch" +} build() { cd $pkgname-$pkgver @@ -24,6 +31,10 @@ build() { # FS#71745: GCC-built jemalloc causes telegram-desktop to crash a lot. The reason is still not clear. export CC=clang export CXX=clang++ + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} ./configure \ --enable-prof \ diff --git a/jemalloc/add-loongarch64.patch b/jemalloc/add-loongarch64.patch new file mode 100644 index 0000000000..22af43b82a --- /dev/null +++ b/jemalloc/add-loongarch64.patch @@ -0,0 +1,14 @@ +Index: jemalloc-5.2.1/include/jemalloc/internal/quantum.h +=================================================================== +--- jemalloc-5.2.1.orig/include/jemalloc/internal/quantum.h ++++ jemalloc-5.2.1/include/jemalloc/internal/quantum.h +@@ -30,6 +30,9 @@ + # ifdef __hppa__ + # define LG_QUANTUM 4 + # endif ++# ifdef __loongarch64 ++# define LG_QUANTUM 4 ++# endif + # ifdef __m68k__ + # define LG_QUANTUM 3 + # endif diff --git a/jless/PKGBUILD b/jless/PKGBUILD index 353e2b1317..a9a35b7ffc 100644 --- a/jless/PKGBUILD +++ b/jless/PKGBUILD @@ -25,7 +25,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/js102/PKGBUILD b/js102/PKGBUILD index 58188c24b1..93d1a698d9 100644 --- a/js102/PKGBUILD +++ b/js102/PKGBUILD @@ -31,9 +31,11 @@ options=(!lto) _relver=${pkgver}esr source=( https://archive.mozilla.org/pub/firefox/releases/$_relver/source/firefox-$_relver.source.tar.xz{,.asc} + js102-loong64-jit.patch ) b2sums=('b70727fa91d0d270673374bebb4745b87f6194191c1c9415547d772811a4a85f79a97e8985877eb5c9beef43fe15bef574172da35935e7024a947919ec11d883' - 'SKIP') + 'SKIP' + '2740b5d321f585cd60a038c1b1350c9e08c6559cc02a331ce7bc3d228bc785b066f795c8cbd2e43d46fe67687df420e20f4bb4bce40a2a2f962bc812aa170d01') validpgpkeys=( 14F26682D0916CDD81E37B6D61B7B526D98F0353 # Mozilla Software Releases ) @@ -44,6 +46,7 @@ COMPRESSZST+=(--long) prepare() { mkdir mozbuild cd firefox-$pkgver + patch -p1 -i $srcdir/js102-loong64-jit.patch cat >../mozconfig 
<.mozconfig ../mozconfig - <.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < +# Date 1665408155 0 +# Node ID d9e0d2d8b3a89bdee2a55a0bdab9adcd108eb253 +# Parent f0bbec2617db346c3032e870fc571970728ae220 +Bug 1792981 - [loong64] Enable JIT compiler of loong64 port by default. r=jandem + +Fix a build with JIT issue on native loongarch64 machine, and enable JIT by default. + +Differential Revision: https://phabricator.services.mozilla.com/D158397 + +diff --git a/js/moz.configure b/js/moz.configure +--- a/js/moz.configure ++++ b/js/moz.configure +@@ -265,16 +265,18 @@ def jit_codegen(jit_enabled, simulator, + + if simulator: + return simulator + + if target.cpu == "aarch64": + return namespace(arm64=True) + elif target.cpu == "x86_64": + return namespace(x64=True) ++ elif target.cpu == "loongarch64": ++ return namespace(loong64=True) + + return namespace(**{str(target.cpu): True}) + + + set_config("JS_CODEGEN_NONE", jit_codegen.none) + set_config("JS_CODEGEN_ARM", jit_codegen.arm) + set_config("JS_CODEGEN_ARM64", jit_codegen.arm64) + set_config("JS_CODEGEN_MIPS32", jit_codegen.mips32) +diff --git a/js/src/wasm/WasmSignalHandlers.cpp b/js/src/wasm/WasmSignalHandlers.cpp +--- a/js/src/wasm/WasmSignalHandlers.cpp ++++ b/js/src/wasm/WasmSignalHandlers.cpp +@@ -158,10 +158,10 @@ + # define R32_sig(p) ((p)->uc_mcontext.gp_regs[32]) + # endif + # if defined(__linux__) && defined(__loongarch__) +-# define EPC_sig(p) ((p)->uc_mcontext.pc) +-# define RRA_sig(p) ((p)->uc_mcontext.gregs[1]) +-# define RSP_sig(p) ((p)->uc_mcontext.gregs[3]) +-# define RFP_sig(p) ((p)->uc_mcontext.gregs[22]) ++# define EPC_sig(p) ((p)->uc_mcontext.__pc) ++# define RRA_sig(p) ((p)->uc_mcontext.__gregs[1]) ++# define R03_sig(p) ((p)->uc_mcontext.__gregs[3]) ++# define RFP_sig(p) ((p)->uc_mcontext.__gregs[22]) + # endif + # elif defined(__NetBSD__) + # define EIP_sig(p) ((p)->uc_mcontext.__gregs[_REG_EIP]) +@@ -403,17 +403,17 @@ struct macos_aarch64_context { + # elif defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ + defined(__PPC64LE__) + # define PC_sig(p) R32_sig(p) + # define SP_sig(p) R01_sig(p) + # define FP_sig(p) R01_sig(p) + # elif defined(__loongarch__) + # define PC_sig(p) EPC_sig(p) + # define FP_sig(p) RFP_sig(p) +-# define SP_sig(p) RSP_sig(p) ++# define SP_sig(p) R03_sig(p) + # define LR_sig(p) RRA_sig(p) + # endif + + static void SetContextPC(CONTEXT* context, uint8_t* pc) { + # ifdef PC_sig + *reinterpret_cast(&PC_sig(context)) = pc; + # else + MOZ_CRASH(); + diff --git a/js115/PKGBUILD b/js115/PKGBUILD index 08b7ddc73d..903e6579df 100644 --- a/js115/PKGBUILD +++ b/js115/PKGBUILD @@ -16,7 +16,7 @@ depends=( makedepends=( autoconf2.13 clang - lld +# lld llvm python rust @@ -57,11 +57,12 @@ ac_add_options --enable-release ac_add_options --enable-hardening ac_add_options --enable-optimize ac_add_options --enable-rust-simd -ac_add_options --enable-linker=lld +ac_add_options --enable-linker=bfd ac_add_options --disable-bootstrap ac_add_options --disable-debug ac_add_options --disable-jemalloc ac_add_options --disable-strip +ac_add_options --disable-unified-build # System libraries ac_add_options --with-system-zlib @@ -82,11 +83,18 @@ build() { export MOZBUILD_STATE_PATH="$srcdir/mozbuild" export MOZ_BUILD_DATE="$(date -u${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH} +%Y%m%d%H%M%S)" export MOZ_NOSPAM=1 +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # 
malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" CXXFLAGS="${CXXFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" + # LTO needs more open files + ulimit -n 4096 # Greatly reduce size of relocation tables # https://gitlab.archlinux.org/archlinux/rfcs/-/blob/master/rfcs/0023-pack-relative-relocs.rst LDFLAGS+=" -Wl,-z,pack-relative-relocs" @@ -94,40 +102,40 @@ build() { # Do 3-tier PGO echo "Building instrumented JS..." cat >.mozconfig ../mozconfig - <.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < + inline void cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + + template + inline void cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) PER_ARCH; +@@ -1694,7 +1700,7 @@ + inline void branchTestInt32(Condition cond, Register tag, + Label* label) PER_SHARED_ARCH; + inline void branchTestDouble(Condition cond, Register tag, Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + inline void branchTestNumber(Condition cond, Register tag, + Label* label) PER_SHARED_ARCH; + inline void branchTestBoolean(Condition cond, Register tag, +@@ -1726,7 +1732,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestUndefined(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestInt32(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1734,7 +1740,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestInt32(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestDouble(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1742,11 +1748,11 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestDouble(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestNumber(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestBoolean(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1754,7 +1760,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestBoolean(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestString(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1762,7 +1768,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestString(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestSymbol(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1770,7 +1776,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestSymbol(Condition cond, const ValueOperand& value, + 
Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestBigInt(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1786,7 +1792,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestNull(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + // Clobbers the ScratchReg on x64. + inline void branchTestObject(Condition cond, const Address& address, +@@ -1795,7 +1801,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestObject(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestGCThing(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1806,7 +1812,7 @@ + + inline void branchTestPrimitive(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestMagic(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1815,7 +1821,7 @@ + template + inline void branchTestMagic(Condition cond, const ValueOperand& value, + L label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestMagic(Condition cond, const Address& valaddr, + JSWhyMagic why, Label* label) PER_ARCH; +@@ -1833,17 +1839,17 @@ + // The type of the value should match the type of the method. + inline void branchTestInt32Truthy(bool truthy, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + inline void branchTestDoubleTruthy(bool truthy, FloatRegister reg, + Label* label) PER_SHARED_ARCH; + inline void branchTestBooleanTruthy(bool truthy, const ValueOperand& value, + Label* label) PER_ARCH; + inline void branchTestStringTruthy(bool truthy, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + inline void branchTestBigIntTruthy(bool truthy, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + // Create an unconditional branch to the address given as argument. + inline void branchToComputedAddress(const BaseIndex& address) PER_ARCH; +@@ -2007,10 +2013,10 @@ + // ======================================================================== + // Memory access primitives. 
+ inline void storeUncanonicalizedDouble(FloatRegister src, const Address& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedDouble(FloatRegister src, + const BaseIndex& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedDouble(FloatRegister src, const Operand& dest) + DEFINED_ON(x86_shared); + +@@ -2024,10 +2030,10 @@ + + inline void storeUncanonicalizedFloat32(FloatRegister src, + const Address& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedFloat32(FloatRegister src, + const BaseIndex& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedFloat32(FloatRegister src, + const Operand& dest) + DEFINED_ON(x86_shared); +@@ -3475,10 +3481,10 @@ + + // temp required on x86 and x64; must be undefined on mips64. + void convertUInt64ToFloat32(Register64 src, FloatRegister dest, Register temp) +- DEFINED_ON(arm64, mips64, x64, x86); ++ DEFINED_ON(arm64, mips64, loongarch64, x64, x86); + + void convertInt64ToFloat32(Register64 src, FloatRegister dest) +- DEFINED_ON(arm64, mips64, x64, x86); ++ DEFINED_ON(arm64, mips64, loongarch64, x64, x86); + + bool convertUInt64ToDoubleNeedsTemp() PER_ARCH; + +@@ -3519,19 +3525,19 @@ + + void wasmBoundsCheck32(Condition cond, Register index, + Register boundsCheckLimit, Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + void wasmBoundsCheck32(Condition cond, Register index, + Address boundsCheckLimit, Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + void wasmBoundsCheck64(Condition cond, Register64 index, + Register64 boundsCheckLimit, Label* label) +- DEFINED_ON(arm64, mips64, x64); ++ DEFINED_ON(arm64, mips64, loongarch64, x64); + + void wasmBoundsCheck64(Condition cond, Register64 index, + Address boundsCheckLimit, Label* label) +- DEFINED_ON(arm64, mips64, x64); ++ DEFINED_ON(arm64, mips64, loongarch64, x64); + + // Each wasm load/store instruction appends its own wasm::Trap::OutOfBounds. + void wasmLoad(const wasm::MemoryAccessDesc& access, Operand srcAddr, +@@ -3554,13 +3560,13 @@ + DEFINED_ON(arm, mips_shared); + void wasmLoadI64(const wasm::MemoryAccessDesc& access, Register memoryBase, + Register ptr, Register ptrScratch, Register64 output) +- DEFINED_ON(arm, mips32, mips64); ++ DEFINED_ON(arm, mips32, mips64, loongarch64); + void wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, + Register memoryBase, Register ptr, Register ptrScratch) + DEFINED_ON(arm, mips_shared); + void wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, + Register memoryBase, Register ptr, Register ptrScratch) +- DEFINED_ON(arm, mips32, mips64); ++ DEFINED_ON(arm, mips32, mips64, loongarch64); + + // These accept general memoryBase + ptr + offset (in `access`); the offset is + // always smaller than the guard region. 
They will insert an additional add +@@ -3580,14 +3586,14 @@ + void wasmUnalignedLoad(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, Register ptrScratch, + Register output, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // MIPS: `ptr` will always be updated. + void wasmUnalignedLoadFP(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, + Register ptrScratch, FloatRegister output, + Register tmp1, Register tmp2, Register tmp3) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // `ptr` will always be updated. + void wasmUnalignedLoadI64(const wasm::MemoryAccessDesc& access, +@@ -3599,19 +3605,19 @@ + void wasmUnalignedStore(const wasm::MemoryAccessDesc& access, Register value, + Register memoryBase, Register ptr, + Register ptrScratch, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // `ptr` will always be updated. + void wasmUnalignedStoreFP(const wasm::MemoryAccessDesc& access, + FloatRegister floatValue, Register memoryBase, + Register ptr, Register ptrScratch, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // `ptr` will always be updated. + void wasmUnalignedStoreI64(const wasm::MemoryAccessDesc& access, + Register64 value, Register memoryBase, + Register ptr, Register ptrScratch, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // wasm specific methods, used in both the wasm baseline compiler and ion. + +@@ -3642,11 +3648,11 @@ + void wasmTruncateDoubleToInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void wasmTruncateDoubleToUInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void oolWasmTruncateCheckF64ToI64(FloatRegister input, Register64 output, + TruncFlags flags, wasm::BytecodeOffset off, + Label* rejoin) +@@ -3655,11 +3661,11 @@ + void wasmTruncateFloat32ToInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void wasmTruncateFloat32ToUInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void oolWasmTruncateCheckF32ToI64(FloatRegister input, Register64 output, + TruncFlags flags, wasm::BytecodeOffset off, + Label* rejoin) +@@ -4808,7 +4814,7 @@ + template + inline void addStackPtrTo(T t); + +- void subFromStackPtr(Imm32 imm32) DEFINED_ON(mips32, mips64, arm, x86, x64); ++ void subFromStackPtr(Imm32 imm32) DEFINED_ON(mips32, mips64, loongarch64, arm, x86, x64); + void subFromStackPtr(Register reg); + + template +diff -ur a/mfbt/double-conversion/double-conversion/utils.h b/mfbt/double-conversion/double-conversion/utils.h +--- a/mfbt/double-conversion/double-conversion/utils.h 2022-07-02 17:03:33.801508000 +0800 ++++ b/mfbt/double-conversion/double-conversion/utils.h 2022-07-02 17:18:24.988508000 +0800 +@@ -107,7 +107,7 @@ + #if defined(_M_X64) 
|| defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ +- defined(__mips__) || \ ++ defined(__mips__) || defined(__loongarch__) || defined(__loongarch64) || \ + defined(__nios2__) || defined(__ghs) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ +diff -ur a/mfbt/tests/TestPoisonArea.cpp b/mfbt/tests/TestPoisonArea.cpp +--- a/mfbt/tests/TestPoisonArea.cpp 2022-07-02 17:03:33.809508000 +0800 ++++ b/mfbt/tests/TestPoisonArea.cpp 2022-07-02 17:04:48.113508000 +0800 +@@ -175,6 +175,9 @@ + # define RETURN_INSTR _return_instr + # define RETURN_INSTR_TYPE ia64_instr + ++#elif defined __loongarch64 ++#define RETURN_INSTR 0x03e00008 /* jr ra */ ++ + #else + # error "Need return instruction for this architecture" + #endif +diff -ur a/nsprpub/pr/include/md/_freebsd.cfg b/nsprpub/pr/include/md/_freebsd.cfg +--- a/nsprpub/pr/include/md/_freebsd.cfg 2022-07-02 17:03:36.055508000 +0800 ++++ b/nsprpub/pr/include/md/_freebsd.cfg 2022-07-02 17:04:48.113508000 +0800 +@@ -490,6 +490,53 @@ + #define PR_BYTES_PER_WORD_LOG2 3 + #define PR_BYTES_PER_DWORD_LOG2 3 + ++#elif defined(__loongarch__) ++ ++#undef IS_BIG_ENDIAN ++#define IS_LITTLE_ENDIAN 1 ++ ++#define IS_64 ++ ++#define PR_BYTES_PER_BYTE 1 ++#define PR_BYTES_PER_SHORT 2 ++#define PR_BYTES_PER_INT 4 ++#define PR_BYTES_PER_INT64 8 ++#define PR_BYTES_PER_LONG 8 ++#define PR_BYTES_PER_FLOAT 4 ++#define PR_BYTES_PER_DOUBLE 8 ++#define PR_BYTES_PER_WORD 8 ++#define PR_BYTES_PER_DWORD 8 ++ ++#define PR_BITS_PER_BYTE 8 ++#define PR_BITS_PER_SHORT 16 ++#define PR_BITS_PER_INT 32 ++#define PR_BITS_PER_INT64 64 ++#define PR_BITS_PER_LONG 64 ++#define PR_BITS_PER_FLOAT 32 ++#define PR_BITS_PER_DOUBLE 64 ++#define PR_BITS_PER_WORD 64 ++ ++#define PR_BITS_PER_BYTE_LOG2 3 ++#define PR_BITS_PER_SHORT_LOG2 4 ++#define PR_BITS_PER_INT_LOG2 5 ++#define PR_BITS_PER_INT64_LOG2 6 ++#define PR_BITS_PER_LONG_LOG2 6 ++#define PR_BITS_PER_FLOAT_LOG2 5 ++#define PR_BITS_PER_DOUBLE_LOG2 6 ++#define PR_BITS_PER_WORD_LOG2 6 ++ ++#define PR_ALIGN_OF_SHORT 2 ++#define PR_ALIGN_OF_INT 4 ++#define PR_ALIGN_OF_LONG 8 ++#define PR_ALIGN_OF_INT64 8 ++#define PR_ALIGN_OF_FLOAT 4 ++#define PR_ALIGN_OF_DOUBLE 8 ++#define PR_ALIGN_OF_POINTER 8 ++#define PR_ALIGN_OF_WORD 8 ++ ++#define PR_BYTES_PER_WORD_LOG2 3 ++#define PR_BYTES_PER_DWORD_LOG2 3 ++ + #elif defined(__mips__) + + #if defined(__MIPSEB__) || defined(_MIPSEB) +diff -ur a/nsprpub/pr/include/md/_linux.cfg b/nsprpub/pr/include/md/_linux.cfg +--- a/nsprpub/pr/include/md/_linux.cfg 2022-07-02 17:03:36.055508000 +0800 ++++ b/nsprpub/pr/include/md/_linux.cfg 2022-07-02 17:04:48.114508000 +0800 +@@ -496,6 +496,56 @@ + #define PR_BYTES_PER_WORD_LOG2 2 + #define PR_BYTES_PER_DWORD_LOG2 3 + ++#elif defined(__loongarch__) ++ ++/* For _ABI64 */ ++#include ++ ++#define IS_LITTLE_ENDIAN 1 ++#undef IS_BIG_ENDIAN ++ ++#define IS_64 ++ ++#define PR_BYTES_PER_BYTE 1 ++#define PR_BYTES_PER_SHORT 2 ++#define PR_BYTES_PER_INT 4 ++#define PR_BYTES_PER_INT64 8 ++#define PR_BYTES_PER_LONG 4 ++#define PR_BYTES_PER_FLOAT 4 ++#define PR_BYTES_PER_DOUBLE 8 ++#define PR_BYTES_PER_WORD 4 ++#define PR_BYTES_PER_DWORD 8 ++ ++#define PR_BITS_PER_BYTE 8 ++#define PR_BITS_PER_SHORT 16 ++#define PR_BITS_PER_INT 32 ++#define PR_BITS_PER_INT64 64 ++#define PR_BITS_PER_LONG 32 ++#define PR_BITS_PER_FLOAT 32 ++#define PR_BITS_PER_DOUBLE 64 ++#define PR_BITS_PER_WORD 32 ++ 
++#define PR_BITS_PER_BYTE_LOG2 3 ++#define PR_BITS_PER_SHORT_LOG2 4 ++#define PR_BITS_PER_INT_LOG2 5 ++#define PR_BITS_PER_INT64_LOG2 6 ++#define PR_BITS_PER_LONG_LOG2 5 ++#define PR_BITS_PER_FLOAT_LOG2 5 ++#define PR_BITS_PER_DOUBLE_LOG2 6 ++#define PR_BITS_PER_WORD_LOG2 5 ++ ++#define PR_ALIGN_OF_SHORT 2 ++#define PR_ALIGN_OF_INT 4 ++#define PR_ALIGN_OF_LONG 4 ++#define PR_ALIGN_OF_INT64 8 ++#define PR_ALIGN_OF_FLOAT 4 ++#define PR_ALIGN_OF_DOUBLE 8 ++#define PR_ALIGN_OF_POINTER 4 ++#define PR_ALIGN_OF_WORD 4 ++ ++#define PR_BYTES_PER_WORD_LOG2 2 ++#define PR_BYTES_PER_DWORD_LOG2 3 ++ + #elif defined(__mips__) + + /* For _ABI64 */ +diff -ur a/nsprpub/pr/include/md/_linux.h b/nsprpub/pr/include/md/_linux.h +--- a/nsprpub/pr/include/md/_linux.h 2022-07-02 17:03:36.056508000 +0800 ++++ b/nsprpub/pr/include/md/_linux.h 2022-07-02 17:04:48.114508000 +0800 +@@ -37,6 +37,8 @@ + #define _PR_SI_ARCHITECTURE "sparc" + #elif defined(__i386__) + #define _PR_SI_ARCHITECTURE "x86" ++#elif defined(__loongarch__) ++#define _PR_SI_ARCHITECTURE "loongarch" + #elif defined(__mips__) + #define _PR_SI_ARCHITECTURE "mips" + #elif defined(__arm__) +@@ -83,7 +85,7 @@ + #define _MD_DEFAULT_STACK_SIZE 65536L + #define _MD_MMAP_FLAGS MAP_PRIVATE + +-#if defined(__aarch64__) || defined(__mips__) ++#if defined(__aarch64__) || defined(__mips__) || defined(__loongarch__) + #define _MD_MINIMUM_STACK_SIZE 0x20000 + #endif + +@@ -178,6 +180,16 @@ + #endif + #endif + ++#if defined(__loongarch__) && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) ++/* Use GCC built-in functions */ ++#define _PR_HAVE_ATOMIC_OPS ++#define _MD_INIT_ATOMIC() ++#define _MD_ATOMIC_INCREMENT(ptr) __sync_add_and_fetch(ptr, 1) ++#define _MD_ATOMIC_DECREMENT(ptr) __sync_sub_and_fetch(ptr, 1) ++#define _MD_ATOMIC_ADD(ptr, i) __sync_add_and_fetch(ptr, i) ++#define _MD_ATOMIC_SET(ptr, nv) __sync_lock_test_and_set(ptr, nv) ++#endif ++ + #if defined(__mips__) && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + /* Use GCC built-in functions */ + #define _PR_HAVE_ATOMIC_OPS +@@ -454,6 +466,18 @@ + #endif /* defined(__GLIBC__) && __GLIBC__ >= 2 */ + #define PR_NUM_GCREGS 6 + ++#elif defined(__loongarch__) ++/* Linux/MIPS */ ++#if defined(__GLIBC__) && __GLIBC__ >= 2 ++#define _MD_GET_SP(_t) (_t)->md.context[0].__jmpbuf[0].__sp ++#define _MD_SET_FP(_t, val) ((_t)->md.context[0].__jmpbuf[0].__fp = (val)) ++#define _MD_GET_SP_PTR(_t) &(_MD_GET_SP(_t)) ++#define _MD_GET_FP_PTR(_t) (&(_t)->md.context[0].__jmpbuf[0].__fp) ++#define _MD_SP_TYPE __ptr_t ++#else ++#error "Linux/Loongarch pre-glibc2 not supported yet" ++#endif /* defined(__GLIBC__) && __GLIBC__ >= 2 */ ++ + #elif defined(__mips__) + /* Linux/MIPS */ + #if defined(__GLIBC__) && __GLIBC__ >= 2 +@@ -533,6 +557,19 @@ + _thread->md.sp = _MD_GET_SP_PTR(_thread); \ + _thread->md.fp = _MD_GET_FP_PTR(_thread); \ + _MD_SET_FP(_thread, 0); \ ++} ++ ++#elif defined(__loongarch__) ++ ++#define _MD_INIT_CONTEXT(_thread, _sp, _main, status) \ ++{ \ ++ *status = PR_TRUE; \ ++ (void) sigsetjmp(CONTEXT(_thread), 1); \ ++ _thread->md.context[0].__jmpbuf[0].__pc = (__ptr_t) _main; \ ++ _MD_GET_SP(_thread) = (_MD_SP_TYPE) ((_sp) - 64); \ ++ _thread->md.sp = _MD_GET_SP_PTR(_thread); \ ++ _thread->md.fp = _MD_GET_FP_PTR(_thread); \ ++ _MD_SET_FP(_thread, 0); \ + } + + #elif defined(__mips__) +diff -ur a/nsprpub/pr/include/pratom.h b/nsprpub/pr/include/pratom.h +--- a/nsprpub/pr/include/pratom.h 2022-07-02 17:03:36.058508000 +0800 ++++ b/nsprpub/pr/include/pratom.h 2022-07-02 17:04:48.114508000 +0800 +@@ -108,6 +108,8 @@ + 
(defined(__arm__) && \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)) || \ + defined(__aarch64__) || defined(__alpha) || \ ++ (defined(__loongarch__) && \ ++ defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)) || \ + (defined(__mips__) && \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4))))) + +diff -ur a/python/mozbuild/mozbuild/configure/constants.py b/python/mozbuild/mozbuild/configure/constants.py +--- a/python/mozbuild/mozbuild/configure/constants.py 2022-07-02 17:03:43.773508000 +0800 ++++ b/python/mozbuild/mozbuild/configure/constants.py 2022-07-02 17:26:30.853508000 +0800 +@@ -47,6 +47,7 @@ + "arm": 32, + "hppa": 32, + "ia64": 64, ++ "loongarch64": 64, + "m68k": 32, + "mips32": 32, + "mips64": 64, +@@ -83,6 +84,7 @@ + ("arm", "__arm__ || _M_ARM"), + ("aarch64", "__aarch64__ || _M_ARM64"), + ("ia64", "__ia64__"), ++ ("loongarch64", "__loongarch64"), + ("s390x", "__s390x__"), + ("s390", "__s390__"), + ("ppc64", "__powerpc64__"), diff --git a/just/PKGBUILD b/just/PKGBUILD index 4f29e500ce..881fa91652 100644 --- a/just/PKGBUILD +++ b/just/PKGBUILD @@ -21,7 +21,13 @@ prepare() { build() { cd "${pkgname}-${pkgver}" - cargo build --frozen --release + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < +Date: Thu, 29 Jun 2023 19:49:29 +0800 +Subject: [PATCH] Add basic support for LoongArch architecture + +--- + src/engines/ptrace.cc | 6 ++++++ + src/engines/ptrace_linux.cc | 18 +++++++++++------- + src/solib-parser/lib.c | 2 ++ + 3 files changed, 19 insertions(+), 7 deletions(-) + +diff --git a/src/engines/ptrace.cc b/src/engines/ptrace.cc +index 7676751..18acaa6 100644 +--- a/src/engines/ptrace.cc ++++ b/src/engines/ptrace.cc +@@ -52,6 +52,12 @@ static unsigned long arch_setupBreakpoint(unsigned long addr, unsigned long old_ + val = (old_data & ~(0xffffffffUL << shift)) | (0xd4200000UL << shift); + #elif defined(__riscv) + val = 0x00100073; /* ebreak */ // No width problem, prefer ebreak than c.ebreak for ISA w/o C extension. 
++#elif defined(__loongarch__) ++ unsigned long aligned_addr = getAligned(addr); ++ unsigned long offs = addr - aligned_addr; ++ unsigned long shift = 8 * offs; ++ ++ val = (old_data & ~(0xffffffffUL << shift)) | (0x002a0004UL << shift); /* break 0x4 */ + #else + # error Unsupported architecture + #endif +diff --git a/src/engines/ptrace_linux.cc b/src/engines/ptrace_linux.cc +index 4062a06..a73be00 100644 +--- a/src/engines/ptrace_linux.cc ++++ b/src/engines/ptrace_linux.cc +@@ -3,7 +3,7 @@ + #include + #include + +-#if defined(__aarch64__) ++#if defined(__aarch64__) || defined(__loongarch__) + # include + # include + #endif +@@ -20,7 +20,7 @@ + enum + { + i386_EIP = 12, x86_64_RIP = 16, ppc_NIP = 32, arm_PC = 15, aarch64_PC = 32, // See Linux arch/arm64/include/asm/ptrace.h +- riscv_EPC = 0 ++ riscv_EPC = 0, loongarch_ERA = 33 + }; + + static void arch_adjustPcAfterBreakpoint(unsigned long *regs); +@@ -41,7 +41,7 @@ static void arch_adjustPcAfterBreakpoint(unsigned long *regs) + regs[i386_EIP]--; + #elif defined(__x86_64__) + regs[x86_64_RIP]--; +-#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || defined(__riscv) ++#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch__) + // Do nothing + #else + # error Unsupported architecture +@@ -64,6 +64,8 @@ static unsigned long arch_getPcFromRegs(unsigned long *regs) + out = regs[ppc_NIP]; + #elif defined(__riscv) + out = regs[riscv_EPC]; ++#elif defined(__loongarch__) ++ out = regs[loongarch_ERA]; + #else + # error Unsupported architecture + #endif +@@ -321,7 +323,7 @@ static unsigned long getPcFromRegs(unsigned long *regs) + + static long getRegs(pid_t pid, void *addr, void *regs, size_t len) + { +-#if defined(__aarch64__) ++#if defined(__aarch64__) || defined(__loongarch__) + struct iovec iov = + { regs, len}; + return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &iov); +@@ -369,7 +371,7 @@ void ptrace_sys::pokeWord(pid_t pid, unsigned long aligned_addr, unsigned long v + + static long setRegs(pid_t pid, void *addr, void *regs, size_t len) + { +-#if defined(__aarch64__) ++#if defined(__aarch64__) || defined(__loongarch__) + struct iovec iov = + { regs, len}; + return ptrace(PTRACE_SETREGSET, pid, (void *)NT_PRSTATUS, &iov); +@@ -392,7 +394,7 @@ void ptrace_sys::singleStep(pid_t pid) + void ptrace_sys::skipInstruction(pid_t pid) + { + // Nop on x86, op on PowerPC/ARM +-#if defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) ++#if defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) + unsigned long regs[1024]; + + getRegs(pid, NULL, regs, sizeof regs); +@@ -401,8 +403,10 @@ void ptrace_sys::skipInstruction(pid_t pid) + regs[ppc_NIP] += 4; + # elif defined(__aarch64__) + regs[aarch64_PC] += 4; +-# else ++# elif defined(__arm__) + regs[arm_PC] += 4; ++# else ++ regs[loongarch_ERA] += 4; + # endif + setRegs(pid, NULL, regs, sizeof regs); + #endif +diff --git a/src/solib-parser/lib.c b/src/solib-parser/lib.c +index 87a2344..82a7712 100644 +--- a/src/solib-parser/lib.c ++++ b/src/solib-parser/lib.c +@@ -97,6 +97,8 @@ static void force_breakpoint(void) + ".long 0xd4200000\n" /* From https://github.com/scottt/debugbreak */ + #elif defined(__riscv) + "ebreak\n" ++#elif defined(__loongarch__) ++ "break 0x4\n" + #else + # error Unsupported architecture + #endif +-- +2.41.0 + diff --git a/kcov/PKGBUILD b/kcov/PKGBUILD index c244cabe49..dab17f9fe7 100644 --- a/kcov/PKGBUILD +++ b/kcov/PKGBUILD @@ -9,8 +9,15 @@ 
url="https://simonkagstrom.github.io/kcov" license=('GPL2') depends=('elfutils' 'curl' 'zstd' 'zlib' 'binutils' 'libsframe.so') makedepends=('cmake' 'python' 'ninja') -source=("https://github.com/SimonKagstrom/kcov/archive/v${pkgver}.tar.gz") -sha512sums=('4ba0eafe54e4e156a18c965c43bc9634db2e1f385ea4cf52e9123818b3addd31357b6857cd17490894eeb12554bb5a77e89b0657e5fdefa05696d2ac20058ae8') +source=("https://github.com/SimonKagstrom/kcov/archive/v${pkgver}.tar.gz" + 0001-Add-basic-support-for-LoongArch-architecture.patch) +sha512sums=('4ba0eafe54e4e156a18c965c43bc9634db2e1f385ea4cf52e9123818b3addd31357b6857cd17490894eeb12554bb5a77e89b0657e5fdefa05696d2ac20058ae8' + 'e6f34cfbe6ed16c3c4288eacf7b68ed7428fe9690b4974329ce65d9efd413c73662409c7b76663e81b8bb56260499d9bf252ab7afffd3602f19981157eadd0cc') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/0001-Add-basic-support-for-LoongArch-architecture.patch +} build() { cd $pkgname-$pkgver diff --git a/kdeplasma-addons/PKGBUILD b/kdeplasma-addons/PKGBUILD index e31b7508ca..9995028f99 100644 --- a/kdeplasma-addons/PKGBUILD +++ b/kdeplasma-addons/PKGBUILD @@ -11,7 +11,7 @@ arch=(loong64 x86_64) url='https://kde.org/plasma-desktop/' license=(LGPL) depends=(plasma-workspace kunitconversion5) -makedepends=(extra-cmake-modules qt5-webengine networkmanager-qt5) +makedepends=(extra-cmake-modules networkmanager-qt5) optdepends=('purpose5: Quickshare applet' 'quota-tools: disk quota applet' 'qt5-webengine: dictionary and webbrowser applets' diff --git a/kernel-headers-musl/PKGBUILD b/kernel-headers-musl/PKGBUILD index 57f0bb971b..d110331dea 100644 --- a/kernel-headers-musl/PKGBUILD +++ b/kernel-headers-musl/PKGBUILD @@ -3,25 +3,28 @@ # Contributor: Jens Staal pkgname=kernel-headers-musl -pkgver=4.19.88 -pkgrel=2 +pkgver=6.0 +_rdate=20221017 +pkgrel=1 pkgdesc="Linux kernel headers sanitized for use with musl libc" arch=('loong64' 'x86_64') url="https://github.com/sabotage-linux/kernel-headers" license=('LGPL') +makedepends=(rsync) depends=('musl') -source=("$pkgname-$pkgver.tar.gz::https://github.com/sabotage-linux/kernel-headers/archive/v${pkgver/_/-}.tar.gz") -sha512sums=('db0239c40399c89cc250b9f1f53b7ec4eb119fde6b25c503aef7e88b80694df3a5e89196a22e66376731764bac83d9120794ee6c601a95b824f1ab770cb45a61') +source=(https://github.com/yetist/linux/releases/download/v${_rdate}/linux-${pkgver}-${_rdate}.tar.xz) +sha256sums=('ac4822f7dad35e42b1d0b02190eb876d80f3beefe9576ae8b45aeb5c5bc79eb1') _CARCH=$CARCH [[ $CARCH = i?86 ]] && _CARCH=x86 +[[ $CARCH = loong64 ]] && _CARCH=loongarch build() { - cd "$srcdir"/kernel-headers-${pkgver/_/-} - make ARCH=${_CARCH} prefix=/usr/lib/musl + cd "$srcdir"/linux-${pkgver/_/-} + make ARCH=${_CARCH} mrproper } package() { - cd "$srcdir"/kernel-headers-${pkgver/_/-} - make ARCH=${_CARCH} prefix=/usr/lib/musl DESTDIR="$pkgdir" install + cd "$srcdir"/linux-${pkgver/_/-} + make ARCH=${_CARCH} INSTALL_HDR_PATH="$pkgdir/usr/lib/musl" headers_install } diff --git a/kmon/PKGBUILD b/kmon/PKGBUILD index 1ee3ae43a6..bb1926cc9e 100644 --- a/kmon/PKGBUILD +++ b/kmon/PKGBUILD @@ -16,7 +16,7 @@ validpgpkeys=('C4B2D24CF87CD188C79D00BB485B7C52E9EC0DC6') # kmon releases # Contributor: Tobias Kieslich -pkgname=(libgda libgda-{firebird,jdbc,mysql,postgres}) +pkgname=(libgda libgda-{firebird,mysql,postgres}) pkgver=5.2.10 pkgrel=5 pkgdesc="Database access library" @@ -14,7 +14,9 @@ makedepends=(glade mariadb-libs postgresql-libs libfbclient jdk11-openjdk intlto _commit=85a2532df64698306fd3be324bb2052fb1b80fd3 # tags/LIBGDA_5_2_10^0 
source=("git+https://gitlab.gnome.org/GNOME/libgda.git#commit=$_commit" 0001-Use-single-header-with-GtkSource.patch - 0002-Use-goocanvas-3.0.patch) + 0002-Use-goocanvas-3.0.patch + libgda-la64.patch) + sha256sums=('SKIP' '104594a6034b041d87c1129b0b5544c8899393d66a6a0fb7a8b55f7fcb77e560' '81ab26fd12f7174e880815f6aa624c24821b1f9c7f386ae566293df7f041b25b') @@ -39,6 +41,8 @@ prepare() { # Port to goocanvas-3.0 git apply -3 ../0002-Use-goocanvas-3.0.patch + patch -p1 -i $srcdir/libgda-la64.patch + rm getsp.class NOCONFIGURE=1 ./autogen.sh } @@ -56,7 +60,6 @@ build() { package_libgda() { optdepends=('libgda-firebird: provider for Firebird' - 'libgda-jdbc: provider for JDBC' 'libgda-mysql: provider for MySQL' 'libgda-postgres: provider for PostgreSQL') options+=(emptydirs) @@ -66,16 +69,16 @@ package_libgda() { mkdir -p providers local provider - for provider in firebird jdbc mysql postgres; do + for provider in firebird mysql postgres; do mv "$pkgdir"/usr/lib/libgda-$_apiver/providers/libgda-$provider[-.]*so \ "$pkgdir"/usr/lib/pkgconfig/libgda-$provider-$_apiver.pc \ "$pkgdir"/usr/share/libgda-$_apiver/${provider}_specs_*.xml \ "$srcdir/providers" done - mv "$pkgdir"/usr/lib/libgda-$_apiver/providers/gdaprovider-${_apiver}.jar \ - "$pkgdir"/usr/bin/gda-list-jdbc-providers-${_apiver} \ - "$srcdir/providers" +# mv "$pkgdir"/usr/lib/libgda-$_apiver/providers/gdaprovider-${_apiver}.jar \ +# "$pkgdir"/usr/bin/gda-list-jdbc-providers-${_apiver} \ +# "$srcdir/providers" } _packageprovider() { @@ -112,3 +115,10 @@ package_libgda-postgres() { depends=(libgda postgresql-libs) _packageprovider postgres } +sha256sums=('SKIP' + '104594a6034b041d87c1129b0b5544c8899393d66a6a0fb7a8b55f7fcb77e560' + '81ab26fd12f7174e880815f6aa624c24821b1f9c7f386ae566293df7f041b25b') +sha256sums=('SKIP' + '104594a6034b041d87c1129b0b5544c8899393d66a6a0fb7a8b55f7fcb77e560' + '81ab26fd12f7174e880815f6aa624c24821b1f9c7f386ae566293df7f041b25b' + '8f6f9129df0032d895a8549f3e66e5eef530791326d14097cbb3e5fc8a2b84b1') diff --git a/libgda/libgda-la64.patch b/libgda/libgda-la64.patch new file mode 100644 index 0000000000..4e9ad28d23 --- /dev/null +++ b/libgda/libgda-la64.patch @@ -0,0 +1,22 @@ +Index: libgda/getsp.java +=================================================================== +--- libgda.orig/getsp.java ++++ libgda/getsp.java +@@ -25,7 +25,7 @@ public class getsp { + while (i<=j) { + if (i==j || lp.charAt(i)==ps) { + String lib=lp.substring(k,i); +- String suffix="/lib/amd64/server"; ++ String suffix="/lib/loongarch64/server"; + k=i+1; + if (lib.compareTo(".")!=0) + r=(r==null)?(prefix+lib+suffix):(r+" "+prefix+lib+suffix); +@@ -50,7 +50,7 @@ public class getsp { + + if (r!=null) System.out.println(r); + } else if (args[0].compareTo("-ldpath")==0) { +- String lp1=System.getProperty("java.home")+"/lib/amd64/server"; ++ String lp1=System.getProperty("java.home")+"/lib/loongarch64/server"; + String lp2=System.getProperty("java.library.path"); + System.out.println(lp1+":"+lp2); + } diff --git a/libgexiv2/PKGBUILD b/libgexiv2/PKGBUILD index f11fbc8d62..60faaba6cc 100644 --- a/libgexiv2/PKGBUILD +++ b/libgexiv2/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libgexiv2 pkgver=0.14.2 -pkgrel=1 +pkgrel=2 pkgdesc='GObject-based wrapper around the Exiv2 library' url='https://wiki.gnome.org/Projects/gexiv2' arch=(loong64 x86_64) diff --git a/libglvnd/PKGBUILD b/libglvnd/PKGBUILD index 500c12f9b4..53e84e3298 100644 --- a/libglvnd/PKGBUILD +++ b/libglvnd/PKGBUILD @@ -17,6 +17,7 @@ 
sha512sums=('7b6eb8e075b48f1d915b892044adc3260547d74ed61d1e2fa6c5f0f8c3527754abe build() { arch-meson $pkgname-v$pkgver build \ + -Dasm=disabled \ -D gles1=false meson compile -C build diff --git a/libgme/PKGBUILD b/libgme/PKGBUILD index cf80bb5142..e6f394754b 100644 --- a/libgme/PKGBUILD +++ b/libgme/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('aba34e53ef0ec6a34b58b84e28bf8cfbccee6585cebca25333604c35db3e051d') validpgpkeys=(5406ECE83665DA9D201D35720BAF0C9C7B6AE9F2) # Michael Pyne build() { - cmake -S game-music-emu-$pkgver -B build -G Ninja -DCMAKE_INSTALL_PREFIX='/usr' -DCMAKE_BUILD_TYPE=Release + cmake -S game-music-emu-$pkgver -B build -G Ninja -DCMAKE_INSTALL_PREFIX='/usr' -DCMAKE_BUILD_TYPE=Release -DENABLE_UBSAN=off cmake --build build } diff --git a/libgoom2/PKGBUILD b/libgoom2/PKGBUILD index 96296aa5e2..32642faa58 100644 --- a/libgoom2/PKGBUILD +++ b/libgoom2/PKGBUILD @@ -15,6 +15,8 @@ sha512sums=('790e3ab8dee122320ad8b3ae15f6a1cd2780222d5ae97979f614f16ba73b4b85396 build() { cd "${srcdir}/goom2k4-0" + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} ./configure --prefix=/usr \ --without-xmms \ diff --git a/libgpod/PKGBUILD b/libgpod/PKGBUILD index 866dc191bd..a2423d3fa6 100644 --- a/libgpod/PKGBUILD +++ b/libgpod/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libgpod pkgver=0.8.3 -pkgrel=15 +pkgrel=16 pkgdesc="A shared library to access the contents of an iPod" url="http://www.gtkpod.org/libgpod/" arch=(loong64 x86_64) diff --git a/libimagequant/PKGBUILD b/libimagequant/PKGBUILD index 339dbde847..a5966338be 100644 --- a/libimagequant/PKGBUILD +++ b/libimagequant/PKGBUILD @@ -14,6 +14,12 @@ sha256sums=('ff1a34d3df9a1a5e5c1fa3895c036a885dc7b9740d7fccdf57e9ed678b8fb3a3') build() { cd "$srcdir/$pkgname-$pkgver/imagequant-sys" + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml <11' + 'jdk11-openjdk' ) optdepends=('java-runtime>11: for TurboJPEG Java wrapper') provides=( @@ -45,6 +45,7 @@ build() { -D ENABLE_STATIC=OFF -D WITH_JAVA=ON -D WITH_JPEG8=ON + -D WITH_SIMD=OFF -G Ninja -S $pkgname-$pkgver -W no-dev diff --git a/libjxl/PKGBUILD b/libjxl/PKGBUILD index 81f977eb81..c491dde5c1 100644 --- a/libjxl/PKGBUILD +++ b/libjxl/PKGBUILD @@ -82,7 +82,7 @@ package_libjxl() { DESTDIR="$pkgdir" cmake --install build install -D -m644 libjxl/{LICENSE,PATENTS} -t "${pkgdir}/usr/share/licenses/${pkgname}" - mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar +# mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar } package_libjxl-doc() { diff --git a/libksysguard/PKGBUILD b/libksysguard/PKGBUILD index 9d0cdd7473..ffddf24952 100644 --- a/libksysguard/PKGBUILD +++ b/libksysguard/PKGBUILD @@ -10,7 +10,7 @@ pkgdesc='Library to retrieve information on the current status of computer hardw arch=(loong64 x86_64) url='https://kde.org/plasma-desktop/' license=(LGPL) -depends=(libxres qt5-webengine knewstuff5) +depends=(libxres knewstuff5) makedepends=(extra-cmake-modules kdoctools5 qt5-tools) conflicts=('ksysguard<5.21.90') groups=(plasma) diff --git a/libopenraw/PKGBUILD b/libopenraw/PKGBUILD index e39717b17a..6fc81a9d5c 100644 --- a/libopenraw/PKGBUILD +++ b/libopenraw/PKGBUILD @@ -14,14 +14,17 @@ license=('LGPL') depends=('gdk-pixbuf2') makedepends=('boost' 'libxml2' 'cargo') provides=('libopenraw.so' 'libopenrawgnome.so') -source=("https://libopenraw.freedesktop.org/download/${pkgname}-${pkgver}.tar.bz2"{.asc,}) +source=("https://libopenraw.freedesktop.org/download/${pkgname}-${pkgver}.tar.bz2"{.asc,} + 
libopenraw-fix-build.patch) b2sums=('SKIP' '4f6ea0db32843c6685ddd5d72332e7eedfa28527d4ef76a8c1aeab56966681cc7a005cc903411a6d33c8638e31757b883fff25d6a5500b9b9395e5cdcf3a56f9') validpgpkeys=('6C44DB3E0BF3EAF5B433239A5FEE05E6A56E15A3') # Hubert Figuiere prepare() { + cd ${pkgname}-${pkgver} # Fix libopenraw dependency in -gnome pc file - sed -e 's|libopenraw-0.1|libopenraw-0.3|' -i $pkgname-$pkgver/gnome/libopenraw-gnome-0.3.pc.in + sed -e 's|libopenraw-0.1|libopenraw-0.3|' -i gnome/libopenraw-gnome-0.3.pc.in + patch -p1 -i $srcdir/libopenraw-fix-build.patch } build() { diff --git a/libopenraw/libopenraw-fix-build.patch b/libopenraw/libopenraw-fix-build.patch new file mode 100644 index 0000000000..5d6a1f38b8 --- /dev/null +++ b/libopenraw/libopenraw-fix-build.patch @@ -0,0 +1,12 @@ +Index: libopenraw-0.3.0/lib/cr2file.cpp +=================================================================== +--- libopenraw-0.3.0.orig/lib/cr2file.cpp ++++ libopenraw-0.3.0/lib/cr2file.cpp +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include + #include diff --git a/liborcus/PKGBUILD b/liborcus/PKGBUILD index acc6db3df2..ee32a3cc72 100644 --- a/liborcus/PKGBUILD +++ b/liborcus/PKGBUILD @@ -11,9 +11,16 @@ license=('MPL') depends=('libixion' 'glibc' 'boost-libs' 'gcc-libs' 'zlib' 'python') makedepends=('boost' 'mdds') optdepends=('python-requests: in tool bugzilla') -source=(https://kohei.us/files/orcus/src/${pkgname}-${pkgver}.tar.xz) +source=(https://kohei.us/files/orcus/src/${pkgname}-${pkgver}.tar.xz +liborcus-cstdint.patch) # https://gitlab.com/orcus/orcus/-/releases -sha256sums=('69ed26a00d4aaa7688e62a6e003cbc81928521a45e96605e53365aa499719e39') +sha256sums=('69ed26a00d4aaa7688e62a6e003cbc81928521a45e96605e53365aa499719e39' + '8e64a22ffaec4dbd5e7055c58b9cce209038130de01369829f40be55b4a9e29d') + +prepare() { + cd "${pkgname}"-${pkgver} + patch -p1 -i $srcdir/liborcus-cstdint.patch +} build() { cd "${pkgname}"-${pkgver} diff --git a/liborcus/liborcus-cstdint.patch b/liborcus/liborcus-cstdint.patch new file mode 100644 index 0000000000..cd2e132af0 --- /dev/null +++ b/liborcus/liborcus-cstdint.patch @@ -0,0 +1,20 @@ +--- liborcus-0.17.2/include/orcus/types.hpp 2023-03-10 19:40:51.890866647 +0800 ++++ liborcus-0.17.2/include/orcus/types.hpp 2023-03-10 19:41:05.074252753 +0800 +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include "env.hpp" + + namespace orcus { +--- liborcus-0.17.2/include/orcus/base64.hpp 2021-10-14 10:59:58.000000000 +0800 ++++ liborcus-0.17.2/include/orcus/base64.hpp 2023-03-10 19:47:14.812579067 +0800 +@@ -11,6 +11,7 @@ + #include "env.hpp" + #include + #include ++#include + + namespace orcus { + diff --git a/libotr/PKGBUILD b/libotr/PKGBUILD index 6fed46df2f..0e6e97c7fd 100644 --- a/libotr/PKGBUILD +++ b/libotr/PKGBUILD @@ -13,14 +13,17 @@ arch=('loong64' 'x86_64') depends=('libgcrypt') validpgpkeys=('22DF3305DF56667CE15784FCF24DE08F42C2ABAD') # OTR Dev Team source=(https://otr.cypherpunks.ca/${pkgname}-${pkgver}.tar.gz{,.asc} - "$pkgname-4.1.1-include-socket.h.patch") + "$pkgname-4.1.1-include-socket.h.patch" + libotr-fix-build.patch) sha256sums=('8b3b182424251067a952fb4e6c7b95a21e644fbb27fbd5f8af2b2ed87ca419f5' 'SKIP' - 'cfda75f8c5bba2e735d2b4f1bb90f60b45fa1d554a97fff75cac467f7873ebde') + 'cfda75f8c5bba2e735d2b4f1bb90f60b45fa1d554a97fff75cac467f7873ebde' + '8564fd454e46d7d90919c9b665ea6634868d64f96cfa5e25163d772f9c88d2f4') prepare() { # FS#75450 patch -d "$pkgname-$pkgver" -N -p 1 -i "${srcdir}/$pkgname-4.1.1-include-socket.h.patch" + patch -d 
"$pkgname-$pkgver" -N -p 1 -i "$srcdir/libotr-fix-build.patch" } build() { diff --git a/libotr/libotr-fix-build.patch b/libotr/libotr-fix-build.patch new file mode 100644 index 0000000000..ecb87e7d76 --- /dev/null +++ b/libotr/libotr-fix-build.patch @@ -0,0 +1,12 @@ +Index: libotr-4.1.1/tests/regression/client/client.c +=================================================================== +--- libotr-4.1.1.orig/tests/regression/client/client.c ++++ libotr-4.1.1/tests/regression/client/client.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/libraw/PKGBUILD b/libraw/PKGBUILD index 74a9258c3c..3ed4e0b269 100644 --- a/libraw/PKGBUILD +++ b/libraw/PKGBUILD @@ -23,6 +23,7 @@ sha256sums=('fe7288013206854baf6e4417d0fb63ba4ed7227bf36fff021992671c2dd34b03') build() { cd LibRaw-$pkgver + autoreconf ./configure --prefix=/usr make } diff --git a/libredefender/PKGBUILD b/libredefender/PKGBUILD index ba7be8da57..f8890cbaa0 100644 --- a/libredefender/PKGBUILD +++ b/libredefender/PKGBUILD @@ -18,7 +18,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/libreoffice-fresh/PKGBUILD b/libreoffice-fresh/PKGBUILD index a02366f5c3..ea641d0b5f 100644 --- a/libreoffice-fresh/PKGBUILD +++ b/libreoffice-fresh/PKGBUILD @@ -164,6 +164,8 @@ build() { # Build only minimal debug info to reduce size (~1.2GB -> ~225MB) CFLAGS=${CFLAGS/-g /-g1 } CXXFLAGS=${CXXFLAGS/-g /-g1 } + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CFLAGS/-mlsx /} # this uses malloc_usable_size, which is incompatible with fortification level 3 # /usr/lib/libreoffice/program/libskialo.so uses malloc_usable_size @@ -223,7 +225,7 @@ build() { --with-system-libmwaw \ --with-system-libetonyek \ --with-system-libfreehand \ - --without-system-firebird \ + --disable-firebird-sdbc \ --with-system-zxing \ --with-system-libtommath \ --with-system-libatomic-ops \ diff --git a/libretro-genesis-plus-gx/PKGBUILD b/libretro-genesis-plus-gx/PKGBUILD index edfe8c8f20..d2ee1123bb 100644 --- a/libretro-genesis-plus-gx/PKGBUILD +++ b/libretro-genesis-plus-gx/PKGBUILD @@ -19,6 +19,7 @@ depends=( makedepends=( clang git + gettext ) _commit=ed1e5f514d2e314ceacde841da485aa69cccba4a source=(libretro-genesis-plus-gx::git+https://github.com/libretro/Genesis-Plus-GX.git#commit=${_commit}) @@ -31,6 +32,10 @@ pkgver() { build() { export CC=clang # FS#71188 - [libretro-genesis-plus-gx] segfaults when compiled with -O2 + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} make \ SHARED_LIBVORBIS=1 \ SYSTEM_ZLIB=1 \ diff --git a/librustls/PKGBUILD b/librustls/PKGBUILD index c04ccf032c..50c7f9af36 100644 --- a/librustls/PKGBUILD +++ b/librustls/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('7eaffd02528155f561742bd712f5454e68fb771b3eb55d63bf0520429ab717f1' prepare() { cd rustls-ffi-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" patch -Np1 -i ../shared-linking.patch } diff --git a/libserialport/PKGBUILD b/libserialport/PKGBUILD index eea7c30c52..5ad1d20401 100644 --- a/libserialport/PKGBUILD +++ b/libserialport/PKGBUILD @@ -13,7 +13,7 @@ depends=('glibc') source=("https://sigrok.org/download/source/libserialport/libserialport-$pkgver.tar.gz" 
"diable_termiox.patch::https://sigrok.org/gitweb/?p=libserialport.git;a=patch;h=6f9b03e597ea7200eb616a4e410add3dd1690cb1") sha512sums=('7d379d1099173841e6d4df04c8c12dc6a4ebdfa0323ef35da6f3dea20db55d4f6ad81c1f6679f5aac7fe83270176428a817daa8627c336505335a07e06350a85' - 'b4834dedc393ba23c80c6487b41c69c273f11ce201c72d54668003774226883a9185c295a3ac2cc33d6075dbf38921c67f4d39a160656884c67152f75951822f') + '8e64304df07aa163370d157a052b5c101350011ab8474258fe9f1beb0565c5efec9f1ad4918749d39f34423061f7de338280d36fb5f72d99df4d4f5ce8162e8c') prepare() { cd $pkgname-$pkgver diff --git a/libsmbios/0001-add-support-for-loongarch64.patch b/libsmbios/0001-add-support-for-loongarch64.patch new file mode 100644 index 0000000000..b4166621ba --- /dev/null +++ b/libsmbios/0001-add-support-for-loongarch64.patch @@ -0,0 +1,672 @@ +From 2e8c11b36537b1319af1c8095909b8882d203a0a Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 9 May 2022 18:08:30 +0800 +Subject: [PATCH] add support for loongarch64 + +--- + src/libsmbios_c/cmos/cmos_linux.c | 6 + + src/libsmbios_c/cmos/io.h | 625 ++++++++++++++++++++++++++++++ + 2 files changed, 631 insertions(+) + create mode 100644 src/libsmbios_c/cmos/io.h + +diff --git a/src/libsmbios_c/cmos/cmos_linux.c b/src/libsmbios_c/cmos/cmos_linux.c +index 11c0871..ffae53c 100644 +--- a/src/libsmbios_c/cmos/cmos_linux.c ++++ b/src/libsmbios_c/cmos/cmos_linux.c +@@ -22,7 +22,11 @@ + #include "smbios_c/compat.h" + + // system ++#if defined(__i386__) || defined(__x86_64__) + #include ++#else ++#include "io.h" ++#endif + #include + #include + +@@ -57,8 +61,10 @@ int __hidden init_cmos_struct(struct cmos_access_obj *m) + int retval = 0; + + fnprintf("\n"); ++#if defined(__i386__) || defined(__x86_64__) + if(iopl(3) < 0) + goto out_noprivs; ++#endif + + m->read_fn = linux_read_fn; + m->write_fn = linux_write_fn; +diff --git a/src/libsmbios_c/cmos/io.h b/src/libsmbios_c/cmos/io.h +new file mode 100644 +index 0000000..fb0a726 +--- /dev/null ++++ b/src/libsmbios_c/cmos/io.h +@@ -0,0 +1,625 @@ ++/* vi: set sw=4 ts=4 sts=4 expandtab wrap ai: */ ++/* Generic I/O port emulation, based on MN10300 code ++ * ++ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. ++ * Copyright (C) 2021 Xiaotian Wu ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#ifndef __IO_H__ ++#define __IO_H__ 1 ++ ++#include ++#include ++ ++/* ++ * __raw_{read,write}{b,w,l,q}() access memory in native endianness. ++ * ++ * On some architectures memory mapped IO needs to be accessed differently. ++ * On the simple architectures, we just read/write the memory location ++ * directly. 
++ */ ++#ifndef __raw_readb ++#define __raw_readb __raw_readb ++static inline __u8 __raw_readb(const volatile void *addr) ++{ ++ return *(const volatile __u8 *)addr; ++} ++#endif ++#ifndef __raw_readw ++#define __raw_readw __raw_readw ++static inline __u16 __raw_readw(const volatile void *addr) ++{ ++ return *(const volatile __u16 *)addr; ++} ++#endif ++#ifndef __raw_readl ++#define __raw_readl __raw_readl ++static inline __u32 __raw_readl(const volatile void *addr) ++{ ++ return *(const volatile __u32 *)addr; ++} ++#endif ++ ++#ifdef __LP64__ ++#ifndef __raw_readq ++#define __raw_readq __raw_readq ++static inline __u64 __raw_readq(const volatile void *addr) ++{ ++ return *(const volatile __u64 *)addr; ++} ++#endif ++#endif /* __LP64__ */ ++ ++#ifndef __raw_writeb ++#define __raw_writeb __raw_writeb ++static inline void __raw_writeb(__u8 value, volatile void *addr) ++{ ++ *(volatile __u8 *)addr = value; ++} ++#endif ++#ifndef __raw_writew ++#define __raw_writew __raw_writew ++static inline void __raw_writew(__u16 value, volatile void *addr) ++{ ++ *(volatile __u16 *)addr = value; ++} ++#endif ++#ifndef __raw_writel ++#define __raw_writel __raw_writel ++static inline void __raw_writel(__u32 value, volatile void *addr) ++{ ++ *(volatile __u32 *)addr = value; ++} ++#endif ++ ++#ifdef __LP64__ ++#ifndef __raw_writeq ++#define __raw_writeq __raw_writeq ++static inline void __raw_writeq(__u64 value, volatile void *addr) ++{ ++ *(volatile __u64 *)addr = value; ++} ++#endif ++#endif /* __LP64__ */ ++/* ++ * {read,write}{b,w,l,q}() access little endian memory and return result in ++ * native endianness. ++ */ ++#ifndef readb ++#define readb readb ++static inline __u8 readb(const volatile void *addr) ++{ ++ return __raw_readb(addr); ++} ++#endif ++#ifndef readw ++#define readw readw ++static inline __u16 readw(const volatile void *addr) ++{ ++ return __le16_to_cpu(__raw_readw(addr)); ++} ++#endif ++#ifndef readl ++#define readl readl ++static inline __u32 readl(const volatile void *addr) ++{ ++ return __le32_to_cpu(__raw_readl(addr)); ++} ++#endif ++#ifdef __LP64__ ++#ifndef readq ++#define readq readq ++static inline __u64 readq(const volatile void *addr) ++{ ++ return __le64_to_cpu(__raw_readq(addr)); ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef writeb ++#define writeb writeb ++static inline void writeb(__u8 value, volatile void *addr) ++{ ++ __raw_writeb(value, addr); ++} ++#endif ++#ifndef writew ++#define writew writew ++static inline void writew(__u16 value, volatile void *addr) ++{ ++ __raw_writew(__cpu_to_le16(value), addr); ++} ++#endif ++#ifndef writel ++#define writel writel ++static inline void writel(__u32 value, volatile void *addr) ++{ ++ __raw_writel(__cpu_to_le32(value), addr); ++} ++#endif ++#ifdef __LP64__ ++#ifndef writeq ++#define writeq writeq ++static inline void writeq(__u64 value, volatile void *addr) ++{ ++ __raw_writeq(__cpu_to_le64(value), addr); ++} ++#endif ++#endif /* __LP64__ */ ++/* ++ * {read,write}{b,w,l,q}_relaxed() are like the regular version, but ++ * are not guaranteed to provide ordering against spinlocks or memory ++ * accesses. 
++ */ ++#ifndef readb_relaxed ++#define readb_relaxed readb ++#endif ++#ifndef readw_relaxed ++#define readw_relaxed readw ++#endif ++#ifndef readl_relaxed ++#define readl_relaxed readl ++#endif ++#if defined(readq) && !defined(readq_relaxed) ++#define readq_relaxed readq ++#endif ++#ifndef writeb_relaxed ++#define writeb_relaxed writeb ++#endif ++#ifndef writew_relaxed ++#define writew_relaxed writew ++#endif ++#ifndef writel_relaxed ++#define writel_relaxed writel ++#endif ++#if defined(writeq) && !defined(writeq_relaxed) ++#define writeq_relaxed writeq ++#endif ++/* ++ * {read,write}s{b,w,l,q}() repeatedly access the same memory address in ++ * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). ++ */ ++#ifndef readsb ++#define readsb readsb ++static inline void readsb(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u8 *buf = buffer; ++ do { ++ __u8 x = __raw_readb(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#ifndef readsw ++#define readsw readsw ++static inline void readsw(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u16 *buf = buffer; ++ do { ++ __u16 x = __raw_readw(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#ifndef readsl ++#define readsl readsl ++static inline void readsl(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u32 *buf = buffer; ++ do { ++ __u32 x = __raw_readl(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#ifdef __LP64__ ++#ifndef readsq ++#define readsq readsq ++static inline void readsq(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u64 *buf = buffer; ++ do { ++ __u64 x = __raw_readq(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef writesb ++#define writesb writesb ++static inline void writesb(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u8 *buf = buffer; ++ do { ++ __raw_writeb(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#ifndef writesw ++#define writesw writesw ++static inline void writesw(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u16 *buf = buffer; ++ do { ++ __raw_writew(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#ifndef writesl ++#define writesl writesl ++static inline void writesl(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u32 *buf = buffer; ++ do { ++ __raw_writel(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#ifdef __LP64__ ++#ifndef writesq ++#define writesq writesq ++static inline void writesq(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u64 *buf = buffer; ++ do { ++ __raw_writeq(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#endif /* __LP64__ */ ++ ++#ifndef PCI_IOBASE ++#define PCI_IOBASE ((void *)0) ++#endif ++ ++#ifndef IO_SPACE_LIMIT ++#define IO_SPACE_LIMIT 0xffff ++#endif ++/* ++ * {in,out}{b,w,l}() access little endian I/O. {in,out}{b,w,l}_p() can be ++ * implemented on hardware that needs an additional delay for I/O accesses to ++ * take effect. 
++ */ ++#ifndef inb ++#define inb inb ++static inline __u8 inb(unsigned long addr) ++{ ++ return readb(PCI_IOBASE + addr); ++} ++#endif ++#ifndef inw ++#define inw inw ++static inline __u16 inw(unsigned long addr) ++{ ++ return readw(PCI_IOBASE + addr); ++} ++#endif ++#ifndef inl ++#define inl inl ++static inline __u32 inl(unsigned long addr) ++{ ++ return readl(PCI_IOBASE + addr); ++} ++#endif ++#ifndef outb ++#define outb outb ++static inline void outb(__u8 value, unsigned long addr) ++{ ++ writeb(value, PCI_IOBASE + addr); ++} ++#endif ++#ifndef outw ++#define outw outw ++static inline void outw(__u16 value, unsigned long addr) ++{ ++ writew(value, PCI_IOBASE + addr); ++} ++#endif ++#ifndef outl ++#define outl outl ++static inline void outl(__u32 value, unsigned long addr) ++{ ++ writel(value, PCI_IOBASE + addr); ++} ++#endif ++#ifndef inb_p ++#define inb_p inb_p ++static inline __u8 inb_p(unsigned long addr) ++{ ++ return inb(addr); ++} ++#endif ++#ifndef inw_p ++#define inw_p inw_p ++static inline __u16 inw_p(unsigned long addr) ++{ ++ return inw(addr); ++} ++#endif ++#ifndef inl_p ++#define inl_p inl_p ++static inline __u32 inl_p(unsigned long addr) ++{ ++ return inl(addr); ++} ++#endif ++#ifndef outb_p ++#define outb_p outb_p ++static inline void outb_p(__u8 value, unsigned long addr) ++{ ++ outb(value, addr); ++} ++#endif ++#ifndef outw_p ++#define outw_p outw_p ++static inline void outw_p(__u16 value, unsigned long addr) ++{ ++ outw(value, addr); ++} ++#endif ++#ifndef outl_p ++#define outl_p outl_p ++static inline void outl_p(__u32 value, unsigned long addr) ++{ ++ outl(value, addr); ++} ++#endif ++/* ++ * {in,out}s{b,w,l}{,_p}() are variants of the above that repeatedly access a ++ * single I/O port multiple times. ++ */ ++#ifndef insb ++#define insb insb ++static inline void insb(unsigned long addr, void *buffer, unsigned int count) ++{ ++ readsb(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef insw ++#define insw insw ++static inline void insw(unsigned long addr, void *buffer, unsigned int count) ++{ ++ readsw(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef insl ++#define insl insl ++static inline void insl(unsigned long addr, void *buffer, unsigned int count) ++{ ++ readsl(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef outsb ++#define outsb outsb ++static inline void outsb(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ writesb(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef outsw ++#define outsw outsw ++static inline void outsw(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ writesw(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef outsl ++#define outsl outsl ++static inline void outsl(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ writesl(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef insb_p ++#define insb_p insb_p ++static inline void insb_p(unsigned long addr, void *buffer, unsigned int count) ++{ ++ insb(addr, buffer, count); ++} ++#endif ++#ifndef insw_p ++#define insw_p insw_p ++static inline void insw_p(unsigned long addr, void *buffer, unsigned int count) ++{ ++ insw(addr, buffer, count); ++} ++#endif ++#ifndef insl_p ++#define insl_p insl_p ++static inline void insl_p(unsigned long addr, void *buffer, unsigned int count) ++{ ++ insl(addr, buffer, count); ++} ++#endif ++#ifndef outsb_p ++#define outsb_p outsb_p ++static inline void outsb_p(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ outsb(addr, buffer, count); ++} 
++#endif ++#ifndef outsw_p ++#define outsw_p outsw_p ++static inline void outsw_p(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ outsw(addr, buffer, count); ++} ++#endif ++#ifndef outsl_p ++#define outsl_p outsl_p ++static inline void outsl_p(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ outsl(addr, buffer, count); ++} ++#endif ++ ++#ifndef ioread8 ++#define ioread8 ioread8 ++static inline __u8 ioread8(const volatile void *addr) ++{ ++ return readb(addr); ++} ++#endif ++#ifndef ioread16 ++#define ioread16 ioread16 ++static inline __u16 ioread16(const volatile void *addr) ++{ ++ return readw(addr); ++} ++#endif ++#ifndef ioread32 ++#define ioread32 ioread32 ++static inline __u32 ioread32(const volatile void *addr) ++{ ++ return readl(addr); ++} ++#endif ++#ifdef __LP64__ ++#ifndef ioread64 ++#define ioread64 ioread64 ++static inline __u64 ioread64(const volatile void *addr) ++{ ++ return readq(addr); ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef iowrite8 ++#define iowrite8 iowrite8 ++static inline void iowrite8(__u8 value, volatile void *addr) ++{ ++ writeb(value, addr); ++} ++#endif ++#ifndef iowrite16 ++#define iowrite16 iowrite16 ++static inline void iowrite16(__u16 value, volatile void *addr) ++{ ++ writew(value, addr); ++} ++#endif ++#ifndef iowrite32 ++#define iowrite32 iowrite32 ++static inline void iowrite32(__u32 value, volatile void *addr) ++{ ++ writel(value, addr); ++} ++#endif ++#ifdef __LP64__ ++#ifndef iowrite64 ++#define iowrite64 iowrite64 ++static inline void iowrite64(__u64 value, volatile void *addr) ++{ ++ writeq(value, addr); ++} ++#endif ++#endif /* __LP64__ */ ++ ++#ifndef ioread8_rep ++#define ioread8_rep ioread8_rep ++static inline void ioread8_rep(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ readsb(addr, buffer, count); ++} ++#endif ++#ifndef ioread16_rep ++#define ioread16_rep ioread16_rep ++static inline void ioread16_rep(const volatile void *addr, ++ void *buffer, unsigned int count) ++{ ++ readsw(addr, buffer, count); ++} ++#endif ++#ifndef ioread32_rep ++#define ioread32_rep ioread32_rep ++static inline void ioread32_rep(const volatile void *addr, ++ void *buffer, unsigned int count) ++{ ++ readsl(addr, buffer, count); ++} ++#endif ++#ifdef __LP64__ ++#ifndef ioread64_rep ++#define ioread64_rep ioread64_rep ++static inline void ioread64_rep(const volatile void *addr, ++ void *buffer, unsigned int count) ++{ ++ readsq(addr, buffer, count); ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef iowrite8_rep ++#define iowrite8_rep iowrite8_rep ++static inline void iowrite8_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesb(addr, buffer, count); ++} ++#endif ++#ifndef iowrite16_rep ++#define iowrite16_rep iowrite16_rep ++static inline void iowrite16_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesw(addr, buffer, count); ++} ++#endif ++#ifndef iowrite32_rep ++#define iowrite32_rep iowrite32_rep ++static inline void iowrite32_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesl(addr, buffer, count); ++} ++#endif ++#ifdef __LP64__ ++#ifndef iowrite64_rep ++#define iowrite64_rep iowrite64_rep ++static inline void iowrite64_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesq(addr, buffer, count); ++} ++#endif ++#endif /* __LP64__ */ ++ ++#endif /* __IO_H__ */ +-- +2.35.1 + diff --git a/libsmbios/PKGBUILD b/libsmbios/PKGBUILD index 91cf49c290..a3c794ac64 100644 --- 
a/libsmbios/PKGBUILD +++ b/libsmbios/PKGBUILD @@ -13,7 +13,8 @@ depends=('gcc-libs') makedepends=('libxml2' 'python' 'chrpath' 'doxygen' git cppunit help2man) optdepends=('python: tools') _commit=5b72244ca0d09c7f228d571ec2d5d20183486c11 # tags/v2.4.3 -source=("git+https://github.com/dell/libsmbios.git#commit=$_commit") +source=("git+https://github.com/dell/libsmbios.git#commit=$_commit" +0001-add-support-for-loongarch64.patch) sha256sums=('SKIP') pkgver() { @@ -23,6 +24,7 @@ pkgver() { prepare() { cd $pkgname + patch -p1 -i $srcdir/0001-add-support-for-loongarch64.patch ./autogen.sh --no-configure } @@ -48,3 +50,5 @@ package() { install -m755 -d "${pkgdir}/usr/share/licenses/${pkgname}" install -m644 ../COPYING-OSL "${pkgdir}/usr/share/licenses/${pkgname}/" } +sha256sums=('SKIP' + '6bf9e8d30891867bd89698dbdb559a8c9d8c9878fd141a3ac1771993dfd6d420') diff --git a/liburcu/PKGBUILD b/liburcu/PKGBUILD index d658336d4f..2deb37bff1 100644 --- a/liburcu/PKGBUILD +++ b/liburcu/PKGBUILD @@ -10,11 +10,18 @@ arch=('loong64' 'x86_64') url="https://lttng.org/urcu" license=('LGPL2.1') depends=('glibc') -source=(https://lttng.org/files/urcu/userspace-rcu-${pkgver}.tar.bz2{,.asc}) +source=(https://lttng.org/files/urcu/userspace-rcu-${pkgver}.tar.bz2{,.asc} + userspace-rcu-loongarch64.patch) sha256sums=('ca43bf261d4d392cff20dfae440836603bf009fce24fdc9b2697d837a2239d4f' 'SKIP') validpgpkeys=('2A0B4ED915F2D3FA45F5B16217280A9781186ACF') +prepare() { + cd "$srcdir"/userspace-rcu-${pkgver} + patch -p1 -i "$srcdir/userspace-rcu-loongarch64.patch" + autoreconf -vfi +} + build() { cd "$srcdir"/userspace-rcu-${pkgver} ./configure --prefix=/usr diff --git a/liburcu/userspace-rcu-loongarch64.patch b/liburcu/userspace-rcu-loongarch64.patch new file mode 100644 index 0000000000..f57f3106a7 --- /dev/null +++ b/liburcu/userspace-rcu-loongarch64.patch @@ -0,0 +1,186 @@ +diff --git a/LICENSE b/LICENSE +index a06fdcc..acf13d7 100644 +--- a/LICENSE ++++ b/LICENSE +@@ -44,6 +44,7 @@ MIT/X11 (BSD like) license apply to: + compiler.h + arch/s390.h + uatomic/alpha.h ++uatomic/loongarch.h + uatomic/mips.h + uatomic/nios2.h + uatomic/s390.h +diff --git a/README.md b/README.md +index 02b903a..29b3a4a 100644 +--- a/README.md ++++ b/README.md +@@ -51,6 +51,7 @@ Currently, the following architectures are supported: + - hppa/PA-RISC + - m68k + - RISC-V ++ - LoongArch + + Tested on: + +diff --git a/include/Makefile.am b/include/Makefile.am +index 3f92cc3..1a562fa 100644 +--- a/include/Makefile.am ++++ b/include/Makefile.am +@@ -7,6 +7,7 @@ nobase_include_HEADERS = \ + urcu/arch.h \ + urcu/arch/hppa.h \ + urcu/arch/ia64.h \ ++ urcu/arch/loongarch.h \ + urcu/arch/m68k.h \ + urcu/arch/mips.h \ + urcu/arch/nios2.h \ +@@ -67,6 +68,7 @@ nobase_include_HEADERS = \ + urcu/uatomic.h \ + urcu/uatomic/hppa.h \ + urcu/uatomic/ia64.h \ ++ urcu/uatomic/loongarch.h \ + urcu/uatomic/m68k.h \ + urcu/uatomic/mips.h \ + urcu/uatomic/nios2.h \ +diff --git a/include/urcu/arch.h b/include/urcu/arch.h +index 2bffdbe..928577e 100644 +--- a/include/urcu/arch.h ++++ b/include/urcu/arch.h +@@ -49,6 +49,7 @@ + * URCU_ARCH_HPPA : All HP PA-RISC variants + * URCU_ARCH_M68K : All Motorola 68000 variants + * URCU_ARCH_RISCV : All RISC-V variants ++ * URCU_ARCH_LOONGARCH : All LoongArch variants + */ + + #if (defined(__INTEL_OFFLOAD) || defined(__TARGET_ARCH_MIC) || defined(__MIC__)) +@@ -167,6 +168,11 @@ + #define URCU_ARCH_RISCV 1 + #include + ++#elif defined(__loongarch__) ++ ++#define URCU_ARCH_LOONGARCH 1 ++#include ++ + #else + #error "Cannot build: unrecognized 
architecture, see ." + #endif +diff --git a/include/urcu/arch/loongarch.h b/include/urcu/arch/loongarch.h +new file mode 100644 +index 0000000..a6d9fee +--- /dev/null ++++ b/include/urcu/arch/loongarch.h +@@ -0,0 +1,49 @@ ++#ifndef _URCU_ARCH_LOONGARCH_H ++#define _URCU_ARCH_LOONGARCH_H ++ ++/* ++ * arch/loongarch.h: trivial definitions for the LoongArch architecture. ++ * ++ * Copyright (c) 2021 Wang Jing ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++#include ++ ++/* ++ * On Linux, define the membarrier system call number if not yet available in ++ * the system headers. ++ */ ++#if (defined(__linux__) && !defined(__NR_membarrier)) ++#define __NR_membarrier 283 ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#include ++ ++#endif /* _URCU_ARCH_LOONGARCH_H */ +diff --git a/include/urcu/uatomic.h b/include/urcu/uatomic.h +index 2fb5fd4..e1ff44d 100644 +--- a/include/urcu/uatomic.h ++++ b/include/urcu/uatomic.h +@@ -51,6 +51,8 @@ + #include + #elif defined(URCU_ARCH_RISCV) + #include ++#elif defined(URCU_ARCH_LOONGARCH) ++#include + #else + #error "Cannot build: unrecognized architecture, see ." + #endif +diff --git a/include/urcu/uatomic/loongarch.h b/include/urcu/uatomic/loongarch.h +new file mode 100644 +index 0000000..f41302a +--- /dev/null ++++ b/include/urcu/uatomic/loongarch.h +@@ -0,0 +1,44 @@ ++#ifndef _URCU_UATOMIC_ARCH_LOONGARCH_H ++#define _URCU_UATOMIC_ARCH_LOONGARCH_H ++ ++/* ++ * Atomic exchange operations for the LoongArch architecture. Let GCC do it. ++ * ++ * Copyright (c) 2021 Wang Jing ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define UATOMIC_HAS_ATOMIC_BYTE ++#define UATOMIC_HAS_ATOMIC_SHORT ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#include ++ ++#endif /* _URCU_UATOMIC_ARCH_LOONGARCH_H */ diff --git a/libusbsio/PKGBUILD b/libusbsio/PKGBUILD index 06b1a7ba3a..0f3f63d124 100644 --- a/libusbsio/PKGBUILD +++ b/libusbsio/PKGBUILD @@ -50,7 +50,7 @@ package_libusbsio() { ) cd $pkgbase-$pkgver-src - install -vDm 755 bin/linux_$CARCH/$pkgbase.so -t "$pkgdir/usr/lib/" + install -vDm 755 bin/linux_`uname -m`/$pkgbase.so -t "$pkgdir/usr/lib/" install -vDm 644 license/*.txt -t "$pkgdir/usr/share/licenses/$pkgname/" } @@ -67,8 +67,8 @@ package_python-libusbsio() { ( cd python/dist/$pkgbase-$pkgver python -m installer --destdir="$pkgdir" dist/*.whl - install -vdm 755 "$pkgdir/$_site_packages/$pkgbase/bin/linux_$CARCH" - ln -fsv /usr/lib/$pkgbase.so "$pkgdir/$_site_packages/$pkgbase/bin/linux_$CARCH/$pkgbase.so" + install -vdm 755 "$pkgdir/$_site_packages/$pkgbase/bin/linux_`uname -m`" + ln -fsv /usr/lib/$pkgbase.so "$pkgdir/$_site_packages/$pkgbase/bin/linux_`uname -m`/$pkgbase.so" ) install -vDm 644 license/BSD-3-clause.txt -t "$pkgdir/usr/share/licenses/$pkgname/" install -vDm 644 python/README.md -t "$pkgdir/usr/share/doc/$pkgname/" diff --git a/libvirt/PKGBUILD b/libvirt/PKGBUILD index e16c04e57f..27232c604f 100644 --- a/libvirt/PKGBUILD +++ b/libvirt/PKGBUILD @@ -83,13 +83,15 @@ backup=( ) source=( "https://libvirt.org/sources/$pkgname-$pkgver.tar.xz"{,.asc} + libvirt-loongarch.patch ) sha256sums=('8ba2e72ec8bdd2418554a1474c42c35704c30174b7611eaf9a16544b71bcf00a' - 'SKIP') + 'f0562941282b157e2ebba9d203c33f4f9c0f3f93562129448f7de6e5df0575fc') validpgpkeys=('453B65310595562855471199CA68BE8010084C9C') # Jiří Denemark prepare() { cd "$pkgname-$pkgver" + patch -Np1 -i ../libvirt-loongarch.patch sed -i 's|/sysconfig/|/conf.d/|g' \ src/remote/libvirtd.service.in \ @@ -185,3 +187,12 @@ package_libvirt-storage-iscsi-direct() { install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_iscsi-direct.so" } + +package_libvirt-storage-rbd() { + pkgdesc="Libvirt RBD storage backend" + depends=("libvirt=$pkgver") + optdepends=() + backup=() + + install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_rbd.so" +} diff --git a/libvirt/libvirt-loongarch.patch b/libvirt/libvirt-loongarch.patch new file mode 100644 index 0000000000..169949dbfa --- /dev/null +++ b/libvirt/libvirt-loongarch.patch @@ -0,0 +1,407 @@ +diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng +index 26eb538077..04f032b3ab 100644 +--- a/src/conf/schemas/basictypes.rng ++++ b/src/conf/schemas/basictypes.rng +@@ -470,6 +470,7 @@ + x86_64 + xtensa + xtensaeb ++ loongarch64 + + + +diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c +index bc43aa4e93..dd677ba269 100644 +--- a/src/cpu/cpu.c ++++ b/src/cpu/cpu.c +@@ -28,6 +28,7 @@ + #include "cpu_s390.h" + #include "cpu_arm.h" + #include "cpu_riscv64.h" ++#include "cpu_loongarch64.h" + #include "capabilities.h" + + +@@ -41,6 +42,7 @@ static struct cpuArchDriver *drivers[] = { + &cpuDriverS390, + &cpuDriverArm, + &cpuDriverRiscv64, ++ &cpuDriverLoongarch64, + }; + + +diff --git a/src/cpu/cpu_loongarch64.c b/src/cpu/cpu_loongarch64.c +new file mode 100644 +index 0000000000..cf026258f9 +--- /dev/null ++++ b/src/cpu/cpu_loongarch64.c +@@ -0,0 +1,74 @@ ++/* ++ * cpu_loongarch64.c: CPU driver for loongarch64 CPUs ++ * ++ * Copyright (c) 2023, XinmuTouhouKyou ++ * 
++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see ++ * . ++ */ ++ ++#include ++#include "cpu.h" ++#include "cpu_loongarch64.h" ++ ++ ++static const virArch archs[] = { VIR_ARCH_LOONGARCH64 }; ++static virCPUCompareResult ++virCPULoongarch64Compare(virCPUDef *host G_GNUC_UNUSED, ++ virCPUDef *cpu G_GNUC_UNUSED, ++ bool failMessages G_GNUC_UNUSED) ++{ ++ /* loongarch64 relies on QEMU to perform all runability checking. Return ++ * VIR_CPU_COMPARE_IDENTICAL to bypass Libvirt checking. ++ */ ++ return VIR_CPU_COMPARE_IDENTICAL; ++} ++ ++static int ++virCPULoongarch64ValidateFeatures(virCPUDef *cpu G_GNUC_UNUSED) ++{ ++ return 0; ++} ++ ++static int ++virCPULoongarch64Update(virCPUDef *guest, ++ const virCPUDef *host G_GNUC_UNUSED, ++ bool relative G_GNUC_UNUSED) ++{ ++ g_autoptr(virCPUDef) updated = virCPUDefCopyWithoutModel(guest); ++ ++ if (!relative || guest->mode != VIR_CPU_MODE_HOST_MODEL) ++ return 0; ++ ++ updated->mode = VIR_CPU_MODE_CUSTOM; ++ virCPUDefCopyModel(updated, host, true); ++ ++ virCPUDefStealModel(guest, updated, false); ++ guest->mode = VIR_CPU_MODE_CUSTOM; ++ guest->match = VIR_CPU_MATCH_EXACT; ++ ++ return 0; ++} ++ ++struct cpuArchDriver cpuDriverLoongarch64 = { ++ .name = "loongarch64", ++ .arch = archs, ++ .narch = G_N_ELEMENTS(archs), ++ .compare = virCPULoongarch64Compare, ++ .decode = NULL, ++ .encode = NULL, ++ .baseline = NULL, ++ .update = virCPULoongarch64Update, ++ .validateFeatures = virCPULoongarch64ValidateFeatures, ++}; +diff --git a/src/cpu/cpu_loongarch64.h b/src/cpu/cpu_loongarch64.h +new file mode 100644 +index 0000000000..a5f84b5637 +--- /dev/null ++++ b/src/cpu/cpu_loongarch64.h +@@ -0,0 +1,25 @@ ++/* ++ * cpu_loongarch64.h: CPU driver for loongarch64 CPUs ++ * ++ * Copyright (c) 2023, XinmuTouhouKyou ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see ++ * . 
++ */ ++ ++#pragma once ++ ++#include "cpu.h" ++ ++extern struct cpuArchDriver cpuDriverLoongarch64; +diff --git a/src/cpu/meson.build b/src/cpu/meson.build +index 55396903b9..09c02ef9d9 100644 +--- a/src/cpu/meson.build ++++ b/src/cpu/meson.build +@@ -6,6 +6,7 @@ cpu_sources = [ + 'cpu_riscv64.c', + 'cpu_s390.c', + 'cpu_x86.c', ++ 'cpu_loongarch64.c', + ] + + cpu_lib = static_library( +diff --git a/src/cpu_map/index.xml b/src/cpu_map/index.xml +index d2c5af5797..92948cd213 100644 +--- a/src/cpu_map/index.xml ++++ b/src/cpu_map/index.xml +@@ -119,4 +119,8 @@ + + + ++ ++ ++ ++ + +diff --git a/src/cpu_map/loongarch64_la464.xml b/src/cpu_map/loongarch64_la464.xml +new file mode 100644 +index 0000000000..3d4f34ae7a +--- /dev/null ++++ b/src/cpu_map/loongarch64_la464.xml +@@ -0,0 +1,5 @@ ++ ++ ++ ++ ++ +diff --git a/src/cpu_map/loongarch64_vendors.xml b/src/cpu_map/loongarch64_vendors.xml +new file mode 100644 +index 0000000000..64d49a9662 +--- /dev/null ++++ b/src/cpu_map/loongarch64_vendors.xml +@@ -0,0 +1,3 @@ ++ ++ ++ +\ No newline at end of file +diff --git a/src/cpu_map/meson.build b/src/cpu_map/meson.build +index ae5293e85f..6bce65f0fb 100644 +--- a/src/cpu_map/meson.build ++++ b/src/cpu_map/meson.build +@@ -84,6 +84,8 @@ cpumap_data = [ + 'x86_vendors.xml', + 'x86_Westmere-IBRS.xml', + 'x86_Westmere.xml', ++ 'loongarch64_vendors.xml', ++ 'loongarch64_la464.xml', + ] + + install_data(cpumap_data, install_dir: pkgdatadir / 'cpu_map') +diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c +index 83119e871a..6c6eae0b66 100644 +--- a/src/qemu/qemu_capabilities.c ++++ b/src/qemu/qemu_capabilities.c +@@ -1138,7 +1138,7 @@ virQEMUCapsInitGuestFromBinary(virCaps *caps, + NULL, NULL, 0, NULL); + } + +- if ((ARCH_IS_X86(guestarch) || guestarch == VIR_ARCH_AARCH64)) ++ if ((ARCH_IS_X86(guestarch) || guestarch == VIR_ARCH_AARCH64 || guestarch == VIR_ARCH_LOONGARCH64)) + virCapabilitiesAddGuestFeatureWithToggle(guest, VIR_CAPS_GUEST_FEATURE_TYPE_ACPI, + true, true); + +@@ -2697,6 +2697,10 @@ static const char *preferredMachines[] = + + "sim", /* VIR_ARCH_XTENSA */ + "sim", /* VIR_ARCH_XTENSAEB */ ++ ++ ++// "virt", /* VIR_ARCH_LOONGARCH32 */ ++ "virt", /* VIR_ARCH_LOONGARCH64 */ + }; + G_STATIC_ASSERT(G_N_ELEMENTS(preferredMachines) == VIR_ARCH_LAST); + +diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c +index 413f67577e..29a78127ea 100644 +--- a/src/qemu/qemu_domain.c ++++ b/src/qemu/qemu_domain.c +@@ -4221,7 +4221,12 @@ qemuDomainDefAddDefaultDevices(virQEMUDriver *driver, + if (qemuDomainIsMipsMalta(def)) + addPCIRoot = true; + break; +- ++ case VIR_ARCH_LOONGARCH64: ++ if (qemuDomainIsLoongarch64Virt(def)) { ++ addPCIRoot = true; ++ addDefaultUSB = true; ++ } ++ break; + case VIR_ARCH_ARMV7B: + case VIR_ARCH_CRIS: + case VIR_ARCH_ITANIUM: +@@ -8901,6 +8906,20 @@ qemuDomainMachineIsRISCVVirt(const char *machine, + return false; + } + ++static bool ++qemuDomainMachineIsLoongarch64Virt(const char *machine, ++ const virArch arch) ++{ ++ if (arch!=VIR_ARCH_LOONGARCH64) ++ return false; ++ ++ if (STREQ(machine, "virt") || ++ STRPREFIX(machine, "virt-")) { ++ return true; ++ } ++ ++ return false; ++} + + /* You should normally avoid this function and use + * qemuDomainIsPSeries() instead. 
*/ +@@ -8998,6 +9017,12 @@ qemuDomainIsRISCVVirt(const virDomainDef *def) + return qemuDomainMachineIsRISCVVirt(def->os.machine, def->os.arch); + } + ++bool ++qemuDomainIsLoongarch64Virt(const virDomainDef *def) ++{ ++ return qemuDomainMachineIsLoongarch64Virt(def->os.machine, def->os.arch); ++} ++ + + bool + qemuDomainIsPSeries(const virDomainDef *def) +diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h +index 1e56e50672..cb444ff06a 100644 +--- a/src/qemu/qemu_domain.h ++++ b/src/qemu/qemu_domain.h +@@ -827,6 +827,7 @@ bool qemuDomainIsS390CCW(const virDomainDef *def); + bool qemuDomainIsARMVirt(const virDomainDef *def); + bool qemuDomainIsRISCVVirt(const virDomainDef *def); + bool qemuDomainIsPSeries(const virDomainDef *def); ++bool qemuDomainIsLoongarch64Virt(const virDomainDef *def); + bool qemuDomainIsMipsMalta(const virDomainDef *def); + bool qemuDomainHasPCIRoot(const virDomainDef *def); + bool qemuDomainHasPCIeRoot(const virDomainDef *def); +diff --git a/src/util/virarch.c b/src/util/virarch.c +index 01e520de73..9b981b6ced 100644 +--- a/src/util/virarch.c ++++ b/src/util/virarch.c +@@ -83,6 +83,10 @@ static const struct virArchData { + + { "xtensa", 32, VIR_ARCH_LITTLE_ENDIAN }, + { "xtensaeb", 32, VIR_ARCH_BIG_ENDIAN }, ++ ++// { "loong32", 32, VIR_ARCH_LITTLE_ENDIAN }, ++ { "loongarch64", 64, VIR_ARCH_LITTLE_ENDIAN }, ++ + }; + + G_STATIC_ASSERT(G_N_ELEMENTS(virArchData) == VIR_ARCH_LAST); +diff --git a/src/util/virarch.h b/src/util/virarch.h +index 747f77c48e..3e48102f98 100644 +--- a/src/util/virarch.h ++++ b/src/util/virarch.h +@@ -69,6 +69,9 @@ typedef enum { + VIR_ARCH_XTENSA, /* XTensa 32 LE https://en.wikipedia.org/wiki/Xtensa#Processor_Cores */ + VIR_ARCH_XTENSAEB, /* XTensa 32 BE https://en.wikipedia.org/wiki/Xtensa#Processor_Cores */ + ++// VIR_ARCH_LOONGARCH32, /* LoongArch 32 LE https://en.wikipedia.org/wiki/LoongArch */ ++ VIR_ARCH_LOONGARCH64, /* LoongArch 64 LE https://en.wikipedia.org/wiki/LoongArch */ ++ + VIR_ARCH_LAST, + } virArch; + +diff --git a/src/util/virsysinfo.c b/src/util/virsysinfo.c +index 36a861c53f..8a34c0479d 100644 +--- a/src/util/virsysinfo.c ++++ b/src/util/virsysinfo.c +@@ -1228,6 +1228,74 @@ virSysinfoReadDMI(void) + return g_steal_pointer(&ret); + } + ++static int ++virSysinfoParseLoongarch64Processor(const char *base, virSysinfoDef *ret) ++{ ++ const char *cur; ++ char *eol, *tmp_base; ++ virSysinfoProcessorDef *processor; ++ char *processor_type = NULL; ++ ++ if (!(tmp_base = strstr(base, "Model Name")) && ++ !(tmp_base = strstr(base, "processor"))) ++ return 0; ++ ++ eol = strchr(tmp_base, '\n'); ++ cur = strchr(tmp_base, ':') + 1; ++ virSkipSpaces(&cur); ++ if (eol) ++ processor_type = g_strndup(cur, eol - cur); ++ ++ while ((tmp_base = strstr(base, "processor")) != NULL) { ++ base = tmp_base; ++ eol = strchr(base, '\n'); ++ cur = strchr(base, ':') + 1; ++ ++ VIR_EXPAND_N(ret->processor, ret->nprocessor, 1); ++ processor = &ret->processor[ret->nprocessor - 1]; ++ ++ virSkipSpaces(&cur); ++ if (eol) ++ processor->processor_socket_destination = g_strndup(cur, ++ eol - cur); ++ ++ processor->processor_type = g_strdup(processor_type); ++ ++ base = cur; ++ } ++ ++ VIR_FREE(processor_type); ++ return 0; ++} ++virSysinfoDef * ++virSysinfoReadLoongArch64(void); ++virSysinfoDef * ++virSysinfoReadLoongArch64(void){ ++ g_autoptr(virSysinfoDef) ret = NULL; ++ g_autofree char *outbuf = NULL; ++ ++ if ((ret = virSysinfoReadDMI())) { ++ if (!virSysinfoDefIsEmpty(ret)) ++ return g_steal_pointer(&ret); ++ virSysinfoDefFree(ret); ++ } ++ ++ 
virResetLastError(); ++ ret = g_new0(virSysinfoDef, 1); ++ ++ if (virFileReadAll(CPUINFO, CPUINFO_FILE_LEN, &outbuf) < 0) { ++ virReportError(VIR_ERR_INTERNAL_ERROR, ++ _("Failed to open %1$s"), CPUINFO); ++ return NULL; ++ } ++ ++ ret->nprocessor = 0; ++ ret->processor = NULL; ++ if (virSysinfoParseLoongarch64Processor(outbuf, ret) < 0) ++ return NULL; ++ ++ return g_steal_pointer(&ret); ++} + + /** + * virSysinfoRead: +@@ -1250,6 +1318,8 @@ virSysinfoRead(void) + defined(__i386__) || \ + defined(__amd64__)) + return virSysinfoReadDMI(); ++#elif defined(__loongarch64) ++ return virSysinfoReadLoongArch64(); + #else /* WIN32 || not supported arch */ + /* + * this can probably be extracted from Windows using API or registry diff --git a/libvisual/PKGBUILD b/libvisual/PKGBUILD index 8568f7e8ad..9a6f1e5fec 100644 --- a/libvisual/PKGBUILD +++ b/libvisual/PKGBUILD @@ -13,6 +13,8 @@ sha256sums=('63085fd9835c42c9399ea6bb13a7ebd4b1547ace75c4595ce8e9759512bd998a') build() { cd ${pkgname}-${pkgver} + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr make } diff --git a/libvpx/PKGBUILD b/libvpx/PKGBUILD index 197be672f5..42bef022d0 100644 --- a/libvpx/PKGBUILD +++ b/libvpx/PKGBUILD @@ -41,6 +41,8 @@ build() { --enable-shared \ --enable-vp8 \ --enable-vp9 \ + --disable-lsx \ + --disable-lasx \ --enable-vp9-highbitdepth \ --enable-vp9-temporal-denoising make diff --git a/libyuv/0001-fix-build-error.patch b/libyuv/0001-fix-build-error.patch new file mode 100644 index 0000000000..7266303555 --- /dev/null +++ b/libyuv/0001-fix-build-error.patch @@ -0,0 +1,113 @@ +From 5f2390c7b4d114d591b7880e6020a79d09957f3e Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 1 Jan 2024 16:46:39 +0800 +Subject: [PATCH] fix build error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +/build/libyuv/src/libyuv/source/row_lsx.cc: In function ‘void libyuv::ARGB1555ToUVRow_LSX(const uint8_t*, int, uint8_t*, uint8_t*, int)’: +/build/libyuv/src/libyuv/source/row_lsx.cc:410:25: error: narrowing conversion of ‘9259542123273814144’ from ‘long unsigned int’ to ‘long long int’ [-Wnarrowing] + 410 | __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + | ^~~~~~~~~~~~~~~~~~ +--- + source/row_lsx.cc | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/source/row_lsx.cc b/source/row_lsx.cc +index 9c1e16f2..c7b30051 100644 +--- a/source/row_lsx.cc ++++ b/source/row_lsx.cc +@@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, +@@ -516,7 +516,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, +@@ -577,7 +577,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + __m128i const_38 = __lsx_vldi(0x413); + 
__m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -630,7 +630,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -865,7 +865,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48, +@@ -913,7 +913,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, +@@ -961,7 +961,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, +@@ -1010,7 +1010,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, + __m128i const_21 = __lsx_vldi(0x415); + __m128i const_53 = __lsx_vldi(0x435); + __m128i const_10 = __lsx_vldi(0x40A); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, +@@ -1388,7 +1388,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, + __m128i const_256 = __lsx_vldi(0x500); + __m128i zero = __lsx_vldi(0); + __m128i alpha = __lsx_vldi(0xFF); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, +@@ -1434,7 +1434,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); + __m128i vec_scale = __lsx_vreplgr2vr_w(scale); + __m128i zero = __lsx_vldi(0); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), 
static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, +-- +2.42.0 + diff --git a/libyuv/PKGBUILD b/libyuv/PKGBUILD index 9d25df1832..826d7d3ec5 100644 --- a/libyuv/PKGBUILD +++ b/libyuv/PKGBUILD @@ -11,8 +11,10 @@ depends=(gcc-libs libjpeg) makedepends=(cmake git) # Version required by libavif _commit=464c51a0353c71f08fe45f683d6a97a638d47833 -source=(git+${url}#commit=${_commit}) -sha512sums=(SKIP) +source=(git+${url}#commit=${_commit} + 0001-fix-build-error.patch) +sha512sums=('SKIP' + '74e4503371ea7fbb054b18990dccc708d97612c03821d6ca3f19bf26ca8bbabfb7978f61b6a738b17c820bb5fd278a746dba5f0e9ae8e0d2a8833f6d5b7feaea') pkgver() { cd ${pkgname} @@ -21,6 +23,7 @@ pkgver() { prepare() { sed -i 's|yuvconvert ${JPEG_LIBRARY}|${ly_lib_shared} ${JPEG_LIBRARY}|' ${pkgname}/CMakeLists.txt + patch -d ${pkgname} -p1 -i $srcdir/0001-fix-build-error.patch } build() { diff --git a/link-grammar/PKGBUILD b/link-grammar/PKGBUILD index 5ae5123146..4731f80765 100644 --- a/link-grammar/PKGBUILD +++ b/link-grammar/PKGBUILD @@ -11,11 +11,10 @@ license=('LGPL') depends=('hunspell' 'sqlite' 'libedit') makedepends=('python' 'swig' 'apache-ant' 'java-environment=11') options=('!makeflags') -source=(https://www.abisource.com/downloads/${pkgname}/${pkgver}/${pkgname}-${pkgver}.tar.gz{,.asc}) +source=(https://github.com/opencog/link-grammar/archive/refs/tags/${pkgname}-${pkgver}.tar.gz) validpgpkeys=('6407453C98BECC19ADB03D82EB6AA534E0C0651C' '8305252160000B5E89843F5464A99A8201045933') # Linas Vepstas -sha256sums=('ef89a322f943607546a667ecb4fdf31d56cab5ed11c856873ac72fa3d352f7a3' - 'SKIP') +sha256sums=('e0cd1b94cc9af20e5bd9a04604a714e11efe21ae5e453b639cdac050b6ac4150') build() { cd ${pkgname}-${pkgver} diff --git a/linux-hardened/PKGBUILD b/linux-hardened/PKGBUILD index 469b3c90a2..1c16ec51c9 100644 --- a/linux-hardened/PKGBUILD +++ b/linux-hardened/PKGBUILD @@ -4,8 +4,10 @@ # Contributor: Thomas Baechler pkgbase=linux-hardened -pkgver=6.7.2.hardened1 -pkgrel=2 +_ver=6.7.0 +_rdate=20231226 +pkgver=${_ver}.hardened1 +pkgrel=1 pkgdesc='Security-Hardened Linux' url='https://github.com/anthraxx/linux-hardened' arch=(loong64 x86_64) @@ -31,9 +33,9 @@ options=('!strip') _srcname=linux-${pkgver%.*} _srctag=${pkgver%.*}-${pkgver##*.} source=( - https://cdn.kernel.org/pub/linux/kernel/v${pkgver%%.*}.x/${_srcname}.tar.{xz,sign} - ${url}/releases/download/${_srctag}/${pkgbase}-${_srctag}.patch{,.sig} - config # the main kernel config file + https://github.com/loongarchlinux/linux/releases/download/v${_rdate}/linux-${_ver}-${_rdate}.tar.xz + config # the main kernel config file + config.la64 ) validpgpkeys=( ABAF11C65A2970B130ABE3C479BE3E4300411886 # Linus Torvalds @@ -41,16 +43,12 @@ validpgpkeys=( E240B57E2C4630BA768E2F26FC1B547C8D8172C8 # Levente Polyak ) # https://www.kernel.org/pub/linux/kernel/v6.x/sha256sums.asc -sha256sums=('c34de41baa29c475c0834e88a3171e255ff86cd32d83c6bffc2b797e60bfa671' - 'SKIP' - 'f1a21eab19a3685f6cabaef93c520fa0061202b978ba64d539af0de690c375d6' - 'SKIP' - '79aa07a1108582118c5e4721b9b5440053791d7a98ceb9538d42a511e39097eb') -b2sums=('4c1f480de0c1458aa67379cd02d35708f63850adb84a85061088de1f82b5d084bc7cf7da459a3f1e415544351d1f36a9a832277240774ae461cdde11687cbadd' - 'SKIP' - 'c97573edb0e765b35c9ebc1a091889aa924be997d57a8acf8a9221edd36ab8b4823c6521dc31838c4c4ec9e23bf39a5c462bc7ab99baf81dddf8c92fd8cfe10f' - 'SKIP' - 
'ca79cea706454ee6aab3bfbc01d3067cef4fdfd49413c8bca52480596ec394d932a4e252b4bbcc3a605bd3a56b4b73493c47a7cdd3e984a5b42767fce0f1c025') +sha256sums=('d0e6ce60f0ccd162aabe130c00509590de790e33642a12ed4249aa08ac14f674' + '46a1e0e43247d09c5ae29cfa7a79e272767a49b90c5761c2e4a5656a4ced6cf2' + 'c9b26d463e27257d6ad13e59d489db5bd9b103e506dc80d7917bf48471480c85') +b2sums=('15b6b33c6fdac5329d56424afc09a722053f045ca1a1553d583d80296a20a3e545d6ac1fd7950e575816df3e2d1c1033aeba779aa374ee25406f05d284d56815' + '914edb986d34ddaa20738ec6d4f0d68b2500ee4662be3f58c1f62ecfa87f3ab88205acf91ec7d03d2f925880d538d0b1716183add857d2bff533e5a0d0596ba5' + 'd285dfd1304c9bc42f9fdaa18f8a393fc599be5d6144abe43959c8f63de7b8973821c50135ce81764428659ddd0eb634d7cce6da697cab9752311ad05dee2df6') export KBUILD_BUILD_HOST=archlinux export KBUILD_BUILD_USER=$pkgbase @@ -74,9 +72,9 @@ prepare() { done echo "Setting config..." - cp ../config .config + cp ../config.la64 .config make olddefconfig - diff -u ../config .config || : + diff -u ../config.la64 .config || : make -s kernelrelease > version echo "Prepared $pkgbase version $( +Date: Wed Sep 6 22:53:09 2023 +0800 + + LoongArch: Remove shm_align_mask and use SHMLBA instead + + Both shm_align_mask and SHMLBA want to avoid cache alias. But they are + inconsistent: shm_align_mask is (PAGE_SIZE - 1) while SHMLBA is SZ_64K, + but PAGE_SIZE is not always equal to SZ_64K. + + This may cause problems when shmat() twice. Fix this problem by removing + shm_align_mask and using SHMLBA (strictly SHMLBA - 1) instead. + + Reported-by: Jiantao Shan + Signed-off-by: Huacai Chen + +diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c +index 72685a48eaf0..6be04d36ca07 100644 +--- a/arch/loongarch/mm/cache.c ++++ b/arch/loongarch/mm/cache.c +@@ -156,7 +156,6 @@ void cpu_cache_init(void) + + current_cpu_data.cache_leaves_present = leaf; + current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; +- shm_align_mask = PAGE_SIZE - 1; + } + + static const pgprot_t protection_map[16] = { +diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c +index fbe1a4856fc4..a9630a81b38a 100644 +--- a/arch/loongarch/mm/mmap.c ++++ b/arch/loongarch/mm/mmap.c +@@ -8,12 +8,11 @@ + #include + #include + +-unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ +-EXPORT_SYMBOL(shm_align_mask); ++#define SHM_ALIGN_MASK (SHMLBA - 1) + +-#define COLOUR_ALIGN(addr, pgoff) \ +- ((((addr) + shm_align_mask) & ~shm_align_mask) + \ +- (((pgoff) << PAGE_SHIFT) & shm_align_mask)) ++#define COLOUR_ALIGN(addr, pgoff) \ ++ ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \ ++ + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK)) + + enum mmap_allocation_direction {UP, DOWN}; + +@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && +- ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) ++ ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK)) + return -EINVAL; + return addr; + } +@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, + } + + info.length = len; +- info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; ++ info.align_mask = do_color_align ? 
(PAGE_MASK & SHM_ALIGN_MASK) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + + if (dir == DOWN) { diff --git a/linux-tools/PKGBUILD b/linux-tools/PKGBUILD index aeeb369c7d..47421ceb24 100644 --- a/linux-tools/PKGBUILD +++ b/linux-tools/PKGBUILD @@ -10,9 +10,9 @@ pkgname=( 'linux-tools-meta' 'perf' 'tmon' - 'turbostat' +# 'turbostat' 'usbip' - 'x86_energy_perf_policy' +# 'x86_energy_perf_policy' ) pkgver=6.7 pkgrel=1 @@ -64,6 +64,7 @@ sha256sums=('SKIP' '2d5e2f8d40b6f19bf2e1dead57ca105d72098fb0b418c09ff2e0cb91089710af') prepare() { + ln -sf linux-$pkgver linux cd linux # apply patch from the source array (should be a pacman feature) @@ -103,10 +104,10 @@ build() { make VERSION=$pkgver-$pkgrel popd - echo ':: x86_energy_perf_policy' - pushd linux/tools/power/x86/x86_energy_perf_policy - make - popd +# echo ':: x86_energy_perf_policy' +# pushd linux/tools/power/x86/x86_energy_perf_policy +# make +# popd echo ':: usbip' pushd linux/tools/usb/usbip @@ -127,10 +128,10 @@ build() { make popd - echo ':: turbostat' - pushd linux/tools/power/x86/turbostat - make - popd +# echo ':: turbostat' +# pushd linux/tools/power/x86/turbostat +# make +# popd echo ':: hv' pushd linux/tools/hv @@ -163,9 +164,9 @@ package_linux-tools-meta() { 'hyperv' 'perf' 'tmon' - 'turbostat' +# 'turbostat' 'usbip' - 'x86_energy_perf_policy' +# 'x86_energy_perf_policy' ) conflicts=( 'acpidump' diff --git a/liteide/PKGBUILD b/liteide/PKGBUILD index b7319aebe0..e342078f8e 100644 --- a/liteide/PKGBUILD +++ b/liteide/PKGBUILD @@ -15,13 +15,16 @@ depends=(go-tools qt5-base) optdepends=('go: go compiler' 'gcc-go: go compiler') options=(!strip !emptydirs) -source=("$pkgname-x$pkgver::git+$url#commit=35a0dcd957d8bdcc189089a9c027bc54b8aa8cde") # tag: x38.3 -b2sums=('SKIP') +source=("$pkgname-x$pkgver::git+$url#commit=35a0dcd957d8bdcc189089a9c027bc54b8aa8cde" + liteide-fix-build.patch) +b2sums=('SKIP' + '03d9ff6614a800e9d4fb5a6e05a1b8a92e6e4fb27342b237a6c297dadcd90c39b5299da99bee0ee639630947de67c521fe72c10419cc06c3f94eda0a546f84a7') prepare() { cd $pkgname-x$pkgver chmod +x build/*_*.sh + patch -p1 -i $srcdir/liteide-fix-build.patch # Fix for FS#4662 (until fixed by upstream) sed -i 's|^GOROOT|#GOROOT|g' liteidex/os_deploy/linux/liteenv/linux{32,64}.env diff --git a/liteide/liteide-fix-build.patch b/liteide/liteide-fix-build.patch new file mode 100644 index 0000000000..c2ab73ddbe --- /dev/null +++ b/liteide/liteide-fix-build.patch @@ -0,0 +1,11 @@ +--- a/build/update_pkg.sh 2024-01-10 14:06:25.000000000 +0800 ++++ b/build/update_pkg.sh 2024-01-10 14:31:06.474082272 +0800 +@@ -25,7 +25,7 @@ + echo install gocode ... + go install -v github.com/visualfc/gocode@latest + echo install gotools ... +-go install -v github.com/visualfc/gotools@latest ++go install -v github.com/visualfc/gotools@master + echo install gomodifytags ... 
+ go install -v github.com/fatih/gomodifytags@latest + diff --git a/lld/PKGBUILD b/lld/PKGBUILD index da321dab0e..783b88185c 100644 --- a/lld/PKGBUILD +++ b/lld/PKGBUILD @@ -15,7 +15,8 @@ _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkg source=($_source_base/lld-$pkgver.src.tar.xz{,.sig} $_source_base/llvm-$pkgver.src.tar.xz{,.sig} $_source_base/libunwind-$pkgver.src.tar.xz{,.sig} - $_source_base/cmake-$pkgver.src.tar.xz{,.sig}) + $_source_base/cmake-$pkgver.src.tar.xz{,.sig} + lld-la64.patch) sha256sums=('a127e334dd267f2e20d5a0c6b15aa9651f3fbbdfe3dc7d2573c617fad1155fcb' 'SKIP' 'e91db44d1b3bb1c33fcea9a7d1f2423b883eaa9163d3d56ca2aa6d2f0711bc29' @@ -23,16 +24,18 @@ sha256sums=('a127e334dd267f2e20d5a0c6b15aa9651f3fbbdfe3dc7d2573c617fad1155fcb' '7e04070aee07e43ecb5f2b321a7cc64671202af3bcf15324bb1e134cdb7b2b72' 'SKIP' '39d342a4161095d2f28fb1253e4585978ac50521117da666e2b1f6f28b62f514' - 'SKIP') + 'SKIP' + '2c99101e69601f493ddf4e52f6e67d98984dae7998ee40dd7cd4adba972065ce') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A' # Tom Stellard 'D574BD5D1D0E98895E3BF90044F2485E45D59042') # Tobias Hieta prepare() { # https://bugs.llvm.org/show_bug.cgi?id=49228 mv libunwind{-$pkgver.src,} - mv cmake{-$pkgver.src,} + cd lld-$pkgver.src + patch -p2 -i $srcdir/lld-la64.patch mkdir build } diff --git a/lld/lld-la64.patch b/lld/lld-la64.patch new file mode 100644 index 0000000000..de69479023 --- /dev/null +++ b/lld/lld-la64.patch @@ -0,0 +1,2569 @@ +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +new file mode 100644 +index 000000000000..9dc99e573d41 +--- /dev/null ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -0,0 +1,687 @@ ++//===- LoongArch.cpp ------------------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "InputFiles.h" ++#include "OutputSections.h" ++#include "Symbols.h" ++#include "SyntheticSections.h" ++#include "Target.h" ++ ++using namespace llvm; ++using namespace llvm::object; ++using namespace llvm::support::endian; ++using namespace llvm::ELF; ++using namespace lld; ++using namespace lld::elf; ++ ++namespace { ++class LoongArch final : public TargetInfo { ++public: ++ LoongArch(); ++ uint32_t calcEFlags() const override; ++ int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; ++ void writeGotPlt(uint8_t *buf, const Symbol &s) const override; ++ void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; ++ void writePltHeader(uint8_t *buf) const override; ++ void writePlt(uint8_t *buf, const Symbol &sym, ++ uint64_t pltEntryAddr) const override; ++ RelType getDynRel(RelType type) const override; ++ RelExpr getRelExpr(RelType type, const Symbol &s, ++ const uint8_t *loc) const override; ++ bool usesOnlyLowPageBits(RelType type) const override; ++ void relocate(uint8_t *loc, const Relocation &rel, ++ uint64_t val) const override; ++}; ++} // end anonymous namespace ++ ++enum Op { ++ SUB_W = 0x00110000, ++ SUB_D = 0x00118000, ++ BREAK = 0x002a0000, ++ SRLI_W = 0x00448000, ++ SRLI_D = 0x00450000, ++ ADDI_W = 0x02800000, ++ ADDI_D = 0x02c00000, ++ ANDI = 0x03400000, ++ PCADDU12I = 0x1c000000, ++ LD_W = 0x28800000, ++ LD_D = 0x28c00000, ++ JIRL = 0x4c000000, ++}; ++ ++enum Reg { ++ R_ZERO = 0, ++ R_RA = 1, ++ R_TP = 2, ++ R_T0 = 12, ++ R_T1 = 13, ++ R_T2 = 14, ++ R_T3 = 15, ++}; ++ ++// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences ++// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` ++// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the ++// "page") for the next instruction to add in the "page offset". (`pcalau12i` ++// stands for something like "PC ALigned Add Upper that starts from the 12th ++// bit, Immediate".) ++// ++// Here a "page" is in fact just another way to refer to the 12-bit range ++// allowed by the immediate field of the addi/ld/st instructions, and not ++// related to the system or the kernel's actual page size. The sematics happens ++// to match the AArch64 `adrp`, so the concept of "page" is borrowed here. ++static uint64_t getLoongArchPage(uint64_t p) { ++ return p & ~static_cast(0xfff); ++} ++ ++static uint32_t lo12(uint32_t val) { return val & 0xfff; } ++ ++// Calculate the adjusted page delta between dest and PC. ++uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { ++ // Consider the large code model access pattern, of which the smaller code ++ // models' access patterns are a subset: ++ // ++ // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] ++ // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] ++ // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] ++ // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] ++ // {ldx,stx,add}.* dest, U, T ++ // ++ // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, ++ // with RQ, P, ZY, X and A representing the respective bitfields as unsigned ++ // integers. 
We have: ++ // ++ // page(dest) = 0xZZZ'YYYYY'XXXXX'000 ++ // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 ++ // ---------------------------------- ++ // 0xddd'ccccc'bbbbb'000 ++ // ++ // Now consider the above pattern's actual effects: ++ // ++ // page(pc) 0xRRR'QQQQQ'PPPPP'000 ++ // pcalau12i + 0xiii'iiiii'bbbbb'000 ++ // addi + 0xjjj'jjjjj'kkkkk'AAA ++ // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 ++ // -------------------------------------------------- ++ // dest = U + T ++ // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) ++ // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A ++ // = (ZY<<32) + (X<<12) + A ++ // ++ // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k ++ // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k ++ // ++ // where i and k are terms representing the effect of b's and A's sign ++ // extension respectively. ++ // ++ // i = signed b < 0 ? -0x10000'0000 : 0 ++ // k = signed A < 0 ? -0x1000 : 0 ++ // ++ // The j term is a bit complex: it represents the higher half of ++ // sign-extended bits from A that are effectively lost if i == 0 but k != 0, ++ // due to overwriting by lu32i.d & lu52i.d. ++ // ++ // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 ++ // ++ // The actual effect of the instruction sequence before the final addition, ++ // i.e. our desired result value, is thus: ++ // ++ // result = (cd<<32) + (b<<12) ++ // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k ++ // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k ++ // = page(dest) - page(pc) - i - j - k ++ // ++ // when signed A >= 0 && signed b >= 0: ++ // ++ // i = j = k = 0 ++ // result = page(dest) - page(pc) ++ // ++ // when signed A >= 0 && signed b < 0: ++ // ++ // i = -0x10000'0000, j = k = 0 ++ // result = page(dest) - page(pc) + 0x10000'0000 ++ // ++ // when signed A < 0 && signed b >= 0: ++ // ++ // i = 0, j = 0x10000'0000, k = -0x1000 ++ // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 ++ // ++ // when signed A < 0 && signed b < 0: ++ // ++ // i = -0x10000'0000, j = 0, k = -0x1000 ++ // result = page(dest) - page(pc) + 0x1000 ++ uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); ++ bool negativeA = lo12(dest) > 0x7ff; ++ bool negativeB = (result & 0x8000'0000) != 0; ++ ++ if (negativeA) ++ result += 0x1000; ++ if (negativeA && !negativeB) ++ result -= 0x10000'0000; ++ else if (!negativeA && negativeB) ++ result += 0x10000'0000; ++ ++ return result; ++} ++ ++static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } ++ ++static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) { ++ return op | d | (j << 5) | (k << 10); ++} ++ ++// Extract bits v[begin:end], where range is inclusive. ++static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { ++ return begin == 63 ? 
v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; ++} ++ ++static uint32_t setD5k16(uint32_t insn, uint32_t imm) { ++ uint32_t immLo = extractBits(imm, 15, 0); ++ uint32_t immHi = extractBits(imm, 20, 16); ++ return (insn & 0xfc0003e0) | (immLo << 10) | immHi; ++} ++ ++static uint32_t setD10k16(uint32_t insn, uint32_t imm) { ++ uint32_t immLo = extractBits(imm, 15, 0); ++ uint32_t immHi = extractBits(imm, 25, 16); ++ return (insn & 0xfc000000) | (immLo << 10) | immHi; ++} ++ ++static uint32_t setJ20(uint32_t insn, uint32_t imm) { ++ return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5); ++} ++ ++static uint32_t setK12(uint32_t insn, uint32_t imm) { ++ return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10); ++} ++ ++static uint32_t setK16(uint32_t insn, uint32_t imm) { ++ return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10); ++} ++ ++static bool isJirl(uint32_t insn) { ++ return (insn & 0xfc000000) == JIRL; ++} ++ ++LoongArch::LoongArch() { ++ // The LoongArch ISA itself does not have a limit on page sizes. According to ++ // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is ++ // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to ++ // "unlimited". ++ // However, practically the maximum usable page size is constrained by the ++ // kernel implementation, and 64KiB is the biggest non-huge page size ++ // supported by Linux as of v6.4. The most widespread page size in use, ++ // though, is 16KiB. ++ defaultCommonPageSize = 16384; ++ defaultMaxPageSize = 65536; ++ write32le(trapInstr.data(), BREAK); // break 0 ++ ++ copyRel = R_LARCH_COPY; ++ pltRel = R_LARCH_JUMP_SLOT; ++ relativeRel = R_LARCH_RELATIVE; ++ iRelativeRel = R_LARCH_IRELATIVE; ++ ++ if (config->is64) { ++ symbolicRel = R_LARCH_64; ++ tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; ++ tlsOffsetRel = R_LARCH_TLS_DTPREL64; ++ tlsGotRel = R_LARCH_TLS_TPREL64; ++ } else { ++ symbolicRel = R_LARCH_32; ++ tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; ++ tlsOffsetRel = R_LARCH_TLS_DTPREL32; ++ tlsGotRel = R_LARCH_TLS_TPREL32; ++ } ++ ++ gotRel = symbolicRel; ++ ++ // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map ++ gotPltHeaderEntriesNum = 2; ++ ++ pltHeaderSize = 32; ++ pltEntrySize = 16; ++ ipltEntrySize = 16; ++} ++ ++static uint32_t getEFlags(const InputFile *f) { ++ if (config->is64) ++ return cast>(f)->getObj().getHeader().e_flags; ++ return cast>(f)->getObj().getHeader().e_flags; ++} ++ ++static bool inputFileHasCode(const InputFile *f) { ++ for (const auto *sec : f->getSections()) ++ if (sec && sec->flags & SHF_EXECINSTR) ++ return true; ++ ++ return false; ++} ++ ++uint32_t LoongArch::calcEFlags() const { ++ // If there are only binary input files (from -b binary), use a ++ // value of 0 for the ELF header flags. ++ if (ctx.objectFiles.empty()) ++ return 0; ++ ++ uint32_t target = 0; ++ const InputFile *targetFile; ++ for (const InputFile *f : ctx.objectFiles) { ++ // Do not enforce ABI compatibility if the input file does not contain code. ++ // This is useful for allowing linkage with data-only object files produced ++ // with tools like objcopy, that have zero e_flags. ++ if (!inputFileHasCode(f)) ++ continue; ++ ++ // Take the first non-zero e_flags as the reference. 
++ uint32_t flags = getEFlags(f); ++ if (target == 0 && flags != 0) { ++ target = flags; ++ targetFile = f; ++ } ++ ++ if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) != ++ (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) ++ error(toString(f) + ++ ": cannot link object files with different ABI from " + ++ toString(targetFile)); ++ ++ // We cannot process psABI v1.x / object ABI v0 files (containing stack ++ // relocations), unlike ld.bfd. ++ // ++ // Instead of blindly accepting every v0 object and only failing at ++ // relocation processing time, just disallow interlink altogether. We ++ // don't expect significant usage of object ABI v0 in the wild (the old ++ // world may continue using object ABI v0 for a while, but as it's not ++ // binary-compatible with the upstream i.e. new-world ecosystem, it's not ++ // being considered here). ++ // ++ // There are briefly some new-world systems with object ABI v0 binaries too. ++ // It is because these systems were built before the new ABI was finalized. ++ // These are not supported either due to the extremely small number of them, ++ // and the few impacted users are advised to simply rebuild world or ++ // reinstall a recent system. ++ if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) ++ error(toString(f) + ": unsupported object file ABI version"); ++ } ++ ++ return target; ++} ++ ++int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { ++ switch (type) { ++ default: ++ internalLinkerError(getErrorLocation(buf), ++ "cannot read addend for relocation " + toString(type)); ++ return 0; ++ case R_LARCH_32: ++ case R_LARCH_TLS_DTPMOD32: ++ case R_LARCH_TLS_DTPREL32: ++ case R_LARCH_TLS_TPREL32: ++ return SignExtend64<32>(read32le(buf)); ++ case R_LARCH_64: ++ case R_LARCH_TLS_DTPMOD64: ++ case R_LARCH_TLS_DTPREL64: ++ case R_LARCH_TLS_TPREL64: ++ return read64le(buf); ++ case R_LARCH_RELATIVE: ++ case R_LARCH_IRELATIVE: ++ return config->is64 ? read64le(buf) : read32le(buf); ++ case R_LARCH_NONE: ++ case R_LARCH_JUMP_SLOT: ++ // These relocations are defined as not having an implicit addend. ++ return 0; ++ } ++} ++ ++void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { ++ if (config->is64) ++ write64le(buf, in.plt->getVA()); ++ else ++ write32le(buf, in.plt->getVA()); ++} ++ ++void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { ++ if (config->writeAddends) { ++ if (config->is64) ++ write64le(buf, s.getVA()); ++ else ++ write32le(buf, s.getVA()); ++ } ++} ++ ++void LoongArch::writePltHeader(uint8_t *buf) const { ++ // The LoongArch PLT is currently structured just like that of RISCV. ++ // Annoyingly, this means the PLT is still using `pcaddu12i` to perform ++ // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), ++ // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that ++ // is used everywhere else involving PC-relative operations in the LoongArch ++ // ELF psABI v2.00. ++ // ++ // The `pcrel_{hi20,lo12}` operators are illustrative only and not really ++ // supported by LoongArch assemblers. 
++ // ++ // pcaddu12i $t2, %pcrel_hi20(.got.plt) ++ // sub.[wd] $t1, $t1, $t3 ++ // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve ++ // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0] ++ // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt) ++ // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] ++ // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map ++ // jr $t3 ++ uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); ++ uint32_t sub = config->is64 ? SUB_D : SUB_W; ++ uint32_t ld = config->is64 ? LD_D : LD_W; ++ uint32_t addi = config->is64 ? ADDI_D : ADDI_W; ++ uint32_t srli = config->is64 ? SRLI_D : SRLI_W; ++ write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0)); ++ write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3)); ++ write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset))); ++ write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12))); ++ write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset))); ++ write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2)); ++ write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize)); ++ write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0)); ++} ++ ++void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, ++ uint64_t pltEntryAddr) const { ++ // See the comment in writePltHeader for reason why pcaddu12i is used instead ++ // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. ++ // ++ // pcaddu12i $t3, %pcrel_hi20(f@.got.plt) ++ // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt) ++ // jirl $t1, $t3, 0 ++ // nop ++ uint32_t offset = sym.getGotPltVA() - pltEntryAddr; ++ write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0)); ++ write32le(buf + 4, ++ insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset))); ++ write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0)); ++ write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0)); ++} ++ ++RelType LoongArch::getDynRel(RelType type) const { ++ return type == target->symbolicRel ? type ++ : static_cast(R_LARCH_NONE); ++} ++ ++RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, ++ const uint8_t *loc) const { ++ switch (type) { ++ case R_LARCH_NONE: ++ case R_LARCH_MARK_LA: ++ case R_LARCH_MARK_PCREL: ++ return R_NONE; ++ case R_LARCH_32: ++ case R_LARCH_64: ++ case R_LARCH_ABS_HI20: ++ case R_LARCH_ABS_LO12: ++ case R_LARCH_ABS64_LO20: ++ case R_LARCH_ABS64_HI12: ++ return R_ABS; ++ case R_LARCH_PCALA_LO12: ++ // We could just R_ABS, but the JIRL instruction reuses the relocation type ++ // for a different purpose. The questionable usage is part of glibc 2.37 ++ // libc_nonshared.a [1], which is linked into user programs, so we have to ++ // work around it for a while, even if a new relocation type may be ++ // introduced in the future [2]. ++ // ++ // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a ++ // [2]: https://github.com/loongson/la-abi-specs/pull/3 ++ return isJirl(read32le(loc)) ? R_PLT : R_ABS; ++ case R_LARCH_TLS_DTPREL32: ++ case R_LARCH_TLS_DTPREL64: ++ return R_DTPREL; ++ case R_LARCH_TLS_TPREL32: ++ case R_LARCH_TLS_TPREL64: ++ case R_LARCH_TLS_LE_HI20: ++ case R_LARCH_TLS_LE_LO12: ++ case R_LARCH_TLS_LE64_LO20: ++ case R_LARCH_TLS_LE64_HI12: ++ return R_TPREL; ++ case R_LARCH_ADD8: ++ case R_LARCH_ADD16: ++ case R_LARCH_ADD32: ++ case R_LARCH_ADD64: ++ case R_LARCH_SUB8: ++ case R_LARCH_SUB16: ++ case R_LARCH_SUB32: ++ case R_LARCH_SUB64: ++ // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse ++ // the RelExpr to avoid code duplication. 
++ return R_RISCV_ADD; ++ case R_LARCH_32_PCREL: ++ case R_LARCH_64_PCREL: ++ return R_PC; ++ case R_LARCH_B16: ++ case R_LARCH_B21: ++ case R_LARCH_B26: ++ return R_PLT_PC; ++ case R_LARCH_GOT_PC_HI20: ++ case R_LARCH_GOT64_PC_LO20: ++ case R_LARCH_GOT64_PC_HI12: ++ case R_LARCH_TLS_IE_PC_HI20: ++ case R_LARCH_TLS_IE64_PC_LO20: ++ case R_LARCH_TLS_IE64_PC_HI12: ++ return R_LOONGARCH_GOT_PAGE_PC; ++ case R_LARCH_GOT_PC_LO12: ++ case R_LARCH_TLS_IE_PC_LO12: ++ return R_LOONGARCH_GOT; ++ case R_LARCH_TLS_LD_PC_HI20: ++ case R_LARCH_TLS_GD_PC_HI20: ++ return R_LOONGARCH_TLSGD_PAGE_PC; ++ case R_LARCH_PCALA_HI20: ++ // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT ++ // anyway so why waste time checking only to get everything relaxed back to ++ // it? ++ // ++ // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want ++ // both the HI20 and LO12 to potentially refer to the PLT. But in reality ++ // the HI20 reloc appears earlier, and the relocs don't contain enough ++ // information to let us properly resolve semantics per symbol. ++ // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 ++ // relocs, hence it is nearly impossible to 100% accurately determine each ++ // HI20's "flavor" without taking big performance hits, in the presence of ++ // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far ++ // apart that relationship is not certain anymore), and programmer mistakes ++ // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3). ++ // ++ // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark ++ // every HI20 reloc referring to the same symbol differently; this is not ++ // feasible with the current function signature of getRelExpr that doesn't ++ // allow for such inter-pass state. ++ // ++ // So, unfortunately we have to again workaround this quirk the same way as ++ // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only ++ // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later ++ // stage. ++ return R_LOONGARCH_PLT_PAGE_PC; ++ case R_LARCH_PCALA64_LO20: ++ case R_LARCH_PCALA64_HI12: ++ return R_LOONGARCH_PAGE_PC; ++ case R_LARCH_GOT_HI20: ++ case R_LARCH_GOT_LO12: ++ case R_LARCH_GOT64_LO20: ++ case R_LARCH_GOT64_HI12: ++ case R_LARCH_TLS_IE_HI20: ++ case R_LARCH_TLS_IE_LO12: ++ case R_LARCH_TLS_IE64_LO20: ++ case R_LARCH_TLS_IE64_HI12: ++ return R_GOT; ++ case R_LARCH_TLS_LD_HI20: ++ return R_TLSLD_GOT; ++ case R_LARCH_TLS_GD_HI20: ++ return R_TLSGD_GOT; ++ case R_LARCH_RELAX: ++ // LoongArch linker relaxation is not implemented yet. ++ return R_NONE; ++ ++ // Other known relocs that are explicitly unimplemented: ++ // ++ // - psABI v1 relocs that need a stateful stack machine to work, and not ++ // required when implementing psABI v2; ++ // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the ++ // two GNU vtable-related relocs). 
++ // ++ // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51 ++ default: ++ error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ++ ") against symbol " + toString(s)); ++ return R_NONE; ++ } ++} ++ ++bool LoongArch::usesOnlyLowPageBits(RelType type) const { ++ switch (type) { ++ default: ++ return false; ++ case R_LARCH_PCALA_LO12: ++ case R_LARCH_GOT_LO12: ++ case R_LARCH_GOT_PC_LO12: ++ case R_LARCH_TLS_IE_PC_LO12: ++ return true; ++ } ++} ++ ++void LoongArch::relocate(uint8_t *loc, const Relocation &rel, ++ uint64_t val) const { ++ switch (rel.type) { ++ case R_LARCH_32_PCREL: ++ checkInt(loc, val, 32, rel); ++ [[fallthrough]]; ++ case R_LARCH_32: ++ case R_LARCH_TLS_DTPREL32: ++ write32le(loc, val); ++ return; ++ case R_LARCH_64: ++ case R_LARCH_TLS_DTPREL64: ++ case R_LARCH_64_PCREL: ++ write64le(loc, val); ++ return; ++ ++ case R_LARCH_B16: ++ checkInt(loc, val, 18, rel); ++ checkAlignment(loc, val, 4, rel); ++ write32le(loc, setK16(read32le(loc), val >> 2)); ++ return; ++ ++ case R_LARCH_B21: ++ checkInt(loc, val, 23, rel); ++ checkAlignment(loc, val, 4, rel); ++ write32le(loc, setD5k16(read32le(loc), val >> 2)); ++ return; ++ ++ case R_LARCH_B26: ++ checkInt(loc, val, 28, rel); ++ checkAlignment(loc, val, 4, rel); ++ write32le(loc, setD10k16(read32le(loc), val >> 2)); ++ return; ++ ++ // Relocs intended for `addi`, `ld` or `st`. ++ case R_LARCH_PCALA_LO12: ++ // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 ++ // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes ++ // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly ++ // its immediate slot width is different too (16, not 12). ++ // In this case, process like an R_LARCH_B16, but without overflow checking ++ // and only taking the value's lowest 12 bits. ++ if (isJirl(read32le(loc))) { ++ checkAlignment(loc, val, 4, rel); ++ val = SignExtend64<12>(val); ++ write32le(loc, setK16(read32le(loc), val >> 2)); ++ return; ++ } ++ [[fallthrough]]; ++ case R_LARCH_ABS_LO12: ++ case R_LARCH_GOT_PC_LO12: ++ case R_LARCH_GOT_LO12: ++ case R_LARCH_TLS_LE_LO12: ++ case R_LARCH_TLS_IE_PC_LO12: ++ case R_LARCH_TLS_IE_LO12: ++ write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); ++ return; ++ ++ // Relocs intended for `lu12i.w` or `pcalau12i`. ++ case R_LARCH_ABS_HI20: ++ case R_LARCH_PCALA_HI20: ++ case R_LARCH_GOT_PC_HI20: ++ case R_LARCH_GOT_HI20: ++ case R_LARCH_TLS_LE_HI20: ++ case R_LARCH_TLS_IE_PC_HI20: ++ case R_LARCH_TLS_IE_HI20: ++ case R_LARCH_TLS_LD_PC_HI20: ++ case R_LARCH_TLS_LD_HI20: ++ case R_LARCH_TLS_GD_PC_HI20: ++ case R_LARCH_TLS_GD_HI20: ++ write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); ++ return; ++ ++ // Relocs intended for `lu32i.d`. ++ case R_LARCH_ABS64_LO20: ++ case R_LARCH_PCALA64_LO20: ++ case R_LARCH_GOT64_PC_LO20: ++ case R_LARCH_GOT64_LO20: ++ case R_LARCH_TLS_LE64_LO20: ++ case R_LARCH_TLS_IE64_PC_LO20: ++ case R_LARCH_TLS_IE64_LO20: ++ write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); ++ return; ++ ++ // Relocs intended for `lu52i.d`. 
++ case R_LARCH_ABS64_HI12: ++ case R_LARCH_PCALA64_HI12: ++ case R_LARCH_GOT64_PC_HI12: ++ case R_LARCH_GOT64_HI12: ++ case R_LARCH_TLS_LE64_HI12: ++ case R_LARCH_TLS_IE64_PC_HI12: ++ case R_LARCH_TLS_IE64_HI12: ++ write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); ++ return; ++ ++ case R_LARCH_ADD8: ++ *loc += val; ++ return; ++ case R_LARCH_ADD16: ++ write16le(loc, read16le(loc) + val); ++ return; ++ case R_LARCH_ADD32: ++ write32le(loc, read32le(loc) + val); ++ return; ++ case R_LARCH_ADD64: ++ write64le(loc, read64le(loc) + val); ++ return; ++ case R_LARCH_SUB8: ++ *loc -= val; ++ return; ++ case R_LARCH_SUB16: ++ write16le(loc, read16le(loc) - val); ++ return; ++ case R_LARCH_SUB32: ++ write32le(loc, read32le(loc) - val); ++ return; ++ case R_LARCH_SUB64: ++ write64le(loc, read64le(loc) - val); ++ return; ++ ++ case R_LARCH_MARK_LA: ++ case R_LARCH_MARK_PCREL: ++ // no-op ++ return; ++ ++ case R_LARCH_RELAX: ++ return; // Ignored (for now) ++ ++ default: ++ llvm_unreachable("unknown relocation"); ++ } ++} ++ ++TargetInfo *elf::getLoongArchTargetInfo() { ++ static LoongArch target; ++ return ⌖ ++} +diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt +index 8e6a746d219e..89955db67733 100644 +--- a/lld/ELF/CMakeLists.txt ++++ b/lld/ELF/CMakeLists.txt +@@ -25,6 +25,7 @@ add_lld_library(lldELF + Arch/ARM.cpp + Arch/AVR.cpp + Arch/Hexagon.cpp ++ Arch/LoongArch.cpp + Arch/Mips.cpp + Arch/MipsArchTree.cpp + Arch/MSP430.cpp +diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp +index 7e2a72acf8f6..3c1803be6fb9 100644 +--- a/lld/ELF/Driver.cpp ++++ b/lld/ELF/Driver.cpp +@@ -167,6 +167,7 @@ static std::tuple parseEmulation(StringRef emul) { + .Case("elf32lriscv", {ELF32LEKind, EM_RISCV}) + .Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC}) + .Cases("elf32lppc", "elf32lppclinux", {ELF32LEKind, EM_PPC}) ++ .Case("elf32loongarch", {ELF32LEKind, EM_LOONGARCH}) + .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) + .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) + .Case("elf64lriscv", {ELF64LEKind, EM_RISCV}) +@@ -178,6 +179,7 @@ static std::tuple parseEmulation(StringRef emul) { + .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) + .Case("msp430elf", {ELF32LEKind, EM_MSP430}) + .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU}) ++ .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Default({ELFNoneKind, EM_NONE}); + + if (ret.first == ELFNoneKind) +@@ -1032,8 +1034,9 @@ static bool getIsRela(opt::InputArgList &args) { + + // Otherwise use the psABI defined relocation entry format. + uint16_t m = config->emachine; +- return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC || +- m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64; ++ return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || ++ m == EM_LOONGARCH || m == EM_PPC || m == EM_PPC64 || m == EM_RISCV || ++ m == EM_X86_64; + } + + static void parseClangOption(StringRef opt, const Twine &msg) { +@@ -1570,8 +1573,9 @@ static void setConfigs(opt::InputArgList &args) { + // have support for reading Elf_Rel addends, so we only enable for a subset. 
+ #ifndef NDEBUG + bool checkDynamicRelocsDefault = m == EM_AARCH64 || m == EM_ARM || +- m == EM_386 || m == EM_MIPS || +- m == EM_X86_64 || m == EM_RISCV; ++ m == EM_386 || m == EM_LOONGARCH || ++ m == EM_MIPS || m == EM_RISCV || ++ m == EM_X86_64; + #else + bool checkDynamicRelocsDefault = false; + #endif +diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp +index 7dacdeb9f042..c43b4afd6cb6 100644 +--- a/lld/ELF/InputFiles.cpp ++++ b/lld/ELF/InputFiles.cpp +@@ -1534,6 +1534,9 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { + return EM_AVR; + case Triple::hexagon: + return EM_HEXAGON; ++ case Triple::loongarch32: ++ case Triple::loongarch64: ++ return EM_LOONGARCH; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: +diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp +index df24f998bff6..d56291cbd84a 100644 +--- a/lld/ELF/InputSection.cpp ++++ b/lld/ELF/InputSection.cpp +@@ -609,6 +609,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { + // to allow a signed 16-bit offset to reach 0x1000 of TCB/thread-library + // data and 0xf000 of the program's TLS segment. + return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000; ++ case EM_LOONGARCH: + case EM_RISCV: + return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); + +@@ -643,6 +644,14 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_GOT: + case R_RELAX_TLS_GD_TO_IE_ABS: + return sym.getGotVA() + a; ++ case R_LOONGARCH_GOT: ++ // The LoongArch TLS GD relocs reuse the R_LARCH_GOT_PC_LO12 reloc type ++ // for their page offsets. The arithmetics are different in the TLS case ++ // so we have to duplicate some logic here. ++ if (sym.hasFlag(NEEDS_TLSGD) && type != R_LARCH_TLS_IE_PC_LO12) ++ // Like R_LOONGARCH_TLSGD_PAGE_PC but taking the absolute value. 
++ return in.got->getGlobalDynAddr(sym) + a; ++ return getRelocTargetVA(file, type, a, p, sym, R_GOT); + case R_GOTONLY_PC: + return in.got->getVA() + a - p; + case R_GOTPLTONLY_PC: +@@ -667,6 +676,10 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_GOT_PC: + case R_RELAX_TLS_GD_TO_IE: + return sym.getGotVA() + a - p; ++ case R_LOONGARCH_GOT_PAGE_PC: ++ if (sym.hasFlag(NEEDS_TLSGD)) ++ return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); ++ return getLoongArchPageDelta(sym.getGotVA() + a, p); + case R_MIPS_GOTREL: + return sym.getVA(a) - in.mipsGot->getGp(file); + case R_MIPS_GOT_GP: +@@ -715,6 +728,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + *hiRel->sym, hiRel->expr); + return 0; + } ++ case R_LOONGARCH_PAGE_PC: ++ return getLoongArchPageDelta(sym.getVA(a), p); + case R_PC: + case R_ARM_PCA: { + uint64_t dest; +@@ -748,6 +763,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_PLT_PC: + case R_PPC64_CALL_PLT: + return sym.getPltVA() + a - p; ++ case R_LOONGARCH_PLT_PAGE_PC: ++ return getLoongArchPageDelta(sym.getPltVA() + a, p); + case R_PLT_GOTPLT: + return sym.getPltVA() + a - in.gotPlt->getVA(); + case R_PPC32_PLTREL: +@@ -808,6 +825,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + return in.got->getGlobalDynAddr(sym) + a - in.gotPlt->getVA(); + case R_TLSGD_PC: + return in.got->getGlobalDynAddr(sym) + a - p; ++ case R_LOONGARCH_TLSGD_PAGE_PC: ++ return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); + case R_TLSLD_GOTPLT: + return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); + case R_TLSLD_GOT: +diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp +index aeba918292a7..b43c052552ab 100644 +--- a/lld/ELF/Relocations.cpp ++++ b/lld/ELF/Relocations.cpp +@@ -190,8 +190,8 @@ static bool isAbsoluteValue(const Symbol &sym) { + + // Returns true if Expr refers a PLT entry. + static bool needsPlt(RelExpr expr) { +- return oneof( +- expr); ++ return oneof(expr); + } + + // Returns true if Expr refers a GOT entry. 
Note that this function +@@ -200,7 +200,8 @@ static bool needsPlt(RelExpr expr) { + static bool needsGot(RelExpr expr) { + return oneof(expr); ++ R_AARCH64_GOT_PAGE, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( ++ expr); + } + + // True if this expression is of the form Sym - X, where X is a position in the +@@ -208,12 +209,14 @@ static bool needsGot(RelExpr expr) { + static bool isRelExpr(RelExpr expr) { + return oneof(expr); ++ R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC, R_LOONGARCH_PAGE_PC>( ++ expr); + } + +- + static RelExpr toPlt(RelExpr expr) { + switch (expr) { ++ case R_LOONGARCH_PAGE_PC: ++ return R_LOONGARCH_PLT_PAGE_PC; + case R_PPC64_CALL: + return R_PPC64_CALL_PLT; + case R_PC: +@@ -232,6 +235,8 @@ static RelExpr fromPlt(RelExpr expr) { + case R_PLT_PC: + case R_PPC32_PLTREL: + return R_PC; ++ case R_LOONGARCH_PLT_PAGE_PC: ++ return R_LOONGARCH_PAGE_PC; + case R_PPC64_CALL_PLT: + return R_PPC64_CALL; + case R_PLT: +@@ -946,7 +951,9 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, + R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, + R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, + R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT, +- R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e)) ++ R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, ++ R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( ++ e)) + return true; + + // These never do, except if the entire file is position dependent or if +@@ -1050,7 +1057,9 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, + // for detailed description: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + in.mipsGot->addEntry(*sec->file, sym, addend, expr); +- } else { ++ } else if (!sym.isTls() || config->emachine != EM_LOONGARCH) { ++ // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which ++ // case the NEEDS_GOT flag shouldn't get set. + sym.setFlags(NEEDS_GOT); + } + } else if (needsPlt(expr)) { +@@ -1090,7 +1099,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, + (isa(sec) && config->emachine != EM_MIPS)); + if (canWrite) { + RelType rel = target->getDynRel(type); +- if (expr == R_GOT || (rel == target->symbolicRel && !sym.isPreemptible)) { ++ if (oneof(expr) || ++ (rel == target->symbolicRel && !sym.isPreemptible)) { + addRelativeReloc(*sec, offset, sym, addend, expr, type); + return; + } else if (rel != 0) { +@@ -1242,11 +1252,13 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + return 1; + } + +- // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For +- // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable ++ // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE ++ // relaxation. ++ // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable + // relaxation as well. + bool toExecRelax = !config->shared && config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && ++ config->emachine != EM_LOONGARCH && + config->emachine != EM_RISCV && + !c.file->ppc64DisableTLSRelax; + +@@ -1263,8 +1275,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + // being suitable for being dynamically loaded via dlopen. GOT[e0] is the + // module index, with a special value of 0 for the current module. GOT[e1] is + // unused. There only needs to be one module index entry. 
+- if (oneof( +- expr)) { ++ if (oneof(expr)) { + // Local-Dynamic relocs can be relaxed to Local-Exec. + if (toExecRelax) { + c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, +@@ -1295,7 +1306,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + } + + if (oneof(expr)) { ++ R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, ++ R_LOONGARCH_TLSGD_PAGE_PC>(expr)) { + if (!toExecRelax) { + sym.setFlags(NEEDS_TLSGD); + c.addReloc({expr, type, offset, addend, &sym}); +@@ -1315,8 +1327,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + return target->getTlsGdRelaxSkip(type); + } + +- if (oneof(expr)) { ++ if (oneof(expr)) { + ctx.hasTlsIe.store(true, std::memory_order_relaxed); + // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally + // defined. +diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h +index 29e3edeca6be..e36215bd0d93 100644 +--- a/lld/ELF/Relocations.h ++++ b/lld/ELF/Relocations.h +@@ -102,6 +102,15 @@ enum RelExpr { + R_PPC64_RELAX_GOT_PC, + R_RISCV_ADD, + R_RISCV_PC_INDIRECT, ++ // Same as R_PC but with page-aligned semantics. ++ R_LOONGARCH_PAGE_PC, ++ // Same as R_PLT_PC but with page-aligned semantics. ++ R_LOONGARCH_PLT_PAGE_PC, ++ // In addition to having page-aligned semantics, LoongArch GOT relocs are ++ // also reused for TLS, making the semantics differ from other architectures. ++ R_LOONGARCH_GOT, ++ R_LOONGARCH_GOT_PAGE_PC, ++ R_LOONGARCH_TLSGD_PAGE_PC, + }; + + // Architecture-neutral representation of relocation. +diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp +index bb09bde5d22e..c7b107515d31 100644 +--- a/lld/ELF/ScriptParser.cpp ++++ b/lld/ELF/ScriptParser.cpp +@@ -438,6 +438,8 @@ static std::pair parseBfdName(StringRef s) { + .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) + .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) + .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) ++ .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) ++ .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Default({ELFNoneKind, EM_NONE}); + } + +diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp +index 3873c7a25e44..32bb2164a208 100644 +--- a/lld/ELF/Target.cpp ++++ b/lld/ELF/Target.cpp +@@ -62,6 +62,8 @@ TargetInfo *elf::getTarget() { + return getAVRTargetInfo(); + case EM_HEXAGON: + return getHexagonTargetInfo(); ++ case EM_LOONGARCH: ++ return getLoongArchTargetInfo(); + case EM_MIPS: + switch (config->ekind) { + case ELF32LEKind: +diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h +index e6a78169058a..d1a4fb195b89 100644 +--- a/lld/ELF/Target.h ++++ b/lld/ELF/Target.h +@@ -172,6 +172,7 @@ TargetInfo *getAMDGPUTargetInfo(); + TargetInfo *getARMTargetInfo(); + TargetInfo *getAVRTargetInfo(); + TargetInfo *getHexagonTargetInfo(); ++TargetInfo *getLoongArchTargetInfo(); + TargetInfo *getMSP430TargetInfo(); + TargetInfo *getPPC64TargetInfo(); + TargetInfo *getPPCTargetInfo(); +@@ -215,6 +216,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn); + void addPPC64SaveRestore(); + uint64_t getPPC64TocBase(); + uint64_t getAArch64Page(uint64_t expr); ++uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); + void riscvFinalizeRelax(int passes); + void mergeRISCVAttributesSections(); + +diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst +index a450923cded9..290244af1eb9 100644 +--- a/lld/docs/ReleaseNotes.rst ++++ b/lld/docs/ReleaseNotes.rst +@@ -59,6 +59,65 @@ ELF Improvements + * Armv4(T) thunks are now supported. 
+ (`D139888 `_) + (`D141272 `_) ++* When ``--threads=`` is not specified, the number of concurrency is now capped to 16. ++ A large ``--thread=`` can harm performance, especially with some system ++ malloc implementations like glibc's. ++ (`D147493 `_) ++* ``--remap-inputs=`` and ``--remap-inputs-file=`` are added to remap input files. ++ (`D148859 `_) ++* ``--lto=`` is now available to support ``clang -funified-lto`` ++ (`D123805 `_) ++* ``--lto-CGO[0-3]`` is now available to control ``CodeGenOpt::Level`` independent of the LTO optimization level. ++ (`D141970 `_) ++* ``--check-dynamic-relocations=`` is now correct 32-bit targets when the addend is larger than 0x80000000. ++ (`D149347 `_) ++* ``--print-memory-usage`` has been implemented for memory regions. ++ (`D150644 `_) ++* ``SHF_MERGE``, ``--icf=``, and ``--build-id=fast`` have switched to 64-bit xxh3. ++ (`D154813 `_) ++* Quoted output section names can now be used in linker scripts. ++ (`#60496 `_) ++* ``MEMORY`` can now be used without a ``SECTIONS`` command. ++ (`D145132 `_) ++* ``REVERSE`` can now be used in input section descriptions to reverse the order of input sections. ++ (`D145381 `_) ++* Program header assignment can now be used within ``OVERLAY``. This functionality was accidentally lost in 2020. ++ (`D150445 `_) ++* Operators ``^`` and ``^=`` can now be used in linker scripts. ++* LoongArch is now supported. ++* ``DT_AARCH64_MEMTAG_*`` dynamic tags are now supported. ++ (`D143769 `_) ++* AArch32 port now supports BE-8 and BE-32 modes for big-endian. ++ (`D140201 `_) ++ (`D140202 `_) ++ (`D150870 `_) ++* ``R_ARM_THM_ALU_ABS_G*`` relocations are now supported. ++ (`D153407 `_) ++* ``.ARM.exidx`` sections may start at non-zero output section offset. ++ (`D148033 `_) ++* Arm Cortex-M Security Extensions is now implemented. ++ (`D139092 `_) ++* BTI landing pads are now added to PLT entries accessed by range extension thunks or relative vtables. ++ (`D148704 `_) ++ (`D153264 `_) ++* AArch64 short range thunk has been implemented to mitigate the performance loss of a long range thunk. ++ (`D148701 `_) ++* ``R_AVR_8_LO8/R_AVR_8_HI8/R_AVR_8_HLO8/R_AVR_LO8_LDI_GS/R_AVR_HI8_LDI_GS`` have been implemented. ++ (`D147100 `_) ++ (`D147364 `_) ++* ``--no-power10-stubs`` now works for PowerPC64. ++* ``DT_PPC64_OPT`` is now supported; ++ (`D150631 `_) ++* ``PT_RISCV_ATTRIBUTES`` is added to include the SHT_RISCV_ATTRIBUTES section. ++ (`D152065 `_) ++* ``R_RISCV_PLT32`` is added to support C++ relative vtables. ++ (`D143115 `_) ++* RISC-V global pointer relaxation has been implemented. Specify ``--relax-gp`` to enable the linker relaxation. ++ (`D143673 `_) ++* The symbol value of ``foo`` is correctly handled when ``--wrap=foo`` and RISC-V linker relaxation are used. ++ (`D151768 `_) ++* x86-64 large data sections are now placed away from code sections to alleviate relocation overflow pressure. ++ (`D150510 `_) + + Breaking changes + ---------------- +diff --git a/lld/docs/index.rst b/lld/docs/index.rst +index ce6320333243..a3407d3b9db0 100644 +--- a/lld/docs/index.rst ++++ b/lld/docs/index.rst +@@ -22,10 +22,11 @@ Features + machine, you can expect that LLD runs more than twice as fast as the GNU + gold linker. Your mileage may vary, though. + +-- It supports various CPUs/ABIs including AArch64, AMDGPU, ARM, Hexagon, MIPS +- 32/64 big/little-endian, PowerPC, PowerPC64, RISC-V, SPARC V9, x86-32 and +- x86-64. Among these, AArch64, ARM (>= v6), PowerPC, PowerPC64, x86-32 and +- x86-64 have production quality. MIPS seems decent too. 
++- It supports various CPUs/ABIs including AArch64, AMDGPU, ARM, Hexagon, ++ LoongArch, MIPS 32/64 big/little-endian, PowerPC, PowerPC64, RISC-V, ++ SPARC V9, x86-32 and x86-64. Among these, AArch64, ARM (>= v4), LoongArch, ++ PowerPC, PowerPC64, RISC-V, x86-32 and x86-64 have production quality. ++ MIPS seems decent too. + + - It is always a cross-linker, meaning that it always supports all the + above targets however it was built. In fact, we don't provide a +diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 +index edeb7c4bfe37..4889d04b924f 100644 +--- a/lld/docs/ld.lld.1 ++++ b/lld/docs/ld.lld.1 +@@ -4,7 +4,7 @@ + .\" + .\" This man page documents only lld's ELF linking support, obtained originally + .\" from FreeBSD. +-.Dd May 12, 2019 ++.Dd Jul 25, 2023 + .Dt LD.LLD 1 + .Os + .Sh NAME +@@ -27,8 +27,8 @@ It accepts most of the same command line arguments and linker scripts + as GNU linkers. + .Pp + .Nm +-currently supports i386, x86-64, ARM, AArch64, PowerPC32, PowerPC64, +-MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets. ++currently supports i386, x86-64, ARM, AArch64, LoongArch, PowerPC32, ++PowerPC64, MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets. + .Nm + acts as a Microsoft link.exe-compatible linker if invoked as + .Nm lld-link +diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s +new file mode 100644 +index 000000000000..343e836274a3 +--- /dev/null ++++ b/lld/test/ELF/emulation-loongarch.s +@@ -0,0 +1,78 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc -filetype=obj -triple=loongarch32 %s -o %t.o ++# RUN: ld.lld %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s ++# RUN: ld.lld -m elf32loongarch %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s ++# RUN: echo 'OUTPUT_FORMAT(elf32-loongarch)' > %t.script ++# RUN: ld.lld %t.script %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s ++ ++# LA32: ElfHeader { ++# LA32-NEXT: Ident { ++# LA32-NEXT: Magic: (7F 45 4C 46) ++# LA32-NEXT: Class: 32-bit (0x1) ++# LA32-NEXT: DataEncoding: LittleEndian (0x1) ++# LA32-NEXT: FileVersion: 1 ++# LA32-NEXT: OS/ABI: SystemV (0x0) ++# LA32-NEXT: ABIVersion: 0 ++# LA32-NEXT: Unused: (00 00 00 00 00 00 00) ++# LA32-NEXT: } ++# LA32-NEXT: Type: Executable (0x2) ++# LA32-NEXT: Machine: EM_LOONGARCH (0x102) ++# LA32-NEXT: Version: 1 ++# LA32-NEXT: Entry: ++# LA32-NEXT: ProgramHeaderOffset: 0x34 ++# LA32-NEXT: SectionHeaderOffset: ++# LA32-NEXT: Flags [ (0x43) ++# LA32-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) ++# LA32-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) ++# LA32-NEXT: ] ++# LA32-NEXT: HeaderSize: 52 ++# LA32-NEXT: ProgramHeaderEntrySize: 32 ++# LA32-NEXT: ProgramHeaderCount: ++# LA32-NEXT: SectionHeaderEntrySize: 40 ++# LA32-NEXT: SectionHeaderCount: ++# LA32-NEXT: StringTableSectionIndex: ++# LA32-NEXT: } ++ ++# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o ++# RUN: ld.lld %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s ++# RUN: ld.lld -m elf64loongarch %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s ++# RUN: echo 'OUTPUT_FORMAT(elf64-loongarch)' > %t.script ++# RUN: ld.lld %t.script %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s ++ ++# LA64: ElfHeader { ++# LA64-NEXT: Ident { ++# LA64-NEXT: Magic: (7F 45 4C 46) ++# LA64-NEXT: Class: 64-bit (0x2) ++# LA64-NEXT: DataEncoding: LittleEndian (0x1) ++# LA64-NEXT: 
FileVersion: 1 ++# LA64-NEXT: OS/ABI: SystemV (0x0) ++# LA64-NEXT: ABIVersion: 0 ++# LA64-NEXT: Unused: (00 00 00 00 00 00 00) ++# LA64-NEXT: } ++# LA64-NEXT: Type: Executable (0x2) ++# LA64-NEXT: Machine: EM_LOONGARCH (0x102) ++# LA64-NEXT: Version: 1 ++# LA64-NEXT: Entry: ++# LA64-NEXT: ProgramHeaderOffset: 0x40 ++# LA64-NEXT: SectionHeaderOffset: ++# LA64-NEXT: Flags [ (0x43) ++# LA64-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) ++# LA64-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) ++# LA64-NEXT: ] ++# LA64-NEXT: HeaderSize: 64 ++# LA64-NEXT: ProgramHeaderEntrySize: 56 ++# LA64-NEXT: ProgramHeaderCount: ++# LA64-NEXT: SectionHeaderEntrySize: 64 ++# LA64-NEXT: SectionHeaderCount: ++# LA64-NEXT: StringTableSectionIndex: ++# LA64-NEXT: } ++ ++.globl _start ++_start: +diff --git a/lld/test/ELF/loongarch-abs64.s b/lld/test/ELF/loongarch-abs64.s +new file mode 100644 +index 000000000000..4bfe7df9135a +--- /dev/null ++++ b/lld/test/ELF/loongarch-abs64.s +@@ -0,0 +1,64 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0 --defsym bar=42 -o %t.la64.1 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.1 | FileCheck --check-prefix=CASE1 %s ++# CASE1: lu12i.w $a0, 0 ++# CASE1-NEXT: ori $a0, $a0, 0 ++# CASE1-NEXT: lu32i.d $a0, 0 ++# CASE1-NEXT: lu52i.d $a0, $a0, 0 ++# CASE1-NEXT: lu12i.w $a1, 0 ++# CASE1-NEXT: ori $a1, $a1, 42 ++# CASE1-NEXT: lu32i.d $a1, 0 ++# CASE1-NEXT: lu52i.d $a1, $a1, 0 ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0x12345678 --defsym bar=0x87654321 -o %t.la64.2 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.2 | FileCheck --check-prefix=CASE2 %s ++# CASE2: lu12i.w $a0, 74565 ++# CASE2-NEXT: ori $a0, $a0, 1656 ++# CASE2-NEXT: lu32i.d $a0, 0 ++# CASE2-NEXT: lu52i.d $a0, $a0, 0 ++# CASE2-NEXT: lu12i.w $a1, -493996 ++# CASE2-NEXT: ori $a1, $a1, 801 ++# CASE2-NEXT: lu32i.d $a1, 0 ++# CASE2-NEXT: lu52i.d $a1, $a1, 0 ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0x12345fedcb678 --defsym bar=0xfedcb12345000 -o %t.la64.3 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.3 | FileCheck --check-prefix=CASE3 %s ++# CASE3: lu12i.w $a0, -4661 ++# CASE3-NEXT: ori $a0, $a0, 1656 ++# CASE3-NEXT: lu32i.d $a0, 74565 ++# CASE3-NEXT: lu52i.d $a0, $a0, 0 ++# CASE3-NEXT: lu12i.w $a1, 74565 ++# CASE3-NEXT: ori $a1, $a1, 0 ++# CASE3-NEXT: lu32i.d $a1, -4661 ++# CASE3-NEXT: lu52i.d $a1, $a1, 0 ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0xfffffeeeeeddd --defsym bar=0xfff00000f1111222 -o %t.la64.4 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.4 | FileCheck --check-prefix=CASE4 %s ++# CASE4: lu12i.w $a0, -69906 ++# CASE4-NEXT: ori $a0, $a0, 3549 ++# CASE4-NEXT: lu32i.d $a0, -1 ++# CASE4-NEXT: lu52i.d $a0, $a0, 0 ++# CASE4-NEXT: lu12i.w $a1, -61167 ++# CASE4-NEXT: ori $a1, $a1, 546 ++# CASE4-NEXT: lu32i.d $a1, 0 ++# CASE4-NEXT: lu52i.d $a1, $a1, -1 ++ ++.global _start ++ ++_start: ++1: ++ lu12i.w $a0, %abs_hi20(foo) ++.reloc 1b, R_LARCH_MARK_LA, foo ++ ori $a0, $a0, %abs_lo12(foo) ++ lu32i.d $a0, %abs64_lo20(foo) ++ lu52i.d $a0, $a0, %abs64_hi12(foo) ++ ++2: ++ lu12i.w $a1, %abs_hi20(bar) ++.reloc 1b, R_LARCH_MARK_LA, bar ++ ori $a1, $a1, %abs_lo12(bar) ++ lu32i.d $a1, %abs64_lo20(bar) ++ lu52i.d $a1, $a1, %abs64_hi12(bar) +diff --git a/lld/test/ELF/loongarch-add-sub.s b/lld/test/ELF/loongarch-add-sub.s +new file mode 100644 +index 000000000000..963e4cbbe0fc +--- /dev/null ++++ b/lld/test/ELF/loongarch-add-sub.s +@@ -0,0 +1,36 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj 
--triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld --section-start=.rodata=0x1234567890 --section-start=.text=0x9876543210 %t.la64.o -o %t.la64 ++# RUN: llvm-readelf -x .rodata %t.la64 | FileCheck --check-prefix=CHECK %s ++# CHECK: section '.rodata': ++# CHECK-NEXT: 0x1234567890 10325476 98badcfe 80b9fd41 86000000 ++# CHECK-NEXT: 0x12345678a0 80b9fd41 80b980 ++ ++.global _start ++ ++_start: ++1: ++ break 0 ++ ++.rodata ++2: ++ .dword 0xfedcba9876543210 ++ ++foo: ++ .dword 0 ++ .reloc foo, R_LARCH_ADD64, 1b ++ .reloc foo, R_LARCH_SUB64, 2b ++bar: ++ .word 0 ++ .reloc bar, R_LARCH_ADD32, 1b ++ .reloc bar, R_LARCH_SUB32, 2b ++baz: ++ .short 0 ++ .reloc baz, R_LARCH_ADD16, 1b ++ .reloc baz, R_LARCH_SUB16, 2b ++quux: ++ .byte 0 ++ .reloc quux, R_LARCH_ADD8, 1b ++ .reloc quux, R_LARCH_SUB8, 2b +diff --git a/lld/test/ELF/loongarch-branch.s b/lld/test/ELF/loongarch-branch.s +new file mode 100644 +index 000000000000..b223ff95bd89 +--- /dev/null ++++ b/lld/test/ELF/loongarch-branch.s +@@ -0,0 +1,68 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld %t.la32.o --defsym foo16=b16+4 --defsym bar16=b16 --defsym foo21=b21+4 --defsym bar21=b21 --defsym foo26=b26+4 --defsym bar26=b26 -o %t.la32 ++# RUN: ld.lld %t.la64.o --defsym foo16=b16+4 --defsym bar16=b16 --defsym foo21=b21+4 --defsym bar21=b21 --defsym foo26=b26+4 --defsym bar26=b26 -o %t.la64 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la32 | FileCheck %s --check-prefix=CHECK ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64 | FileCheck %s --check-prefix=CHECK ++# CHECK: beq $zero, $zero, 4 ++# CHECK: bne $zero, $zero, -4 ++# CHECK: beqz $s8, 4 ++# CHECK: bnez $s8, -4 ++# CHECK: b 4 ++# CHECK: bl -4 ++ ++# RUN: ld.lld %t.la32.o --defsym foo16=b16+0x1fffc --defsym bar16=b16+4-0x20000 --defsym foo21=b21+0x3ffffc --defsym bar21=b21+4-0x400000 --defsym foo26=b26+0x7fffffc --defsym bar26=b26+4-0x8000000 -o %t.la32.limits ++# RUN: ld.lld %t.la64.o --defsym foo16=b16+0x1fffc --defsym bar16=b16+4-0x20000 --defsym foo21=b21+0x3ffffc --defsym bar21=b21+4-0x400000 --defsym foo26=b26+0x7fffffc --defsym bar26=b26+4-0x8000000 -o %t.la64.limits ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la32.limits | FileCheck --check-prefix=LIMITS %s ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.limits | FileCheck --check-prefix=LIMITS %s ++# LIMITS: beq $zero, $zero, 131068 ++# LIMITS-NEXT: bne $zero, $zero, -131072 ++# LIMITS: beqz $s8, 4194300 ++# LIMITS-NEXT: bnez $s8, -4194304 ++# LIMITS: b 134217724 ++# LIMITS-NEXT: bl -134217728 ++ ++# RUN: not ld.lld %t.la32.o --defsym foo16=b16+0x20000 --defsym bar16=b16+4-0x20004 --defsym foo21=b21+0x400000 --defsym bar21=b21+4-0x400004 --defsym foo26=b26+0x8000000 --defsym bar26=b26+4-0x8000004 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-RANGE %s ++# RUN: not ld.lld %t.la64.o --defsym foo16=b16+0x20000 --defsym bar16=b16+4-0x20004 --defsym foo21=b21+0x400000 --defsym bar21=b21+4-0x400004 --defsym foo26=b26+0x8000000 --defsym bar26=b26+4-0x8000004 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-RANGE %s ++# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_B16 out of range: 131072 is not in [-131072, 131071]; references 'foo16' ++# ERROR-RANGE: error: [[FILE]]:(.text+0x4): relocation R_LARCH_B16 out of range: -131076 is not in [-131072, 131071]; references 'bar16' ++# ERROR-RANGE: error: 
[[FILE]]:(.text+0x8): relocation R_LARCH_B21 out of range: 4194304 is not in [-4194304, 4194303]; references 'foo21' ++# ERROR-RANGE: error: [[FILE]]:(.text+0xc): relocation R_LARCH_B21 out of range: -4194308 is not in [-4194304, 4194303]; references 'bar21' ++# ERROR-RANGE: error: [[FILE]]:(.text+0x10): relocation R_LARCH_B26 out of range: 134217728 is not in [-134217728, 134217727]; references 'foo26' ++# ERROR-RANGE: error: [[FILE]]:(.text+0x14): relocation R_LARCH_B26 out of range: -134217732 is not in [-134217728, 134217727]; references 'bar26' ++ ++# RUN: not ld.lld %t.la32.o --defsym foo16=b16+1 --defsym bar16=b16-1 --defsym foo21=b21+1 --defsym bar21=b21-1 --defsym foo26=b26+1 --defsym bar26=b26-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN-1 %s ++# RUN: not ld.lld %t.la64.o --defsym foo16=b16+1 --defsym bar16=b16-1 --defsym foo21=b21+1 --defsym bar21=b21-1 --defsym foo26=b26+1 --defsym bar26=b26-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN-1 %s ++# ERROR-ALIGN-1: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_B16: 0x1 is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x4): improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x8): improper alignment for relocation R_LARCH_B21: 0x1 is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0xc): improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_LARCH_B26: 0x1 is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x14): improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes ++ ++# RUN: not ld.lld %t.la32.o --defsym foo16=b16+2 --defsym bar16=b16-2 --defsym foo21=b21+2 --defsym bar21=b21-2 --defsym foo26=b26+2 --defsym bar26=b26-2 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN-2 %s ++# RUN: not ld.lld %t.la64.o --defsym foo16=b16+2 --defsym bar16=b16-2 --defsym foo21=b21+2 --defsym bar21=b21-2 --defsym foo26=b26+2 --defsym bar26=b26-2 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN-2 %s ++# ERROR-ALIGN-2: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_B16: 0x2 is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x4): improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x8): improper alignment for relocation R_LARCH_B21: 0x2 is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0xc): improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_LARCH_B26: 0x2 is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x14): improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes ++ ++.global _start ++.global b16 ++.global b21 ++.global b26 ++_start: ++b16: ++ beq $zero, $zero, foo16 ++ bne $zero, $zero, bar16 ++b21: ++ beqz $s8, foo21 ++ bnez $s8, bar21 ++b26: ++ b foo26 ++ bl bar26 +diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test +new file mode 100644 +index 000000000000..44e5d03409a4 +--- /dev/null ++++ 
b/lld/test/ELF/loongarch-interlink.test +@@ -0,0 +1,84 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: yaml2obj %t/blob.yaml -o %t/blob.o ++# RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o ++ ++## Check that binary input results in e_flags=0 output. ++# RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out ++# RUN: llvm-readobj -h %t/blob.out | FileCheck --check-prefix=EMPTY %s ++# EMPTY: Flags [ ++# EMPTY-NEXT: ] ++ ++## Check that interlink between e_flags=0 and normal input (that contain code) ++## is allowed. ++## Also check that the e_flags logic work as intended regardless of input file ++## order. ++# RUN: ld.lld %t/blob.o %t/v1-lp64d.o -o %t/v1-lp64d.out ++# RUN: ld.lld %t/v1-lp64s.o %t/blob.o -o %t/v1-lp64s.out ++# RUN: llvm-readobj -h %t/v1-lp64d.out | FileCheck --check-prefix=V1-LP64D %s ++# RUN: llvm-readobj -h %t/v1-lp64s.out | FileCheck --check-prefix=V1-LP64S %s ++# V1-LP64D: Flags [ (0x43) ++# V1-LP64S: Flags [ (0x41) ++ ++## Check that interlink between different ABIs is disallowed. ++# RUN: not ld.lld %t/v1-lp64s.o %t/v1-b-lp64d.o -o /dev/null 2>&1 | FileCheck -DFILE1=%t/v1-b-lp64d.o -DFILE2=%t/v1-lp64s.o --check-prefix=INTERLINK-ERR %s ++# INTERLINK-ERR: error: [[FILE1]]: cannot link object files with different ABI from [[FILE2]] ++ ++## Check that interlink between different object ABI versions is disallowed. ++# RUN: not ld.lld %t/v0-lp64d.o %t/v1-b-lp64d.o %t/blob.o -o /dev/null 2>&1 | FileCheck -DFILE=%t/v0-lp64d.o --check-prefix=VERSION-ERR %s ++# VERSION-ERR: error: [[FILE]]: unsupported object file ABI version ++ ++#--- blob.bin ++BLOB ++ ++#--- blob.yaml ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_REL ++ Machine: EM_LOONGARCH ++ SectionHeaderStringTable: .strtab ++Sections: ++ - Name: .data ++ Type: SHT_PROGBITS ++ Flags: [ SHF_WRITE, SHF_ALLOC ] ++ AddressAlign: 0x1 ++ Content: 424C4F42 ++Symbols: ++ - Name: blob ++ Section: .data ++ Binding: STB_GLOBAL ++ ++#--- v0-lp64d.yaml ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_REL ++ Machine: EM_LOONGARCH ++ Flags: [ EF_LOONGARCH_ABI_DOUBLE_FLOAT ] ++ SectionHeaderStringTable: .strtab ++Sections: ++ - Name: .text ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ AddressAlign: 0x4 ++ Content: 0000a002 ++ ++#--- start.s ++.global _start ++_start: ++ la $a0, blob ++ ld.b $a0, $a0, 0 ++ li.w $a7, 94 ++ syscall 0 ++ ++#--- bar.s ++bar: ++ move $a0, $zero ++ ret +diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s +new file mode 100644 +index 000000000000..9df3492d1877 +--- /dev/null ++++ b/lld/test/ELF/loongarch-pc-aligned.s +@@ -0,0 +1,283 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.la64.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/extreme.s -o %t/extreme.o ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11000 --section-start=.text=0x11ffc -o %t/case1.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11000 --section-start=.text=0x11ffc -o %t/case1.la64 ++# RUN: llvm-objdump -d 
--no-show-raw-insn %t/case1.la32 | FileCheck %s --check-prefix=CASE1 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case1.la64 | FileCheck %s --check-prefix=CASE1 ++# CASE1: pcalau12i $a0, 0 ++# CASE1-NEXT: ld.w $a0, $a0, 0 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11000 --section-start=.text=0x12000 -o %t/case2.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11000 --section-start=.text=0x12000 -o %t/case2.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case2.la32 | FileCheck %s --check-prefix=CASE2 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case2.la64 | FileCheck %s --check-prefix=CASE2 ++# CASE2: pcalau12i $a0, -1 ++# CASE2-NEXT: ld.w $a0, $a0, 0 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x117ff --section-start=.text=0x12000 -o %t/case3.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x117ff --section-start=.text=0x12000 -o %t/case3.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case3.la32 | FileCheck %s --check-prefix=CASE3 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case3.la64 | FileCheck %s --check-prefix=CASE3 ++# CASE3: pcalau12i $a0, -1 ++# CASE3-NEXT: ld.w $a0, $a0, 2047 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11800 --section-start=.text=0x12000 -o %t/case4.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11800 --section-start=.text=0x12000 -o %t/case4.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case4.la32 | FileCheck %s --check-prefix=CASE4 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case4.la64 | FileCheck %s --check-prefix=CASE4 ++# CASE4: pcalau12i $a0, 0 ++# CASE4-NEXT: ld.w $a0, $a0, -2048 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x12004 --section-start=.text=0x11ffc -o %t/case5.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x12004 --section-start=.text=0x11ffc -o %t/case5.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case5.la32 | FileCheck %s --check-prefix=CASE5 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case5.la64 | FileCheck %s --check-prefix=CASE5 ++# CASE5: pcalau12i $a0, 1 ++# CASE5-NEXT: ld.w $a0, $a0, 4 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x12800 --section-start=.text=0x11ffc -o %t/case6.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x12800 --section-start=.text=0x11ffc -o %t/case6.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case6.la32 | FileCheck %s --check-prefix=CASE6 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case6.la64 | FileCheck %s --check-prefix=CASE6 ++# CASE6: pcalau12i $a0, 2 ++# CASE6-NEXT: ld.w $a0, $a0, -2048 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x7ffff123 --section-start=.text=0x0 -o %t/case7.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x7ffff123 --section-start=.text=0x0 -o %t/case7.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case7.la32 | FileCheck %s --check-prefix=CASE7 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case7.la64 | FileCheck %s --check-prefix=CASE7 ++# CASE7: pcalau12i $a0, 524287 ++# CASE7-NEXT: ld.w $a0, $a0, 291 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x7ffffabc --section-start=.text=0x0 -o %t/case8.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x7ffffabc --section-start=.text=0x0 -o %t/case8.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case8.la32 | FileCheck %s --check-prefix=CASE8 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case8.la64 | FileCheck %s --check-prefix=CASE8 ++# CASE8: pcalau12i $a0, -524288 ++# CASE8-NEXT: ld.w $a0, $a0, -1348 ++ ++# RUN: ld.lld %t/a.la32.o 
--section-start=.rodata=0x10123 --section-start=.text=0x80010000 -o %t/case9.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x10123 --section-start=.text=0x80010000 -o %t/case9.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case9.la32 | FileCheck %s --check-prefix=CASE9 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case9.la64 | FileCheck %s --check-prefix=CASE9 ++# CASE9: pcalau12i $a0, -524288 ++# CASE9-NEXT: ld.w $a0, $a0, 291 ++ ++## page delta = 0x4443333322222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme0 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme0 | FileCheck %s --check-prefix=EXTREME0 ++# EXTREME0: addi.d $t0, $zero, 273 ++# EXTREME0-NEXT: pcalau12i $t1, 139810 ++# EXTREME0-NEXT: lu32i.d $t0, 209715 ++# EXTREME0-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x4443333222223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0x33332 = 209714 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme1 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme1 | FileCheck %s --check-prefix=EXTREME1 ++# EXTREME1: addi.d $t0, $zero, -1912 ++# EXTREME1-NEXT: pcalau12i $t1, 139811 ++# EXTREME1-NEXT: lu32i.d $t0, 209714 ++# EXTREME1-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x4443333499999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0x33334 = 209716 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme2 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme2 | FileCheck %s --check-prefix=EXTREME2 ++# EXTREME2: addi.d $t0, $zero, 273 ++# EXTREME2-NEXT: pcalau12i $t1, -419431 ++# EXTREME2-NEXT: lu32i.d $t0, 209716 ++# EXTREME2-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444333339999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme3 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme3 | FileCheck %s --check-prefix=EXTREME3 ++# EXTREME3: addi.d $t0, $zero, -1912 ++# EXTREME3-NEXT: pcalau12i $t1, -419430 ++# EXTREME3-NEXT: lu32i.d $t0, 209715 ++# EXTREME3-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaaa22222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme4 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme4 | FileCheck %s --check-prefix=EXTREME4 ++# EXTREME4: addi.d $t0, $zero, 273 ++# EXTREME4-NEXT: pcalau12i $t1, 139810 ++# EXTREME4-NEXT: lu32i.d $t0, -349526 ++# EXTREME4-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaa922223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0xaaaa9 = -349527 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o 
--section-start=.rodata=0x444aaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme5 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme5 | FileCheck %s --check-prefix=EXTREME5 ++# EXTREME5: addi.d $t0, $zero, -1912 ++# EXTREME5-NEXT: pcalau12i $t1, 139811 ++# EXTREME5-NEXT: lu32i.d $t0, -349527 ++# EXTREME5-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaab99999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0xaaaab = -349525 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme6 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme6 | FileCheck %s --check-prefix=EXTREME6 ++# EXTREME6: addi.d $t0, $zero, 273 ++# EXTREME6-NEXT: pcalau12i $t1, -419431 ++# EXTREME6-NEXT: lu32i.d $t0, -349525 ++# EXTREME6-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaaa9999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme7 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme7 | FileCheck %s --check-prefix=EXTREME7 ++# EXTREME7: addi.d $t0, $zero, -1912 ++# EXTREME7-NEXT: pcalau12i $t1, -419430 ++# EXTREME7-NEXT: lu32i.d $t0, -349526 ++# EXTREME7-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0xbbb3333322222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme8 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme8 | FileCheck %s --check-prefix=EXTREME8 ++# EXTREME8: addi.d $t0, $zero, 273 ++# EXTREME8-NEXT: pcalau12i $t1, 139810 ++# EXTREME8-NEXT: lu32i.d $t0, 209715 ++# EXTREME8-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbb3333222223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0x33332 = 209714 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme9 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme9 | FileCheck %s --check-prefix=EXTREME9 ++# EXTREME9: addi.d $t0, $zero, -1912 ++# EXTREME9-NEXT: pcalau12i $t1, 139811 ++# EXTREME9-NEXT: lu32i.d $t0, 209714 ++# EXTREME9-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbb3333499999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0x33334 = 209716 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme10 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme10 | FileCheck %s --check-prefix=EXTREME10 ++# EXTREME10: addi.d $t0, $zero, 273 ++# EXTREME10-NEXT: pcalau12i $t1, -419431 ++# EXTREME10-NEXT: lu32i.d $t0, 209716 ++# EXTREME10-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbb333339999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde888 --section-start=.text=0x0000000012345678 -o 
%t/extreme11 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme11 | FileCheck %s --check-prefix=EXTREME11 ++# EXTREME11: addi.d $t0, $zero, -1912 ++# EXTREME11-NEXT: pcalau12i $t1, -419430 ++# EXTREME11-NEXT: lu32i.d $t0, 209715 ++# EXTREME11-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaaa22222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme12 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme12 | FileCheck %s --check-prefix=EXTREME12 ++# EXTREME12: addi.d $t0, $zero, 273 ++# EXTREME12-NEXT: pcalau12i $t1, 139810 ++# EXTREME12-NEXT: lu32i.d $t0, -349526 ++# EXTREME12-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaa922223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0xaaaa9 = -349527 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme13 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme13 | FileCheck %s --check-prefix=EXTREME13 ++# EXTREME13: addi.d $t0, $zero, -1912 ++# EXTREME13-NEXT: pcalau12i $t1, 139811 ++# EXTREME13-NEXT: lu32i.d $t0, -349527 ++# EXTREME13-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaab99999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0xaaaab = -349525 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme14 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme14 | FileCheck %s --check-prefix=EXTREME14 ++# EXTREME14: addi.d $t0, $zero, 273 ++# EXTREME14-NEXT: pcalau12i $t1, -419431 ++# EXTREME14-NEXT: lu32i.d $t0, -349525 ++# EXTREME14-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaaa9999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme15 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme15 | FileCheck %s --check-prefix=EXTREME15 ++# EXTREME15: addi.d $t0, $zero, -1912 ++# EXTREME15-NEXT: pcalau12i $t1, -419430 ++# EXTREME15-NEXT: lu32i.d $t0, -349526 ++# EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 ++ ++#--- a.s ++.rodata ++x: ++.word 10 ++.text ++.global _start ++_start: ++ pcalau12i $a0, %pc_hi20(x) ++ ld.w $a0, $a0, %pc_lo12(x) ++ ++#--- extreme.s ++.rodata ++x: ++.word 10 ++.text ++.global _start ++_start: ++ addi.d $t0, $zero, %pc_lo12(x) ++ pcalau12i $t1, %pc_hi20(x) ++ lu32i.d $t0, %pc64_lo20(x) ++ lu52i.d $t0, $t0, %pc64_hi12(x) +diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s +new file mode 100644 +index 000000000000..991f8fbe974f +--- /dev/null ++++ b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s +@@ -0,0 +1,60 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/a.s -o %t/a.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.la64.o ++ ++# RUN: ld.lld %t/a.la32.o -shared -T %t/a.t -o %t/a.la32.so ++# RUN: ld.lld %t/a.la64.o -shared 
-T %t/a.t -o %t/a.la64.so ++ ++# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la32.so | FileCheck --check-prefixes=DIS,DIS32 %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la64.so | FileCheck --check-prefixes=DIS,DIS64 %s ++ ++## PLT should be present in this case. ++# DIS: Disassembly of section .plt: ++# DIS: <.plt>: ++# DIS: 234020: pcaddu12i $t3, 510 ++# DIS32-NEXT: ld.w $t3, $t3, 84 ++# DIS64-NEXT: ld.d $t3, $t3, 184 ++# DIS-NEXT: jirl $t1, $t3, 0 ++# DIS-NEXT: nop ++ ++# DIS: Disassembly of section .text: ++# DIS: : ++# DIS-NEXT: nop ++# DIS-NEXT: nop ++# DIS-NEXT: nop ++# DIS-NEXT: pcalau12i $t0, -510 ++# DIS-NEXT: jirl $zero, $t0, 32 ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/error.s -o %t/error.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/error.s -o %t/error.la64.o ++# RUN: not ld.lld %t/error.la32.o -shared -o %t/error.la32.so 2>&1 | FileCheck --check-prefix=ERR %s ++# RUN: not ld.lld %t/error.la64.o -shared -o %t/error.la64.so 2>&1 | FileCheck --check-prefix=ERR %s ++# ERR: error: relocation R_LARCH_PCALA_LO12 cannot be used against symbol 'bar'; recompile with -fPIC ++ ++#--- a.t ++SECTIONS { ++ .plt 0x234000: { *(.plt) } ++ .text 0x432000: { *(.text) } ++} ++ ++#--- a.s ++.p2align 12 ++.global foo ++foo: ++## The nops are for pushing the relocs off page boundary, to better see the ++## page-aligned semantics in action. ++ nop ++ nop ++ nop ++ ## The offsets should be -510 (0x234 - 0x432) and 32 (PLT header size + 0) ++ ## respectively. ++ pcalau12i $t0, %pc_hi20(bar) ++ jirl $zero, $t0, %pc_lo12(bar) ++ ++#--- error.s ++.global foo ++foo: ++ pcalau12i $t0, %pc_hi20(bar) ++ ld.w $t0, $t0, %pc_lo12(bar) +diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl.s b/lld/test/ELF/loongarch-pcala-lo12-jirl.s +new file mode 100644 +index 000000000000..1a03152aaa2a +--- /dev/null ++++ b/lld/test/ELF/loongarch-pcala-lo12-jirl.s +@@ -0,0 +1,42 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld %t.la32.o -o %t.la32 ++# RUN: ld.lld %t.la64.o -o %t.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck %s ++# CHECK: pcalau12i $t0, -1 ++# CHECK-NEXT: jirl $ra, $t0, 564 ++# CHECK-NEXT: pcalau12i $t0, 0 ++# CHECK-NEXT: jirl $zero, $t0, -1348 ++ ++## PLT shouldn't get generated in this case. ++# CHECK-NOT: Disassembly of section .plt: ++ ++.p2align 12 ++.org 0x234 ++.global foo ++foo: ++ li.w $a0, 42 ++ ret ++ ++.org 0xabc ++.global bar ++bar: ++ li.w $a7, 94 ++ syscall 0 ++ ++.org 0x1000 ++.global _start ++_start: ++## The nops are for pushing the relocs off page boundary, to better see the ++## page-aligned semantics in action. 
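The CHECK operands of this test can be reproduced by hand: the input's .p2align 12 makes .text start on a page boundary, so only the section-relative .org offsets matter. A small worked sketch (simplified: it ignores the 32-bit sign-extension fixup shown earlier, which does not matter at these small offsets; all offsets are taken from the test source):

    #include <cstdint>
    #include <cstdio>

    // foo is at 0x234, bar at 0xabc; the two pcalau12i sit at 0x100c and
    // 0x1014 (three nops after _start at 0x1000).
    static void operands(uint64_t dest, uint64_t pc) {
      long long delta = (long long)((dest & ~0xfffULL) - (pc & ~0xfffULL));
      if (dest & 0x800)      // negative %pc_lo12: compensate with one more page
        delta += 0x1000;
      long long lo = (long long)(dest & 0xfff) - ((dest & 0x800) ? 0x1000 : 0);
      printf("pcalau12i %lld, jirl %lld\n", delta >> 12, lo);
    }

    int main() {
      operands(0x234, 0x100c);   // foo: expect -1, 564
      operands(0xabc, 0x1014);   // bar: expect 0, -1348
    }

The bar case shows the lo12 sign fixup in action: its in-page offset 0xabc has bit 11 set, so the immediate becomes -1348 and the page operand is bumped from -1 to 0.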
++ nop ++ nop ++ nop ++ pcalau12i $t0, %pc_hi20(foo) ++ jirl $ra, $t0, %pc_lo12(foo) ++ pcalau12i $t0, %pc_hi20(bar) ++ jirl $zero, $t0, %pc_lo12(bar) +diff --git a/lld/test/ELF/loongarch-plt.s b/lld/test/ELF/loongarch-plt.s +new file mode 100644 +index 000000000000..82af53d39e73 +--- /dev/null ++++ b/lld/test/ELF/loongarch-plt.s +@@ -0,0 +1,108 @@ ++# REQUIRES: loongarch ++# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t1.s -o %t1.32.o ++# RUN: ld.lld -shared %t1.32.o -soname=t1.32.so -o %t1.32.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o ++# RUN: ld.lld %t.32.o %t1.32.so -z separate-code -o %t.32 ++# RUN: llvm-readelf -S -s %t.32 | FileCheck --check-prefixes=SEC,NM %s ++# RUN: llvm-readobj -r %t.32 | FileCheck --check-prefix=RELOC32 %s ++# RUN: llvm-readelf -x .got.plt %t.32 | FileCheck --check-prefix=GOTPLT32 %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=DIS,DIS32 %s ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t1.s -o %t1.64.o ++# RUN: ld.lld -shared %t1.64.o -soname=t1.64.so -o %t1.64.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o ++# RUN: ld.lld %t.64.o %t1.64.so -z separate-code -o %t.64 ++# RUN: llvm-readelf -S -s %t.64 | FileCheck --check-prefixes=SEC,NM %s ++# RUN: llvm-readobj -r %t.64 | FileCheck --check-prefix=RELOC64 %s ++# RUN: llvm-readelf -x .got.plt %t.64 | FileCheck --check-prefix=GOTPLT64 %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=DIS,DIS64 %s ++ ++# SEC: .plt PROGBITS {{0*}}00020020 ++ ++## A canonical PLT has a non-zero st_value. bar and weak are called but their ++## addresses are not taken, so a canonical PLT is not necessary. ++# NM: {{0*}}00000000 0 FUNC GLOBAL DEFAULT UND bar ++# NM: {{0*}}00000000 0 FUNC WEAK DEFAULT UND weak ++ ++## The .got.plt slots relocated by .rela.plt point to .plt ++## This is required by glibc. ++# RELOC32: .rela.plt { ++# RELOC32-NEXT: 0x40070 R_LARCH_JUMP_SLOT bar 0x0 ++# RELOC32-NEXT: 0x40074 R_LARCH_JUMP_SLOT weak 0x0 ++# RELOC32-NEXT: } ++# GOTPLT32: section '.got.plt' ++# GOTPLT32-NEXT: 0x00040068 00000000 00000000 20000200 20000200 ++ ++# RELOC64: .rela.plt { ++# RELOC64-NEXT: 0x400E0 R_LARCH_JUMP_SLOT bar 0x0 ++# RELOC64-NEXT: 0x400E8 R_LARCH_JUMP_SLOT weak 0x0 ++# RELOC64-NEXT: } ++# GOTPLT64: section '.got.plt' ++# GOTPLT64-NEXT: 0x000400d0 00000000 00000000 00000000 00000000 ++# GOTPLT64-NEXT: 0x000400e0 20000200 00000000 20000200 00000000 ++ ++# DIS: <_start>: ++## Direct call ++## foo - . = 0x20010-0x20000 = 16 ++# DIS-NEXT: 20000: bl 16 ++## bar@plt - . = 0x20040-0x20004 = 60 ++# DIS-NEXT: 20004: bl 60 ++## bar@plt - . = 0x20040-0x20008 = 56 ++# DIS-NEXT: 20008: bl 56 ++## weak@plt - . 
= 0x20050-0x2000c = 68 ++# DIS-NEXT: 2000c: bl 68 ++# DIS: : ++# DIS-NEXT: 20010: ++ ++# DIS: Disassembly of section .plt: ++# DIS: <.plt>: ++## 32-bit: .got.plt - .plt = 0x40068 - 0x20020 = 4096*32+72 ++# DIS32-NEXT: pcaddu12i $t2, 32 ++# DIS32-NEXT: sub.w $t1, $t1, $t3 ++# DIS32-NEXT: ld.w $t3, $t2, 72 ++# DIS32-NEXT: addi.w $t1, $t1, -44 ++# DIS32-NEXT: addi.w $t0, $t2, 72 ++# DIS32-NEXT: srli.w $t1, $t1, 2 ++# DIS32-NEXT: ld.w $t0, $t0, 4 ++# DIS32-NEXT: jr $t3 ++ ++## 64-bit: .got.plt - .plt = 0x400d0 - 0x20020 = 4096*32+176 ++# DIS64-NEXT: pcaddu12i $t2, 32 ++# DIS64-NEXT: sub.d $t1, $t1, $t3 ++# DIS64-NEXT: ld.d $t3, $t2, 176 ++# DIS64-NEXT: addi.d $t1, $t1, -44 ++# DIS64-NEXT: addi.d $t0, $t2, 176 ++# DIS64-NEXT: srli.d $t1, $t1, 1 ++# DIS64-NEXT: ld.d $t0, $t0, 8 ++# DIS64-NEXT: jr $t3 ++ ++## 32-bit: &.got.plt[bar]-. = 0x40070-0x20040 = 4096*32+48 ++## 64-bit: &.got.plt[bar]-. = 0x400e0-0x20040 = 4096*32+160 ++# DIS: 20040: pcaddu12i $t3, 32 ++# DIS32-NEXT: ld.w $t3, $t3, 48 ++# DIS64-NEXT: ld.d $t3, $t3, 160 ++# DIS-NEXT: jirl $t1, $t3, 0 ++# DIS-NEXT: nop ++ ++## 32-bit: &.got.plt[weak]-. = 0x40074-0x20050 = 4096*32+36 ++## 64-bit: &.got.plt[weak]-. = 0x400e8-0x20050 = 4096*32+152 ++# DIS: 20050: pcaddu12i $t3, 32 ++# DIS32-NEXT: ld.w $t3, $t3, 36 ++# DIS64-NEXT: ld.d $t3, $t3, 152 ++# DIS-NEXT: jirl $t1, $t3, 0 ++# DIS-NEXT: nop ++ ++.global _start, foo, bar ++.weak weak ++ ++_start: ++ bl foo ++ bl bar ++ bl %plt(bar) ++ bl weak ++ ++## foo is local and non-preemptible, no PLT is generated. ++foo: ++ ret +diff --git a/lld/test/ELF/loongarch-reloc-pic.s b/lld/test/ELF/loongarch-reloc-pic.s +new file mode 100644 +index 000000000000..b23ad55a2523 +--- /dev/null ++++ b/lld/test/ELF/loongarch-reloc-pic.s +@@ -0,0 +1,44 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/32.s -o %t/32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/64.s -o %t/64.o ++# RUN: ld.lld -shared %t/32.o -o %t/32.so ++# RUN: llvm-nm %t/32.so | FileCheck --check-prefix=NM32 %s ++# RUN: llvm-readobj -r %t/32.so | FileCheck --check-prefix=RELOC32 %s ++# RUN: ld.lld -shared %t/64.o -o %t/64.so ++# RUN: llvm-nm %t/64.so | FileCheck --check-prefix=NM64 %s ++# RUN: llvm-readobj -r %t/64.so | FileCheck --check-prefix=RELOC64 %s ++ ++## R_LARCH_32 and R_LARCH_64 are absolute relocation types. ++## In PIC mode, they create relative relocations if the symbol is non-preemptable. ++ ++# NM32: 000301fc d b ++# NM64: 00030350 d b ++ ++# RELOC32: .rela.dyn { ++# RELOC32-NEXT: 0x301FC R_LARCH_RELATIVE - 0x301FC ++# RELOC32-NEXT: 0x301F8 R_LARCH_32 a 0 ++# RELOC32-NEXT: } ++# RELOC64: .rela.dyn { ++# RELOC64-NEXT: 0x30350 R_LARCH_RELATIVE - 0x30350 ++# RELOC64-NEXT: 0x30348 R_LARCH_64 a 0 ++# RELOC64-NEXT: } ++ ++#--- 32.s ++.globl a, b ++.hidden b ++ ++.data ++.long a ++b: ++.long b ++ ++#--- 64.s ++.globl a, b ++.hidden b ++ ++.data ++.quad a ++b: ++.quad b +diff --git a/lld/test/ELF/loongarch-tls-gd-edge-case.s b/lld/test/ELF/loongarch-tls-gd-edge-case.s +new file mode 100644 +index 000000000000..9f25f10c73b4 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-gd-edge-case.s +@@ -0,0 +1,46 @@ ++# REQUIRES: loongarch ++ ++## Edge case: when a TLS symbol is being accessed in both GD and IE manners, ++## correct reloc behavior should be preserved for both kinds of accesses. 
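Concretely, the single symbol y ends up with three GOT words: a {DTPMOD, DTPREL} pair for the GD access and a separate TPREL word for the IE access, and the two la.tls.* expansions simply point at different slots. A sketch of the operand split, with the LA64 addresses asserted below hard-coded as assumptions (GOT pair at 0x203a0, TPREL word at 0x203b0, code starting at 0x102e0):

    #include <cstdint>
    #include <cstdio>

    // Page/offset split for a GOT slot access, close enough for these small
    // addresses (no sign-extension fixups are needed here).
    static void split(const char *what, uint64_t slot, uint64_t pc) {
      long long hi = (long long)((slot & ~0xfffULL) - (pc & ~0xfffULL)) >> 12;
      printf("%s: pcalau12i %lld, lo12 %lld\n", what, hi,
             (long long)(slot & 0xfff));
    }

    int main() {
      split("ie (tprel slot)",  0x203b0, 0x102e0);  // expect 16, 944
      split("gd (dtpmod pair)", 0x203a0, 0x102e8);  // expect 16, 928
    }

The IE access loads the tp-offset from its own slot, while the GD access passes the address of the pair to __tls_get_addr, which is why both kinds of dynamic relocations must coexist for y.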
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.la32.o ++# RUN: ld.lld %t.la32.o -shared -o %t.la32 ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.la64.o ++# RUN: ld.lld %t.la64.o -shared -o %t.la64 ++ ++# RUN: llvm-readelf -Wr %t.la32 | FileCheck --check-prefix=LA32-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck --check-prefix=LA32 %s ++ ++# RUN: llvm-readelf -Wr %t.la64 | FileCheck --check-prefix=LA64-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck --check-prefix=LA64 %s ++ ++# LA32-REL-NOT: R_LARCH_32 ++# LA32-REL: 0002023c 00000206 R_LARCH_TLS_DTPMOD32 00000000 y + 0 ++# LA32-REL-NEXT: 00020240 00000208 R_LARCH_TLS_DTPREL32 00000000 y + 0 ++# LA32-REL-NEXT: 00020244 0000020a R_LARCH_TLS_TPREL32 00000000 y + 0 ++ ++# LA64-REL-NOT: R_LARCH_64 ++# LA64-REL: 00000000000203a0 0000000200000007 R_LARCH_TLS_DTPMOD64 0000000000000000 y + 0 ++# LA64-REL-NEXT: 00000000000203a8 0000000200000009 R_LARCH_TLS_DTPREL64 0000000000000000 y + 0 ++# LA64-REL-NEXT: 00000000000203b0 000000020000000b R_LARCH_TLS_TPREL64 0000000000000000 y + 0 ++ ++# LA32: 101d4: pcalau12i $a0, 16 ++# LA32-NEXT: ld.w $a0, $a0, 580 ++# LA32-NEXT: pcalau12i $a1, 16 ++# LA32-NEXT: addi.w $a1, $a1, 572 ++ ++# LA64: 102e0: pcalau12i $a0, 16 ++# LA64-NEXT: ld.d $a0, $a0, 944 ++# LA64-NEXT: pcalau12i $a1, 16 ++# LA64-NEXT: addi.d $a1, $a1, 928 ++ ++.global _start ++_start: ++la.tls.ie $a0, y # should refer to the GOT entry relocated by the R_LARCH_TLS_TPRELnn record ++la.tls.gd $a1, y # should refer to the GOT entry relocated by the R_LARCH_TLS_DTPMODnn record ++ ++.section .tbss,"awT",@nobits ++.global y ++y: ++.word 0 ++.size y, 4 +diff --git a/lld/test/ELF/loongarch-tls-gd.s b/lld/test/ELF/loongarch-tls-gd.s +new file mode 100644 +index 000000000000..2aecb44c17a3 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-gd.s +@@ -0,0 +1,136 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not ++## relaxed, dynamic relocations can be omitted for GD->LE relaxation. 
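In the GD -> LE outputs checked further down, the GOT pairs survive but turn into plain constants: DTPMOD becomes 1 (the executable itself) and DTPREL becomes the symbol's st_value, which is why no dynamic relocations remain. A toy model of that contract (not the real libc __tls_get_addr, just an illustration of what the pre-filled pairs mean; the 0x8/0xc offsets are the st_values from this test):

    #include <cstdint>
    #include <cstdio>

    struct TlsIndex { uint64_t dtpmod, dtprel; };  // one GOT pair per GD symbol

    static char module1_tls[16];                   // stand-in for the exe's TLS block
    static char *dtv[2] = {nullptr, module1_tls};  // dtv[module id] -> TLS block

    // Toy stand-in for __tls_get_addr: index the DTV, add the in-block offset.
    static void *tls_get_addr_model(const TlsIndex *ti) {
      return dtv[ti->dtpmod] + ti->dtprel;
    }

    int main() {
      // GD -> LE: the linker fills the pairs with constants, matching the
      // LE32-GOT words below (1, 0x8, 1, 0xc, shown little-endian there).
      TlsIndex a = {1, 0x8}, b = {1, 0xc};
      printf("a at %p, b at %p\n", tls_get_addr_model(&a), tls_get_addr_model(&b));
    }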
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/bc.s -o %t/bc.32.o ++# RUN: ld.lld -shared -soname=bc.so %t/bc.32.o -o %t/bc.32.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.64.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/bc.s -o %t/bc.64.o ++# RUN: ld.lld -shared -soname=bc.so %t/bc.64.o -o %t/bc.64.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o ++ ++## LA32 GD ++# RUN: ld.lld -shared %t/a.32.o %t/bc.32.o -o %t/gd.32.so ++# RUN: llvm-readobj -r %t/gd.32.so | FileCheck --check-prefix=GD32-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.32.so | FileCheck --check-prefix=GD32 %s ++ ++## LA32 GD -> LE ++# RUN: ld.lld %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le.32 ++# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s ++# RUN: ld.lld -pie %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le-pie.32 ++# RUN: llvm-readelf -r %t/le-pie.32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le-pie.32 | FileCheck --check-prefix=LE32-GOT %s ++ ++## LA32 GD -> IE ++# RUN: ld.lld %t/a.32.o %t/bc.32.so %t/tga.32.o -o %t/ie.32 ++# RUN: llvm-readobj -r %t/ie.32 | FileCheck --check-prefix=IE32-REL %s ++# RUN: llvm-readelf -x .got %t/ie.32 | FileCheck --check-prefix=IE32-GOT %s ++ ++## LA64 GD ++# RUN: ld.lld -shared %t/a.64.o %t/bc.64.o -o %t/gd.64.so ++# RUN: llvm-readobj -r %t/gd.64.so | FileCheck --check-prefix=GD64-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.64.so | FileCheck --check-prefix=GD64 %s ++ ++## LA64 GD -> LE ++# RUN: ld.lld %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le.64 ++# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s ++# RUN: ld.lld -pie %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le-pie.64 ++# RUN: llvm-readelf -r %t/le-pie.64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le-pie.64 | FileCheck --check-prefix=LE64-GOT %s ++ ++## LA64 GD -> IE ++# RUN: ld.lld %t/a.64.o %t/bc.64.so %t/tga.64.o -o %t/ie.64 ++# RUN: llvm-readobj -r %t/ie.64 | FileCheck --check-prefix=IE64-REL %s ++# RUN: llvm-readelf -x .got %t/ie.64 | FileCheck --check-prefix=IE64-GOT %s ++ ++# GD32-REL: .rela.dyn { ++# GD32-REL-NEXT: 0x20310 R_LARCH_TLS_DTPMOD32 a 0x0 ++# GD32-REL-NEXT: 0x20314 R_LARCH_TLS_DTPREL32 a 0x0 ++# GD32-REL-NEXT: 0x20318 R_LARCH_TLS_DTPMOD32 b 0x0 ++# GD32-REL-NEXT: 0x2031C R_LARCH_TLS_DTPREL32 b 0x0 ++# GD32-REL-NEXT: } ++ ++## &DTPMOD(a) - . = 0x20310 - 0x10250: 0x10 pages, page offset 0x310 ++# GD32: 10250: pcalau12i $a0, 16 ++# GD32-NEXT: addi.w $a0, $a0, 784 ++# GD32-NEXT: bl 56 ++ ++## &DTPMOD(b) - . = 0x20318 - 0x1025c: 0x10 pages, page offset 0x318 ++# GD32: 1025c: pcalau12i $a0, 16 ++# GD32-NEXT: addi.w $a0, $a0, 792 ++# GD32-NEXT: bl 44 ++ ++# GD64-REL: .rela.dyn { ++# GD64-REL-NEXT: 0x204C0 R_LARCH_TLS_DTPMOD64 a 0x0 ++# GD64-REL-NEXT: 0x204C8 R_LARCH_TLS_DTPREL64 a 0x0 ++# GD64-REL-NEXT: 0x204D0 R_LARCH_TLS_DTPMOD64 b 0x0 ++# GD64-REL-NEXT: 0x204D8 R_LARCH_TLS_DTPREL64 b 0x0 ++# GD64-REL-NEXT: } ++ ++## &DTPMOD(a) - . = 0x204c0 - 0x10398: 0x10 pages, page offset 0x4c0 ++# GD64: 10398: pcalau12i $a0, 16 ++# GD64-NEXT: addi.d $a0, $a0, 1216 ++# GD64-NEXT: bl 48 ++ ++## &DTPMOD(b) - . 
= 0x204d0 - 0x103a4: 0x10 pages, page offset 0x4d0 ++# GD64: 103a4: pcalau12i $a0, 16 ++# GD64-NEXT: addi.d $a0, $a0, 1232 ++# GD64-NEXT: bl 36 ++ ++# NOREL: no relocations ++ ++## .got contains pre-populated values: [a@dtpmod, a@dtprel, b@dtpmod, b@dtprel] ++## a@dtprel = st_value(a) = 0x8 ++## b@dtprel = st_value(b) = 0xc ++# LE32-GOT: section '.got': ++# LE32-GOT-NEXT: 0x[[#%x,A:]] 01000000 08000000 01000000 0c000000 ++# LE64-GOT: section '.got': ++# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 08000000 00000000 ++# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 0c000000 00000000 ++ ++## a is local - relaxed to LE - its DTPMOD/DTPREL slots are link-time constants. ++## b is external - DTPMOD/DTPREL dynamic relocations are required. ++# IE32-REL: .rela.dyn { ++# IE32-REL-NEXT: 0x30228 R_LARCH_TLS_DTPMOD32 b 0x0 ++# IE32-REL-NEXT: 0x3022C R_LARCH_TLS_DTPREL32 b 0x0 ++# IE32-REL-NEXT: } ++# IE32-GOT: section '.got': ++# IE32-GOT-NEXT: 0x00030220 01000000 08000000 00000000 00000000 ++ ++# IE64-REL: .rela.dyn { ++# IE64-REL-NEXT: 0x30388 R_LARCH_TLS_DTPMOD64 b 0x0 ++# IE64-REL-NEXT: 0x30390 R_LARCH_TLS_DTPREL64 b 0x0 ++# IE64-REL-NEXT: } ++# IE64-GOT: section '.got': ++# IE64-GOT-NEXT: 0x00030378 01000000 00000000 08000000 00000000 ++# IE64-GOT-NEXT: 0x00030388 00000000 00000000 00000000 00000000 ++ ++#--- a.s ++la.tls.gd $a0, a ++bl %plt(__tls_get_addr) ++ ++la.tls.gd $a0, b ++bl %plt(__tls_get_addr) ++ ++.section .tbss,"awT",@nobits ++.globl a ++.zero 8 ++a: ++.zero 4 ++ ++#--- bc.s ++.section .tbss,"awT",@nobits ++.globl b, c ++b: ++.zero 4 ++c: ++ ++#--- tga.s ++.globl __tls_get_addr ++__tls_get_addr: +diff --git a/lld/test/ELF/loongarch-tls-ie.s b/lld/test/ELF/loongarch-tls-ie.s +new file mode 100644 +index 000000000000..78c207991b4e +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-ie.s +@@ -0,0 +1,114 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/32.s -o %t/32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/64.s -o %t/64.o ++ ++## LA32 IE ++# RUN: ld.lld -shared %t/32.o -o %t/32.so ++# RUN: llvm-readobj -r -d %t/32.so | FileCheck --check-prefix=IE32-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/32.so | FileCheck --check-prefixes=IE32 %s ++ ++## LA32 IE -> LE ++# RUN: ld.lld %t/32.o -o %t/32 ++# RUN: llvm-readelf -r %t/32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/32 | FileCheck --check-prefix=LE32-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/32 | FileCheck --check-prefixes=LE32 %s ++ ++## LA64 IE ++# RUN: ld.lld -shared %t/64.o -o %t/64.so ++# RUN: llvm-readobj -r -d %t/64.so | FileCheck --check-prefix=IE64-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/64.so | FileCheck --check-prefixes=IE64 %s ++ ++## LA64 IE -> LE ++# RUN: ld.lld %t/64.o -o %t/64 ++# RUN: llvm-readelf -r %t/64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/64 | FileCheck --check-prefix=LE64-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/64 | FileCheck --check-prefixes=LE64 %s ++ ++# IE32-REL: FLAGS STATIC_TLS ++# IE32-REL: .rela.dyn { ++# IE32-REL-NEXT: 0x20218 R_LARCH_TLS_TPREL32 - 0xC ++# IE32-REL-NEXT: 0x20214 R_LARCH_TLS_TPREL32 a 0x0 ++# IE32-REL-NEXT: } ++ ++# IE64-REL: FLAGS STATIC_TLS ++# IE64-REL: .rela.dyn { ++# IE64-REL-NEXT: 0x20370 R_LARCH_TLS_TPREL64 - 0xC ++# IE64-REL-NEXT: 0x20368 R_LARCH_TLS_TPREL64 a 0x0 ++# IE64-REL-NEXT: } ++ ++## LA32: ++## &.got[0] - . = 0x20214 - 0x101a4: 0x10 pages, page offset 0x214 ++## &.got[1] - . 
= 0x20218 - 0x101b0: 0x10 pages, page offset 0x218 ++# IE32: 101a4: pcalau12i $a4, 16 ++# IE32-NEXT: ld.w $a4, $a4, 532 ++# IE32-NEXT: add.w $a4, $a4, $tp ++# IE32-NEXT: 101b0: pcalau12i $a5, 16 ++# IE32-NEXT: ld.w $a5, $a5, 536 ++# IE32-NEXT: add.w $a5, $a5, $tp ++ ++## LA64: ++## &.got[0] - . = 0x20368 - 0x102a0: 0x10 pages, page offset 0x368 ++## &.got[1] - . = 0x20370 - 0x102ac: 0x10 pages, page offset 0x370 ++# IE64: 102a0: pcalau12i $a4, 16 ++# IE64-NEXT: ld.d $a4, $a4, 872 ++# IE64-NEXT: add.d $a4, $a4, $tp ++# IE64-NEXT: 102ac: pcalau12i $a5, 16 ++# IE64-NEXT: ld.d $a5, $a5, 880 ++# IE64-NEXT: add.d $a5, $a5, $tp ++ ++# NOREL: no relocations ++ ++# a@tprel = st_value(a) = 0x8 ++# b@tprel = st_value(a) = 0xc ++# LE32-GOT: section '.got': ++# LE32-GOT-NEXT: 0x0003012c 08000000 0c000000 ++# LE64-GOT: section '.got': ++# LE64-GOT-NEXT: 0x000301e0 08000000 00000000 0c000000 00000000 ++ ++## LA32: ++## &.got[0] - . = 0x3012c - 0x20114: 0x10 pages, page offset 0x12c ++## &.got[1] - . = 0x30130 - 0x20120: 0x10 pages, page offset 0x130 ++# LE32: 20114: pcalau12i $a4, 16 ++# LE32-NEXT: ld.w $a4, $a4, 300 ++# LE32-NEXT: add.w $a4, $a4, $tp ++# LE32-NEXT: 20120: pcalau12i $a5, 16 ++# LE32-NEXT: ld.w $a5, $a5, 304 ++# LE32-NEXT: add.w $a5, $a5, $tp ++ ++## LA64: ++## &.got[0] - . = 0x301e0 - 0x201c8: 0x10 pages, page offset 0x1e0 ++## &.got[1] - . = 0x301e8 - 0x201d4: 0x10 pages, page offset 0x1e8 ++# LE64: 201c8: pcalau12i $a4, 16 ++# LE64-NEXT: ld.d $a4, $a4, 480 ++# LE64-NEXT: add.d $a4, $a4, $tp ++# LE64-NEXT: 201d4: pcalau12i $a5, 16 ++# LE64-NEXT: ld.d $a5, $a5, 488 ++# LE64-NEXT: add.d $a5, $a5, $tp ++ ++#--- 32.s ++la.tls.ie $a4, a ++add.w $a4, $a4, $tp ++la.tls.ie $a5, b ++add.w $a5, $a5, $tp ++ ++.section .tbss,"awT",@nobits ++.globl a ++.zero 8 ++a: ++.zero 4 ++b: ++ ++#--- 64.s ++la.tls.ie $a4, a ++add.d $a4, $a4, $tp ++la.tls.ie $a5, b ++add.d $a5, $a5, $tp ++ ++.section .tbss,"awT",@nobits ++.globl a ++.zero 8 ++a: ++.zero 4 ++b: +diff --git a/lld/test/ELF/loongarch-tls-ld.s b/lld/test/ELF/loongarch-tls-ld.s +new file mode 100644 +index 000000000000..a5be3ad905b7 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-ld.s +@@ -0,0 +1,89 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not ++## relaxed, dynamic relocations can be omitted for LD->LE relaxation. 
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 --position-independent %t/a.s -o %t/a.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --position-independent %t/a.s -o %t/a.64.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o ++ ++## LA32 LD ++# RUN: ld.lld -shared %t/a.32.o -o %t/ld.32.so ++# RUN: llvm-readobj -r %t/ld.32.so | FileCheck --check-prefix=LD32-REL %s ++# RUN: llvm-readelf -x .got %t/ld.32.so | FileCheck --check-prefix=LD32-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.32.so | FileCheck --check-prefixes=LD32 %s ++ ++## LA32 LD -> LE ++# RUN: ld.lld %t/a.32.o %t/tga.32.o -o %t/le.32 ++# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/le.32 | FileCheck --check-prefixes=LE32 %s ++ ++## LA64 LD ++# RUN: ld.lld -shared %t/a.64.o -o %t/ld.64.so ++# RUN: llvm-readobj -r %t/ld.64.so | FileCheck --check-prefix=LD64-REL %s ++# RUN: llvm-readelf -x .got %t/ld.64.so | FileCheck --check-prefix=LD64-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.64.so | FileCheck --check-prefixes=LD64 %s ++ ++## LA64 LD -> LE ++# RUN: ld.lld %t/a.64.o %t/tga.64.o -o %t/le.64 ++# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/le.64 | FileCheck --check-prefixes=LE64 %s ++ ++## a@dtprel = st_value(a) = 0 is a link-time constant. ++# LD32-REL: .rela.dyn { ++# LD32-REL-NEXT: 0x20280 R_LARCH_TLS_DTPMOD32 - 0x0 ++# LD32-REL-NEXT: } ++# LD32-GOT: section '.got': ++# LD32-GOT-NEXT: 0x00020280 00000000 00000000 ++ ++# LD64-REL: .rela.dyn { ++# LD64-REL-NEXT: 0x20400 R_LARCH_TLS_DTPMOD64 - 0x0 ++# LD64-REL-NEXT: } ++# LD64-GOT: section '.got': ++# LD64-GOT-NEXT: 0x00020400 00000000 00000000 00000000 00000000 ++ ++## LA32: &DTPMOD(a) - . = 0x20280 - 0x101cc: 0x10 pages, page offset 0x280 ++# LD32: 101cc: pcalau12i $a0, 16 ++# LD32-NEXT: addi.w $a0, $a0, 640 ++# LD32-NEXT: bl 44 ++ ++## LA64: &DTPMOD(a) - . = 0x20400 - 0x102e0: 0x10 pages, page offset 0x400 ++# LD64: 102e0: pcalau12i $a0, 16 ++# LD64-NEXT: addi.d $a0, $a0, 1024 ++# LD64-NEXT: bl 40 ++ ++# NOREL: no relocations ++ ++## a is local - its DTPMOD/DTPREL slots are link-time constants. ++## a@dtpmod = 1 (main module) ++# LE32-GOT: section '.got': ++# LE32-GOT-NEXT: 0x00030120 01000000 00000000 ++ ++# LE64-GOT: section '.got': ++# LE64-GOT-NEXT: 0x000301d8 01000000 00000000 00000000 00000000 ++ ++## LA32: DTPMOD(.LANCHOR0) - . = 0x30120 - 0x20114: 0x10 pages, page offset 0x120 ++# LE32: 20114: pcalau12i $a0, 16 ++# LE32-NEXT: addi.w $a0, $a0, 288 ++# LE32-NEXT: bl 4 ++ ++## LA64: DTPMOD(.LANCHOR0) - . = 0x301d8 - 0x201c8: 0x10 pages, page offset 0x1d8 ++# LE64: 201c8: pcalau12i $a0, 16 ++# LE64-NEXT: addi.d $a0, $a0, 472 ++# LE64-NEXT: bl 4 ++ ++#--- a.s ++la.tls.ld $a0, .LANCHOR0 ++bl %plt(__tls_get_addr) ++ ++.section .tbss,"awT",@nobits ++.set .LANCHOR0, . 
+ 0 ++.zero 8 ++ ++#--- tga.s ++.globl __tls_get_addr ++__tls_get_addr: +diff --git a/lld/test/ELF/loongarch-tls-le.s b/lld/test/ELF/loongarch-tls-le.s +new file mode 100644 +index 000000000000..a20d7d83bae3 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-le.s +@@ -0,0 +1,42 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o ++ ++# RUN: ld.lld %t.32.o -o %t.32 ++# RUN: llvm-nm -p %t.32 | FileCheck --check-prefixes=NM %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s ++ ++# RUN: ld.lld %t.64.o -o %t.64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s ++ ++# RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: ++ ++# ERR: error: relocation R_LARCH_TLS_LE_HI20 against .LANCHOR0 cannot be used with -shared ++# ERR: error: relocation R_LARCH_TLS_LE_LO12 against .LANCHOR0 cannot be used with -shared ++# ERR: error: relocation R_LARCH_TLS_LE_HI20 against a cannot be used with -shared ++# ERR: error: relocation R_LARCH_TLS_LE_LO12 against a cannot be used with -shared ++ ++# NM: {{0*}}00000008 b .LANCHOR0 ++# NM: {{0*}}00000800 B a ++ ++## .LANCHOR0@tprel = 8 ++## a@tprel = 0x800 ++# LE: lu12i.w $a0, 0 ++# LE-NEXT: ori $a0, $a0, 8 ++# LE-NEXT: lu12i.w $a1, 0 ++# LE-NEXT: ori $a1, $a1, 2048 ++# LE-EMPTY: ++ ++.text ++_start: ++la.tls.le $a0, .LANCHOR0 ++la.tls.le $a1, a ++ ++.section .tbss,"awT",@nobits ++.space 8 ++.LANCHOR0: ++.space 0x800-8 ++.globl a ++a: ++.zero 4 +diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py +index 96a1d652573f..4b84cfd93dc5 100644 +--- a/lld/test/lit.cfg.py ++++ b/lld/test/lit.cfg.py +@@ -63,20 +63,28 @@ if platform.system() not in ['Windows']: + config.available_features.add('demangler') + + llvm_config.feature_config( +- [('--targets-built', {'AArch64': 'aarch64', +- 'AMDGPU': 'amdgpu', +- 'ARM': 'arm', +- 'AVR': 'avr', +- 'Hexagon': 'hexagon', +- 'Mips': 'mips', +- 'MSP430': 'msp430', +- 'PowerPC': 'ppc', +- 'RISCV': 'riscv', +- 'Sparc': 'sparc', +- 'WebAssembly': 'wasm', +- 'X86': 'x86'}), +- ('--assertion-mode', {'ON': 'asserts'}), +- ]) ++ [ ++ ( ++ "--targets-built", ++ { ++ "AArch64": "aarch64", ++ "AMDGPU": "amdgpu", ++ "ARM": "arm", ++ "AVR": "avr", ++ "Hexagon": "hexagon", ++ "LoongArch": "loongarch", ++ "Mips": "mips", ++ "MSP430": "msp430", ++ "PowerPC": "ppc", ++ "RISCV": "riscv", ++ "Sparc": "sparc", ++ "WebAssembly": "wasm", ++ "X86": "x86", ++ }, ++ ), ++ ("--assertion-mode", {"ON": "asserts"}), ++ ] ++) + + # Set a fake constant version so that we get consistent output. 
+ config.environment['LLD_VERSION'] = 'LLD 1.0' diff --git a/llvm/PKGBUILD b/llvm/PKGBUILD index e53aec9141..d4be161983 100644 --- a/llvm/PKGBUILD +++ b/llvm/PKGBUILD @@ -14,13 +14,17 @@ options=('staticlibs' '!lto') # https://github.com/llvm/llvm-project/issues/5774 _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver source=($_source_base/llvm-$pkgver.src.tar.xz{,.sig} $_source_base/cmake-$pkgver.src.tar.xz{,.sig} - $_source_base/third-party-$pkgver.src.tar.xz{,.sig}) + $_source_base/third-party-$pkgver.src.tar.xz{,.sig} + llvm-newreloc-la64.patch + RuntimeDyld-MCJIT-Add-LoongArch-support.patch) sha256sums=('e91db44d1b3bb1c33fcea9a7d1f2423b883eaa9163d3d56ca2aa6d2f0711bc29' 'SKIP' '39d342a4161095d2f28fb1253e4585978ac50521117da666e2b1f6f28b62f514' 'SKIP' '15f5b9aeeba938530af977d5f9205612737a091a7f0f6c8075df8723b7713f70' - 'SKIP') + 'SKIP' + 'f26ad05b93f5b7918fcf4209e892d135029e58ca0f0141a50cb67bfec7e80061' + '7c7de135adc059b5877270c38e050e02c516c6cb04cd4b10f1111d956e0c82e0') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A' # Tom Stellard 'D574BD5D1D0E98895E3BF90044F2485E45D59042') # Tobias Hieta @@ -55,6 +59,8 @@ _get_distribution_components() { prepare() { rename -v -- "-$pkgver.src" '' {cmake,third-party}-$pkgver.src cd llvm-$pkgver.src + patch -Np1 -i ../RuntimeDyld-MCJIT-Add-LoongArch-support.patch + patch -Np2 -i ../llvm-newreloc-la64.patch mkdir build } diff --git a/llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch b/llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch new file mode 100644 index 0000000000..6b03eebd59 --- /dev/null +++ b/llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch @@ -0,0 +1,328 @@ +From cb5f5e13300712b5dde6cfcafab9f7f465096c18 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 30 May 2023 19:16:18 +0800 +Subject: [PATCH] [RuntimeDyld][MCJIT] Add LoongArch support + +--- + lib/ExecutionEngine/Orc/LLJIT.cpp | 3 +- + .../RuntimeDyld/RuntimeDyld.cpp | 12 ++ + .../RuntimeDyld/RuntimeDyldELF.cpp | 198 ++++++++++++++++++ + .../RuntimeDyld/RuntimeDyldELF.h | 14 ++ + 4 files changed, 226 insertions(+), 1 deletion(-) + +diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp +index bc84988e..e06dea9d 100644 +--- a/lib/ExecutionEngine/Orc/LLJIT.cpp ++++ b/lib/ExecutionEngine/Orc/LLJIT.cpp +@@ -802,7 +802,8 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { + + if (S.JTMB->getTargetTriple().isOSBinFormatELF() && + (S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64 || +- S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le)) ++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le || ++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::loongarch64)) + Layer->setAutoClaimResponsibilityForObjectSymbols(true); + + // FIXME: Explicit conversion to std::unique_ptr added to silence +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +index a9aaff42..b154ea28 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +@@ -987,6 +987,18 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, + // and stubs for branches Thumb - ARM and ARM - Thumb. 
+ writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4] + return Addr + 4; ++ } else if (Arch == Triple::loongarch64) { ++ // lu12i.w $t0, %abs_hi20(addr) ++ // ori $t0, $t0, %abs_lo12(addr) ++ // lu32i.d $t0, %abs64_lo20(addr) ++ // lu52i.d $t0, $t0, %abs64_lo12(addr) ++ // jr $t0 ++ writeBytesUnaligned(0x1400000c, Addr, 4); ++ writeBytesUnaligned(0x0380018c, Addr + 4, 4); ++ writeBytesUnaligned(0x1600000c, Addr + 8, 4); ++ writeBytesUnaligned(0x0300018c, Addr + 12, 4); ++ writeBytesUnaligned(0x4c000180, Addr + 16, 4); ++ return Addr; + } else if (IsMipsO32ABI || IsMipsN32ABI) { + // 0: 3c190000 lui t9,%hi(addr). + // 4: 27390000 addiu t9,t9,%lo(addr). +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +index 2fe49fef..f85452be 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +@@ -641,6 +641,102 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, + } + } + ++// Returns extract bits Val[Hi:Lo]. ++static inline uint32_t extractBits(uint32_t Val, unsigned Hi, unsigned Lo) { ++ return (Val & (((1UL << (Hi + 1)) - 1))) >> Lo; ++} ++ ++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, ++ uint64_t Value, uint32_t Type, ++ int64_t Addend) { ++ uint32_t *TargetPtr = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); ++ ++ LLVM_DEBUG(dbgs() << "resolveLoongArch64Relocation, LocalAddress: 0x" ++ << format("%llx", Section.getAddressWithOffset(Offset)) ++ << " FinalAddress: 0x" << format("%llx", FinalAddress) ++ << " Value: 0x" << format("%llx", Value) << " Type: 0x" ++ << format("%x", Type) << " Addend: 0x" ++ << format("%llx", Addend) << "\n"); ++ ++ switch (Type) { ++ default: ++ report_fatal_error("Relocation type not implemented yet!"); ++ break; ++ case ELF::R_LARCH_32: ++ *(support::little32_t *)TargetPtr = static_cast(Value + Addend); ++ break; ++ case ELF::R_LARCH_64: ++ *(support::little64_t *)TargetPtr = Value + Addend; ++ break; ++ case ELF::R_LARCH_32_PCREL: ++ *(support::little32_t *)TargetPtr = ++ static_cast(Value - FinalAddress + Addend); ++ break; ++ case ELF::R_LARCH_B26: { ++ uint64_t BranchImm = Value - FinalAddress + Addend; ++ assert(isInt<28>(BranchImm)); ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm = static_cast(BranchImm >> 2); ++ uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10; ++ uint32_t Imm25_16 = extractBits(Imm, /*Hi=*/25, /*Lo=*/16); ++ *(support::little32_t *)TargetPtr = RawInstr | Imm15_0 | Imm25_16; ++ break; ++ } ++ case ELF::R_LARCH_GOT_PC_HI20: ++ case ELF::R_LARCH_PCALA_HI20: { ++ uint64_t Target = Value + Addend; ++ uint64_t TargetPage = ++ (Target + (Target & 0x800)) & ~static_cast(0xfff); ++ uint64_t PCPage = FinalAddress & ~static_cast(0xfff); ++ int64_t PageDelta = TargetPage - PCPage; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; ++ break; ++ } ++ case ELF::R_LARCH_GOT_PC_LO12: ++ case ELF::R_LARCH_PCALA_LO12: { ++ // TODO: code-model=medium ++ uint64_t TargetOffset = (Value + Addend) & 0xfff; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm11_0 = TargetOffset << 10; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; ++ break; ++ } ++ case 
ELF::R_LARCH_ABS_HI20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm31_12 = extractBits(Target, /*Hi=*/31, /*Lo=*/12) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; ++ break; ++ } ++ case ELF::R_LARCH_ABS_LO12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm11_0 = extractBits(Target, /*Hi=*/11, /*Lo=*/0) << 10; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_LO20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm51_32 = extractBits(Target >> 32, /*Hi=*/19, /*Lo=*/0) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm51_32; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_HI12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm63_52 = extractBits(Target >> 32, /*Hi=*/31, /*Lo=*/20) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm63_52; ++ break; ++ } ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1057,6 +1153,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1209,6 +1308,81 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, + } + } + ++bool RuntimeDyldELF::resolveLoongArch64ShortBranch( ++ unsigned SectionID, relocation_iterator RelI, ++ const RelocationValueRef &Value) { ++ uint64_t Address; ++ if (Value.SymbolName) { ++ auto Loc = GlobalSymbolTable.find(Value.SymbolName); ++ // Don't create direct branch for external symbols. ++ if (Loc == GlobalSymbolTable.end()) ++ return false; ++ const auto &SymInfo = Loc->second; ++ Address = ++ uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( ++ SymInfo.getOffset())); ++ } else { ++ Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); ++ } ++ uint64_t Offset = RelI->getOffset(); ++ uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); ++ if (!isInt<28>(Address + Value.Addend - SourceAddress)) ++ return false; ++ resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), ++ Value.Addend); ++ return true; ++} ++ ++void RuntimeDyldELF::resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, ++ StubMap &Stubs) { ++ LLVM_DEBUG(dbgs() << "\t\tThis is an LoongArch64 branch relocation.\n"); ++ SectionEntry &Section = Sections[SectionID]; ++ uint64_t Offset = RelI->getOffset(); ++ unsigned RelType = RelI->getType(); ++ // Look for an existing stub. ++ StubMap::const_iterator i = Stubs.find(Value); ++ if (i != Stubs.end()) { ++ resolveRelocation(Section, Offset, ++ (uint64_t)Section.getAddressWithOffset(i->second), ++ RelType, 0); ++ LLVM_DEBUG(dbgs() << " Stub function found\n"); ++ } else if (!resolveLoongArch64ShortBranch(SectionID, RelI, Value)) { ++ // Create a new stub function. 
++ LLVM_DEBUG(dbgs() << " Create a new stub function\n"); ++ Stubs[Value] = Section.getStubOffset(); ++ uint8_t *StubTargetAddr = createStubFunction( ++ Section.getAddressWithOffset(Section.getStubOffset())); ++ RelocationEntry LU12I_W(SectionID, StubTargetAddr - Section.getAddress(), ++ ELF::R_LARCH_ABS_HI20, Value.Addend); ++ RelocationEntry ORI(SectionID, StubTargetAddr - Section.getAddress() + 4, ++ ELF::R_LARCH_ABS_LO12, Value.Addend); ++ RelocationEntry LU32I_D(SectionID, ++ StubTargetAddr - Section.getAddress() + 8, ++ ELF::R_LARCH_ABS64_LO20, Value.Addend); ++ RelocationEntry LU52I_D(SectionID, ++ StubTargetAddr - Section.getAddress() + 12, ++ ELF::R_LARCH_ABS64_HI12, Value.Addend); ++ if (Value.SymbolName) { ++ addRelocationForSymbol(LU12I_W, Value.SymbolName); ++ addRelocationForSymbol(ORI, Value.SymbolName); ++ addRelocationForSymbol(LU32I_D, Value.SymbolName); ++ addRelocationForSymbol(LU52I_D, Value.SymbolName); ++ } else { ++ addRelocationForSection(LU12I_W, Value.SectionID); ++ addRelocationForSection(ORI, Value.SectionID); ++ addRelocationForSection(LU32I_D, Value.SectionID); ++ addRelocationForSection(LU52I_D, Value.SectionID); ++ } ++ resolveRelocation(Section, Offset, ++ reinterpret_cast(Section.getAddressWithOffset( ++ Section.getStubOffset())), ++ RelType, 0); ++ Section.advanceStubOffset(getMaxStubSize()); ++ } ++} ++ + Expected + RuntimeDyldELF::processRelocationRef( + unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, +@@ -1369,6 +1543,25 @@ RuntimeDyldELF::processRelocationRef( + } + processSimpleRelocation(SectionID, Offset, RelType, Value); + } ++ } else if (Arch == Triple::loongarch64) { ++ if (RelType == ELF::R_LARCH_B26 && MemMgr.allowStubAllocation()) { ++ resolveLoongArch64Branch(SectionID, Value, RelI, Stubs); ++ } else if (RelType == ELF::R_LARCH_GOT_PC_HI20 || ++ RelType == ELF::R_LARCH_GOT_PC_LO12) { ++ // FIXME: This will create redundant got entry. ++ uint64_t GOTOffset = allocateGOTEntries(1); ++ // Create relocation for newly created GOT entry. 
++ RelocationEntry RE = ++ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_LARCH_64); ++ if (Value.SymbolName) ++ addRelocationForSymbol(RE, Value.SymbolName); ++ else ++ addRelocationForSection(RE, Value.SectionID); ++ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ++ RelType); ++ } else { ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2214,6 +2407,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +@@ -2525,6 +2719,10 @@ bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { + return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || + RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; + ++ if (Arch == Triple::loongarch64) ++ return RelTy == ELF::R_LARCH_GOT_PC_HI20 || ++ RelTy == ELF::R_LARCH_GOT_PC_LO12; ++ + if (Arch == Triple::x86_64) + return RelTy == ELF::R_X86_64_GOTPCREL || + RelTy == ELF::R_X86_64_GOTPCRELX || +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index dfdd98cb..2c930219 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ ++ bool resolveLoongArch64ShortBranch(unsigned SectionID, ++ relocation_iterator RelI, ++ const RelocationValueRef &Value); ++ ++ void resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, StubMap &Stubs); ++ + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -69,6 +81,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + return 16; + else if (IsMipsN64ABI) + return 32; ++ if (Arch == Triple::loongarch64) ++ return 20; // lu12i.w; ori; lu32i.d; lu52i.d; jr + else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) + return 44; + else if (Arch == Triple::x86_64) +-- +2.40.0 + diff --git a/llvm/llvm-newreloc-la64.patch b/llvm/llvm-newreloc-la64.patch new file mode 100644 index 0000000000..60a5e8f07e --- /dev/null +++ b/llvm/llvm-newreloc-la64.patch @@ -0,0 +1,132 @@ +diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +index 67dbd020140b..02bce3c71712 100644 +--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def ++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +@@ -103,3 +103,18 @@ ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97) + ELF_RELOC(R_LARCH_TLS_GD_HI20, 98) + ELF_RELOC(R_LARCH_32_PCREL, 99) + ELF_RELOC(R_LARCH_RELAX, 100) ++ ++// Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the ++// v2.10 LoongArch ABI specs. 
++// ++// Spec addition: https://github.com/loongson/la-abi-specs/pull/1 ++// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138 ++ELF_RELOC(R_LARCH_DELETE, 101) ++ELF_RELOC(R_LARCH_ALIGN, 102) ++ELF_RELOC(R_LARCH_PCREL20_S2, 103) ++ELF_RELOC(R_LARCH_CFA, 104) ++ELF_RELOC(R_LARCH_ADD6, 105) ++ELF_RELOC(R_LARCH_SUB6, 106) ++ELF_RELOC(R_LARCH_ADD_ULEB128, 107) ++ELF_RELOC(R_LARCH_SUB_ULEB128, 108) ++ELF_RELOC(R_LARCH_64_PCREL, 109) +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index 57330dd31f71..a6b9c0652639 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -59,7 +59,7 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + case FK_Data_4: + return IsPCRel ? ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; + case FK_Data_8: +- return ELF::R_LARCH_64; ++ return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64; + case LoongArch::fixup_loongarch_b16: + return ELF::R_LARCH_B16; + case LoongArch::fixup_loongarch_b21: +diff --git a/llvm/test/MC/LoongArch/Relocations/sub-expr.s b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +new file mode 100644 +index 000000000000..0179e1027af8 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +@@ -0,0 +1,28 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck %s ++ ++## Check that subtraction expressions emit R_LARCH_32_PCREL and R_LARCH_64_PCREL relocations. ++ ++## TODO: 1- or 2-byte data relocations are not supported for now. ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.data { ++# CHECK-NEXT: 0x0 R_LARCH_64_PCREL sx 0x0 ++# CHECK-NEXT: 0x8 R_LARCH_64_PCREL sy 0x0 ++# CHECK-NEXT: 0x10 R_LARCH_32_PCREL sx 0x0 ++# CHECK-NEXT: 0x14 R_LARCH_32_PCREL sy 0x0 ++# CHECK-NEXT: } ++ ++.section sx,"a" ++x: ++nop ++ ++.data ++.8byte x-. ++.8byte y-. ++.4byte x-. ++.4byte y-. 
++ ++.section sy,"a" ++y: ++nop +diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +index c26fae7e8323..e32dc893fa79 100644 +--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test ++++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +@@ -93,6 +93,15 @@ + # CHECK: Type: R_LARCH_TLS_GD_HI20 (98) + # CHECK: Type: R_LARCH_32_PCREL (99) + # CHECK: Type: R_LARCH_RELAX (100) ++# CHECK: Type: R_LARCH_DELETE (101) ++# CHECK: Type: R_LARCH_ALIGN (102) ++# CHECK: Type: R_LARCH_PCREL20_S2 (103) ++# CHECK: Type: R_LARCH_CFA (104) ++# CHECK: Type: R_LARCH_ADD6 (105) ++# CHECK: Type: R_LARCH_SUB6 (106) ++# CHECK: Type: R_LARCH_ADD_ULEB128 (107) ++# CHECK: Type: R_LARCH_SUB_ULEB128 (108) ++# CHECK: Type: R_LARCH_64_PCREL (109) + + --- !ELF + FileHeader: +@@ -193,3 +202,12 @@ Sections: + - Type: R_LARCH_TLS_GD_HI20 + - Type: R_LARCH_32_PCREL + - Type: R_LARCH_RELAX ++ - Type: R_LARCH_DELETE ++ - Type: R_LARCH_ALIGN ++ - Type: R_LARCH_PCREL20_S2 ++ - Type: R_LARCH_CFA ++ - Type: R_LARCH_ADD6 ++ - Type: R_LARCH_SUB6 ++ - Type: R_LARCH_ADD_ULEB128 ++ - Type: R_LARCH_SUB_ULEB128 ++ - Type: R_LARCH_64_PCREL +diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp +index 9cf8feb0e2c5..35fc2ec698fb 100644 +--- a/llvm/unittests/Object/ELFTest.cpp ++++ b/llvm/unittests/Object/ELFTest.cpp +@@ -233,6 +233,24 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_32_PCREL)); + EXPECT_EQ("R_LARCH_RELAX", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_RELAX)); ++ EXPECT_EQ("R_LARCH_DELETE", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_DELETE)); ++ EXPECT_EQ("R_LARCH_ALIGN", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ALIGN)); ++ EXPECT_EQ("R_LARCH_PCREL20_S2", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCREL20_S2)); ++ EXPECT_EQ("R_LARCH_CFA", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CFA)); ++ EXPECT_EQ("R_LARCH_ADD6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD6)); ++ EXPECT_EQ("R_LARCH_SUB6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB6)); ++ EXPECT_EQ("R_LARCH_ADD_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD_ULEB128)); ++ EXPECT_EQ("R_LARCH_SUB_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); ++ EXPECT_EQ("R_LARCH_64_PCREL", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); + } + + TEST(ELFTest, getELFRelativeRelocationType) { diff --git a/llvm14/PKGBUILD b/llvm14/PKGBUILD index 4c0eb411e6..9bc641c9e5 100644 --- a/llvm14/PKGBUILD +++ b/llvm14/PKGBUILD @@ -8,15 +8,17 @@ arch=('loong64' 'x86_64') url="https://llvm.org/" license=('custom:Apache 2.0 with LLVM Exception') makedepends=('cmake' 'ninja' 'libffi' 'libedit' 'ncurses' 'libxml2' - 'python') + 'python' 'python-psutil') checkdepends=('python-psutil') options=('staticlibs' '!lto') # Getting thousands of test failures with LTO _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver source=($_source_base/llvm-$pkgver.src.tar.xz{,.sig} - llvm-coroutines-ubsan.patch) + llvm-coroutines-ubsan.patch + llvm-loong64.patch) sha256sums=('050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a' 'SKIP' - 'ee9baf6df05474083857044d92f26f59d3ee709cdf82ba3bdb2792e6645f71d9') + 'ee9baf6df05474083857044d92f26f59d3ee709cdf82ba3bdb2792e6645f71d9' + '43c66f16ac510842ca7e6ae12869d671838799925c8009ddacae8a1af8f0d7e5') 
validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard # Utilizing LLVM_DISTRIBUTION_COMPONENTS to avoid @@ -49,6 +51,7 @@ _get_distribution_components() { prepare() { cd llvm-$pkgver.src + patch -p1 -i $srcdir/llvm-loong64.patch mkdir build # https://github.com/llvm/llvm-project/issues/49689 diff --git a/llvm14/llvm-loong64.patch b/llvm14/llvm-loong64.patch new file mode 100644 index 0000000000..433a8f4062 --- /dev/null +++ b/llvm14/llvm-loong64.patch @@ -0,0 +1,47164 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 2da05ef8..04292d5b 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -342,6 +342,7 @@ set(LLVM_ALL_TARGETS + BPF + Hexagon + Lanai ++ LoongArch + Mips + MSP430 + NVPTX +diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake +index 18d78879..e16e1c73 100644 +--- a/cmake/config-ix.cmake ++++ b/cmake/config-ix.cmake +@@ -464,6 +464,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv64") + set(LLVM_NATIVE_ARCH RISCV) + elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") + set(LLVM_NATIVE_ARCH M68k) ++elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") ++ set(LLVM_NATIVE_ARCH LoongArch) + else () + message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") + endif () +diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h +index 42277c01..0482bf9c 100644 +--- a/include/llvm/ADT/Triple.h ++++ b/include/llvm/ADT/Triple.h +@@ -57,6 +57,8 @@ public: + bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) + csky, // CSKY: csky + hexagon, // Hexagon: hexagon ++ loongarch32, // LoongArch (32-bit): loongarch32 ++ loongarch64, // LoongArch (64-bit): loongarch64 + m68k, // M68k: Motorola 680x0 family + mips, // MIPS: mips, mipsallegrex, mipsr6 + mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el +@@ -218,6 +220,7 @@ public: + GNUX32, + GNUILP32, + CODE16, ++ GNUABILPX32, + EABI, + EABIHF, + Android, +@@ -789,6 +792,21 @@ public: + return isMIPS32() || isMIPS64(); + } + ++ /// Tests whether the target is LoongArch 32-bit ++ bool isLoongArch32() const { ++ return getArch() == Triple::loongarch32; ++ } ++ ++ /// Tests whether the target is LoongArch 64-bit. ++ bool isLoongArch64() const { ++ return getArch() == Triple::loongarch64; ++ } ++ ++ /// Tests whether the target is LoongArch (32- or 64-bit). ++ bool isLoongArch() const { ++ return isLoongArch32() || isLoongArch64(); ++ } ++ + /// Tests whether the target is PowerPC (32- or 64-bit LE or BE). + bool isPPC() const { + return getArch() == Triple::ppc || getArch() == Triple::ppc64 || +diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h +index 5d3b1270..af00ca0b 100644 +--- a/include/llvm/BinaryFormat/ELF.h ++++ b/include/llvm/BinaryFormat/ELF.h +@@ -319,6 +319,7 @@ enum { + EM_BPF = 247, // Linux kernel bpf virtual machine + EM_VE = 251, // NEC SX-Aurora VE + EM_CSKY = 252, // C-SKY 32-bit processor ++ EM_LOONGARCH = 258, // LoongArch processor + }; + + // Object file classes. +@@ -671,6 +672,25 @@ enum { + STO_RISCV_VARIANT_CC = 0x80 + }; + ++// LoongArch Specific e_flags ++enum : unsigned { ++ // FIXME: Change these when all ABIs definition were finalized. 
++ // See current definitions: ++ // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version ++ EF_LARCH_BASE_ABI = 0x3, ++ EF_LARCH_BASE_ABI_ILP32S = 0x5, ++ EF_LARCH_BASE_ABI_ILP32F = 0x6, ++ EF_LARCH_BASE_ABI_ILP32D = 0x7, ++ EF_LARCH_BASE_ABI_LP64S = 0x1, ++ EF_LARCH_BASE_ABI_LP64F = 0x2, ++ EF_LARCH_BASE_ABI_LP64D = 0x3 ++}; ++ ++// ELF Relocation types for LoongArch ++enum { ++#include "ELFRelocs/LoongArch.def" ++}; ++ + // ELF Relocation types for S390/zSeries + enum { + #include "ELFRelocs/SystemZ.def" +diff --git a/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +new file mode 100644 +index 00000000..6699e732 +--- /dev/null ++++ b/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +@@ -0,0 +1,102 @@ ++ ++#ifndef ELF_RELOC ++#error "ELF_RELOC must be defined" ++#endif ++ ++ELF_RELOC(R_LARCH_NONE, 0) ++ELF_RELOC(R_LARCH_32, 1) ++ELF_RELOC(R_LARCH_64, 2) ++ELF_RELOC(R_LARCH_RELATIVE, 3) ++ELF_RELOC(R_LARCH_COPY, 4) ++ELF_RELOC(R_LARCH_JUMP_SLOT, 5) ++ELF_RELOC(R_LARCH_TLS_DTPMOD32, 6) ++ELF_RELOC(R_LARCH_TLS_DTPMOD64, 7) ++ELF_RELOC(R_LARCH_TLS_DTPREL32, 8) ++ELF_RELOC(R_LARCH_TLS_DTPREL64, 9) ++ELF_RELOC(R_LARCH_TLS_TPREL32, 10) ++ELF_RELOC(R_LARCH_TLS_TPREL64, 11) ++ELF_RELOC(R_LARCH_IRELATIVE, 12) ++ ++ELF_RELOC(R_LARCH_MARK_LA, 20) ++ELF_RELOC(R_LARCH_MARK_PCREL, 21) ++ ++ELF_RELOC(R_LARCH_SOP_PUSH_PCREL, 22) ++ ++ELF_RELOC(R_LARCH_SOP_PUSH_ABSOLUTE, 23) ++ ++ELF_RELOC(R_LARCH_SOP_PUSH_DUP, 24) ++ELF_RELOC(R_LARCH_SOP_PUSH_GPREL, 25) ++ELF_RELOC(R_LARCH_SOP_PUSH_TLS_TPREL, 26) ++ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GOT, 27) ++ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GD, 28) ++ELF_RELOC(R_LARCH_SOP_PUSH_PLT_PCREL, 29) ++ ++ELF_RELOC(R_LARCH_SOP_ASSERT, 30) ++ELF_RELOC(R_LARCH_SOP_NOT, 31) ++ELF_RELOC(R_LARCH_SOP_SUB, 32) ++ELF_RELOC(R_LARCH_SOP_SL, 33) ++ELF_RELOC(R_LARCH_SOP_SR, 34) ++ELF_RELOC(R_LARCH_SOP_ADD, 35) ++ELF_RELOC(R_LARCH_SOP_AND, 36) ++ELF_RELOC(R_LARCH_SOP_IF_ELSE, 37) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_5, 38) ++ELF_RELOC(R_LARCH_SOP_POP_32_U_10_12, 39) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_12, 40) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16, 41) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16_S2, 42) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_5_20, 43) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45) ++ELF_RELOC(R_LARCH_SOP_POP_32_U, 46) ++ ++ELF_RELOC(R_LARCH_ADD8, 47) ++ELF_RELOC(R_LARCH_ADD16, 48) ++ELF_RELOC(R_LARCH_ADD24, 49) ++ELF_RELOC(R_LARCH_ADD32, 50) ++ELF_RELOC(R_LARCH_ADD64, 51) ++ELF_RELOC(R_LARCH_SUB8, 52) ++ELF_RELOC(R_LARCH_SUB16, 53) ++ELF_RELOC(R_LARCH_SUB24, 54) ++ELF_RELOC(R_LARCH_SUB32, 55) ++ELF_RELOC(R_LARCH_SUB64, 56) ++ ++ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57) ++ELF_RELOC(R_LARCH_GNU_VTENTRY, 58) ++ ++ELF_RELOC(R_LARCH_B16, 64) ++ELF_RELOC(R_LARCH_B21, 65) ++ELF_RELOC(R_LARCH_B26, 66) ++ELF_RELOC(R_LARCH_ABS_HI20, 67) ++ELF_RELOC(R_LARCH_ABS_LO12, 68) ++ELF_RELOC(R_LARCH_ABS64_LO20, 69) ++ELF_RELOC(R_LARCH_ABS64_HI12, 70) ++ELF_RELOC(R_LARCH_PCALA_HI20, 71) ++ELF_RELOC(R_LARCH_PCALA_LO12, 72) ++ELF_RELOC(R_LARCH_PCALA64_LO20, 73) ++ELF_RELOC(R_LARCH_PCALA64_HI12, 74) ++ELF_RELOC(R_LARCH_GOT_PC_HI20, 75) ++ELF_RELOC(R_LARCH_GOT_PC_LO12, 76) ++ELF_RELOC(R_LARCH_GOT64_PC_LO20, 77) ++ELF_RELOC(R_LARCH_GOT64_PC_HI12, 78) ++ELF_RELOC(R_LARCH_GOT_HI20, 79) ++ELF_RELOC(R_LARCH_GOT_LO12, 80) ++ELF_RELOC(R_LARCH_GOT64_LO20, 81) ++ELF_RELOC(R_LARCH_GOT64_HI12, 82) ++ELF_RELOC(R_LARCH_TLS_LE_HI20, 83) 
++ELF_RELOC(R_LARCH_TLS_LE_LO12, 84) ++ELF_RELOC(R_LARCH_TLS_LE64_LO20, 85) ++ELF_RELOC(R_LARCH_TLS_LE64_HI12, 86) ++ELF_RELOC(R_LARCH_TLS_IE_PC_HI20, 87) ++ELF_RELOC(R_LARCH_TLS_IE_PC_LO12, 88) ++ELF_RELOC(R_LARCH_TLS_IE64_PC_LO20, 89) ++ELF_RELOC(R_LARCH_TLS_IE64_PC_HI12, 90) ++ELF_RELOC(R_LARCH_TLS_IE_HI20, 91) ++ELF_RELOC(R_LARCH_TLS_IE_LO12, 92) ++ELF_RELOC(R_LARCH_TLS_IE64_LO20, 93) ++ELF_RELOC(R_LARCH_TLS_IE64_HI12, 94) ++ELF_RELOC(R_LARCH_TLS_LD_PC_HI20, 95) ++ELF_RELOC(R_LARCH_TLS_LD_HI20, 96) ++ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97) ++ELF_RELOC(R_LARCH_TLS_GD_HI20, 98) ++ELF_RELOC(R_LARCH_32_PCREL, 99) ++ELF_RELOC(R_LARCH_RELAX, 100) +diff --git a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +index 82dfdc27..4646ffdd 100644 +--- a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h ++++ b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +@@ -330,6 +330,43 @@ public: + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); + }; + ++// @brief LoongArch64 support. ++class OrcLoongArch64 { ++public: ++ static constexpr unsigned PointerSize = 8; ++ static constexpr unsigned TrampolineSize = 40; ++ static constexpr unsigned StubSize = 32; ++ static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; ++ static constexpr unsigned ResolverCodeSize = 0x120; ++ ++ /// Write the resolver code into the given memory. The user is ++ /// responsible for allocating the memory and setting permissions. ++ /// ++ /// ReentryFnAddr should be the address of a function whose signature matches ++ /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr ++ /// argument of writeResolverCode will be passed as the second argument to ++ /// the function at ReentryFnAddr. ++ static void writeResolverCode(char *ResolverWorkingMem, ++ JITTargetAddress ResolverTargetAddress, ++ JITTargetAddress ReentryFnAddr, ++ JITTargetAddress ReentryCtxAddr); ++ ++ /// Write the requested number of trampolines into the given memory, ++ /// which must be big enough to hold 1 pointer, plus NumTrampolines ++ /// trampolines. ++ static void writeTrampolines(char *TrampolineBlockWorkingMem, ++ JITTargetAddress TrampolineBlockTargetAddress, ++ JITTargetAddress ResolverFnAddr, ++ unsigned NumTrampolines); ++ /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. ++ /// Stubs will be written as if linked at StubsBlockTargetAddress, with the ++ /// Nth stub using the Nth pointer in memory starting at ++ /// PointersBlockTargetAddress. 
++ static void writeIndirectStubsBlock( ++ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, ++ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); ++}; ++ + } // end namespace orc + } // end namespace llvm + +diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt +index 0498fc26..b675a45d 100644 +--- a/include/llvm/IR/CMakeLists.txt ++++ b/include/llvm/IR/CMakeLists.txt +@@ -9,6 +9,7 @@ tablegen(LLVM IntrinsicsAMDGPU.h -gen-intrinsic-enums -intrinsic-prefix=amdgcn) + tablegen(LLVM IntrinsicsARM.h -gen-intrinsic-enums -intrinsic-prefix=arm) + tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf) + tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon) ++tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch) + tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips) + tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm) + tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc) +diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h +index cf6b7af9..983fe97e 100644 +--- a/include/llvm/IR/InlineAsm.h ++++ b/include/llvm/IR/InlineAsm.h +@@ -266,6 +266,7 @@ public: + Constraint_Uy, + Constraint_X, + Constraint_Z, ++ Constraint_ZB, + Constraint_ZC, + Constraint_Zy, + Constraints_Max = Constraint_Zy, +diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td +index f5248e82..d4a8df4a 100644 +--- a/include/llvm/IR/Intrinsics.td ++++ b/include/llvm/IR/Intrinsics.td +@@ -1937,3 +1937,4 @@ include "llvm/IR/IntrinsicsSystemZ.td" + include "llvm/IR/IntrinsicsWebAssembly.td" + include "llvm/IR/IntrinsicsRISCV.td" + include "llvm/IR/IntrinsicsVE.td" ++include "llvm/IR/IntrinsicsLoongArch.td" +diff --git a/include/llvm/IR/IntrinsicsLoongArch.td b/include/llvm/IR/IntrinsicsLoongArch.td +new file mode 100644 +index 00000000..6e70173f +--- /dev/null ++++ b/include/llvm/IR/IntrinsicsLoongArch.td +@@ -0,0 +1,3619 @@ ++//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics ---------*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines all of the LoongArch-specific intrinsics. ++// ++//===----------------------------------------------------------------------===// ++ ++let TargetPrefix = "loongarch" in { // All intrinsics start with "llvm.loongarch.". 
++ ++//===----------------------------------------------------------------------===// ++// LoongArch LSX ++ ++def int_loongarch_lsx_vclo_b : GCCBuiltin<"__builtin_lsx_vclo_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_h : GCCBuiltin<"__builtin_lsx_vclo_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_w : GCCBuiltin<"__builtin_lsx_vclo_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_d : GCCBuiltin<"__builtin_lsx_vclo_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vflogb_s : GCCBuiltin<"__builtin_lsx_vflogb_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vflogb_d : GCCBuiltin<"__builtin_lsx_vflogb_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickve2gr_b : GCCBuiltin<"__builtin_lsx_vpickve2gr_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_h : GCCBuiltin<"__builtin_lsx_vpickve2gr_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_w : GCCBuiltin<"__builtin_lsx_vpickve2gr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_d : GCCBuiltin<"__builtin_lsx_vpickve2gr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickve2gr_bu : GCCBuiltin<"__builtin_lsx_vpickve2gr_bu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_hu : GCCBuiltin<"__builtin_lsx_vpickve2gr_hu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_wu : GCCBuiltin<"__builtin_lsx_vpickve2gr_wu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_du : GCCBuiltin<"__builtin_lsx_vpickve2gr_du">, ++ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplvei_b : GCCBuiltin<"__builtin_lsx_vreplvei_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_h : GCCBuiltin<"__builtin_lsx_vreplvei_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_w : GCCBuiltin<"__builtin_lsx_vreplvei_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_d : GCCBuiltin<"__builtin_lsx_vreplvei_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmskltz_b : GCCBuiltin<"__builtin_lsx_vmskltz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_h : GCCBuiltin<"__builtin_lsx_vmskltz_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_w : GCCBuiltin<"__builtin_lsx_vmskltz_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_d : GCCBuiltin<"__builtin_lsx_vmskltz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmadd_s : GCCBuiltin<"__builtin_lsx_vfmadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmadd_d : GCCBuiltin<"__builtin_lsx_vfmadd_d">, ++ 
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmsub_s : GCCBuiltin<"__builtin_lsx_vfmsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmsub_d : GCCBuiltin<"__builtin_lsx_vfmsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfnmadd_s : GCCBuiltin<"__builtin_lsx_vfnmadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfnmadd_d : GCCBuiltin<"__builtin_lsx_vfnmadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfnmsub_s : GCCBuiltin<"__builtin_lsx_vfnmsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfnmsub_d : GCCBuiltin<"__builtin_lsx_vfnmsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_caf_s : GCCBuiltin<"__builtin_lsx_vfcmp_caf_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_caf_d : GCCBuiltin<"__builtin_lsx_vfcmp_caf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cor_s : GCCBuiltin<"__builtin_lsx_vfcmp_cor_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cor_d : GCCBuiltin<"__builtin_lsx_vfcmp_cor_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cun_s : GCCBuiltin<"__builtin_lsx_vfcmp_cun_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cun_d : GCCBuiltin<"__builtin_lsx_vfcmp_cun_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cune_s : GCCBuiltin<"__builtin_lsx_vfcmp_cune_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cune_d : GCCBuiltin<"__builtin_lsx_vfcmp_cune_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cueq_s : GCCBuiltin<"__builtin_lsx_vfcmp_cueq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cueq_d : GCCBuiltin<"__builtin_lsx_vfcmp_cueq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_ceq_s : GCCBuiltin<"__builtin_lsx_vfcmp_ceq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_ceq_d : GCCBuiltin<"__builtin_lsx_vfcmp_ceq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cne_s : GCCBuiltin<"__builtin_lsx_vfcmp_cne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cne_d : GCCBuiltin<"__builtin_lsx_vfcmp_cne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_clt_s : GCCBuiltin<"__builtin_lsx_vfcmp_clt_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_clt_d : GCCBuiltin<"__builtin_lsx_vfcmp_clt_d">, ++ Intrinsic<[llvm_v2i64_ty], 
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cult_s : GCCBuiltin<"__builtin_lsx_vfcmp_cult_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cult_d : GCCBuiltin<"__builtin_lsx_vfcmp_cult_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cle_s : GCCBuiltin<"__builtin_lsx_vfcmp_cle_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cle_d : GCCBuiltin<"__builtin_lsx_vfcmp_cle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cule_s : GCCBuiltin<"__builtin_lsx_vfcmp_cule_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cule_d : GCCBuiltin<"__builtin_lsx_vfcmp_cule_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_saf_s : GCCBuiltin<"__builtin_lsx_vfcmp_saf_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_saf_d : GCCBuiltin<"__builtin_lsx_vfcmp_saf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sor_s : GCCBuiltin<"__builtin_lsx_vfcmp_sor_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sor_d : GCCBuiltin<"__builtin_lsx_vfcmp_sor_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sun_s : GCCBuiltin<"__builtin_lsx_vfcmp_sun_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sun_d : GCCBuiltin<"__builtin_lsx_vfcmp_sun_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sune_s : GCCBuiltin<"__builtin_lsx_vfcmp_sune_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sune_d : GCCBuiltin<"__builtin_lsx_vfcmp_sune_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sueq_s : GCCBuiltin<"__builtin_lsx_vfcmp_sueq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sueq_d : GCCBuiltin<"__builtin_lsx_vfcmp_sueq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_seq_s : GCCBuiltin<"__builtin_lsx_vfcmp_seq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_seq_d : GCCBuiltin<"__builtin_lsx_vfcmp_seq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sne_s : GCCBuiltin<"__builtin_lsx_vfcmp_sne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sne_d : GCCBuiltin<"__builtin_lsx_vfcmp_sne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_slt_s : GCCBuiltin<"__builtin_lsx_vfcmp_slt_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_slt_d : GCCBuiltin<"__builtin_lsx_vfcmp_slt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sult_s : 
GCCBuiltin<"__builtin_lsx_vfcmp_sult_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sult_d : GCCBuiltin<"__builtin_lsx_vfcmp_sult_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sle_s : GCCBuiltin<"__builtin_lsx_vfcmp_sle_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sle_d : GCCBuiltin<"__builtin_lsx_vfcmp_sle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sule_s : GCCBuiltin<"__builtin_lsx_vfcmp_sule_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sule_d : GCCBuiltin<"__builtin_lsx_vfcmp_sule_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitsel_v : GCCBuiltin<"__builtin_lsx_vbitsel_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf_b : GCCBuiltin<"__builtin_lsx_vshuf_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldrepl_b : GCCBuiltin<"__builtin_lsx_vldrepl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_h : GCCBuiltin<"__builtin_lsx_vldrepl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_w : GCCBuiltin<"__builtin_lsx_vldrepl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_d : GCCBuiltin<"__builtin_lsx_vldrepl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vstelm_b : GCCBuiltin<"__builtin_lsx_vstelm_b">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_h : GCCBuiltin<"__builtin_lsx_vstelm_h">, ++ Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_w : GCCBuiltin<"__builtin_lsx_vstelm_w">, ++ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_d : GCCBuiltin<"__builtin_lsx_vstelm_d">, ++ Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vldx : GCCBuiltin<"__builtin_lsx_vldx">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vstx : GCCBuiltin<"__builtin_lsx_vstx">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vaddwev_d_w : GCCBuiltin<"__builtin_lsx_vaddwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_h : GCCBuiltin<"__builtin_lsx_vaddwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_b : GCCBuiltin<"__builtin_lsx_vaddwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_d : GCCBuiltin<"__builtin_lsx_vaddwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwev_d_w : 
GCCBuiltin<"__builtin_lsx_vsubwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_w_h : GCCBuiltin<"__builtin_lsx_vsubwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_h_b : GCCBuiltin<"__builtin_lsx_vsubwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_q_d : GCCBuiltin<"__builtin_lsx_vsubwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++ ++def int_loongarch_lsx_vaddwod_d_w : GCCBuiltin<"__builtin_lsx_vaddwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_h : GCCBuiltin<"__builtin_lsx_vaddwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_b : GCCBuiltin<"__builtin_lsx_vaddwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_d : GCCBuiltin<"__builtin_lsx_vaddwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwod_d_w : GCCBuiltin<"__builtin_lsx_vsubwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_w_h : GCCBuiltin<"__builtin_lsx_vsubwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_h_b : GCCBuiltin<"__builtin_lsx_vsubwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_q_d : GCCBuiltin<"__builtin_lsx_vsubwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwev_d_wu : GCCBuiltin<"__builtin_lsx_vaddwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_hu : GCCBuiltin<"__builtin_lsx_vaddwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_bu : GCCBuiltin<"__builtin_lsx_vaddwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_du : GCCBuiltin<"__builtin_lsx_vaddwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwev_d_wu : GCCBuiltin<"__builtin_lsx_vsubwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_w_hu : GCCBuiltin<"__builtin_lsx_vsubwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_h_bu : GCCBuiltin<"__builtin_lsx_vsubwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_q_du : GCCBuiltin<"__builtin_lsx_vsubwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwod_d_wu : GCCBuiltin<"__builtin_lsx_vaddwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_hu : GCCBuiltin<"__builtin_lsx_vaddwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_bu : GCCBuiltin<"__builtin_lsx_vaddwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], 
[IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_du : GCCBuiltin<"__builtin_lsx_vaddwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwod_d_wu : GCCBuiltin<"__builtin_lsx_vsubwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_w_hu : GCCBuiltin<"__builtin_lsx_vsubwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_h_bu : GCCBuiltin<"__builtin_lsx_vsubwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_q_du : GCCBuiltin<"__builtin_lsx_vsubwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vaddwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vaddwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vaddwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_du_d : GCCBuiltin<"__builtin_lsx_vaddwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vaddwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vaddwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vaddwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_du_d : GCCBuiltin<"__builtin_lsx_vaddwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_qu_du : GCCBuiltin<"__builtin_lsx_vhaddw_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_qu_du : GCCBuiltin<"__builtin_lsx_vhsubw_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_q_d : GCCBuiltin<"__builtin_lsx_vhaddw_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_q_d : GCCBuiltin<"__builtin_lsx_vhsubw_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmuh_b : GCCBuiltin<"__builtin_lsx_vmuh_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_h : GCCBuiltin<"__builtin_lsx_vmuh_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_w : GCCBuiltin<"__builtin_lsx_vmuh_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_d : GCCBuiltin<"__builtin_lsx_vmuh_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmuh_bu : GCCBuiltin<"__builtin_lsx_vmuh_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_hu : GCCBuiltin<"__builtin_lsx_vmuh_hu">, ++ 
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_wu : GCCBuiltin<"__builtin_lsx_vmuh_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_du : GCCBuiltin<"__builtin_lsx_vmuh_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_w : GCCBuiltin<"__builtin_lsx_vmulwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_h : GCCBuiltin<"__builtin_lsx_vmulwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_b : GCCBuiltin<"__builtin_lsx_vmulwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_d : GCCBuiltin<"__builtin_lsx_vmulwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_w : GCCBuiltin<"__builtin_lsx_vmulwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_h : GCCBuiltin<"__builtin_lsx_vmulwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_b : GCCBuiltin<"__builtin_lsx_vmulwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_d : GCCBuiltin<"__builtin_lsx_vmulwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_wu : GCCBuiltin<"__builtin_lsx_vmulwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_hu : GCCBuiltin<"__builtin_lsx_vmulwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_bu : GCCBuiltin<"__builtin_lsx_vmulwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_du : GCCBuiltin<"__builtin_lsx_vmulwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_wu : GCCBuiltin<"__builtin_lsx_vmulwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_hu : GCCBuiltin<"__builtin_lsx_vmulwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_bu : GCCBuiltin<"__builtin_lsx_vmulwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_du : GCCBuiltin<"__builtin_lsx_vmulwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vmulwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vmulwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vmulwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_du_d : GCCBuiltin<"__builtin_lsx_vmulwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def 
int_loongarch_lsx_vmulwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vmulwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vmulwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vmulwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_du_d : GCCBuiltin<"__builtin_lsx_vmulwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_du : GCCBuiltin<"__builtin_lsx_vmaddwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_du : GCCBuiltin<"__builtin_lsx_vmaddwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, 
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_du_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_du_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrln_b_h : GCCBuiltin<"__builtin_lsx_vsrln_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrln_h_w : GCCBuiltin<"__builtin_lsx_vsrln_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrln_w_d : GCCBuiltin<"__builtin_lsx_vsrln_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsran_b_h : GCCBuiltin<"__builtin_lsx_vsran_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsran_h_w : GCCBuiltin<"__builtin_lsx_vsran_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsran_w_d : GCCBuiltin<"__builtin_lsx_vsran_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlrn_b_h : GCCBuiltin<"__builtin_lsx_vsrlrn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrn_h_w : GCCBuiltin<"__builtin_lsx_vsrlrn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrn_w_d : GCCBuiltin<"__builtin_lsx_vsrlrn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrarn_b_h : GCCBuiltin<"__builtin_lsx_vsrarn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarn_h_w : GCCBuiltin<"__builtin_lsx_vsrarn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarn_w_d : GCCBuiltin<"__builtin_lsx_vsrarn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrln_b_h : GCCBuiltin<"__builtin_lsx_vssrln_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_h_w : GCCBuiltin<"__builtin_lsx_vssrln_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_w_d : GCCBuiltin<"__builtin_lsx_vssrln_w_d">, ++ Intrinsic<[llvm_v4i32_ty], 
[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssran_b_h : GCCBuiltin<"__builtin_lsx_vssran_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_h_w : GCCBuiltin<"__builtin_lsx_vssran_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_w_d : GCCBuiltin<"__builtin_lsx_vssran_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrn_b_h : GCCBuiltin<"__builtin_lsx_vssrlrn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_h_w : GCCBuiltin<"__builtin_lsx_vssrlrn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_w_d : GCCBuiltin<"__builtin_lsx_vssrlrn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarn_b_h : GCCBuiltin<"__builtin_lsx_vssrarn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_h_w : GCCBuiltin<"__builtin_lsx_vssrarn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_w_d : GCCBuiltin<"__builtin_lsx_vssrarn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrln_bu_h : GCCBuiltin<"__builtin_lsx_vssrln_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_hu_w : GCCBuiltin<"__builtin_lsx_vssrln_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_wu_d : GCCBuiltin<"__builtin_lsx_vssrln_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssran_bu_h : GCCBuiltin<"__builtin_lsx_vssran_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_hu_w : GCCBuiltin<"__builtin_lsx_vssran_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_wu_d : GCCBuiltin<"__builtin_lsx_vssran_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrn_bu_h : GCCBuiltin<"__builtin_lsx_vssrlrn_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_hu_w : GCCBuiltin<"__builtin_lsx_vssrlrn_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_wu_d : GCCBuiltin<"__builtin_lsx_vssrlrn_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarn_bu_h : GCCBuiltin<"__builtin_lsx_vssrarn_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_hu_w : GCCBuiltin<"__builtin_lsx_vssrarn_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_wu_d : GCCBuiltin<"__builtin_lsx_vssrarn_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vandn_v : GCCBuiltin<"__builtin_lsx_vandn_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vorn_v : GCCBuiltin<"__builtin_lsx_vorn_v">, ++ 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrstp_b : GCCBuiltin<"__builtin_lsx_vfrstp_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vfrstp_h : GCCBuiltin<"__builtin_lsx_vfrstp_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadd_q : GCCBuiltin<"__builtin_lsx_vadd_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_q : GCCBuiltin<"__builtin_lsx_vsub_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsigncov_b : GCCBuiltin<"__builtin_lsx_vsigncov_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_h : GCCBuiltin<"__builtin_lsx_vsigncov_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_w : GCCBuiltin<"__builtin_lsx_vsigncov_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_d : GCCBuiltin<"__builtin_lsx_vsigncov_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvt_h_s : GCCBuiltin<"__builtin_lsx_vfcvt_h_s">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvt_s_d : GCCBuiltin<"__builtin_lsx_vfcvt_s_d">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_l : GCCBuiltin<"__builtin_lsx_vffint_s_l">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_w_d : GCCBuiltin<"__builtin_lsx_vftint_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_w_d : GCCBuiltin<"__builtin_lsx_vftintrz_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrp_w_d : GCCBuiltin<"__builtin_lsx_vftintrp_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrm_w_d : GCCBuiltin<"__builtin_lsx_vftintrm_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrne_w_d : GCCBuiltin<"__builtin_lsx_vftintrne_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbsrl_v : GCCBuiltin<"__builtin_lsx_vbsrl_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbsll_v : GCCBuiltin<"__builtin_lsx_vbsll_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrstpi_b : GCCBuiltin<"__builtin_lsx_vfrstpi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrstpi_h : GCCBuiltin<"__builtin_lsx_vfrstpi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vneg_b : GCCBuiltin<"__builtin_lsx_vneg_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_h : GCCBuiltin<"__builtin_lsx_vneg_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_w : GCCBuiltin<"__builtin_lsx_vneg_w">, ++ 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_d : GCCBuiltin<"__builtin_lsx_vneg_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmskgez_b : GCCBuiltin<"__builtin_lsx_vmskgez_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmsknz_b : GCCBuiltin<"__builtin_lsx_vmsknz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrm_s : GCCBuiltin<"__builtin_lsx_vfrintrm_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrm_d : GCCBuiltin<"__builtin_lsx_vfrintrm_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrp_s : GCCBuiltin<"__builtin_lsx_vfrintrp_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrp_d : GCCBuiltin<"__builtin_lsx_vfrintrp_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrz_s : GCCBuiltin<"__builtin_lsx_vfrintrz_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrz_d : GCCBuiltin<"__builtin_lsx_vfrintrz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrne_s : GCCBuiltin<"__builtin_lsx_vfrintrne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrne_d : GCCBuiltin<"__builtin_lsx_vfrintrne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffinth_d_w : GCCBuiltin<"__builtin_lsx_vffinth_d_w">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffintl_d_w : GCCBuiltin<"__builtin_lsx_vffintl_d_w">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrm_w_s : GCCBuiltin<"__builtin_lsx_vftintrm_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrm_l_d : GCCBuiltin<"__builtin_lsx_vftintrm_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrp_w_s : GCCBuiltin<"__builtin_lsx_vftintrp_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrp_l_d : GCCBuiltin<"__builtin_lsx_vftintrp_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_w_s : GCCBuiltin<"__builtin_lsx_vftintrz_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrz_l_d : GCCBuiltin<"__builtin_lsx_vftintrz_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrne_w_s : GCCBuiltin<"__builtin_lsx_vftintrne_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrne_l_d : GCCBuiltin<"__builtin_lsx_vftintrne_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftinth_l_s : GCCBuiltin<"__builtin_lsx_vftinth_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintl_l_s : GCCBuiltin<"__builtin_lsx_vftintl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrmh_l_s : GCCBuiltin<"__builtin_lsx_vftintrmh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrml_l_s : GCCBuiltin<"__builtin_lsx_vftintrml_l_s">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrph_l_s : GCCBuiltin<"__builtin_lsx_vftintrph_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrpl_l_s : GCCBuiltin<"__builtin_lsx_vftintrpl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrzh_l_s : GCCBuiltin<"__builtin_lsx_vftintrzh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrzl_l_s : GCCBuiltin<"__builtin_lsx_vftintrzl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrneh_l_s : GCCBuiltin<"__builtin_lsx_vftintrneh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrnel_l_s : GCCBuiltin<"__builtin_lsx_vftintrnel_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vexth_d_w : GCCBuiltin<"__builtin_lsx_vexth_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_w_h : GCCBuiltin<"__builtin_lsx_vexth_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_h_b : GCCBuiltin<"__builtin_lsx_vexth_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_q_d : GCCBuiltin<"__builtin_lsx_vexth_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vexth_du_wu : GCCBuiltin<"__builtin_lsx_vexth_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_wu_hu : GCCBuiltin<"__builtin_lsx_vexth_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_hu_bu : GCCBuiltin<"__builtin_lsx_vexth_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_qu_du : GCCBuiltin<"__builtin_lsx_vexth_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvexth_du_wu : GCCBuiltin<"__builtin_lasx_xvexth_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_wu_hu : GCCBuiltin<"__builtin_lasx_xvexth_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_hu_bu : GCCBuiltin<"__builtin_lasx_xvexth_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_qu_du : GCCBuiltin<"__builtin_lasx_xvexth_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsllwil_d_w : GCCBuiltin<"__builtin_lsx_vsllwil_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_w_h : GCCBuiltin<"__builtin_lsx_vsllwil_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_h_b : GCCBuiltin<"__builtin_lsx_vsllwil_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextl_q_d : GCCBuiltin<"__builtin_lsx_vextl_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsllwil_du_wu : GCCBuiltin<"__builtin_lsx_vsllwil_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_wu_hu : GCCBuiltin<"__builtin_lsx_vsllwil_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vsllwil_hu_bu : GCCBuiltin<"__builtin_lsx_vsllwil_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextl_qu_du : GCCBuiltin<"__builtin_lsx_vextl_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitclri_b : GCCBuiltin<"__builtin_lsx_vbitclri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_h : GCCBuiltin<"__builtin_lsx_vbitclri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_w : GCCBuiltin<"__builtin_lsx_vbitclri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_d : GCCBuiltin<"__builtin_lsx_vbitclri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitseti_b : GCCBuiltin<"__builtin_lsx_vbitseti_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_h : GCCBuiltin<"__builtin_lsx_vbitseti_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_w : GCCBuiltin<"__builtin_lsx_vbitseti_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_d : GCCBuiltin<"__builtin_lsx_vbitseti_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitrevi_b : GCCBuiltin<"__builtin_lsx_vbitrevi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_h : GCCBuiltin<"__builtin_lsx_vbitrevi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_w : GCCBuiltin<"__builtin_lsx_vbitrevi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_d : GCCBuiltin<"__builtin_lsx_vbitrevi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrni_b_h : GCCBuiltin<"__builtin_lsx_vssrlrni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_h_w : GCCBuiltin<"__builtin_lsx_vssrlrni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_w_d : GCCBuiltin<"__builtin_lsx_vssrlrni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_d_q : GCCBuiltin<"__builtin_lsx_vssrlrni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrani_b_h : GCCBuiltin<"__builtin_lsx_vsrani_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_h_w : GCCBuiltin<"__builtin_lsx_vsrani_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_w_d : GCCBuiltin<"__builtin_lsx_vsrani_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_d_q : GCCBuiltin<"__builtin_lsx_vsrani_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextrins_b : GCCBuiltin<"__builtin_lsx_vextrins_b">, ++ 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_h : GCCBuiltin<"__builtin_lsx_vextrins_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_w : GCCBuiltin<"__builtin_lsx_vextrins_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_d : GCCBuiltin<"__builtin_lsx_vextrins_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitseli_b : GCCBuiltin<"__builtin_lsx_vbitseli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vandi_b : GCCBuiltin<"__builtin_lsx_vandi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vori_b : GCCBuiltin<"__builtin_lsx_vori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vxori_b : GCCBuiltin<"__builtin_lsx_vxori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vnori_b : GCCBuiltin<"__builtin_lsx_vnori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldi : GCCBuiltin<"__builtin_lsx_vldi">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpermi_w : GCCBuiltin<"__builtin_lsx_vpermi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsadd_b : GCCBuiltin<"__builtin_lsx_vsadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_h : GCCBuiltin<"__builtin_lsx_vsadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_w : GCCBuiltin<"__builtin_lsx_vsadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_d : GCCBuiltin<"__builtin_lsx_vsadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vssub_b : GCCBuiltin<"__builtin_lsx_vssub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_h : GCCBuiltin<"__builtin_lsx_vssub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_w : GCCBuiltin<"__builtin_lsx_vssub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_d : GCCBuiltin<"__builtin_lsx_vssub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsadd_bu : GCCBuiltin<"__builtin_lsx_vsadd_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_hu : GCCBuiltin<"__builtin_lsx_vsadd_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_wu : GCCBuiltin<"__builtin_lsx_vsadd_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_du : GCCBuiltin<"__builtin_lsx_vsadd_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def 
int_loongarch_lsx_vssub_bu : GCCBuiltin<"__builtin_lsx_vssub_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_hu : GCCBuiltin<"__builtin_lsx_vssub_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_wu : GCCBuiltin<"__builtin_lsx_vssub_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_du : GCCBuiltin<"__builtin_lsx_vssub_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_h_b : GCCBuiltin<"__builtin_lsx_vhaddw_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_w_h : GCCBuiltin<"__builtin_lsx_vhaddw_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_d_w : GCCBuiltin<"__builtin_lsx_vhaddw_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhsubw_h_b : GCCBuiltin<"__builtin_lsx_vhsubw_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_w_h : GCCBuiltin<"__builtin_lsx_vhsubw_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_d_w : GCCBuiltin<"__builtin_lsx_vhsubw_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_hu_bu : GCCBuiltin<"__builtin_lsx_vhaddw_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_wu_hu : GCCBuiltin<"__builtin_lsx_vhaddw_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_du_wu : GCCBuiltin<"__builtin_lsx_vhaddw_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhsubw_hu_bu : GCCBuiltin<"__builtin_lsx_vhsubw_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_wu_hu : GCCBuiltin<"__builtin_lsx_vhsubw_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_du_wu : GCCBuiltin<"__builtin_lsx_vhsubw_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadda_b : GCCBuiltin<"__builtin_lsx_vadda_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_h : GCCBuiltin<"__builtin_lsx_vadda_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_w : GCCBuiltin<"__builtin_lsx_vadda_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_d : GCCBuiltin<"__builtin_lsx_vadda_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vabsd_b : GCCBuiltin<"__builtin_lsx_vabsd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_h : GCCBuiltin<"__builtin_lsx_vabsd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_w : GCCBuiltin<"__builtin_lsx_vabsd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], 
[IntrNoMem]>; ++def int_loongarch_lsx_vabsd_d : GCCBuiltin<"__builtin_lsx_vabsd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vabsd_bu : GCCBuiltin<"__builtin_lsx_vabsd_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_hu : GCCBuiltin<"__builtin_lsx_vabsd_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_wu : GCCBuiltin<"__builtin_lsx_vabsd_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_du : GCCBuiltin<"__builtin_lsx_vabsd_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vavg_b : GCCBuiltin<"__builtin_lsx_vavg_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_h : GCCBuiltin<"__builtin_lsx_vavg_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_w : GCCBuiltin<"__builtin_lsx_vavg_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_d : GCCBuiltin<"__builtin_lsx_vavg_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavg_bu : GCCBuiltin<"__builtin_lsx_vavg_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_hu : GCCBuiltin<"__builtin_lsx_vavg_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_wu : GCCBuiltin<"__builtin_lsx_vavg_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_du : GCCBuiltin<"__builtin_lsx_vavg_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavgr_b : GCCBuiltin<"__builtin_lsx_vavgr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_h : GCCBuiltin<"__builtin_lsx_vavgr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_w : GCCBuiltin<"__builtin_lsx_vavgr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_d : GCCBuiltin<"__builtin_lsx_vavgr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavgr_bu : GCCBuiltin<"__builtin_lsx_vavgr_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_hu : GCCBuiltin<"__builtin_lsx_vavgr_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_wu : GCCBuiltin<"__builtin_lsx_vavgr_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_du : GCCBuiltin<"__builtin_lsx_vavgr_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlr_b : GCCBuiltin<"__builtin_lsx_vsrlr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vsrlr_h : GCCBuiltin<"__builtin_lsx_vsrlr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_w : GCCBuiltin<"__builtin_lsx_vsrlr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_d : GCCBuiltin<"__builtin_lsx_vsrlr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrar_b : GCCBuiltin<"__builtin_lsx_vsrar_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_h : GCCBuiltin<"__builtin_lsx_vsrar_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_w : GCCBuiltin<"__builtin_lsx_vsrar_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_d : GCCBuiltin<"__builtin_lsx_vsrar_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmax_s : GCCBuiltin<"__builtin_lsx_vfmax_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmax_d : GCCBuiltin<"__builtin_lsx_vfmax_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmin_s : GCCBuiltin<"__builtin_lsx_vfmin_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmin_d : GCCBuiltin<"__builtin_lsx_vfmin_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmaxa_s : GCCBuiltin<"__builtin_lsx_vfmaxa_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmaxa_d : GCCBuiltin<"__builtin_lsx_vfmaxa_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmina_s : GCCBuiltin<"__builtin_lsx_vfmina_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmina_d : GCCBuiltin<"__builtin_lsx_vfmina_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfclass_s : GCCBuiltin<"__builtin_lsx_vfclass_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfclass_d : GCCBuiltin<"__builtin_lsx_vfclass_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrecip_s : GCCBuiltin<"__builtin_lsx_vfrecip_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrecip_d : GCCBuiltin<"__builtin_lsx_vfrecip_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrsqrt_s : GCCBuiltin<"__builtin_lsx_vfrsqrt_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrsqrt_d : GCCBuiltin<"__builtin_lsx_vfrsqrt_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvtl_s_h : GCCBuiltin<"__builtin_lsx_vfcvtl_s_h">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvtl_d_s : GCCBuiltin<"__builtin_lsx_vfcvtl_d_s">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvth_s_h : GCCBuiltin<"__builtin_lsx_vfcvth_s_h">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvth_d_s : GCCBuiltin<"__builtin_lsx_vfcvth_d_s">, ++ 
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftint_w_s : GCCBuiltin<"__builtin_lsx_vftint_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_l_d : GCCBuiltin<"__builtin_lsx_vftint_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftint_wu_s : GCCBuiltin<"__builtin_lsx_vftint_wu_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_lu_d : GCCBuiltin<"__builtin_lsx_vftint_lu_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlri_b : GCCBuiltin<"__builtin_lsx_vsrlri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_h : GCCBuiltin<"__builtin_lsx_vsrlri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_w : GCCBuiltin<"__builtin_lsx_vsrlri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_d : GCCBuiltin<"__builtin_lsx_vsrlri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrari_b : GCCBuiltin<"__builtin_lsx_vsrari_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_h : GCCBuiltin<"__builtin_lsx_vsrari_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_w : GCCBuiltin<"__builtin_lsx_vsrari_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_d : GCCBuiltin<"__builtin_lsx_vsrari_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsat_b : GCCBuiltin<"__builtin_lsx_vsat_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_h : GCCBuiltin<"__builtin_lsx_vsat_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_w : GCCBuiltin<"__builtin_lsx_vsat_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_d : GCCBuiltin<"__builtin_lsx_vsat_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsat_bu : GCCBuiltin<"__builtin_lsx_vsat_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_hu : GCCBuiltin<"__builtin_lsx_vsat_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_wu : GCCBuiltin<"__builtin_lsx_vsat_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_du : GCCBuiltin<"__builtin_lsx_vsat_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlni_b_h : GCCBuiltin<"__builtin_lsx_vsrlni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_h_w : GCCBuiltin<"__builtin_lsx_vsrlni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_w_d : GCCBuiltin<"__builtin_lsx_vsrlni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_d_q : GCCBuiltin<"__builtin_lsx_vsrlni_d_q">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlni_b_h : GCCBuiltin<"__builtin_lsx_vssrlni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_h_w : GCCBuiltin<"__builtin_lsx_vssrlni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_w_d : GCCBuiltin<"__builtin_lsx_vssrlni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_d_q : GCCBuiltin<"__builtin_lsx_vssrlni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrni_bu_h : GCCBuiltin<"__builtin_lsx_vssrlrni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_hu_w : GCCBuiltin<"__builtin_lsx_vssrlrni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_wu_d : GCCBuiltin<"__builtin_lsx_vssrlrni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_du_q : GCCBuiltin<"__builtin_lsx_vssrlrni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrarni_b_h : GCCBuiltin<"__builtin_lsx_vsrarni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_h_w : GCCBuiltin<"__builtin_lsx_vsrarni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_w_d : GCCBuiltin<"__builtin_lsx_vsrarni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_d_q : GCCBuiltin<"__builtin_lsx_vsrarni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrani_b_h : GCCBuiltin<"__builtin_lsx_vssrani_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_h_w : GCCBuiltin<"__builtin_lsx_vssrani_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_w_d : GCCBuiltin<"__builtin_lsx_vssrani_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_d_q : GCCBuiltin<"__builtin_lsx_vssrani_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrani_bu_h : GCCBuiltin<"__builtin_lsx_vssrani_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_hu_w : GCCBuiltin<"__builtin_lsx_vssrani_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_wu_d : GCCBuiltin<"__builtin_lsx_vssrani_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_du_q : GCCBuiltin<"__builtin_lsx_vssrani_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarni_b_h : 
GCCBuiltin<"__builtin_lsx_vssrarni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_h_w : GCCBuiltin<"__builtin_lsx_vssrarni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_w_d : GCCBuiltin<"__builtin_lsx_vssrarni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_d_q : GCCBuiltin<"__builtin_lsx_vssrarni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarni_bu_h : GCCBuiltin<"__builtin_lsx_vssrarni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_hu_w : GCCBuiltin<"__builtin_lsx_vssrarni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_wu_d : GCCBuiltin<"__builtin_lsx_vssrarni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_du_q : GCCBuiltin<"__builtin_lsx_vssrarni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlni_bu_h : GCCBuiltin<"__builtin_lsx_vssrlni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_hu_w : GCCBuiltin<"__builtin_lsx_vssrlni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_wu_d : GCCBuiltin<"__builtin_lsx_vssrlni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_du_q : GCCBuiltin<"__builtin_lsx_vssrlni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vseq_b : GCCBuiltin<"__builtin_lsx_vseq_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_h : GCCBuiltin<"__builtin_lsx_vseq_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_w : GCCBuiltin<"__builtin_lsx_vseq_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_d : GCCBuiltin<"__builtin_lsx_vseq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsle_b : GCCBuiltin<"__builtin_lsx_vsle_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_h : GCCBuiltin<"__builtin_lsx_vsle_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_w : GCCBuiltin<"__builtin_lsx_vsle_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_d : GCCBuiltin<"__builtin_lsx_vsle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsle_bu : GCCBuiltin<"__builtin_lsx_vsle_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_hu : GCCBuiltin<"__builtin_lsx_vsle_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_wu : GCCBuiltin<"__builtin_lsx_vsle_wu">, ++ 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_du : GCCBuiltin<"__builtin_lsx_vsle_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslt_b : GCCBuiltin<"__builtin_lsx_vslt_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_h : GCCBuiltin<"__builtin_lsx_vslt_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_w : GCCBuiltin<"__builtin_lsx_vslt_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_d : GCCBuiltin<"__builtin_lsx_vslt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslt_bu : GCCBuiltin<"__builtin_lsx_vslt_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_hu : GCCBuiltin<"__builtin_lsx_vslt_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_wu : GCCBuiltin<"__builtin_lsx_vslt_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_du : GCCBuiltin<"__builtin_lsx_vslt_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadd_b : GCCBuiltin<"__builtin_lsx_vadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_h : GCCBuiltin<"__builtin_lsx_vadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_w : GCCBuiltin<"__builtin_lsx_vadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_d : GCCBuiltin<"__builtin_lsx_vadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsub_b : GCCBuiltin<"__builtin_lsx_vsub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_h : GCCBuiltin<"__builtin_lsx_vsub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_w : GCCBuiltin<"__builtin_lsx_vsub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_d : GCCBuiltin<"__builtin_lsx_vsub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmax_b : GCCBuiltin<"__builtin_lsx_vmax_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_h : GCCBuiltin<"__builtin_lsx_vmax_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_w : GCCBuiltin<"__builtin_lsx_vmax_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_d : GCCBuiltin<"__builtin_lsx_vmax_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmin_b : GCCBuiltin<"__builtin_lsx_vmin_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_h : GCCBuiltin<"__builtin_lsx_vmin_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_w : 
GCCBuiltin<"__builtin_lsx_vmin_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_d : GCCBuiltin<"__builtin_lsx_vmin_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmax_bu : GCCBuiltin<"__builtin_lsx_vmax_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_hu : GCCBuiltin<"__builtin_lsx_vmax_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_wu : GCCBuiltin<"__builtin_lsx_vmax_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_du : GCCBuiltin<"__builtin_lsx_vmax_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmin_bu : GCCBuiltin<"__builtin_lsx_vmin_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_hu : GCCBuiltin<"__builtin_lsx_vmin_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_wu : GCCBuiltin<"__builtin_lsx_vmin_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_du : GCCBuiltin<"__builtin_lsx_vmin_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmul_b : GCCBuiltin<"__builtin_lsx_vmul_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_h : GCCBuiltin<"__builtin_lsx_vmul_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_w : GCCBuiltin<"__builtin_lsx_vmul_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_d : GCCBuiltin<"__builtin_lsx_vmul_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmadd_b : GCCBuiltin<"__builtin_lsx_vmadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_h : GCCBuiltin<"__builtin_lsx_vmadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_w : GCCBuiltin<"__builtin_lsx_vmadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_d : GCCBuiltin<"__builtin_lsx_vmadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmsub_b : GCCBuiltin<"__builtin_lsx_vmsub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_h : GCCBuiltin<"__builtin_lsx_vmsub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_w : GCCBuiltin<"__builtin_lsx_vmsub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_d : GCCBuiltin<"__builtin_lsx_vmsub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vdiv_b : GCCBuiltin<"__builtin_lsx_vdiv_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_h : 
GCCBuiltin<"__builtin_lsx_vdiv_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_w : GCCBuiltin<"__builtin_lsx_vdiv_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_d : GCCBuiltin<"__builtin_lsx_vdiv_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmod_b : GCCBuiltin<"__builtin_lsx_vmod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_h : GCCBuiltin<"__builtin_lsx_vmod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_w : GCCBuiltin<"__builtin_lsx_vmod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_d : GCCBuiltin<"__builtin_lsx_vmod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vdiv_bu : GCCBuiltin<"__builtin_lsx_vdiv_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_hu : GCCBuiltin<"__builtin_lsx_vdiv_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_wu : GCCBuiltin<"__builtin_lsx_vdiv_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_du : GCCBuiltin<"__builtin_lsx_vdiv_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsll_b : GCCBuiltin<"__builtin_lsx_vsll_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_h : GCCBuiltin<"__builtin_lsx_vsll_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_w : GCCBuiltin<"__builtin_lsx_vsll_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_d : GCCBuiltin<"__builtin_lsx_vsll_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrl_b : GCCBuiltin<"__builtin_lsx_vsrl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_h : GCCBuiltin<"__builtin_lsx_vsrl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_w : GCCBuiltin<"__builtin_lsx_vsrl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_d : GCCBuiltin<"__builtin_lsx_vsrl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitclr_b : GCCBuiltin<"__builtin_lsx_vbitclr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_h : GCCBuiltin<"__builtin_lsx_vbitclr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_w : GCCBuiltin<"__builtin_lsx_vbitclr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_d : GCCBuiltin<"__builtin_lsx_vbitclr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitset_b : GCCBuiltin<"__builtin_lsx_vbitset_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_h : 
GCCBuiltin<"__builtin_lsx_vbitset_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_w : GCCBuiltin<"__builtin_lsx_vbitset_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_d : GCCBuiltin<"__builtin_lsx_vbitset_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpackev_b : GCCBuiltin<"__builtin_lsx_vpackev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_h : GCCBuiltin<"__builtin_lsx_vpackev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_w : GCCBuiltin<"__builtin_lsx_vpackev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_d : GCCBuiltin<"__builtin_lsx_vpackev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpackod_b : GCCBuiltin<"__builtin_lsx_vpackod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_h : GCCBuiltin<"__builtin_lsx_vpackod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_w : GCCBuiltin<"__builtin_lsx_vpackod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_d : GCCBuiltin<"__builtin_lsx_vpackod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vilvl_b : GCCBuiltin<"__builtin_lsx_vilvl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_h : GCCBuiltin<"__builtin_lsx_vilvl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_w : GCCBuiltin<"__builtin_lsx_vilvl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_d : GCCBuiltin<"__builtin_lsx_vilvl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vilvh_b : GCCBuiltin<"__builtin_lsx_vilvh_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_h : GCCBuiltin<"__builtin_lsx_vilvh_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_w : GCCBuiltin<"__builtin_lsx_vilvh_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_d : GCCBuiltin<"__builtin_lsx_vilvh_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickev_b : GCCBuiltin<"__builtin_lsx_vpickev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_h : GCCBuiltin<"__builtin_lsx_vpickev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_w : GCCBuiltin<"__builtin_lsx_vpickev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_d : GCCBuiltin<"__builtin_lsx_vpickev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vand_v : GCCBuiltin<"__builtin_lsx_vand_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lsx_vor_v : GCCBuiltin<"__builtin_lsx_vor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitrev_b : GCCBuiltin<"__builtin_lsx_vbitrev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_h : GCCBuiltin<"__builtin_lsx_vbitrev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_w : GCCBuiltin<"__builtin_lsx_vbitrev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_d : GCCBuiltin<"__builtin_lsx_vbitrev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmod_bu : GCCBuiltin<"__builtin_lsx_vmod_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_hu : GCCBuiltin<"__builtin_lsx_vmod_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_wu : GCCBuiltin<"__builtin_lsx_vmod_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_du : GCCBuiltin<"__builtin_lsx_vmod_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickod_b : GCCBuiltin<"__builtin_lsx_vpickod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_h : GCCBuiltin<"__builtin_lsx_vpickod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_w : GCCBuiltin<"__builtin_lsx_vpickod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_d : GCCBuiltin<"__builtin_lsx_vpickod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplve_b : GCCBuiltin<"__builtin_lsx_vreplve_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_h : GCCBuiltin<"__builtin_lsx_vreplve_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_w : GCCBuiltin<"__builtin_lsx_vreplve_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_d : GCCBuiltin<"__builtin_lsx_vreplve_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsra_b : GCCBuiltin<"__builtin_lsx_vsra_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_h : GCCBuiltin<"__builtin_lsx_vsra_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_w : GCCBuiltin<"__builtin_lsx_vsra_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_d : GCCBuiltin<"__builtin_lsx_vsra_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vxor_v : GCCBuiltin<"__builtin_lsx_vxor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vnor_v : GCCBuiltin<"__builtin_lsx_vnor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfadd_s : GCCBuiltin<"__builtin_lsx_vfadd_s">, ++ Intrinsic<[llvm_v4f32_ty], 
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfadd_d : GCCBuiltin<"__builtin_lsx_vfadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfsub_s : GCCBuiltin<"__builtin_lsx_vfsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfsub_d : GCCBuiltin<"__builtin_lsx_vfsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmul_s : GCCBuiltin<"__builtin_lsx_vfmul_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmul_d : GCCBuiltin<"__builtin_lsx_vfmul_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf_h : GCCBuiltin<"__builtin_lsx_vshuf_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vshuf_w : GCCBuiltin<"__builtin_lsx_vshuf_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vshuf_d : GCCBuiltin<"__builtin_lsx_vshuf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vseqi_b : GCCBuiltin<"__builtin_lsx_vseqi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_h : GCCBuiltin<"__builtin_lsx_vseqi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_w : GCCBuiltin<"__builtin_lsx_vseqi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_d : GCCBuiltin<"__builtin_lsx_vseqi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslei_b : GCCBuiltin<"__builtin_lsx_vslei_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_h : GCCBuiltin<"__builtin_lsx_vslei_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_w : GCCBuiltin<"__builtin_lsx_vslei_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_d : GCCBuiltin<"__builtin_lsx_vslei_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslei_bu : GCCBuiltin<"__builtin_lsx_vslei_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_hu : GCCBuiltin<"__builtin_lsx_vslei_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_wu : GCCBuiltin<"__builtin_lsx_vslei_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_du : GCCBuiltin<"__builtin_lsx_vslei_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslti_b : GCCBuiltin<"__builtin_lsx_vslti_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_h : GCCBuiltin<"__builtin_lsx_vslti_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_w : GCCBuiltin<"__builtin_lsx_vslti_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_d : GCCBuiltin<"__builtin_lsx_vslti_d">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslti_bu : GCCBuiltin<"__builtin_lsx_vslti_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_hu : GCCBuiltin<"__builtin_lsx_vslti_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_wu : GCCBuiltin<"__builtin_lsx_vslti_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_du : GCCBuiltin<"__builtin_lsx_vslti_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddi_bu : GCCBuiltin<"__builtin_lsx_vaddi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_hu : GCCBuiltin<"__builtin_lsx_vaddi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_wu : GCCBuiltin<"__builtin_lsx_vaddi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_du : GCCBuiltin<"__builtin_lsx_vaddi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubi_bu : GCCBuiltin<"__builtin_lsx_vsubi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_hu : GCCBuiltin<"__builtin_lsx_vsubi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_wu : GCCBuiltin<"__builtin_lsx_vsubi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_du : GCCBuiltin<"__builtin_lsx_vsubi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaxi_b : GCCBuiltin<"__builtin_lsx_vmaxi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_h : GCCBuiltin<"__builtin_lsx_vmaxi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_w : GCCBuiltin<"__builtin_lsx_vmaxi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_d : GCCBuiltin<"__builtin_lsx_vmaxi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmini_b : GCCBuiltin<"__builtin_lsx_vmini_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_h : GCCBuiltin<"__builtin_lsx_vmini_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_w : GCCBuiltin<"__builtin_lsx_vmini_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_d : GCCBuiltin<"__builtin_lsx_vmini_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaxi_bu : GCCBuiltin<"__builtin_lsx_vmaxi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_hu : GCCBuiltin<"__builtin_lsx_vmaxi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_wu : GCCBuiltin<"__builtin_lsx_vmaxi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_du : 
GCCBuiltin<"__builtin_lsx_vmaxi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmini_bu : GCCBuiltin<"__builtin_lsx_vmini_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_hu : GCCBuiltin<"__builtin_lsx_vmini_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_wu : GCCBuiltin<"__builtin_lsx_vmini_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_du : GCCBuiltin<"__builtin_lsx_vmini_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vclz_b : GCCBuiltin<"__builtin_lsx_vclz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_h : GCCBuiltin<"__builtin_lsx_vclz_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_w : GCCBuiltin<"__builtin_lsx_vclz_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_d : GCCBuiltin<"__builtin_lsx_vclz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpcnt_b : GCCBuiltin<"__builtin_lsx_vpcnt_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_h : GCCBuiltin<"__builtin_lsx_vpcnt_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_w : GCCBuiltin<"__builtin_lsx_vpcnt_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_d : GCCBuiltin<"__builtin_lsx_vpcnt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfsqrt_s : GCCBuiltin<"__builtin_lsx_vfsqrt_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfsqrt_d : GCCBuiltin<"__builtin_lsx_vfsqrt_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrint_s : GCCBuiltin<"__builtin_lsx_vfrint_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrint_d : GCCBuiltin<"__builtin_lsx_vfrint_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_w : GCCBuiltin<"__builtin_lsx_vffint_s_w">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffint_d_l : GCCBuiltin<"__builtin_lsx_vffint_d_l">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_wu : GCCBuiltin<"__builtin_lsx_vffint_s_wu">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffint_d_lu : GCCBuiltin<"__builtin_lsx_vffint_d_lu">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_wu_s : GCCBuiltin<"__builtin_lsx_vftintrz_wu_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrz_lu_d : GCCBuiltin<"__builtin_lsx_vftintrz_lu_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplgr2vr_b : GCCBuiltin<"__builtin_lsx_vreplgr2vr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_h : GCCBuiltin<"__builtin_lsx_vreplgr2vr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_w : GCCBuiltin<"__builtin_lsx_vreplgr2vr_w">, ++ Intrinsic<[llvm_v4i32_ty], 
[llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_d : GCCBuiltin<"__builtin_lsx_vreplgr2vr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vinsgr2vr_b : GCCBuiltin<"__builtin_lsx_vinsgr2vr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_h : GCCBuiltin<"__builtin_lsx_vinsgr2vr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_w : GCCBuiltin<"__builtin_lsx_vinsgr2vr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_d : GCCBuiltin<"__builtin_lsx_vinsgr2vr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfdiv_s : GCCBuiltin<"__builtin_lsx_vfdiv_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfdiv_d : GCCBuiltin<"__builtin_lsx_vfdiv_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslli_b : GCCBuiltin<"__builtin_lsx_vslli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_h : GCCBuiltin<"__builtin_lsx_vslli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_w : GCCBuiltin<"__builtin_lsx_vslli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_d : GCCBuiltin<"__builtin_lsx_vslli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrli_b : GCCBuiltin<"__builtin_lsx_vsrli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_h : GCCBuiltin<"__builtin_lsx_vsrli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_w : GCCBuiltin<"__builtin_lsx_vsrli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_d : GCCBuiltin<"__builtin_lsx_vsrli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrai_b : GCCBuiltin<"__builtin_lsx_vsrai_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_h : GCCBuiltin<"__builtin_lsx_vsrai_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_w : GCCBuiltin<"__builtin_lsx_vsrai_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_d : GCCBuiltin<"__builtin_lsx_vsrai_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf4i_b : GCCBuiltin<"__builtin_lsx_vshuf4i_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_h : GCCBuiltin<"__builtin_lsx_vshuf4i_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_w : GCCBuiltin<"__builtin_lsx_vshuf4i_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_d : GCCBuiltin<"__builtin_lsx_vshuf4i_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrotr_b : 
GCCBuiltin<"__builtin_lsx_vrotr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_h : GCCBuiltin<"__builtin_lsx_vrotr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_w : GCCBuiltin<"__builtin_lsx_vrotr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_d : GCCBuiltin<"__builtin_lsx_vrotr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrotri_b : GCCBuiltin<"__builtin_lsx_vrotri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_h : GCCBuiltin<"__builtin_lsx_vrotri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_w : GCCBuiltin<"__builtin_lsx_vrotri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_d : GCCBuiltin<"__builtin_lsx_vrotri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vld : GCCBuiltin<"__builtin_lsx_vld">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vst : GCCBuiltin<"__builtin_lsx_vst">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_bz_v : GCCBuiltin<"__builtin_lsx_bz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_v : GCCBuiltin<"__builtin_lsx_bnz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bz_b : GCCBuiltin<"__builtin_lsx_bz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_h : GCCBuiltin<"__builtin_lsx_bz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_w : GCCBuiltin<"__builtin_lsx_bz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_d : GCCBuiltin<"__builtin_lsx_bz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_b : GCCBuiltin<"__builtin_lsx_bnz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_h : GCCBuiltin<"__builtin_lsx_bnz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_w : GCCBuiltin<"__builtin_lsx_bnz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_d : GCCBuiltin<"__builtin_lsx_bnz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++//===----------------------------------------------------------------------===// ++//LoongArch LASX ++ ++def int_loongarch_lasx_xvfmadd_s : GCCBuiltin<"__builtin_lasx_xvfmadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfmadd_d : GCCBuiltin<"__builtin_lasx_xvfmadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmsub_s : GCCBuiltin<"__builtin_lasx_xvfmsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfmsub_d : GCCBuiltin<"__builtin_lasx_xvfmsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++ ++def int_loongarch_lasx_xvfnmadd_s : 
GCCBuiltin<"__builtin_lasx_xvfnmadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfnmadd_d : GCCBuiltin<"__builtin_lasx_xvfnmadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfnmsub_s : GCCBuiltin<"__builtin_lasx_xvfnmsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfnmsub_d : GCCBuiltin<"__builtin_lasx_xvfnmsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvclo_b : GCCBuiltin<"__builtin_lasx_xvclo_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_h : GCCBuiltin<"__builtin_lasx_xvclo_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_w : GCCBuiltin<"__builtin_lasx_xvclo_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_d : GCCBuiltin<"__builtin_lasx_xvclo_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvflogb_s : GCCBuiltin<"__builtin_lasx_xvflogb_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvflogb_d : GCCBuiltin<"__builtin_lasx_xvflogb_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve2gr_w : GCCBuiltin<"__builtin_lasx_xvpickve2gr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve2gr_d : GCCBuiltin<"__builtin_lasx_xvpickve2gr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve2gr_wu : GCCBuiltin<"__builtin_lasx_xvpickve2gr_wu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve2gr_du : GCCBuiltin<"__builtin_lasx_xvpickve2gr_du">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmskltz_b : GCCBuiltin<"__builtin_lasx_xvmskltz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_h : GCCBuiltin<"__builtin_lasx_xvmskltz_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_w : GCCBuiltin<"__builtin_lasx_xvmskltz_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_d : GCCBuiltin<"__builtin_lasx_xvmskltz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_caf_s : GCCBuiltin<"__builtin_lasx_xvfcmp_caf_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_caf_d : GCCBuiltin<"__builtin_lasx_xvfcmp_caf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cor_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cor_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cor_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cor_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cun_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cun_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cun_d : 
GCCBuiltin<"__builtin_lasx_xvfcmp_cun_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cune_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cune_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cune_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cune_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cueq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cueq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cueq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cueq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_ceq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_ceq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_ceq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_ceq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cne_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cne_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_clt_s : GCCBuiltin<"__builtin_lasx_xvfcmp_clt_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_clt_d : GCCBuiltin<"__builtin_lasx_xvfcmp_clt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cult_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cult_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cult_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cult_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cle_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cle_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cle_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cule_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cule_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cule_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cule_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_saf_s : GCCBuiltin<"__builtin_lasx_xvfcmp_saf_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_saf_d : GCCBuiltin<"__builtin_lasx_xvfcmp_saf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sor_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sor_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sor_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sor_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sun_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sun_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sun_d : 
GCCBuiltin<"__builtin_lasx_xvfcmp_sun_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sune_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sune_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sune_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sune_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sueq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sueq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sueq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sueq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_seq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_seq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_seq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_seq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sne_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sne_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_slt_s : GCCBuiltin<"__builtin_lasx_xvfcmp_slt_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_slt_d : GCCBuiltin<"__builtin_lasx_xvfcmp_slt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sult_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sult_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sult_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sult_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sle_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sle_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sle_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sule_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sule_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sule_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sule_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitsel_v : GCCBuiltin<"__builtin_lasx_xvbitsel_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf_b : GCCBuiltin<"__builtin_lasx_xvshuf_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldrepl_b : GCCBuiltin<"__builtin_lasx_xvldrepl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_h : GCCBuiltin<"__builtin_lasx_xvldrepl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_w : GCCBuiltin<"__builtin_lasx_xvldrepl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, 
IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_d : GCCBuiltin<"__builtin_lasx_xvldrepl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvstelm_b : GCCBuiltin<"__builtin_lasx_xvstelm_b">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_h : GCCBuiltin<"__builtin_lasx_xvstelm_h">, ++ Intrinsic<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_w : GCCBuiltin<"__builtin_lasx_xvstelm_w">, ++ Intrinsic<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_d : GCCBuiltin<"__builtin_lasx_xvstelm_d">, ++ Intrinsic<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvldx : GCCBuiltin<"__builtin_lasx_xvldx">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvstx : GCCBuiltin<"__builtin_lasx_xvstx">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvaddwev_d_w : GCCBuiltin<"__builtin_lasx_xvaddwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_h : GCCBuiltin<"__builtin_lasx_xvaddwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_b : GCCBuiltin<"__builtin_lasx_xvaddwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_d : GCCBuiltin<"__builtin_lasx_xvaddwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwev_d_w : GCCBuiltin<"__builtin_lasx_xvsubwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_w_h : GCCBuiltin<"__builtin_lasx_xvsubwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_h_b : GCCBuiltin<"__builtin_lasx_xvsubwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_q_d : GCCBuiltin<"__builtin_lasx_xvsubwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_w : GCCBuiltin<"__builtin_lasx_xvaddwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_h : GCCBuiltin<"__builtin_lasx_xvaddwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_b : GCCBuiltin<"__builtin_lasx_xvaddwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_d : GCCBuiltin<"__builtin_lasx_xvaddwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwod_d_w : GCCBuiltin<"__builtin_lasx_xvsubwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_w_h : GCCBuiltin<"__builtin_lasx_xvsubwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_h_b : GCCBuiltin<"__builtin_lasx_xvsubwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_q_d : GCCBuiltin<"__builtin_lasx_xvsubwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwev_d_wu : GCCBuiltin<"__builtin_lasx_xvaddwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_hu : GCCBuiltin<"__builtin_lasx_xvaddwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_bu : GCCBuiltin<"__builtin_lasx_xvaddwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_du : GCCBuiltin<"__builtin_lasx_xvaddwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwev_d_wu : GCCBuiltin<"__builtin_lasx_xvsubwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_w_hu : GCCBuiltin<"__builtin_lasx_xvsubwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_h_bu : GCCBuiltin<"__builtin_lasx_xvsubwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_q_du : GCCBuiltin<"__builtin_lasx_xvsubwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_wu : GCCBuiltin<"__builtin_lasx_xvaddwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_hu : GCCBuiltin<"__builtin_lasx_xvaddwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_bu : GCCBuiltin<"__builtin_lasx_xvaddwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_du : GCCBuiltin<"__builtin_lasx_xvaddwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwod_d_wu : GCCBuiltin<"__builtin_lasx_xvsubwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_w_hu : GCCBuiltin<"__builtin_lasx_xvsubwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_h_bu : GCCBuiltin<"__builtin_lasx_xvsubwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_q_du : GCCBuiltin<"__builtin_lasx_xvsubwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvaddwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvaddwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvaddwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvaddwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_wu_w : 
GCCBuiltin<"__builtin_lasx_xvaddwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvaddwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvaddwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvaddwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_qu_du : GCCBuiltin<"__builtin_lasx_xvhaddw_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_qu_du : GCCBuiltin<"__builtin_lasx_xvhsubw_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_q_d : GCCBuiltin<"__builtin_lasx_xvhaddw_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_q_d : GCCBuiltin<"__builtin_lasx_xvhsubw_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmuh_b : GCCBuiltin<"__builtin_lasx_xvmuh_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_h : GCCBuiltin<"__builtin_lasx_xvmuh_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_w : GCCBuiltin<"__builtin_lasx_xvmuh_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_d : GCCBuiltin<"__builtin_lasx_xvmuh_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmuh_bu : GCCBuiltin<"__builtin_lasx_xvmuh_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_hu : GCCBuiltin<"__builtin_lasx_xvmuh_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_wu : GCCBuiltin<"__builtin_lasx_xvmuh_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_du : GCCBuiltin<"__builtin_lasx_xvmuh_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_w : GCCBuiltin<"__builtin_lasx_xvmulwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_h : GCCBuiltin<"__builtin_lasx_xvmulwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_b : GCCBuiltin<"__builtin_lasx_xvmulwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_d : GCCBuiltin<"__builtin_lasx_xvmulwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_w : GCCBuiltin<"__builtin_lasx_xvmulwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_h : GCCBuiltin<"__builtin_lasx_xvmulwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_b : GCCBuiltin<"__builtin_lasx_xvmulwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_d : GCCBuiltin<"__builtin_lasx_xvmulwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_wu : GCCBuiltin<"__builtin_lasx_xvmulwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_hu : GCCBuiltin<"__builtin_lasx_xvmulwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_bu : GCCBuiltin<"__builtin_lasx_xvmulwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_du : GCCBuiltin<"__builtin_lasx_xvmulwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_wu : GCCBuiltin<"__builtin_lasx_xvmulwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_hu : GCCBuiltin<"__builtin_lasx_xvmulwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_bu : GCCBuiltin<"__builtin_lasx_xvmulwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_du : GCCBuiltin<"__builtin_lasx_xvmulwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmulwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmulwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmulwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvmulwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmulwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmulwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmulwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvmulwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_w : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_h : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_b : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_d : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ 
++def int_loongarch_lasx_xvmaddwod_d_w : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_h : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_b : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_d : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_wu : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_hu : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_bu : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_du : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_wu : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_hu : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_bu : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_du : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], 
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrln_b_h : GCCBuiltin<"__builtin_lasx_xvsrln_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrln_h_w : GCCBuiltin<"__builtin_lasx_xvsrln_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrln_w_d : GCCBuiltin<"__builtin_lasx_xvsrln_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsran_b_h : GCCBuiltin<"__builtin_lasx_xvsran_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsran_h_w : GCCBuiltin<"__builtin_lasx_xvsran_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsran_w_d : GCCBuiltin<"__builtin_lasx_xvsran_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlrn_b_h : GCCBuiltin<"__builtin_lasx_xvsrlrn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrn_h_w : GCCBuiltin<"__builtin_lasx_xvsrlrn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrn_w_d : GCCBuiltin<"__builtin_lasx_xvsrlrn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrarn_b_h : GCCBuiltin<"__builtin_lasx_xvsrarn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarn_h_w : GCCBuiltin<"__builtin_lasx_xvsrarn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarn_w_d : GCCBuiltin<"__builtin_lasx_xvsrarn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrln_b_h : GCCBuiltin<"__builtin_lasx_xvssrln_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_h_w : GCCBuiltin<"__builtin_lasx_xvssrln_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_w_d : GCCBuiltin<"__builtin_lasx_xvssrln_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssran_b_h : GCCBuiltin<"__builtin_lasx_xvssran_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_h_w : GCCBuiltin<"__builtin_lasx_xvssran_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_w_d : GCCBuiltin<"__builtin_lasx_xvssran_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrn_b_h : GCCBuiltin<"__builtin_lasx_xvssrlrn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_h_w : GCCBuiltin<"__builtin_lasx_xvssrlrn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_w_d : GCCBuiltin<"__builtin_lasx_xvssrlrn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarn_b_h : GCCBuiltin<"__builtin_lasx_xvssrarn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], 
[IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_h_w : GCCBuiltin<"__builtin_lasx_xvssrarn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_w_d : GCCBuiltin<"__builtin_lasx_xvssrarn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrln_bu_h : GCCBuiltin<"__builtin_lasx_xvssrln_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_hu_w : GCCBuiltin<"__builtin_lasx_xvssrln_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_wu_d : GCCBuiltin<"__builtin_lasx_xvssrln_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssran_bu_h : GCCBuiltin<"__builtin_lasx_xvssran_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_hu_w : GCCBuiltin<"__builtin_lasx_xvssran_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_wu_d : GCCBuiltin<"__builtin_lasx_xvssran_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrn_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlrn_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlrn_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlrn_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarn_bu_h : GCCBuiltin<"__builtin_lasx_xvssrarn_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_hu_w : GCCBuiltin<"__builtin_lasx_xvssrarn_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_wu_d : GCCBuiltin<"__builtin_lasx_xvssrarn_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvandn_v : GCCBuiltin<"__builtin_lasx_xvandn_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvorn_v : GCCBuiltin<"__builtin_lasx_xvorn_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrstp_b : GCCBuiltin<"__builtin_lasx_xvfrstp_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfrstp_h : GCCBuiltin<"__builtin_lasx_xvfrstp_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadd_q : GCCBuiltin<"__builtin_lasx_xvadd_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_q : GCCBuiltin<"__builtin_lasx_xvsub_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsigncov_b : GCCBuiltin<"__builtin_lasx_xvsigncov_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_h : GCCBuiltin<"__builtin_lasx_xvsigncov_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ 
[IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_w : GCCBuiltin<"__builtin_lasx_xvsigncov_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_d : GCCBuiltin<"__builtin_lasx_xvsigncov_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvt_h_s : GCCBuiltin<"__builtin_lasx_xvfcvt_h_s">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvt_s_d : GCCBuiltin<"__builtin_lasx_xvfcvt_s_d">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_l : GCCBuiltin<"__builtin_lasx_xvffint_s_l">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_w_d : GCCBuiltin<"__builtin_lasx_xvftint_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_w_d : GCCBuiltin<"__builtin_lasx_xvftintrz_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrp_w_d : GCCBuiltin<"__builtin_lasx_xvftintrp_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrm_w_d : GCCBuiltin<"__builtin_lasx_xvftintrm_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrne_w_d : GCCBuiltin<"__builtin_lasx_xvftintrne_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbsrl_v : GCCBuiltin<"__builtin_lasx_xvbsrl_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbsll_v : GCCBuiltin<"__builtin_lasx_xvbsll_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrstpi_b : GCCBuiltin<"__builtin_lasx_xvfrstpi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrstpi_h : GCCBuiltin<"__builtin_lasx_xvfrstpi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvneg_b : GCCBuiltin<"__builtin_lasx_xvneg_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_h : GCCBuiltin<"__builtin_lasx_xvneg_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_w : GCCBuiltin<"__builtin_lasx_xvneg_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_d : GCCBuiltin<"__builtin_lasx_xvneg_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmskgez_b : GCCBuiltin<"__builtin_lasx_xvmskgez_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmsknz_b : GCCBuiltin<"__builtin_lasx_xvmsknz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrm_s : GCCBuiltin<"__builtin_lasx_xvfrintrm_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrm_d : GCCBuiltin<"__builtin_lasx_xvfrintrm_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrp_s : GCCBuiltin<"__builtin_lasx_xvfrintrp_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrp_d : 
GCCBuiltin<"__builtin_lasx_xvfrintrp_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrz_s : GCCBuiltin<"__builtin_lasx_xvfrintrz_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrz_d : GCCBuiltin<"__builtin_lasx_xvfrintrz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrne_s : GCCBuiltin<"__builtin_lasx_xvfrintrne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrne_d : GCCBuiltin<"__builtin_lasx_xvfrintrne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffinth_d_w : GCCBuiltin<"__builtin_lasx_xvffinth_d_w">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffintl_d_w : GCCBuiltin<"__builtin_lasx_xvffintl_d_w">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrm_w_s : GCCBuiltin<"__builtin_lasx_xvftintrm_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrm_l_d : GCCBuiltin<"__builtin_lasx_xvftintrm_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrp_w_s : GCCBuiltin<"__builtin_lasx_xvftintrp_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrp_l_d : GCCBuiltin<"__builtin_lasx_xvftintrp_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_w_s : GCCBuiltin<"__builtin_lasx_xvftintrz_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrz_l_d : GCCBuiltin<"__builtin_lasx_xvftintrz_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrne_w_s : GCCBuiltin<"__builtin_lasx_xvftintrne_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrne_l_d : GCCBuiltin<"__builtin_lasx_xvftintrne_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftinth_l_s : GCCBuiltin<"__builtin_lasx_xvftinth_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintl_l_s : GCCBuiltin<"__builtin_lasx_xvftintl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrmh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrmh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrml_l_s : GCCBuiltin<"__builtin_lasx_xvftintrml_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrph_l_s : GCCBuiltin<"__builtin_lasx_xvftintrph_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrpl_l_s : GCCBuiltin<"__builtin_lasx_xvftintrpl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrzh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrzh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrzl_l_s : GCCBuiltin<"__builtin_lasx_xvftintrzl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrneh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrneh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrnel_l_s : 
GCCBuiltin<"__builtin_lasx_xvftintrnel_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvexth_d_w : GCCBuiltin<"__builtin_lasx_xvexth_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_w_h : GCCBuiltin<"__builtin_lasx_xvexth_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_h_b : GCCBuiltin<"__builtin_lasx_xvexth_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_q_d : GCCBuiltin<"__builtin_lasx_xvexth_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsllwil_d_w : GCCBuiltin<"__builtin_lasx_xvsllwil_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_w_h : GCCBuiltin<"__builtin_lasx_xvsllwil_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_h_b : GCCBuiltin<"__builtin_lasx_xvsllwil_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsllwil_du_wu : GCCBuiltin<"__builtin_lasx_xvsllwil_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_wu_hu : GCCBuiltin<"__builtin_lasx_xvsllwil_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_hu_bu : GCCBuiltin<"__builtin_lasx_xvsllwil_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitclri_b : GCCBuiltin<"__builtin_lasx_xvbitclri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_h : GCCBuiltin<"__builtin_lasx_xvbitclri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_w : GCCBuiltin<"__builtin_lasx_xvbitclri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_d : GCCBuiltin<"__builtin_lasx_xvbitclri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitseti_b : GCCBuiltin<"__builtin_lasx_xvbitseti_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_h : GCCBuiltin<"__builtin_lasx_xvbitseti_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_w : GCCBuiltin<"__builtin_lasx_xvbitseti_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_d : GCCBuiltin<"__builtin_lasx_xvbitseti_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitrevi_b : GCCBuiltin<"__builtin_lasx_xvbitrevi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_h : GCCBuiltin<"__builtin_lasx_xvbitrevi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_w : GCCBuiltin<"__builtin_lasx_xvbitrevi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_d : GCCBuiltin<"__builtin_lasx_xvbitrevi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrni_b_h : 
GCCBuiltin<"__builtin_lasx_xvssrlrni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_h_w : GCCBuiltin<"__builtin_lasx_xvssrlrni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_w_d : GCCBuiltin<"__builtin_lasx_xvssrlrni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvssrlrni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrani_b_h : GCCBuiltin<"__builtin_lasx_xvsrani_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_h_w : GCCBuiltin<"__builtin_lasx_xvsrani_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_w_d : GCCBuiltin<"__builtin_lasx_xvsrani_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_d_q : GCCBuiltin<"__builtin_lasx_xvsrani_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvextrins_b : GCCBuiltin<"__builtin_lasx_xvextrins_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_h : GCCBuiltin<"__builtin_lasx_xvextrins_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_w : GCCBuiltin<"__builtin_lasx_xvextrins_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_d : GCCBuiltin<"__builtin_lasx_xvextrins_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitseli_b : GCCBuiltin<"__builtin_lasx_xvbitseli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvandi_b : GCCBuiltin<"__builtin_lasx_xvandi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvori_b : GCCBuiltin<"__builtin_lasx_xvori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvxori_b : GCCBuiltin<"__builtin_lasx_xvxori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvnori_b : GCCBuiltin<"__builtin_lasx_xvnori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldi : GCCBuiltin<"__builtin_lasx_xvldi">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpermi_w : GCCBuiltin<"__builtin_lasx_xvpermi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsadd_b : GCCBuiltin<"__builtin_lasx_xvsadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_h : GCCBuiltin<"__builtin_lasx_xvsadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_w : GCCBuiltin<"__builtin_lasx_xvsadd_w">, ++ Intrinsic<[llvm_v8i32_ty], 
[llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_d : GCCBuiltin<"__builtin_lasx_xvsadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssub_b : GCCBuiltin<"__builtin_lasx_xvssub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_h : GCCBuiltin<"__builtin_lasx_xvssub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_w : GCCBuiltin<"__builtin_lasx_xvssub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_d : GCCBuiltin<"__builtin_lasx_xvssub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsadd_bu : GCCBuiltin<"__builtin_lasx_xvsadd_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_hu : GCCBuiltin<"__builtin_lasx_xvsadd_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_wu : GCCBuiltin<"__builtin_lasx_xvsadd_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_du : GCCBuiltin<"__builtin_lasx_xvsadd_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssub_bu : GCCBuiltin<"__builtin_lasx_xvssub_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_hu : GCCBuiltin<"__builtin_lasx_xvssub_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_wu : GCCBuiltin<"__builtin_lasx_xvssub_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_du : GCCBuiltin<"__builtin_lasx_xvssub_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_h_b : GCCBuiltin<"__builtin_lasx_xvhaddw_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_w_h : GCCBuiltin<"__builtin_lasx_xvhaddw_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_d_w : GCCBuiltin<"__builtin_lasx_xvhaddw_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhsubw_h_b : GCCBuiltin<"__builtin_lasx_xvhsubw_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_w_h : GCCBuiltin<"__builtin_lasx_xvhsubw_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_d_w : GCCBuiltin<"__builtin_lasx_xvhsubw_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_hu_bu : GCCBuiltin<"__builtin_lasx_xvhaddw_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_wu_hu : GCCBuiltin<"__builtin_lasx_xvhaddw_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_du_wu : GCCBuiltin<"__builtin_lasx_xvhaddw_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, 
llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhsubw_hu_bu : GCCBuiltin<"__builtin_lasx_xvhsubw_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_wu_hu : GCCBuiltin<"__builtin_lasx_xvhsubw_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_du_wu : GCCBuiltin<"__builtin_lasx_xvhsubw_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadda_b : GCCBuiltin<"__builtin_lasx_xvadda_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_h : GCCBuiltin<"__builtin_lasx_xvadda_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_w : GCCBuiltin<"__builtin_lasx_xvadda_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_d : GCCBuiltin<"__builtin_lasx_xvadda_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvabsd_b : GCCBuiltin<"__builtin_lasx_xvabsd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_h : GCCBuiltin<"__builtin_lasx_xvabsd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_w : GCCBuiltin<"__builtin_lasx_xvabsd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_d : GCCBuiltin<"__builtin_lasx_xvabsd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvabsd_bu : GCCBuiltin<"__builtin_lasx_xvabsd_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_hu : GCCBuiltin<"__builtin_lasx_xvabsd_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_wu : GCCBuiltin<"__builtin_lasx_xvabsd_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_du : GCCBuiltin<"__builtin_lasx_xvabsd_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavg_b : GCCBuiltin<"__builtin_lasx_xvavg_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_h : GCCBuiltin<"__builtin_lasx_xvavg_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_w : GCCBuiltin<"__builtin_lasx_xvavg_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_d : GCCBuiltin<"__builtin_lasx_xvavg_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavg_bu : GCCBuiltin<"__builtin_lasx_xvavg_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_hu : GCCBuiltin<"__builtin_lasx_xvavg_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_wu : GCCBuiltin<"__builtin_lasx_xvavg_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, 
llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_du : GCCBuiltin<"__builtin_lasx_xvavg_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavgr_b : GCCBuiltin<"__builtin_lasx_xvavgr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_h : GCCBuiltin<"__builtin_lasx_xvavgr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_w : GCCBuiltin<"__builtin_lasx_xvavgr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_d : GCCBuiltin<"__builtin_lasx_xvavgr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavgr_bu : GCCBuiltin<"__builtin_lasx_xvavgr_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_hu : GCCBuiltin<"__builtin_lasx_xvavgr_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_wu : GCCBuiltin<"__builtin_lasx_xvavgr_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_du : GCCBuiltin<"__builtin_lasx_xvavgr_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlr_b : GCCBuiltin<"__builtin_lasx_xvsrlr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_h : GCCBuiltin<"__builtin_lasx_xvsrlr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_w : GCCBuiltin<"__builtin_lasx_xvsrlr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_d : GCCBuiltin<"__builtin_lasx_xvsrlr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrar_b : GCCBuiltin<"__builtin_lasx_xvsrar_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_h : GCCBuiltin<"__builtin_lasx_xvsrar_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_w : GCCBuiltin<"__builtin_lasx_xvsrar_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_d : GCCBuiltin<"__builtin_lasx_xvsrar_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmax_s : GCCBuiltin<"__builtin_lasx_xvfmax_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmax_d : GCCBuiltin<"__builtin_lasx_xvfmax_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmin_s : GCCBuiltin<"__builtin_lasx_xvfmin_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmin_d : GCCBuiltin<"__builtin_lasx_xvfmin_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmaxa_s : GCCBuiltin<"__builtin_lasx_xvfmaxa_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvfmaxa_d : GCCBuiltin<"__builtin_lasx_xvfmaxa_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmina_s : GCCBuiltin<"__builtin_lasx_xvfmina_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmina_d : GCCBuiltin<"__builtin_lasx_xvfmina_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfclass_s : GCCBuiltin<"__builtin_lasx_xvfclass_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfclass_d : GCCBuiltin<"__builtin_lasx_xvfclass_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrecip_s : GCCBuiltin<"__builtin_lasx_xvfrecip_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrecip_d : GCCBuiltin<"__builtin_lasx_xvfrecip_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrsqrt_s : GCCBuiltin<"__builtin_lasx_xvfrsqrt_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrsqrt_d : GCCBuiltin<"__builtin_lasx_xvfrsqrt_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvtl_s_h : GCCBuiltin<"__builtin_lasx_xvfcvtl_s_h">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvtl_d_s : GCCBuiltin<"__builtin_lasx_xvfcvtl_d_s">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvth_s_h : GCCBuiltin<"__builtin_lasx_xvfcvth_s_h">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvth_d_s : GCCBuiltin<"__builtin_lasx_xvfcvth_d_s">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftint_w_s : GCCBuiltin<"__builtin_lasx_xvftint_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_l_d : GCCBuiltin<"__builtin_lasx_xvftint_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftint_wu_s : GCCBuiltin<"__builtin_lasx_xvftint_wu_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_lu_d : GCCBuiltin<"__builtin_lasx_xvftint_lu_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlri_b : GCCBuiltin<"__builtin_lasx_xvsrlri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_h : GCCBuiltin<"__builtin_lasx_xvsrlri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_w : GCCBuiltin<"__builtin_lasx_xvsrlri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_d : GCCBuiltin<"__builtin_lasx_xvsrlri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrari_b : GCCBuiltin<"__builtin_lasx_xvsrari_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_h : GCCBuiltin<"__builtin_lasx_xvsrari_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_w : GCCBuiltin<"__builtin_lasx_xvsrari_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def 
int_loongarch_lasx_xvsrari_d : GCCBuiltin<"__builtin_lasx_xvsrari_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsat_b : GCCBuiltin<"__builtin_lasx_xvsat_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_h : GCCBuiltin<"__builtin_lasx_xvsat_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_w : GCCBuiltin<"__builtin_lasx_xvsat_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_d : GCCBuiltin<"__builtin_lasx_xvsat_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsat_bu : GCCBuiltin<"__builtin_lasx_xvsat_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_hu : GCCBuiltin<"__builtin_lasx_xvsat_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_wu : GCCBuiltin<"__builtin_lasx_xvsat_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_du : GCCBuiltin<"__builtin_lasx_xvsat_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlni_b_h : GCCBuiltin<"__builtin_lasx_xvsrlni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_h_w : GCCBuiltin<"__builtin_lasx_xvsrlni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_w_d : GCCBuiltin<"__builtin_lasx_xvsrlni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlni_b_h : GCCBuiltin<"__builtin_lasx_xvssrlni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_h_w : GCCBuiltin<"__builtin_lasx_xvssrlni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_w_d : GCCBuiltin<"__builtin_lasx_xvssrlni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_d_q : GCCBuiltin<"__builtin_lasx_xvssrlni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlrni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlrni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlrni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_du_q : GCCBuiltin<"__builtin_lasx_xvssrlrni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrarni_b_h : GCCBuiltin<"__builtin_lasx_xvsrarni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_h_w : GCCBuiltin<"__builtin_lasx_xvsrarni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_w_d : GCCBuiltin<"__builtin_lasx_xvsrarni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_d_q : GCCBuiltin<"__builtin_lasx_xvsrarni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrani_b_h : GCCBuiltin<"__builtin_lasx_xvssrani_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_h_w : GCCBuiltin<"__builtin_lasx_xvssrani_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_w_d : GCCBuiltin<"__builtin_lasx_xvssrani_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_d_q : GCCBuiltin<"__builtin_lasx_xvssrani_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrani_bu_h : GCCBuiltin<"__builtin_lasx_xvssrani_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_hu_w : GCCBuiltin<"__builtin_lasx_xvssrani_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_wu_d : GCCBuiltin<"__builtin_lasx_xvssrani_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_du_q : GCCBuiltin<"__builtin_lasx_xvssrani_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarni_b_h : GCCBuiltin<"__builtin_lasx_xvssrarni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_h_w : GCCBuiltin<"__builtin_lasx_xvssrarni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_w_d : GCCBuiltin<"__builtin_lasx_xvssrarni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_d_q : GCCBuiltin<"__builtin_lasx_xvssrarni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrarni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrarni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrarni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_du_q : GCCBuiltin<"__builtin_lasx_xvssrarni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], 
[IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_du_q : GCCBuiltin<"__builtin_lasx_xvssrlni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvseq_b : GCCBuiltin<"__builtin_lasx_xvseq_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_h : GCCBuiltin<"__builtin_lasx_xvseq_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_w : GCCBuiltin<"__builtin_lasx_xvseq_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_d : GCCBuiltin<"__builtin_lasx_xvseq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsle_b : GCCBuiltin<"__builtin_lasx_xvsle_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_h : GCCBuiltin<"__builtin_lasx_xvsle_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_w : GCCBuiltin<"__builtin_lasx_xvsle_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_d : GCCBuiltin<"__builtin_lasx_xvsle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsle_bu : GCCBuiltin<"__builtin_lasx_xvsle_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_hu : GCCBuiltin<"__builtin_lasx_xvsle_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_wu : GCCBuiltin<"__builtin_lasx_xvsle_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_du : GCCBuiltin<"__builtin_lasx_xvsle_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslt_b : GCCBuiltin<"__builtin_lasx_xvslt_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_h : GCCBuiltin<"__builtin_lasx_xvslt_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_w : GCCBuiltin<"__builtin_lasx_xvslt_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_d : GCCBuiltin<"__builtin_lasx_xvslt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslt_bu : GCCBuiltin<"__builtin_lasx_xvslt_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_hu : GCCBuiltin<"__builtin_lasx_xvslt_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_wu : GCCBuiltin<"__builtin_lasx_xvslt_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_du : GCCBuiltin<"__builtin_lasx_xvslt_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadd_b : GCCBuiltin<"__builtin_lasx_xvadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_h : GCCBuiltin<"__builtin_lasx_xvadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_w : GCCBuiltin<"__builtin_lasx_xvadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_d : GCCBuiltin<"__builtin_lasx_xvadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsub_b : GCCBuiltin<"__builtin_lasx_xvsub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_h : GCCBuiltin<"__builtin_lasx_xvsub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_w : GCCBuiltin<"__builtin_lasx_xvsub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_d : GCCBuiltin<"__builtin_lasx_xvsub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmax_b : GCCBuiltin<"__builtin_lasx_xvmax_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_h : GCCBuiltin<"__builtin_lasx_xvmax_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_w : GCCBuiltin<"__builtin_lasx_xvmax_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_d : GCCBuiltin<"__builtin_lasx_xvmax_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmin_b : GCCBuiltin<"__builtin_lasx_xvmin_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_h : GCCBuiltin<"__builtin_lasx_xvmin_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_w : GCCBuiltin<"__builtin_lasx_xvmin_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_d : GCCBuiltin<"__builtin_lasx_xvmin_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmax_bu : GCCBuiltin<"__builtin_lasx_xvmax_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_hu : GCCBuiltin<"__builtin_lasx_xvmax_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_wu : GCCBuiltin<"__builtin_lasx_xvmax_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_du : GCCBuiltin<"__builtin_lasx_xvmax_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmin_bu : GCCBuiltin<"__builtin_lasx_xvmin_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_hu : GCCBuiltin<"__builtin_lasx_xvmin_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_wu : GCCBuiltin<"__builtin_lasx_xvmin_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvmin_du : GCCBuiltin<"__builtin_lasx_xvmin_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmul_b : GCCBuiltin<"__builtin_lasx_xvmul_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_h : GCCBuiltin<"__builtin_lasx_xvmul_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_w : GCCBuiltin<"__builtin_lasx_xvmul_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_d : GCCBuiltin<"__builtin_lasx_xvmul_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmadd_b : GCCBuiltin<"__builtin_lasx_xvmadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_h : GCCBuiltin<"__builtin_lasx_xvmadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_w : GCCBuiltin<"__builtin_lasx_xvmadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_d : GCCBuiltin<"__builtin_lasx_xvmadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmsub_b : GCCBuiltin<"__builtin_lasx_xvmsub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_h : GCCBuiltin<"__builtin_lasx_xvmsub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_w : GCCBuiltin<"__builtin_lasx_xvmsub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_d : GCCBuiltin<"__builtin_lasx_xvmsub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvdiv_b : GCCBuiltin<"__builtin_lasx_xvdiv_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_h : GCCBuiltin<"__builtin_lasx_xvdiv_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_w : GCCBuiltin<"__builtin_lasx_xvdiv_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_d : GCCBuiltin<"__builtin_lasx_xvdiv_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmod_b : GCCBuiltin<"__builtin_lasx_xvmod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_h : GCCBuiltin<"__builtin_lasx_xvmod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_w : GCCBuiltin<"__builtin_lasx_xvmod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_d : GCCBuiltin<"__builtin_lasx_xvmod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvdiv_bu : GCCBuiltin<"__builtin_lasx_xvdiv_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_hu : GCCBuiltin<"__builtin_lasx_xvdiv_hu">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_wu : GCCBuiltin<"__builtin_lasx_xvdiv_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_du : GCCBuiltin<"__builtin_lasx_xvdiv_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsll_b : GCCBuiltin<"__builtin_lasx_xvsll_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_h : GCCBuiltin<"__builtin_lasx_xvsll_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_w : GCCBuiltin<"__builtin_lasx_xvsll_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_d : GCCBuiltin<"__builtin_lasx_xvsll_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrl_b : GCCBuiltin<"__builtin_lasx_xvsrl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_h : GCCBuiltin<"__builtin_lasx_xvsrl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_w : GCCBuiltin<"__builtin_lasx_xvsrl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_d : GCCBuiltin<"__builtin_lasx_xvsrl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitclr_b : GCCBuiltin<"__builtin_lasx_xvbitclr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_h : GCCBuiltin<"__builtin_lasx_xvbitclr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_w : GCCBuiltin<"__builtin_lasx_xvbitclr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_d : GCCBuiltin<"__builtin_lasx_xvbitclr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitset_b : GCCBuiltin<"__builtin_lasx_xvbitset_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_h : GCCBuiltin<"__builtin_lasx_xvbitset_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_w : GCCBuiltin<"__builtin_lasx_xvbitset_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_d : GCCBuiltin<"__builtin_lasx_xvbitset_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpackev_b : GCCBuiltin<"__builtin_lasx_xvpackev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_h : GCCBuiltin<"__builtin_lasx_xvpackev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_w : GCCBuiltin<"__builtin_lasx_xvpackev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_d : GCCBuiltin<"__builtin_lasx_xvpackev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpackod_b : GCCBuiltin<"__builtin_lasx_xvpackod_b">, ++ 
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_h : GCCBuiltin<"__builtin_lasx_xvpackod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_w : GCCBuiltin<"__builtin_lasx_xvpackod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_d : GCCBuiltin<"__builtin_lasx_xvpackod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvilvl_b : GCCBuiltin<"__builtin_lasx_xvilvl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_h : GCCBuiltin<"__builtin_lasx_xvilvl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_w : GCCBuiltin<"__builtin_lasx_xvilvl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_d : GCCBuiltin<"__builtin_lasx_xvilvl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvilvh_b : GCCBuiltin<"__builtin_lasx_xvilvh_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_h : GCCBuiltin<"__builtin_lasx_xvilvh_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_w : GCCBuiltin<"__builtin_lasx_xvilvh_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_d : GCCBuiltin<"__builtin_lasx_xvilvh_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickev_b : GCCBuiltin<"__builtin_lasx_xvpickev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_h : GCCBuiltin<"__builtin_lasx_xvpickev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_w : GCCBuiltin<"__builtin_lasx_xvpickev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_d : GCCBuiltin<"__builtin_lasx_xvpickev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvand_v : GCCBuiltin<"__builtin_lasx_xvand_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvor_v : GCCBuiltin<"__builtin_lasx_xvor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitrev_b : GCCBuiltin<"__builtin_lasx_xvbitrev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_h : GCCBuiltin<"__builtin_lasx_xvbitrev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_w : GCCBuiltin<"__builtin_lasx_xvbitrev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_d : GCCBuiltin<"__builtin_lasx_xvbitrev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmod_bu : GCCBuiltin<"__builtin_lasx_xvmod_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_hu : GCCBuiltin<"__builtin_lasx_xvmod_hu">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_wu : GCCBuiltin<"__builtin_lasx_xvmod_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_du : GCCBuiltin<"__builtin_lasx_xvmod_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickod_b : GCCBuiltin<"__builtin_lasx_xvpickod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_h : GCCBuiltin<"__builtin_lasx_xvpickod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_w : GCCBuiltin<"__builtin_lasx_xvpickod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_d : GCCBuiltin<"__builtin_lasx_xvpickod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplve_b : GCCBuiltin<"__builtin_lasx_xvreplve_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_h : GCCBuiltin<"__builtin_lasx_xvreplve_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_w : GCCBuiltin<"__builtin_lasx_xvreplve_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_d : GCCBuiltin<"__builtin_lasx_xvreplve_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsra_b : GCCBuiltin<"__builtin_lasx_xvsra_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_h : GCCBuiltin<"__builtin_lasx_xvsra_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_w : GCCBuiltin<"__builtin_lasx_xvsra_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_d : GCCBuiltin<"__builtin_lasx_xvsra_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvxor_v : GCCBuiltin<"__builtin_lasx_xvxor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvnor_v : GCCBuiltin<"__builtin_lasx_xvnor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfadd_s : GCCBuiltin<"__builtin_lasx_xvfadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfadd_d : GCCBuiltin<"__builtin_lasx_xvfadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfsub_s : GCCBuiltin<"__builtin_lasx_xvfsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfsub_d : GCCBuiltin<"__builtin_lasx_xvfsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmul_s : GCCBuiltin<"__builtin_lasx_xvfmul_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmul_d : GCCBuiltin<"__builtin_lasx_xvfmul_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf_h : GCCBuiltin<"__builtin_lasx_xvshuf_h">, ++ Intrinsic<[llvm_v16i16_ty], 
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf_w : GCCBuiltin<"__builtin_lasx_xvshuf_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf_d : GCCBuiltin<"__builtin_lasx_xvshuf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvseqi_b : GCCBuiltin<"__builtin_lasx_xvseqi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_h : GCCBuiltin<"__builtin_lasx_xvseqi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_w : GCCBuiltin<"__builtin_lasx_xvseqi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_d : GCCBuiltin<"__builtin_lasx_xvseqi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslei_b : GCCBuiltin<"__builtin_lasx_xvslei_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_h : GCCBuiltin<"__builtin_lasx_xvslei_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_w : GCCBuiltin<"__builtin_lasx_xvslei_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_d : GCCBuiltin<"__builtin_lasx_xvslei_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslei_bu : GCCBuiltin<"__builtin_lasx_xvslei_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_hu : GCCBuiltin<"__builtin_lasx_xvslei_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_wu : GCCBuiltin<"__builtin_lasx_xvslei_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_du : GCCBuiltin<"__builtin_lasx_xvslei_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslti_b : GCCBuiltin<"__builtin_lasx_xvslti_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_h : GCCBuiltin<"__builtin_lasx_xvslti_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_w : GCCBuiltin<"__builtin_lasx_xvslti_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_d : GCCBuiltin<"__builtin_lasx_xvslti_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslti_bu : GCCBuiltin<"__builtin_lasx_xvslti_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_hu : GCCBuiltin<"__builtin_lasx_xvslti_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_wu : GCCBuiltin<"__builtin_lasx_xvslti_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_du : GCCBuiltin<"__builtin_lasx_xvslti_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddi_bu : GCCBuiltin<"__builtin_lasx_xvaddi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, 
llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_hu : GCCBuiltin<"__builtin_lasx_xvaddi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_wu : GCCBuiltin<"__builtin_lasx_xvaddi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_du : GCCBuiltin<"__builtin_lasx_xvaddi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubi_bu : GCCBuiltin<"__builtin_lasx_xvsubi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_hu : GCCBuiltin<"__builtin_lasx_xvsubi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_wu : GCCBuiltin<"__builtin_lasx_xvsubi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_du : GCCBuiltin<"__builtin_lasx_xvsubi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaxi_b : GCCBuiltin<"__builtin_lasx_xvmaxi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_h : GCCBuiltin<"__builtin_lasx_xvmaxi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_w : GCCBuiltin<"__builtin_lasx_xvmaxi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_d : GCCBuiltin<"__builtin_lasx_xvmaxi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmini_b : GCCBuiltin<"__builtin_lasx_xvmini_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_h : GCCBuiltin<"__builtin_lasx_xvmini_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_w : GCCBuiltin<"__builtin_lasx_xvmini_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_d : GCCBuiltin<"__builtin_lasx_xvmini_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaxi_bu : GCCBuiltin<"__builtin_lasx_xvmaxi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_hu : GCCBuiltin<"__builtin_lasx_xvmaxi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_wu : GCCBuiltin<"__builtin_lasx_xvmaxi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_du : GCCBuiltin<"__builtin_lasx_xvmaxi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmini_bu : GCCBuiltin<"__builtin_lasx_xvmini_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_hu : GCCBuiltin<"__builtin_lasx_xvmini_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_wu : GCCBuiltin<"__builtin_lasx_xvmini_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_du : GCCBuiltin<"__builtin_lasx_xvmini_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, 
llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvclz_b : GCCBuiltin<"__builtin_lasx_xvclz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_h : GCCBuiltin<"__builtin_lasx_xvclz_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_w : GCCBuiltin<"__builtin_lasx_xvclz_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_d : GCCBuiltin<"__builtin_lasx_xvclz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpcnt_b : GCCBuiltin<"__builtin_lasx_xvpcnt_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_h : GCCBuiltin<"__builtin_lasx_xvpcnt_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_w : GCCBuiltin<"__builtin_lasx_xvpcnt_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_d : GCCBuiltin<"__builtin_lasx_xvpcnt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfsqrt_s : GCCBuiltin<"__builtin_lasx_xvfsqrt_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfsqrt_d : GCCBuiltin<"__builtin_lasx_xvfsqrt_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrint_s : GCCBuiltin<"__builtin_lasx_xvfrint_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrint_d : GCCBuiltin<"__builtin_lasx_xvfrint_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_w : GCCBuiltin<"__builtin_lasx_xvffint_s_w">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffint_d_l : GCCBuiltin<"__builtin_lasx_xvffint_d_l">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_wu : GCCBuiltin<"__builtin_lasx_xvffint_s_wu">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffint_d_lu : GCCBuiltin<"__builtin_lasx_xvffint_d_lu">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_wu_s : GCCBuiltin<"__builtin_lasx_xvftintrz_wu_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrz_lu_d : GCCBuiltin<"__builtin_lasx_xvftintrz_lu_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplgr2vr_b : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_h : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_w : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_d : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsgr2vr_w : GCCBuiltin<"__builtin_lasx_xvinsgr2vr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvinsgr2vr_d : GCCBuiltin<"__builtin_lasx_xvinsgr2vr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfdiv_s : 
GCCBuiltin<"__builtin_lasx_xvfdiv_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfdiv_d : GCCBuiltin<"__builtin_lasx_xvfdiv_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslli_b : GCCBuiltin<"__builtin_lasx_xvslli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_h : GCCBuiltin<"__builtin_lasx_xvslli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_w : GCCBuiltin<"__builtin_lasx_xvslli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_d : GCCBuiltin<"__builtin_lasx_xvslli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrli_b : GCCBuiltin<"__builtin_lasx_xvsrli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_h : GCCBuiltin<"__builtin_lasx_xvsrli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_w : GCCBuiltin<"__builtin_lasx_xvsrli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_d : GCCBuiltin<"__builtin_lasx_xvsrli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrai_b : GCCBuiltin<"__builtin_lasx_xvsrai_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_h : GCCBuiltin<"__builtin_lasx_xvsrai_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_w : GCCBuiltin<"__builtin_lasx_xvsrai_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_d : GCCBuiltin<"__builtin_lasx_xvsrai_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf4i_b : GCCBuiltin<"__builtin_lasx_xvshuf4i_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_h : GCCBuiltin<"__builtin_lasx_xvshuf4i_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_w : GCCBuiltin<"__builtin_lasx_xvshuf4i_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_d : GCCBuiltin<"__builtin_lasx_xvshuf4i_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrotr_b : GCCBuiltin<"__builtin_lasx_xvrotr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_h : GCCBuiltin<"__builtin_lasx_xvrotr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_w : GCCBuiltin<"__builtin_lasx_xvrotr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_d : GCCBuiltin<"__builtin_lasx_xvrotr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrotri_b : GCCBuiltin<"__builtin_lasx_xvrotri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_h : GCCBuiltin<"__builtin_lasx_xvrotri_h">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_w : GCCBuiltin<"__builtin_lasx_xvrotri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_d : GCCBuiltin<"__builtin_lasx_xvrotri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvld : GCCBuiltin<"__builtin_lasx_xvld">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvst : GCCBuiltin<"__builtin_lasx_xvst">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvrepl128vei_b : GCCBuiltin<"__builtin_lasx_xvrepl128vei_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_h : GCCBuiltin<"__builtin_lasx_xvrepl128vei_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_w : GCCBuiltin<"__builtin_lasx_xvrepl128vei_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_d : GCCBuiltin<"__builtin_lasx_xvrepl128vei_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsve0_w : GCCBuiltin<"__builtin_lasx_xvinsve0_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvinsve0_d : GCCBuiltin<"__builtin_lasx_xvinsve0_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve_w : GCCBuiltin<"__builtin_lasx_xvpickve_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve_d : GCCBuiltin<"__builtin_lasx_xvpickve_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplve0_b : GCCBuiltin<"__builtin_lasx_xvreplve0_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_h : GCCBuiltin<"__builtin_lasx_xvreplve0_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_w : GCCBuiltin<"__builtin_lasx_xvreplve0_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_d : GCCBuiltin<"__builtin_lasx_xvreplve0_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_q : GCCBuiltin<"__builtin_lasx_xvreplve0_q">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_d_w : GCCBuiltin<"__builtin_lasx_vext2xv_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_w_h : GCCBuiltin<"__builtin_lasx_vext2xv_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_h_b : GCCBuiltin<"__builtin_lasx_vext2xv_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_d_h : GCCBuiltin<"__builtin_lasx_vext2xv_d_h">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_w_b : GCCBuiltin<"__builtin_lasx_vext2xv_w_b">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_d_b : GCCBuiltin<"__builtin_lasx_vext2xv_d_b">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_du_wu : GCCBuiltin<"__builtin_lasx_vext2xv_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_wu_hu : GCCBuiltin<"__builtin_lasx_vext2xv_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_hu_bu : GCCBuiltin<"__builtin_lasx_vext2xv_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_du_hu : GCCBuiltin<"__builtin_lasx_vext2xv_du_hu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_wu_bu : GCCBuiltin<"__builtin_lasx_vext2xv_wu_bu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_du_bu : GCCBuiltin<"__builtin_lasx_vext2xv_du_bu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpermi_q : GCCBuiltin<"__builtin_lasx_xvpermi_q">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpermi_d : GCCBuiltin<"__builtin_lasx_xvpermi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvperm_w : GCCBuiltin<"__builtin_lasx_xvperm_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlrni_b_h : GCCBuiltin<"__builtin_lasx_xvsrlrni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_h_w : GCCBuiltin<"__builtin_lasx_xvsrlrni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_w_d : GCCBuiltin<"__builtin_lasx_xvsrlrni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlrni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbz_v : GCCBuiltin<"__builtin_lasx_xbz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_v : GCCBuiltin<"__builtin_lasx_xbnz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbz_b : GCCBuiltin<"__builtin_lasx_xbz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_h : GCCBuiltin<"__builtin_lasx_xbz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_w : GCCBuiltin<"__builtin_lasx_xbz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_d : GCCBuiltin<"__builtin_lasx_xbz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_b : GCCBuiltin<"__builtin_lasx_xbnz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_h : GCCBuiltin<"__builtin_lasx_xbnz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_w : GCCBuiltin<"__builtin_lasx_xbnz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_d : GCCBuiltin<"__builtin_lasx_xbnz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvextl_q_d : GCCBuiltin<"__builtin_lasx_xvextl_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextl_qu_du : 
GCCBuiltin<"__builtin_lasx_xvextl_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch BASE ++ ++def int_loongarch_cpucfg : GCCBuiltin<"__builtin_loongarch_cpucfg">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_csrrd : GCCBuiltin<"__builtin_loongarch_csrrd">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_dcsrrd : GCCBuiltin<"__builtin_loongarch_dcsrrd">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; ++ ++def int_loongarch_csrwr : GCCBuiltin<"__builtin_loongarch_csrwr">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_dcsrwr : GCCBuiltin<"__builtin_loongarch_dcsrwr">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_csrxchg : GCCBuiltin<"__builtin_loongarch_csrxchg">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_dcsrxchg : GCCBuiltin<"__builtin_loongarch_dcsrxchg">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_iocsrrd_b : GCCBuiltin<"__builtin_loongarch_iocsrrd_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_h : GCCBuiltin<"__builtin_loongarch_iocsrrd_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_w : GCCBuiltin<"__builtin_loongarch_iocsrrd_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_d : GCCBuiltin<"__builtin_loongarch_iocsrrd_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_b : GCCBuiltin<"__builtin_loongarch_iocsrwr_b">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_h : GCCBuiltin<"__builtin_loongarch_iocsrwr_h">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_w : GCCBuiltin<"__builtin_loongarch_iocsrwr_w">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_d : GCCBuiltin<"__builtin_loongarch_iocsrwr_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_cacop : GCCBuiltin<"__builtin_loongarch_cacop">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_dcacop : GCCBuiltin<"__builtin_loongarch_dcacop">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_crc_w_b_w : GCCBuiltin<"__builtin_loongarch_crc_w_b_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_h_w : GCCBuiltin<"__builtin_loongarch_crc_w_h_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_w_w : GCCBuiltin<"__builtin_loongarch_crc_w_w_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_d_w : GCCBuiltin<"__builtin_loongarch_crc_w_d_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_b_w : GCCBuiltin<"__builtin_loongarch_crcc_w_b_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_h_w : GCCBuiltin<"__builtin_loongarch_crcc_w_h_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_w_w : GCCBuiltin<"__builtin_loongarch_crcc_w_w_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_d_w : GCCBuiltin<"__builtin_loongarch_crcc_w_d_w">, ++ Intrinsic<[llvm_i32_ty], 
[llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_tlbclr : GCCBuiltin<"__builtin_loongarch_tlbclr">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbflush : GCCBuiltin<"__builtin_loongarch_tlbflush">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbfill : GCCBuiltin<"__builtin_loongarch_tlbfill">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbrd : GCCBuiltin<"__builtin_loongarch_tlbrd">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbwr : GCCBuiltin<"__builtin_loongarch_tlbwr">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbsrch : GCCBuiltin<"__builtin_loongarch_tlbsrch">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_syscall : GCCBuiltin<"__builtin_loongarch_syscall">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_break : GCCBuiltin<"__builtin_loongarch_break">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_asrtle_d : GCCBuiltin<"__builtin_loongarch_asrtle_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_asrtgt_d : GCCBuiltin<"__builtin_loongarch_asrtgt_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_dbar : GCCBuiltin<"__builtin_loongarch_dbar">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_ibar : GCCBuiltin<"__builtin_loongarch_ibar">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++} +diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h +index e2d2784d..72099865 100644 +--- a/include/llvm/Object/ELFObjectFile.h ++++ b/include/llvm/Object/ELFObjectFile.h +@@ -1197,6 +1197,8 @@ StringRef ELFObjectFile::getFileFormatName() const { + return "elf32-littleriscv"; + case ELF::EM_CSKY: + return "elf32-csky"; ++ case ELF::EM_LOONGARCH: ++ return "elf32-loongarch"; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + return "elf32-sparc"; +@@ -1221,6 +1223,8 @@ StringRef ELFObjectFile::getFileFormatName() const { + return "elf64-s390"; + case ELF::EM_SPARCV9: + return "elf64-sparc"; ++ case ELF::EM_LOONGARCH: ++ return "elf64-loongarch"; + case ELF::EM_MIPS: + return "elf64-mips"; + case ELF::EM_AMDGPU: +@@ -1282,6 +1286,15 @@ template Triple::ArchType ELFObjectFile::getArch() const { + default: + report_fatal_error("Invalid ELFCLASS!"); + } ++ case ELF::EM_LOONGARCH: ++ switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { ++ case ELF::ELFCLASS32: ++ return Triple::loongarch32; ++ case ELF::ELFCLASS64: ++ return Triple::loongarch64; ++ default: ++ report_fatal_error("Invalid ELFCLASS!"); ++ } + case ELF::EM_S390: + return Triple::systemz; + +diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap +index d0693ccf..3fd97d07 100644 +--- a/include/llvm/module.modulemap ++++ b/include/llvm/module.modulemap +@@ -71,6 +71,7 @@ module LLVM_BinaryFormat { + textual header "BinaryFormat/ELFRelocs/Hexagon.def" + textual header "BinaryFormat/ELFRelocs/i386.def" + textual header "BinaryFormat/ELFRelocs/Lanai.def" ++ textual header "BinaryFormat/ELFRelocs/LoongArch.def" + textual header "BinaryFormat/ELFRelocs/M68k.def" + textual header "BinaryFormat/ELFRelocs/Mips.def" + textual header "BinaryFormat/ELFRelocs/MSP430.def" +diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +index ce350034..50171505 100644 +--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp ++++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +@@ -207,6 +207,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + 
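// --- Illustrative aside, not part of the patch ------------------------------
// The LASX and BASE intrinsics declared above are reached from C/C++ through
// the __builtin_lasx_* / __builtin_loongarch_* names given in their
// GCCBuiltin<> annotations. A minimal usage sketch, assuming the builtin
// prototypes mirror the intrinsic operand types declared above (the helper
// names and the vector typedef are ours, purely for illustration):

#include <cstdint>

// 256-bit vector type matching the llvm_v8f32_ty operands used above.
typedef float v8f32 __attribute__((vector_size(32)));

// Element-wise a*b + c via the xvfmul.s / xvfadd.s intrinsics declared above.
static v8f32 lasx_fmadd(v8f32 a, v8f32 b, v8f32 c) {
  return __builtin_lasx_xvfadd_s(__builtin_lasx_xvfmul_s(a, b), c);
}

// Read CPUCFG word 0 and issue a full data barrier via the BASE intrinsics.
static uint32_t read_cpucfg0(void) {
  uint32_t w = (uint32_t)__builtin_loongarch_cpucfg(0);
  __builtin_loongarch_dbar(0); // hint 0: full barrier
  return w;
}
// -----------------------------------------------------------------------------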
break; ++ case Triple::loongarch32: ++ case Triple::loongarch64: ++ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; ++ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ++ dwarf::DW_EH_PE_sdata4; ++ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ++ dwarf::DW_EH_PE_sdata4; ++ break; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: +diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp +index b66429d8..0562a823 100644 +--- a/lib/CodeGen/XRayInstrumentation.cpp ++++ b/lib/CodeGen/XRayInstrumentation.cpp +@@ -227,6 +227,8 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { + case Triple::ArchType::thumb: + case Triple::ArchType::aarch64: + case Triple::ArchType::hexagon: ++ case Triple::ArchType::loongarch32: ++ case Triple::ArchType::loongarch64: + case Triple::ArchType::mips: + case Triple::ArchType::mipsel: + case Triple::ArchType::mips64: +diff --git a/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +index 249f02f3..305e0b07 100644 +--- a/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp ++++ b/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +@@ -261,6 +261,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { + case Triple::mips64el: + return CreateWithABI(EPC); + ++ case Triple::loongarch64: ++ return CreateWithABI(EPC); ++ + case Triple::x86_64: + if (TT.getOS() == Triple::OSType::Win32) + return CreateWithABI(EPC); +diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +index 7a71d2f7..81644255 100644 +--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp ++++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +@@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + ++ case Triple::loongarch64: { ++ typedef orc::LocalJITCompileCallbackManager CCMgrT; ++ return CCMgrT::Create(ES, ErrorHandlerAddress); ++ } ++ + case Triple::x86_64: { + if (T.getOS() == Triple::OSType::Win32) { + typedef orc::LocalJITCompileCallbackManager CCMgrT; +diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp +index 66453e6a..eac277af 100644 +--- a/lib/ExecutionEngine/Orc/LazyReexports.cpp ++++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp +@@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, + case Triple::mips64el: + return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + ++ case Triple::loongarch64: ++ return LocalLazyCallThroughManager::Create( ++ ES, ErrorHandlerAddr); ++ + case Triple::x86_64: + if (T.getOS() == Triple::OSType::Win32) + return LocalLazyCallThroughManager::Create( +diff --git a/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/lib/ExecutionEngine/Orc/OrcABISupport.cpp +index 18b3c5e1..440831d7 100644 +--- a/lib/ExecutionEngine/Orc/OrcABISupport.cpp ++++ b/lib/ExecutionEngine/Orc/OrcABISupport.cpp +@@ -906,5 +906,206 @@ void OrcMips64::writeIndirectStubsBlock( + Stub[8 * I + 7] = 0x00000000; // nop + } + } ++ ++void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, ++ JITTargetAddress ResolverTargetAddress, ++ JITTargetAddress ReentryFnAddr, ++ JITTargetAddress ReentryCtxAddr) { ++ ++ const uint32_t ResolverCode[] = { ++ // resolver_entry: ++ 0x02fc8063, // 0x0: addi.d $r3,$r3,-224(0xf20) ++ 0x29c00064, // 0x4: st.d $r4,$r3,0 ++ 0x29c02065, // 0x8: st.d $r5,$r3,8(0x8) ++ 
0x29c04066, // 0xc: st.d $r6,$r3,16(0x10) ++ 0x29c06067, // 0x10: st.d $r7,$r3,24(0x18) ++ 0x29c08068, // 0x14: st.d $r8,$r3,32(0x20) ++ 0x29c0a069, // 0x18: st.d $r9,$r3,40(0x28) ++ 0x29c0c06a, // 0x1c: st.d $r10,$r3,48(0x30) ++ 0x29c0e06b, // 0x20: st.d $r11,$r3,56(0x38) ++ 0x29c1006c, // 0x24: st.d $r12,$r3,64(0x40) ++ 0x29c1206d, // 0x28: st.d $r13,$r3,72(0x48) ++ 0x29c1406e, // 0x2c: st.d $r14,$r3,80(0x50) ++ 0x29c1606f, // 0x30: st.d $r15,$r3,88(0x58) ++ 0x29c18070, // 0x34: st.d $r16,$r3,96(0x60) ++ 0x29c1a071, // 0x38: st.d $r17,$r3,104(0x68) ++ 0x29c1c072, // 0x3c: st.d $r18,$r3,112(0x70) ++ 0x29c1e073, // 0x40: st.d $r19,$r3,120(0x78) ++ 0x29c20074, // 0x44: st.d $r20,$r3,128(0x80) ++ 0x29c22076, // 0x48: st.d $r22,$r3,136(0x88) ++ 0x29c24077, // 0x4c: st.d $r23,$r3,144(0x90) ++ 0x29c26078, // 0x50: st.d $r24,$r3,152(0x98) ++ 0x29c28079, // 0x54: st.d $r25,$r3,160(0xa0) ++ 0x29c2a07a, // 0x58: st.d $r26,$r3,168(0xa8) ++ 0x29c2c07b, // 0x5c: st.d $r27,$r3,176(0xb0) ++ 0x29c2e07c, // 0x60: st.d $r28,$r3,184(0xb8) ++ 0x29c3007d, // 0x64: st.d $r29,$r3,192(0xc0) ++ 0x29c3207e, // 0x68: st.d $r30,$r3,200(0xc8) ++ 0x29c3407f, // 0x6c: st.d $r31,$r3,208(0xd0) ++ 0x29c36061, // 0x70: st.d $r1,$r3,216(0xd8) ++ // JIT re-entry ctx addr. ++ 0x00000000, // 0x74: lu12i.w $a0,hi(ctx) ++ 0x00000000, // 0x78: ori $a0,$a0,lo(ctx) ++ 0x00000000, // 0x7c: lu32i.d $a0,higher(ctx) ++ 0x00000000, // 0x80: lu52i.d $a0,$a0,highest(ctx) ++ ++ 0x00150025, // 0x84: move $r5,$r1 ++ 0x02ffa0a5, // 0x88: addi.d $r5,$r5,-24(0xfe8) ++ ++ // JIT re-entry fn addr: ++ 0x00000000, // 0x8c: lu12i.w $t0,hi(reentry) ++ 0x00000000, // 0x90: ori $t0,$t0,lo(reentry) ++ 0x00000000, // 0x94: lu32i.d $t0,higher(reentry) ++ 0x00000000, // 0x98: lu52i.d $t0,$t0,highest(reentry) ++ 0x4c0002a1, // 0x9c: jirl $r1,$r21,0 ++ 0x00150095, // 0xa0: move $r21,$r4 ++ 0x28c36061, // 0xa4: ld.d $r1,$r3,216(0xd8) ++ 0x28c3407f, // 0xa8: ld.d $r31,$r3,208(0xd0) ++ 0x28c3207e, // 0xac: ld.d $r30,$r3,200(0xc8) ++ 0x28c3007d, // 0xb0: ld.d $r29,$r3,192(0xc0) ++ 0x28c2e07c, // 0xb4: ld.d $r28,$r3,184(0xb8) ++ 0x28c2c07b, // 0xb8: ld.d $r27,$r3,176(0xb0) ++ 0x28c2a07a, // 0xbc: ld.d $r26,$r3,168(0xa8) ++ 0x28c28079, // 0xc0: ld.d $r25,$r3,160(0xa0) ++ 0x28c26078, // 0xc4: ld.d $r24,$r3,152(0x98) ++ 0x28c24077, // 0xc8: ld.d $r23,$r3,144(0x90) ++ 0x28c22076, // 0xcc: ld.d $r22,$r3,136(0x88) ++ 0x28c20074, // 0xd0: ld.d $r20,$r3,128(0x80) ++ 0x28c1e073, // 0xd4: ld.d $r19,$r3,120(0x78) ++ 0x28c1c072, // 0xd8: ld.d $r18,$r3,112(0x70) ++ 0x28c1a071, // 0xdc: ld.d $r17,$r3,104(0x68) ++ 0x28c18070, // 0xe0: ld.d $r16,$r3,96(0x60) ++ 0x28c1606f, // 0xe4: ld.d $r15,$r3,88(0x58) ++ 0x28c1406e, // 0xe8: ld.d $r14,$r3,80(0x50) ++ 0x28c1206d, // 0xec: ld.d $r13,$r3,72(0x48) ++ 0x28c1006c, // 0xf0: ld.d $r12,$r3,64(0x40) ++ 0x28c0e06b, // 0xf4: ld.d $r11,$r3,56(0x38) ++ 0x28c0c06a, // 0xf8: ld.d $r10,$r3,48(0x30) ++ 0x28c0a069, // 0xfc: ld.d $r9,$r3,40(0x28) ++ 0x28c08068, // 0x100: ld.d $r8,$r3,32(0x20) ++ 0x28c06067, // 0x104: ld.d $r7,$r3,24(0x18) ++ 0x28c04066, // 0x108: ld.d $r6,$r3,16(0x10) ++ 0x28c02065, // 0x10c: ld.d $r5,$r3,8(0x8) ++ 0x28c00064, // 0x110: ld.d $r4,$r3,0 ++ 0x02c38063, // 0x114: addi.d $r3,$r3,224(0xe0) ++ 0x00150281, // 0x118: move $r1,$r20 ++ 0x4c0002a0, // 0x11c: jirl $r0,$r21,0 ++ }; ++ ++ const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lu12i.w ++ const unsigned ReentryCtxAddrOffset = 0x74; // JIT re-entry ctx addr lu12i.w ++ ++ memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); ++ ++ uint32_t 
ReentryCtxLU12i = 0x14000004 | ((ReentryCtxAddr << 32 >> 44) << 5); ++ uint32_t ReentryCtxORi = 0x03800084 | ((ReentryCtxAddr & 0xFFF) << 10); ++ uint32_t ReentryCtxLU32i = 0x16000004 | ((ReentryCtxAddr << 12 >> 44) << 5); ++ uint32_t ReentryCtxLU52i = 0x03000084 | ((ReentryCtxAddr >> 52) << 10); ++ ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLU12i, ++ sizeof(ReentryCtxLU12i)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxORi, ++ sizeof(ReentryCtxORi)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxLU32i, ++ sizeof(ReentryCtxLU32i)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxLU52i, ++ sizeof(ReentryCtxLU52i)); ++ ++ uint32_t ReentryLU12i = 0x14000015 | ((ReentryFnAddr << 32 >> 44) << 5); ++ uint32_t ReentryORi = 0x038002b5 | ((ReentryFnAddr & 0xFFF) << 10); ++ uint32_t ReentryLU32i = 0x16000015 | ((ReentryFnAddr << 12 >> 44) << 5); ++ uint32_t ReentryLU52i = 0x030002b5 | ((ReentryFnAddr >> 52) << 10); ++ ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryLU12i, ++ sizeof(ReentryLU12i)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryORi, ++ sizeof(ReentryORi)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryLU32i, ++ sizeof(ReentryLU32i)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryLU52i, ++ sizeof(ReentryLU52i)); ++} ++ ++void OrcLoongArch64::writeTrampolines( ++ char *TrampolineBlockWorkingMem, ++ JITTargetAddress TrampolineBlockTargetAddress, ++ JITTargetAddress ResolverFnAddr, unsigned NumTrampolines) { ++ ++ uint32_t *Trampolines = ++ reinterpret_cast(TrampolineBlockWorkingMem); ++ ++ uint64_t HiBits = ((ResolverFnAddr << 32 >> 44) << 5); ++ uint64_t LoBits = ((ResolverFnAddr & 0xFFF) << 10); ++ uint64_t HigherBits = ((ResolverFnAddr << 12 >> 44) << 5); ++ uint64_t HighestBits = ((ResolverFnAddr >> 52) << 10); ++ ++ for (unsigned I = 0; I < NumTrampolines; ++I) { ++ Trampolines[10 * I + 0] = 0x00150034; // move $t8,$ra ++ Trampolines[10 * I + 1] = ++ 0x14000015 | HiBits; // lu12i.w $r21,hi(ResolveAddr) ++ Trampolines[10 * I + 2] = ++ 0x038002b5 | LoBits; // ori $r21,$r21,lo(ResolveAddr) ++ Trampolines[10 * I + 3] = ++ 0x16000015 | HigherBits; // lu32i $r21,higher(ResolveAddr) ++ Trampolines[10 * I + 4] = ++ 0x030002b5 | HighestBits; // lu52i $r21,$r21,highest(ResolveAddr) ++ Trampolines[10 * I + 5] = 0x4c0002a1; // jirl $ra, $r21, 0 ++ } ++} ++ ++void OrcLoongArch64::writeIndirectStubsBlock( ++ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, ++ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { ++ // Stub format is: ++ // ++ // .section __orc_stubs ++ // stub1: ++ // lu12i.w $r21, %abs(ptr1)<<32>>44 ++ // ori $r21, $r21, %abs(ptr1)&0xfff ++ // lu32i.d $r21, %abs(ptr1)<<12>>44 ++ // lu52i.d $r21, $r21, %abs(ptr1)>>52 ++ // ld.d $r21, $r21, 0 ++ // jirl $r0, $r21, 0 ++ // stub2: ++ // lu12i.w $r21, %abs(ptr2)<<32>>44 ++ // ori $r21, $r21, %abs(ptr2)&0xfff ++ // lu32i.d $r21, %abs(ptr2)<<12>>44 ++ // lu52i.d $r21, $r21, %abs(ptr2)>>52 ++ // ld.d $r21, $r21, 0 ++ // jirl $r0, $r21, 0 ++ // ++ // ... ++ // ++ // .section __orc_ptrs ++ // ptr1: ++ // .dword 0x0 ++ // ptr2: ++ // .dword 0x0 ++ // ++ // ... ++ ++ assert(stubAndPointerRangesOk( ++ StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && ++ "PointersBlock is out of range"); ++ ++ // Populate the stubs page stubs and mark it executable. 
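// --- Illustrative aside, not part of the patch ------------------------------
// The resolver, trampolines and stubs above all materialize a 64-bit address
// into $r21 with the same four-instruction pattern: lu12i.w (bits 31..12),
// ori (bits 11..0), lu32i.d (bits 51..32), lu52i.d (bits 63..52). A
// standalone sketch of that immediate patching, using the same $r21 opcodes
// and shift expressions as writeTrampolines / writeIndirectStubsBlock (the
// helper name is ours, not LLVM's):

#include <array>
#include <cstdint>

static std::array<uint32_t, 4> encodeLoadAddrIntoR21(uint64_t Addr) {
  std::array<uint32_t, 4> Words = {{
      static_cast<uint32_t>(0x14000015 | ((Addr << 32 >> 44) << 5)), // lu12i.w $r21, bits 31..12
      static_cast<uint32_t>(0x038002b5 | ((Addr & 0xFFF) << 10)),    // ori     $r21, $r21, bits 11..0
      static_cast<uint32_t>(0x16000015 | ((Addr << 12 >> 44) << 5)), // lu32i.d $r21, bits 51..32
      static_cast<uint32_t>(0x030002b5 | ((Addr >> 52) << 10)),      // lu52i.d $r21, $r21, bits 63..52
  }};
  return Words;
}
// -----------------------------------------------------------------------------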
++ uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); ++ uint64_t PtrAddr = PointersBlockTargetAddress; ++ ++ for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { ++ uint64_t HiBits = ((PtrAddr << 32 >> 44) << 5); ++ uint64_t LoBits = ((PtrAddr & 0xFFF) << 10); ++ uint64_t HigherBits = ((PtrAddr << 12 >> 44) << 5); ++ uint64_t HighestBits = ((PtrAddr >> 52) << 10); ++ Stub[8 * I + 0] = 0x14000015 | HiBits; // lu12i.w $r21, hi(PtrAddr) ++ Stub[8 * I + 1] = 0x038002b5 | LoBits; // ori $r21, $r21, lo(PtrAddr) ++ Stub[8 * I + 2] = 0x16000015 | HigherBits; // lu32i.d $r21, higher(PtrAddr) ++ Stub[8 * I + 3] = ++ 0x030002b5 | HighestBits; // lu52i.d $r21, $r21, highest(PtrAddr) ++ Stub[8 * I + 4] = 0x28c002b5; // ld.d $r21, $r21, 0 ++ Stub[8 * I + 5] = 0x4c0002a0; // jirl $r0, $r21, 0 ++ } ++} ++ + } // End namespace orc. + } // End namespace llvm. +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +index f92618af..b41b2233 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +@@ -634,6 +634,191 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, + } + } + ++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, ++ uint64_t Value, uint32_t Type, ++ int64_t Addend) { ++ uint32_t *TargetPtr = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); ++ uint64_t tmp1, tmp2, tmp3; ++ ++ LLVM_DEBUG(dbgs() << "[XXX] resolveLoongArch64Relocation, LocalAddress: 0x" ++ << format("%llx", Section.getAddressWithOffset(Offset)) ++ << " FinalAddress: 0x" << format("%llx", FinalAddress) ++ << " Value: 0x" << format("%llx", Value) << " Type: 0x" ++ << format("%x", Type) << " Addend: 0x" ++ << format("%llx", Addend) << "\n"); ++ ++ switch (Type) { ++ case ELF::R_LARCH_SOP_PUSH_GPREL: ++ case ELF::R_LARCH_SOP_PUSH_TLS_TPREL: ++ case ELF::R_LARCH_SOP_PUSH_TLS_GOT: ++ case ELF::R_LARCH_SOP_PUSH_TLS_GD: ++ default: ++ llvm_unreachable("Relocation type not implemented yet!"); ++ break; ++ case ELF::R_LARCH_MARK_LA: ++ // mark la ++ MarkLA = true; ++ break; ++ case ELF::R_LARCH_SOP_PUSH_ABSOLUTE: ++ if (MarkLA && !Addend) ++ // push(value) ++ ValuesStack.push_back(Value); ++ else ++ // push(addend) ++ ValuesStack.push_back(Addend); ++ break; ++ case ELF::R_LARCH_SOP_PUSH_PLT_PCREL: ++ case ELF::R_LARCH_SOP_PUSH_PCREL: ++ MarkLA = false; ++ // push(value -pc + addend) ++ ValuesStack.push_back(Value - FinalAddress + Addend); ++ break; ++ case ELF::R_LARCH_SOP_NOT: ++ // pop(tmp1) ++ // push(!tmp1) ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(!tmp1); ++ break; ++ case ELF::R_LARCH_SOP_AND: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 & tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 & tmp2); ++ break; ++ case ELF::R_LARCH_SOP_IF_ELSE: ++ // pop(tmp3) ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 ? tmp2 : tmp3) ++ tmp3 = ValuesStack.pop_back_val(); ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 ? 
tmp2 : tmp3); ++ break; ++ case ELF::R_LARCH_SOP_ADD: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 + tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 + tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SUB: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 - tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 - tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SR: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 >> tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 >> tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SL: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 << tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 << tmp2); ++ break; ++ case ELF::R_LARCH_32: ++ support::ulittle32_t::ref{TargetPtr} = ++ static_cast(Value + Addend); ++ break; ++ case ELF::R_LARCH_64: ++ support::ulittle64_t::ref{TargetPtr} = Value + Addend; ++ break; ++ case ELF::R_LARCH_SOP_POP_32_U_10_12: ++ case ELF::R_LARCH_SOP_POP_32_S_10_12: ++ // pop(tmp1) ++ // get(inst) ++ // inst=(inst & 0xffc003ff)|((tmp1 & 0xfff) << 10) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xffc003ff) | ++ static_cast((tmp1 & 0xfff) << 10); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_5_20: ++ // pop(tmp1) ++ // get(inst) ++ // inst=(inst & 0xfe00001f)|((tmp1 & 0xfffff) << 5) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfe00001f) | ++ static_cast((tmp1 & 0xfffff) << 5); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>=2 ++ // get(inst) ++ // inst=(inst & 0xfc0003ff)|((tmp1 & 0xffff) << 10) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc0003ff) | ++ static_cast((tmp1 & 0xffff) << 10); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_0_5_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>= 2 ++ // get(inst) ++ // inst=(inst & 0xfc0003e0)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x1f0000) >> ++ // 16) write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc0003e0) | ++ static_cast((tmp1 & 0xffff) << 10) | ++ static_cast((tmp1 & 0x1f0000) >> 16); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_0_10_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>= 2 ++ // get(inst) ++ // inst=(inst & 0xfc000000)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x3ff0000) >> ++ // 16) write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc000000) | ++ static_cast((tmp1 & 0xffff) << 10) | ++ static_cast((tmp1 & 0x3ff0000) >> 16); ++ break; ++ case ELF::R_LARCH_ADD32: ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} + ++ static_cast(Value + Addend)); ++ break; ++ case ELF::R_LARCH_SUB32: ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} - ++ static_cast(Value + Addend)); ++ break; ++ case ELF::R_LARCH_ADD64: ++ support::ulittle64_t::ref{TargetPtr} = ++ (support::ulittle64_t::ref{TargetPtr} + Value + Addend); ++ break; ++ case ELF::R_LARCH_SUB64: ++ support::ulittle64_t::ref{TargetPtr} = ++ 
(support::ulittle64_t::ref{TargetPtr} - Value - Addend); ++ break; ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1050,6 +1235,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1362,6 +1550,25 @@ RuntimeDyldELF::processRelocationRef( + } + processSimpleRelocation(SectionID, Offset, RelType, Value); + } ++ } else if (Arch == Triple::loongarch64) { ++ RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(TargetName); ++ if (!TargetName.empty()) { ++ if (Loc == GlobalSymbolTable.end()) { ++ IsSaved = true; ++ SavedSymbol = TargetName; ++ } else { ++ IsSaved = false; ++ } ++ } ++ if (IsSaved == true) { ++ Value.SymbolName = SavedSymbol.data(); ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } else { ++ uint8_t *TargetAddr = getSymbolLocalAddress(TargetName); ++ resolveRelocation(Sections[SectionID], Offset, ++ reinterpret_cast(TargetAddr), RelType, ++ Addend); ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2211,6 +2418,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index 1251036f..ba898f65 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -155,6 +159,12 @@ private: + // EH frame sections with the memory manager. 
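// --- Illustrative aside, not part of the patch ------------------------------
// resolveLoongArch64Relocation() above implements the older stack-based
// LoongArch relocation model: PUSH_* operators push values, ADD/SUB/SL/SR/
// AND/IF_ELSE combine them, and the POP_32_* operators write the popped
// result into an immediate field of the instruction at the relocation offset.
// A minimal sketch of the two field-patching steps, using the same masks as
// the code above (function names are ours):

#include <cstdint>

// R_LARCH_SOP_POP_32_S_5_20: write a 20-bit value into bits 24..5
// (the si20 field used by lu12i.w and friends).
static uint32_t patchSi20(uint32_t Inst, uint64_t Val) {
  return (Inst & 0xfe00001f) | static_cast<uint32_t>((Val & 0xfffff) << 5);
}

// R_LARCH_SOP_POP_32_[US]_10_12: write a 12-bit value into bits 21..10
// (the immediate field of addi/ori/ld/st-class instructions).
static uint32_t patchImm12(uint32_t Inst, uint64_t Val) {
  return (Inst & 0xffc003ff) | static_cast<uint32_t>((Val & 0xfff) << 10);
}
// -----------------------------------------------------------------------------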
+ SmallVector UnregisteredEHFrameSections; + ++ // For loongarch evaluteRelocation ++ SmallVector ValuesStack; ++ bool IsSaved; ++ bool MarkLA; ++ StringRef SavedSymbol; ++ + // Map between GOT relocation value and corresponding GOT offset + std::map GOTOffsetMap; + +diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp +index 726ba80d..26819818 100644 +--- a/lib/IR/Function.cpp ++++ b/lib/IR/Function.cpp +@@ -37,6 +37,7 @@ + #include "llvm/IR/IntrinsicsARM.h" + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsHexagon.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsMips.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" +diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp +index 56a42621..50a7c68e 100644 +--- a/lib/Object/ELF.cpp ++++ b/lib/Object/ELF.cpp +@@ -94,6 +94,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + break; + } + break; ++ case ELF::EM_LOONGARCH: ++ switch (Type) { ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" ++ default: ++ break; ++ } ++ break; + case ELF::EM_PPC: + switch (Type) { + #include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" +diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp +index 00a45e2c..23fd0f57 100644 +--- a/lib/Object/RelocationResolver.cpp ++++ b/lib/Object/RelocationResolver.cpp +@@ -468,6 +468,28 @@ static uint64_t resolveRISCV(uint64_t Type, uint64_t Offset, uint64_t S, + } + } + ++static bool supportsLoongArch(uint64_t Type) { ++ switch (Type) { ++ case ELF::R_LARCH_32: ++ case ELF::R_LARCH_64: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S, ++ uint64_t LocData, int64_t Addend) { ++ switch (Type) { ++ case ELF::R_LARCH_32: ++ return (S + Addend) & 0xFFFFFFFF; ++ case ELF::R_LARCH_64: ++ return S + Addend; ++ default: ++ llvm_unreachable("Invalid relocation type"); ++ } ++} ++ + static bool supportsCOFFX86(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_I386_SECREL: +@@ -682,6 +704,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsAmdgpu, resolveAmdgpu}; + case Triple::riscv64: + return {supportsRISCV, resolveRISCV}; ++ case Triple::loongarch64: ++ return {supportsLoongArch, resolveLoongArch}; + default: + return {nullptr, nullptr}; + } +@@ -715,6 +739,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsHexagon, resolveHexagon}; + case Triple::riscv32: + return {supportsRISCV, resolveRISCV}; ++ case Triple::loongarch32: ++ return {supportsLoongArch, resolveLoongArch}; + default: + return {nullptr, nullptr}; + } +diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp +index 3d4cd408..abe8c250 100644 +--- a/lib/ObjectYAML/ELFYAML.cpp ++++ b/lib/ObjectYAML/ELFYAML.cpp +@@ -344,6 +344,7 @@ void ScalarEnumerationTraits::enumeration( + ECase(EM_BPF); + ECase(EM_VE); + ECase(EM_CSKY); ++ ECase(EM_LOONGARCH); + #undef ECase + IO.enumFallback(Value); + } +@@ -599,6 +600,14 @@ void ScalarBitSetTraits::bitset(IO &IO, + break; + } + break; ++ case ELF::EM_LOONGARCH: ++ BCaseMask(EF_LARCH_BASE_ABI_ILP32S, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_ILP32F, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_ILP32D, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_LP64S, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_LP64F, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_LP64D, EF_LARCH_BASE_ABI); ++ break; + default: + break; + } +@@ -847,6 +856,8 @@ void 
ScalarEnumerationTraits::enumeration( + break; + case ELF::EM_68K: + #include "llvm/BinaryFormat/ELFRelocs/M68k.def" ++ case ELF::EM_LOONGARCH: ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" + break; + default: + // Nothing to do. +diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp +index a82a4d45..35e74b80 100644 +--- a/lib/Support/Host.cpp ++++ b/lib/Support/Host.cpp +@@ -1282,6 +1282,45 @@ StringRef sys::getHostCPUName() { + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForS390x(Content); + } ++#elif defined(__linux__) && defined(__loongarch__) ++// loongarch prid register ++// +----------------+----------------+----------------+----------------+ ++// | Company Options| Company ID | Processor ID | Revision | ++// +----------------+----------------+----------------+----------------+ ++// 31 24 23 16 15 8 7 0 ++ ++#define PRID_OPT_MASK 0xff000000 ++#define PRID_COMP_MASK 0xff0000 ++#define PRID_COMP_LOONGSON 0x140000 ++#define PRID_IMP_MASK 0xff00 ++ ++#define PRID_IMP_LOONGSON_32 0x4200 /* Loongson 32bit */ ++#define PRID_IMP_LOONGSON_64R 0x6100 /* Reduced Loongson 64bit */ ++#define PRID_IMP_LOONGSON_64C 0x6300 /* Classic Loongson 64bit */ ++#define PRID_IMP_LOONGSON_64G 0xc000 /* Generic Loongson 64bit */ ++ ++StringRef sys::getHostCPUName() { ++ // use prid to detect cpu name ++ unsigned CPUCFG_NUM = 0; // prid ++ unsigned prid; ++ ++ __asm__("cpucfg %[prid], %[CPUCFG_NUM]\n\t" ++ :[prid]"=r"(prid) ++ :[CPUCFG_NUM]"r"(CPUCFG_NUM)); ++ ++ if ((prid & PRID_COMP_MASK) == PRID_COMP_LOONGSON) {// for Loongson ++ switch (prid & PRID_IMP_MASK) { ++ case PRID_IMP_LOONGSON_32: // not support ++ return "generic-la32"; ++ case PRID_IMP_LOONGSON_64R: ++ case PRID_IMP_LOONGSON_64C: ++ case PRID_IMP_LOONGSON_64G: ++ return "la464"; ++ } ++ } ++ ++ return "generic"; ++} + #elif defined(__MVS__) + StringRef sys::getHostCPUName() { + // Get pointer to Communications Vector Table (CVT). +@@ -1759,6 +1798,36 @@ bool sys::getHostCPUFeatures(StringMap &Features) { + + return true; + } ++#elif defined(__linux__) && defined(__loongarch__) ++bool sys::getHostCPUFeatures(StringMap &Features) { ++ std::unique_ptr P = getProcCpuinfoContent(); ++ if (!P) ++ return false; ++ ++ SmallVector Lines; ++ P->getBuffer().split(Lines, "\n"); ++ ++ SmallVector CPUFeatures; ++ ++ // Look for the CPU features. 
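// --- Illustrative aside, not part of the patch ------------------------------
// getHostCPUName() above reads the PRID word with the cpucfg instruction and
// decodes the company/processor fields shown in the bit-layout comment. The
// same decode, factored into a pure function that can be exercised off-target
// (the helper name is ours; the masks and cases match the code above):

#include <cstdint>

static const char *cpuNameFromPrid(uint32_t Prid) {
  if ((Prid & 0xff0000) != 0x140000) // company ID != Loongson
    return "generic";
  switch (Prid & 0xff00) {           // processor ID field
  case 0x4200:                       // Loongson 32-bit
    return "generic-la32";
  case 0x6100:                       // reduced Loongson 64-bit
  case 0x6300:                       // classic Loongson 64-bit
  case 0xc000:                       // generic Loongson 64-bit
    return "la464";
  default:
    return "generic";
  }
}
// -----------------------------------------------------------------------------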
++ for (unsigned I = 0, E = Lines.size(); I != E; ++I) ++ if (Lines[I].startswith("features")) { ++ Lines[I].split(CPUFeatures, ' '); ++ break; ++ } ++ ++ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { ++ StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) ++ .Case("lsx", "lsx") ++ .Case("lasx", "lasx") ++ .Default(""); ++ ++ if (LLVMFeatureStr != "") ++ Features[LLVMFeatureStr] = true; ++ } ++ ++ return true; ++} + #else + bool sys::getHostCPUFeatures(StringMap &Features) { return false; } + #endif +diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp +index a9afcc9d..d27e8821 100644 +--- a/lib/Support/Triple.cpp ++++ b/lib/Support/Triple.cpp +@@ -44,6 +44,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) { + case lanai: return "lanai"; + case le32: return "le32"; + case le64: return "le64"; ++ case loongarch32: return "loongarch32"; ++ case loongarch64: return "loongarch64"; + case m68k: return "m68k"; + case mips64: return "mips64"; + case mips64el: return "mips64el"; +@@ -164,6 +166,9 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { + + case ve: return "ve"; + case csky: return "csky"; ++ ++ case loongarch32: ++ case loongarch64: return "loongarch"; + } + } + +@@ -250,6 +255,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { + case GNUEABIHF: return "gnueabihf"; + case GNUX32: return "gnux32"; + case GNUILP32: return "gnu_ilp32"; ++ case GNUABILPX32: return "gnuabilpx32"; + case Itanium: return "itanium"; + case MSVC: return "msvc"; + case MacABI: return "macabi"; +@@ -340,6 +346,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { + .Case("renderscript64", renderscript64) + .Case("ve", ve) + .Case("csky", csky) ++ .Case("loongarch32", loongarch32) ++ .Case("loongarch64", loongarch64) + .Default(UnknownArch); + } + +@@ -475,6 +483,8 @@ static Triple::ArchType parseArch(StringRef ArchName) { + .Case("wasm32", Triple::wasm32) + .Case("wasm64", Triple::wasm64) + .Case("csky", Triple::csky) ++ .Case("loongarch32", Triple::loongarch32) ++ .Case("loongarch64", Triple::loongarch64) + .Default(Triple::UnknownArch); + + // Some architectures require special parsing logic just to compute the +@@ -731,6 +741,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { + case Triple::lanai: + case Triple::le32: + case Triple::le64: ++ case Triple::loongarch32: ++ case Triple::loongarch64: + case Triple::m68k: + case Triple::mips64: + case Triple::mips64el: +@@ -813,6 +825,7 @@ Triple::Triple(const Twine &Str) + .StartsWith("mipsisa64", Triple::GNUABI64) + .StartsWith("mipsisa32", Triple::GNU) + .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU) ++ .Cases("loongarch32", "loongarch64", Triple::GNU) + .Default(UnknownEnvironment); + } + } +@@ -1290,6 +1303,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { + case llvm::Triple::kalimba: + case llvm::Triple::lanai: + case llvm::Triple::le32: ++ case llvm::Triple::loongarch32: + case llvm::Triple::m68k: + case llvm::Triple::mips: + case llvm::Triple::mipsel: +@@ -1321,6 +1335,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { + case llvm::Triple::bpfel: + case llvm::Triple::hsail64: + case llvm::Triple::le64: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::nvptx64: +@@ -1377,6 +1392,7 @@ Triple Triple::get32BitArchVariant() const { + case Triple::kalimba: + case Triple::lanai: + case Triple::le32: ++ case Triple::loongarch32: + case Triple::m68k: 
+ case Triple::mips: + case Triple::mipsel: +@@ -1412,6 +1428,7 @@ Triple Triple::get32BitArchVariant() const { + case Triple::mips64el: + T.setArch(Triple::mipsel, getSubArch()); + break; ++ case Triple::loongarch64: T.setArch(Triple::loongarch32); break; + case Triple::nvptx64: T.setArch(Triple::nvptx); break; + case Triple::ppc64: T.setArch(Triple::ppc); break; + case Triple::ppc64le: T.setArch(Triple::ppcle); break; +@@ -1455,6 +1472,7 @@ Triple Triple::get64BitArchVariant() const { + case Triple::bpfel: + case Triple::hsail64: + case Triple::le64: ++ case Triple::loongarch64: + case Triple::mips64: + case Triple::mips64el: + case Triple::nvptx64: +@@ -1484,6 +1502,7 @@ Triple Triple::get64BitArchVariant() const { + case Triple::mipsel: + T.setArch(Triple::mips64el, getSubArch()); + break; ++ case Triple::loongarch32: T.setArch(Triple::loongarch64); break; + case Triple::nvptx: T.setArch(Triple::nvptx64); break; + case Triple::ppc: T.setArch(Triple::ppc64); break; + case Triple::ppcle: T.setArch(Triple::ppc64le); break; +@@ -1517,6 +1536,8 @@ Triple Triple::getBigEndianArchVariant() const { + case Triple::kalimba: + case Triple::le32: + case Triple::le64: ++ case Triple::loongarch32: ++ case Triple::loongarch64: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: +@@ -1617,6 +1638,8 @@ bool Triple::isLittleEndian() const { + case Triple::kalimba: + case Triple::le32: + case Triple::le64: ++ case Triple::loongarch32: ++ case Triple::loongarch64: + case Triple::mips64el: + case Triple::mipsel: + case Triple::msp430: +diff --git a/lib/Target/LoongArch/AsmParser/CMakeLists.txt b/lib/Target/LoongArch/AsmParser/CMakeLists.txt +new file mode 100644 +index 00000000..cb8b768d +--- /dev/null ++++ b/lib/Target/LoongArch/AsmParser/CMakeLists.txt +@@ -0,0 +1,13 @@ ++add_llvm_component_library(LLVMLoongArchAsmParser ++ LoongArchAsmParser.cpp ++ ++ LINK_COMPONENTS ++ MC ++ MCParser ++ LoongArchDesc ++ LoongArchInfo ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +new file mode 100644 +index 00000000..16854bab +--- /dev/null ++++ b/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -0,0 +1,2207 @@ ++//===-- LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions ----===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetStreamer.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCParser/MCAsmLexer.h" ++#include "llvm/MC/MCParser/MCAsmParser.h" ++#include "llvm/MC/MCParser/MCAsmParserExtension.h" ++#include "llvm/MC/MCParser/MCParsedAsmOperand.h" ++#include "llvm/MC/MCParser/MCTargetAsmParser.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/MC/SubtargetFeature.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/SMLoc.h" ++#include "llvm/Support/SourceMgr.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-asm-parser" ++ ++namespace llvm { ++ ++class MCInstrInfo; ++ ++} // end namespace llvm ++ ++namespace { ++ ++class LoongArchAssemblerOptions { ++public: ++ LoongArchAssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {} ++ ++ LoongArchAssemblerOptions(const LoongArchAssemblerOptions *Opts) { ++ Features = Opts->getFeatures(); ++ } ++ ++ const FeatureBitset &getFeatures() const { return Features; } ++ void setFeatures(const FeatureBitset &Features_) { Features = Features_; } ++ ++private: ++ FeatureBitset Features; ++}; ++ ++} // end anonymous namespace ++ ++namespace { ++ ++class LoongArchAsmParser : public MCTargetAsmParser { ++ LoongArchTargetStreamer &getTargetStreamer() { ++ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); ++ return static_cast(TS); ++ } ++ ++ LoongArchABIInfo ABI; ++ SmallVector, 2> AssemblerOptions; ++ MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a ++ // nullptr, which indicates that no function is currently ++ // selected. This usually happens after an '.end' ++ // directive. ++ bool IsPicEnabled; ++ ++ // Map of register aliases created via the .set directive. 
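The AssemblerOptions vector declared above keeps snapshots of the subtarget feature bits: the first entry records the module-level defaults that directives must not clobber, and the last entry is the working copy that the feature-toggling helpers further down operate on. A minimal, self-contained sketch of that snapshot idea, using std::bitset in place of LLVM's FeatureBitset (all names here are illustrative, not from the patch):

    #include <bitset>
    #include <cstdio>
    #include <vector>

    // Stand-in for LLVM's FeatureBitset: one bit per subtarget feature.
    using FeatureSet = std::bitset<64>;

    struct AssemblerOptions {
      // Options.front() holds the immutable module defaults;
      // Options.back() is the view that feature-toggling directives modify.
      std::vector<FeatureSet> Options;

      explicit AssemblerOptions(FeatureSet Defaults) : Options{Defaults, Defaults} {}

      void enable(unsigned Bit)  { Options.back().set(Bit); }
      void disable(unsigned Bit) { Options.back().reset(Bit); }
      bool has(unsigned Bit) const { return Options.back().test(Bit); }
    };

    int main() {
      enum { FeatLSX = 0, FeatLASX = 1 };
      AssemblerOptions Opts(FeatureSet().set(FeatLSX)); // LSX on by default
      Opts.enable(FeatLASX);                            // a directive turns LASX on
      std::printf("lsx=%d lasx=%d\n", Opts.has(FeatLSX), Opts.has(FeatLASX));
    }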
++ StringMap RegisterSets; ++ ++#define GET_ASSEMBLER_HEADER ++#include "LoongArchGenAsmMatcher.inc" ++ ++ unsigned checkTargetMatchPredicate(MCInst &Inst) override; ++ ++ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) override; ++ ++ /// Parse a register as used in CFI directives ++ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; ++ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) override; ++ ++ bool mnemonicIsValid(StringRef Mnemonic); ++ ++ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ++ SMLoc NameLoc, OperandVector &Operands) override; ++ ++ bool ParseDirective(AsmToken DirectiveID) override; ++ ++ OperandMatchResultTy parseMemOperand(OperandVector &Operands); ++ OperandMatchResultTy ++ matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, ++ SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ SMLoc S); ++ OperandMatchResultTy parseAnyRegister(OperandVector &Operands); ++ OperandMatchResultTy parseJumpTarget(OperandVector &Operands); ++ ++ bool searchSymbolAlias(OperandVector &Operands); ++ ++ bool parseOperand(OperandVector &, StringRef Mnemonic); ++ ++ enum MacroExpanderResultTy { ++ MER_NotAMacro, ++ MER_Success, ++ MER_Fail, ++ }; ++ ++ // Expands assembly pseudo instructions. ++ MacroExpanderResultTy tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ bool expandLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ bool reportParseError(Twine ErrorMsg); ++ ++ bool parseMemOffset(const MCExpr *&Res); ++ ++ bool isEvaluated(const MCExpr *Expr); ++ bool parseDirectiveSet(); ++ ++ bool parseSetAssignment(); ++ ++ bool parseInternalDirectiveReallowModule(); ++ ++ int matchCPURegisterName(StringRef Symbol); ++ ++ int matchFPURegisterName(StringRef Name); ++ ++ int matchFCFRRegisterName(StringRef Name); ++ int matchFCSRRegisterName(StringRef Name); ++ ++ int matchLSX128RegisterName(StringRef Name); ++ ++ int matchLASX256RegisterName(StringRef Name); ++ ++ bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ // Helper function that checks if the value of a vector index is within the ++ // boundaries of accepted values for each RegisterKind ++ // Example: VINSGR2VR.B $v0[n], $1 => 16 > n >= 0 ++ bool validateLSXIndex(int Val, int RegKind); ++ ++ void setFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (!(getSTI().getFeatureBits()[Feature])) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (getSTI().getFeatureBits()[Feature]) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ setFeatureBits(Feature, 
FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } ++ ++ void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ clearFeatureBits(Feature, FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } ++ ++public: ++ enum LoongArchMatchResultTy { ++ Match_RequiresNoZeroRegister = FIRST_TARGET_MATCH_RESULT_TY, ++ Match_RequiresNoRaRegister, ++ Match_RequiresRange0_31, ++ Match_RequiresRange0_63, ++ Match_MsbHigherThanLsb, ++ Match_RequiresPosSizeUImm6, ++#define GET_OPERAND_DIAGNOSTIC_TYPES ++#include "LoongArchGenAsmMatcher.inc" ++#undef GET_OPERAND_DIAGNOSTIC_TYPES ++ }; ++ ++ LoongArchAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, ++ const MCInstrInfo &MII, const MCTargetOptions &Options) ++ : MCTargetAsmParser(Options, sti, MII), ++ ABI(LoongArchABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), ++ sti.getCPU(), Options)) { ++ MCAsmParserExtension::Initialize(parser); ++ ++ parser.addAliasForDirective(".asciiz", ".asciz"); ++ parser.addAliasForDirective(".hword", ".2byte"); ++ parser.addAliasForDirective(".word", ".4byte"); ++ parser.addAliasForDirective(".dword", ".8byte"); ++ ++ // Initialize the set of available features. ++ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); ++ ++ // Remember the initial assembler options. The user can not modify these. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ // Create an assembler options environment for the user to modify. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ getTargetStreamer().updateABIInfo(*this); ++ ++ CurrentFn = nullptr; ++ ++ IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); ++ } ++ ++ bool is64Bit() const { ++ return getSTI().getFeatureBits()[LoongArch::Feature64Bit]; ++ } ++ ++ const LoongArchABIInfo &getABI() const { return ABI; } ++ bool isABI_LP64D() const { return ABI.IsLP64D(); } ++ bool isABI_LP64S() const { return ABI.IsLP64S(); } ++ bool isABI_LP64F() const { return ABI.IsLP64F(); } ++ bool isABI_ILP32D() const { return ABI.IsILP32D(); } ++ bool isABI_ILP32F() const { return ABI.IsILP32F(); } ++ bool isABI_ILP32S() const { return ABI.IsILP32S(); } ++ ++ bool hasLSX() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureLSX]; ++ } ++ ++ bool hasLASX() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureLASX]; ++ } ++ ++ bool inPicMode() { ++ return IsPicEnabled; ++ } ++ ++ const MCExpr *createTargetUnaryExpr(const MCExpr *E, ++ AsmToken::TokenKind OperatorToken, ++ MCContext &Ctx) override { ++ switch(OperatorToken) { ++ default: ++ llvm_unreachable("Unknown token"); ++ return nullptr; ++#if 0 ++ case AsmToken::PercentPlt: ++ return LoongArchMCExpr::create(LoongArchMCExpr::MEK_PLT, E, Ctx); ++#endif ++ } ++ } ++}; ++ ++/// LoongArchOperand - Instances of this class represent a parsed LoongArch machine ++/// instruction. ++class LoongArchOperand : public MCParsedAsmOperand { ++public: ++ /// Broad categories of register classes ++ /// The exact class is finalized by the render method. 
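The RegKind enum defined next tags each parsed register with a bitmask of every class it could still belong to; a purely numeric register such as $1 starts out with all bits set and is only narrowed once an instruction pattern matches. A short standalone sketch of that idea (the kind names mirror the power-of-two layout below but the code is not part of the patch):

    #include <cstdio>

    // Power-of-two kind bits, mirroring the layout of the enum that follows.
    enum RegKind : unsigned {
      RK_GPR = 1, RK_FGR = 2, RK_FCFR = 4, RK_FCSR = 8,
      RK_LSX128 = 16, RK_LASX256 = 32,
      RK_Numeric = RK_GPR | RK_FGR | RK_FCFR | RK_FCSR | RK_LSX128 | RK_LASX256
    };

    // Predicates only ask whether a given kind bit is still possible.
    static bool couldBeGPR(unsigned Kind) { return (Kind & RK_GPR) != 0; }

    int main() {
      unsigned Numeric = RK_Numeric; // "$1": still ambiguous between all classes
      unsigned Float   = RK_FGR;     // "$f3": known to be a floating-point register
      std::printf("%d %d\n", couldBeGPR(Numeric), couldBeGPR(Float)); // prints "1 0"
    }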
++ enum RegKind { ++ RegKind_GPR = 1, /// GPR32 and GPR64 (depending on is64Bit()) ++ RegKind_FGR = 2, /// FGR32, FGR64 (depending on hasBasicD()) ++ RegKind_FCFR = 4, /// FCFR ++ RegKind_FCSR = 8, /// FCSR ++ RegKind_LSX128 = 16, /// LSX128[BHWD] (makes no difference which) ++ RegKind_LASX256 = 32, /// LASX256[BHWD] (makes no difference which) ++ RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCFR | RegKind_FCSR | ++ RegKind_LSX128 | RegKind_LASX256 ++ }; ++ ++private: ++ enum KindTy { ++ k_Immediate, /// An immediate (possibly involving symbol references) ++ k_Memory, /// Base + Offset Memory Address ++ k_RegisterIndex, /// A register index in one or more RegKind. ++ k_Token, /// A simple token ++ k_RegList, /// A physical register list ++ } Kind; ++ ++public: ++ LoongArchOperand(KindTy K, LoongArchAsmParser &Parser) ++ : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} ++ ++ ~LoongArchOperand() override { ++ switch (Kind) { ++ case k_Memory: ++ delete Mem.Base; ++ break; ++ case k_RegList: ++ delete RegList.List; ++ break; ++ case k_Immediate: ++ case k_RegisterIndex: ++ case k_Token: ++ break; ++ } ++ } ++ ++private: ++ /// For diagnostics, and checking the assembler temporary ++ LoongArchAsmParser &AsmParser; ++ ++ struct Token { ++ const char *Data; ++ unsigned Length; ++ }; ++ ++ struct RegIdxOp { ++ unsigned Index; /// Index into the register class ++ RegKind Kind; /// Bitfield of the kinds it could possibly be ++ struct Token Tok; /// The input token this operand originated from. ++ const MCRegisterInfo *RegInfo; ++ }; ++ ++ struct ImmOp { ++ const MCExpr *Val; ++ }; ++ ++ struct MemOp { ++ LoongArchOperand *Base; ++ const MCExpr *Off; ++ }; ++ ++ struct RegListOp { ++ SmallVector *List; ++ }; ++ ++ union { ++ struct Token Tok; ++ struct RegIdxOp RegIdx; ++ struct ImmOp Imm; ++ struct MemOp Mem; ++ struct RegListOp RegList; ++ }; ++ ++ SMLoc StartLoc, EndLoc; ++ ++ /// Internal constructor for register kinds ++ static std::unique_ptr CreateReg(unsigned Index, StringRef Str, ++ RegKind RegKind, ++ const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, ++ LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_RegisterIndex, Parser); ++ Op->RegIdx.Index = Index; ++ Op->RegIdx.RegInfo = RegInfo; ++ Op->RegIdx.Kind = RegKind; ++ Op->RegIdx.Tok.Data = Str.data(); ++ Op->RegIdx.Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++public: ++ /// Coerce the register to GPR32 and return the real register for the current ++ /// target. ++ unsigned getGPR32Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR32RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to GPR32 and return the real register for the current ++ /// target. ++ unsigned getGPRMM16Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR32RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to GPR64 and return the real register for the current ++ /// target. ++ unsigned getGPR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR64RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++private: ++ /// Coerce the register to FGR64 and return the real register for the current ++ /// target. 
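The operand payloads above live in a single union, so every accessor, like getGPR32Reg before this point and getFGR64Reg just below, asserts on the Kind tag before touching the storage. The same discriminated-union pattern in isolation, with simplified placeholder payloads (not the patch's actual types):

    #include <cassert>
    #include <cstdint>

    class Operand {
      enum KindTy { k_Immediate, k_RegisterIndex } Kind;
      union {
        int64_t Imm;       // valid only when Kind == k_Immediate
        unsigned RegIndex; // valid only when Kind == k_RegisterIndex
      };

    public:
      static Operand createImm(int64_t V) {
        Operand O; O.Kind = k_Immediate; O.Imm = V; return O;
      }
      static Operand createReg(unsigned Index) {
        Operand O; O.Kind = k_RegisterIndex; O.RegIndex = Index; return O;
      }
      bool isImm() const { return Kind == k_Immediate; }
      int64_t getImm() const { assert(isImm() && "Invalid access!"); return Imm; }
      unsigned getReg() const {
        assert(Kind == k_RegisterIndex && "Invalid access!");
        return RegIndex;
      }
    };

    int main() {
      Operand A = Operand::createImm(42);
      return (A.isImm() && A.getImm() == 42) ? 0 : 1;
    }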
++ unsigned getFGR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FGR64RegClassID) ++ .getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to FGR32 and return the real register for the current ++ /// target. ++ unsigned getFGR32Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FGR32RegClassID) ++ .getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to FCFR and return the real register for the current ++ /// target. ++ unsigned getFCFRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FCFR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FCFRRegClassID) ++ .getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to LSX128 and return the real register for the current ++ /// target. ++ unsigned getLSX128Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_LSX128) && "Invalid access!"); ++ // It doesn't matter which of the LSX128[BHWD] classes we use. They are all ++ // identical ++ unsigned ClassID = LoongArch::LSX128BRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ unsigned getLASX256Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_LASX256) && "Invalid access!"); ++ unsigned ClassID = LoongArch::LASX256BRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to CCR and return the real register for the ++ /// current target. ++ unsigned getFCSRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FCSR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::FCSRRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++public: ++ void addExpr(MCInst &Inst, const MCExpr *Expr) const { ++ // Add as immediate when possible. Null MCExpr = 0. 
++ if (!Expr) ++ Inst.addOperand(MCOperand::createImm(0)); ++ else if (const MCConstantExpr *CE = dyn_cast(Expr)) ++ Inst.addOperand(MCOperand::createImm(CE->getValue())); ++ else ++ Inst.addOperand(MCOperand::createExpr(Expr)); ++ } ++ ++ void addRegOperands(MCInst &Inst, unsigned N) const { ++ llvm_unreachable("Use a custom parser instead"); ++ } ++ ++ /// Render the operand to an MCInst as a GPR32 ++ /// Asserts if the wrong number of operands are requested, or the operand ++ /// is not a k_RegisterIndex compatible with RegKind_GPR ++ void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } ++ ++ void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } ++ ++ void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } ++ ++ void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegMovePOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst, ++ unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ /// Render the operand to an MCInst as a GPR64 ++ /// Asserts if the wrong number of operands are requested, or the operand ++ /// is not a k_RegisterIndex compatible with RegKind_GPR ++ void addGPR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR64Reg())); ++ } ++ ++ void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } ++ ++ void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } ++ ++ void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR32Reg())); ++ } ++ ++ void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR32Reg())); ++ } ++ ++ void addFCFRAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFCFRReg())); ++ } ++ ++ void addLSX128AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getLSX128Reg())); ++ } ++ ++ void 
addLASX256AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getLASX256Reg())); ++ } ++ ++ void addFCSRAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFCSRReg())); ++ } ++ ++ template ++ void addConstantUImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ uint64_t Imm = getConstantImm() - Offset; ++ Imm &= (1ULL << Bits) - 1; ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } ++ ++ template ++ void addSImmOperands(MCInst &Inst, unsigned N) const { ++ if (isImm() && !isConstantImm()) { ++ addExpr(Inst, getImm()); ++ return; ++ } ++ addConstantSImmOperands(Inst, N); ++ } ++ ++ template ++ void addUImmOperands(MCInst &Inst, unsigned N) const { ++ if (isImm() && !isConstantImm()) { ++ addExpr(Inst, getImm()); ++ return; ++ } ++ addConstantUImmOperands(Inst, N); ++ } ++ ++ template ++ void addConstantSImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ int64_t Imm = getConstantImm() - Offset; ++ Imm = SignExtend64(Imm); ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } ++ ++ void addImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ const MCExpr *Expr = getImm(); ++ addExpr(Inst, Expr); ++ } ++ ++ void addMemOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 2 && "Invalid number of operands!"); ++ ++ Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit() ++ ? getMemBase()->getGPR64Reg() ++ : getMemBase()->getGPR32Reg())); ++ ++ const MCExpr *Expr = getMemOff(); ++ addExpr(Inst, Expr); ++ } ++ ++ void addRegListOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ ++ for (auto RegNo : getRegList()) ++ Inst.addOperand(MCOperand::createReg(RegNo)); ++ } ++ ++ bool isReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. ++ return isGPRAsmReg() && RegIdx.Index == 0; ++ } ++ ++ bool isRegIdx() const { return Kind == k_RegisterIndex; } ++ bool isImm() const override { return Kind == k_Immediate; } ++ ++ bool isConstantImm() const { ++ int64_t Res; ++ return isImm() && getImm()->evaluateAsAbsolute(Res); ++ } ++ ++ bool isConstantImmz() const { ++ return isConstantImm() && getConstantImm() == 0; ++ } ++ ++ template bool isConstantUImm() const { ++ return isConstantImm() && isUInt(getConstantImm() - Offset); ++ } ++ ++ template bool isSImm() const { ++ return isConstantImm() ? isInt(getConstantImm()) : isImm(); ++ } ++ ++ template bool isUImm() const { ++ return isConstantImm() ? isUInt(getConstantImm()) : isImm(); ++ } ++ ++ template bool isAnyImm() const { ++ return isConstantImm() ? (isInt(getConstantImm()) || ++ isUInt(getConstantImm())) ++ : isImm(); ++ } ++ ++ template bool isConstantSImm() const { ++ return isConstantImm() && isInt(getConstantImm() - Offset); ++ } ++ ++ template bool isConstantUImmRange() const { ++ return isConstantImm() && getConstantImm() >= Bottom && ++ getConstantImm() <= Top; ++ } ++ ++ bool isToken() const override { ++ // Note: It's not possible to pretend that other operand kinds are tokens. ++ // The matcher emitter checks tokens first. 
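The isSImm and isUImm templates above reduce to ordinary two's-complement range checks on the evaluated constant; anything that does not fold to a constant is kept as an expression for the fixup machinery. A standalone version of those checks, written without the llvm::isInt/isUInt helpers and purely for illustration:

    #include <cstdint>
    #include <cstdio>

    // Does V fit in an N-bit signed immediate field?
    static bool fitsSigned(int64_t V, unsigned N) {
      int64_t Min = -(INT64_C(1) << (N - 1));
      int64_t Max =  (INT64_C(1) << (N - 1)) - 1;
      return V >= Min && V <= Max;
    }

    // Does V fit in an N-bit unsigned immediate field?
    static bool fitsUnsigned(int64_t V, unsigned N) {
      return V >= 0 && V <= (INT64_C(1) << N) - 1;
    }

    int main() {
      std::printf("%d\n", fitsSigned(-2048, 12));  // 1: addi.d-style simm12
      std::printf("%d\n", fitsSigned(2048, 12));   // 0: one past the top
      std::printf("%d\n", fitsUnsigned(4095, 12)); // 1: ori-style uimm12
    }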
++ return Kind == k_Token; ++ } ++ ++ bool isMem() const override { return Kind == k_Memory; } ++ ++ bool isConstantMemOff() const { ++ return isMem() && isa(getMemOff()); ++ } ++ ++ // Allow relocation operators. ++ // FIXME: This predicate and others need to look through binary expressions ++ // and determine whether a Value is a constant or not. ++ template ++ bool isMemWithSimmOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && ++ isShiftedInt(getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isShiftedInt(Res.getConstant()); ++ } ++ ++ bool isMemWithPtrSizeOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ const unsigned PtrBits = AsmParser.getABI().ArePtrs64bit() ? 64 : 32; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && isIntN(PtrBits, getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isIntN(PtrBits, Res.getConstant()); ++ } ++ ++ bool isMemWithGRPMM16Base() const { ++ return isMem() && getMemBase()->isMM16AsmReg(); ++ } ++ ++ template bool isMemWithUimmOffsetSP() const { ++ return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) ++ && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == LoongArch::SP); ++ } ++ ++ template bool isMemWithUimmWordAlignedOffsetSP() const { ++ return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) ++ && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() ++ && (getMemBase()->getGPR32Reg() == LoongArch::SP); ++ } ++ ++ template ++ bool isScaledUImm() const { ++ return isConstantImm() && ++ isShiftedUInt(getConstantImm()); ++ } ++ ++ template ++ bool isScaledSImm() const { ++ if (isConstantImm() && ++ isShiftedInt(getConstantImm())) ++ return true; ++ // Operand can also be a symbol or symbol plus ++ // offset in case of relocations. ++ if (Kind != k_Immediate) ++ return false; ++ MCValue Res; ++ bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return Success && isShiftedInt(Res.getConstant()); ++ } ++ ++ bool isRegList16() const { ++ if (!isRegList()) ++ return false; ++ ++ int Size = RegList.List->size(); ++ if (Size < 2 || Size > 5) ++ return false; ++ ++ unsigned R0 = RegList.List->front(); ++ unsigned R1 = RegList.List->back(); ++ if (!((R0 == LoongArch::S0 && R1 == LoongArch::RA) || ++ (R0 == LoongArch::S0_64 && R1 == LoongArch::RA_64))) ++ return false; ++ ++ int PrevReg = *RegList.List->begin(); ++ for (int i = 1; i < Size - 1; i++) { ++ int Reg = (*(RegList.List))[i]; ++ if ( Reg != PrevReg + 1) ++ return false; ++ PrevReg = Reg; ++ } ++ ++ return true; ++ } ++ ++ bool isInvNum() const { return Kind == k_Immediate; } ++ ++ bool isLSAImm() const { ++ if (!isConstantImm()) ++ return false; ++ int64_t Val = getConstantImm(); ++ return 1 <= Val && Val <= 4; ++ } ++ ++ bool isRegList() const { return Kind == k_RegList; } ++ ++ StringRef getToken() const { ++ assert(Kind == k_Token && "Invalid access!"); ++ return StringRef(Tok.Data, Tok.Length); ++ } ++ ++ unsigned getReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. 
++ if (Kind == k_RegisterIndex && RegIdx.Index == 0 && ++ RegIdx.Kind & RegKind_GPR) ++ return getGPR32Reg(); // FIXME: GPR64 too ++ ++ llvm_unreachable("Invalid access!"); ++ return 0; ++ } ++ ++ const MCExpr *getImm() const { ++ assert((Kind == k_Immediate) && "Invalid access!"); ++ return Imm.Val; ++ } ++ ++ int64_t getConstantImm() const { ++ const MCExpr *Val = getImm(); ++ int64_t Value = 0; ++ (void)Val->evaluateAsAbsolute(Value); ++ return Value; ++ } ++ ++ LoongArchOperand *getMemBase() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Base; ++ } ++ ++ const MCExpr *getMemOff() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Off; ++ } ++ ++ int64_t getConstantMemOff() const { ++ return static_cast(getMemOff())->getValue(); ++ } ++ ++ const SmallVectorImpl &getRegList() const { ++ assert((Kind == k_RegList) && "Invalid access!"); ++ return *(RegList.List); ++ } ++ ++ static std::unique_ptr CreateToken(StringRef Str, SMLoc S, ++ LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Token, Parser); ++ Op->Tok.Data = Str.data(); ++ Op->Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = S; ++ return Op; ++ } ++ ++ /// Create a numeric register (e.g. $1). The exact register remains ++ /// unresolved until an instruction successfully matches ++ static std::unique_ptr ++ createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ LLVM_DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n"); ++ return CreateReg(Index, Str, RegKind_Numeric, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely a GPR. ++ /// This is typically only used for named registers such as $gp. ++ static std::unique_ptr ++ createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely a FGR. ++ /// This is typically only used for named registers such as $f0. ++ static std::unique_ptr ++ createFGRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FGR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an FCFR. ++ /// This is typically only used for named registers such as $fcc0. ++ static std::unique_ptr ++ createFCFRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FCFR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an FCSR. ++ /// This is typically only used for named registers such as $fcsr0. ++ static std::unique_ptr ++ createFCSRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FCSR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an LSX128. ++ /// This is typically only used for named registers such as $v0. 
++ static std::unique_ptr ++ createLSX128Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_LSX128, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ createLASX256Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_LASX256, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Immediate, Parser); ++ Op->Imm.Val = Val; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, ++ SMLoc E, LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Memory, Parser); ++ Op->Mem.Base = Base.release(); ++ Op->Mem.Off = Off; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateRegList(SmallVectorImpl &Regs, SMLoc StartLoc, SMLoc EndLoc, ++ LoongArchAsmParser &Parser) { ++ assert(Regs.size() > 0 && "Empty list not allowed"); ++ ++ auto Op = std::make_unique(k_RegList, Parser); ++ Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); ++ Op->StartLoc = StartLoc; ++ Op->EndLoc = EndLoc; ++ return Op; ++ } ++ ++ bool isGPRZeroAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0; ++ } ++ ++ bool isGPRNonZeroAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 && ++ RegIdx.Index <= 31; ++ } ++ ++ bool isGPRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31; ++ } ++ ++ bool isMM16AsmReg() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return ((RegIdx.Index >= 2 && RegIdx.Index <= 7) ++ || RegIdx.Index == 16 || RegIdx.Index == 17); ++ ++ } ++ bool isMM16AsmRegZero() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 0 || ++ (RegIdx.Index >= 2 && RegIdx.Index <= 7) || ++ RegIdx.Index == 17); ++ } ++ ++ bool isMM16AsmRegMoveP() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) || ++ (RegIdx.Index >= 16 && RegIdx.Index <= 20)); ++ } ++ ++ bool isMM16AsmRegMovePPairFirst() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return RegIdx.Index >= 4 && RegIdx.Index <= 6; ++ } ++ ++ bool isMM16AsmRegMovePPairSecond() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 21 || RegIdx.Index == 22 || ++ (RegIdx.Index >= 5 && RegIdx.Index <= 7)); ++ } ++ ++ bool isFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31; ++ } ++ ++ bool isStrictlyFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31; ++ } ++ ++ bool isFCSRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FCSR && RegIdx.Index <= 3; ++ } ++ ++ bool isFCFRAsmReg() const { ++ if (!(isRegIdx() && RegIdx.Kind & RegKind_FCFR)) ++ return false; ++ return RegIdx.Index <= 7; ++ } ++ ++ bool isLSX128AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_LSX128 && RegIdx.Index <= 31; ++ } ++ ++ bool isLASX256AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_LASX256 && RegIdx.Index <= 31; ++ } ++ ++ /// getStartLoc - Get the location of the first token of this operand. 
++ SMLoc getStartLoc() const override { return StartLoc; } ++ /// getEndLoc - Get the location of the last token of this operand. ++ SMLoc getEndLoc() const override { return EndLoc; } ++ ++ void print(raw_ostream &OS) const override { ++ switch (Kind) { ++ case k_Immediate: ++ OS << "Imm<"; ++ OS << *Imm.Val; ++ OS << ">"; ++ break; ++ case k_Memory: ++ OS << "Mem<"; ++ Mem.Base->print(OS); ++ OS << ", "; ++ OS << *Mem.Off; ++ OS << ">"; ++ break; ++ case k_RegisterIndex: ++ OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", " ++ << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; ++ break; ++ case k_Token: ++ OS << getToken(); ++ break; ++ case k_RegList: ++ OS << "RegList< "; ++ for (auto Reg : (*RegList.List)) ++ OS << Reg << " "; ++ OS << ">"; ++ break; ++ } ++ } ++ ++ bool isValidForTie(const LoongArchOperand &Other) const { ++ if (Kind != Other.Kind) ++ return false; ++ ++ switch (Kind) { ++ default: ++ llvm_unreachable("Unexpected kind"); ++ return false; ++ case k_RegisterIndex: { ++ StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length); ++ StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length); ++ return Token == OtherToken; ++ } ++ } ++ } ++}; // class LoongArchOperand ++ ++} // end anonymous namespace ++ ++namespace llvm { ++ ++extern const MCInstrDesc LoongArchInsts[]; ++ ++} // end namespace llvm ++ ++static const MCInstrDesc &getInstDesc(unsigned Opcode) { ++ return LoongArchInsts[Opcode]; ++} ++ ++static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) { ++ if (const MCSymbolRefExpr *SRExpr = dyn_cast(Expr)) { ++ return &SRExpr->getSymbol(); ++ } ++ ++ if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) { ++ const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS()); ++ const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS()); ++ ++ if (LHSSym) ++ return LHSSym; ++ ++ if (RHSSym) ++ return RHSSym; ++ ++ return nullptr; ++ } ++ ++ if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) ++ return getSingleMCSymbol(UExpr->getSubExpr()); ++ ++ return nullptr; ++} ++ ++static unsigned countMCSymbolRefExpr(const MCExpr *Expr) { ++ if (isa(Expr)) ++ return 1; ++ ++ if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) ++ return countMCSymbolRefExpr(BExpr->getLHS()) + ++ countMCSymbolRefExpr(BExpr->getRHS()); ++ ++ if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) ++ return countMCSymbolRefExpr(UExpr->getSubExpr()); ++ ++ return 0; ++} ++ ++bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); ++ ++ Inst.setLoc(IDLoc); ++ ++ // Check branch instructions. 
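The branch checks that follow reject call and branch targets whose constant offset does not fit the encoding: the 16-, 21- and 26-bit offset fields count 4-byte instructions, so the byte offset gets two extra bits of range (hence the isIntN(Width + 2, ...) test) and must be word aligned. A small self-contained sketch of that check for a known constant offset, not taken from the patch:

    #include <cstdint>
    #include <cstdio>

    // OffsetBits is the width of the instruction's offset field. The encoded
    // value is the byte offset divided by 4, so the byte offset must fit in
    // OffsetBits + 2 signed bits and be 4-byte aligned.
    static bool branchOffsetOK(int64_t ByteOffset, unsigned OffsetBits) {
      if (ByteOffset % 4 != 0)
        return false;                           // "branch to misaligned address"
      unsigned N = OffsetBits + 2;              // byte range, as in isIntN(Width + 2, ...)
      int64_t Min = -(INT64_C(1) << (N - 1));
      int64_t Max =  (INT64_C(1) << (N - 1)) - 1;
      return ByteOffset >= Min && ByteOffset <= Max;
    }

    int main() {
      // beq-style 16-bit field: roughly +/-128 KiB of byte offset.
      std::printf("%d\n", branchOffsetOK(131068, 16)); // 1
      std::printf("%d\n", branchOffsetOK(131072, 16)); // 0: out of range
      std::printf("%d\n", branchOffsetOK(6, 16));      // 0: not 4-byte aligned
    }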
++ if (MCID.isBranch() || MCID.isCall()) { ++ const unsigned Opcode = Inst.getOpcode(); ++ MCOperand Offset; ++ bool check = true; ++ unsigned OffsetOpndIdx, OffsetOpndWidth; ++ switch (Opcode) { ++ default: ++ check = false; ++ break; ++ case LoongArch::BEQ: ++ case LoongArch::BNE: ++ case LoongArch::BLT: ++ case LoongArch::BGE: ++ case LoongArch::BLTU: ++ case LoongArch::BGEU: ++ OffsetOpndIdx = 2; ++ OffsetOpndWidth = 16; ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BNEZ: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ OffsetOpndIdx = 1; ++ OffsetOpndWidth = 21; ++ break; ++ case LoongArch::B: ++ case LoongArch::BL: ++ OffsetOpndIdx = 0; ++ OffsetOpndWidth = 26; ++ break; ++ } ++ if (check) { ++ assert(MCID.getNumOperands() == OffsetOpndIdx + 1 && ++ "unexpected number of operands"); ++ Offset = Inst.getOperand(OffsetOpndIdx); ++ // Non-Imm situation will be dealed with later on when applying fixups. ++ if (Offset.isImm()) { ++ if (!isIntN(OffsetOpndWidth + 2, Offset.getImm())) ++ return Error(IDLoc, "branch target out of range"); ++ if (offsetToAlignment(Offset.getImm(), Align(1LL << 2))) ++ return Error(IDLoc, "branch to misaligned address"); ++ } ++ } ++ } ++ ++ bool IsPCRelativeLoad = (MCID.TSFlags & LoongArchII::IsPCRelativeLoad) != 0; ++ if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) { ++ // Check the offset of memory operand, if it is a symbol ++ // reference or immediate we may have to expand instructions. ++ for (unsigned i = 0; i < MCID.getNumOperands(); i++) { ++ const MCOperandInfo &OpInfo = MCID.OpInfo[i]; ++ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || ++ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { ++ MCOperand &Op = Inst.getOperand(i); ++ if (Op.isImm()) { ++ int64_t MemOffset = Op.getImm(); ++ if (MemOffset < -32768 || MemOffset > 32767) { ++ return getParser().hasPendingError(); ++ } ++ } else if (Op.isExpr()) { ++ const MCExpr *Expr = Op.getExpr(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *SR = ++ static_cast(Expr); ++ if (SR->getKind() == MCSymbolRefExpr::VK_None) { ++ return getParser().hasPendingError(); ++ } ++ } else if (!isEvaluated(Expr)) { ++ return getParser().hasPendingError(); ++ } ++ } ++ } ++ } // for ++ } // if load/store ++ ++ MacroExpanderResultTy ExpandResult = ++ tryExpandInstruction(Inst, IDLoc, Out, STI); ++ switch (ExpandResult) { ++ case MER_NotAMacro: ++ Out.emitInstruction(Inst, *STI); ++ break; ++ case MER_Success: ++ break; ++ case MER_Fail: ++ return true; ++ } ++ ++ return false; ++} ++ ++LoongArchAsmParser::MacroExpanderResultTy ++LoongArchAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ switch (Inst.getOpcode()) { ++ default: ++ return MER_NotAMacro; ++ case LoongArch::LoadImm32: // li.w $rd, $imm32 ++ case LoongArch::LoadImm64: // li.d $rd, $imm64 ++ return expandLoadImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; ++ case LoongArch::LoadAddrLocal: // la.local $rd, symbol ++ case LoongArch::LoadAddrGlobal: // la.global $rd, symbol ++ case LoongArch::LoadAddrGlobal_Alias: // la $rd, symbol ++ case LoongArch::LoadAddrTLS_LE: // la.tls.le $rd, symbol ++ case LoongArch::LoadAddrTLS_IE: // la.tls.ie $rd, symbol ++ case LoongArch::LoadAddrTLS_LD: // la.tls.ld $rd, symbol ++ case LoongArch::LoadAddrTLS_GD: // la.tls.gd $rd, symbol ++ return expandLoadAddress(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; ++ } ++} ++ ++/// Can the value be represented by a unsigned N-bit value and a shift left? 
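The expandLoadImm pseudo-expansion just below delegates instruction selection to LoongArchAnalyzeImmediate, but the general shape for an arbitrary 64-bit constant is the four-part split that the la.tls.le expansion further down also emits: bits 31..12 via lu12i.w, bits 11..0 via ori, bits 51..32 via lu32i.d and bits 63..52 via lu52i.d. A small sketch of slicing a constant into those fields (field extraction only, not the shortest-sequence heuristics; names are illustrative):

    #include <cstdint>
    #include <cstdio>

    // Immediate fields consumed by the canonical
    // lu12i.w / ori / lu32i.d / lu52i.d materialization sequence.
    struct ImmParts {
      uint32_t Lo12;      // ori      rd, rd, Lo12       (bits 11..0)
      uint32_t Hi20;      // lu12i.w  rd, Hi20           (bits 31..12)
      uint32_t Higher20;  // lu32i.d  rd, Higher20       (bits 51..32)
      uint32_t Highest12; // lu52i.d  rd, rd, Highest12  (bits 63..52)
    };

    static ImmParts split(uint64_t Imm) {
      return { static_cast<uint32_t>(Imm & 0xfff),
               static_cast<uint32_t>((Imm >> 12) & 0xfffff),
               static_cast<uint32_t>((Imm >> 32) & 0xfffff),
               static_cast<uint32_t>((Imm >> 52) & 0xfff) };
    }

    int main() {
      ImmParts P = split(0x123456789abcdef0ULL);
      // Prints: 0xef0 0x9abcd 0x45678 0x123
      std::printf("%#x %#x %#x %#x\n", P.Lo12, P.Hi20, P.Higher20, P.Highest12);
    }

The real expander additionally drops instructions whose field is already implied by sign extension of the previous step, which is why short constants come out as one or two instructions.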
++template static bool isShiftedUIntAtAnyPosition(uint64_t x) { ++ unsigned BitNum = findFirstSet(x); ++ ++ return (x == x >> BitNum << BitNum) && isUInt(x >> BitNum); ++} ++ ++bool LoongArchAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const int64_t Imm = Inst.getOperand(1).getImm(); ++ const unsigned DstReg = Inst.getOperand(0).getReg(); ++ LoongArchTargetStreamer &TOut = getTargetStreamer(); ++ bool Is64Bit = Inst.getOpcode() == LoongArch::LoadImm64; ++ unsigned SrcReg = Is64Bit ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq( ++ Is64Bit ? Imm : SignExtend64<32>(Imm), Is64Bit); ++ ++ for (auto &Inst : Seq) { ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ TOut.emitRI(Inst.Opc, DstReg, Inst.Imm, IDLoc, STI); ++ else ++ TOut.emitRRI(Inst.Opc, DstReg, SrcReg, Inst.Imm, IDLoc, STI); ++ SrcReg = DstReg; ++ } ++ ++ return false; ++} ++ ++bool LoongArchAsmParser::expandLoadAddress(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ LoongArchTargetStreamer &TOut = getTargetStreamer(); ++ const MCExpr *SymExpr = Inst.getOperand(1).getExpr(); ++ const LoongArchMCExpr *HiExpr = nullptr; ++ const LoongArchMCExpr *LoExpr = nullptr; ++ const LoongArchMCExpr *HigherExpr = nullptr; ++ const LoongArchMCExpr *HighestExpr = nullptr; ++ unsigned DstReg = Inst.getOperand(0).getReg(); ++ ++ MCValue Res; ++ if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { ++ Error(IDLoc, "expected relocatable expression"); ++ return true; ++ } ++ if (Res.getSymB() != nullptr) { ++ Error(IDLoc, "expected relocatable expression with only one symbol"); ++ return true; ++ } ++ ++ switch (Inst.getOpcode()) { ++ case LoongArch::LoadAddrLocal: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_LO, SymExpr, ++ getContext()); ++ ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobal_Alias: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_LO, SymExpr, ++ getContext()); ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::LD_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_LE: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_LO, SymExpr, ++ getContext()); ++ HigherExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHER, ++ SymExpr, getContext()); ++ HighestExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHEST, ++ SymExpr, getContext()); ++ TOut.emitRX(LoongArch::LU12I_W_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ORI_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ TOut.emitRX(LoongArch::LU32I_D_ri, DstReg, ++ MCOperand::createExpr(HigherExpr), IDLoc, STI); ++ TOut.emitRRX(LoongArch::LU52I_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(HighestExpr), IDLoc, STI); ++ 
return false; ++ case LoongArch::LoadAddrTLS_IE: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_LO, SymExpr, ++ getContext()); ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::LD_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_GD: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_LO, SymExpr, ++ getContext()); ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ default: ++ llvm_unreachable(""); ++ } ++} ++ ++unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { ++ switch (Inst.getOpcode()) { ++ case LoongArch::BSTRINS_W: ++ case LoongArch::BSTRPICK_W: { ++ assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && ++ "Operands must be immediates for bstrins.w/bstrpick.w!"); ++ const signed Msbw = Inst.getOperand(2).getImm(); ++ const signed Lsbw = Inst.getOperand(3).getImm(); ++ if (Msbw < Lsbw) ++ return Match_MsbHigherThanLsb; ++ if ((Lsbw < 0) || (Msbw > 31)) ++ return Match_RequiresRange0_31; ++ return Match_Success; ++ } ++ case LoongArch::BSTRINS_D: ++ case LoongArch::BSTRPICK_D: { ++ assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && ++ "Operands must be immediates for bstrins.d/bstrpick.d!"); ++ const signed Msbd = Inst.getOperand(2).getImm(); ++ const signed Lsbd = Inst.getOperand(3).getImm(); ++ if (Msbd < Lsbd) ++ return Match_MsbHigherThanLsb; ++ if ((Lsbd < 0) || (Msbd > 63)) ++ return Match_RequiresRange0_63; ++ return Match_Success; ++ } ++ case LoongArch::CSRXCHG32: ++ case LoongArch::CSRXCHG: ++ if (Inst.getOperand(2).getReg() == LoongArch::ZERO || ++ Inst.getOperand(2).getReg() == LoongArch::ZERO_64) ++ return Match_RequiresNoZeroRegister; ++ if (Inst.getOperand(2).getReg() == LoongArch::RA || ++ Inst.getOperand(2).getReg() == LoongArch::RA_64) ++ return Match_RequiresNoRaRegister; ++ return Match_Success; ++ } ++ ++ return Match_Success; ++} ++ ++static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, ++ uint64_t ErrorInfo) { ++ if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { ++ SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); ++ if (ErrorLoc == SMLoc()) ++ return Loc; ++ return ErrorLoc; ++ } ++ return Loc; ++} ++ ++bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, ++ MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) { ++ MCInst Inst; ++ unsigned MatchResult = ++ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); ++ switch (MatchResult) { ++ case Match_Success: ++ if (processInstruction(Inst, IDLoc, Out, STI)) ++ return true; ++ return false; ++ case Match_MissingFeature: ++ Error(IDLoc, "instruction requires a CPU feature not currently enabled"); ++ return true; ++ case Match_InvalidOperand: { ++ SMLoc ErrorLoc = IDLoc; ++ if (ErrorInfo != ~0ULL) { ++ if (ErrorInfo >= Operands.size()) ++ return Error(IDLoc, "too few operands for instruction"); ++ ++ ErrorLoc = Operands[ErrorInfo]->getStartLoc(); ++ if (ErrorLoc == SMLoc()) ++ ErrorLoc = IDLoc; ++ } ++ ++ 
return Error(ErrorLoc, "invalid operand for instruction"); ++ } ++ case Match_MnemonicFail: ++ return Error(IDLoc, "invalid instruction"); ++ case Match_RequiresNoZeroRegister: ++ return Error(IDLoc, "invalid operand ($zero) for instruction"); ++ case Match_RequiresNoRaRegister: ++ return Error(IDLoc, "invalid operand ($r1) for instruction"); ++ case Match_InvalidImm0_3: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 3]."); ++ case Match_InvalidImm0_7: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 7]."); ++ case Match_InvalidImm0_31: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 31]."); ++ case Match_InvalidImm0_63: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 63]."); ++ case Match_InvalidImm0_4095: ++ case Match_UImm12_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 4095]."); ++ case Match_InvalidImm0_32767: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 32767]."); ++ case Match_UImm16_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 16-bit unsigned immediate"); ++ case Match_UImm20_0: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 20-bit unsigned immediate"); ++ case Match_UImm26_0: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 26-bit unsigned immediate"); ++ case Match_UImm32_Coerced: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 32-bit immediate"); ++ case Match_InvalidSImm2: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 2-bit signed immediate"); ++ case Match_InvalidSImm3: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 3-bit signed immediate"); ++ case Match_InvalidSImm5: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 5-bit signed immediate"); ++ case Match_InvalidSImm8: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 8-bit signed immediate"); ++ case Match_InvalidSImm12: ++ case Match_SImm12_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 12-bit signed immediate"); ++ case Match_InvalidSImm14: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 14-bit signed immediate"); ++ case Match_InvalidSImm15: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 15-bit signed immediate"); ++ case Match_InvalidSImm16: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 16-bit signed immediate"); ++ case Match_InvalidSImm20: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 20-bit signed immediate"); ++ case Match_InvalidSImm21: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 21-bit signed immediate"); ++ case Match_InvalidSImm26: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 26-bit signed immediate"); ++ case Match_SImm32: ++ case Match_SImm32_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 32-bit signed immediate"); ++ case Match_MemSImm14: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 14-bit signed offset"); ++ case Match_MemSImmPtr: ++ return Error(RefineErrorLoc(IDLoc, Operands, 
ErrorInfo), ++ "expected memory with 32-bit signed offset"); ++ case Match_UImm2_1: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected immediate in range 1 .. 4"); ++ case Match_MemSImm14Lsl2: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 16-bit signed offset and multiple of 4"); ++ case Match_RequiresRange0_31: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "from lsbw to msbw are not in the range 0 .. 31", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_RequiresPosSizeUImm6: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "size plus position are not in the range 1 .. 63", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_RequiresRange0_63: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "from lsbd to msbd are not in the range 0 .. 63", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_MsbHigherThanLsb: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "msb are not higher than lsb", SMRange(ErrorStart, ErrorEnd)); ++ } ++ } ++ ++ llvm_unreachable("Implement any new match types added!"); ++} ++ ++/* ++ * Note: The implementation of this function must be sync with the definition ++ * of GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ */ ++int LoongArchAsmParser::matchCPURegisterName(StringRef Name) { ++ int CC; ++ ++ CC = StringSwitch(Name) ++ .Cases("zero", "r0", 0) ++ .Cases("a0", "v0", "r4", 1) ++ .Cases("a1", "v1", "r5", 2) ++ .Cases("a2", "r6", 3) ++ .Cases("a3", "r7", 4) ++ .Cases("a4", "r8", 5) ++ .Cases("a5", "r9", 6) ++ .Cases("a6", "r10", 7) ++ .Cases("a7", "r11", 8) ++ .Cases("t0", "r12", 9) ++ .Cases("t1", "r13", 10) ++ .Cases("t2", "r14", 11) ++ .Cases("t3", "r15", 12) ++ .Cases("t4", "r16", 13) ++ .Cases("t5", "r17", 14) ++ .Cases("t6", "r18", 15) ++ .Cases("t7", "r19", 16) ++ .Cases("t8", "r20", 17) ++ .Cases("s0", "r23", 18) ++ .Cases("s1", "r24", 19) ++ .Cases("s2", "r25", 20) ++ .Cases("s3", "r26", 21) ++ .Cases("s4", "r27", 22) ++ .Cases("s5", "r28", 23) ++ .Cases("s6", "r29", 24) ++ .Cases("s7", "r30", 25) ++ .Cases("s8", "r31", 26) ++ .Cases("ra", "r1", 27) ++ .Cases("tp", "r2", 28) ++ .Cases("sp", "r3", 29) ++ .Case("r21", 30) ++ .Cases("fp", "r22", 31) ++ .Default(-1); ++ ++ return CC; ++} ++ ++int LoongArchAsmParser::matchFPURegisterName(StringRef Name) { ++ if (Name[0] == 'f') { ++ int CC; ++ ++ CC = StringSwitch(Name) ++ .Cases("f0", "fa0", "fv0", 0) ++ .Cases("f1", "fa1", "fv1", 1) ++ .Cases("f2", "fa2", 2) ++ .Cases("f3", "fa3", 3) ++ .Cases("f4", "fa4", 4) ++ .Cases("f5", "fa5", 5) ++ .Cases("f6", "fa6", 6) ++ .Cases("f7", "fa7", 7) ++ .Cases("f8", "ft0", 8) ++ .Cases("f9", "ft1", 9) ++ .Cases("f10", "ft2", 10) ++ .Cases("f11", "ft3", 11) ++ .Cases("f12", "ft4", 12) ++ .Cases("f13", "ft5", 13) ++ .Cases("f14", "ft6", 14) ++ .Cases("f15", "ft7", 15) ++ .Cases("f16", "ft8", 16) ++ .Cases("f17", "ft9", 17) ++ .Cases("f18", "ft10", 18) ++ .Cases("f19", "ft11", 19) ++ .Cases("f20", "ft12", 20) ++ .Cases("f21", "ft13", 21) ++ .Cases("f22", "ft14", 22) ++ .Cases("f23", "ft15", 23) ++ .Cases("f24", "fs0", 24) ++ .Cases("f25", "fs1", 25) ++ .Cases("f26", "fs2", 26) ++ .Cases("f27", "fs3", 27) ++ .Cases("f28", "fs4", 28) ++ .Cases("f29", "fs5", 29) ++ .Cases("f30", "fs6", 30) ++ 
.Cases("f31", "fs7", 31) ++ .Default(-1); ++ ++ return CC; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchFCFRRegisterName(StringRef Name) { ++ if (Name.startswith("fcc")) { ++ StringRef NumString = Name.substr(3); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 7) // There are only 8 fcc registers. ++ return -1; ++ return IntVal; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchFCSRRegisterName(StringRef Name) { ++ if (Name.startswith("fcsr")) { ++ StringRef NumString = Name.substr(4); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 3) // There are only 4 fcsr registers. ++ return -1; ++ return IntVal; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchLSX128RegisterName(StringRef Name) { ++ unsigned IntVal; ++ ++ if (Name.front() != 'v' || Name.drop_front(2).getAsInteger(10, IntVal)) ++ return -1; ++ ++ if (IntVal > 31) ++ return -1; ++ ++ return IntVal; ++} ++ ++int LoongArchAsmParser::matchLASX256RegisterName(StringRef Name) { ++ unsigned IntVal; ++ ++ if (Name.front() != 'x' || Name.drop_front(2).getAsInteger(10, IntVal)) ++ return -1; ++ ++ if (IntVal > 31) ++ return -1; ++ ++ return IntVal; ++} ++ ++bool LoongArchAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseOperand\n"); ++ ++ // Check if the current operand has a custom associated parser, if so, try to ++ // custom parse the operand, or fallback to the general approach. ++ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); ++ if (ResTy == MatchOperand_Success) ++ return false; ++ // If there wasn't a custom match, try the generic matcher below. Otherwise, ++ // there was a match, but an error occurred, in which case, just return that ++ // the operand parsing failed. ++ if (ResTy == MatchOperand_ParseFail) ++ return true; ++ ++ LLVM_DEBUG(dbgs() << ".. Generic Parser\n"); ++ ++ switch (getLexer().getKind()) { ++ case AsmToken::Dollar: { ++ // Parse the register. ++ SMLoc S = Parser.getTok().getLoc(); ++ ++ // Almost all registers have been parsed by custom parsers. There is only ++ // one exception to this. $zero (and it's alias $0) will reach this point ++ // for div, divu, and similar instructions because it is not an operand ++ // to the instruction definition but an explicit register. Special case ++ // this situation for now. ++ if (parseAnyRegister(Operands) != MatchOperand_NoMatch) ++ return false; ++ ++ // Maybe it is a symbol reference. ++ StringRef Identifier; ++ if (Parser.parseIdentifier(Identifier)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); ++ // Otherwise create a symbol reference. ++ const MCExpr *Res = ++ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); ++ ++ Operands.push_back(LoongArchOperand::CreateImm(Res, S, E, *this)); ++ return false; ++ } ++ default: { ++ LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); ++ ++ const MCExpr *Expr; ++ SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. 
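The register-name matchers above accept both ABI names and raw $rN spellings and, notably, return an index into the GPR register class as laid out in LoongArchRegisterInfo.td rather than the architectural register number, so "a0" and "r4" both map to index 1. A tiny sketch of the same two-alias lookup over a hypothetical subset of that table, following the matchers' -1 "no match" convention:

    #include <cstring>

    struct RegAlias { const char *AbiName; const char *RawName; int ClassIndex; };

    // Illustrative subset only; the real table covers r0..r31.
    static const RegAlias Table[] = {
        {"zero", "r0", 0}, {"a0", "r4", 1}, {"a1", "r5", 2},
        {"t0", "r12", 9},  {"ra", "r1", 27}, {"sp", "r3", 29},
    };

    // Returns the register-class index, or -1 if the name is unknown.
    static int matchGPRName(const char *Name) {
      for (const RegAlias &A : Table)
        if (!std::strcmp(Name, A.AbiName) || !std::strcmp(Name, A.RawName))
          return A.ClassIndex;
      return -1;
    }

    int main() { return matchGPRName("a0") == matchGPRName("r4") ? 0 : 1; }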
++ if (getParser().parseExpression(Expr)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ Operands.push_back(LoongArchOperand::CreateImm(Expr, S, E, *this)); ++ return false; ++ } ++ } // switch(getLexer().getKind()) ++ return true; ++} ++ ++bool LoongArchAsmParser::isEvaluated(const MCExpr *Expr) { ++ switch (Expr->getKind()) { ++ case MCExpr::Constant: ++ return true; ++ case MCExpr::SymbolRef: ++ return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ if (!isEvaluated(BE->getLHS())) ++ return false; ++ return isEvaluated(BE->getRHS()); ++ } ++ case MCExpr::Unary: ++ return isEvaluated(cast(Expr)->getSubExpr()); ++ case MCExpr::Target: ++ return true; ++ } ++ return false; ++} ++ ++bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; ++} ++ ++OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, ++ SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ SmallVector, 1> Operands; ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy == MatchOperand_Success) { ++ assert(Operands.size() == 1); ++ LoongArchOperand &Operand = static_cast(*Operands.front()); ++ StartLoc = Operand.getStartLoc(); ++ EndLoc = Operand.getEndLoc(); ++ ++ // AFAIK, we only support numeric registers and named GPR's in CFI ++ // directives. ++ // Don't worry about eating tokens before failing. Using an unrecognised ++ // register is a parse error. ++ if (Operand.isGPRAsmReg()) { ++ // Resolve to GPR32 or GPR64 appropriately. ++ RegNo = is64Bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); ++ } ++ ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch ++ : MatchOperand_Success; ++ } ++ ++ assert(Operands.size() == 0); ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success; ++} ++ ++bool LoongArchAsmParser::parseMemOffset(const MCExpr *&Res) { ++ return getParser().parseExpression(Res); ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseMemOperand(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseMemOperand\n"); ++ const MCExpr *IdVal = nullptr; ++ SMLoc S; ++ OperandMatchResultTy Res = MatchOperand_NoMatch; ++ // First operand is the base. ++ S = Parser.getTok().getLoc(); ++ ++ Res = parseAnyRegister(Operands); ++ if (Res != MatchOperand_Success) ++ return Res; ++ ++ if (Parser.getTok().isNot(AsmToken::Comma)) { ++ Error(Parser.getTok().getLoc(), "',' expected"); ++ return MatchOperand_ParseFail; ++ } ++ ++ Parser.Lex(); // Eat the ',' token. ++ ++ if (parseMemOffset(IdVal)) ++ return MatchOperand_ParseFail; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ // Replace the register operand with the memory operand. ++ std::unique_ptr op( ++ static_cast(Operands.back().release())); ++ // Remove the register from the operands. ++ // "op" will be managed by k_Memory. ++ Operands.pop_back(); ++ ++ // when symbol not defined, error report. ++ if (dyn_cast(IdVal)) { ++ return MatchOperand_ParseFail; ++ } ++ ++ // Add the memory operand. 
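
// Illustrative aside (not part of the patch): parseMemOperand here accepts a
// memory operand written as "<base register>, <offset expression>", and the
// matcher diagnostics near the top of this hunk reject offsets that do not fit
// the encoding (ld.w/st.w take a signed 12-bit offset; ll.w/sc.w take a 14-bit
// field scaled by 4, hence "16-bit signed offset and multiple of 4"). A minimal
// sketch of those range checks; fitsSImm and fitsScaledSImm are hypothetical
// helpers, not LLVM API.
#include <cassert>
#include <cstdint>

// True if V fits in a signed immediate of Bits bits.
static bool fitsSImm(int64_t V, unsigned Bits) {
  int64_t Lo = -(int64_t(1) << (Bits - 1));
  int64_t Hi = (int64_t(1) << (Bits - 1)) - 1;
  return V >= Lo && V <= Hi;
}

// True if V is a multiple of (1 << Shift) and V / (1 << Shift) fits in Bits
// bits, e.g. the ll.w/sc.w offset: a 14-bit field scaled by 4.
static bool fitsScaledSImm(int64_t V, unsigned Bits, unsigned Shift) {
  int64_t Scale = int64_t(1) << Shift;
  return V % Scale == 0 && fitsSImm(V / Scale, Bits);
}

int main() {
  assert(fitsSImm(2047, 12) && !fitsSImm(2048, 12));   // ld.w/st.w offsets
  assert(fitsSImm(-2048, 12) && !fitsSImm(-2049, 12));
  assert(fitsScaledSImm(32764, 14, 2));                // ll.w: in range, multiple of 4
  assert(!fitsScaledSImm(6, 14, 2));                   // rejected: not a multiple of 4
  return 0;
}
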
++ if (dyn_cast(IdVal)) { ++ int64_t Imm; ++ if (IdVal->evaluateAsAbsolute(Imm)) ++ IdVal = MCConstantExpr::create(Imm, getContext()); ++ else ++ return MatchOperand_ParseFail; ++ } ++ ++ Operands.push_back(LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this)); ++ return MatchOperand_Success; ++} ++ ++bool LoongArchAsmParser::searchSymbolAlias(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); ++ if (!Sym) ++ return false; ++ ++ SMLoc S = Parser.getTok().getLoc(); ++ if (Sym->isVariable()) { ++ const MCExpr *Expr = Sym->getVariableValue(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *Ref = static_cast(Expr); ++ StringRef DefSymbol = Ref->getSymbol().getName(); ++ if (DefSymbol.startswith("$")) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ if (ResTy == MatchOperand_ParseFail) ++ llvm_unreachable("Should never ParseFail"); ++ } ++ } ++ } else if (Sym->isUnset()) { ++ // If symbol is unset, it might be created in the `parseSetAssignment` ++ // routine as an alias for a numeric register name. ++ // Lookup in the aliases list. ++ auto Entry = RegisterSets.find(Sym->getName()); ++ if (Entry != RegisterSets.end()) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, ++ SMLoc S) { ++ int Index = matchCPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createGPRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFGRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFCFRRegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFCFRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFCSRRegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFCSRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchLSX128RegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createLSX128Reg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchLASX256RegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createLASX256Reg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, SMLoc S) { ++ if (Token.is(AsmToken::Identifier)) { ++ LLVM_DEBUG(dbgs() << ".. 
identifier\n"); ++ StringRef Identifier = Token.getIdentifier(); ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); ++ return ResTy; ++ } else if (Token.is(AsmToken::Integer)) { ++ LLVM_DEBUG(dbgs() << ".. integer\n"); ++ int64_t RegNum = Token.getIntVal(); ++ if (RegNum < 0 || RegNum > 31) { ++ // Show the error, but treat invalid register ++ // number as a normal one to continue parsing ++ // and catch other possible errors. ++ Error(getLexer().getLoc(), "invalid register number"); ++ } ++ Operands.push_back(LoongArchOperand::createNumericReg( ++ RegNum, Token.getString(), getContext().getRegisterInfo(), S, ++ Token.getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { ++ auto Token = getLexer().peekTok(false); ++ return matchAnyRegisterWithoutDollar(Operands, Token, S); ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseAnyRegister(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); ++ ++ auto Token = Parser.getTok(); ++ ++ SMLoc S = Token.getLoc(); ++ ++ if (Token.isNot(AsmToken::Dollar)) { ++ LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n"); ++ if (Token.is(AsmToken::Identifier)) { ++ if (searchSymbolAlias(Operands)) ++ return MatchOperand_Success; ++ } ++ LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); ++ return MatchOperand_NoMatch; ++ } ++ LLVM_DEBUG(dbgs() << ".. $\n"); ++ ++ OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); // $ ++ Parser.Lex(); // identifier ++ } ++ return ResTy; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseJumpTarget(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); ++ ++ SMLoc S = getLexer().getLoc(); ++ ++ // Registers are a valid target and have priority over symbols. ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy != MatchOperand_NoMatch) ++ return ResTy; ++ ++ // Integers and expressions are acceptable ++ const MCExpr *Expr = nullptr; ++ if (Parser.parseExpression(Expr)) { ++ // We have no way of knowing if a symbol was consumed so we must ParseFail ++ return MatchOperand_ParseFail; ++ } ++ Operands.push_back( ++ LoongArchOperand::CreateImm(Expr, S, getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++} ++ ++static std::string LoongArchMnemonicSpellCheck(StringRef S, ++ const FeatureBitset &FBS, ++ unsigned VariantID = 0); ++ ++bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, ++ StringRef Name, SMLoc NameLoc, ++ OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "ParseInstruction\n"); ++ ++ // We have reached first instruction, module directive are now forbidden. ++ getTargetStreamer().forbidModuleDirective(); ++ ++ // Check if we have valid mnemonic ++ if (!mnemonicIsValid(Name)) { ++ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); ++ std::string Suggestion = LoongArchMnemonicSpellCheck(Name, FBS); ++ return Error(NameLoc, "unknown instruction" + Suggestion); ++ } ++ ++ // First operand in MCInst is instruction mnemonic. ++ Operands.push_back(LoongArchOperand::CreateToken(Name, NameLoc, *this)); ++ ++ // Read the remaining operands. 
++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ // Read the first operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ ++ while (getLexer().is(AsmToken::Comma)) { ++ Parser.Lex(); // Eat the comma. ++ // Parse and remember the operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ } ++ } ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++// FIXME: Given that these have the same name, these should both be ++// consistent on affecting the Parser. ++bool LoongArchAsmParser::reportParseError(Twine ErrorMsg) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, ErrorMsg); ++} ++ ++bool LoongArchAsmParser::parseSetAssignment() { ++ StringRef Name; ++ const MCExpr *Value; ++ MCAsmParser &Parser = getParser(); ++ ++ if (Parser.parseIdentifier(Name)) ++ return reportParseError("expected identifier after .set"); ++ ++ if (getLexer().isNot(AsmToken::Comma)) ++ return reportParseError("unexpected token, expected comma"); ++ Lex(); // Eat comma ++ ++ if (!Parser.parseExpression(Value)) { ++ // Parse assignment of an expression including ++ // symbolic registers: ++ // .set $tmp, $BB0-$BB1 ++ // .set r2, $f2 ++ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); ++ Sym->setVariableValue(Value); ++ } else { ++ return reportParseError("expected valid expression after comma"); ++ } ++ ++ return false; ++} ++ ++bool LoongArchAsmParser::parseDirectiveSet() { ++ const AsmToken &Tok = getParser().getTok(); ++ StringRef IdVal = Tok.getString(); ++ SMLoc Loc = Tok.getLoc(); ++ ++ if (IdVal == "bopt") { ++ Warning(Loc, "'bopt' feature is unsupported"); ++ getParser().Lex(); ++ return false; ++ } ++ if (IdVal == "nobopt") { ++ // We're already running in nobopt mode, so nothing to do. ++ getParser().Lex(); ++ return false; ++ } ++ ++ // It is just an identifier, look for an assignment. ++ return parseSetAssignment(); ++} ++ ++bool LoongArchAsmParser::ParseDirective(AsmToken DirectiveID) { ++ // This returns false if this function recognizes the directive ++ // regardless of whether it is successfully handles or reports an ++ // error. Otherwise it returns true to give the generic parser a ++ // chance at recognizing it. ++ ++ MCAsmParser &Parser = getParser(); ++ StringRef IDVal = DirectiveID.getString(); ++ ++ if (IDVal == ".end") { ++ while (getLexer().isNot(AsmToken::Eof)) ++ Parser.Lex(); ++ return false; ++ } ++ ++ if (IDVal == ".set") { ++ parseDirectiveSet(); ++ return false; ++ } ++ ++ if (IDVal == ".llvm_internal_loongarch_reallow_module_directive") { ++ parseInternalDirectiveReallowModule(); ++ return false; ++ } ++ ++ return true; ++} ++ ++bool LoongArchAsmParser::parseInternalDirectiveReallowModule() { ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ ++ getTargetStreamer().reallowModuleDirective(); ++ ++ getParser().Lex(); // Eat EndOfStatement token. 
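
// Illustrative aside (not part of the patch): ParseInstruction above first
// validates the mnemonic, and mnemonicIsValid (defined a little further down)
// does so with a binary search (std::equal_range) over the sorted,
// TableGen-generated match table. A minimal standalone sketch of that lookup,
// assuming a plain sorted array in place of MatchTable0 and the default
// comparator in place of LessOpcode:
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>

// A few LoongArch mnemonics, kept sorted so equal_range can binary-search them.
static const std::string SortedMnemonics[] = {
    "add.w", "addi.w", "bstrins.w", "ld.w", "ll.w", "sc.w", "st.w", "sub.w"};

static bool mnemonicIsKnown(const std::string &Mnemonic) {
  auto Range = std::equal_range(std::begin(SortedMnemonics),
                                std::end(SortedMnemonics), Mnemonic);
  return Range.first != Range.second; // non-empty range => mnemonic present
}

int main() {
  assert(mnemonicIsKnown("ld.w"));
  assert(!mnemonicIsKnown("ld.q")); // unknown -> "unknown instruction" + suggestion
  return 0;
}
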
++ return false; ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() { ++ RegisterMCAsmParser X(getTheLoongArch32Target()); ++ RegisterMCAsmParser A(getTheLoongArch64Target()); ++} ++ ++#define GET_REGISTER_MATCHER ++#define GET_MATCHER_IMPLEMENTATION ++#define GET_MNEMONIC_SPELL_CHECKER ++#include "LoongArchGenAsmMatcher.inc" ++ ++bool LoongArchAsmParser::mnemonicIsValid(StringRef Mnemonic) { ++ // Find the appropriate table for this asm variant. ++ const MatchEntry *Start, *End; ++ Start = std::begin(MatchTable0); ++ End = std::end(MatchTable0); ++ ++ // Search the table. ++ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); ++ return MnemonicRange.first != MnemonicRange.second; ++} +diff --git a/lib/Target/LoongArch/CMakeLists.txt b/lib/Target/LoongArch/CMakeLists.txt +new file mode 100644 +index 00000000..8540b97f +--- /dev/null ++++ b/lib/Target/LoongArch/CMakeLists.txt +@@ -0,0 +1,55 @@ ++add_llvm_component_group(LoongArch HAS_JIT) ++ ++set(LLVM_TARGET_DEFINITIONS LoongArch.td) ++ ++tablegen(LLVM LoongArchGenAsmMatcher.inc -gen-asm-matcher) ++tablegen(LLVM LoongArchGenAsmWriter.inc -gen-asm-writer) ++tablegen(LLVM LoongArchGenCallingConv.inc -gen-callingconv) ++tablegen(LLVM LoongArchGenDAGISel.inc -gen-dag-isel) ++tablegen(LLVM LoongArchGenDisassemblerTables.inc -gen-disassembler) ++tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) ++tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) ++tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) ++tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) ++tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) ++ ++add_public_tablegen_target(LoongArchCommonTableGen) ++ ++add_llvm_target(LoongArchCodeGen ++ LoongArchAsmPrinter.cpp ++ LoongArchCCState.cpp ++ LoongArchExpandPseudo.cpp ++ LoongArchInstrInfo.cpp ++ LoongArchISelDAGToDAG.cpp ++ LoongArchISelLowering.cpp ++ LoongArchFrameLowering.cpp ++ LoongArchMCInstLower.cpp ++ LoongArchMachineFunction.cpp ++ LoongArchModuleISelDAGToDAG.cpp ++ LoongArchRegisterInfo.cpp ++ LoongArchSubtarget.cpp ++ LoongArchTargetMachine.cpp ++ LoongArchTargetObjectFile.cpp ++ LoongArchTargetTransformInfo.cpp ++ ++ LINK_COMPONENTS ++ Analysis ++ AsmPrinter ++ CodeGen ++ Core ++ MC ++ LoongArchDesc ++ LoongArchInfo ++ SelectionDAG ++ Support ++ Target ++ GlobalISel ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) ++ ++add_subdirectory(AsmParser) ++add_subdirectory(Disassembler) ++add_subdirectory(MCTargetDesc) ++add_subdirectory(TargetInfo) +diff --git a/lib/Target/LoongArch/Disassembler/CMakeLists.txt b/lib/Target/LoongArch/Disassembler/CMakeLists.txt +new file mode 100644 +index 00000000..864be631 +--- /dev/null ++++ b/lib/Target/LoongArch/Disassembler/CMakeLists.txt +@@ -0,0 +1,11 @@ ++add_llvm_component_library(LLVMLoongArchDisassembler ++ LoongArchDisassembler.cpp ++ ++ LINK_COMPONENTS ++ MCDisassembler ++ LoongArchInfo ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +new file mode 100644 +index 00000000..2c92cc71 +--- /dev/null ++++ b/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +@@ -0,0 +1,917 @@ ++//===- LoongArchDisassembler.cpp - Disassembler for LoongArch -----------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file is part of the LoongArch Disassembler. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" ++#include "llvm/ADT/ArrayRef.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDisassembler/MCDisassembler.h" ++#include "llvm/MC/MCFixedLenDisassembler.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-disassembler" ++ ++using DecodeStatus = MCDisassembler::DecodeStatus; ++ ++namespace { ++ ++class LoongArchDisassembler : public MCDisassembler { ++ ++public: ++ LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) ++ : MCDisassembler(STI, Ctx) {} ++ ++ bool is64Bit() const { return STI.getFeatureBits()[LoongArch::Feature64Bit]; } ++ ++ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, uint64_t Address, ++ raw_ostream &CStream) const override; ++}; ++ ++} // end anonymous namespace ++ ++// Forward declare these because the autogenerated code will reference them. ++// Definitions are further down. ++static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus 
DecodeBranchTarget(MCInst &Inst, ++ unsigned Offset, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeJumpTarget(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeMemSimm14(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++template ++static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder); ++ ++template ++static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ return DecodeUImmWithOffsetAndScale(Inst, Value, Address, ++ Decoder); ++} ++ ++template ++static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder); ++ ++/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't ++/// handle. ++template ++static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, ++ const void *Decoder); ++ ++namespace llvm { ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++} // end namespace llvm ++ ++static MCDisassembler *createLoongArchDisassembler( ++ const Target &T, ++ const MCSubtargetInfo &STI, ++ MCContext &Ctx) { ++ return new LoongArchDisassembler(STI, Ctx); ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() { ++ // Register the disassembler. 
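
// Illustrative aside (not part of the patch): the decoders that follow
// (readInstruction32, DecodeMem, DecodeJumpTarget, ...) all assemble a 32-bit
// word from four little-endian bytes, slice fixed bit-fields out of it, and
// sign-extend/scale the immediates. A minimal standalone sketch of those steps;
// the helper names and the sample encodings are illustrative only.
#include <cassert>
#include <cstdint>

// Assemble an instruction word from four little-endian bytes (readInstruction32).
static uint32_t readWordLE(const uint8_t B[4]) {
  return uint32_t(B[0]) | uint32_t(B[1]) << 8 | uint32_t(B[2]) << 16 |
         uint32_t(B[3]) << 24;
}

// Extract Width bits starting at bit Lo (like fieldFromInstruction).
static uint32_t field(uint32_t Insn, unsigned Lo, unsigned Width) {
  return (Insn >> Lo) & ((1u << Width) - 1u);
}

// Sign-extend the low Bits bits of V (like SignExtend32<Bits>).
static int32_t signExtend(uint32_t V, unsigned Bits) {
  uint32_t SignBit = 1u << (Bits - 1);
  return int32_t((V ^ SignBit) - SignBit);
}

int main() {
  const uint8_t Bytes[4] = {0x78, 0x56, 0x34, 0x12};
  assert(readWordLE(Bytes) == 0x12345678u);

  // DecodeMem-style layout: rd = bits[4:0], base = bits[9:5], simm12 = bits[21:10].
  uint32_t MemWord = (0xFF8u << 10) | (5u << 5) | 4u; // offset -8, base $r5, rd $r4
  assert(field(MemWord, 0, 5) == 4);
  assert(field(MemWord, 5, 5) == 5);
  assert(signExtend(field(MemWord, 10, 12), 12) == -8);

  // DecodeJumpTarget-style layout: the 26-bit offset is split into a low 16-bit
  // part at bits[25:10] and a high 10-bit part at bits[9:0]; the byte offset is
  // the reassembled value shifted left by 2 and sign-extended from 28 bits.
  uint32_t Hi10 = 0x3FFu, Lo16 = 0xFFFFu; // a 26-bit offset of all ones
  int32_t Target = signExtend((Hi10 << 16 | Lo16) << 2, 28);
  assert(Target == -4); // i.e. one instruction backwards
  return 0;
}
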
++ TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(), ++ createLoongArchDisassembler); ++ TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(), ++ createLoongArchDisassembler); ++} ++ ++#include "LoongArchGenDisassemblerTables.inc" ++ ++static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { ++ const LoongArchDisassembler *Dis = static_cast(D); ++ const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); ++ if (RC == LoongArch::GPR64RegClassID || RC == LoongArch::GPR32RegClassID) { ++ // sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ // that just like LoongArchAsmParser.cpp and LoongArchISelLowering.cpp ++ unsigned char indexes[] = { 0, 27, 28, 29, 1, 2, 3, 4, ++ 5, 6, 7, 8, 9, 10, 11, 12, ++ 13, 14, 15, 16, 17, 30, 31, 18, ++ 19, 20, 21, 22, 23, 24, 25, 26 ++ }; ++ assert(RegNo < sizeof(indexes)); ++ return *(RegInfo->getRegClass(RC).begin() + indexes[RegNo]); ++ } ++ return *(RegInfo->getRegClass(RC).begin() + RegNo); ++} ++ ++template ++static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, ++ const void *Decoder) { ++ using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *); ++ ++ // The size of the n field depends on the element size ++ // The register class also depends on this. ++ InsnType tmp = fieldFromInstruction(insn, 17, 5); ++ unsigned NSize = 0; ++ DecodeFN RegDecoder = nullptr; ++ if ((tmp & 0x18) == 0x00) { ++ NSize = 4; ++ RegDecoder = DecodeLSX128BRegisterClass; ++ } else if ((tmp & 0x1c) == 0x10) { ++ NSize = 3; ++ RegDecoder = DecodeLSX128HRegisterClass; ++ } else if ((tmp & 0x1e) == 0x18) { ++ NSize = 2; ++ RegDecoder = DecodeLSX128WRegisterClass; ++ } else if ((tmp & 0x1f) == 0x1c) { ++ NSize = 1; ++ RegDecoder = DecodeLSX128DRegisterClass; ++ } else ++ llvm_unreachable("Invalid encoding"); ++ ++ assert(NSize != 0 && RegDecoder != nullptr); ++ ++ // $vd ++ tmp = fieldFromInstruction(insn, 6, 5); ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $vd_in ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $n ++ tmp = fieldFromInstruction(insn, 16, NSize); ++ MI.addOperand(MCOperand::createImm(tmp)); ++ // $vs ++ tmp = fieldFromInstruction(insn, 11, 5); ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $n2 ++ MI.addOperand(MCOperand::createImm(0)); ++ ++ return MCDisassembler::Success; ++} ++ ++/// Read four bytes from the ArrayRef and return 32 bit word. ++static DecodeStatus readInstruction32(ArrayRef Bytes, uint64_t Address, ++ uint64_t &Size, uint32_t &Insn) { ++ // We want to read exactly 4 Bytes of data. ++ if (Bytes.size() < 4) { ++ Size = 0; ++ return MCDisassembler::Fail; ++ } ++ ++ Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | ++ (Bytes[3] << 24); ++ ++ return MCDisassembler::Success; ++} ++ ++DecodeStatus LoongArchDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, ++ uint64_t Address, ++ raw_ostream &CStream) const { ++ uint32_t Insn; ++ DecodeStatus Result; ++ Size = 0; ++ ++ // Attempt to read the instruction so that we can attempt to decode it. If ++ // the buffer is not 4 bytes long, let the higher level logic figure out ++ // what to do with a size of zero and MCDisassembler::Fail. 
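
// Illustrative aside (not part of the patch): getReg above remaps the raw 5-bit
// register number through the indexes[] table because, per its comment, the
// GPR32/GPR64 register classes are kept in the same ABI order that
// matchCPURegisterName uses in the assembler earlier in this patch (zero,
// a0..a7, t0..t8, s0..s8, ra, tp, sp, r21, fp). A standalone check, under that
// assumption, that the two tables agree; classPosForHwReg is a hypothetical
// helper.
#include <cassert>

// Class position of hardware register $rN, following the assembler's ordering.
static int classPosForHwReg(int N) {
  if (N == 0)  return 0;            // $zero
  if (N == 1)  return 27;           // $ra
  if (N == 2)  return 28;           // $tp
  if (N == 3)  return 29;           // $sp
  if (N <= 11) return 1 + (N - 4);  // $a0..$a7
  if (N <= 20) return 9 + (N - 12); // $t0..$t8
  if (N == 21) return 30;           // $r21
  if (N == 22) return 31;           // $fp
  return 18 + (N - 23);             // $s0..$s8
}

int main() {
  // The remap table used by the disassembler's getReg, copied from above.
  const unsigned char indexes[32] = {
      0,  27, 28, 29, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
      13, 14, 15, 16, 17, 30, 31, 18, 19, 20, 21, 22, 23, 24, 25, 26};
  for (int N = 0; N < 32; ++N)
    assert(int(indexes[N]) == classPosForHwReg(N));
  return 0;
}
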
++ Result = readInstruction32(Bytes, Address, Size, Insn); ++ if (Result == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ ++ // The only instruction size for standard encoded LoongArch. ++ Size = 4; ++ ++ if (is64Bit()) { ++ LLVM_DEBUG(dbgs() << "Trying LoongArch (GPR64) table (32-bit opcodes):\n"); ++ Result = decodeInstruction(DecoderTableLoongArch32, Instr, Insn, ++ Address, this, STI); ++ if (Result != MCDisassembler::Fail) ++ return Result; ++ } ++ ++ LLVM_DEBUG(dbgs() << "Trying LoongArch32 (GPR32) table (32-bit opcodes):\n"); ++ Result = decodeInstruction(DecoderTableLoongArch3232, Instr, Insn, ++ Address, this, STI); ++ if (Result != MCDisassembler::Fail) ++ return Result; ++ ++ return MCDisassembler::Fail; ++} ++ ++static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::GPR64RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::GPR32RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (static_cast(Decoder)->is64Bit()) ++ return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); ++ ++ return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); ++} ++ ++static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::FGR64RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FGR32RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FCSRRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 7) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FCFRRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0xfff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ ++ Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ if (Inst.getOpcode() == LoongArch::SC_W || ++ Inst.getOpcode() == 
LoongArch::SC_D) ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeMemSimm14(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0x3fff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ ++ Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ if (Inst.getOpcode() == LoongArch::SC_W || ++ Inst.getOpcode() == LoongArch::SC_D) ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); ++ 
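
// Illustrative aside (not part of the patch): in the vstelm.*/xvstelm.* decoding
// just below, the 8-bit signed offset field is scaled by the element size, and
// the element-index field shrinks as elements get wider (4/3/2/1 bits for the
// 128-bit b/h/w/d forms, 5/4/3/2 bits for the 256-bit forms), so the index can
// address every lane exactly once. A small sketch of that relationship;
// laneCount and indexFieldBits are hypothetical helpers.
#include <cassert>

static unsigned laneCount(unsigned VecBytes, unsigned ElemBytes) {
  return VecBytes / ElemBytes;
}

// Width in bits of the element-index field for a vector of VecBytes bytes.
static unsigned indexFieldBits(unsigned VecBytes, unsigned ElemBytes) {
  unsigned Bits = 0;
  for (unsigned N = laneCount(VecBytes, ElemBytes); N > 1; N >>= 1)
    ++Bits;
  return Bits;
}

int main() {
  // LSX: 16-byte vectors.
  assert(indexFieldBits(16, 1) == 4); // vstelm.b: index field 0..15
  assert(indexFieldBits(16, 2) == 3); // vstelm.h
  assert(indexFieldBits(16, 4) == 2); // vstelm.w
  assert(indexFieldBits(16, 8) == 1); // vstelm.d
  // LASX: 32-byte vectors.
  assert(indexFieldBits(32, 1) == 5); // xvstelm.b
  assert(indexFieldBits(32, 8) == 2); // xvstelm.d
  // The byte offset is the 8-bit signed field times the element size,
  // e.g. a field value of -3 for vstelm.w means a byte offset of -12.
  assert(-3 * 4 == -12);
  return 0;
}
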
unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ unsigned idx; ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::VSTELM_B: ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ idx = fieldFromInstruction(Insn, 18, 4); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_H: ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ idx = fieldFromInstruction(Insn, 18, 3); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_W: ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ idx = fieldFromInstruction(Insn, 18, 2); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_D: ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ idx = fieldFromInstruction(Insn, 18, 1); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ ++ int Offset; ++ unsigned Reg, Base; ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::VLDREPL_B: ++ ++ Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ break; ++ case LoongArch::VLDREPL_H: ++ ++ Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ break; ++ case LoongArch::VLDREPL_W: ++ ++ Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ break; ++ case LoongArch::VLDREPL_D: ++ ++ Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, 
const void *Decoder) { ++ int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ unsigned idx; ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::XVSTELM_B: ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ idx = fieldFromInstruction(Insn, 18, 5); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_H: ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ idx = fieldFromInstruction(Insn, 18, 4); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_W: ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ idx = fieldFromInstruction(Insn, 18, 3); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_D: ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ idx = fieldFromInstruction(Insn, 18, 2); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ ++ int Offset; ++ unsigned Reg, Base; ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::XVLDREPL_B: ++ ++ Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ break; ++ case LoongArch::XVLDREPL_H: ++ ++ Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ break; ++ case LoongArch::XVLDREPL_W: ++ ++ Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); 
++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ break; ++ case LoongArch::XVLDREPL_D: ++ ++ Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0xffff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::FGR64RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128DRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ 
return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256DRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeBranchTarget(MCInst &Inst, ++ unsigned Offset, ++ uint64_t Address, ++ const void *Decoder) { ++ int32_t BranchOffset; ++ // Similar to LoongArchAsmParser::processInstruction, decode the branch target ++ // for different instructions. ++ switch (Inst.getOpcode()) { ++ default: ++ llvm_unreachable(""); ++ case LoongArch::BEQ: ++ case LoongArch::BNE: ++ case LoongArch::BLT: ++ case LoongArch::BGE: ++ case LoongArch::BLTU: ++ case LoongArch::BGEU: ++ BranchOffset = (SignExtend32<16>(Offset) * 4); ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BNEZ: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ BranchOffset = (SignExtend32<21>(Offset) * 4); ++ break; ++ case LoongArch::B: ++ case LoongArch::BL: ++ BranchOffset = (SignExtend32<26>(Offset) * 4); ++ break; ++ } ++ Inst.addOperand(MCOperand::createImm(BranchOffset)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeJumpTarget(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ unsigned hi10 = fieldFromInstruction(Insn, 0, 10); ++ unsigned lo16 = fieldFromInstruction(Insn, 10, 16); ++ int32_t JumpOffset = SignExtend32<28>((hi10 << 16 | lo16) << 2); ++ Inst.addOperand(MCOperand::createImm(JumpOffset)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ Value &= ((1 << Bits) - 1); ++ Value *= Scale; ++ Inst.addOperand(MCOperand::createImm(Value + Offset)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ int32_t Imm = SignExtend32(Value) * ScaleBy; ++ Inst.addOperand(MCOperand::createImm(Imm + Offset)); ++ return MCDisassembler::Success; ++} +diff --git a/lib/Target/LoongArch/LoongArch.h b/lib/Target/LoongArch/LoongArch.h +new file mode 100644 +index 00000000..73fd4a62 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArch.h +@@ -0,0 +1,37 @@ ++//===-- LoongArch.h - Top-level interface for LoongArch representation ----*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the entry points for global functions defined in ++// the LLVM LoongArch back-end. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H ++ ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/Target/TargetMachine.h" ++ ++namespace llvm { ++ class LoongArchTargetMachine; ++ class ModulePass; ++ class FunctionPass; ++ class LoongArchSubtarget; ++ class LoongArchTargetMachine; ++ class InstructionSelector; ++ class PassRegistry; ++ ++ FunctionPass *createLoongArchModuleISelDagPass(); ++ FunctionPass *createLoongArchOptimizePICCallPass(); ++ FunctionPass *createLoongArchBranchExpansion(); ++ FunctionPass *createLoongArchExpandPseudoPass(); ++ ++ void initializeLoongArchBranchExpansionPass(PassRegistry &); ++} // end namespace llvm; ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArch.td b/lib/Target/LoongArch/LoongArch.td +new file mode 100644 +index 00000000..8fab224b +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArch.td +@@ -0,0 +1,104 @@ ++//===-- LoongArch.td - Describe the LoongArch Target Machine ---------*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// This is the top level entry point for the LoongArch target. ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Target-independent interfaces ++//===----------------------------------------------------------------------===// ++ ++include "llvm/Target/Target.td" ++ ++// The overall idea of the PredicateControl class is to chop the Predicates list ++// into subsets that are usually overridden independently. This allows ++// subclasses to partially override the predicates of their superclasses without ++// having to re-add all the existing predicates. 
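
// Illustrative aside (not part of the patch, and in C++ rather than TableGen):
// the PredicateControl comment above describes Predicates as the concatenation
// (!listconcat) of several independently overridable sub-lists. A rough C++
// analogy of that behaviour, using strings for predicates, only meant to show
// why overriding one sub-list leaves the others intact.
#include <cassert>
#include <string>
#include <vector>

struct PredicateControl {
  std::vector<std::string> GPRPredicates;      // e.g. overridden with {"Is64Bit"}
  std::vector<std::string> InsnPredicates;     // e.g. overridden with {"HasLSX"}
  std::vector<std::string> AdditionalPredicates;

  // The equivalent of the !listconcat that builds the final Predicates list.
  std::vector<std::string> predicates() const {
    std::vector<std::string> All;
    for (const auto *Part :
         {&GPRPredicates, &InsnPredicates, &AdditionalPredicates})
      All.insert(All.end(), Part->begin(), Part->end());
    return All;
  }
};

int main() {
  PredicateControl P;
  P.GPRPredicates = {"Is64Bit"}; // a "subclass" overrides only this subset
  P.InsnPredicates = {"HasLSX"};
  assert((P.predicates() == std::vector<std::string>{"Is64Bit", "HasLSX"}));
  return 0;
}
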
++class PredicateControl { ++ // Predicates for the encoding scheme in use such as HasStdEnc ++ list EncodingPredicates = []; ++ // Predicates for the GPR size such as is64Bit ++ list GPRPredicates = []; ++ // Predicates for the FGR size and layout such as HasBasicD ++ list FGRPredicates = []; ++ // Predicates for the instruction group membership such as ISA's ++ list InsnPredicates = []; ++ // Predicate for the ISA extension that an instruction belongs to ++ list ExtPredicate = []; ++ // Predicate for marking the instruction as usable in hard-float mode only ++ list HardFloatPredicate = []; ++ // Predicates for anything else ++ list AdditionalPredicates = []; ++ list Predicates = !listconcat(EncodingPredicates, ++ GPRPredicates, ++ FGRPredicates, ++ InsnPredicates, ++ HardFloatPredicate, ++ ExtPredicate, ++ AdditionalPredicates); ++} ++ ++// Like Requires<> but for the AdditionalPredicates list ++class AdditionalRequires preds> { ++ list AdditionalPredicates = preds; ++} ++ ++//===----------------------------------------------------------------------===// ++// LoongArch Subtarget features // ++//===----------------------------------------------------------------------===// ++ ++def FeatureLSX : SubtargetFeature<"lsx", "HasLSX", "true", "Support LSX">; ++def Feature64Bit ++ : SubtargetFeature<"64bit", "HasLA64", "true", ++ "LA64 Basic Integer and Privilege Instruction Set">; ++def FeatureBasicF : SubtargetFeature<"f", "HasBasicF", "true", ++ "'F' (Single-Precision Floating-Point)">; ++def FeatureBasicD : SubtargetFeature<"d", "HasBasicD", "true", ++ "'D' (Double-Precision Floating-Point)", ++ [FeatureBasicF]>; ++ ++def FeatureLASX : SubtargetFeature<"lasx", "HasLASX", "true", "Support LASX", [FeatureLSX]>; ++ ++def FeatureUnalignedAccess ++ : SubtargetFeature<"unaligned-access", "UnalignedAccess", "true", ++ "Allow all unaligned memory access">; ++//===----------------------------------------------------------------------===// ++// Register File, Calling Conv, Instruction Descriptions ++//===----------------------------------------------------------------------===// ++ ++include "LoongArchRegisterInfo.td" ++include "LoongArchInstrInfo.td" ++include "LoongArchCallingConv.td" ++ ++def LoongArchInstrInfo : InstrInfo; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch processors supported. ++//===----------------------------------------------------------------------===// ++ ++def : ProcessorModel<"generic-la32", NoSchedModel, []>; ++def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; ++def : ProcessorModel<"la464", NoSchedModel, ++ [Feature64Bit, FeatureUnalignedAccess]>; ++ ++def LoongArchAsmParser : AsmParser { ++ let ShouldEmitMatchRegisterName = 0; ++} ++ ++def LoongArchAsmParserVariant : AsmParserVariant { ++ int Variant = 0; ++ ++ // Recognize hard coded registers. 
++ string RegisterPrefix = "$"; ++} ++ ++def LoongArch : Target { ++ let InstructionSet = LoongArchInstrInfo; ++ let AssemblyParsers = [LoongArchAsmParser]; ++ let AssemblyParserVariants = [LoongArchAsmParserVariant]; ++ let AllowRegisterRenaming = 1; ++} +diff --git a/lib/Target/LoongArch/LoongArch32InstrInfo.td b/lib/Target/LoongArch/LoongArch32InstrInfo.td +new file mode 100644 +index 00000000..908307bb +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArch32InstrInfo.td +@@ -0,0 +1,717 @@ ++//===- LoongArch32InstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes LoongArch32 instructions. ++// ++//===----------------------------------------------------------------------===// ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Definitions. ++//===---------------------------------------------------------------------===/ ++ ++let DecoderNamespace = "LoongArch32" in { ++ /// ++ /// R2 ++ /// ++ def CLO_W : Count1<"clo.w", GPR32Opnd, ctlz>, R2I<0b00100>; ++ def CLZ_W : Int_Reg2<"clz.w", GPR32Opnd, ctlz>, R2I<0b00101>; ++ def CTO_W : Count1<"cto.w", GPR32Opnd, cttz>, R2I<0b00110>; ++ def CTZ_W : Int_Reg2<"ctz.w", GPR32Opnd, cttz>, R2I<0b00111>; ++ ++ def REVB_2H : Int_Reg2<"revb.2h", GPR32Opnd>, R2I<0b01100>;//see below bswap pattern ++ ++ def BITREV_4B : Int_Reg2<"bitrev.4b", GPR32Opnd>, R2I<0b10010>; ++ def BITREV_W : Int_Reg2<"bitrev.w", GPR32Opnd, bitreverse>, R2I<0b10100>; ++ ++ let isCodeGenOnly = 1 in { ++ def EXT_W_H32 : SignExtInReg<"ext.w.h", GPR32Opnd, i16>, R2I<0b10110>; ++ def EXT_W_B32 : SignExtInReg<"ext.w.b", GPR32Opnd, i8>, R2I<0b10111>; ++ ++ } ++ ++ def CPUCFG : Int_Reg2<"cpucfg", GPR32Opnd, int_loongarch_cpucfg>, R2I<0b11011>; ++ def RDTIMEL_W32 : Int_Reg2_Rdtime<"rdtimel.w", GPR32Opnd>, R2I<0b11000>; ++ def RDTIMEH_W32 : Int_Reg2_Rdtime<"rdtimeh.w", GPR32Opnd>, R2I<0b11001>; ++ ++ /// ++ /// R3 ++ /// ++ def ADD_W : Int_Reg3<"add.w", GPR32Opnd, add>, R3I<0b0100000>; ++ def SUB_W : Int_Reg3<"sub.w", GPR32Opnd, sub>, R3I<0b0100010>; ++ ++ let isCodeGenOnly = 1 in { ++ def SLT32 : SetCC_R<"slt", GPR32Opnd, setlt>, R3I<0b0100100>; ++ def SLTU32 : SetCC_R<"sltu", GPR32Opnd, setult>, R3I<0b0100101>; ++ def MASKEQZ32 : Int_Reg3<"maskeqz", GPR32Opnd>, R3I<0b0100110>;//see below patterns ++ def MASKNEZ32 : Int_Reg3<"masknez", GPR32Opnd>, R3I<0b0100111>;//see below patterns ++ ++ def NOR32 : Nor<"nor", GPR32Opnd>, R3I<0b0101000>; ++ def AND32 : Int_Reg3<"and", GPR32Opnd, and>, R3I<0b0101001>; ++ def OR32 : Int_Reg3<"or", GPR32Opnd, or>, R3I<0b0101010>; ++ def XOR32 : Int_Reg3<"xor", GPR32Opnd, xor>, R3I<0b0101011>; ++ def ANDN32 : Int_Reg3<"andn", GPR32Opnd>, R3I<0b0101101>; ++ def ORN32 : Int_Reg3<"orn", GPR32Opnd>, R3I<0b0101100>; ++ } ++ ++ def SLL_W : Shift_Var<"sll.w", GPR32Opnd, shl>, R3I<0b0101110>; ++ def SRL_W : Shift_Var<"srl.w", GPR32Opnd, srl>, R3I<0b0101111>; ++ def SRA_W : Shift_Var<"sra.w", GPR32Opnd, sra>, R3I<0b0110000>; ++ def ROTR_W: Shift_Var<"rotr.w", GPR32Opnd, rotr>, R3I<0b0110110>; ++ ++ def MUL_W : Int_Reg3<"mul.w", GPR32Opnd, mul>, R3I<0b0111000>; ++ def MULH_W : Int_Reg3<"mulh.w", GPR32Opnd, mulhs>, R3I<0b0111001>; ++ def MULH_WU : Int_Reg3<"mulh.wu", GPR32Opnd, 
mulhu>, R3I<0b0111010>; ++ ++let usesCustomInserter = 1 in { ++ def DIV_W : Int_Reg3<"div.w", GPR32Opnd, sdiv>, R3I<0b1000000>; ++ def MOD_W : Int_Reg3<"mod.w", GPR32Opnd, srem>, R3I<0b1000001>; ++ def DIV_WU : Int_Reg3<"div.wu", GPR32Opnd, udiv>, R3I<0b1000010>; ++ def MOD_WU : Int_Reg3<"mod.wu", GPR32Opnd, urem>, R3I<0b1000011>; ++} ++ ++ def CRC_W_B_W : Int_Reg3<"crc.w.b.w", GPR32Opnd, int_loongarch_crc_w_b_w>, R3I<0b1001000>; ++ def CRC_W_H_W : Int_Reg3<"crc.w.h.w", GPR32Opnd, int_loongarch_crc_w_h_w>, R3I<0b1001001>; ++ def CRC_W_W_W : Int_Reg3<"crc.w.w.w", GPR32Opnd, int_loongarch_crc_w_w_w>, R3I<0b1001010>; ++ def CRCC_W_B_W : Int_Reg3<"crcc.w.b.w", GPR32Opnd, int_loongarch_crcc_w_b_w>, R3I<0b1001100>; ++ def CRCC_W_H_W : Int_Reg3<"crcc.w.h.w", GPR32Opnd, int_loongarch_crcc_w_h_w>, R3I<0b1001101>; ++ def CRCC_W_W_W : Int_Reg3<"crcc.w.w.w", GPR32Opnd, int_loongarch_crcc_w_w_w>, R3I<0b1001110>; ++ /// ++ /// SLLI ++ /// ++ def SLLI_W : Shift_Imm32<"slli.w", GPR32Opnd, shl>, R2_IMM5<0b00>; ++ def SRLI_W : Shift_Imm32<"srli.w", GPR32Opnd, srl>, R2_IMM5<0b01>; ++ def SRAI_W : Shift_Imm32<"srai.w", GPR32Opnd, sra>, R2_IMM5<0b10>; ++ def ROTRI_W : Shift_Imm32<"rotri.w", GPR32Opnd, rotr>, R2_IMM5<0b11>; ++ /// ++ /// Misc ++ /// ++ def ALSL_W : Reg3_Sa<"alsl.w", GPR32Opnd, uimm2_plus1>, R3_SA2<0b00010> { ++ let Pattern = [(set GPR32Opnd:$rd, ++ (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))]; ++ } ++ def BYTEPICK_W : Reg3_Sa<"bytepick.w", GPR32Opnd, uimm2>, R3_SA2<0b00100>;//pattern:[] ++ ++ def BREAK : Code15<"break", int_loongarch_break>, CODE15<0b1010100>; ++ def SYSCALL : Code15<"syscall", int_loongarch_syscall>, CODE15<0b1010110>; ++ def TRAP : TrapBase; ++ ++ def BSTRINS_W : InsBase_32<"bstrins.w", GPR32Opnd, uimm5, LoongArchBstrins>, ++ INSERT_BIT32<0>; ++ def BSTRPICK_W : PickBase_32<"bstrpick.w", GPR32Opnd, uimm5, LoongArchBstrpick>, ++ INSERT_BIT32<1>; ++ ++ /// ++ /// R2_IMM12 ++ /// ++ let isCodeGenOnly = 1 in { ++ def SLTI32 : SetCC_I<"slti", GPR32Opnd, simm12_32>, R2_IMM12<0b000>; //PatFrag ++ def SLTUI32 : SetCC_I<"sltui", GPR32Opnd, simm12_32>, R2_IMM12<0b001>; //PatFrag ++ } ++ def ADDI_W : Int_Reg2_Imm12<"addi.w", GPR32Opnd, simm12_32, add>, R2_IMM12<0b010>; ++ ++ let isCodeGenOnly = 1 in { ++ def ANDI32 : Int_Reg2_Imm12<"andi", GPR32Opnd, uimm12_32, and>, R2_IMM12<0b101>; ++ def ORI32 : Int_Reg2_Imm12<"ori", GPR32Opnd, uimm12_32, or>, R2_IMM12<0b110>; ++ def XORI32 : Int_Reg2_Imm12<"xori", GPR32Opnd, uimm12_32, xor>, R2_IMM12<0b111>; ++ } ++ ++ /// ++ /// Privilege Instructions ++ /// ++ def CSRRD32 : CSR<"csrrd", GPR32Opnd, uimm14_32, int_loongarch_csrrd>, R1_CSR<0b0000000000100>; ++ def CSRWR32 : CSRW<"csrwr", GPR32Opnd, uimm14_32, int_loongarch_csrwr>, R1_CSR<0b0000100000100>; ++ def CSRXCHG32 : CSRX<"csrxchg", GPR32Opnd, uimm14_32, int_loongarch_csrxchg>, R2_CSR<0b00000100>; ++ def IOCSRRD_B32 : Int_Reg2<"iocsrrd.b", GPR32Opnd, int_loongarch_iocsrrd_b>, R2P<0b000>; ++ def IOCSRRD_H32 : Int_Reg2<"iocsrrd.h", GPR32Opnd, int_loongarch_iocsrrd_h>, R2P<0b001>; ++ def IOCSRRD_W32 : Int_Reg2<"iocsrrd.w", GPR32Opnd, int_loongarch_iocsrrd_w>, R2P<0b010>; ++ def IOCSRWR_B32 : Int_Reg2_Iocsrwr<"iocsrwr.b", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_b>, R2P<0b100>; ++ def IOCSRWR_H32 : Int_Reg2_Iocsrwr<"iocsrwr.h", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_h>, R2P<0b101>; ++ def IOCSRWR_W32 : Int_Reg2_Iocsrwr<"iocsrwr.w", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_w>, R2P<0b110>; ++ def CACOP32 : CAC<"cacop", GPR32Opnd, simm12_32, int_loongarch_cacop>, R1_CACHE; 
++ def LDDIR32 : LEVEL<"lddir", GPR32Opnd>, R2_LEVEL<0b00000110010000>; ++ def LDPTE32 : SEQ<"ldpte", GPR32Opnd>, R1_SEQ<0b00000110010001>; ++ ++ //def WAIT : Wait<"wait">; ++ // ++ //def IOCSRRD_D : R2P<0b011>, Int_Reg2<"iocsrrd.d", GPR32Opnd>; ++ //def IOCSRWR_D : R2P<0b111>, Int_Reg2<"iocsrwr.d", GPR32Opnd>; ++ // ++ //def TLBINV : IMM32<0b001000>, OP32<"tlbinv">; ++ //def TLBFLUSH : IMM32<0b001001>, OP32<"tlbflush">; ++ //def TLBP : IMM32<0b001010>, OP32<"tlbp">; ++ //def TLBR : IMM32<0b001011>, OP32<"tlbr">; ++ //def TLBWI : IMM32<0b001100>, OP32<"tlbwi">; ++ //def TLBWR : IMM32<0b001101>, OP32<"tlbwr">; ++ ++ /// ++ /// R1_IMM20 ++ /// ++ let isCodeGenOnly = 1 in { ++ def LU12I_W32 : SI20<"lu12i.w", GPR32Opnd, simm20_32>, R1_SI20<0b0001010>; ++ def PCADDI32 : SI20<"pcaddi", GPR32Opnd, simm20_32>, R1_SI20<0b0001100>; ++ def PCALAU12I32 : SI20<"pcalau12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001101>; ++ def PCADDU12I32 : SI20<"pcaddu12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001110>; ++ } ++ ++ let isCodeGenOnly = 1 in { ++ def BEQZ32 : Beqz<"beqz", brtarget, seteq, GPR32Opnd>, R1_IMM21BEQZ<0b010000>; ++ def BNEZ32 : Beqz<"bnez", brtarget, setne, GPR32Opnd>, R1_IMM21BEQZ<0b010001>; ++ ++ def JIRL32 : FJirl<"jirl", calltarget, GPR32Opnd>, R2_IMM16JIRL; ++ ++ def B32 : JumpFB, IMM26B<0b010100>; ++ ++ def BEQ32 : Beq<"beq", brtarget, seteq, GPR32Opnd>, R2_IMM16BEQ<0b010110>; ++ def BNE32 : Beq<"bne", brtarget, setne, GPR32Opnd>, R2_IMM16BEQ<0b010111>; ++ def BLT32 : Beq<"blt", brtarget, setlt, GPR32Opnd>, R2_IMM16BEQ<0b011000>; ++ def BGE32 : Beq<"bge", brtarget, setge, GPR32Opnd>, R2_IMM16BEQ<0b011001>; ++ def BLTU32 : Beq<"bltu", brtarget, setult, GPR32Opnd>, R2_IMM16BEQ<0b011010>; ++ def BGEU32 : Beq<"bgeu", brtarget, setuge, GPR32Opnd>, R2_IMM16BEQ<0b011011>; ++ } ++ ++ /// ++ /// Mem access ++ /// ++ def LL_W : LLBase<"ll.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b000>; ++ def SC_W : SCBase<"sc.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b001>; ++ ++ def PRELD_Raw32 : Preld_Raw<"preld", GPR32Opnd>, PRELD_FM; ++ ++ let isCodeGenOnly = 1 in { ++ def LD_B32 : Ld<"ld.b", GPR32Opnd, mem_simmptr, sextloadi8>, LOAD_STORE<0b0000>; ++ def LD_H32 : Ld<"ld.h", GPR32Opnd, mem_simmptr, sextloadi16, addrDefault>, LOAD_STORE<0b0001>; ++ def LD_W32 : Ld<"ld.w", GPR32Opnd, mem, load, addrDefault>, LOAD_STORE<0b0010>; ++ def ST_B32 : St<"st.b", GPR32Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; ++ def ST_H32 : St<"st.h", GPR32Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; ++ def ST_W32 : St<"st.w", GPR32Opnd, mem, store>, LOAD_STORE<0b0110>; ++ def LD_BU32 : Ld<"ld.bu", GPR32Opnd, mem_simmptr, zextloadi8, addrDefault>, LOAD_STORE<0b1000>; ++ def LD_HU32 : Ld<"ld.hu", GPR32Opnd, mem_simmptr, zextloadi16>, LOAD_STORE<0b1001>; ++ ++ def PRELD32 : Preld<"preld", mem, GPR32Opnd>, PRELD_FM; ++ ++ def LDPTR_W32 : LdPtr<"ldptr.w", GPR32Opnd>, LL_SC<0b100>; ++ def STPTR_W32 : StPtr<"stptr.w", GPR32Opnd>, LL_SC<0b101>; ++ } ++ ++ def IBAR : Bar<"ibar", int_loongarch_ibar>, BAR_FM<1>; ++ def DBAR : Bar<"dbar", int_loongarch_dbar>, BAR_FM<0>; ++ ++ def LONG_BRANCH_ADDIW : LoongArchPseudo<(outs GPR32Opnd:$dst), ++ (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; ++ ++ def LONG_BRANCH_ADDIW2Op : LoongArchPseudo<(outs GPR32Opnd:$dst), ++ (ins GPR32Opnd:$src, brtarget:$tgt), []>; ++ ++ def PseudoReturn : PseudoReturnBase; ++ ++ let isCodeGenOnly = 1 in { ++ def LDX_W32 : LDX_FT_LA<"ldx.w", GPR32Opnd, load>, ++ R3MI<0b00010000>; ++ def LDX_HU32 : LDX_FT_LA<"ldx.hu", GPR32Opnd, extloadi16>, ++ R3MI<0b01001000>; ++ def 
LDX_BU32 : LDX_FT_LA<"ldx.bu", GPR32Opnd, extloadi8>, ++ R3MI<0b01000000>; ++ def STX_W32 : STX_FT_LA<"stx.w", GPR32Opnd, store>, ++ R3MI<0b00110000>; ++ def LDX_H32 : LDX_FT_LA<"ldx.h", GPR32Opnd, sextloadi16>, ++ R3MI<0b00001000>; ++ def LDX_B32 : LDX_FT_LA<"ldx.b", GPR32Opnd, sextloadi8>, ++ R3MI<0b00000000>; ++ def STX_B32 : STX_FT_LA<"stx.b", GPR32Opnd, truncstorei8>, ++ R3MI<0b00100000>; ++ def STX_H32 : STX_FT_LA<"stx.h", GPR32Opnd, truncstorei16>, ++ R3MI<0b00101000>; ++ } ++} ++ ++def LEA_ADDI_W: EffectiveAddress<"addi.w", GPR32Opnd>, LEA_ADDI_FM<0b010>; ++ ++def : LoongArchPat<(LoongArchAddress (i32 tglobaladdr:$in)), ++ (ADDI_W (PCADDU12I32 tglobaladdr:$in) ,0)>,GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 tblockaddress:$in)), ++ (ADDI_W (PCADDU12I32 tblockaddress:$in),0)>, GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 tjumptable:$in)), ++ (ADDI_W (PCADDU12I32 tjumptable:$in),0)>, GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 texternalsym:$in)), ++ (ADDI_W (PCADDU12I32 texternalsym:$in),0)>, GPR_32; ++ ++//===----------------------------------------------------------------------===// ++// Arbitrary patterns that map to one or more instructions ++//===----------------------------------------------------------------------===// ++ ++let isCodeGenOnly = 1 in { ++ def REVB_2W_32 : Int_Reg2<"revb.2w", GPR32Opnd>, R2I<0b01110>; ++ def REVH_2W_32 : Int_Reg2<"revh.2w", GPR32Opnd>, R2I<0b10000>; ++} ++ ++// bswap pattern ++def : LoongArchPat<(bswap GPR32:$rj), (ROTRI_W (REVB_2H GPR32:$rj), 16)>; ++//def : LoongArchPat<(bswap GPR32:$rj), (REVB_2W_32 GPR32:$rj)>; ++//def : LoongArchPat<(bswap GPR32:$rj), (REVH_2W_32 (REVB_2H GPR32:$rj))>; ++ ++// i32 selects ++multiclass SelectInt_Pats { ++ ++// reg, immz ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), ++ (OROp (MASKNEZOp RC:$t, RC:$cond), (MASKEQZOp RC:$f, RC:$cond))>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), ++ (OROp (MASKEQZOp RC:$t, RC:$cond), (MASKNEZOp RC:$f, RC:$cond))>; ++ ++//def : LoongArchPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), ++// (MASKEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; ++//def : LoongArchPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), ++// (OROp (MASKEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), ++// (MASKNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; ++ ++// reg, immSExt12Plus1 ++//def : LoongArchPat<(select (Opg (setgt RC:$cond, immSExt12Plus1:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), ++// (MASKEQZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; ++//def : LoongArchPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), ++// (MASKEQZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; ++ ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), ++ (MASKNEZOp RC:$t, RC:$cond)>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), ++ (MASKEQZOp RC:$t, RC:$cond)>; ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), ++ (MASKEQZOp RC:$f, RC:$cond)>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), ++ (MASKNEZOp RC:$f, RC:$cond)>; ++} ++ ++defm : SelectInt_Pats; ++ ++def : LoongArchPat<(select i32:$cond, i32:$t, i32:$f), ++ (OR32 (MASKEQZ32 i32:$t, i32:$cond), ++ (MASKNEZ32 i32:$f, i32:$cond))>; ++def : 
LoongArchPat<(select i32:$cond, i32:$t, immz), ++ (MASKEQZ32 i32:$t, i32:$cond)>; ++def : LoongArchPat<(select i32:$cond, immz, i32:$f), ++ (MASKNEZ32 i32:$f, i32:$cond)>; ++ ++// truncate ++def : LoongArchPat<(i32 (trunc (assertzext_lt_i32 GPR64:$src))), ++ (EXTRACT_SUBREG GPR64:$src, sub_32)>, GPR_64; ++def : LoongArchPat<(i32 (trunc GPR64:$src)), ++ (SLLI_W (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, GPR_64; ++ ++// Patterns used for matching away redundant sign extensions. ++// LA32 arithmetic instructions sign extend their result implicitly. ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; ++def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; ++def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; ++ ++def : InstAlias<"break", (BREAK 0), 1>; ++def : InstAlias<"break $imm", (BREAK uimm15:$imm), 1>; ++def : LoongArchInstAlias<"move $dst, $src", ++ (OR32 GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32; ++ ++def immSExt12Plus1 : PatLeaf<(imm), [{ ++ return isInt<13>(N->getSExtValue()) && isInt<12>(N->getSExtValue() + 1); ++}]>; ++ ++def Plus1 : SDNodeXFormgetSExtValue() + 1); }]>; ++ ++multiclass BrcondPats { ++ ++def : LoongArchPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), ++ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), ++ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setge RC:$lhs, immSExt12:$rhs)), bb:$dst), ++ (BEQOp1 (SLTIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, immSExt12:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setgt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), ++ (BEQOp1 (SLTIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setugt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond RC:$cond, bb:$dst), ++ (BNEOp RC:$cond, ZEROReg, bb:$dst)>; ++} ++ ++defm : BrcondPats, GPR_64; ++ ++let usesCustomInserter = 1 in { ++ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I16 : 
Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops; ++ ++ def ATOMIC_SWAP_I8 : Atomic2Ops; ++ def ATOMIC_SWAP_I16 : Atomic2Ops; ++ def ATOMIC_SWAP_I32 : Atomic2Ops; ++ ++ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; ++ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; ++ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; ++ ++ def ATOMIC_LOAD_MAX_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_MAX_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_MAX_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_MIN_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops; ++} ++ ++def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA; ++def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA; ++def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA; ++ ++def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA; ++ ++def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B32 addr:$a)>; ++def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H32 addr:$a)>; ++def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W32 addrimm14lsl2:$a)>; ++def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W32 addr:$a)>; ++ ++def : LoongArchPat<(atomic_store_8 addr:$a, GPR32:$v), ++ (ST_B32 GPR32:$v, addr:$a)>; ++def : LoongArchPat<(atomic_store_16 addr:$a, GPR32:$v), ++ (ST_H32 GPR32:$v, addr:$a)>; ++def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, 
GPR32:$v), ++ (STPTR_W32 GPR32:$v, addrimm14lsl2:$a)>; ++def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v), ++ (ST_W32 GPR32:$v, addr:$a)>; ++ ++def : LoongArchPat<(LoongArchDBAR (i32 immz)), ++ (DBAR 0)>; ++ ++def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>; ++def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>; ++def : LoongArchPat<(i32 (extloadi16 addr:$src)), (LD_HU32 addr:$src)>; ++ ++def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; ++ ++// Patterns for loads/stores with a reg+imm operand. ++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ ++ def : LoadRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++} ++ ++let isCall=1, isCTI=1, Defs = [RA] in { ++ ++ class JumpLinkRegPseudo: ++ LoongArchPseudo<(outs), (ins RO:$rj), [(LoongArchJmpLink RO:$rj)]>, ++ PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> { ++ let hasPostISelHook = 1; ++ } ++ ++ class JumpLinkReg: ++ InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj, 0"), ++ [], FrmR, opstr> { ++ let hasPostISelHook = 1; ++ } ++ ++} ++ ++def JIRLR : JumpLinkReg<"jirl", GPR32Opnd>, R2_IMM16JIRL { ++ let offs16 = 0; ++} ++def JIRLRPseudo : JumpLinkRegPseudo; ++ ++class BrindRegPseudo: ++ LoongArchPseudo<(outs), (ins RO:$rj), [(brind RO:$rj)]>, ++ PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> { ++ let isTerminator=1; ++ let isBarrier=1; ++ let isBranch = 1; ++ let isIndirectBranch = 1; ++ bit isCTI = 1; ++} ++ ++def JIRLRBRIND : BrindRegPseudo; ++ ++def : LoongArchPat<(addc GPR32:$src, immSExt12:$imm), ++ (ADDI_W GPR32:$src, imm:$imm)>; ++ ++defm : SeteqPats; ++defm : SetlePats; ++defm : SetgtPats; ++defm : SetgePats; ++defm : SetgeImmPats; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immZExt12:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (immZExt12:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immSExt12:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADDI_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (immSExt12:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRA_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (mul (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext 
GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (uimm12_32:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (uimm12_32:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (OR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), ++ (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond)), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (srem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MOD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(atomic_store_32 addr:$a, (i32 (trunc (i64 (assertsext GPR64:$rj))))), ++ (ST_W32 (EXTRACT_SUBREG GPR64:$rj, sub_32), addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (sub (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (udiv (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (DIV_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (urem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MOD_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(brcond (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0)), bb:$offs21), ++ (BEQZ32 (EXTRACT_SUBREG GPR64:$rj, sub_32), brtarget:$offs21)>; ++ ++def : LoongArchPat<(setne (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0), ++ (SLTU32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32))>; ++ ++def : LoongArchPat<(select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))), ++ (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), ++ (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond))>; ++ ++def : LoongArchPat<(select (i32 (setne (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), ++ (MASKNEZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; ++ ++def : LoongArchPat<(select (i32 (seteq 
(i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), ++ (MASKEQZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; ++ ++ def : LoongArchPat<(store (i32 (trunc (i64 (assertsext GPR64:$v)))), addr:$a), ++ (ST_W32 (EXTRACT_SUBREG GPR64:$v, sub_32), addr:$a)>; ++ ++ ++def : LoongArchPat<(i32 (xor GPR32:$rj, (i32 -1))), ++ (NOR32 ZERO, GPR32:$rj)>; ++ ++def : LoongArchPat<(and GPR32:$rj, (i32 (xor GPR32:$rk, (i32 -1)))), ++ (ANDN32 GPR32:$rj, GPR32:$rk)>; ++ ++def : LoongArchPat< ++ (i64 ++ (sext ++ (i32 (and (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), ++ (i32 -1)))) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (ANDN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat< ++ (i64 ++ (sext ++ (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), ++ (i32 -1)))) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (ORN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (trunc (i64 (assertsext GPR64:$rk)))))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (trunc (i64 (or (i64 (assertsext GPR64:$rj)), ++ (i64 (assertsext GPR64:$rk)))))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 (EXTRACT_SUBREG GPR64:$rk, sub_32), ++ (EXTRACT_SUBREG GPR64:$rj, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (zext ++ (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 0)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (SLTUI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (i32 1)), ++ sub_32 ++ )>; +diff --git a/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +new file mode 100644 +index 00000000..f84b5bda +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +@@ -0,0 +1,601 @@ ++//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a printer that converts from our internal representation ++// of machine-dependent LLVM code to GAS-format LoongArch assembly language. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchAsmPrinter.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" ++#include "LoongArchMCInstLower.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/ADT/SmallString.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/BasicBlock.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-asm-printer" ++ ++LoongArchTargetStreamer &LoongArchAsmPrinter::getTargetStreamer() const { ++ return static_cast(*OutStreamer->getTargetStreamer()); ++} ++ ++bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { ++ Subtarget = &MF.getSubtarget(); ++ ++ LoongArchFI = MF.getInfo(); ++ MCP = MF.getConstantPool(); ++ ++ AsmPrinter::runOnMachineFunction(MF); ++ ++ emitXRayTable(); ++ ++ return true; ++} ++ ++bool LoongArchAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { ++ MCOp = MCInstLowering.LowerOperand(MO); ++ return MCOp.isValid(); ++} ++ ++#include "LoongArchGenMCPseudoLowering.inc" ++ ++// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to ++// JIRL as appropriate for the target. ++void LoongArchAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI) { ++ bool HasLinkReg = false; ++ MCInst TmpInst0; ++ TmpInst0.setOpcode(LoongArch::JIRL); ++ HasLinkReg = true; ++ ++ MCOperand MCOp; ++ ++ if (HasLinkReg) { ++ unsigned ZeroReg = Subtarget->is64Bit() ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ TmpInst0.addOperand(MCOperand::createReg(ZeroReg)); ++ } ++ ++ lowerOperand(MI->getOperand(0), MCOp); ++ TmpInst0.addOperand(MCOp); ++ ++ TmpInst0.addOperand(MCOperand::createImm(0)); ++ ++ EmitToStreamer(OutStreamer, TmpInst0); ++} ++ ++void LoongArchAsmPrinter::emitPseudoTailBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(LoongArch::B); ++ ++ MCOperand MCOp; ++ ++ lowerOperand(MI->getOperand(0), MCOp); ++ TmpInst.addOperand(MCOp); ++ ++ EmitToStreamer(OutStreamer, TmpInst); ++} ++ ++void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { ++ LoongArchTargetStreamer &TS = getTargetStreamer(); ++ unsigned Opc = MI->getOpcode(); ++ TS.forbidModuleDirective(); ++ ++ if (MI->isDebugValue()) { ++ SmallString<128> Str; ++ raw_svector_ostream OS(Str); ++ ++ PrintDebugValueComment(MI, OS); ++ return; ++ } ++ if (MI->isDebugLabel()) ++ return; ++ // If we just ended a constant pool, mark it as such. ++ OutStreamer->emitDataRegion(MCDR_DataRegionEnd); ++ InConstantPool = false; ++ ++ switch (Opc) { ++ case LoongArch::PATCHABLE_FUNCTION_ENTER: ++ LowerPATCHABLE_FUNCTION_ENTER(*MI); ++ return; ++ case LoongArch::PATCHABLE_FUNCTION_EXIT: ++ LowerPATCHABLE_FUNCTION_EXIT(*MI); ++ return; ++ case LoongArch::PATCHABLE_TAIL_CALL: ++ LowerPATCHABLE_TAIL_CALL(*MI); ++ return; ++ } ++ MachineBasicBlock::const_instr_iterator I = MI->getIterator(); ++ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); ++ ++ do { ++ // Do any auto-generated pseudo lowerings. ++ if (emitPseudoExpansionLowering(*OutStreamer, &*I)) ++ continue; ++ if (I->getOpcode() == LoongArch::PseudoReturn || ++ I->getOpcode() == LoongArch::PseudoReturn64){ ++ emitPseudoIndirectBranch(*OutStreamer, &*I); ++ continue; ++ } ++ if (I->getOpcode() == LoongArch::PseudoTailReturn){ ++ emitPseudoTailBranch(*OutStreamer, &*I); ++ continue; ++ } ++ ++ // Some instructions are marked as pseudo right now which ++ // would make the test fail for the wrong reason but ++ // that will be fixed soon. We need this here because we are ++ // removing another test for this situation downstream in the ++ // callchain. ++ // ++ if (I->isPseudo() ++ && !isLongBranchPseudo(I->getOpcode())) ++ llvm_unreachable("Pseudo opcode found in EmitInstruction()"); ++ ++ MCInst TmpInst0; ++ MCInstLowering.Lower(&*I, TmpInst0); ++ EmitToStreamer(*OutStreamer, TmpInst0); ++ } while ((++I != E) && I->isInsideBundle()); ++} ++ ++//===----------------------------------------------------------------------===// ++// ++// LoongArch Asm Directives ++// ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Set directives ++//===----------------------------------------------------------------------===// ++ ++/// Emit Set directives. 
++const char *LoongArchAsmPrinter::getCurrentABIString() const {
++  switch (static_cast<const LoongArchTargetMachine &>(TM).getABI().GetEnumValue()) {
++  case LoongArchABIInfo::ABI::ILP32D:
++    return "abiilp32d";
++  case LoongArchABIInfo::ABI::ILP32F:
++    return "abiilp32f";
++  case LoongArchABIInfo::ABI::ILP32S:
++    return "abiilp32s";
++  case LoongArchABIInfo::ABI::LP64D:
++    return "abilp64d";
++  case LoongArchABIInfo::ABI::LP64S:
++    return "abilp64s";
++  case LoongArchABIInfo::ABI::LP64F:
++    return "abilp64f";
++  default: llvm_unreachable("Unknown LoongArch ABI");
++  }
++}
++
++void LoongArchAsmPrinter::emitFunctionEntryLabel() {
++
++  OutStreamer->emitLabel(CurrentFnSym);
++
++}
++
++/// EmitFunctionBodyStart - Targets can override this to emit stuff before
++/// the first basic block in the function.
++void LoongArchAsmPrinter::emitFunctionBodyStart() {
++
++  MCInstLowering.Initialize(&MF->getContext());
++}
++
++/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
++/// the last basic block in the function.
++void LoongArchAsmPrinter::emitFunctionBodyEnd() {
++
++  // Make sure to terminate any constant pools that were at the end
++  // of the function.
++  if (!InConstantPool)
++    return;
++  InConstantPool = false;
++  OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
++}
++
++void LoongArchAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
++  AsmPrinter::emitBasicBlockEnd(MBB);
++}
++
++/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
++/// exactly one predecessor and the control transfer mechanism between
++/// the predecessor and this block is a fall-through.
++bool LoongArchAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
++                                                            MBB) const {
++  // The predecessor has to be immediately before this block.
++  const MachineBasicBlock *Pred = *MBB->pred_begin();
++
++  // If the predecessor is a switch statement, assume a jump table
++  // implementation, so it is not a fall through.
++  if (const BasicBlock *bb = Pred->getBasicBlock())
++    if (isa<SwitchInst>(bb->getTerminator()))
++      return false;
++
++  // Check default implementation
++  return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
++}
++
++// Print out an operand for an inline asm expression.
++bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
++                                          unsigned OpNum, const char *ExtraCode, raw_ostream &O) {
++  // Does this asm operand have a single letter operand modifier?
++  if (ExtraCode && ExtraCode[0]) {
++    if (ExtraCode[1] != 0) return true; // Unknown modifier.
++ ++ const MachineOperand &MO = MI->getOperand(OpNum); ++ switch (ExtraCode[0]) { ++ default: ++ // See if this is a generic print operand ++ return AsmPrinter::PrintAsmOperand(MI,OpNum,ExtraCode,O); ++ case 'X': // hex const int ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << "0x" << Twine::utohexstr(MO.getImm()); ++ return false; ++ case 'x': // hex const int (low 16 bits) ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); ++ return false; ++ case 'd': // decimal const int ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << MO.getImm(); ++ return false; ++ case 'm': // decimal const int minus 1 ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << MO.getImm() - 1; ++ return false; ++ case 'y': // exact log2 ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ if (!isPowerOf2_64(MO.getImm())) ++ return true; ++ O << Log2_64(MO.getImm()); ++ return false; ++ case 'z': ++ // $r0 if zero, regular printing otherwise ++ if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { ++ O << "$r0"; ++ return false; ++ } ++ // If not, call printOperand as normal. ++ break; ++ case 'D': // Second part of a double word register operand ++ case 'L': // Low order register of a double word register operand ++ case 'M': // High order register of a double word register operand ++ { ++ if (OpNum == 0) ++ return true; ++ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); ++ if (!FlagsOP.isImm()) ++ return true; ++ unsigned Flags = FlagsOP.getImm(); ++ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++ // Number of registers represented by this operand. We are looking ++ // for 2 for 32 bit mode and 1 for 64 bit mode. ++ if (NumVals != 2) { ++ if (Subtarget->is64Bit() && NumVals == 1 && MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ O << '$' << LoongArchInstPrinter::getRegisterName(Reg); ++ return false; ++ } ++ return true; ++ } ++ ++ unsigned RegOp = OpNum; ++ if (!Subtarget->is64Bit()){ ++ // Endianness reverses which register holds the high or low value ++ // between M and L. ++ switch(ExtraCode[0]) { ++ case 'M': ++ RegOp = OpNum + 1; ++ break; ++ case 'L': ++ RegOp = OpNum; ++ break; ++ case 'D': // Always the second part ++ RegOp = OpNum + 1; ++ } ++ if (RegOp >= MI->getNumOperands()) ++ return true; ++ const MachineOperand &MO = MI->getOperand(RegOp); ++ if (!MO.isReg()) ++ return true; ++ unsigned Reg = MO.getReg(); ++ O << '$' << LoongArchInstPrinter::getRegisterName(Reg); ++ return false; ++ } ++ break; ++ } ++ case 'w': ++ // Print LSX registers for the 'f' constraint ++ // In LLVM, the 'w' modifier doesn't need to do anything. ++ // We can just call printOperand as normal. ++ break; ++ case 'u': ++ // Print LASX registers for the 'f' constraint ++ // In LLVM, the 'u' modifier doesn't need to do anything. ++ // We can just call printOperand as normal. 
++ break; ++ } ++ } ++ ++ printOperand(MI, OpNum, O); ++ return false; ++} ++ ++bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, ++ unsigned OpNum, ++ const char *ExtraCode, ++ raw_ostream &O) { ++ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); ++ const MachineOperand &BaseMO = MI->getOperand(OpNum); ++ const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); ++ assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); ++ assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); ++ int Offset = OffsetMO.getImm(); ++ ++ // Currently we are expecting either no ExtraCode or 'D','M','L'. ++ if (ExtraCode) { ++ switch (ExtraCode[0]) { ++ case 'D': ++ case 'M': ++ Offset += 4; ++ break; ++ case 'L': ++ break; ++ default: ++ return true; // Unknown modifier. ++ } ++ } ++ ++ O << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg()) << ", " << Offset; ++ ++ return false; ++} ++ ++void LoongArchAsmPrinter::printOperand(const MachineInstr *MI, int opNum, ++ raw_ostream &O) { ++ const MachineOperand &MO = MI->getOperand(opNum); ++ ++ switch (MO.getType()) { ++ case MachineOperand::MO_Register: ++ O << '$' ++ << StringRef(LoongArchInstPrinter::getRegisterName(MO.getReg())).lower(); ++ break; ++ ++ case MachineOperand::MO_Immediate: ++ O << MO.getImm(); ++ break; ++ ++ case MachineOperand::MO_MachineBasicBlock: ++ MO.getMBB()->getSymbol()->print(O, MAI); ++ return; ++ ++ case MachineOperand::MO_GlobalAddress: ++ getSymbol(MO.getGlobal())->print(O, MAI); ++ break; ++ ++ case MachineOperand::MO_BlockAddress: { ++ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); ++ O << BA->getName(); ++ break; ++ } ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ O << getDataLayout().getPrivateGlobalPrefix() << "CPI" ++ << getFunctionNumber() << "_" << MO.getIndex(); ++ if (MO.getOffset()) ++ O << "+" << MO.getOffset(); ++ break; ++ ++ default: ++ llvm_unreachable(""); ++ } ++} ++ ++void LoongArchAsmPrinter:: ++printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ // Load/Store memory operands -- imm($reg) ++ // If PIC target the target is loaded as the ++ // pattern lw $25,%call16($28) ++ ++ printOperand(MI, opNum+1, O); ++ O << "("; ++ printOperand(MI, opNum, O); ++ O << ")"; ++} ++ ++void LoongArchAsmPrinter:: ++printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ // when using stack locations for not load/store instructions ++ // print the same way as all normal 3 operand instructions. ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchAsmPrinter:: ++printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) { ++ if (i != opNum) O << ", "; ++ printOperand(MI, i, O); ++ } ++} ++ ++void LoongArchAsmPrinter::emitStartOfAsmFile(Module &M) { ++ LoongArchTargetStreamer &TS = getTargetStreamer(); ++ ++ // LoongArchTargetStreamer has an initialization order problem when emitting an ++ // object file directly (see LoongArchTargetELFStreamer for full details). Work ++ // around it by re-initializing the PIC state here. ++ TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent()); ++ ++ // Compute LoongArch architecture attributes based on the default subtarget ++ // that we'd have constructed. Module level directives aren't LTO ++ // clean anyhow. 
++ // FIXME: For ifunc related functions we could iterate over and look ++ // for a feature string that doesn't match the default one. ++ const Triple &TT = TM.getTargetTriple(); ++ StringRef CPU = LoongArch_MC::selectLoongArchCPU(TT, TM.getTargetCPU()); ++ StringRef FS = TM.getTargetFeatureString(); ++ const LoongArchTargetMachine &MTM = static_cast(TM); ++ const LoongArchSubtarget STI(TT, CPU, FS, MTM, None); ++ ++ TS.updateABIInfo(STI); ++} ++ ++void LoongArchAsmPrinter::emitInlineAsmStart() const { ++ ++ OutStreamer->AddBlankLine(); ++} ++ ++void LoongArchAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, ++ const MCSubtargetInfo *EndInfo) const { ++ OutStreamer->AddBlankLine(); ++} ++ ++void LoongArchAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg) { ++ MCInst I; ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI, ++ unsigned Opcode, unsigned Reg1, ++ unsigned Reg2) { ++ MCInst I; ++ // ++ // Because of the current td files for LoongArch32, the operands for MTC1 ++ // appear backwards from their normal assembly order. It's not a trivial ++ // change to fix this in the td file so we adjust for it here. ++ // ++ if (Opcode == LoongArch::MOVGR2FR_W) { ++ unsigned Temp = Reg1; ++ Reg1 = Reg2; ++ Reg2 = Temp; ++ } ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg1)); ++ I.addOperand(MCOperand::createReg(Reg2)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI, ++ unsigned Opcode, unsigned Reg1, ++ unsigned Reg2, unsigned Reg3) { ++ MCInst I; ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg1)); ++ I.addOperand(MCOperand::createReg(Reg2)); ++ I.addOperand(MCOperand::createReg(Reg3)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI, ++ unsigned MovOpc, unsigned Reg1, ++ unsigned Reg2, unsigned FPReg1, ++ unsigned FPReg2, bool LE) { ++ if (!LE) { ++ unsigned temp = Reg1; ++ Reg1 = Reg2; ++ Reg2 = temp; ++ } ++ EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1); ++ EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2); ++} ++ ++void LoongArchAsmPrinter::emitEndOfAsmFile(Module &M) { ++ // return to the text section ++ OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); ++} ++ ++void LoongArchAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { ++// Now this is unimplemented. ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::FUNCTION_ENTER); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::FUNCTION_EXIT); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::TAIL_CALL); ++} ++ ++void LoongArchAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, ++ raw_ostream &OS) { ++ // TODO: implement ++} ++ ++bool LoongArchAsmPrinter::isLongBranchPseudo(int Opcode) const { ++ return (Opcode == LoongArch::LONG_BRANCH_ADDIW ++ || Opcode == LoongArch::LONG_BRANCH_ADDIW2Op ++ || Opcode == LoongArch::LONG_BRANCH_ADDID ++ || Opcode == LoongArch::LONG_BRANCH_ADDID2Op ++ || Opcode == LoongArch::LONG_BRANCH_PCADDU12I); ++} ++ ++// Force static initialization. 
++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() { ++ RegisterAsmPrinter X(getTheLoongArch32Target()); ++ RegisterAsmPrinter A(getTheLoongArch64Target()); ++} +diff --git a/lib/Target/LoongArch/LoongArchAsmPrinter.h b/lib/Target/LoongArch/LoongArchAsmPrinter.h +new file mode 100644 +index 00000000..0facaa29 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchAsmPrinter.h +@@ -0,0 +1,138 @@ ++//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -----------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// LoongArch Assembly printer class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H ++ ++#include "LoongArchMCInstLower.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/CodeGen/AsmPrinter.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/Support/Compiler.h" ++#include ++#include ++#include ++ ++namespace llvm { ++ ++class MCOperand; ++class MCSubtargetInfo; ++class MCSymbol; ++class MachineBasicBlock; ++class MachineConstantPool; ++class MachineFunction; ++class MachineInstr; ++class MachineOperand; ++class LoongArchFunctionInfo; ++class LoongArchTargetStreamer; ++class Module; ++class raw_ostream; ++class TargetMachine; ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { ++ LoongArchTargetStreamer &getTargetStreamer() const; ++ ++ void EmitInstrWithMacroNoAT(const MachineInstr *MI); ++ ++ //===------------------------------------------------------------------===// ++ // XRay implementation ++ //===------------------------------------------------------------------===// ++ ++public: ++ // XRay-specific lowering for LoongArch. ++ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); ++ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); ++ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); ++ ++private: ++ /// MCP - Keep a pointer to constantpool entries of the current ++ /// MachineFunction. ++ const MachineConstantPool *MCP = nullptr; ++ ++ /// InConstantPool - Maintain state when emitting a sequence of constant ++ /// pool entries so we can properly mark them as data regions. ++ bool InConstantPool = false; ++ ++ void EmitSled(const MachineInstr &MI, SledKind Kind); ++ ++ // tblgen'erated function. ++ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, ++ // and PseudoIndirectBranch64 as a JIRL as appropriate ++ // for the target. ++ void emitPseudoIndirectBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ void emitPseudoTailBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
++ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); ++ ++ void emitInlineAsmStart() const override; ++ ++ void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, ++ const MCSubtargetInfo *EndInfo) const override; ++ ++ void EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg); ++ ++ void EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg1, unsigned Reg2); ++ ++ void EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg1, unsigned Reg2, unsigned Reg3); ++ ++ void EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc, ++ unsigned Reg1, unsigned Reg2, unsigned FPReg1, ++ unsigned FPReg2, bool LE); ++ ++ bool isLongBranchPseudo(int Opcode) const; ++ ++public: ++ const LoongArchSubtarget *Subtarget; ++ const LoongArchFunctionInfo *LoongArchFI; ++ LoongArchMCInstLower MCInstLowering; ++ ++ explicit LoongArchAsmPrinter(TargetMachine &TM, ++ std::unique_ptr Streamer) ++ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} ++ ++ StringRef getPassName() const override { return "LoongArch Assembly Printer"; } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++ void emitInstruction(const MachineInstr *MI) override; ++ const char *getCurrentABIString() const; ++ void emitFunctionEntryLabel() override; ++ void emitFunctionBodyStart() override; ++ void emitFunctionBodyEnd() override; ++ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override; ++ bool isBlockOnlyReachableByFallthrough( ++ const MachineBasicBlock* MBB) const override; ++ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, ++ const char *ExtraCode, raw_ostream &O) override; ++ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, ++ const char *ExtraCode, raw_ostream &O) override; ++ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void emitStartOfAsmFile(Module &M) override; ++ void emitEndOfAsmFile(Module &M) override; ++ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H +diff --git a/lib/Target/LoongArch/LoongArchCCState.cpp b/lib/Target/LoongArch/LoongArchCCState.cpp +new file mode 100644 +index 00000000..18996f1e +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchCCState.cpp +@@ -0,0 +1,165 @@ ++//===---- LoongArchCCState.cpp - CCState with LoongArch specific extensions ---------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchCCState.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/IR/Module.h" ++ ++using namespace llvm; ++ ++/// This function returns true if CallSym is a long double emulation routine. 
++static bool isF128SoftLibCall(const char *CallSym) {
++  const char *const LibCalls[] = {
++      "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2",
++      "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti",
++      "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf",
++      "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf",
++      "__floatuntitf", "__getf2", "__gttf2", "__letf2",
++      "__lttf2", "__multf3", "__netf2", "__powitf2",
++      "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
++      "ceill", "copysignl", "cosl", "exp2l",
++      "expl", "floorl", "fmal", "fmaxl",
++      "fmodl", "log10l", "log2l", "logl",
++      "nearbyintl", "powl", "rintl", "roundl",
++      "sinl", "sqrtl", "truncl"};
++
++  // Check that LibCalls is sorted alphabetically.
++  auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
++  assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
++  return std::binary_search(std::begin(LibCalls), std::end(LibCalls),
++                            CallSym, Comp);
++}
++
++/// This function returns true if Ty is fp128, {f128} or i128 which was
++/// originally an fp128.
++static bool originalTypeIsF128(const Type *Ty, const char *Func) {
++  if (Ty->isFP128Ty())
++    return true;
++
++  if (Ty->isStructTy() && Ty->getStructNumElements() == 1 &&
++      Ty->getStructElementType(0)->isFP128Ty())
++    return true;
++
++  // If the Ty is i128 and the function being called is a long double emulation
++  // routine, then the original type is f128.
++  return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
++}
++
++/// Return true if the original type was vXfXX.
++static bool originalEVTTypeIsVectorFloat(EVT Ty) {
++  if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
++    return true;
++
++  return false;
++}
++
++/// Return true if the original type was vXfXX / vXfXX.
++static bool originalTypeIsVectorFloat(const Type *Ty) {
++  if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
++    return true;
++
++  return false;
++}
++
++LoongArchCCState::SpecialCallingConvType
++LoongArchCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
++                                                 const LoongArchSubtarget &Subtarget) {
++  LoongArchCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv;
++  return SpecialCallingConv;
++}
++
++void LoongArchCCState::PreAnalyzeCallResultForF128(
++    const SmallVectorImpl<ISD::InputArg> &Ins,
++    const Type *RetTy, const char *Call) {
++  for (unsigned i = 0; i < Ins.size(); ++i) {
++    OriginalArgWasF128.push_back(
++        originalTypeIsF128(RetTy, Call));
++    OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy());
++  }
++}
++
++/// Identify lowered values that originated from f128 or float arguments and
++/// record this for use by RetCC_LoongArchLP64.
++void LoongArchCCState::PreAnalyzeReturnForF128(
++    const SmallVectorImpl<ISD::OutputArg> &Outs) {
++  const MachineFunction &MF = getMachineFunction();
++  for (unsigned i = 0; i < Outs.size(); ++i) {
++    OriginalArgWasF128.push_back(
++        originalTypeIsF128(MF.getFunction().getReturnType(), nullptr));
++    OriginalArgWasFloat.push_back(
++        MF.getFunction().getReturnType()->isFloatingPointTy());
++  }
++}
++
++/// Identify lowered values that originated from vXfXX and record
++/// this.
++void LoongArchCCState::PreAnalyzeCallResultForVectorFloat(
++    const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) {
++  for (unsigned i = 0; i < Ins.size(); ++i) {
++    OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy));
++  }
++}
++
++/// Identify lowered values that originated from vXfXX arguments and record
++/// this.
++void LoongArchCCState::PreAnalyzeReturnForVectorFloat( ++ const SmallVectorImpl &Outs) { ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ ISD::OutputArg Out = Outs[i]; ++ OriginalRetWasFloatVector.push_back( ++ originalEVTTypeIsVectorFloat(Out.ArgVT)); ++ } ++} ++ ++/// Identify lowered values that originated from f128, float and sret to vXfXX ++/// arguments and record this. ++void LoongArchCCState::PreAnalyzeCallOperands( ++ const SmallVectorImpl &Outs, ++ std::vector &FuncArgs, ++ const char *Func) { ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; ++ ++ OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); ++ OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); ++ OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); ++ CallOperandIsFixed.push_back(Outs[i].IsFixed); ++ } ++} ++ ++/// Identify lowered values that originated from f128, float and vXfXX arguments ++/// and record this. ++void LoongArchCCState::PreAnalyzeFormalArgumentsForF128( ++ const SmallVectorImpl &Ins) { ++ const MachineFunction &MF = getMachineFunction(); ++ for (unsigned i = 0; i < Ins.size(); ++i) { ++ Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); ++ ++ // SRet arguments cannot originate from f128 or {f128} returns so we just ++ // push false. We have to handle this specially since SRet arguments ++ // aren't mapped to an original argument. ++ if (Ins[i].Flags.isSRet()) { ++ OriginalArgWasF128.push_back(false); ++ OriginalArgWasFloat.push_back(false); ++ OriginalArgWasFloatVector.push_back(false); ++ continue; ++ } ++ ++ assert(Ins[i].getOrigArgIndex() < MF.getFunction().arg_size()); ++ std::advance(FuncArg, Ins[i].getOrigArgIndex()); ++ ++ OriginalArgWasF128.push_back( ++ originalTypeIsF128(FuncArg->getType(), nullptr)); ++ OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); ++ ++ // The LoongArch vector ABI exhibits a corner case of sorts or quirk; if the ++ // first argument is actually an SRet pointer to a vector, then the next ++ // argument slot is $a2. ++ OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); ++ } ++} +diff --git a/lib/Target/LoongArch/LoongArchCCState.h b/lib/Target/LoongArch/LoongArchCCState.h +new file mode 100644 +index 00000000..56d5b89b +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchCCState.h +@@ -0,0 +1,165 @@ ++//===---- LoongArchCCState.h - CCState with LoongArch specific extensions -----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LoongArchCCSTATE_H ++#define LoongArchCCSTATE_H ++ ++#include "LoongArchISelLowering.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++ ++namespace llvm { ++class SDNode; ++class LoongArchSubtarget; ++ ++class LoongArchCCState : public CCState { ++public: ++ enum SpecialCallingConvType { NoSpecialCallingConv }; ++ ++ /// Determine the SpecialCallingConvType for the given callee ++ static SpecialCallingConvType ++ getSpecialCallingConvForCallee(const SDNode *Callee, ++ const LoongArchSubtarget &Subtarget); ++ ++private: ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64. 
++ void PreAnalyzeCallResultForF128(const SmallVectorImpl &Ins, ++ const Type *RetTy, const char * Func); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64. ++ void PreAnalyzeReturnForF128(const SmallVectorImpl &Outs); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this. ++ void ++ PreAnalyzeCallOperands(const SmallVectorImpl &Outs, ++ std::vector &FuncArgs, ++ const char *Func); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64. ++ void ++ PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); ++ ++ void ++ PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl &Ins, ++ const Type *RetTy); ++ ++ void PreAnalyzeFormalArgumentsForVectorFloat( ++ const SmallVectorImpl &Ins); ++ ++ void ++ PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); ++ ++ /// Records whether the value has been lowered from an f128. ++ SmallVector OriginalArgWasF128; ++ ++ /// Records whether the value has been lowered from float. ++ SmallVector OriginalArgWasFloat; ++ ++ /// Records whether the value has been lowered from a floating point vector. ++ SmallVector OriginalArgWasFloatVector; ++ ++ /// Records whether the return value has been lowered from a floating point ++ /// vector. ++ SmallVector OriginalRetWasFloatVector; ++ ++ /// Records whether the value was a fixed argument. ++ /// See ISD::OutputArg::IsFixed, ++ SmallVector CallOperandIsFixed; ++ ++ // FIXME: This should probably be a fully fledged calling convention. ++ SpecialCallingConvType SpecialCallingConv; ++ ++public: ++ LoongArchCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, ++ SmallVectorImpl &locs, LLVMContext &C, ++ SpecialCallingConvType SpecialCC = NoSpecialCallingConv) ++ : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {} ++ ++ void ++ AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ CCAssignFn Fn, ++ std::vector &FuncArgs, ++ const char *Func) { ++ PreAnalyzeCallOperands(Outs, FuncArgs, Func); ++ CCState::AnalyzeCallOperands(Outs, Fn); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasFloatVector.clear(); ++ CallOperandIsFixed.clear(); ++ } ++ ++ // The AnalyzeCallOperands in the base class is not usable since we must ++ // provide a means of accessing ArgListEntry::IsFixed. Delete them from this ++ // class. This doesn't stop them being used via the base class though. 
++ void AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ CCAssignFn Fn) = delete; ++ void AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ SmallVectorImpl &Flags, ++ CCAssignFn Fn) = delete; ++ ++ void AnalyzeFormalArguments(const SmallVectorImpl &Ins, ++ CCAssignFn Fn) { ++ PreAnalyzeFormalArgumentsForF128(Ins); ++ CCState::AnalyzeFormalArguments(Ins, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ } ++ ++ void AnalyzeCallResult(const SmallVectorImpl &Ins, ++ CCAssignFn Fn, const Type *RetTy, ++ const char *Func) { ++ PreAnalyzeCallResultForF128(Ins, RetTy, Func); ++ PreAnalyzeCallResultForVectorFloat(Ins, RetTy); ++ CCState::AnalyzeCallResult(Ins, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ } ++ ++ void AnalyzeReturn(const SmallVectorImpl &Outs, ++ CCAssignFn Fn) { ++ PreAnalyzeReturnForF128(Outs); ++ PreAnalyzeReturnForVectorFloat(Outs); ++ CCState::AnalyzeReturn(Outs, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ } ++ ++ bool CheckReturn(const SmallVectorImpl &ArgsFlags, ++ CCAssignFn Fn) { ++ PreAnalyzeReturnForF128(ArgsFlags); ++ PreAnalyzeReturnForVectorFloat(ArgsFlags); ++ bool Return = CCState::CheckReturn(ArgsFlags, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ return Return; ++ } ++ ++ bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; } ++ bool WasOriginalArgFloat(unsigned ValNo) { ++ return OriginalArgWasFloat[ValNo]; ++ } ++ bool WasOriginalArgVectorFloat(unsigned ValNo) const { ++ return OriginalArgWasFloatVector[ValNo]; ++ } ++ bool WasOriginalRetVectorFloat(unsigned ValNo) const { ++ return OriginalRetWasFloatVector[ValNo]; ++ } ++ bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } ++ SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } ++}; ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchCallingConv.td b/lib/Target/LoongArch/LoongArchCallingConv.td +new file mode 100644 +index 00000000..02bdb323 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchCallingConv.td +@@ -0,0 +1,292 @@ ++//===-- LoongArchCallingConv.td - Calling Conventions for LoongArch --*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// This describes the calling conventions for LoongArch architecture. ++//===----------------------------------------------------------------------===// ++ ++/// CCIfSubtarget - Match if the current subtarget has a feature F. ++class CCIfSubtarget ++ : CCIf" ++ "(State.getMachineFunction().getSubtarget()).", ++ F), ++ A>; ++ ++// The inverse of CCIfSubtarget ++class CCIfSubtargetNot : CCIfSubtarget; ++ ++/// Match if the original argument (before lowering) was a float. ++/// For example, this is true for i32's that were lowered from soft-float. ++class CCIfOrigArgWasNotFloat ++ : CCIf<"!static_cast(&State)->WasOriginalArgFloat(ValNo)", ++ A>; ++ ++/// Match if the original argument (before lowering) was a 128-bit float (i.e. ++/// long double). 
++class CCIfOrigArgWasF128
++    : CCIf<"static_cast(&State)->WasOriginalArgF128(ValNo)", A>;
++
++/// Match if this specific argument is a vararg.
++/// This is slightly different from CCIfVarArg, which matches if any argument
++/// is a vararg.
++class CCIfArgIsVarArg
++    : CCIf<"!static_cast(&State)->IsCallOperandFixed(ValNo)", A>;
++
++/// Match if the return was a floating point vector.
++class CCIfOrigArgWasNotVectorFloat
++    : CCIf<"!static_cast(&State)"
++           "->WasOriginalRetVectorFloat(ValNo)", A>;
++
++/// Match if the special calling conv is the specified value.
++class CCIfSpecialCallingConv
++    : CCIf<"static_cast(&State)->getSpecialCallingConv() == "
++           "LoongArchCCState::" # CC, A>;
++
++// For soft-float, an f128 value is returned in the GPR pair A0_64, A1_64.
++def RetCC_F128SoftFloat : CallingConv<[
++  CCAssignToReg<[A0_64, A1_64]>
++]>;
++
++//
++// For hard-float, f128 values are returned as a pair of f64's rather than a
++// pair of i64's.
++def RetCC_F128HardFloat : CallingConv<[
++  //CCBitConvertToType,
++
++  // Contrary to the ABI documentation, a struct containing a long double is
++  // returned in $f0 and $f1 instead of the usual $f0 and $f2. This is to
++  // match the de facto ABI as implemented by GCC.
++  CCIfInReg>,
++
++  CCAssignToReg<[A0_64, A1_64]>
++]>;
++
++// Handle F128 specially since we can't identify the original type in the
++// tablegen-erated code.
++def RetCC_F128 : CallingConv<[
++  CCIfSubtarget<"useSoftFloat()",
++      CCIfType<[i64], CCDelegateTo>>,
++  CCIfSubtargetNot<"useSoftFloat()",
++      CCIfType<[i64], CCDelegateTo>>
++]>;
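++
++// Illustrative example (a sketch of the intended flow, not an extra rule):
++// for an LP64 function returning `long double`, the f128 result is lowered
++// to a pair of i64 values. PreAnalyzeReturnForF128() records that both
++// pieces originated from an f128, so CCIfOrigArgWasF128 matches in
++// RetCC_LoongArchLP64 below and the pieces are routed through RetCC_F128,
++// which picks the soft-float or hard-float variant above; under
++// useSoftFloat() the two i64 pieces land in A0_64 and A1_64.
++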
++//===----------------------------------------------------------------------===//
++// LoongArch ILP32 Calling Convention
++//===----------------------------------------------------------------------===//
++
++def CC_LoongArchILP32 : CallingConv<[
++  // Promote i8/i16 arguments to i32.
++  CCIfType<[i1, i8, i16], CCPromoteToType>,
++
++  // Integer values get stored in stack slots that are 4 bytes in
++  // size and 4-byte aligned.
++  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
++
++  // f64 values get stored in stack slots that are 8 bytes in
++  // size and 8-byte aligned.
++  CCIfType<[f64], CCAssignToStack<8, 8>>
++]>;
++
++// Only the return rules are defined here for the 32-bit ABI. The rules for
++// argument passing are defined in LoongArchISelLowering.cpp.
++def RetCC_LoongArchILP32 : CallingConv<[
++  // Promote i1/i8/i16 return values to i32.
++  CCIfType<[i1, i8, i16], CCPromoteToType>,
++
++  // i32 values are returned in registers A0, A1, unless the original return
++  // type was a vector of floats.
++  CCIfOrigArgWasNotVectorFloat>>,
++
++  // f32 values are returned in registers F0, F1.
++  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
++
++  // f64 values are returned in F0_64 and F1_64 when hasBasicD() is true.
++  CCIfType<[f64], CCIfSubtarget<"hasBasicD()", CCAssignToReg<[F0_64, F1_64]>>>
++]>;
++
++def CC_LoongArchILP32_FP32 : CustomCallingConv;
++def CC_LoongArchILP32_FP64 : CustomCallingConv;
++def CC_LoongArch_F128 : CustomCallingConv;
++
++def CC_LoongArchILP32_FP : CallingConv<[
++  CCIfSubtargetNot<"hasBasicD()", CCDelegateTo>,
++  CCIfSubtarget<"hasBasicD()", CCDelegateTo>
++]>;
++
++//===----------------------------------------------------------------------===//
++// LoongArch LP64 Calling Convention
++//===----------------------------------------------------------------------===//
++
++def CC_LoongArchLP64_SoftFloat : CallingConv<[
++  CCAssignToReg<[A0, A1, A2, A3,
++                 A4, A5, A6, A7]>,
++  CCAssignToStack<4, 8>
++]>;
++
++def CC_LoongArchLP64 : CallingConv<[
++
++  // All integers (except soft-float integers) are promoted to 64-bit.
++  CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat>>,
++
++  // The only i32's we have left are soft-float arguments.
++  CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo>>,
++
++  // Integer arguments are passed in integer registers.
++  //CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
++  //                                         A4_64, A5_64, A6_64, A7_64],
++  //                                        [F0_64, F1_64, F2_64, F3_64,
++  //                                         F4_64, F5_64, F6_64, F7_64]>>,
++  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
++                                 A4_64, A5_64, A6_64, A7_64]>>,
++
++  // f32 arguments are passed in single precision FP registers.
++  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3,
++                                 F4, F5, F6, F7]>>,
++
++  // f64 arguments are passed in double precision FP registers.
++  CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64,
++                                 F4_64, F5_64, F6_64, F7_64]>>,
++
++  // Any remaining f32 arguments are passed in general-purpose registers.
++  CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>,
++
++  // Any remaining f64 arguments are passed in 64-bit general-purpose registers.
++  CCIfType<[f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
++                                 A4_64, A5_64, A6_64, A7_64]>>,
++
++  CCIfSubtarget<"hasLSX()",
++      CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++          CCAssignToRegWithShadow<[VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7],
++                                  [A0_64, A1_64, A2_64, A3_64,
++                                   A4_64, A5_64, A6_64, A7_64]>>>,
++  CCIfSubtarget<"hasLASX()",
++      CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
++          CCAssignToRegWithShadow<[XR0, XR1, XR2, XR3, XR4, XR5, XR6, XR7],
++                                  [A0_64, A1_64, A2_64, A3_64,
++                                   A4_64, A5_64, A6_64, A7_64]>>>,
++
++  // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
++  CCIfType<[f32], CCAssignToStack<4, 8>>,
++  CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
++  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++           CCAssignToStack<16, 16>>,
++  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
++           CCAssignToStack<32, 32>>
++]>;
++
++// LP64 variable arguments.
++// All arguments are passed in integer registers.
++def CC_LoongArchLP64_VarArg : CallingConv<[
++  // All integers are promoted to 64-bit.
++  CCIfType<[i8, i16, i32], CCPromoteToType>,
++
++  CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>,
++
++  CCIfType<[i64], CCIfOrigArgWasF128>>,
++
++  CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
++                                      A4_64, A5_64, A6_64, A7_64]>>,
++
++  // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
++  CCIfType<[f32], CCAssignToStack<4, 8>>,
++  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
++]>;
++
++def RetCC_LoongArchLP64 : CallingConv<[
++  // f128 needs to be handled similarly to f32 and f64. However, f128 is not
++  // legal and is lowered to i128, which is further lowered to a pair of i64's.
++  // This presents us with a problem for the calling convention since
++  // hard-float still needs to return the value in FPU registers, while
++  // soft-float returns the pair in A0_64 and A1_64. We therefore resort to a
++  // pre-analyze step (see PreAnalyzeReturnForF128()) to pass information on
++  // whether the result was originally an f128 into the tablegen-erated code.
++  //
++  // f128 should only occur for the 64-bit ABI where long double is 128-bit.
++  CCIfType<[i64], CCIfOrigArgWasF128>>,
++
++  CCIfType<[i8, i16, i32, i64], CCIfInReg>>,
++
++  // i64 values are returned in registers A0_64, A1_64.
++  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>,
++
++  CCIfSubtarget<"hasLSX()",
++      CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[VR0]>>>,
++
++  CCIfSubtarget<"hasLASX()",
++      CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCAssignToReg<[XR0]>>>,
++
++  CCIfSubtarget<"hasLASX()",
++      CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>>,
++
++  // f32 values are returned in registers F0, F1.
++  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
++
++  // f64 values are returned in registers F0_64, F1_64.
++  CCIfType<[f64], CCAssignToReg<[F0_64, F1_64]>>
++]>;
++
++//===----------------------------------------------------------------------===//
++// LoongArch Calling Convention Dispatch
++//===----------------------------------------------------------------------===//
++
++def RetCC_LoongArch : CallingConv<[
++  CCIfSubtarget<"isABI_LP64()", CCDelegateTo>,
++  CCDelegateTo
++]>;
++
++def CC_LoongArch_ByVal : CallingConv<[
++  CCIfSubtarget<"isABI_ILP32()", CCIfByVal>>,
++  CCIfByVal>
++]>;
++
++def CC_LoongArch_FixedArg : CallingConv<[
++  CCIfByVal>,
++  //CCIfByVal>>,
++
++  // f128 needs to be handled similarly to f32 and f64 on hard-float. However,
++  // f128 is not legal and is lowered to i128, which is further lowered to a
++  // pair of i64's.
++  // This presents us with a problem for the calling convention since
++  // hard-float still needs to pass them in FPU registers. We therefore resort
++  // to a pre-analyze step (see PreAnalyzeFormalArgumentsForF128()) to pass
++  // information on whether the argument was originally an f128 into the
++  // tablegen-erated code.
++  //
++  // f128 should only occur for the 64-bit ABI where long double is 128-bit.
++  CCIfType<[i64],
++      CCIfSubtargetNot<"useSoftFloat()",
++          CCIfOrigArgWasF128>>>,
++
++  CCIfSubtarget<"isABI_ILP32()", CCDelegateTo>,
++  CCDelegateTo
++]>;
++
++def CC_LoongArch_VarArg : CallingConv<[
++  CCIfByVal>,
++
++  CCIfSubtarget<"isABI_ILP32()", CCDelegateTo>,
++  CCDelegateTo
++]>;
++
++def CC_LoongArch : CallingConv<[
++  CCIfVarArg>>,
++  CCDelegateTo
++]>;
++
++//===----------------------------------------------------------------------===//
++// Callee-saved register lists.
++//===----------------------------------------------------------------------===// ++ ++def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 24), RA, FP, ++ (sequence "S%u", 8, 0))>; ++ ++def CSR_ILP32 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA, FP, ++ (sequence "S%u", 8, 0))>; ++ ++def CSR_LP64 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA_64, FP_64, ++ (sequence "S%u_64", 8, 0))>; +diff --git a/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +new file mode 100644 +index 00000000..c192f7fc +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +@@ -0,0 +1,2438 @@ ++//===-- LoongArchExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a pass that expands pseudo instructions into target ++// instructions to allow proper scheduling, if-conversion, and other late ++// optimizations. This pass should be run after register allocation but before ++// the post-regalloc scheduling pass. ++// ++// This is currently only used for expanding atomic pseudos after register ++// allocation. We do this to avoid the fast register allocator introducing ++// spills between ll and sc. These stores cause some LoongArch implementations to ++// abort the atomic RMW sequence. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/CodeGen/LivePhysRegs.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-pseudo" ++ ++namespace { ++ class LoongArchExpandPseudo : public MachineFunctionPass { ++ public: ++ static char ID; ++ LoongArchExpandPseudo() : MachineFunctionPass(ID) {} ++ ++ const LoongArchInstrInfo *TII; ++ const LoongArchSubtarget *STI; ++ ++ bool runOnMachineFunction(MachineFunction &Fn) override; ++ ++ MachineFunctionProperties getRequiredProperties() const override { ++ return MachineFunctionProperties().set( ++ MachineFunctionProperties::Property::NoVRegs); ++ } ++ ++ StringRef getPassName() const override { ++ return "LoongArch pseudo instruction expansion pass"; ++ } ++ ++ private: ++ bool expandAtomicCmpSwap(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, unsigned Size); ++ bool expandXINSERT_BOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandINSERT_HOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandXINSERT_FWOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandAtomicBinOpSubword(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); 
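++
++  // Note: the subword (i8/i16) atomic pseudos are expanded into an
++  // LL.W/SC.W retry loop on the aligned containing word, using the
++  // mask/shift operands carried by the *_POSTRA pseudo, and a DBAR is
++  // inserted at the merge block when one is not already present (see
++  // expandAtomicCmpSwapSubword below for the compare-and-swap case).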
++ ++ bool expandPseudoCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandPseudoTailCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I); ++ ++ bool expandPseudoTEQ(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandLoadAddr(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB); ++ bool expandMBB(MachineBasicBlock &MBB); ++ }; ++ char LoongArchExpandPseudo::ID = 0; ++} ++ ++static bool hasDbar(MachineBasicBlock *MBB) { ++ ++ for (MachineBasicBlock::iterator MBBb = MBB->begin(), MBBe = MBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ if (MBBb->getOpcode() == LoongArch::DBAR) ++ return true; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ return false; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned ZERO = LoongArch::ZERO; ++ unsigned BNE = LoongArch::BNE32; ++ unsigned BEQ = LoongArch::BEQ32; ++ unsigned SEOp = ++ I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ? LoongArch::EXT_W_B32 : LoongArch::EXT_W_H32; ++ ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Mask = I->getOperand(2).getReg(); ++ unsigned ShiftCmpVal = I->getOperand(3).getReg(); ++ unsigned Mask2 = I->getOperand(4).getReg(); ++ unsigned ShiftNewVal = I->getOperand(5).getReg(); ++ unsigned ShiftAmnt = I->getOperand(6).getReg(); ++ unsigned Scratch = I->getOperand(7).getReg(); ++ unsigned Scratch2 = I->getOperand(8).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, loop2MBB); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... 
++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ loop1MBB->addSuccessor(sinkMBB); ++ loop1MBB->addSuccessor(loop2MBB); ++ loop1MBB->normalizeSuccProbs(); ++ loop2MBB->addSuccessor(loop1MBB); ++ loop2MBB->addSuccessor(sinkMBB); ++ loop2MBB->normalizeSuccProbs(); ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // loop1MBB: ++ // ll dest, 0(ptr) ++ // and Mask', dest, Mask ++ // bne Mask', ShiftCmpVal, exitMBB ++ BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loop1MBB, DL, TII->get(LoongArch::AND32), Scratch2) ++ .addReg(Scratch) ++ .addReg(Mask); ++ BuildMI(loop1MBB, DL, TII->get(BNE)) ++ .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB); ++ ++ // loop2MBB: ++ // and dest, dest, mask2 ++ // or dest, dest, ShiftNewVal ++ // sc dest, dest, 0(ptr) ++ // beq dest, $0, loop1MBB ++ BuildMI(loop2MBB, DL, TII->get(LoongArch::AND32), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(Mask2); ++ BuildMI(loop2MBB, DL, TII->get(LoongArch::OR32), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(ShiftNewVal); ++ BuildMI(loop2MBB, DL, TII->get(SC), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(Ptr) ++ .addImm(0); ++ BuildMI(loop2MBB, DL, TII->get(BEQ)) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(ZERO) ++ .addMBB(loop1MBB); ++ ++ // sinkMBB: ++ // srl srlres, Mask', shiftamt ++ // sign_extend dest,srlres ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) ++ .addReg(Scratch2) ++ .addReg(ShiftAmnt); ++ ++ BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); ++ ++ if (!hasDbar(sinkMBB)) { ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *loop2MBB); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ const unsigned Size = ++ I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8; ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, ZERO, BNE, BEQ, MOVE; ++ ++ if (Size == 4) { ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ BNE = LoongArch::BNE32; ++ BEQ = LoongArch::BEQ32; ++ ++ ZERO = LoongArch::ZERO; ++ MOVE = LoongArch::OR32; ++ } else { ++ LL = LoongArch::LL_D; ++ SC = LoongArch::SC_D; ++ ZERO = LoongArch::ZERO_64; ++ BNE = LoongArch::BNE; ++ BEQ = LoongArch::BEQ; ++ MOVE = LoongArch::OR; ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned OldVal = I->getOperand(2).getReg(); ++ unsigned NewVal = I->getOperand(3).getReg(); ++ unsigned Scratch = I->getOperand(4).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, loop2MBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. 
++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... ++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ loop1MBB->addSuccessor(exitMBB); ++ loop1MBB->addSuccessor(loop2MBB); ++ loop1MBB->normalizeSuccProbs(); ++ loop2MBB->addSuccessor(loop1MBB); ++ loop2MBB->addSuccessor(exitMBB); ++ loop2MBB->normalizeSuccProbs(); ++ ++ // loop1MBB: ++ // ll dest, 0(ptr) ++ // bne dest, oldval, exitMBB ++ BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); ++ BuildMI(loop1MBB, DL, TII->get(BNE)) ++ .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB); ++ ++ // loop2MBB: ++ // move scratch, NewVal ++ // sc Scratch, Scratch, 0(ptr) ++ // beq Scratch, $0, loop1MBB ++ BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO); ++ BuildMI(loop2MBB, DL, TII->get(SC), Scratch) ++ .addReg(Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loop2MBB, DL, TII->get(BEQ)) ++ .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); ++ ++ if (!hasDbar(exitMBB)) { ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *loop2MBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandXINSERT_FWOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned Dsttmp = I->getOperand(4).getReg(); ++ unsigned RI = I->getOperand(5).getReg(); ++ unsigned RJ = I->getOperand(6).getReg(); ++ Dsttmp = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *blocks[11]; ++ MachineFunction::iterator It = ++BB.getIterator(); ++ for (int i = 0; i < 11; i++) { ++ blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); ++ MF->insert(It, blocks[i]); ++ } ++ ++ MachineBasicBlock *mainMBB = blocks[0]; ++ MachineBasicBlock *FirstMBB = blocks[1]; ++ MachineBasicBlock *sinkMBB = blocks[9]; ++ MachineBasicBlock *exitMBB = blocks[10]; ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ for (int i = 1; i < 9; i++) { ++ mainMBB->addSuccessor(blocks[i]); ++ blocks[i]->addSuccessor(sinkMBB); ++ } ++ ++ unsigned ADDI, BLT, ZERO; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ for (int i = 1; i < 8; i++) { ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); ++ BuildMI(mainMBB, DL, TII->get(BLT)) ++ .addReg(LaneReg) ++ .addReg(RI) ++ .addMBB(blocks[i + 1]); ++ } ++ ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); ++ ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(RJ) ++ .addImm(7); ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ for (int i = 0; i < 7; i++) { ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(RJ) ++ .addImm(i); ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ } ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) ++ .addReg(Dsttmp) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ for (int i = 0; i < 11; i++) { ++ computeAndAddLiveIns(LiveRegs, *blocks[i]); ++ } ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandINSERT_HOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned Dsttmp = I->getOperand(4).getReg(); ++ unsigned RI = I->getOperand(5).getReg(); ++ Dsttmp = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *blocks[11]; ++ MachineFunction::iterator It = ++BB.getIterator(); ++ for (int i = 0; i < 11; i++) { ++ blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); ++ MF->insert(It, blocks[i]); ++ } ++ ++ MachineBasicBlock *mainMBB = blocks[0]; ++ MachineBasicBlock *FirstMBB = blocks[1]; ++ MachineBasicBlock *sinkMBB = blocks[9]; ++ MachineBasicBlock *exitMBB = blocks[10]; ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ for (int i = 1; i < 9; i++) { ++ mainMBB->addSuccessor(blocks[i]); ++ blocks[i]->addSuccessor(sinkMBB); ++ } ++ ++ unsigned ADDI, BLT, ZERO; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ for (int i = 1; i < 8; i++) { ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); ++ BuildMI(mainMBB, DL, TII->get(BLT)) ++ .addReg(LaneReg) ++ .addReg(RI) ++ .addMBB(blocks[i + 1]); ++ } ++ ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); ++ ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(SrcValReg) ++ .addImm(7); ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ for (int i = 0; i < 7; i++) { ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(SrcValReg) ++ .addImm(i); ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ } ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::VORI_B), Dest) ++ .addReg(Dsttmp) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ for (int i = 0; i < 11; i++) { ++ computeAndAddLiveIns(LiveRegs, *blocks[i]); ++ } ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandXINSERT_BOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned R4r = I->getOperand(5).getReg(); ++ unsigned Rib = I->getOperand(6).getReg(); ++ unsigned Ris = I->getOperand(7).getReg(); ++ unsigned R7b1 = I->getOperand(8).getReg(); ++ unsigned R7b2 = I->getOperand(9).getReg(); ++ unsigned R7b3 = I->getOperand(10).getReg(); ++ unsigned R7r80_3 = I->getOperand(11).getReg(); ++ unsigned R7r80l_3 = I->getOperand(12).getReg(); ++ unsigned R7r81_3 = I->getOperand(13).getReg(); ++ unsigned R7r81l_3 = I->getOperand(14).getReg(); ++ unsigned R7r82_3 = I->getOperand(15).getReg(); ++ unsigned R7r82l_3 = I->getOperand(16).getReg(); ++ unsigned RI = I->getOperand(17).getReg(); ++ unsigned tmp_Dst73 = I->getOperand(18).getReg(); ++ unsigned Rimm = I->getOperand(19).getReg(); ++ unsigned R70 = I->getOperand(20).getReg(); ++ tmp_Dst73 = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB0 = 
MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, mainMBB); ++ MF->insert(It, SevenMBB); ++ MF->insert(It, SevenMBB3); ++ MF->insert(It, SevenMBB0); ++ MF->insert(It, SevenMBB1); ++ MF->insert(It, SevenMBB2); ++ MF->insert(It, ZeroMBB); ++ MF->insert(It, ZeroMBB3); ++ MF->insert(It, ZeroMBB0); ++ MF->insert(It, ZeroMBB1); ++ MF->insert(It, ZeroMBB2); ++ MF->insert(It, OneMBB); ++ MF->insert(It, OneMBB3); ++ MF->insert(It, OneMBB0); ++ MF->insert(It, OneMBB1); ++ MF->insert(It, OneMBB2); ++ MF->insert(It, TwoMBB); ++ MF->insert(It, TwoMBB3); ++ MF->insert(It, TwoMBB0); ++ MF->insert(It, TwoMBB1); ++ MF->insert(It, TwoMBB2); ++ MF->insert(It, ThreeMBB); ++ MF->insert(It, ThreeMBB3); ++ MF->insert(It, ThreeMBB0); ++ MF->insert(It, ThreeMBB1); ++ MF->insert(It, ThreeMBB2); ++ MF->insert(It, FourMBB); ++ MF->insert(It, FourMBB3); ++ MF->insert(It, FourMBB0); ++ MF->insert(It, FourMBB1); ++ MF->insert(It, FourMBB2); ++ MF->insert(It, FiveMBB); ++ MF->insert(It, FiveMBB3); ++ MF->insert(It, FiveMBB0); ++ MF->insert(It, FiveMBB1); ++ MF->insert(It, FiveMBB2); ++ MF->insert(It, SixMBB); ++ MF->insert(It, SixMBB3); ++ MF->insert(It, SixMBB0); ++ MF->insert(It, SixMBB1); ++ MF->insert(It, SixMBB2); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ 
exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ mainMBB->addSuccessor(SevenMBB); ++ mainMBB->addSuccessor(ZeroMBB); ++ mainMBB->addSuccessor(OneMBB); ++ mainMBB->addSuccessor(TwoMBB); ++ mainMBB->addSuccessor(ThreeMBB); ++ mainMBB->addSuccessor(FourMBB); ++ mainMBB->addSuccessor(FiveMBB); ++ mainMBB->addSuccessor(SixMBB); ++ SevenMBB->addSuccessor(SevenMBB0); ++ SevenMBB->addSuccessor(SevenMBB1); ++ SevenMBB->addSuccessor(SevenMBB2); ++ SevenMBB->addSuccessor(SevenMBB3); ++ SevenMBB0->addSuccessor(sinkMBB); ++ SevenMBB1->addSuccessor(sinkMBB); ++ SevenMBB2->addSuccessor(sinkMBB); ++ SevenMBB3->addSuccessor(sinkMBB); ++ ZeroMBB->addSuccessor(ZeroMBB0); ++ ZeroMBB->addSuccessor(ZeroMBB1); ++ ZeroMBB->addSuccessor(ZeroMBB2); ++ ZeroMBB->addSuccessor(ZeroMBB3); ++ ZeroMBB0->addSuccessor(sinkMBB); ++ ZeroMBB1->addSuccessor(sinkMBB); ++ ZeroMBB2->addSuccessor(sinkMBB); ++ ZeroMBB3->addSuccessor(sinkMBB); ++ OneMBB->addSuccessor(OneMBB0); ++ OneMBB->addSuccessor(OneMBB1); ++ OneMBB->addSuccessor(OneMBB2); ++ OneMBB->addSuccessor(OneMBB3); ++ OneMBB0->addSuccessor(sinkMBB); ++ OneMBB1->addSuccessor(sinkMBB); ++ OneMBB2->addSuccessor(sinkMBB); ++ OneMBB3->addSuccessor(sinkMBB); ++ TwoMBB->addSuccessor(TwoMBB0); ++ TwoMBB->addSuccessor(TwoMBB1); ++ TwoMBB->addSuccessor(TwoMBB2); ++ TwoMBB->addSuccessor(TwoMBB3); ++ TwoMBB0->addSuccessor(sinkMBB); ++ TwoMBB1->addSuccessor(sinkMBB); ++ TwoMBB2->addSuccessor(sinkMBB); ++ TwoMBB3->addSuccessor(sinkMBB); ++ ThreeMBB->addSuccessor(ThreeMBB0); ++ ThreeMBB->addSuccessor(ThreeMBB1); ++ ThreeMBB->addSuccessor(ThreeMBB2); ++ ThreeMBB->addSuccessor(ThreeMBB3); ++ ThreeMBB0->addSuccessor(sinkMBB); ++ ThreeMBB1->addSuccessor(sinkMBB); ++ ThreeMBB2->addSuccessor(sinkMBB); ++ ThreeMBB3->addSuccessor(sinkMBB); ++ FourMBB->addSuccessor(FourMBB0); ++ FourMBB->addSuccessor(FourMBB1); ++ FourMBB->addSuccessor(FourMBB2); ++ FourMBB->addSuccessor(FourMBB3); ++ FourMBB0->addSuccessor(sinkMBB); ++ FourMBB1->addSuccessor(sinkMBB); ++ FourMBB2->addSuccessor(sinkMBB); ++ FourMBB3->addSuccessor(sinkMBB); ++ FiveMBB->addSuccessor(FiveMBB0); ++ FiveMBB->addSuccessor(FiveMBB1); ++ FiveMBB->addSuccessor(FiveMBB2); ++ FiveMBB->addSuccessor(FiveMBB3); ++ FiveMBB0->addSuccessor(sinkMBB); ++ FiveMBB1->addSuccessor(sinkMBB); ++ FiveMBB2->addSuccessor(sinkMBB); ++ FiveMBB3->addSuccessor(sinkMBB); ++ SixMBB->addSuccessor(SixMBB0); ++ SixMBB->addSuccessor(SixMBB1); ++ SixMBB->addSuccessor(SixMBB2); ++ SixMBB->addSuccessor(SixMBB3); ++ SixMBB0->addSuccessor(sinkMBB); ++ SixMBB1->addSuccessor(sinkMBB); ++ SixMBB2->addSuccessor(sinkMBB); ++ SixMBB3->addSuccessor(sinkMBB); ++ ++ unsigned SRLI, ADDI, OR, MOD, BLT, ZERO; ++ SRLI = isGP64 ? LoongArch::SRLI_D : LoongArch::SRLI_W; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ OR = isGP64 ? LoongArch::OR : LoongArch::OR32; ++ MOD = isGP64 ? LoongArch::MOD_DU : LoongArch::MOD_WU; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ BuildMI(mainMBB, DL, TII->get(SRLI), Rimm).addReg(LaneReg).addImm(2); ++ BuildMI(mainMBB, DL, TII->get(ADDI), R4r).addReg(ZERO).addImm(4); ++ BuildMI(mainMBB, DL, TII->get(OR), Rib).addReg(Rimm).addReg(ZERO); ++ BuildMI(mainMBB, DL, TII->get(MOD), Ris).addReg(Rib).addReg(R4r); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(1); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ZeroMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(2); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(OneMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(3); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(TwoMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(4); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ThreeMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(5); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FourMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(6); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FiveMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(7); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(SixMBB); ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB); ++ ++ BuildMI(SevenMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(7); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b1) ++ .addMBB(SevenMBB0); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b2) ++ .addMBB(SevenMBB1); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b3) ++ .addMBB(SevenMBB2); ++ BuildMI(SevenMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB3); ++ ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0x00fff); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xff00f); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xffff0); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xfffff); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(0); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(ZeroMBB0); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(ZeroMBB1); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(ZeroMBB2); ++ BuildMI(ZeroMBB, DL, TII->get(LoongArch::B32)).addMBB(ZeroMBB3); ++ ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ 
BuildMI(ZeroMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(1); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(OneMBB0); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(OneMBB1); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(OneMBB2); ++ BuildMI(OneMBB, DL, TII->get(LoongArch::B32)).addMBB(OneMBB3); ++ ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ 
BuildMI(OneMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(2); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(TwoMBB0); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(TwoMBB1); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(TwoMBB2); ++ BuildMI(TwoMBB, DL, TII->get(LoongArch::B32)).addMBB(TwoMBB3); ++ ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(TwoMBB0, 
DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(3); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b1) ++ .addMBB(ThreeMBB0); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b2) ++ .addMBB(ThreeMBB1); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b3) ++ .addMBB(ThreeMBB2); ++ BuildMI(ThreeMBB, DL, TII->get(LoongArch::B32)).addMBB(ThreeMBB3); ++ ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0x00fff); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(ThreeMBB0, DL, 
TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xff00f); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xffff0); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xfffff); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(4); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FourMBB0); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FourMBB1); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FourMBB2); ++ BuildMI(FourMBB, DL, TII->get(LoongArch::B32)).addMBB(FourMBB3); ++ ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ 
.addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(5); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FiveMBB0); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FiveMBB1); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FiveMBB2); ++ BuildMI(FiveMBB, DL, TII->get(LoongArch::B32)).addMBB(FiveMBB3); ++ ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ 
BuildMI(FiveMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(6); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(SixMBB0); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(SixMBB1); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(SixMBB2); ++ BuildMI(SixMBB, DL, TII->get(LoongArch::B32)).addMBB(SixMBB3); ++ ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(SixMBB3, DL, 
TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) ++ .addReg(tmp_Dst73) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *mainMBB); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB0); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB1); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB2); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB3); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB0); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB1); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB2); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB3); ++ computeAndAddLiveIns(LiveRegs, *OneMBB); ++ computeAndAddLiveIns(LiveRegs, *OneMBB0); ++ computeAndAddLiveIns(LiveRegs, *OneMBB1); ++ computeAndAddLiveIns(LiveRegs, *OneMBB2); ++ computeAndAddLiveIns(LiveRegs, *OneMBB3); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB0); ++ 
computeAndAddLiveIns(LiveRegs, *TwoMBB1); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB2); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB3); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB0); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB1); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB2); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB3); ++ computeAndAddLiveIns(LiveRegs, *FourMBB); ++ computeAndAddLiveIns(LiveRegs, *FourMBB0); ++ computeAndAddLiveIns(LiveRegs, *FourMBB1); ++ computeAndAddLiveIns(LiveRegs, *FourMBB2); ++ computeAndAddLiveIns(LiveRegs, *FourMBB3); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB0); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB1); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB2); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB3); ++ computeAndAddLiveIns(LiveRegs, *SixMBB); ++ computeAndAddLiveIns(LiveRegs, *SixMBB0); ++ computeAndAddLiveIns(LiveRegs, *SixMBB1); ++ computeAndAddLiveIns(LiveRegs, *SixMBB2); ++ computeAndAddLiveIns(LiveRegs, *SixMBB3); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicBinOpSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned BEQ = LoongArch::BEQ32; ++ unsigned SEOp = LoongArch::EXT_W_H32; ++ ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ ++ bool IsSwap = false; ++ bool IsNand = false; ++ bool IsMAX = false; ++ bool IsMIN = false; ++ bool IsUnsigned = false; ++ ++ unsigned Opcode = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: ++ IsNand = true; ++ break; ++ case LoongArch::ATOMIC_SWAP_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_SWAP_I16_POSTRA: ++ IsSwap = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: ++ Opcode = LoongArch::ADD_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_W; ++ IsMAX = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_W; ++ IsMIN = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_WU; ++ IsMAX = true; ++ IsUnsigned = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_WU; ++ IsMIN = true; ++ IsUnsigned = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: ++ Opcode = LoongArch::SUB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: ++ Opcode = LoongArch::AND32; ++ 
break; ++ case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: ++ Opcode = LoongArch::OR32; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: ++ Opcode = LoongArch::XOR32; ++ break; ++ default: ++ llvm_unreachable("Unknown subword atomic pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Incr = I->getOperand(2).getReg(); ++ unsigned Mask = I->getOperand(3).getReg(); ++ unsigned Mask2 = I->getOperand(4).getReg(); ++ unsigned ShiftAmnt = I->getOperand(5).getReg(); ++ unsigned OldVal = I->getOperand(6).getReg(); ++ unsigned BinOpRes = I->getOperand(7).getReg(); ++ unsigned StoreVal = I->getOperand(8).getReg(); ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(sinkMBB); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); ++ if (IsNand) { ++ // and andres, oldval, incr2 ++ // nor binopres, $0, andres ++ // and newval, binopres, mask ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::NOR32), BinOpRes) ++ .addReg(LoongArch::ZERO) ++ .addReg(BinOpRes); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Mask); ++ } else if (IsMAX || IsMIN) { ++ ++ unsigned SLTScratch4 = IsUnsigned ? LoongArch::SLTU32 : LoongArch::SLT32; ++ unsigned CMPIncr = IsMAX ? LoongArch::MASKEQZ32 : LoongArch::MASKNEZ32; ++ unsigned CMPOldVal = IsMAX ? 
LoongArch::MASKNEZ32 : LoongArch::MASKEQZ32; ++ ++ unsigned Scratch4 = I->getOperand(9).getReg(); ++ unsigned Scratch5 = I->getOperand(10).getReg(); ++ ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Scratch5) ++ .addReg(OldVal) ++ .addReg(Mask); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Incr) ++ .addReg(Incr) ++ .addReg(Mask); ++ BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4) ++ .addReg(Scratch5) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(CMPOldVal), BinOpRes) ++ .addReg(Scratch5) ++ .addReg(Scratch4); ++ BuildMI(loopMBB, DL, TII->get(CMPIncr), Scratch4) ++ .addReg(Incr) ++ .addReg(Scratch4); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Scratch4); ++ ++ } else if (!IsSwap) { ++ // binopres, oldval, incr2 ++ // and newval, binopres, mask ++ BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Mask); ++ } else { // atomic.swap ++ // and newval, incr2, mask ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(Incr) ++ .addReg(Mask); ++ } ++ ++ // and StoreVal, OlddVal, Mask2 ++ // or StoreVal, StoreVal, BinOpRes ++ // StoreVal = sc StoreVal, 0(Ptr) ++ // beq StoreVal, zero, loopMBB ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), StoreVal) ++ .addReg(OldVal) ++ .addReg(Mask2); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), StoreVal) ++ .addReg(StoreVal) ++ .addReg(BinOpRes); ++ BuildMI(loopMBB, DL, TII->get(SC), StoreVal) ++ .addReg(StoreVal) ++ .addReg(Ptr) ++ .addImm(0); ++ BuildMI(loopMBB, DL, TII->get(BEQ)) ++ .addReg(StoreVal) ++ .addReg(LoongArch::ZERO) ++ .addMBB(loopMBB); ++ ++ // sinkMBB: ++ // and maskedoldval1,oldval,mask ++ // srl srlres,maskedoldval1,shiftamt ++ // sign_extend dest,srlres ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::AND32), Dest) ++ .addReg(OldVal) ++ .addReg(Mask); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) ++ .addReg(Dest) ++ .addReg(ShiftAmnt); ++ ++ BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, ++ unsigned Size) { ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, ZERO, BEQ, SUB; ++ if (Size == 4) { ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ BEQ = LoongArch::BEQ32; ++ ZERO = LoongArch::ZERO; ++ SUB = LoongArch::SUB_W; ++ } else { ++ LL = LoongArch::LL_D; ++ SC = LoongArch::SC_D; ++ ZERO = LoongArch::ZERO_64; ++ BEQ = LoongArch::BEQ; ++ SUB = LoongArch::SUB_D; ++ } ++ ++ unsigned OldVal = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Incr = I->getOperand(2).getReg(); ++ unsigned Scratch = I->getOperand(3).getReg(); ++ ++ unsigned Opcode = 0; ++ unsigned OR = 0; ++ unsigned AND = 0; ++ unsigned NOR = 0; ++ bool IsNand = false; ++ bool IsSub = false; ++ switch (I->getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: ++ Opcode = LoongArch::AMADD_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: ++ IsSub = true; ++ Opcode = LoongArch::AMADD_DB_W; ++ 
break; ++ case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: ++ Opcode = LoongArch::AMAND_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: ++ Opcode = LoongArch::AMOR_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: ++ Opcode = LoongArch::AMXOR_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: ++ IsNand = true; ++ AND = LoongArch::AND32; ++ NOR = LoongArch::NOR32; ++ break; ++ case LoongArch::ATOMIC_SWAP_I32_POSTRA: ++ OR = LoongArch::AMSWAP_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_WU; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_WU; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: ++ Opcode = LoongArch::AMADD_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: ++ IsSub = true; ++ Opcode = LoongArch::AMADD_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: ++ Opcode = LoongArch::AMAND_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: ++ Opcode = LoongArch::AMOR_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: ++ Opcode = LoongArch::AMXOR_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: ++ IsNand = true; ++ AND = LoongArch::AND; ++ NOR = LoongArch::NOR; ++ break; ++ case LoongArch::ATOMIC_SWAP_I64_POSTRA: ++ OR = LoongArch::AMSWAP_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_DU; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_DU; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic!"); ++ } ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(exitMBB); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); ++ assert((OldVal != Incr) && "Clobbered the wrong reg!"); ++ if (Opcode) { ++ if(IsSub){ ++ BuildMI(loopMBB, DL, TII->get(SUB), Scratch).addReg(ZERO).addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Scratch).addReg(Ptr).addImm(0); ++ } ++ else{ ++ BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Incr).addReg(Ptr).addImm(0); ++ } ++ } else if (IsNand) { ++ assert(AND && NOR && ++ "Unknown nand instruction for atomic pseudo expansion"); ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); ++ BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch); ++ BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB); ++ } else { ++ assert(OR && "Unknown instruction 
for atomic pseudo expansion!"); ++ BuildMI(loopMBB, DL, TII->get(OR), OldVal).addReg(Incr).addReg(Ptr).addImm(0); ++ } ++ ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddr(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineFunction *MF = BB.getParent(); ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Op = MI.getOpcode(); ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ unsigned TmpReg; ++ const MachineOperand &MO = MI.getOperand(1); ++ Reloc::Model RM = MF->getTarget().getRelocationModel(); ++ ++ MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; ++ unsigned HiFlag, LoFlag, HigherFlag, HighestFlag; ++ unsigned HiOp, LoOp, HigherOp, HighestOp, LastOp; ++ ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ ++ switch (Op) { ++ case LoongArch::LoadAddrLocal: ++ if (RM == Reloc::Static) { // for jit ++ HiFlag = LoongArchII::MO_ABS_HI; ++ LoFlag = LoongArchII::MO_ABS_LO; ++ HigherFlag = LoongArchII::MO_ABS_HIGHER; ++ HighestFlag = LoongArchII::MO_ABS_HIGHEST; ++ // lu12i.w + ori + lu32i.d + lu52i.d ++ HiOp = LoongArch::LU12I_W; ++ LoOp = LoongArch::ORI; ++ HigherOp = LoongArch::LU32I_D; ++ HighestOp = LoongArch::LU52I_D; ++ } else { ++ // pcalau12i + addi.d ++ LoFlag = LoongArchII::MO_PCREL_LO; ++ HiFlag = LoongArchII::MO_PCREL_HI; ++ LoOp = LoongArch::ADDI_D_rri; ++ } ++ break; ++ case LoongArch::LoadAddrLocalRR: ++ // pcalau12i + ori + lu32i.d + lu52i.d + add.d ++ LoFlag = LoongArchII::MO_PCREL_RRLO; ++ HiFlag = LoongArchII::MO_PCREL_RRHI; ++ HigherFlag = LoongArchII::MO_PCREL_RRHIGHER; ++ HighestFlag = LoongArchII::MO_PCREL_RRHIGHEST; ++ LastOp = LoongArch::ADD_D_rrr; ++ break; ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobal_Alias: ++ // pcalau12i + ld.d ++ LoFlag = LoongArchII::MO_GOT_LO; ++ HiFlag = LoongArchII::MO_GOT_HI; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::LD_D_rri; ++ break; ++ case LoongArch::LoadAddrGlobalRR: ++ // pcalau12i + ori + lu32i.d + lu52i.d +ldx.d ++ LoFlag = LoongArchII::MO_GOT_RRLO; ++ HiFlag = LoongArchII::MO_GOT_RRHI; ++ HigherFlag = LoongArchII::MO_GOT_RRHIGHER; ++ HighestFlag = LoongArchII::MO_GOT_RRHIGHEST; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ LastOp = LoongArch::LDX_D_rrr; ++ break; ++ case LoongArch::LoadAddrTLS_LE: ++ // lu12i.w + ori + lu32i.d + lu52i.d ++ LoFlag = LoongArchII::MO_TLSLE_LO; ++ HiFlag = LoongArchII::MO_TLSLE_HI; ++ HigherFlag = LoongArchII::MO_TLSLE_HIGHER; ++ HighestFlag = LoongArchII::MO_TLSLE_HIGHEST; ++ HiOp = LoongArch::LU12I_W_ri; ++ break; ++ case LoongArch::LoadAddrTLS_IE: ++ // pcalau12i + ld.d ++ LoFlag = LoongArchII::MO_TLSIE_LO; ++ HiFlag = LoongArchII::MO_TLSIE_HI; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::LD_D_rri; ++ break; ++ case LoongArch::LoadAddrTLS_IE_RR: ++ // pcalau12i + ori + lu32i.d + lu52i.d +ldx.d ++ LoFlag = LoongArchII::MO_TLSIE_RRLO; ++ HiFlag = LoongArchII::MO_TLSIE_RRHI; ++ HigherFlag = LoongArchII::MO_TLSIE_RRHIGHER; ++ HighestFlag = LoongArchII::MO_TLSIE_RRHIGHEST; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = 
LoongArch::LU52I_D_rri; ++ LastOp = LoongArch::LDX_D_rrr; ++ break; ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_GD: ++ // pcalau12i + addi.d ++ LoFlag = LoongArchII::MO_TLSGD_LO; ++ HiFlag = LoongArchII::MO_TLSGD_HI; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ADDI_D_rri; ++ break; ++ case LoongArch::LoadAddrTLS_LD_RR: ++ case LoongArch::LoadAddrTLS_GD_RR: ++ // pcalau12i + ori + lu32i.d + lu52i.d + add.d ++ LoFlag = LoongArchII::MO_TLSGD_RRLO; ++ HiFlag = LoongArchII::MO_TLSGD_RRHI; ++ HigherFlag = LoongArchII::MO_TLSGD_RRHIGHER; ++ HighestFlag = LoongArchII::MO_TLSGD_RRHIGHEST; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ LastOp = LoongArch::ADD_D_rrr; ++ break; ++ default: ++ break; ++ } ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(HiOp), DestReg); ++ ++ switch (Op) { ++ case LoongArch::LoadAddrLocal: ++ if (RM == Reloc::Static) { // for jit ++ // la.abs rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg).addReg(DestReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg).addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ } ++ LLVM_FALLTHROUGH; ++ case LoongArch::LoadAddrGlobal: // la.global rd, symbol ++ case LoongArch::LoadAddrGlobal_Alias: // la rd, symbol ++ case LoongArch::LoadAddrTLS_IE: // la.tls.ie rd, symbol ++ case LoongArch::LoadAddrTLS_LD: // la.tls.ld rd, symbol ++ case LoongArch::LoadAddrTLS_GD: // la.tls.gd rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) ++ .addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ } ++ break; ++ ++ case LoongArch::LoadAddrLocalRR: //la.local rd, rs, symbol ++ case LoongArch::LoadAddrGlobalRR: // la.global rd, rs, symbol ++ case LoongArch::LoadAddrTLS_IE_RR: // la.tls.ie rd, rs, symbol ++ case LoongArch::LoadAddrTLS_LD_RR: // la.tls.ld rd, rs, symbol ++ case LoongArch::LoadAddrTLS_GD_RR: // la.tls.gd rd, rs, symbol ++ TmpReg = MI.getOperand(MI.getNumOperands()-1).getReg(); ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), TmpReg) ++ .addReg(TmpReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), TmpReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), TmpReg) ++ .addReg(TmpReg); ++ MIB5 = BuildMI(BB, I, DL, TII->get(LastOp), DestReg) ++ .addReg(DestReg) ++ .addReg(TmpReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ 
MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ case LoongArch::LoadAddrTLS_LE: // la.tls.le rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) ++ .addReg(DestReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg) ++ .addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoTailCall( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I) { ++ ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ const MachineOperand &MO = MI.getOperand(0); ++ ++ unsigned NoFlag = LoongArchII::MO_NO_FLAG; ++ ++ MachineInstrBuilder MIB = ++ BuildMI(BB, I, DL, TII->get(LoongArch::PseudoTailReturn)); ++ ++ if (MO.isSymbol()) { ++ MIB.addExternalSymbol(MO.getSymbolName(), NoFlag); ++ } else { ++ MIB.addDisp(MO, 0, NoFlag); ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineFunction *MF = BB.getParent(); ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ CodeModel::Model M = MF->getTarget().getCodeModel(); ++ Reloc::Model RM = MF->getTarget().getRelocationModel(); ++ ++ unsigned Ra = LoongArch::RA_64; ++ const MachineOperand &MO = MI.getOperand(0); ++ unsigned HiFlag, LoFlag, HigherFlag, HighestFlag, NoFlag; ++ ++ NoFlag = LoongArchII::MO_NO_FLAG; ++ ++ if (RM == Reloc::Static) { // for jit ++ MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; ++ ++ HiFlag = LoongArchII::MO_ABS_HI; ++ LoFlag = LoongArchII::MO_ABS_LO; ++ HigherFlag = LoongArchII::MO_ABS_HIGHER; ++ HighestFlag = LoongArchII::MO_ABS_HIGHEST; ++ // lu12i.w + ori + lu32i.d + lu52i.d + jirl ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::LU12I_W), Ra); ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::ORI), Ra) ++ .addReg(Ra); ++ MIB3 = BuildMI(BB, I, DL, TII->get(LoongArch::LU32I_D), Ra); ++ MIB4 = BuildMI(BB, I, DL, TII->get(LoongArch::LU52I_D), Ra) ++ .addReg(Ra); ++ MIB5 = ++ BuildMI(BB, I, DL, TII->get(LoongArch::JIRL), Ra).addReg(Ra).addImm(0); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag); ++ MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag); ++ MIB3.addExternalSymbol(MO.getSymbolName(), HigherFlag); ++ MIB4.addExternalSymbol(MO.getSymbolName(), HighestFlag); 
++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ } else { ++ // bl ++ MachineInstrBuilder MIB1; ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::BL)); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), NoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, NoFlag); ++ } ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoTEQ(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Divisor = MI.getOperand(0).getReg(); ++ unsigned BneOp = LoongArch::BNE; ++ unsigned Zero = LoongArch::ZERO_64; ++ ++ // beq $Divisor, $zero, 8 ++ BuildMI(BB, I, DL, TII->get(BneOp), Divisor) ++ .addReg(Zero) ++ .addImm(8); ++ // break 7 ++ BuildMI(BB, I, DL, TII->get(LoongArch::BREAK)) ++ .addImm(7);; ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB) { ++ ++ bool Modified = false; ++ ++ switch (MBBI->getOpcode()) { ++ case LoongArch::PseudoTEQ: ++ return expandPseudoTEQ(MBB, MBBI, NMBB); ++ case LoongArch::PseudoCall: ++ return expandPseudoCall(MBB, MBBI, NMBB); ++ case LoongArch::PseudoTailCall: ++ return expandPseudoTailCall(MBB, MBBI); ++ case LoongArch::LoadAddrLocal: ++ case LoongArch::LoadAddrLocalRR: ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobalRR: ++ case LoongArch::LoadAddrGlobal_Alias: ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_LD_RR: ++ case LoongArch::LoadAddrTLS_GD: ++ case LoongArch::LoadAddrTLS_GD_RR: ++ case LoongArch::LoadAddrTLS_IE: ++ case LoongArch::LoadAddrTLS_IE_RR: ++ case LoongArch::LoadAddrTLS_LE: ++ return expandLoadAddr(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA: ++ case LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA: ++ return expandAtomicCmpSwap(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA: ++ case LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA: ++ return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_SWAP_I8_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ return expandAtomicBinOpSubword(MBB, MBBI, NMBB); ++ case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: ++ return expandXINSERT_BOp(MBB, MBBI, NMBB); ++ case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: ++ return expandINSERT_HOp(MBB, MBBI, NMBB); ++ case 
LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: ++ return expandXINSERT_FWOp(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 4); ++ case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 8); ++ default: ++ return Modified; ++ } ++} ++ ++bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) { ++ bool Modified = false; ++ ++ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); ++ while (MBBI != E) { ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ Modified |= expandMI(MBB, MBBI, NMBBI); ++ MBBI = NMBBI; ++ } ++ ++ return Modified; ++} ++ ++bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) { ++ STI = &static_cast(MF.getSubtarget()); ++ TII = STI->getInstrInfo(); ++ ++ bool Modified = false; ++ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++ ++MFI) ++ Modified |= expandMBB(*MFI); ++ ++ if (Modified) ++ MF.RenumberBlocks(); ++ ++ return Modified; ++} ++ ++/// createLoongArchExpandPseudoPass - returns an instance of the pseudo instruction ++/// expansion pass. ++FunctionPass *llvm::createLoongArchExpandPseudoPass() { ++ return new LoongArchExpandPseudo(); ++} +diff --git a/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/lib/Target/LoongArch/LoongArchFrameLowering.cpp +new file mode 100644 +index 00000000..c08962a6 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchFrameLowering.cpp +@@ -0,0 +1,546 @@ ++//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of TargetFrameLowering class. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchFrameLowering.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/MCDwarf.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++// We would like to split the SP adjustment to reduce prologue/epilogue ++// instructions. In this way, the offset of the callee saved ++// register could fit in a single store. ++uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount( ++ const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed ++ // 12-bit immediate and there exists a callee saved register that needs to be pushed. ++ if (!isInt<12>(StackSize) && (CSI.size() > 0)) { ++ // FirstSPAdjustAmount is chosen as (2048 - StackAlign) ++ // because 2048 would cause sp = sp + 2048 in the epilogue to be split into ++ // multiple instructions. An offset smaller than 2048 can fit in a single ++ // load/store instruction, and we have to stick with the stack alignment. ++ return 2048 - STI.getStackAlignment().value(); ++ } ++ return 0; ++} ++ ++//===----------------------------------------------------------------------===// ++// ++// Stack Frame Processing methods ++// +----------------------------+ ++// ++// The stack is allocated decrementing the stack pointer on ++// the first instruction of a function prologue. Once decremented, ++// all stack references are done through a positive offset ++// from the stack/frame pointer, so the stack is considered ++// to grow up! Otherwise terrible hacks would have to be made ++// to get this stack ABI compliant :) ++// ++// The stack frame required by the ABI (after call): ++// Offset ++// ++// 0 ---------- ++// 4 Args to pass ++// . Alloca allocations ++// . Local Area ++// . CPU "Callee Saved" Registers ++// . saved FP ++// . saved RA
FPU "Callee Saved" Registers ++// StackSize ----------- ++// ++// Offset - offset from sp after stack allocation on function prologue ++// ++// The sp is the stack pointer subtracted/added from the stack size ++// at the Prologue/Epilogue ++// ++// References to the previous stack (to obtain arguments) are done ++// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1)) ++// ++// Examples: ++// - reference to the actual stack frame ++// for any local area var there is smt like : FI >= 0, StackOffset: 4 ++// st.w REGX, SP, 4 ++// ++// - reference to previous stack frame ++// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16. ++// The emitted instruction will be something like: ++// ld.w REGX, SP, 16+StackSize ++// ++// Since the total stack size is unknown on LowerFormalArguments, all ++// stack references (ObjectOffset) created to reference the function ++// arguments, are negative numbers. This way, on eliminateFrameIndex it's ++// possible to detect those references and the offsets are adjusted to ++// their real location. ++// ++//===----------------------------------------------------------------------===// ++// ++LoongArchFrameLowering::LoongArchFrameLowering(const LoongArchSubtarget &STI) ++ : TargetFrameLowering(StackGrowsDown, STI.getStackAlignment(), 0, ++ STI.getStackAlignment()), STI(STI) {} ++ ++void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, ++ MachineBasicBlock &MBB) const { ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ const LoongArchInstrInfo &TII = ++ *static_cast(STI.getInstrInfo()); ++ const LoongArchRegisterInfo &RegInfo = ++ *static_cast(STI.getRegisterInfo()); ++ MachineBasicBlock::iterator MBBI = MBB.begin(); ++ DebugLoc dl; ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned SP = ABI.GetStackPtr(); ++ unsigned FP = ABI.GetFramePtr(); ++ unsigned ZERO = ABI.GetNullPtr(); ++ unsigned MOVE = ABI.GetGPRMoveOp(); ++ unsigned ADDI = ABI.GetPtrAddiOp(); ++ unsigned AND = ABI.IsLP64() ? LoongArch::AND : LoongArch::AND32; ++ unsigned SLLI = ABI.IsLP64() ? LoongArch::SLLI_D : LoongArch::SLLI_W; ++ ++ const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? ++ &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ // First, compute final stack size. ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ // No need to allocate space on the stack. ++ if (StackSize == 0 && !MFI.adjustsStack()) ++ return; ++ ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ // Split the SP adjustment to reduce the offsets of callee saved spill. ++ if (FirstSPAdjustAmount) ++ StackSize = FirstSPAdjustAmount; ++ ++ // Adjust stack. ++ TII.adjustReg(SP, SP, -StackSize, MBB, MBBI, MachineInstr::FrameSetup); ++ // Emit ".cfi_def_cfa_offset StackSize". ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ ++ MachineModuleInfo &MMI = MF.getMMI(); ++ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); ++ ++ const std::vector &CSI = MFI.getCalleeSavedInfo(); ++ ++ if (!CSI.empty()) { ++ // Find the instruction past the last instruction that saves a callee-saved ++ // register to the stack. ++ for (unsigned i = 0; i < CSI.size(); ++i) ++ ++MBBI; ++ ++ // Iterate over list of callee-saved registers and emit .cfi_offset ++ // directives. 
++ for (std::vector::const_iterator I = CSI.begin(), ++ E = CSI.end(); I != E; ++I) { ++ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); ++ unsigned Reg = I->getReg(); ++ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( ++ nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } ++ } ++ ++ if (LoongArchFI->callsEhReturn()) { ++ // Insert instructions that spill eh data registers. ++ for (int I = 0; I < 4; ++I) { ++ if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) ++ MBB.addLiveIn(ABI.GetEhDataReg(I)); ++ TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, ++ LoongArchFI->getEhDataRegFI(I), RC, &RegInfo); ++ } ++ ++ // Emit .cfi_offset directives for eh data registers. ++ for (int I = 0; I < 4; ++I) { ++ int64_t Offset = MFI.getObjectOffset(LoongArchFI->getEhDataRegFI(I)); ++ unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::createOffset(nullptr, Reg, Offset)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } ++ } ++ ++ // If framepointer enabled, set it to point to the stack pointer on entry. ++ if (hasFP(MF)) { ++ // Insert instruction "addi.w/d $fp, $sp, StackSize" at this location. ++ TII.adjustReg(FP, SP, StackSize - LoongArchFI->getVarArgsSaveSize(), MBB, ++ MBBI, MachineInstr::FrameSetup); ++ // Emit ".cfi_def_cfa $fp, $varargs_size". ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfa(nullptr, MRI->getDwarfRegNum(FP, true), ++ LoongArchFI->getVarArgsSaveSize())); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex) ++ .setMIFlag(MachineInstr::FrameSetup); ++ } ++ ++ // Emit the second SP adjustment after saving callee saved registers. ++ if (FirstSPAdjustAmount) { ++ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; ++ assert(SecondSPAdjustAmount > 0 && ++ "SecondSPAdjustAmount should be greater than zero"); ++ TII.adjustReg(SP, SP, -SecondSPAdjustAmount, MBB, MBBI, ++ MachineInstr::FrameSetup); ++ ++ // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0", ++ // don't emit an sp-based .cfi_def_cfa_offset. ++ if (!hasFP(MF)) { ++ // Emit ".cfi_def_cfa_offset StackSize" ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex) ++ .setMIFlag(MachineInstr::FrameSetup); ++ } ++ } ++ ++ // Realign stack. 
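++ // The realignment below rounds SP down with an AND against -MaxAlign: the
++ // mask is loaded with a single addi from $zero when the alignment fits the
++ // signed 12-bit immediate (<= 2048), and built as (-1 << log2(MaxAlign)) via
++ // addi + slli otherwise. When a base pointer is required, the realigned SP is
++ // then copied into $s7.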
++ if (hasFP(MF)) { ++ if (RegInfo.hasStackRealignment(MF)) { ++ // addiu $Reg, $zero, -MaxAlignment ++ // andi $sp, $sp, $Reg ++ unsigned VR = MF.getRegInfo().createVirtualRegister(RC); ++ assert((Log2(MFI.getMaxAlign()) < 16) && ++ "Function's alignment size requirement is not supported."); ++ int MaxAlign = -(int)MFI.getMaxAlign().value(); ++ int Alignment = (int)MFI.getMaxAlign().value(); ++ ++ if (Alignment <= 2048) { ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(MaxAlign); ++ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); ++ } else { ++ const unsigned NrBitsToZero = countTrailingZeros((unsigned)Alignment); ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(-1); ++ BuildMI(MBB, MBBI, dl, TII.get(SLLI), VR) ++ .addReg(VR) ++ .addImm(NrBitsToZero); ++ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); ++ } ++ ++ if (hasBP(MF)) { ++ // move $s7, $sp ++ unsigned BP = STI.isABI_LP64() ? LoongArch::S7_64 : LoongArch::S7; ++ BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP).addReg(SP).addReg(ZERO); ++ } ++ } ++ } ++} ++ ++void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, ++ MachineBasicBlock &MBB) const { ++ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ const LoongArchInstrInfo &TII = ++ *static_cast(STI.getInstrInfo()); ++ const LoongArchRegisterInfo &RegInfo = ++ *static_cast(STI.getRegisterInfo()); ++ ++ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned SP = ABI.GetStackPtr(); ++ unsigned FP = ABI.GetFramePtr(); ++ ++ // Get the number of bytes from FrameInfo. ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ // Restore the stack pointer. ++ if (hasFP(MF) && ++ (RegInfo.hasStackRealignment(MF) || MFI.hasVarSizedObjects())) { ++ // Find the first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ TII.adjustReg(SP, FP, -(StackSize - LoongArchFI->getVarArgsSaveSize()), MBB, ++ I); ++ } ++ ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ if (FirstSPAdjustAmount) { ++ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; ++ assert(SecondSPAdjustAmount > 0 && ++ "SecondSPAdjustAmount should be greater than zero"); ++ // Find the first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ ++ TII.adjustReg(SP, SP, SecondSPAdjustAmount, MBB, I); ++ } ++ ++ if (LoongArchFI->callsEhReturn()) { ++ const TargetRegisterClass *RC = ++ ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ // Find first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ ++ // Insert instructions that restore eh data registers. ++ for (int J = 0; J < 4; ++J) ++ TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), ++ LoongArchFI->getEhDataRegFI(J), RC, &RegInfo); ++ } ++ ++ if (FirstSPAdjustAmount) ++ StackSize = FirstSPAdjustAmount; ++ ++ if (!StackSize) ++ return; ++ ++ // Final adjust stack. 
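++ // When the SP adjustment was split, StackSize has been reset to
++ // FirstSPAdjustAmount above, so this final add releases only the first-stage
++ // allocation; the second-stage amount was already added back before the
++ // callee-saved restores.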
++ TII.adjustReg(SP, SP, StackSize, MBB, MBBI); ++} ++ ++StackOffset ++LoongArchFrameLowering::getFrameIndexReference(const MachineFunction &MF, ++ int FI, ++ Register &FrameReg) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ const auto *LoongArchFI = MF.getInfo(); ++ ++ // Callee-saved registers should be referenced relative to the stack ++ // pointer (positive offset), otherwise use the frame pointer (negative ++ // offset). ++ const auto &CSI = MFI.getCalleeSavedInfo(); ++ int MinCSFI = 0; ++ int MaxCSFI = -1; ++ StackOffset Offset = ++ StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + ++ MFI.getOffsetAdjustment()); ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ ++ if (CSI.size()) { ++ MinCSFI = CSI[0].getFrameIdx(); ++ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); ++ } ++ ++ bool EhDataRegFI = LoongArchFI->isEhDataRegFI(FI); ++ if ((FI >= MinCSFI && FI <= MaxCSFI) || EhDataRegFI) { ++ FrameReg = ABI.GetStackPtr(); ++ ++ if (FirstSPAdjustAmount) ++ Offset += StackOffset::getFixed(FirstSPAdjustAmount); ++ else ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { ++ // If the stack was realigned, the frame pointer is set in order to allow ++ // SP to be restored, so we need another base register to record the stack ++ // after realignment. ++ FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } else { ++ FrameReg = RI->getFrameRegister(MF); ++ if (hasFP(MF)) ++ Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); ++ else ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } ++ return Offset; ++} ++ ++bool LoongArchFrameLowering::spillCalleeSavedRegisters( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ++ ArrayRef CSI, const TargetRegisterInfo *TRI) const { ++ MachineFunction *MF = MBB.getParent(); ++ const TargetInstrInfo &TII = *STI.getInstrInfo(); ++ ++ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { ++ // Add the callee-saved register as live-in. Do not add if the register is ++ // RA and return address is taken, because it has already been added in ++ // method LoongArchTargetLowering::lowerRETURNADDR. ++ // It's killed at the spill, unless the register is RA and return address ++ // is taken. ++ unsigned Reg = CSI[i].getReg(); ++ bool IsRAAndRetAddrIsTaken = (Reg == LoongArch::RA || Reg == LoongArch::RA_64) ++ && MF->getFrameInfo().isReturnAddressTaken(); ++ if (!IsRAAndRetAddrIsTaken) ++ MBB.addLiveIn(Reg); ++ ++ // Insert the spill to the stack frame. ++ bool IsKill = !IsRAAndRetAddrIsTaken; ++ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); ++ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, ++ CSI[i].getFrameIdx(), RC, TRI); ++ } ++ ++ return true; ++} ++ ++bool ++LoongArchFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ // Reserve call frame if the size of the maximum call frame fits into 12-bit ++ // immediate field and there are no variable sized objects on the stack. ++ // Make sure the second register scavenger spill slot can be accessed with one ++ // instruction. ++ return isInt<12>(MFI.getMaxCallFrameSize() + getStackAlignment()) && ++ !MFI.hasVarSizedObjects(); ++} ++ ++/// Mark \p Reg and all registers aliasing it in the bitset. 
++static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, ++ unsigned Reg) { ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ++ SavedRegs.set(*AI); ++} ++ ++void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, ++ BitVector &SavedRegs, ++ RegScavenger *RS) const { ++ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned FP = ABI.GetFramePtr(); ++ unsigned BP = ABI.IsLP64() ? LoongArch::S7_64 : LoongArch::S7; ++ ++ // Mark $fp as used if function has dedicated frame pointer. ++ if (hasFP(MF)) ++ setAliasRegs(MF, SavedRegs, FP); ++ // Mark $s7 as used if function has dedicated base pointer. ++ if (hasBP(MF)) ++ setAliasRegs(MF, SavedRegs, BP); ++ ++ // Create spill slots for eh data registers if function calls eh_return. ++ if (LoongArchFI->callsEhReturn()) ++ LoongArchFI->createEhDataRegsFI(); ++ ++ // Set scavenging frame index if necessary. ++ uint64_t MaxSPOffset = estimateStackSize(MF); ++ ++ // If there is a variable ++ // sized object on the stack, the estimation cannot account for it. ++ if (isIntN(12, MaxSPOffset) && ++ !MF.getFrameInfo().hasVarSizedObjects()) ++ return; ++ ++ const TargetRegisterClass &RC = ++ ABI.ArePtrs64bit() ? LoongArch::GPR64RegClass : LoongArch::GPR32RegClass; ++ int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), ++ TRI->getSpillAlign(RC), false); ++ RS->addScavengingFrameIndex(FI); ++} ++ ++// hasFP - Return true if the specified function should have a dedicated frame ++// pointer register. This is true if the function has variable sized allocas, ++// if it needs dynamic stack realignment, if frame pointer elimination is ++// disabled, or if the frame address is taken. ++bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MF.getTarget().Options.DisableFramePointerElim(MF) || ++ MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || ++ TRI->hasStackRealignment(MF); ++} ++ ++bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); ++} ++ ++// Estimate the size of the stack, including the incoming arguments. We need to ++// account for register spills, local objects, reserved call frame and incoming ++// arguments. This is required to determine the largest possible positive offset ++// from $sp so that it can be determined if an emergency spill slot for stack ++// addresses is required. ++uint64_t LoongArchFrameLowering:: ++estimateStackSize(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ++ ++ int64_t Size = 0; ++ ++ // Iterate over fixed sized objects which are incoming arguments. ++ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) ++ if (MFI.getObjectOffset(I) > 0) ++ Size += MFI.getObjectSize(I); ++ ++ // Conservatively assume all callee-saved registers will be saved. 
++ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { ++ unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); ++ Size = alignTo(Size + RegSize, RegSize); ++ } ++ ++ // Get the size of the rest of the frame objects and any possible reserved ++ // call frame, accounting for alignment. ++ return Size + MFI.estimateStackSize(MF); ++} ++ ++// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions ++MachineBasicBlock::iterator LoongArchFrameLowering:: ++eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ unsigned SP = STI.getABI().IsLP64() ? LoongArch::SP_64 : LoongArch::SP; ++ ++ if (!hasReservedCallFrame(MF)) { ++ int64_t Amount = I->getOperand(0).getImm(); ++ if (I->getOpcode() == LoongArch::ADJCALLSTACKDOWN) ++ Amount = -Amount; ++ ++ STI.getInstrInfo()->adjustReg(SP, SP, Amount, MBB, I); ++ } ++ ++ return MBB.erase(I); ++} +diff --git a/lib/Target/LoongArch/LoongArchFrameLowering.h b/lib/Target/LoongArch/LoongArchFrameLowering.h +new file mode 100644 +index 00000000..ca6cd736 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchFrameLowering.h +@@ -0,0 +1,70 @@ ++//===-- LoongArchFrameLowering.h - Define frame lowering for LoongArch ----*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H ++ ++#include "LoongArch.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++ ++namespace llvm { ++ class LoongArchSubtarget; ++ ++class LoongArchFrameLowering : public TargetFrameLowering { ++ const LoongArchSubtarget &STI; ++ ++public: ++ explicit LoongArchFrameLowering(const LoongArchSubtarget &STI); ++ ++ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into ++ /// the function. ++ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; ++ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; ++ ++ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, ++ Register &FrameReg) const override; ++ ++ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ ArrayRef CSI, ++ const TargetRegisterInfo *TRI) const override; ++ ++ bool hasReservedCallFrame(const MachineFunction &MF) const override; ++ ++ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, ++ RegScavenger *RS) const override; ++ ++ bool hasFP(const MachineFunction &MF) const override; ++ ++ bool hasBP(const MachineFunction &MF) const; ++ ++ bool enableShrinkWrapping(const MachineFunction &MF) const override { ++ return true; ++ } ++ ++ MachineBasicBlock::iterator ++ eliminateCallFramePseudoInstr(MachineFunction &MF, ++ MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const override; ++ ++ // Get the first stack adjustment amount for split the SP adjustment. ++ // Return 0 if we don't want to to split the SP adjustment in prologue and ++ // epilogue. 
++ uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; ++ ++protected: ++ uint64_t estimateStackSize(const MachineFunction &MF) const; ++}; ++ ++} // End llvm namespace ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +new file mode 100644 +index 00000000..43e46315 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +@@ -0,0 +1,756 @@ ++//===-- LoongArchISelDAGToDAG.cpp - A Dag to Dag Inst Selector for LoongArch --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines an instruction selector for the LoongArch target. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchISelDAGToDAG.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/IR/CFG.h" ++#include "llvm/IR/Dominators.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-isel" ++ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine ++// instructions for SelectionDAG operations. 
++//===----------------------------------------------------------------------===// ++ ++void LoongArchDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { ++ AU.addRequired(); ++ SelectionDAGISel::getAnalysisUsage(AU); ++} ++ ++bool LoongArchDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { ++ Subtarget = &static_cast(MF.getSubtarget()); ++ bool Ret = SelectionDAGISel::runOnMachineFunction(MF); ++ ++ return Ret; ++} ++ ++/// Match frameindex ++bool LoongArchDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { ++ EVT ValTy = Addr.getValueType(); ++ ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); ++ return true; ++ } ++ return false; ++} ++ ++/// Match frameindex+offset and frameindex|offset ++bool LoongArchDAGToDAGISel::selectAddrFrameIndexOffset( ++ SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, ++ unsigned ShiftAmount = 0) const { ++ if (CurDAG->isBaseWithConstantOffset(Addr)) { ++ ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); ++ if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { ++ EVT ValTy = Addr.getValueType(); ++ ++ // If the first operand is a FI, get the TargetFI Node ++ if (FrameIndexSDNode *FIN = ++ dyn_cast(Addr.getOperand(0))) ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); ++ else { ++ Base = Addr.getOperand(0); ++ // If base is a FI, additional offset calculation is done in ++ // eliminateFrameIndex, otherwise we need to check the alignment ++ const Align Alignment(1ULL << ShiftAmount); ++ if (!isAligned(Alignment, CN->getZExtValue())) ++ return false; ++ } ++ ++ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ++ ValTy); ++ return true; ++ } ++ } ++ return false; ++} ++ ++/// ComplexPattern used on LoongArchInstrInfo ++/// Used on LoongArch Load/Store instructions ++bool LoongArchDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ // if Address is FI, get the TargetFrameIndex. 
++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (!TM.isPositionIndependent()) { ++ if ((Addr.getOpcode() == ISD::TargetExternalSymbol || ++ Addr.getOpcode() == ISD::TargetGlobalAddress)) ++ return false; ++ } ++ ++ // Addresses of the form FI+const or FI|const ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return false; ++} ++ ++/// ComplexPattern used on LoongArchInstrInfo ++/// Used on LoongArch Load/Store instructions ++bool LoongArchDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ Base = Addr; ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); ++ return true; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ return selectAddrRegImm(Addr, Base, Offset) || ++ selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 11, 1)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9, 3)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 14, 2)) ++ return true; ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if 
(selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++// Select constant vector splats. ++// ++// Returns true and sets Imm if: ++// * LSX is enabled ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, ++ unsigned MinSizeInBits) const { ++ if (!(Subtarget->hasLSX() || Subtarget->hasLASX())) ++ return false; ++ ++ BuildVectorSDNode *Node = dyn_cast(N); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ MinSizeInBits)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++// Select constant vector splats. ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value fits in an integer with the specified signed-ness and ++// width. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++// ++// It's worth noting that this function is not used as part of the selection ++// of [v/xv]ldi.[bhwd] since it does not permit using the wrong-typed ++// [v/xv]ldi.[bhwd] instruction to achieve the desired bit pattern. ++// [v/xv]ldi.[bhwd] is selected in LoongArchDAGToDAGISel::selectNode. ++bool LoongArchDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, ++ bool Signed, ++ unsigned ImmBitSize) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ ++ if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || ++ (!Signed && ImmValue.isIntN(ImmBitSize))) { ++ Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats. ++bool LoongArchDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 1); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 2); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 3); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 4); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 5); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 6); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 8); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, true, 5); ++} ++ ++// Select constant vector splats whose value is a power of 2. 
++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a power of two. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, ++ SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = ImmValue.exactLogBase2(); ++ ++ if (Log2 != -1) { ++ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, ++ SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = (~ImmValue).exactLogBase2(); ++ ++ if (Log2 != -1) { ++ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats whose value only has a consecutive sequence ++// of left-most bits set (e.g. 0b11...1100...00). ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a consecutive sequence of left-most bits. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ // Extract the run of set bits starting with bit zero from the bitwise ++ // inverse of ImmValue, and test that the inverse of this is the same ++ // as the original value. ++ if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { ++ ++ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), ++ EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats whose value only has a consecutive sequence ++// of right-most bits set (e.g. 0b00...0011...11). ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a consecutive sequence of right-most bits. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. 
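An illustrative aside (not part of the upstream patch): the mask predicates selectVSplatMaskL (above) and selectVSplatMaskR (below) hinge on two easy-to-misread bit tests. The minimal standalone sketch below uses plain uint32_t in place of APInt; the helper names are invented here purely for illustration.

  #include <cstdint>

  // The selectVSplatMaskL test: accept values whose set bits form a single
  // run anchored at the most significant bit, e.g. 0b1111...0000.
  constexpr bool isLeftAlignedMask(uint32_t V) {
    return V == uint32_t(~(~V & ~(~V + 1u)));
  }

  // The selectVSplatMaskR test: accept values whose set bits form a single
  // run anchored at bit zero, e.g. 0b0000...1111.
  constexpr bool isRightAlignedMask(uint32_t V) {
    return V == (V & ~(V + 1u));
  }

  static_assert(isLeftAlignedMask(0xFFFF0000u), "high run of ones");
  static_assert(!isLeftAlignedMask(0x0FF00000u), "run not anchored at the MSB");
  static_assert(isRightAlignedMask(0x0000FFFFu), "low run of ones");
  static_assert(!isRightAlignedMask(0x00FF0000u), "run not anchored at bit zero");

In both functions, an accepted value's population count minus one is then encoded as the target immediate, as the patch code does.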
++bool LoongArchDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ // Extract the run of set bits starting with bit zero, and test that the ++ // result is the same as the original value ++ if (ImmValue == (ImmValue & ~(ImmValue + 1))) { ++ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), ++ EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::trySelect(SDNode *Node) { ++ unsigned Opcode = Node->getOpcode(); ++ SDLoc DL(Node); ++ ++ /// ++ // Instruction Selection not handled by the auto-generated ++ // tablegen selection should be handled here. ++ /// ++ switch(Opcode) { ++ default: break; ++ case ISD::ConstantFP: { ++ ConstantFPSDNode *CN = dyn_cast(Node); ++ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { ++ if (Subtarget->is64Bit()) { ++ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ++ LoongArch::ZERO_64, MVT::i64); ++ ReplaceNode(Node, ++ CurDAG->getMachineNode(LoongArch::MOVGR2FR_D, DL, MVT::f64, Zero)); ++ } ++ return true; ++ } ++ break; ++ } ++ ++ case ISD::Constant: { ++ const ConstantSDNode *CN = dyn_cast(Node); ++ MVT VT = CN->getSimpleValueType(0); ++ int64_t Imm = CN->getSExtValue(); ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, VT == MVT::i64); ++ SDLoc DL(CN); ++ SDNode *Result = nullptr; ++ SDValue SrcReg = CurDAG->getRegister( ++ VT == MVT::i64 ? LoongArch::ZERO_64 : LoongArch::ZERO, VT); ++ ++ // The instructions in the sequence are handled here. ++ for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { ++ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); ++ else ++ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); ++ SrcReg = SDValue(Result, 0); ++ } ++ ReplaceNode(Node, Result); ++ return true; ++ } ++ ++ case ISD::BUILD_VECTOR: { ++ // Select appropriate vldi.[bhwd] instructions for constant splats of ++ // 128-bit when LSX is enabled. Select appropriate xvldi.[bhwd] instructions ++ // for constant splats of 256-bit when LASX is enabled. Fixup any register ++ // class mismatches that occur as a result. ++ // ++ // This allows the compiler to use a wider range of immediates than would ++ // otherwise be allowed. If, for example, v4i32 could only use [v/xv]ldi.h ++ // then it would not be possible to load { 0x01010101, 0x01010101, ++ // 0x01010101, 0x01010101 } without using a constant pool. This would be ++ // sub-optimal when // '[v/xv]ldi.b vd, 1' is capable of producing that ++ // bit-pattern in the same set/ of registers. Similarly, [v/xv]ldi.h isn't ++ // capable of producing { 0x00000000, 0x00000001, 0x00000000, 0x00000001 } ++ // but '[v/xv]ldi.d vd, 1' can. 
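An illustrative aside (not part of the upstream patch): the BUILD_VECTOR selection code below chooses a progressively more expensive materialization strategy based on how many signed bits the splat value needs. The sketch is a simplified paraphrase of that if/else chain, with the element-width and ABI conditions omitted; the enum and helper names are invented here for illustration only.

  #include <cstdint>

  enum class SplatTier {
    DirectVldi,             // [v/xv]ldi with the raw immediate
    AddiThenReplicate,      // addi.{w,d} $r, $zero, imm, then [x]vreplgr2vr
    Lu12iOriThenReplicate,  // lu12i.w (+ ori), then [x]vreplgr2vr
    FullImmSequence         // generic immediate sequence, then [x]vreplgr2vr.d
  };

  constexpr bool fitsSigned(int64_t V, unsigned Bits) {
    return V >= -(int64_t(1) << (Bits - 1)) && V < (int64_t(1) << (Bits - 1));
  }

  // Simplified model of the tier choice made in the ISD::BUILD_VECTOR case.
  constexpr SplatTier classifySplat(int64_t SplatValue) {
    if (fitsSigned(SplatValue, 10))
      return SplatTier::DirectVldi;
    if (fitsSigned(SplatValue, 12))
      return SplatTier::AddiThenReplicate;
    if (fitsSigned(SplatValue, 32))
      return SplatTier::Lu12iOriThenReplicate;
    return SplatTier::FullImmSequence;
  }

  static_assert(classifySplat(5) == SplatTier::DirectVldi, "");
  static_assert(classifySplat(1000) == SplatTier::AddiThenReplicate, "");
  static_assert(classifySplat(0x12345) == SplatTier::Lu12iOriThenReplicate, "");

The wider the element size at which the splat is matched, the larger the range of constants that avoid a constant-pool load, which is the point the comment above makes.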
++ ++ const LoongArchABIInfo &ABI = ++ static_cast(TM).getABI(); ++ ++ BuildVectorSDNode *BVN = cast(Node); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ unsigned LdiOp; ++ EVT ResVecTy = BVN->getValueType(0); ++ EVT ViaVecTy; ++ ++ if ((!Subtarget->hasLSX() || !BVN->getValueType(0).is128BitVector()) && ++ (!Subtarget->hasLASX() || !BVN->getValueType(0).is256BitVector())) ++ return false; ++ ++ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, ++ HasAnyUndefs, 8)) ++ return false; ++ ++ bool IsLASX256 = BVN->getValueType(0).is256BitVector(); ++ ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_B : LoongArch::VLDI_B; ++ ViaVecTy = IsLASX256 ? MVT::v32i8 : MVT::v16i8; ++ break; ++ case 16: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_H : LoongArch::VLDI_H; ++ ViaVecTy = IsLASX256 ? MVT::v16i16 : MVT::v8i16; ++ break; ++ case 32: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_W : LoongArch::VLDI_W; ++ ViaVecTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; ++ break; ++ case 64: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_D : LoongArch::VLDI_D; ++ ViaVecTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; ++ break; ++ } ++ ++ SDNode *Res; ++ ++ // If we have a signed 13 bit integer, we can splat it directly. ++ // ++ // If we have something bigger we can synthesize the value into a GPR and ++ // splat from there. ++ if (SplatValue.isSignedIntN(10)) { ++ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, ++ ViaVecTy.getVectorElementType()); ++ ++ Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); ++ } else if (SplatValue.isSignedIntN(12)) { ++ bool Is32BitSplat = SplatBitSize < 64 ? true : false; ++ const unsigned ADDIOp = ++ Is32BitSplat ? LoongArch::ADDI_W : LoongArch::ADDI_D; ++ const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64; ++ SDValue ZeroVal = CurDAG->getRegister( ++ Is32BitSplat ? LoongArch::ZERO : LoongArch::ZERO_64, SplatMVT); ++ ++ const unsigned FILLOp = ++ (SplatBitSize == 16) ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H) ++ : (SplatBitSize == 32 ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_W ++ : LoongArch::VREPLGR2VR_W) ++ : (SplatBitSize == 64 ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_D ++ : LoongArch::VREPLGR2VR_D) ++ : 0)); ++ ++ assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!"); ++ ++ short Lo = SplatValue.getLoBits(12).getSExtValue(); ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT); ++ ++ Res = CurDAG->getMachineNode(ADDIOp, DL, SplatMVT, ZeroVal, LoVal); ++ Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0)); ++ } else if (SplatValue.isSignedIntN(16) && SplatBitSize == 16) { ++ const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); ++ const unsigned Hi = SplatValue.lshr(12).getLoBits(4).getZExtValue(); ++ SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); ++ ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); ++ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); ++ if (Hi) ++ Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); ++ ++ if (Lo) ++ Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, ++ Hi ? SDValue(Res, 0) : ZeroVal, LoVal); ++ ++ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H; ++ EVT FILLTy = IsLASX256 ? 
MVT::v16i16 : MVT::v8i16; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { ++ // Only handle the cases where the splat size agrees with the size ++ // of the SplatValue here. ++ const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); ++ const unsigned Hi = SplatValue.lshr(12).getLoBits(20).getZExtValue(); ++ SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); ++ ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); ++ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); ++ if (Hi) ++ Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); ++ ++ if (Lo) ++ Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, ++ Hi ? SDValue(Res, 0) : ZeroVal, LoVal); ++ ++ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_W : LoongArch::VREPLGR2VR_W; ++ EVT FILLTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ ++ } else if ((SplatValue.isSignedIntN(32) && SplatBitSize == 64 && ++ ABI.IsLP64D()) || ++ (SplatValue.isSignedIntN(64))) { ++ ++ int64_t Imm = SplatValue.getSExtValue(); ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, true); ++ SDValue SrcReg = CurDAG->getRegister(LoongArch::ZERO_64, MVT::i64); ++ ++ for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { ++ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, MVT::i64); ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SDImm); ++ else ++ Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SrcReg, SDImm); ++ SrcReg = SDValue(Res, 0); ++ } ++ ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_D : LoongArch::VREPLGR2VR_D; ++ EVT FILLTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ ++ } else ++ return false; ++ ++ if (ResVecTy != ViaVecTy) { ++ // If LdiOp is writing to a different register class to ResVecTy, then ++ // fix it up here. This COPY_TO_REGCLASS should never cause a move.v ++ // since the source and destination register sets contain the same ++ // registers. ++ const TargetLowering *TLI = getTargetLowering(); ++ MVT ResVecTySimple = ResVecTy.getSimpleVT(); ++ const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); ++ Res = CurDAG->getMachineNode( ++ LoongArch::COPY_TO_REGCLASS, DL, ResVecTy, SDValue(Res, 0), ++ CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); ++ } ++ ++ ReplaceNode(Node, Res); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/// Select instructions not customized! Used for ++/// expanded, promoted and normal instructions ++void LoongArchDAGToDAGISel::Select(SDNode *Node) { ++ // If we have a custom node, we already have selected! ++ if (Node->isMachineOpcode()) { ++ LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); ++ Node->setNodeId(-1); ++ return; ++ } ++ ++ // See if subclasses can handle this node. 
++ if (trySelect(Node)) ++ return; ++ ++ // Select the default instruction ++ SelectCode(Node); ++} ++ ++bool LoongArchDAGToDAGISel:: ++SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, ++ std::vector &OutOps) { ++ SDValue Base, Offset; ++ ++ switch(ConstraintID) { ++ default: ++ llvm_unreachable("Unexpected asm memory constraint"); ++ // All memory constraints can at least accept raw pointers. ++ case InlineAsm::Constraint_i: ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_m: ++ if (selectAddrRegImm12(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_R: ++ if (selectAddrRegImm12(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_ZC: ++ if (selectIntAddrSImm14Lsl2(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_ZB: ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ } ++ return true; ++} ++ ++FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, ++ CodeGenOpt::Level OptLevel) { ++ return new LoongArchDAGToDAGISel(TM, OptLevel); ++} +diff --git a/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +new file mode 100644 +index 00000000..9309c256 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +@@ -0,0 +1,147 @@ ++//===---- LoongArchISelDAGToDAG.h - A Dag to Dag Inst Selector for LoongArch --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines an instruction selector for the LoongArch target. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H ++ ++#include "LoongArch.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/SelectionDAGISel.h" ++ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine ++// instructions for SelectionDAG operations. 
++//===----------------------------------------------------------------------===// ++namespace llvm { ++ ++class LoongArchDAGToDAGISel : public SelectionDAGISel { ++public: ++ explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, CodeGenOpt::Level OL) ++ : SelectionDAGISel(TM, OL), Subtarget(nullptr) {} ++ ++ // Pass Name ++ StringRef getPassName() const override { ++ return "LoongArch DAG->DAG Pattern Instruction Selection"; ++ } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++ void getAnalysisUsage(AnalysisUsage &AU) const override; ++ ++private: ++ /// Keep a pointer to the LoongArchSubtarget around so that we can make the right ++ /// decision when generating code for different targets. ++ const LoongArchSubtarget *Subtarget; ++ // Include the pieces autogenerated from the target description. ++ #include "LoongArchGenDAGISel.inc" ++ ++ bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, ++ unsigned OffsetBits, ++ unsigned ShiftAmount) const; ++ ++ // Complex Pattern. ++ /// (reg + imm). ++ bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ /// Fall back on this function if all else fails. ++ bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ /// Match integer address pattern. ++ bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectAddrRegImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ /// Match addr+simm12 and addr ++ bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ /// Select constant vector splats. ++ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; ++ /// Select constant vector splats whose value fits in a given integer. ++ bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, ++ unsigned ImmBitSize) const; ++ /// Select constant vector splats whose value fits in a uimm1. ++ bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm2. ++ bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm3. ++ bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm4. ++ bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm5. ++ bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm6. ++ bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm8. ++ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a simm5. 
++ bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a power of 2. ++ bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is the inverse of a ++ /// power of 2. ++ bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a run of set bits ++ /// ending at the most significant bit ++ bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a run of set bits ++ /// starting at bit zero. ++ bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; ++ ++ void Select(SDNode *N) override; ++ ++ bool trySelect(SDNode *Node); ++ ++ // getImm - Return a target constant with the specified value. ++ inline SDValue getImm(const SDNode *Node, uint64_t Imm) { ++ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); ++ } ++ ++ bool SelectInlineAsmMemoryOperand(const SDValue &Op, ++ unsigned ConstraintID, ++ std::vector &OutOps) override; ++}; ++ ++FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, ++ CodeGenOpt::Level OptLevel); ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchISelLowering.cpp b/lib/Target/LoongArch/LoongArchISelLowering.cpp +new file mode 100644 +index 00000000..4e60236c +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -0,0 +1,8204 @@ ++//===- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the interfaces that LoongArch uses to lower LLVM code into a ++// selection DAG. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchISelLowering.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArchCCState.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchTargetObjectFile.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/ArrayRef.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/FunctionLoweringInfo.h" ++#include "llvm/CodeGen/ISDOpcodes.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RuntimeLibcalls.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" ++#include "llvm/IR/Type.h" ++#include "llvm/IR/Value.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MachineValueType.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-lower" ++ ++STATISTIC(NumTailCalls, "Number of tail calls"); ++ ++static cl::opt ++NoZeroDivCheck("mnocheck-zero-division", cl::Hidden, ++ cl::desc("LoongArch: Don't trap on integer division by zero."), ++ cl::init(false)); ++ ++static const MCPhysReg LoongArch64DPRegs[8] = { ++ LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, ++ LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64 ++}; ++ ++// If I is a shifted mask, set the size (SMSize) and the first bit of the ++// mask (SMLsb), and return true. ++// For example, if I is 0x003ff800, (SMLsb, SMSize) = (11, 11). 
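An illustrative aside (not part of the upstream patch): the comment above gives 0x003ff800 as a shifted-mask example with (SMLsb, SMSize) = (11, 11). A tiny standalone check confirms those figures; the helpers are spelled out here rather than taken from LLVM's MathExtras and exist only for this illustration.

  #include <cstdint>

  constexpr unsigned countTrailingZeros32(uint32_t V) {
    unsigned N = 0;
    while (V && !(V & 1u)) { V >>= 1; ++N; }
    return N;
  }

  constexpr unsigned popCount32(uint32_t V) {
    unsigned N = 0;
    while (V) { N += V & 1u; V >>= 1; }
    return N;
  }

  // A shifted mask is a single contiguous run of ones, so its low bit index is
  // the trailing-zero count and its length is the population count.
  static_assert(countTrailingZeros32(0x003ff800u) == 11, "SMLsb");
  static_assert(popCount32(0x003ff800u) == 11, "SMSize");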
++static bool isShiftedMask(uint64_t I, uint64_t &SMLsb, uint64_t &SMSize) { ++ if (!isShiftedMask_64(I)) ++ return false; ++ ++ SMSize = countPopulation(I); ++ SMLsb = countTrailingZeros(I); ++ return true; ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), ++ N->getOffset(), Flag); ++} ++ ++const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { ++ switch ((LoongArchISD::NodeType)Opcode) { ++ case LoongArchISD::FIRST_NUMBER: break; ++ case LoongArchISD::JmpLink: return "LoongArchISD::JmpLink"; ++ case LoongArchISD::TailCall: return "LoongArchISD::TailCall"; ++ case LoongArchISD::GlobalAddress: return "LoongArchISD::GlobalAddress"; ++ case LoongArchISD::Ret: return "LoongArchISD::Ret"; ++ case LoongArchISD::ERet: return "LoongArchISD::ERet"; ++ case LoongArchISD::EH_RETURN: return "LoongArchISD::EH_RETURN"; ++ case LoongArchISD::FPBrcond: return "LoongArchISD::FPBrcond"; ++ case LoongArchISD::FPCmp: return "LoongArchISD::FPCmp"; ++ case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; ++ case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; ++ case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; ++ case LoongArchISD::DBAR: return "LoongArchISD::DBAR"; ++ case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; ++ case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; ++ case LoongArchISD::VALL_ZERO: ++ return "LoongArchISD::VALL_ZERO"; ++ case LoongArchISD::VANY_ZERO: ++ return "LoongArchISD::VANY_ZERO"; ++ case LoongArchISD::VALL_NONZERO: ++ return "LoongArchISD::VALL_NONZERO"; ++ case LoongArchISD::VANY_NONZERO: ++ return "LoongArchISD::VANY_NONZERO"; ++ case LoongArchISD::VEXTRACT_SEXT_ELT: ++ return "LoongArchISD::VEXTRACT_SEXT_ELT"; ++ case LoongArchISD::VEXTRACT_ZEXT_ELT: ++ return "LoongArchISD::VEXTRACT_ZEXT_ELT"; ++ case LoongArchISD::VNOR: ++ return "LoongArchISD::VNOR"; ++ case LoongArchISD::VSHF: ++ return "LoongArchISD::VSHF"; ++ case LoongArchISD::SHF: ++ return "LoongArchISD::SHF"; ++ case LoongArchISD::VPACKEV: ++ return "LoongArchISD::VPACKEV"; ++ case LoongArchISD::VPACKOD: ++ return "LoongArchISD::VPACKOD"; ++ case LoongArchISD::VILVH: ++ return "LoongArchISD::VILVH"; ++ case LoongArchISD::VILVL: ++ return "LoongArchISD::VILVL"; ++ case LoongArchISD::VPICKEV: ++ return "LoongArchISD::VPICKEV"; ++ case LoongArchISD::VPICKOD: ++ return "LoongArchISD::VPICKOD"; ++ case LoongArchISD::INSVE: ++ return "LoongArchISD::INSVE"; ++ case LoongArchISD::VROR: ++ return "LoongArchISD::VROR"; ++ case LoongArchISD::VRORI: ++ return "LoongArchISD::VRORI"; ++ 
case LoongArchISD::XVBROADCAST: ++ return "LoongArchISD::XVBROADCAST"; ++ case LoongArchISD::VBROADCAST: ++ return "LoongArchISD::VBROADCAST"; ++ case LoongArchISD::VABSD: ++ return "LoongArchISD::VABSD"; ++ case LoongArchISD::UVABSD: ++ return "LoongArchISD::UVABSD"; ++ case LoongArchISD::XVPICKVE: ++ return "LoongArchISD::XVPICKVE"; ++ case LoongArchISD::XVPERMI: ++ return "LoongArchISD::XVPERMI"; ++ case LoongArchISD::XVSHUF4I: ++ return "LoongArchISD::XVSHUF4I"; ++ case LoongArchISD::REVBD: ++ return "LoongArchISD::REVBD"; ++ case LoongArchISD::FSEL: ++ return "LoongArchISD::FSEL"; ++ } ++ return nullptr; ++} ++ ++LoongArchTargetLowering::LoongArchTargetLowering(const LoongArchTargetMachine &TM, ++ const LoongArchSubtarget &STI) ++ : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { ++ // Set up the register classes ++ addRegisterClass(MVT::i32, &LoongArch::GPR32RegClass); ++ ++ if (Subtarget.is64Bit()) ++ addRegisterClass(MVT::i64, &LoongArch::GPR64RegClass); ++ ++ // LoongArch does not have i1 type, so use i32 for ++ // setcc operations results (slt, sgt, ...). ++ setBooleanContents(ZeroOrOneBooleanContent); ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); ++ ++ // Load extented operations for i1 types must be promoted ++ for (MVT VT : MVT::integer_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); ++ } ++ ++ // LoongArch doesn't have extending float->double load/store. Set LoadExtAction ++ // for f32, f16 ++ for (MVT VT : MVT::fp_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); ++ } ++ ++ // Set LoadExtAction for f16 vectors to Expand ++ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { ++ MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); ++ if (F16VT.isValid()) ++ setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); ++ } ++ ++ setTruncStoreAction(MVT::f32, MVT::f16, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f16, Expand); ++ ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); ++ ++ // Used by legalize types to correctly generate the setcc result. ++ // Without this, every float setcc comes with a AND/OR with the result, ++ // we don't want this, since the fpcmp result goes to a flag register, ++ // which is used implicitly by brcond and select operations. 
++ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); ++ ++ // LoongArch Custom Operations ++ setOperationAction(ISD::BR_JT, MVT::Other, Expand); ++ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i32, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i32, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i32, Custom); ++ setOperationAction(ISD::SELECT, MVT::f32, Custom); ++ setOperationAction(ISD::SELECT, MVT::f64, Custom); ++ setOperationAction(ISD::SELECT, MVT::i32, Custom); ++ setOperationAction(ISD::SETCC, MVT::f32, Custom); ++ setOperationAction(ISD::SETCC, MVT::f64, Custom); ++ setOperationAction(ISD::BRCOND, MVT::Other, Custom); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); ++ ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i64, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i64, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i64, Custom); ++ setOperationAction(ISD::SELECT, MVT::i64, Custom); ++ setOperationAction(ISD::LOAD, MVT::i64, Legal); ++ setOperationAction(ISD::STORE, MVT::i64, Legal); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); ++ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); ++ } ++ ++ if (!Subtarget.is64Bit()) { ++ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); ++ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); ++ } ++ ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); ++ ++ setOperationAction(ISD::SDIV, MVT::i32, Expand); ++ setOperationAction(ISD::SREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIV, MVT::i32, Expand); ++ setOperationAction(ISD::UREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIV, MVT::i64, Expand); ++ setOperationAction(ISD::SREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIV, MVT::i64, Expand); ++ setOperationAction(ISD::UREM, MVT::i64, Expand); ++ ++ // Operations not directly supported by LoongArch. 
++ setOperationAction(ISD::BR_CC, MVT::f32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::f64, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); ++ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); ++ setOperationAction(ISD::CTPOP, MVT::i32, Expand); ++ setOperationAction(ISD::CTPOP, MVT::i64, Expand); ++ setOperationAction(ISD::ROTL, MVT::i32, Expand); ++ setOperationAction(ISD::ROTL, MVT::i64, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); ++ ++ setOperationAction(ISD::FSIN, MVT::f32, Expand); ++ setOperationAction(ISD::FSIN, MVT::f64, Expand); ++ setOperationAction(ISD::FCOS, MVT::f32, Expand); ++ setOperationAction(ISD::FCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f32, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FPOW, MVT::f32, Expand); ++ setOperationAction(ISD::FPOW, MVT::f64, Expand); ++ setOperationAction(ISD::FLOG, MVT::f32, Expand); ++ setOperationAction(ISD::FRINT, MVT::f32, Legal); ++ setOperationAction(ISD::FRINT, MVT::f64, Legal); ++ ++ setOperationAction(ISD::FLOG10, MVT::f32, Expand); ++ setOperationAction(ISD::FEXP, MVT::f32, Expand); ++ setOperationAction(ISD::FMA, MVT::f32, Legal); ++ setOperationAction(ISD::FMA, MVT::f64, Legal); ++ setOperationAction(ISD::FREM, MVT::f32, Expand); ++ setOperationAction(ISD::FREM, MVT::f64, Expand); ++ ++ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); ++ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); ++ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); ++ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); ++ ++ // Lower f16 conversion operations into library calls ++ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); ++ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); ++ ++ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); ++ ++ setOperationAction(ISD::VASTART, MVT::Other, Custom); ++ setOperationAction(ISD::VAARG, MVT::Other, Custom); ++ setOperationAction(ISD::VACOPY, MVT::Other, Expand); ++ setOperationAction(ISD::VAEND, MVT::Other, Expand); ++ ++ // Use the default for now ++ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); ++ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); ++ ++ if (!Subtarget.is64Bit()) { ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); ++ } ++ ++ if (Subtarget.is64Bit()) { ++ setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); ++ setTruncStoreAction(MVT::i64, MVT::i32, Custom); ++ } ++ ++ setOperationAction(ISD::TRAP, MVT::Other, Legal); ++ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); ++ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); ++ ++ setTargetDAGCombine(ISD::SELECT); ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ 
setTargetDAGCombine(ISD::AssertZext); ++ setTargetDAGCombine(ISD::SHL); ++ setTargetDAGCombine(ISD::SIGN_EXTEND); ++ setTargetDAGCombine(ISD::ZERO_EXTEND); ++ setTargetDAGCombine(ISD::ADD); ++ setTargetDAGCombine(ISD::SUB); ++ setTargetDAGCombine(ISD::MUL); ++ setTargetDAGCombine(ISD::SRL); ++ setTargetDAGCombine(ISD::SRA); ++ ++ if (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ // Expand all truncating stores and extending loads. ++ for (MVT VT0 : MVT::vector_valuetypes()) { ++ for (MVT VT1 : MVT::vector_valuetypes()) { ++ setTruncStoreAction(VT0, VT1, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); ++ } ++ } ++ } ++ ++ if (Subtarget.hasLSX()) { ++ addLSXIntType(MVT::v16i8, &LoongArch::LSX128BRegClass); ++ addLSXIntType(MVT::v8i16, &LoongArch::LSX128HRegClass); ++ addLSXIntType(MVT::v4i32, &LoongArch::LSX128WRegClass); ++ addLSXIntType(MVT::v2i64, &LoongArch::LSX128DRegClass); ++ addLSXFloatType(MVT::v4f32, &LoongArch::LSX128WRegClass); ++ addLSXFloatType(MVT::v2f64, &LoongArch::LSX128DRegClass); ++ ++ // f16 is a storage-only type, always promote it to f32. ++ setOperationAction(ISD::SETCC, MVT::f16, Promote); ++ setOperationAction(ISD::BR_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT, MVT::f16, Promote); ++ setOperationAction(ISD::FADD, MVT::f16, Promote); ++ setOperationAction(ISD::FSUB, MVT::f16, Promote); ++ setOperationAction(ISD::FMUL, MVT::f16, Promote); ++ setOperationAction(ISD::FDIV, MVT::f16, Promote); ++ setOperationAction(ISD::FREM, MVT::f16, Promote); ++ setOperationAction(ISD::FMA, MVT::f16, Promote); ++ setOperationAction(ISD::FNEG, MVT::f16, Promote); ++ setOperationAction(ISD::FABS, MVT::f16, Promote); ++ setOperationAction(ISD::FCEIL, MVT::f16, Promote); ++ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); ++ setOperationAction(ISD::FCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); ++ setOperationAction(ISD::FFLOOR, MVT::f16, Promote); ++ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); ++ setOperationAction(ISD::FPOW, MVT::f16, Promote); ++ setOperationAction(ISD::FPOWI, MVT::f16, Promote); ++ setOperationAction(ISD::FRINT, MVT::f16, Promote); ++ setOperationAction(ISD::FSIN, MVT::f16, Promote); ++ setOperationAction(ISD::FSINCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FSQRT, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG10, MVT::f16, Promote); ++ setOperationAction(ISD::FROUND, MVT::f16, Promote); ++ setOperationAction(ISD::FTRUNC, MVT::f16, Promote); ++ setOperationAction(ISD::FMINNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ } ++ ++ if (Subtarget.hasLASX()) { ++ addLASXIntType(MVT::v32i8, &LoongArch::LASX256BRegClass); ++ addLASXIntType(MVT::v16i16, 
&LoongArch::LASX256HRegClass); ++ addLASXIntType(MVT::v8i32, &LoongArch::LASX256WRegClass); ++ addLASXIntType(MVT::v4i64, &LoongArch::LASX256DRegClass); ++ addLASXFloatType(MVT::v8f32, &LoongArch::LASX256WRegClass); ++ addLASXFloatType(MVT::v4f64, &LoongArch::LASX256DRegClass); ++ ++ // f16 is a storage-only type, always promote it to f32. ++ setOperationAction(ISD::SETCC, MVT::f16, Promote); ++ setOperationAction(ISD::BR_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT, MVT::f16, Promote); ++ setOperationAction(ISD::FADD, MVT::f16, Promote); ++ setOperationAction(ISD::FSUB, MVT::f16, Promote); ++ setOperationAction(ISD::FMUL, MVT::f16, Promote); ++ setOperationAction(ISD::FDIV, MVT::f16, Promote); ++ setOperationAction(ISD::FREM, MVT::f16, Promote); ++ setOperationAction(ISD::FMA, MVT::f16, Promote); ++ setOperationAction(ISD::FNEG, MVT::f16, Promote); ++ setOperationAction(ISD::FABS, MVT::f16, Promote); ++ setOperationAction(ISD::FCEIL, MVT::f16, Promote); ++ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); ++ setOperationAction(ISD::FCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); ++ setOperationAction(ISD::FFLOOR, MVT::f16, Promote); ++ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); ++ setOperationAction(ISD::FPOW, MVT::f16, Promote); ++ setOperationAction(ISD::FPOWI, MVT::f16, Promote); ++ setOperationAction(ISD::FRINT, MVT::f16, Promote); ++ setOperationAction(ISD::FSIN, MVT::f16, Promote); ++ setOperationAction(ISD::FSINCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FSQRT, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG10, MVT::f16, Promote); ++ setOperationAction(ISD::FROUND, MVT::f16, Promote); ++ setOperationAction(ISD::FTRUNC, MVT::f16, Promote); ++ setOperationAction(ISD::FMINNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ } ++ ++ if (Subtarget.hasBasicF()) ++ addRegisterClass(MVT::f32, &LoongArch::FGR32RegClass); ++ ++ if (Subtarget.hasBasicD()) ++ addRegisterClass(MVT::f64, &LoongArch::FGR64RegClass); ++ ++ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); ++ ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::MUL, MVT::i64, Custom); ++ ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); ++ setOperationAction(ISD::SDIVREM, MVT::i64, Custom); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Custom); ++ } ++ ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ ++ setOperationAction(ISD::SDIVREM, MVT::i32, Custom); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Custom); ++ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); ++ setOperationAction(ISD::LOAD, MVT::i32, Legal); ++ setOperationAction(ISD::STORE, MVT::i32, Legal); ++ ++ setTargetDAGCombine(ISD::MUL); ++ ++ 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); ++ ++ // Replace the accumulator-based multiplies with a ++ // three register instruction. ++ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); ++ setOperationAction(ISD::MUL, MVT::i32, Legal); ++ setOperationAction(ISD::MULHS, MVT::i32, Legal); ++ setOperationAction(ISD::MULHU, MVT::i32, Legal); ++ ++ // Replace the accumulator-based division/remainder with separate ++ // three register division and remainder instructions. ++ setOperationAction(ISD::SDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIV, MVT::i32, Legal); ++ setOperationAction(ISD::UDIV, MVT::i32, Legal); ++ setOperationAction(ISD::SREM, MVT::i32, Legal); ++ setOperationAction(ISD::UREM, MVT::i32, Legal); ++ ++ // Replace the accumulator-based multiplies with a ++ // three register instruction. ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::MUL, MVT::i64, Legal); ++ setOperationAction(ISD::MULHS, MVT::i64, Legal); ++ setOperationAction(ISD::MULHU, MVT::i64, Legal); ++ ++ // Replace the accumulator-based division/remainder with separate ++ // three register division and remainder instructions. ++ setOperationAction(ISD::SDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::SDIV, MVT::i64, Legal); ++ setOperationAction(ISD::UDIV, MVT::i64, Legal); ++ setOperationAction(ISD::SREM, MVT::i64, Legal); ++ setOperationAction(ISD::UREM, MVT::i64, Legal); ++ ++ MaxGluedStoresPerMemcpy = 4; ++ ++ setMinFunctionAlignment(Subtarget.is64Bit() ? Align(8) : Align(4)); ++ ++ // The arguments on the stack are defined in terms of 4-byte slots on 32bit ++ // target and 8-byte slots on 64bit target. ++ setMinStackArgumentAlignment(Subtarget.is64Bit() ? Align(8) : Align(4)); ++ ++ setStackPointerRegisterToSaveRestore(Subtarget.is64Bit() ? LoongArch::SP_64 ++ : LoongArch::SP); ++ ++ if (Subtarget.hasLASX()) { ++ // = 16*32/2; the smallest memcpy; ++ MaxStoresPerMemcpy = 16; ++ } else if (Subtarget.hasLSX()) { ++ MaxStoresPerMemcpy = 65535; ++ } else { ++ MaxStoresPerMemcpy = 16; ++ } ++ ++ computeRegisterProperties(Subtarget.getRegisterInfo()); ++} ++ ++// Enable LSX support for the given integer type and Register class. ++void LoongArchTargetLowering::addLSXIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::ABS, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_SUBVECTOR, Ty, Legal); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::CTLZ, Ty, Legal); ++ setOperationAction(ISD::CTPOP, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SHL, Ty, Legal); ++ setOperationAction(ISD::SRA, Ty, Legal); ++ setOperationAction(ISD::SRL, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ setOperationAction(ISD::SMAX, Ty, Legal); ++ setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ setOperationAction(ISD::MULHS, Ty, Legal); ++ setOperationAction(ISD::MULHU, Ty, Legal); ++ ++ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { ++ setOperationAction(ISD::SINT_TO_FP, Ty, Custom); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++} ++ ++// Enable LASX support for the given integer type and Register class. ++void LoongArchTargetLowering::addLASXIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ // FIXME ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::UADDSAT, Ty, Legal); ++ setOperationAction(ISD::SADDSAT, Ty, Legal); ++ setOperationAction(ISD::USUBSAT, Ty, Legal); ++ setOperationAction(ISD::SSUBSAT, Ty, Legal); ++ setOperationAction(ISD::ABS, Ty, Legal); ++ ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::CTLZ, Ty, Legal); ++ setOperationAction(ISD::CTPOP, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SHL, Ty, Legal); ++ setOperationAction(ISD::SRA, Ty, Legal); ++ setOperationAction(ISD::SRL, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ setOperationAction(ISD::SMAX, Ty, Legal); ++ setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ setOperationAction(ISD::INSERT_SUBVECTOR, Ty, Legal); ++ setOperationAction(ISD::MULHS, Ty, Legal); ++ setOperationAction(ISD::MULHU, Ty, Legal); ++ ++ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, Ty, Legal); ++ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, Ty, Legal); ++ ++ setOperationAction(ISD::SIGN_EXTEND, Ty, Legal); ++ setOperationAction(ISD::ZERO_EXTEND, Ty, Legal); ++ ++ if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { ++ setOperationAction(ISD::SINT_TO_FP, Ty, Custom); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Custom); ++ } ++ ++ setTargetDAGCombine(ISD::CONCAT_VECTORS); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++} ++ ++// Enable LSX support for the given floating-point type and Register class. ++void LoongArchTargetLowering::addLSXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ if (Ty == MVT::v4f32 || Ty == MVT::v2f64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::FNEG, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++} ++ ++// Enable LASX support for the given floating-point type and Register class. ++void LoongArchTargetLowering::addLASXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. ++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::FNEG, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ ++ if (Ty == MVT::v8f32 || Ty == MVT::v4f64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++} ++ ++bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, ++ bool *Fast) const { ++ if (!Subtarget.allowUnalignedAccess()) ++ return false; ++ if (Fast) ++ *Fast = true; ++ return true; ++} ++ ++EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, ++ EVT VT) const { ++ if (!VT.isVector()) ++ return MVT::i32; ++ return 
VT.changeVectorElementTypeToInteger(); ++} ++ ++static LoongArch::CondCode condCodeToFCC(ISD::CondCode CC) { ++ switch (CC) { ++ default: llvm_unreachable("Unknown fp condition code!"); ++ case ISD::SETEQ: ++ case ISD::SETOEQ: return LoongArch::FCOND_OEQ; ++ case ISD::SETUNE: return LoongArch::FCOND_UNE; ++ case ISD::SETLT: ++ case ISD::SETOLT: return LoongArch::FCOND_OLT; ++ case ISD::SETGT: ++ case ISD::SETOGT: return LoongArch::FCOND_OGT; ++ case ISD::SETLE: ++ case ISD::SETOLE: return LoongArch::FCOND_OLE; ++ case ISD::SETGE: ++ case ISD::SETOGE: return LoongArch::FCOND_OGE; ++ case ISD::SETULT: return LoongArch::FCOND_ULT; ++ case ISD::SETULE: return LoongArch::FCOND_ULE; ++ case ISD::SETUGT: return LoongArch::FCOND_UGT; ++ case ISD::SETUGE: return LoongArch::FCOND_UGE; ++ case ISD::SETUO: return LoongArch::FCOND_UN; ++ case ISD::SETO: return LoongArch::FCOND_OR; ++ case ISD::SETNE: ++ case ISD::SETONE: return LoongArch::FCOND_ONE; ++ case ISD::SETUEQ: return LoongArch::FCOND_UEQ; ++ } ++} ++ ++/// This function returns true if the floating point conditional branches and ++/// conditional moves which use condition code CC should be inverted. ++static bool invertFPCondCodeUser(LoongArch::CondCode CC) { ++ if (CC >= LoongArch::FCOND_F && CC <= LoongArch::FCOND_SUNE) ++ return false; ++ ++ assert((CC >= LoongArch::FCOND_T && CC <= LoongArch::FCOND_GT) && ++ "Illegal Condition Code"); ++ ++ return true; ++} ++ ++// Creates and returns an FPCmp node from a setcc node. ++// Returns Op if setcc is not a floating point comparison. ++static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { ++ // must be a SETCC node ++ if (Op.getOpcode() != ISD::SETCC) ++ return Op; ++ ++ SDValue LHS = Op.getOperand(0); ++ ++ if (!LHS.getValueType().isFloatingPoint()) ++ return Op; ++ ++ SDValue RHS = Op.getOperand(1); ++ SDLoc DL(Op); ++ ++ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of ++ // node if necessary. ++ ISD::CondCode CC = cast(Op.getOperand(2))->get(); ++ ++ return DAG.getNode(LoongArchISD::FPCmp, DL, MVT::Glue, LHS, RHS, ++ DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32)); ++} ++ ++// Creates and returns a CMovFPT/F node. ++static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, ++ SDValue False, const SDLoc &DL) { ++ ConstantSDNode *CC = cast(Cond.getOperand(2)); ++ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); ++ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ ++ return DAG.getNode((invert ? LoongArchISD::CMovFP_F : LoongArchISD::CMovFP_T), DL, ++ True.getValueType(), True, FCC0, False, Cond); ++ ++} ++ ++static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue SetCC = N->getOperand(0); ++ ++ if ((SetCC.getOpcode() != ISD::SETCC) || ++ !SetCC.getOperand(0).getValueType().isInteger()) ++ return SDValue(); ++ ++ SDValue False = N->getOperand(2); ++ EVT FalseTy = False.getValueType(); ++ ++ if (!FalseTy.isInteger()) ++ return SDValue(); ++ ++ ConstantSDNode *FalseC = dyn_cast(False); ++ ++ // If the RHS (False) is 0, we swap the order of the operands ++ // of ISD::SELECT (obviously also inverting the condition) so that we can ++ // take advantage of conditional moves using the $0 register. ++ // Example: ++ // return (a != 0) ? 
x : 0; ++ // load $reg, x ++ // movz $reg, $0, a ++ if (!FalseC) ++ return SDValue(); ++ ++ const SDLoc DL(N); ++ ++ if (!FalseC->getZExtValue()) { ++ ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); ++ SDValue True = N->getOperand(1); ++ ++ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), ++ SetCC.getOperand(1), ++ ISD::getSetCCInverse(CC, SetCC.getValueType())); ++ ++ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); ++ } ++ ++ // If both operands are integer constants there's a possibility that we ++ // can do some interesting optimizations. ++ SDValue True = N->getOperand(1); ++ ConstantSDNode *TrueC = dyn_cast(True); ++ ++ if (!TrueC || !True.getValueType().isInteger()) ++ return SDValue(); ++ ++ // We'll also ignore MVT::i64 operands as this optimizations proves ++ // to be ineffective because of the required sign extensions as the result ++ // of a SETCC operator is always MVT::i32 for non-vector types. ++ if (True.getValueType() == MVT::i64) ++ return SDValue(); ++ ++ int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue(); ++ ++ // 1) (a < x) ? y : y-1 ++ // slti $reg1, a, x ++ // addiu $reg2, $reg1, y-1 ++ if (Diff == 1) ++ return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False); ++ ++ // 2) (a < x) ? y-1 : y ++ // slti $reg1, a, x ++ // xor $reg1, $reg1, 1 ++ // addiu $reg2, $reg1, y-1 ++ if (Diff == -1) { ++ ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); ++ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), ++ SetCC.getOperand(1), ++ ISD::getSetCCInverse(CC, SetCC.getValueType())); ++ return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True); ++ } ++ ++ // Could not optimize. ++ return SDValue(); ++} ++ ++static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ if (Subtarget.hasLSX()) { ++ ++ // Fold zero extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT ++ // ++ // Performs the following transformations: ++ // - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its ++ // sign/zero-extension is completely overwritten by the new one performed ++ // by the ISD::AND. ++ // - Removes redundant zero extensions performed by an ISD::AND. 
++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ unsigned Op0Opcode = Op0->getOpcode(); ++ ++ // (and (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d) ++ // where $d + 1 == 2^n and n == 32 ++ // or $d + 1 == 2^n and n <= 32 and ZExt ++ // -> (LoongArchVExtractZExt $a, $b, $c) ++ if (Op0Opcode == LoongArchISD::VEXTRACT_SEXT_ELT || ++ Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT) { ++ ConstantSDNode *Mask = dyn_cast(Op1); ++ ++ if (Mask) { ++ ++ int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); ++ ++ if (Log2IfPositive > 0) { ++ SDValue Op0Op2 = Op0->getOperand(2); ++ EVT ExtendTy = cast(Op0Op2)->getVT(); ++ unsigned ExtendTySize = ExtendTy.getSizeInBits(); ++ unsigned Log2 = Log2IfPositive; ++ ++ if ((Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT && ++ Log2 >= ExtendTySize) || ++ Log2 == ExtendTySize) { ++ SDValue Ops[] = {Op0->getOperand(0), Op0->getOperand(1), Op0Op2}; ++ return DAG.getNode(LoongArchISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), ++ Op0->getVTList(), ++ makeArrayRef(Ops, Op0->getNumOperands())); ++ } ++ } ++ } ++ } ++ } ++ ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue FirstOperand = N->getOperand(0); ++ unsigned FirstOperandOpc = FirstOperand.getOpcode(); ++ SDValue Mask = N->getOperand(1); ++ EVT ValTy = N->getValueType(0); ++ SDLoc DL(N); ++ ++ uint64_t Lsb = 0, SMLsb, SMSize; ++ ConstantSDNode *CN; ++ SDValue NewOperand; ++ unsigned Opc; ++ ++ // Op's second operand must be a shifted mask. ++ if (!(CN = dyn_cast(Mask)) || ++ !isShiftedMask(CN->getZExtValue(), SMLsb, SMSize)) ++ return SDValue(); ++ ++ if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { ++ // Pattern match BSTRPICK. ++ // $dst = and ((sra or srl) $src , lsb), (2**size - 1) ++ // => bstrpick $dst, $src, lsb+size-1, lsb ++ ++ // The second operand of the shift must be an immediate. ++ if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) ++ return SDValue(); ++ ++ Lsb = CN->getZExtValue(); ++ ++ // Return if the shifted mask does not start at bit 0 or the sum of its size ++ // and Lsb exceeds the word's size. ++ if (SMLsb != 0 || Lsb + SMSize > ValTy.getSizeInBits()) ++ return SDValue(); ++ ++ Opc = LoongArchISD::BSTRPICK; ++ NewOperand = FirstOperand.getOperand(0); ++ } else { ++ // Pattern match BSTRPICK. ++ // $dst = and $src, (2**size - 1) , if size > 12 ++ // => bstrpick $dst, $src, lsb+size-1, lsb , lsb = 0 ++ ++ // If the mask is <= 0xfff, andi can be used instead. ++ if (CN->getZExtValue() <= 0xfff) ++ return SDValue(); ++ // Return if the mask doesn't start at position 0. ++ if (SMLsb) ++ return SDValue(); ++ ++ Opc = LoongArchISD::BSTRPICK; ++ NewOperand = FirstOperand; ++ } ++ return DAG.getNode(Opc, DL, ValTy, NewOperand, ++ DAG.getConstant((Lsb + SMSize - 1), DL, MVT::i32), ++ DAG.getConstant(Lsb, DL, MVT::i32)); ++} ++ ++// Determine if the specified node is a constant vector splat. ++// ++// Returns true and sets Imm if: ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++static bool isVSplat(SDValue N, APInt &Imm) { ++ BuildVectorSDNode *Node = dyn_cast(N.getNode()); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++// Test whether the given node is an all-ones build_vector. ++static bool isVectorAllOnes(SDValue N) { ++ // Look through bitcasts. 
Endianness doesn't matter because we are looking ++ // for an all-ones value. ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ BuildVectorSDNode *BVN = dyn_cast(N); ++ ++ if (!BVN) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ // Endianness doesn't matter in this context because we are looking for ++ // an all-ones value. ++ if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) ++ return SplatValue.isAllOnesValue(); ++ ++ return false; ++} ++ ++// Test whether N is the bitwise inverse of OfNode. ++static bool isBitwiseInverse(SDValue N, SDValue OfNode) { ++ if (N->getOpcode() != ISD::XOR) ++ return false; ++ ++ if (isVectorAllOnes(N->getOperand(0))) ++ return N->getOperand(1) == OfNode; ++ ++ if (isVectorAllOnes(N->getOperand(1))) ++ return N->getOperand(0) == OfNode; ++ ++ return false; ++} ++ ++static SDValue performSet(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ SDValue N1, N2; ++ if (Op0->getOpcode() == ISD::BUILD_VECTOR && ++ (Op1->getValueType(0).is128BitVector() || ++ Op1->getValueType(0).is256BitVector())) { ++ N1 = Op0; ++ N2 = Op1; ++ } else if (Op1->getOpcode() == ISD::BUILD_VECTOR && ++ (Op0->getValueType(0).is128BitVector() || ++ Op0->getValueType(0).is256BitVector())) { ++ N1 = Op1; ++ N2 = Op0; ++ } else ++ return SDValue(); ++ ++ APInt Mask1, Mask2; ++ if (!isVSplat(N1, Mask1)) ++ return SDValue(); ++ ++ if (!N1->getValueType(0).isSimple()) ++ return SDValue(); ++ ++ ConstantSDNode *C1; ++ uint64_t Imm; ++ unsigned ImmL; ++ if (!(C1 = dyn_cast(N1.getOperand(0))) || ++ !isPowerOf2_64(C1->getZExtValue())) ++ return SDValue(); ++ ++ Imm = C1->getZExtValue(); ++ ImmL = Log2_64(Imm); ++ MVT VT = N1->getSimpleValueType(0).SimpleTy; ++ ++ SDNode *Res; ++ ++ if (Subtarget.hasLASX() && N->getValueType(0).is256BitVector()) { ++ if (VT == MVT::v32i8 && ImmL < 8) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_B, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v16i16 && ImmL < 16) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_H, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v8i32 && ImmL < 32) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_W, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v4i64 && ImmL < 64) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_D, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else ++ return SDValue(); ++ } else if (N->getValueType(0).is128BitVector()) { ++ if (VT == MVT::v16i8 && ImmL < 8) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_B, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v8i16 && ImmL < 16) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_H, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v4i32 && ImmL < 32) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_W, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v2i64 && ImmL < 64) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_D, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else ++ return SDValue(); ++ ++ } else ++ return SDValue(); ++ ++ return SDValue(Res, 0); ++} ++ ++static SDValue performORCombine(SDNode *N, 
SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Res; ++ if (Subtarget.hasLSX() && (N->getValueType(0).is128BitVector() || ++ N->getValueType(0).is256BitVector())) { ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ ++ if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { ++ SDValue Op0Op0 = Op0->getOperand(0); ++ SDValue Op0Op1 = Op0->getOperand(1); ++ SDValue Op1Op0 = Op1->getOperand(0); ++ SDValue Op1Op1 = Op1->getOperand(1); ++ ++ SDValue IfSet, IfClr, Cond; ++ bool IsConstantMask = false; ++ APInt Mask, InvMask; ++ ++ // If Op0Op0 is an appropriate mask, try to find it's inverse in either ++ // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, ++ // while looking. IfClr will be set if we find a valid match. ++ if (isVSplat(Op0Op0, Mask)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ ++ if (isVSplat(Op1Op0, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) ++ IfClr = Op1Op1; ++ else if (isVSplat(Op1Op1, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && ++ Mask == ~InvMask) ++ IfClr = Op1Op0; ++ ++ IsConstantMask = true; ++ } ++ ++ // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the ++ // same thing again using this mask. IfClr will be set if we find a valid ++ // match. ++ if (!IfClr.getNode() && isVSplat(Op0Op1, Mask)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ ++ if (isVSplat(Op1Op0, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) ++ IfClr = Op1Op1; ++ else if (isVSplat(Op1Op1, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && ++ Mask == ~InvMask) ++ IfClr = Op1Op0; ++ ++ IsConstantMask = true; ++ } ++ ++ // If IfClr is not yet set, try looking for a non-constant match. ++ // IfClr will be set if we find a valid match amongst the eight ++ // possibilities. ++ if (!IfClr.getNode()) { ++ if (isBitwiseInverse(Op0Op0, Op1Op0)) { ++ Cond = Op1Op0; ++ IfSet = Op1Op1; ++ IfClr = Op0Op1; ++ } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { ++ Cond = Op1Op0; ++ IfSet = Op1Op1; ++ IfClr = Op0Op0; ++ } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { ++ Cond = Op1Op1; ++ IfSet = Op1Op0; ++ IfClr = Op0Op1; ++ } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { ++ Cond = Op1Op1; ++ IfSet = Op1Op0; ++ IfClr = Op0Op0; ++ } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ IfClr = Op1Op1; ++ } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ IfClr = Op1Op0; ++ } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ IfClr = Op1Op1; ++ } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ IfClr = Op1Op0; ++ } ++ } ++ ++ // At this point, IfClr will be set if we have a valid match. ++ if (IfClr.getNode()) { ++ assert(Cond.getNode() && IfSet.getNode()); ++ ++ // Fold degenerate cases. ++ if (IsConstantMask) { ++ if (Mask.isAllOnesValue()) ++ return IfSet; ++ else if (Mask == 0) ++ return IfClr; ++ } ++ ++ // Transform the DAG into an equivalent VSELECT. ++ return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), Cond, ++ IfSet, IfClr); ++ } ++ } ++ ++ if (Res = performSet(N, DAG, DCI, Subtarget)) ++ return Res; ++ } ++ ++ // Pattern match BSTRINS. 
++ // $dst = or (and $src1 , mask0), (and (shl $src, lsb), mask1), ++ // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 ++ // => bstrins $dst, $src, lsb+size-1, lsb, $src1 ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); ++ uint64_t SMLsb0, SMSize0, SMLsb1, SMSize1; ++ ConstantSDNode *CN, *CN1; ++ ++ // See if Op's first operand matches (and $src1 , mask0). ++ if (And0.getOpcode() != ISD::AND) ++ return SDValue(); ++ ++ if (!(CN = dyn_cast(And0.getOperand(1))) || ++ !isShiftedMask(~CN->getSExtValue(), SMLsb0, SMSize0)) ++ return SDValue(); ++ ++ // See if Op's second operand matches (and (shl $src, lsb), mask1). ++ if (And1.getOpcode() == ISD::AND && ++ And1.getOperand(0).getOpcode() == ISD::SHL) { ++ ++ if (!(CN = dyn_cast(And1.getOperand(1))) || ++ !isShiftedMask(CN->getZExtValue(), SMLsb1, SMSize1)) ++ return SDValue(); ++ ++ // The shift masks must have the same least significant bit and size. ++ if (SMLsb0 != SMLsb1 || SMSize0 != SMSize1) ++ return SDValue(); ++ ++ SDValue Shl = And1.getOperand(0); ++ ++ if (!(CN = dyn_cast(Shl.getOperand(1)))) ++ return SDValue(); ++ ++ unsigned Shamt = CN->getZExtValue(); ++ ++ // Return if the shift amount and the first bit position of mask are not the ++ // same. ++ EVT ValTy = N->getValueType(0); ++ if ((Shamt != SMLsb0) || (SMLsb0 + SMSize0 > ValTy.getSizeInBits())) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, Shl.getOperand(0), ++ DAG.getConstant((SMLsb0 + SMSize0 - 1), DL, MVT::i32), ++ DAG.getConstant(SMLsb0, DL, MVT::i32), ++ And0.getOperand(0)); ++ } else { ++ // Pattern match BSTRINS. ++ // $dst = or (and $src, mask0), mask1 ++ // where mask0 = ((1 << SMSize0) -1) << SMLsb0 ++ // => bstrins $dst, $src, SMLsb0+SMSize0-1, SMLsb0 ++ if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMLsb0) && ++ (SMSize0 + SMLsb0 <= 64)) { ++ // Check if AND instruction has constant as argument ++ bool isConstCase = And1.getOpcode() != ISD::AND; ++ if (And1.getOpcode() == ISD::AND) { ++ if (!(CN1 = dyn_cast(And1->getOperand(1)))) ++ return SDValue(); ++ } else { ++ if (!(CN1 = dyn_cast(N->getOperand(1)))) ++ return SDValue(); ++ } ++ // Don't generate BSTRINS if constant OR operand doesn't fit into bits ++ // cleared by constant AND operand. ++ if (CN->getSExtValue() & CN1->getSExtValue()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ EVT ValTy = N->getOperand(0)->getValueType(0); ++ SDValue Const1; ++ SDValue SrlX; ++ if (!isConstCase) { ++ Const1 = DAG.getConstant(SMLsb0, DL, MVT::i32); ++ SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); ++ } ++ return DAG.getNode( ++ LoongArchISD::BSTRINS, DL, N->getValueType(0), ++ isConstCase ++ ? DAG.getConstant(CN1->getSExtValue() >> SMLsb0, DL, ValTy) ++ : SrlX, ++ DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? (SMLsb0 + (SMSize0 & 31) - 1) ++ : (SMLsb0 + SMSize0 - 1), ++ DL, MVT::i32), ++ DAG.getConstant(SMLsb0, DL, MVT::i32), ++ And0->getOperand(0)); ++ ++ } ++ return SDValue(); ++ } ++} ++ ++static bool ++shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ // Estimate the number of operations the below transform will turn a ++ // constant multiply into. The number is approximately equal to the minimal ++ // number of powers of two that constant can be broken down to by adding ++ // or subtracting them. 
++ // ++ // If we have taken more than 10[1] / 8[2] steps to attempt the ++ // optimization for a native sized value, it is more than likely that this ++ // optimization will make things worse. ++ // ++ // [1] LA64 requires 4 instructions at most to materialize any constant, ++ // multiplication requires at least 4 cycles, but another cycle (or two) ++ // to retrieve the result from corresponding registers. ++ // ++ // [2] LA32 requires 2 instructions at most to materialize any constant, ++ // multiplication requires at least 4 cycles, but another cycle (or two) ++ // to retrieve the result from corresponding registers. ++ // ++ // TODO: ++ // - MaxSteps needs to consider the `VT` of the constant for the current ++ // target. ++ // - Consider to perform this optimization after type legalization. ++ // That allows to remove a workaround for types not supported natively. ++ // - Take in account `-Os, -Oz` flags because this optimization ++ // increases code size. ++ unsigned MaxSteps = Subtarget.is64Bit() ? 10 : 8; ++ ++ SmallVector WorkStack(1, C); ++ unsigned Steps = 0; ++ unsigned BitWidth = C.getBitWidth(); ++ ++ while (!WorkStack.empty()) { ++ APInt Val = WorkStack.pop_back_val(); ++ ++ if (Val == 0 || Val == 1) ++ continue; ++ ++ if (Steps >= MaxSteps) ++ return false; ++ ++ if (Val.isPowerOf2()) { ++ ++Steps; ++ continue; ++ } ++ ++ APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); ++ APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) ++ : APInt(BitWidth, 1) << C.ceilLogBase2(); ++ ++ if ((Val - Floor).ule(Ceil - Val)) { ++ WorkStack.push_back(Floor); ++ WorkStack.push_back(Val - Floor); ++ } else { ++ WorkStack.push_back(Ceil); ++ WorkStack.push_back(Ceil - Val); ++ } ++ ++ ++Steps; ++ } ++ ++ // If the value being multiplied is not supported natively, we have to pay ++ // an additional legalization cost, conservatively assume an increase in the ++ // cost of 3 instructions per step. This values for this heuristic were ++ // determined experimentally. ++ unsigned RegisterSize = DAG.getTargetLoweringInfo() ++ .getRegisterType(*DAG.getContext(), VT) ++ .getSizeInBits(); ++ Steps *= (VT.getSizeInBits() != RegisterSize) * 3; ++ if (Steps > 27) ++ return false; ++ ++ return true; ++} ++ ++static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, ++ EVT ShiftTy, SelectionDAG &DAG) { ++ // Return 0. ++ if (C == 0) ++ return DAG.getConstant(0, DL, VT); ++ ++ // Return x. ++ if (C == 1) ++ return X; ++ ++ // If c is power of 2, return (shl x, log2(c)). ++ if (C.isPowerOf2()) ++ return DAG.getNode(ISD::SHL, DL, VT, X, ++ DAG.getConstant(C.logBase2(), DL, ShiftTy)); ++ ++ unsigned BitWidth = C.getBitWidth(); ++ APInt Floor = APInt(BitWidth, 1) << C.logBase2(); ++ APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : ++ APInt(BitWidth, 1) << C.ceilLogBase2(); ++ ++ // If |c - floor_c| <= |c - ceil_c|, ++ // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), ++ // return (add constMult(x, floor_c), constMult(x, c - floor_c)). ++ if ((C - Floor).ule(Ceil - C)) { ++ SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); ++ } ++ ++ // If |c - floor_c| > |c - ceil_c|, ++ // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
++ SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); ++} ++ ++static SDValue performLogicCombine(SDNode *N, SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDLoc DL(N); ++ SDValue N0 = N->getOperand(0); ++ SDValue N1 = N->getOperand(1); ++ ++ if (!(N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE)) ++ return SDValue(); ++ ++ if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() && ++ N1->getValueType(0).isSimple() && ++ N0->getOperand(0)->getValueType(0).isSimple() && ++ N1->getOperand(0)->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() && ++ N1->getValueType(0).isSimple() && ++ N0->getOperand(0)->getValueType(0).isSimple() && ++ N1->getOperand(0)->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getSimpleValueType(0).SimpleTy == MVT::i32 && ++ N0->getSimpleValueType(0).SimpleTy == MVT::i32 && ++ N1->getSimpleValueType(0).SimpleTy == MVT::i32)) ++ return SDValue(); ++ ++ if (!(N0->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64 && ++ N1->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64)) ++ return SDValue(); ++ ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ SDValue Val0 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ++ N0->getValueType(0), ++ N0->getOperand(0), SubReg), ++ 0); ++ SDValue Val1 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ++ N1->getValueType(0), ++ N1->getOperand(0), SubReg), ++ 0); ++ ++ return DAG.getNode(N->getOpcode(), DL, N0->getValueType(0), Val0, Val1); ++} ++ ++static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, ++ const TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchTargetLowering *TL, ++ const LoongArchSubtarget &Subtarget) { ++ EVT VT = N->getValueType(0); ++ ++ SDValue Res; ++ if ((Res = performLogicCombine(N, DAG, Subtarget))) ++ return Res; ++ ++ if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) ++ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( ++ C->getAPIntValue(), VT, DAG, Subtarget)) ++ return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, ++ TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), ++ DAG); ++ ++ return SDValue(N, 0); ++} ++ ++// Fold sign-extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT for LSX. ++// ++// Performs the following transformations: ++// - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its ++// sign/zero-extension is completely overwritten by the new one performed by ++// the ISD::SRA and ISD::SHL nodes. ++// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL ++// sequence. 
++static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Res; ++ if ((Res = performLogicCombine(N, DAG, Subtarget))) ++ return Res; ++ ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ ++ // (sra (shl (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) ++ // where $d + sizeof($c) == 32 ++ // or $d + sizeof($c) <= 32 and SExt ++ // -> (LoongArchVExtractSExt $a, $b, $c) ++ if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { ++ SDValue Op0Op0 = Op0->getOperand(0); ++ ConstantSDNode *ShAmount = dyn_cast(Op1); ++ ++ if (!ShAmount) ++ return SDValue(); ++ ++ if (Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_SEXT_ELT && ++ Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_ZEXT_ELT) ++ return SDValue(); ++ ++ EVT ExtendTy = cast(Op0Op0->getOperand(2))->getVT(); ++ unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); ++ ++ if (TotalBits == 32 || ++ (Op0Op0->getOpcode() == LoongArchISD::VEXTRACT_SEXT_ELT && ++ TotalBits <= 32)) { ++ SDValue Ops[] = {Op0Op0->getOperand(0), Op0Op0->getOperand(1), ++ Op0Op0->getOperand(2)}; ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), ++ Op0Op0->getVTList(), ++ makeArrayRef(Ops, Op0Op0->getNumOperands())); ++ } ++ } ++ } ++ ++ return SDValue(); ++} ++ ++// combine vsub/vslt/vbitsel.v to vabsd ++static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { ++ assert((N->getOpcode() == ISD::VSELECT) && "Need ISD::VSELECT"); ++ ++ SDLoc dl(N); ++ SDValue Cond = N->getOperand(0); ++ SDValue TrueOpnd = N->getOperand(1); ++ SDValue FalseOpnd = N->getOperand(2); ++ ++ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB || ++ FalseOpnd.getOpcode() != ISD::SUB) ++ return SDValue(); ++ ++ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse())) ++ return SDValue(); ++ ++ ISD::CondCode CC = cast(Cond.getOperand(2))->get(); ++ ++ switch (CC) { ++ default: ++ return SDValue(); ++ case ISD::SETUGT: ++ case ISD::SETUGE: ++ case ISD::SETGT: ++ case ISD::SETGE: ++ break; ++ case ISD::SETULT: ++ case ISD::SETULE: ++ case ISD::SETLT: ++ case ISD::SETLE: ++ std::swap(TrueOpnd, FalseOpnd); ++ break; ++ } ++ ++ SDValue Op1 = Cond.getOperand(0); ++ SDValue Op2 = Cond.getOperand(1); ++ ++ if (TrueOpnd.getOperand(0) == Op1 && TrueOpnd.getOperand(1) == Op2 && ++ FalseOpnd.getOperand(0) == Op2 && FalseOpnd.getOperand(1) == Op1) { ++ if (ISD::isSignedIntSetCC(CC)) { ++ return DAG.getNode(LoongArchISD::VABSD, dl, ++ N->getOperand(1).getValueType(), Op1, Op2, ++ DAG.getTargetConstant(0, dl, MVT::i32)); ++ } else { ++ return DAG.getNode(LoongArchISD::UVABSD, dl, ++ N->getOperand(1).getValueType(), Op1, Op2, ++ DAG.getTargetConstant(0, dl, MVT::i32)); ++ } ++ } ++ return SDValue(); ++} ++ ++static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ ++ EVT Ty = N->getValueType(0); ++ ++ if ((Subtarget.hasLSX() && Ty.is128BitVector() && Ty.isInteger()) || ++ (Subtarget.hasLASX() && Ty.is256BitVector() && Ty.isInteger())) { ++ // Try the following combines: ++ // (xor (or $a, $b), (build_vector allones)) ++ // (xor (or $a, $b), (bitcast (build_vector allones))) ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ SDValue NotOp; ++ ++ if (ISD::isBuildVectorAllOnes(Op0.getNode())) ++ NotOp = Op1; ++ else if (ISD::isBuildVectorAllOnes(Op1.getNode())) ++ NotOp = 
Op0; ++ else ++ return SDValue(); ++ ++ if (NotOp->getOpcode() == ISD::OR) ++ return DAG.getNode(LoongArchISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), ++ NotOp->getOperand(1)); ++ } ++ ++ return SDValue(); ++} ++ ++// When using a 256-bit vector is less expensive than using a 128-bit vector, ++// use this function to convert a 128-bit vector to a 256-bit vector. ++static SDValue ++performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::CONCAT_VECTORS) && "Need ISD::CONCAT_VECTORS"); ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ SDValue Top0 = N->getOperand(0); ++ SDValue Top1 = N->getOperand(1); ++ ++ // Check for cheaper optimizations. ++ if (!((Top0->getOpcode() == ISD::SIGN_EXTEND) && ++ (Top1->getOpcode() == ISD::SIGN_EXTEND))) ++ return SDValue(); ++ if (!((Top0->getOperand(0)->getOpcode() == ISD::ADD) && ++ (Top1->getOperand(0)->getOpcode() == ISD::ADD))) ++ return SDValue(); ++ ++ SDValue Op_a0 = Top0->getOperand(0); ++ SDValue Op_a1 = Top1->getOperand(0); ++ for (int i = 0; i < 2; i++) { ++ if (!((Op_a0->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR) && ++ (Op_a1->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR))) ++ return SDValue(); ++ } ++ ++ SDValue Ops_b[] = {Op_a0->getOperand(0), Op_a0->getOperand(1), ++ Op_a1->getOperand(0), Op_a1->getOperand(1)}; ++ for (int i = 0; i < 4; i++) { ++ if (Ops_b[i]->getNumOperands() != 2) ++ return SDValue(); ++ } ++ ++ // Currently only a single case is handled, and more optimization scenarios ++ // will be added in the future. ++ SDValue Ops_e[] = {Ops_b[0]->getOperand(0), Ops_b[0]->getOperand(1), ++ Ops_b[2]->getOperand(0), Ops_b[2]->getOperand(1), ++ Ops_b[1]->getOperand(0), Ops_b[1]->getOperand(1), ++ Ops_b[3]->getOperand(0), Ops_b[3]->getOperand(1)}; ++ for (int i = 0; i < 8; i++) { ++ if (dyn_cast(Ops_e[i])) ++ return SDValue(); ++ if (i < 4) { ++ if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != ++ (2 * i)) ++ return SDValue(); ++ } else { ++ if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != ++ (2 * i - 7)) ++ return SDValue(); ++ } ++ } ++ ++ for (int i = 0; i < 5; i = i + 4) { ++ if (!((Ops_e[i]->getOperand(0) == Ops_e[i + 1]->getOperand(0)) && ++ (Ops_e[i + 1]->getOperand(0) == Ops_e[i + 2]->getOperand(0)) && ++ (Ops_e[i + 2]->getOperand(0) == Ops_e[i + 3]->getOperand(0)))) ++ return SDValue(); ++ } ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, ++ Ops_e[6]->getOperand(0), ++ Ops_e[0]->getOperand(0)), ++ 0); ++} ++ ++static SDValue performParity(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDLoc DL(N); ++ SDValue T = N->getOperand(0); ++ if (!(N->getValueType(0).isSimple() && T->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ SDValue Ops[4]; ++ bool pos_e = false; ++ bool pos_o = false; ++ ++ for (int i = 0; i < 4; i++) { ++ Ops[i] = T->getOperand(i); ++ if (!Ops[i]->getValueType(0).isSimple()) ++ return SDValue(); ++ if (Ops[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return SDValue(); ++ ++ if (!dyn_cast(Ops[i]->getOperand(1))) ++ return SDValue(); ++ ++ if (cast(Ops[i]->getOperand(1))->getSExtValue() == ++ (2 * i)) { ++ pos_e = true; ++ } else if (cast(Ops[i]->getOperand(1))->getSExtValue() == ++ (2 * i + 1)) { ++ pos_o = true; ++ } else ++ return SDValue(); ++ } ++ ++ if 
(!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && ++ T->getSimpleValueType(0).SimpleTy == MVT::v4i32)) ++ return SDValue(); ++ ++ for (int j = 0; j < 3; j++) { ++ if (Ops[j]->getOperand(0) != Ops[j + 1]->getOperand(0)) ++ return SDValue(); ++ } ++ if (pos_e) { ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } ++ } else if (pos_o) { ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } ++ } else ++ return SDValue(); ++ ++ return SDValue(); ++} ++ ++// Optimize zero extension and sign extension of data ++static SDValue performExtend(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ if (!Subtarget.hasLASX()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ SDValue T = N->getOperand(0); ++ ++ if (T->getOpcode() == ISD::BUILD_VECTOR) ++ return performParity(N, DAG, DCI, Subtarget); ++ ++ if (T->getOpcode() != ISD::ADD && T->getOpcode() != ISD::SUB) ++ return SDValue(); ++ ++ SDValue T0 = T->getOperand(0); ++ SDValue T1 = T->getOperand(1); ++ ++ if (!(T0->getOpcode() == ISD::BUILD_VECTOR && ++ T1->getOpcode() == ISD::BUILD_VECTOR)) ++ return SDValue(); ++ ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ if (!(T->getValueType(0).isSimple() && T0->getValueType(0).isSimple() && ++ T1->getValueType(0).isSimple() && N->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && ++ T->getSimpleValueType(0).SimpleTy == MVT::v4i32 && ++ T0->getSimpleValueType(0).SimpleTy == MVT::v4i32 && ++ T1->getSimpleValueType(0).SimpleTy == MVT::v4i32)) ++ return SDValue(); ++ ++ SDValue 
Opse0[4]; ++ SDValue Opse1[4]; ++ ++ for (int i = 0; i < 4; i++) { ++ if (T->getOpcode() == ISD::ADD) { ++ Opse0[i] = T1->getOperand(i); ++ Opse1[i] = T0->getOperand(i); ++ } else if (T->getOpcode() == ISD::SUB) { ++ Opse0[i] = T0->getOperand(i); ++ Opse1[i] = T1->getOperand(i); ++ } ++ ++ if (Opse0[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT || ++ Opse1[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return SDValue(); ++ ++ if (!(dyn_cast(Opse0[i]->getOperand(1)) && ++ dyn_cast(Opse1[i]->getOperand(1)))) ++ return SDValue(); ++ ++ if (cast(Opse0[i]->getOperand(1))->getSExtValue() != ++ (2 * i + 1) || ++ cast(Opse1[i]->getOperand(1))->getSExtValue() != ++ (2 * i)) ++ return SDValue(); ++ ++ if (i > 0 && (Opse0[i]->getOperand(0) != Opse0[i - 1]->getOperand(0) || ++ Opse1[i]->getOperand(0) != Opse1[i - 1]->getOperand(0))) ++ return SDValue(); ++ } ++ ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (T->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, ++ Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ else if (T->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_D_W, DL, MVT::v4i64, ++ Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (T->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_DU_WU, DL, ++ MVT::v4i64, Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ else if (T->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_DU_WU, DL, ++ MVT::v4i64, Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue performSIGN_EXTENDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::SIGN_EXTEND) && "Need ISD::SIGN_EXTEND"); ++ ++ SDLoc DL(N); ++ SDValue Top = N->getOperand(0); ++ ++ SDValue Res; ++ if (Res = performExtend(N, DAG, DCI, Subtarget)) ++ return Res; ++ ++ if (!(Top->getOpcode() == ISD::CopyFromReg)) ++ return SDValue(); ++ ++ if ((Top->getOperand(0)->getOpcode() == ISD::EntryToken) && ++ (N->getValueType(0) == MVT::i64)) { ++ ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ SDNode *Res = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); ++ ++ Res = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i64, ++ SDValue(Res, 0), Top, SubReg); ++ ++ return SDValue(Res, 0); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue performZERO_EXTENDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::ZERO_EXTEND) && "Need ISD::ZERO_EXTEND"); ++ ++ SDLoc DL(N); ++ ++ SDValue Res; ++ if (Res = performExtend(N, DAG, DCI, Subtarget)) ++ return Res; ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering:: ++PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { ++ SelectionDAG &DAG = DCI.DAG; ++ SDValue Val; ++ ++ switch (N->getOpcode()) { ++ default: break; ++ case ISD::AND: ++ return performANDCombine(N, DAG, DCI, Subtarget); ++ case ISD::OR: ++ return performORCombine(N, DAG, DCI, Subtarget); ++ case ISD::XOR: ++ return performXORCombine(N, DAG, Subtarget); ++ case ISD::MUL: ++ return performMULCombine(N, DAG, DCI, this, Subtarget); ++ case ISD::SRA: ++ return performSRACombine(N, DAG, DCI, Subtarget); ++ case ISD::SELECT: ++ return 
performSELECTCombine(N, DAG, DCI, Subtarget); ++ case ISD::VSELECT: ++ return performVSELECTCombine(N, DAG); ++ case ISD::CONCAT_VECTORS: ++ return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget); ++ case ISD::SIGN_EXTEND: ++ return performSIGN_EXTENDCombine(N, DAG, DCI, Subtarget); ++ case ISD::ZERO_EXTEND: ++ return performZERO_EXTENDCombine(N, DAG, DCI, Subtarget); ++ case ISD::ADD: ++ case ISD::SUB: ++ case ISD::SHL: ++ case ISD::SRL: ++ return performLogicCombine(N, DAG, Subtarget); ++ } ++ return SDValue(); ++} ++ ++static SDValue lowerLSXSplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { ++ EVT ResVecTy = Op->getValueType(0); ++ EVT ViaVecTy = ResVecTy; ++ SDLoc DL(Op); ++ ++ // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and ++ // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating ++ // lanes. ++ SDValue LaneA = Op->getOperand(OpNr); ++ SDValue LaneB; ++ ++ if (ResVecTy == MVT::v2i64) { ++ // In case of the index being passed as an immediate value, set the upper ++ // lane to 0 so that the splati.d instruction can be matched. ++ if (isa(LaneA)) ++ LaneB = DAG.getConstant(0, DL, MVT::i32); ++ // Having the index passed in a register, set the upper lane to the same ++ // value as the lower - this results in the BUILD_VECTOR node not being ++ // expanded through stack. This way we are able to pattern match the set of ++ // nodes created here to splat.d. ++ else ++ LaneB = LaneA; ++ ViaVecTy = MVT::v4i32; ++ } else ++ LaneB = LaneA; ++ ++ SDValue Ops[16] = {LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, ++ LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB}; ++ ++ SDValue Result = DAG.getBuildVector( ++ ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); ++ ++ if (ViaVecTy != ResVecTy) { ++ SDValue One = DAG.getConstant(1, DL, ViaVecTy); ++ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, ++ DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); ++ } ++ ++ return Result; ++} ++ ++static SDValue lowerLSXSplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, ++ bool IsSigned = false) { ++ return DAG.getConstant( ++ APInt(Op->getValueType(0).getScalarType().getSizeInBits(), ++ Op->getConstantOperandVal(ImmOp), IsSigned), ++ SDLoc(Op), Op->getValueType(0)); ++} ++ ++static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, ++ SelectionDAG &DAG) { ++ EVT ViaVecTy = VecTy; ++ SDValue SplatValueA = SplatValue; ++ SDValue SplatValueB = SplatValue; ++ SDLoc DL(SplatValue); ++ ++ if (VecTy == MVT::v2i64) { ++ // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 
++ ViaVecTy = MVT::v4i32; ++ ++ SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); ++ SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, ++ DAG.getConstant(32, DL, MVT::i32)); ++ SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); ++ } ++ ++ SDValue Ops[32] = {SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB}; ++ ++ SDValue Result = DAG.getBuildVector( ++ ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); ++ ++ if (VecTy != ViaVecTy) ++ Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); ++ ++ return Result; ++} ++ ++static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDValue Vec = Op->getOperand(2); ++ MVT ResEltTy = ++ (ResTy == MVT::v2i64 || ResTy == MVT::v4i64) ? MVT::i64 : MVT::i32; ++ SDValue ConstValue = ++ DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResEltTy); ++ SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, DAG); ++ ++ return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); ++} ++ ++static SDValue lowerLSXBitClear(SDValue Op, SelectionDAG &DAG) { ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ SDValue One = DAG.getConstant(1, DL, ResTy); ++ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); ++ ++ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), ++ DAG.getNOT(DL, Bit, ResTy)); ++} ++ ++static SDValue lowerLSXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++} ++ ++static SDValue lowerLASXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. 
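// Illustration, not part of the patch: a scalar model of the vector bit-clear
// lowering above (lowerLSXBitClear). truncateVecElts masks each lane's shift
// amount to the element width, and the result then has exactly that bit
// cleared in every lane. Hypothetical per-lane equivalent:
static inline unsigned bitClearLane(unsigned Src, unsigned Amt,
                                    unsigned ElemBits) {
  return Src & ~(1u << (Amt & (ElemBits - 1)));
}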
++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++} ++ ++static SDValue lowerLASXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++ return DAG.getNode(LoongArchISD::XVBROADCAST, DL, ++ DAG.getVTList(ResTy, MVT::Other), Load); ++} ++ ++static SDValue lowerLSXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++ return DAG.getNode(LoongArchISD::VBROADCAST, DL, ++ DAG.getVTList(ResTy, MVT::Other), Load); ++} ++ ++static SDValue lowerLSXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Value = Op->getOperand(2); ++ SDValue Address = Op->getOperand(3); ++ SDValue Offset = Op->getOperand(4); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ ++ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++} ++ ++static SDValue lowerLASXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Value = Op->getOperand(2); ++ SDValue Address = Op->getOperand(3); ++ SDValue Offset = Op->getOperand(4); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. 
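// Illustration, not part of the patch: the effective-address computation that
// these load/store intrinsic lowerings share. Under LP64 the base address is
// 64-bit while the intrinsic offset is a signed 32-bit value, so the offset is
// sign-extended before the add. Hypothetical scalar equivalent:
static inline long long intrinsicAddress(long long Base, int Offset) {
  return Base + (long long)Offset; // ISD::SIGN_EXTEND of Offset, then ISD::ADD
}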
++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ ++ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++} ++ ++static SDValue LowerSUINT_TO_FP(unsigned ExtOpcode, SDValue Op, SelectionDAG &DAG) { ++ ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT ViaTy = Op0->getValueType(0); ++ SDLoc DL(Op); ++ ++ if (!ResTy.isVector()) { ++ if(ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) ++ return DAG.getNode(ISD::BITCAST, DL, ResTy, Op0); ++ else if(ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op0); ++ return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Op0); ++ } else { ++ Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Op0); ++ return DAG.getNode(ISD::TRUNCATE, DL, MVT::f32, Op0); ++ } ++ ++ } ++ ++ if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { ++ // v4i32 => v4f32 v8i32 => v8f32 ++ // v2i64 => v2f64 v4i64 => v4f64 ++ // do nothing ++ } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ // v4i32 => v4i64 => v4f64 ++ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); ++ Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); ++ } else { ++ // v4i64 => v4f32 ++ SDValue Ops[4]; ++ for (unsigned i = 0; i < 4; i++) { ++ SDValue I64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op0, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Ops[i] = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, I64); ++ } ++ Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); ++ } ++ ++ return Op0; ++} ++ ++static SDValue LowerFP_TO_SUINT(unsigned FPToSUI, unsigned ExtOpcode, ++ SDValue Op, SelectionDAG &DAG) { ++ ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT ViaTy = Op0->getValueType(0); ++ SDLoc DL(Op); ++ ++ if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { ++ // v4f32 => v4i32 v8f32 => v8i32 ++ // v2f64 => v2i64 v4f64 => v4i64 ++ // do nothing ++ Op0 = DAG.getNode(FPToSUI, DL, ResTy, Op0); ++ } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ // v4f32 => v4i32 => v4i64 ++ Op0 = DAG.getNode(FPToSUI, DL, MVT::v4i32, Op0); ++ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); ++ Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); ++ } else { ++ SDValue Ops[4]; ++ Ops[0] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(0, DL, MVT::i64))); ++ Ops[1] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(1, DL, MVT::i64))); ++ Ops[2] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(2, DL, MVT::i64))); ++ Ops[3] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(3, DL, MVT::i64))); ++ ++ Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); ++ } ++ ++ return Op0; ++} ++ ++// Lower VECTOR_SHUFFLE into SHF (if possible). ++// ++// SHF splits the vector into blocks of four elements, then shuffles these ++// elements according to a <4 x i2> constant (encoded as an integer immediate). 
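// Illustration, not part of the patch: how the <4 x i2> immediate is packed.
// Element i of the 4-element block occupies bits [2*i+1 : 2*i], which is why
// the worked example further down encodes the mask <3, 2, 1, 0, ...> as
// 3 + (2 << 2) + (1 << 4) + (0 << 6) == 27. Hypothetical encoder mirroring the
// loop used in the lowering (undef indices, -1, become 0):
static inline unsigned encodeSHFImm(const int Idx[4]) {
  unsigned Imm = 0;
  for (int I = 3; I >= 0; --I) {
    Imm <<= 2;
    Imm |= (unsigned)(Idx[I] < 0 ? 0 : Idx[I]) & 0x3;
  }
  return Imm; // e.g. {3, 2, 1, 0} -> 27
}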
++// ++// It is therefore possible to lower into SHF when the mask takes the form: ++// ++// When undef's appear they are treated as if they were whatever value is ++// necessary in order to fit the above forms. ++// ++// For example: ++// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, ++// <8 x i32> ++// is lowered to: ++// (VSHUF4I_H $v0, $v1, 27) ++// where the 27 comes from: ++// 3 + (2 << 2) + (1 << 4) + (0 << 6) ++static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ int SHFIndices[4] = {-1, -1, -1, -1}; ++ ++ if (Indices.size() < 4) ++ return SDValue(); ++ ++ for (unsigned i = 0; i < 4; ++i) { ++ for (unsigned j = i; j < Indices.size(); j += 4) { ++ int Idx = Indices[j]; ++ ++ // Convert from vector index to 4-element subvector index ++ // If an index refers to an element outside of the subvector then give up ++ if (Idx != -1) { ++ Idx -= 4 * (j / 4); ++ if (Idx < 0 || Idx >= 4) ++ return SDValue(); ++ } ++ ++ // If the mask has an undef, replace it with the current index. ++ // Note that it might still be undef if the current index is also undef ++ if (SHFIndices[i] == -1) ++ SHFIndices[i] = Idx; ++ ++ // Check that non-undef values are the same as in the mask. If they ++ // aren't then give up ++ if (!(Idx == -1 || Idx == SHFIndices[i])) ++ return SDValue(); ++ } ++ } ++ ++ // Calculate the immediate. Replace any remaining undefs with zero ++ APInt Imm(32, 0); ++ for (int i = 3; i >= 0; --i) { ++ int Idx = SHFIndices[i]; ++ ++ if (Idx == -1) ++ Idx = 0; ++ ++ Imm <<= 2; ++ Imm |= Idx & 0x3; ++ } ++ ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::SHF, DL, ResTy, ++ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); ++} ++ ++/// Determine whether a range fits a regular pattern of values. ++/// This function accounts for the possibility of jumping over the End iterator. ++template ++static bool ++fitsRegularPattern(typename SmallVectorImpl::const_iterator Begin, ++ unsigned CheckStride, ++ typename SmallVectorImpl::const_iterator End, ++ ValType ExpectedIndex, unsigned ExpectedIndexStride) { ++ auto &I = Begin; ++ ++ while (I != End) { ++ if (*I != -1 && *I != ExpectedIndex) ++ return false; ++ ExpectedIndex += ExpectedIndexStride; ++ ++ // Incrementing past End is undefined behaviour so we must increment one ++ // step at a time and check for End at each step. ++ for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) ++ ; // Empty loop body. ++ } ++ return true; ++} ++ ++// Determine whether VECTOR_SHUFFLE is a VREPLVEI. ++// ++// It is a VREPLVEI when the mask is: ++// ++// where x is any valid index. ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above form. ++static bool isVECTOR_SHUFFLE_VREPLVEI(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ int SplatIndex = -1; ++ for (const auto &V : Indices) { ++ if (V != -1) { ++ SplatIndex = V; ++ break; ++ } ++ } ++ ++ return fitsRegularPattern(Indices.begin(), 1, Indices.end(), SplatIndex, ++ 0); ++} ++ ++// Lower VECTOR_SHUFFLE into VPACKEV (if possible). ++// ++// VPACKEV interleaves the even elements from each vector. ++// ++// It is possible to lower into VPACKEV when the mask consists of two of the ++// following forms interleaved: ++// <0, 2, 4, ...> ++// ++// where n is the number of elements in the vector. 
++// For example: ++// <0, 0, 2, 2, 4, 4, ...> ++// <0, n, 2, n+2, 4, n+4, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VPACKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the even elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, 0, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the even elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VPACKOD (if possible). ++// ++// VPACKOD interleaves the odd elements from each vector. ++// ++// It is possible to lower into VPACKOD when the mask consists of two of the ++// following forms interleaved: ++// <1, 3, 5, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <1, 1, 3, 3, 5, 5, ...> ++// <1, n+1, 3, n+3, 5, n+5, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VPACKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the odd elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, 1, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + 1, 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the odd elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, 1, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + 1, 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VILVL (if possible). ++// ++// VILVL interleaves consecutive elements from the right (lowest-indexed) half ++// of each vector. ++// ++// It is possible to lower into VILVL when the mask consists of two of the ++// following forms interleaved: ++// <0, 1, 2, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <0, 0, 1, 1, 2, 2, ...> ++// <0, n, 1, n+1, 2, n+2, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. 
++static SDValue lowerVECTOR_SHUFFLE_VILVL(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the right (lowest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, 0, 1)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 1)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the right (lowest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 1)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 1)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VILVH (if possible). ++// ++// VILVH interleaves consecutive elements from the left (highest-indexed) half ++// of each vector. ++// ++// It is possible to lower into VILVH when the mask consists of two of the ++// following forms interleaved: ++// ++// ++// where n is the number of elements in the vector and x is half n. ++// For example: ++// ++// ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VILVH(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ unsigned HalfSize = Indices.size() / 2; ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the left (highest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, HalfSize, 1)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + HalfSize, 1)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the left (highest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, HalfSize, 1)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + HalfSize, ++ 1)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VPICKEV (if possible). ++// ++// VPICKEV copies the even elements of each vector into the result vector. ++// ++// It is possible to lower into VPICKEV when the mask consists of two of the ++// following forms concatenated: ++// <0, 2, 4, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <0, 2, 4, ..., 0, 2, 4, ...> ++// <0, 2, 4, ..., n, n+2, n+4, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. 
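// Illustration, not part of the patch: concrete masks for an 8-element shuffle
// (n == 8, e.g. v8i16) that the pattern checks in this file accept, spelled
// out from the documented forms above and below:
//   <0, 8, 2, 10, 4, 12, 6, 14>   -> VPACKEV (even lanes, interleaved)
//   <1, 9, 3, 11, 5, 13, 7, 15>   -> VPACKOD (odd lanes, interleaved)
//   <0, 8, 1, 9, 2, 10, 3, 11>    -> VILVL   (low halves, interleaved)
//   <4, 12, 5, 13, 6, 14, 7, 15>  -> VILVH   (high halves, interleaved)
//   <0, 2, 4, 6, 8, 10, 12, 14>   -> VPICKEV (even lanes, concatenated)
//   <1, 3, 5, 7, 9, 11, 13, 15>   -> VPICKOD (odd lanes, concatenated)
// Undef (-1) entries may replace any position and the checks still match.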
++static SDValue lowerVECTOR_SHUFFLE_VPICKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 1, Mid, 0, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, Mid, Indices.size(), 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Mid, 1, End, 0, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Mid, 1, End, Indices.size(), 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VPICKOD (if possible). ++// ++// VPICKOD copies the odd elements of each vector into the result vector. ++// ++// It is possible to lower into VPICKOD when the mask consists of two of the ++// following forms concatenated: ++// <1, 3, 5, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <1, 3, 5, ..., 1, 3, 5, ...> ++// <1, 3, 5, ..., n+1, n+3, n+5, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VPICKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 1, Mid, 1, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, Mid, Indices.size() + 1, 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Mid, 1, End, 1, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Mid, 1, End, Indices.size() + 1, 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VSHF. ++// ++// This mostly consists of converting the shuffle indices in Indices into a ++// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is ++// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, ++// if the type is v8i16 and all the indices are less than 8 then the second ++// operand is unused and can be replaced with anything. We choose to replace it ++// with the used operand since this reduces the number of instructions overall. 
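// Illustration, not part of the patch: the operand-elimination rule described
// above, as a hypothetical standalone check over a shuffle mask with NumElts
// lanes per source (index -1 means undef). When only one source is
// referenced, both VSHF operands can be set to that source:
static inline void classifyShuffleSources(const int *Mask, int NumElts,
                                          bool &UsesOp0, bool &UsesOp1) {
  UsesOp0 = UsesOp1 = false;
  for (int I = 0; I < NumElts; ++I) {
    if (Mask[I] >= 0 && Mask[I] < NumElts)
      UsesOp0 = true; // index into the first source vector
    else if (Mask[I] >= NumElts && Mask[I] < 2 * NumElts)
      UsesOp1 = true; // index into the second source vector
  }
}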
++static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ SmallVector Ops; ++ SDValue Op0; ++ SDValue Op1; ++ EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); ++ EVT MaskEltTy = MaskVecTy.getVectorElementType(); ++ bool Using1stVec = false; ++ bool Using2ndVec = false; ++ SDLoc DL(Op); ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ ++ for (int i = 0; i < ResTyNumElts; ++i) { ++ // Idx == -1 means UNDEF ++ int Idx = Indices[i]; ++ ++ if (0 <= Idx && Idx < ResTyNumElts) ++ Using1stVec = true; ++ if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) ++ Using2ndVec = true; ++ } ++ ++ for (SmallVector::iterator I = Indices.begin(); I != Indices.end(); ++ ++I) ++ Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); ++ ++ SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); ++ ++ if (Using1stVec && Using2ndVec) { ++ Op0 = Op->getOperand(0); ++ Op1 = Op->getOperand(1); ++ } else if (Using1stVec) ++ Op0 = Op1 = Op->getOperand(0); ++ else if (Using2ndVec) ++ Op0 = Op1 = Op->getOperand(1); ++ else ++ llvm_unreachable("shuffle vector mask references neither vector operand?"); ++ ++ // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. ++ // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> ++ // VSHF concatenates the vectors in a bitwise fashion: ++ // <0b00, 0b01> + <0b10, 0b11> -> ++ // 0b0100 + 0b1110 -> 0b01001110 ++ // <0b10, 0b11, 0b00, 0b01> ++ // We must therefore swap the operands to get the correct result. ++ return DAG.getNode(LoongArchISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVILVL(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 0, 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize, 1)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize, 1)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize, 1)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize, 1)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVILVH(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ unsigned HalfSize = Indices.size() / 2; ++ unsigned LeftSize = HalfSize / 2; ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 2, End, HalfSize - LeftSize, 1) && ++ fitsRegularPattern(Begin + HalfSize + LeftSize, 2, End, ++ HalfSize + LeftSize, 1)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, ++ Indices.size() + HalfSize - LeftSize, 1) && ++ fitsRegularPattern(Begin + HalfSize + LeftSize, 2, End, ++ Indices.size() + HalfSize + LeftSize, 1)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, HalfSize, 1) && ++ 
fitsRegularPattern(Begin + 1 + HalfSize + LeftSize, 2, End, ++ HalfSize + LeftSize, 1)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + HalfSize, ++ 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize + LeftSize, 2, End, ++ Indices.size() + HalfSize + LeftSize, 1)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 0, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 1, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize + 1, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + 1, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 1, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize + 1, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + 1, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static bool isVECTOR_SHUFFLE_XVREPLVEI(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ for (unsigned i = 0; i < HalfSize; i++) { ++ if (Indices[i] == -1 || Indices[HalfSize + i] == -1) ++ return false; ++ if (Indices[0] != Indices[i] || Indices[HalfSize] != Indices[HalfSize + i]) ++ return false; ++ } ++ return true; ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &LeftMid = Indices.begin() + Indices.size() / 4; ++ const auto &End = Indices.end(); ++ const auto &RightMid = Indices.end() - Indices.size() / 4; ++ const auto &Mid = Indices.begin() + Indices.size() / 
2; ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 1, LeftMid, 0, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, HalfSize, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, LeftMid, Indices.size(), 2) && ++ fitsRegularPattern(Mid, 1, RightMid, Indices.size() + HalfSize, ++ 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(LeftMid, 1, Mid, 0, 2) && ++ fitsRegularPattern(RightMid, 1, End, HalfSize, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(LeftMid, 1, Mid, Indices.size(), 2) && ++ fitsRegularPattern(RightMid, 1, End, Indices.size() + HalfSize, ++ 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &LeftMid = Indices.begin() + Indices.size() / 4; ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &RightMid = Indices.end() - Indices.size() / 4; ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 1, LeftMid, 1, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, HalfSize + 1, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, LeftMid, Indices.size() + 1, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, ++ Indices.size() + HalfSize + 1, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(LeftMid, 1, Mid, 1, 2) && ++ fitsRegularPattern(RightMid, 1, End, HalfSize + 1, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(LeftMid, 1, Mid, Indices.size() + 1, 2) && ++ fitsRegularPattern(RightMid, 1, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XSHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ int SHFIndices[4] = {-1, -1, -1, -1}; ++ ++ if (Indices.size() < 4) ++ return SDValue(); ++ ++ int HalfSize = Indices.size() / 2; ++ for (int i = 0; i < 4; ++i) { ++ for (int j = i; j < HalfSize; j += 4) { ++ int Idx = Indices[j]; ++ // check mxshf ++ if (Idx + HalfSize != Indices[j + HalfSize]) ++ return SDValue(); ++ ++ // Convert from vector index to 4-element subvector index ++ // If an index refers to an element outside of the subvector then give up ++ if (Idx != -1) { ++ Idx -= 4 * (j / 4); ++ if (Idx < 0 || Idx >= 4) ++ return SDValue(); ++ } ++ ++ // If the mask has an undef, replace it with the current index. ++ // Note that it might still be undef if the current index is also undef ++ if (SHFIndices[i] == -1) ++ SHFIndices[i] = Idx; ++ ++ // Check that non-undef values are the same as in the mask. If they ++ // aren't then give up ++ if (!(Idx == -1 || Idx == SHFIndices[i])) ++ return SDValue(); ++ } ++ } ++ ++ // Calculate the immediate. 
Replace any remaining undefs with zero ++ APInt Imm(32, 0); ++ for (int i = 3; i >= 0; --i) { ++ int Idx = SHFIndices[i]; ++ ++ if (Idx == -1) ++ Idx = 0; ++ ++ Imm <<= 2; ++ Imm |= Idx & 0x3; ++ } ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::SHF, DL, ResTy, ++ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); ++} ++ ++static bool isConstantOrUndef(const SDValue Op) { ++ if (Op->isUndef()) ++ return true; ++ if (isa(Op)) ++ return true; ++ if (isa(Op)) ++ return true; ++ return false; ++} ++ ++static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { ++ for (unsigned i = 0; i < Op->getNumOperands(); ++i) ++ if (isConstantOrUndef(Op->getOperand(i))) ++ return true; ++ return false; ++} ++ ++static bool isLASXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) { ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ ViaVecTy = MVT::v32i8; ++ break; ++ case 16: ++ ViaVecTy = MVT::v16i16; ++ break; ++ case 32: ++ ViaVecTy = MVT::v8i32; ++ break; ++ case 64: ++ ViaVecTy = MVT::v4i64; ++ break; ++ case 128: ++ // There's no fill.q to fall back on for 64-bit values ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool isLSXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) { ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ ViaVecTy = MVT::v16i8; ++ break; ++ case 16: ++ ViaVecTy = MVT::v8i16; ++ break; ++ case 32: ++ ViaVecTy = MVT::v4i32; ++ break; ++ case 64: ++ // There's no fill.d to fall back on for 64-bit values ++ return false; ++ } ++ ++ return true; ++} ++ ++bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; } ++ ++bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; } ++ ++void LoongArchTargetLowering::LowerOperationWrapper( ++ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { ++ SDValue Res = LowerOperation(SDValue(N, 0), DAG); ++ ++ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) ++ Results.push_back(Res.getValue(I)); ++} ++ ++void LoongArchTargetLowering::ReplaceNodeResults( ++ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { ++ return LowerOperationWrapper(N, Results, DAG); ++} ++ ++SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, ++ SelectionDAG &DAG) const { ++ switch (Op.getOpcode()) { ++ case ISD::STORE: ++ return lowerSTORE(Op, DAG); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return lowerINTRINSIC_WO_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_W_CHAIN: ++ return lowerINTRINSIC_W_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_VOID: ++ return lowerINTRINSIC_VOID(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return lowerEXTRACT_VECTOR_ELT(Op, DAG); ++ case ISD::INSERT_VECTOR_ELT: ++ return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::BUILD_VECTOR: ++ return lowerBUILD_VECTOR(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return lowerVECTOR_SHUFFLE(Op, DAG); ++ case ISD::UINT_TO_FP: ++ return lowerUINT_TO_FP(Op, DAG); ++ case ISD::SINT_TO_FP: ++ return lowerSINT_TO_FP(Op, DAG); ++ case ISD::FP_TO_UINT: ++ return lowerFP_TO_UINT(Op, DAG); ++ case ISD::FP_TO_SINT: ++ return lowerFP_TO_SINT(Op, DAG); ++ case ISD::BRCOND: ++ return lowerBRCOND(Op, DAG); ++ case ISD::ConstantPool: ++ return lowerConstantPool(Op, DAG); ++ case ISD::GlobalAddress: ++ return lowerGlobalAddress(Op, DAG); ++ case ISD::BlockAddress: ++ return lowerBlockAddress(Op, DAG); ++ case ISD::GlobalTLSAddress: ++ return lowerGlobalTLSAddress(Op, DAG); ++ case ISD::JumpTable: ++ return lowerJumpTable(Op, DAG); ++ case ISD::SELECT: ++ return lowerSELECT(Op, DAG); ++ case 
ISD::SETCC: ++ return lowerSETCC(Op, DAG); ++ case ISD::VASTART: ++ return lowerVASTART(Op, DAG); ++ case ISD::VAARG: ++ return lowerVAARG(Op, DAG); ++ case ISD::FRAMEADDR: ++ return lowerFRAMEADDR(Op, DAG); ++ case ISD::RETURNADDR: ++ return lowerRETURNADDR(Op, DAG); ++ case ISD::EH_RETURN: ++ return lowerEH_RETURN(Op, DAG); ++ case ISD::ATOMIC_FENCE: ++ return lowerATOMIC_FENCE(Op, DAG); ++ case ISD::SHL_PARTS: ++ return lowerShiftLeftParts(Op, DAG); ++ case ISD::SRA_PARTS: ++ return lowerShiftRightParts(Op, DAG, true); ++ case ISD::SRL_PARTS: ++ return lowerShiftRightParts(Op, DAG, false); ++ case ISD::EH_DWARF_CFA: ++ return lowerEH_DWARF_CFA(Op, DAG); ++ } ++ return SDValue(); ++} ++ ++//===----------------------------------------------------------------------===// ++// Lower helper functions ++//===----------------------------------------------------------------------===// ++ ++template ++SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, ++ bool IsLocal) const { ++ SDLoc DL(N); ++ EVT Ty = getPointerTy(DAG.getDataLayout()); ++ ++ if (isPositionIndependent()) { ++ SDValue Addr = getTargetNode(N, Ty, DAG, 0U); ++ if (IsLocal) ++ // Use PC-relative addressing to access the symbol. ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), ++ 0); ++ ++ // Use PC-relative addressing to access the GOT for this symbol, then load ++ // the address from the GOT. ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrGlobal, DL, Ty, Addr), ++ 0); ++ } ++ ++ SDValue Addr = getTargetNode(N, Ty, DAG, 0U); ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), 0); ++} ++ ++// addLiveIn - This helper function adds the specified physical register to the ++// MachineFunction as a live in value. It also creates a corresponding ++// virtual register for it. ++static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, ++ const TargetRegisterClass *RC) { ++ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); ++ MF.getRegInfo().addLiveIn(PReg, VReg); ++ return VReg; ++} ++ ++static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, ++ MachineBasicBlock &MBB, ++ const TargetInstrInfo &TII, ++ bool Is64Bit) { ++ if (NoZeroDivCheck) ++ return &MBB; ++ ++ // Insert pseudo instruction(PseudoTEQ), will expand: ++ // beq $divisor_reg, $zero, 8 ++ // break 7 ++ MachineBasicBlock::iterator I(MI); ++ MachineInstrBuilder MIB; ++ MachineOperand &Divisor = MI.getOperand(2); ++ unsigned TeqOp = LoongArch::PseudoTEQ; ++ ++ MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(TeqOp)) ++ .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())); ++ ++ // Use the 32-bit sub-register if this is a 64-bit division. ++ //if (Is64Bit) ++ // MIB->getOperand(0).setSubReg(LoongArch::sub_32); ++ ++ // Clear Divisor's kill flag. ++ Divisor.setIsKill(false); ++ ++ // We would normally delete the original instruction here but in this case ++ // we only needed to inject an additional instruction rather than replace it. 
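// Illustration, not part of the patch: integer division on LoongArch does not
// fault on a zero divisor, so the injected check is what raises the exception.
// At the source level the guarded division behaves roughly like:
//   if (divisor == 0)
//     trap();                       // the "break 7" in the expansion above
//   quotient = dividend / divisor;  // the original DIV/MOD stays in place
// which is why the original instruction is kept rather than replaced, and why
// the NoZeroDivCheck option above skips inserting the sequence entirely.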
++ ++ return &MBB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected instr type to insert"); ++ case LoongArch::FILL_FW_PSEUDO: ++ return emitFILL_FW(MI, BB); ++ case LoongArch::FILL_FD_PSEUDO: ++ return emitFILL_FD(MI, BB); ++ case LoongArch::SNZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_B); ++ case LoongArch::SNZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_H); ++ case LoongArch::SNZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_W); ++ case LoongArch::SNZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_D); ++ case LoongArch::SNZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETNEZ_V); ++ case LoongArch::SZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_B); ++ case LoongArch::SZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_H); ++ case LoongArch::SZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_W); ++ case LoongArch::SZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_D); ++ case LoongArch::SZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETEQZ_V); ++ case LoongArch::XSNZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_B); ++ case LoongArch::XSNZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_H); ++ case LoongArch::XSNZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_W); ++ case LoongArch::XSNZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_D); ++ case LoongArch::XSNZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETNEZ_V); ++ case LoongArch::XSZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_B); ++ case LoongArch::XSZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_H); ++ case LoongArch::XSZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_W); ++ case LoongArch::XSZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_D); ++ case LoongArch::XSZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETEQZ_V); ++ case LoongArch::INSERT_FW_PSEUDO: ++ return emitINSERT_FW(MI, BB); ++ case LoongArch::INSERT_FD_PSEUDO: ++ return emitINSERT_FD(MI, BB); ++ case LoongArch::XINSERT_H_PSEUDO: ++ return emitXINSERT_BH(MI, BB, 2); ++ case LoongArch::XCOPY_FW_PSEUDO: ++ return emitXCOPY_FW(MI, BB); ++ case LoongArch::XCOPY_FD_PSEUDO: ++ return emitXCOPY_FD(MI, BB); ++ case LoongArch::XINSERT_FW_PSEUDO: ++ return emitXINSERT_FW(MI, BB); ++ case LoongArch::COPY_FW_PSEUDO: ++ return emitCOPY_FW(MI, BB); ++ case LoongArch::XFILL_FW_PSEUDO: ++ return emitXFILL_FW(MI, BB); ++ case LoongArch::XFILL_FD_PSEUDO: ++ return emitXFILL_FD(MI, BB); ++ case LoongArch::COPY_FD_PSEUDO: ++ return emitCOPY_FD(MI, BB); ++ case LoongArch::XINSERT_FD_PSEUDO: ++ return emitXINSERT_FD(MI, BB); ++ case LoongArch::XINSERT_B_PSEUDO: ++ return emitXINSERT_BH(MI, BB, 1); ++ case LoongArch::CONCAT_VECTORS_B_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 1); ++ case LoongArch::CONCAT_VECTORS_H_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 2); ++ case LoongArch::CONCAT_VECTORS_W_PSEUDO: ++ case LoongArch::CONCAT_VECTORS_FW_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 4); ++ case LoongArch::CONCAT_VECTORS_D_PSEUDO: ++ case 
LoongArch::CONCAT_VECTORS_FD_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 8); ++ case LoongArch::XCOPY_FW_GPR_PSEUDO: ++ return emitXCOPY_FW_GPR(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_ADD_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_ADD_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_ADD_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_ADD_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_AND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_AND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_AND_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_AND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_OR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_OR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_OR_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_OR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_XOR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_XOR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_XOR_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_XOR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_NAND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_NAND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_NAND_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_NAND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_SUB_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_SUB_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_SUB_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_SUB_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_SWAP_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_SWAP_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_SWAP_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_SWAP_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::XINSERT_B_VIDX_PSEUDO: ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO: ++ return emitXINSERT_B(MI, BB); ++ case LoongArch::INSERT_H_VIDX64_PSEUDO: ++ return emitINSERT_H_VIDX(MI, BB); ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO: ++ return emitXINSERT_DF_VIDX(MI, BB, false); ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO: ++ return emitXINSERT_DF_VIDX(MI, BB, true); ++ ++ case LoongArch::ATOMIC_LOAD_MAX_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_MAX_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_MAX_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_MAX_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_MIN_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_MIN_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_MIN_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_MIN_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_UMAX_I8: ++ return emitAtomicBinaryPartword(MI, 
BB, 1); ++ case LoongArch::ATOMIC_LOAD_UMAX_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_UMAX_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_UMAX_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_UMIN_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_UMIN_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_UMIN_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_UMIN_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_CMP_SWAP_I8: ++ return emitAtomicCmpSwapPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_CMP_SWAP_I16: ++ return emitAtomicCmpSwapPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_CMP_SWAP_I32: ++ return emitAtomicCmpSwap(MI, BB); ++ case LoongArch::ATOMIC_CMP_SWAP_I64: ++ return emitAtomicCmpSwap(MI, BB); ++ ++ case LoongArch::PseudoSELECT_I: ++ case LoongArch::PseudoSELECT_I64: ++ case LoongArch::PseudoSELECT_S: ++ case LoongArch::PseudoSELECT_D64: ++ return emitPseudoSELECT(MI, BB, false, LoongArch::BNE32); ++ ++ case LoongArch::PseudoSELECTFP_T_I: ++ case LoongArch::PseudoSELECTFP_T_I64: ++ return emitPseudoSELECT(MI, BB, true, LoongArch::BCNEZ); ++ ++ case LoongArch::PseudoSELECTFP_F_I: ++ case LoongArch::PseudoSELECTFP_F_I64: ++ return emitPseudoSELECT(MI, BB, true, LoongArch::BCEQZ); ++ case LoongArch::DIV_W: ++ case LoongArch::DIV_WU: ++ case LoongArch::MOD_W: ++ case LoongArch::MOD_WU: ++ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false); ++ case LoongArch::DIV_D: ++ case LoongArch::DIV_DU: ++ case LoongArch::MOD_D: ++ case LoongArch::MOD_DU: ++ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true); ++ } ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitXINSERT_DF_VIDX( ++ MachineInstr &MI, MachineBasicBlock *BB, bool IsGPR64) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ insertOp = IsGPR64 ? LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA ++ : LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA; ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ ++ const TargetRegisterClass *RC = ++ IsGPR64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(SrcValReg) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::XVPICKVE2GR_W), Rj) ++ .addReg(Xj) ++ .addImm(0); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rj, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_H_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ unsigned isGP64 = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::INSERT_H_VIDX64_PSEUDO: ++ isGP64 = 1; ++ insertOp = LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo vector for replacement!"); ++ } ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ ++ const TargetRegisterClass *RC = ++ isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_B(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ unsigned isGP64 = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO: ++ isGP64 = 1; ++ insertOp = LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA; ++ break; ++ case LoongArch::XINSERT_B_VIDX_PSEUDO: ++ insertOp = LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo vector for replacement!"); ++ } ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ const TargetRegisterClass *RC = ++ isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned Rimm = RegInfo.createVirtualRegister(RC); ++ unsigned R4r = RegInfo.createVirtualRegister(RC); ++ unsigned Rib = RegInfo.createVirtualRegister(RC); ++ unsigned Ris = RegInfo.createVirtualRegister(RC); ++ unsigned R7b1 = RegInfo.createVirtualRegister(RC); ++ unsigned R7b2 = RegInfo.createVirtualRegister(RC); ++ unsigned R7b3 = RegInfo.createVirtualRegister(RC); ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ unsigned R7r80_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r80l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r81_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r81l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r82_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r82l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R70 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned tmp_Dst73 = ++ RegInfo.createVirtualRegister(&LoongArch::LASX256BRegClass); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R4r, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rib, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Ris, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b1, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b2, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r80_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r80l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r81_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r81l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r82_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r82l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(tmp_Dst73, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rimm, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R70, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++const TargetRegisterClass * ++LoongArchTargetLowering::getRepRegClassFor(MVT VT) const { ++ return TargetLowering::getRepRegClassFor(VT); ++} ++ ++// This function also handles LoongArch::ATOMIC_SWAP_I32 (when BinOpcode 
== 0), and ++// LoongArch::ATOMIC_LOAD_NAND_I32 (when Nand == true) ++MachineBasicBlock * ++LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned AtomicOp; ++ switch (MI.getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_ADD_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I32: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I64: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ unsigned OldVal = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ // The scratch registers here with the EarlyClobber | Define | Implicit ++ // flags is used to persuade the register allocator and the machine ++ // verifier to accept the usage of this register. This has to be a real ++ // register which has an UNDEF value but is dead after the instruction which ++ // is unique among the registers chosen for the instruction. ++ ++ // The EarlyClobber flag has the semantic properties that the operand it is ++ // attached to is clobbered before the rest of the inputs are read. 
Hence it ++ // must be unique among the operands to the instruction. ++ // The Define flag is needed to coerce the machine verifier that an Undef ++ // value isn't a problem. ++ // The Dead flag is needed as the value in scratch isn't used by any other ++ // instruction. Kill isn't used as Dead is more precise. ++ // The implicit flag is here due to the interaction between the other flags ++ // and the machine verifier. ++ ++ // For correctness purpose, a new pseudo is introduced here. We need this ++ // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence ++ // that is spread over >1 basic blocks. A register allocator which ++ // introduces (or any codegen infact) a store, can violate the expectations ++ // of the hardware. ++ // ++ // An atomic read-modify-write sequence starts with a linked load ++ // instruction and ends with a store conditional instruction. The atomic ++ // read-modify-write sequence fails if any of the following conditions ++ // occur between the execution of ll and sc: ++ // * A coherent store is completed by another process or coherent I/O ++ // module into the block of synchronizable physical memory containing ++ // the word. The size and alignment of the block is ++ // implementation-dependent. ++ // * A coherent store is executed between an LL and SC sequence on the ++ // same processor to the block of synchornizable physical memory ++ // containing the word. ++ // ++ ++ unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); ++ unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), IncrCopy).addReg(Incr); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(OldVal, RegState::Define | RegState::EarlyClobber) ++ .addReg(PtrCopy) ++ .addReg(IncrCopy) ++ .addReg(Scratch, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ if(MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I32 ++ || MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I64){ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitSignExtendToI32InReg( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, ++ unsigned SrcReg) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const DebugLoc &DL = MI.getDebugLoc(); ++ if (Size == 1) { ++ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_B32), DstReg).addReg(SrcReg); ++ return BB; ++ } ++ ++ if (Size == 2) { ++ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_H32), DstReg).addReg(SrcReg); ++ return BB; ++ } ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i32); ++ unsigned ScrReg = RegInfo.createVirtualRegister(RC); ++ ++ assert(Size < 32); ++ int64_t ShiftImm = 32 - (Size * 8); ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ScrReg).addReg(SrcReg).addImm(ShiftImm); ++ BuildMI(BB, DL, TII->get(LoongArch::SRAI_W), DstReg).addReg(ScrReg).addImm(ShiftImm); ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { ++ assert((Size == 1 || Size == 2) && ++ "Unsupported size for EmitAtomicBinaryPartial."); ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = 
MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i32); ++ const bool ArePtrs64bit = ABI.ArePtrs64bit(); ++ const TargetRegisterClass *RCp = ++ getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ ++ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); ++ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); ++ unsigned Mask = RegInfo.createVirtualRegister(RC); ++ unsigned Mask2 = RegInfo.createVirtualRegister(RC); ++ unsigned Incr2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); ++ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUpper = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUppest = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch2 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch3 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch4 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch5 = RegInfo.createVirtualRegister(RC); ++ ++ unsigned AtomicOp = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_NAND_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I8: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I16: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA; ++ 
break; ++ default: ++ llvm_unreachable("Unknown subword atomic pseudo for expansion!"); ++ } ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // thisMBB: ++ // addiu masklsb2,$0,-4 # 0xfffffffc ++ // and alignedaddr,ptr,masklsb2 ++ // andi ptrlsb2,ptr,3 ++ // sll shiftamt,ptrlsb2,3 ++ // ori maskupper,$0,255 # 0xff ++ // sll mask,maskupper,shiftamt ++ // nor mask2,$0,mask ++ // sll incr2,incr,shiftamt ++ ++ int64_t MaskImm = (Size == 1) ? 255 : 4095; ++ BuildMI(BB, DL, TII->get(ABI.GetPtrAddiOp()), MaskLSB2) ++ .addReg(ABI.GetNullPtr()).addImm(-4); ++ BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr) ++ .addReg(Ptr).addReg(MaskLSB2); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) ++ .addReg(Ptr, 0, ArePtrs64bit ? LoongArch::sub_32 : 0).addImm(3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); ++ ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(MaskUppest).addImm(MaskImm); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(LoongArch::ZERO).addImm(MaskImm); ++ } ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) ++ .addReg(MaskUpper).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Incr2).addReg(Incr).addReg(ShiftAmt); ++ ++ ++ // The purposes of the flags on the scratch registers is explained in ++ // emitAtomicBinary. In summary, we need a scratch register which is going to ++ // be undef, that is unique among registers chosen for the instruction. ++ ++ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); ++ BuildMI(BB, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(AlignedAddr) ++ .addReg(Incr2) ++ .addReg(Mask) ++ .addReg(Mask2) ++ .addReg(ShiftAmt) ++ .addReg(Scratch, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch3, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch4, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch5, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return exitMBB; ++} ++ ++// Lower atomic compare and swap to a pseudo instruction, taking care to ++// define a scratch register for the pseudo instruction's expansion. The ++// instruction is expanded after the register allocator as to prevent ++// the insertion of stores between the linked load and the store conditional. 
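++// After register allocation the POSTRA pseudo is rewritten into an ll/sc
++// retry loop; keeping the whole sequence inside a single pseudo is what
++// guarantees that no spill or reload code can be placed between the linked
++// load and the store conditional.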
++
++MachineBasicBlock *
++LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
++                                           MachineBasicBlock *BB) const {
++  assert((MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ||
++          MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I64) &&
++         "Unsupported atomic pseudo for EmitAtomicCmpSwap.");
++
++  const unsigned Size = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ? 4 : 8;
++
++  MachineFunction *MF = BB->getParent();
++  MachineRegisterInfo &MRI = MF->getRegInfo();
++  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
++  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
++  DebugLoc DL = MI.getDebugLoc();
++
++  unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32
++                          ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA
++                          : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA;
++  unsigned Dest = MI.getOperand(0).getReg();
++  unsigned Ptr = MI.getOperand(1).getReg();
++  unsigned OldVal = MI.getOperand(2).getReg();
++  unsigned NewVal = MI.getOperand(3).getReg();
++
++  unsigned Scratch = MRI.createVirtualRegister(RC);
++  MachineBasicBlock::iterator II(MI);
++
++  // We need to create copies of the various registers and kill them at the
++  // atomic pseudo. If the copies are not made, when the atomic is expanded
++  // after fast register allocation, the spills will end up outside of the
++  // blocks that their values are defined in, causing livein errors.
++
++  unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
++  unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
++  unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr);
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal);
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal);
++
++  // The purposes of the flags on the scratch registers are explained in
++  // emitAtomicBinary. In summary, we need a scratch register which is going to
++  // be undef, that is unique among registers chosen for the instruction.
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(0);
++  BuildMI(*BB, II, DL, TII->get(AtomicOp))
++      .addReg(Dest, RegState::Define | RegState::EarlyClobber)
++      .addReg(PtrCopy, RegState::Kill)
++      .addReg(OldValCopy, RegState::Kill)
++      .addReg(NewValCopy, RegState::Kill)
++      .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
++                           RegState::Dead | RegState::Implicit);
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT);
++
++  MI.eraseFromParent(); // The instruction is gone now.
++
++  return BB;
++}
++
++MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword(
++    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
++  assert((Size == 1 || Size == 2) &&
++         "Unsupported size for EmitAtomicCmpSwapPartial.");
++
++  MachineFunction *MF = BB->getParent();
++  MachineRegisterInfo &RegInfo = MF->getRegInfo();
++  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
++  const bool ArePtrs64bit = ABI.ArePtrs64bit();
++  const TargetRegisterClass *RCp =
++      getRegClassFor(ArePtrs64bit ?
MVT::i64 : MVT::i32); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned CmpVal = MI.getOperand(2).getReg(); ++ unsigned NewVal = MI.getOperand(3).getReg(); ++ ++ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); ++ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); ++ unsigned Mask = RegInfo.createVirtualRegister(RC); ++ unsigned Mask2 = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); ++ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); ++ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUpper = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUppest = RegInfo.createVirtualRegister(RC); ++ unsigned Mask3 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); ++ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); ++ unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8 ++ ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; ++ ++ // The scratch registers here with the EarlyClobber | Define | Dead | Implicit ++ // flags are used to coerce the register allocator and the machine verifier to ++ // accept the usage of these registers. ++ // The EarlyClobber flag has the semantic properties that the operand it is ++ // attached to is clobbered before the rest of the inputs are read. Hence it ++ // must be unique among the operands to the instruction. ++ // The Define flag is needed to coerce the machine verifier that an Undef ++ // value isn't a problem. ++ // The Dead flag is needed as the value in scratch isn't used by any other ++ // instruction. Kill isn't used as Dead is more precise. ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch2 = RegInfo.createVirtualRegister(RC); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // thisMBB: ++ // addiu masklsb2,$0,-4 # 0xfffffffc ++ // and alignedaddr,ptr,masklsb2 ++ // andi ptrlsb2,ptr,3 ++ // xori ptrlsb2,ptrlsb2,3 # Only for BE ++ // sll shiftamt,ptrlsb2,3 ++ // ori maskupper,$0,255 # 0xff ++ // sll mask,maskupper,shiftamt ++ // nor mask2,$0,mask ++ // andi maskedcmpval,cmpval,255 ++ // sll shiftedcmpval,maskedcmpval,shiftamt ++ // andi maskednewval,newval,255 ++ // sll shiftednewval,maskednewval,shiftamt ++ ++ int64_t MaskImm = (Size == 1) ? 255 : 4095; ++ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::ADDI_D : LoongArch::ADDI_W), MaskLSB2) ++ .addReg(ABI.GetNullPtr()).addImm(-4); ++ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::AND : LoongArch::AND32), AlignedAddr) ++ .addReg(Ptr).addReg(MaskLSB2); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) ++ .addReg(Ptr, 0, ArePtrs64bit ? 
LoongArch::sub_32 : 0).addImm(3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); ++ ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(MaskUppest).addImm(MaskImm); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(LoongArch::ZERO).addImm(MaskImm); ++ } ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) ++ .addReg(MaskUpper).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), Mask3) ++ .addReg(MaskUppest).addImm(MaskImm); ++ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedCmpVal) ++ .addReg(CmpVal).addReg(Mask3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) ++ .addReg(MaskedCmpVal).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedNewVal) ++ .addReg(NewVal).addReg(Mask3); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedCmpVal) ++ .addReg(CmpVal).addImm(MaskImm); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) ++ .addReg(MaskedCmpVal).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedNewVal) ++ .addReg(NewVal).addImm(MaskImm); ++ } ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal) ++ .addReg(MaskedNewVal).addReg(ShiftAmt); ++ ++ // The purposes of the flags on the scratch registers are explained in ++ // emitAtomicBinary. In summary, we need a scratch register which is going to ++ // be undef, that is unique among the register chosen for the instruction. ++ ++ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); ++ BuildMI(BB, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(AlignedAddr) ++ .addReg(Mask) ++ .addReg(ShiftedCmpVal) ++ .addReg(Mask2) ++ .addReg(ShiftedNewVal) ++ .addReg(ShiftAmt) ++ .addReg(Scratch, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return exitMBB; ++} ++ ++SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { ++ // The first operand is the chain, the second is the condition, the third is ++ // the block to branch to if the condition is true. ++ SDValue Chain = Op.getOperand(0); ++ SDValue Dest = Op.getOperand(2); ++ SDLoc DL(Op); ++ ++ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); ++ ++ // Return if flag is not set by a floating point comparison. ++ if (CondRes.getOpcode() != LoongArchISD::FPCmp) ++ return Op; ++ ++ SDValue CCNode = CondRes.getOperand(2); ++ LoongArch::CondCode CC = ++ (LoongArch::CondCode)cast(CCNode)->getZExtValue(); ++ unsigned Opc = invertFPCondCodeUser(CC) ? LoongArch::BRANCH_F : LoongArch::BRANCH_T; ++ SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32); ++ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ return DAG.getNode(LoongArchISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, ++ FCC0, Dest, CondRes); ++} ++ ++SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); ++ ++ // Return if flag is not set by a floating point comparison. 
++ if (Cond.getOpcode() != LoongArchISD::FPCmp) ++ return Op; ++ ++ SDValue N1 = Op.getOperand(1); ++ SDValue N2 = Op.getOperand(2); ++ SDLoc DL(Op); ++ ++ ConstantSDNode *CC = cast(Cond.getOperand(2)); ++ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); ++ SDValue FCC = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ ++ if (Op->getSimpleValueType(0).SimpleTy == MVT::f64 || ++ Op->getSimpleValueType(0).SimpleTy == MVT::f32) { ++ if (invert) ++ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N1, FCC, N2, ++ Cond); ++ else ++ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N2, FCC, N1, ++ Cond); ++ ++ } else ++ return Op; ++} ++ ++SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { ++ SDValue Cond = createFPCmp(DAG, Op); ++ ++ assert(Cond.getOpcode() == LoongArchISD::FPCmp && ++ "Floating point operand expected."); ++ ++ SDLoc DL(Op); ++ SDValue True = DAG.getConstant(1, DL, MVT::i32); ++ SDValue False = DAG.getConstant(0, DL, MVT::i32); ++ ++ return createCMovFP(DAG, Cond, True, False, DL); ++} ++ ++SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ GlobalAddressSDNode *N = cast(Op); ++ ++ const GlobalValue *GV = N->getGlobal(); ++ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); ++ SDValue Addr = getAddr(N, DAG, IsLocal); ++ ++ return Addr; ++} ++ ++SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ BlockAddressSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const ++{ ++ GlobalAddressSDNode *GA = cast(Op); ++ if (DAG.getTarget().useEmulatedTLS()) ++ return LowerToTLSEmulatedModel(GA, DAG); ++ ++ SDLoc DL(GA); ++ const GlobalValue *GV = GA->getGlobal(); ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ ++ TLSModel::Model model = getTargetMachine().getTLSModel(GV); ++ ++ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { ++ // General Dynamic TLS Model && Local Dynamic TLS Model ++ unsigned PtrSize = PtrVT.getSizeInBits(); ++ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); ++ // SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrTy, 0, 0); ++ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); ++ SDValue Load = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_GD , ++ DL, PtrVT, Addr), 0); ++ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ++ ++ ArgListTy Args; ++ ArgListEntry Entry; ++ Entry.Node = Load; ++ Entry.Ty = PtrTy; ++ Args.push_back(Entry); ++ ++ TargetLowering::CallLoweringInfo CLI(DAG); ++ CLI.setDebugLoc(DL) ++ .setChain(DAG.getEntryNode()) ++ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); ++ std::pair CallResult = LowerCallTo(CLI); ++ ++ SDValue Ret = CallResult.first; ++ ++ return Ret; ++ } ++ ++ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); ++ SDValue Offset; ++ if (model == TLSModel::InitialExec) { ++ // Initial Exec TLS Model ++ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_IE, DL, ++ PtrVT, Addr), 0); ++ } else { ++ // Local Exec TLS Model ++ assert(model == TLSModel::LocalExec); ++ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_LE, DL, ++ PtrVT, Addr), 0); ++ } ++ ++ SDValue ThreadPointer = DAG.getRegister((PtrVT == MVT::i32) ++ ? 
LoongArch::TP ++ : LoongArch::TP_64, PtrVT); ++ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerJumpTable(SDValue Op, SelectionDAG &DAG) const ++{ ++ JumpTableSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerConstantPool(SDValue Op, SelectionDAG &DAG) const ++{ ++ ConstantPoolSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ LoongArchFunctionInfo *FuncInfo = MF.getInfo(); ++ ++ SDLoc DL(Op); ++ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), ++ getPointerTy(MF.getDataLayout())); ++ ++ // vastart just stores the address of the VarArgsFrameIndex slot into the ++ // memory location argument. ++ const Value *SV = cast(Op.getOperand(2))->getValue(); ++ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), ++ MachinePointerInfo(SV)); ++} ++ ++SDValue LoongArchTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { ++ SDNode *Node = Op.getNode(); ++ EVT VT = Node->getValueType(0); ++ SDValue Chain = Node->getOperand(0); ++ SDValue VAListPtr = Node->getOperand(1); ++ const Align Align = ++ llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); ++ const Value *SV = cast(Node->getOperand(2))->getValue(); ++ SDLoc DL(Node); ++ unsigned ArgSlotSizeInBytes = Subtarget.is64Bit() ? 8 : 4; ++ ++ SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, ++ VAListPtr, MachinePointerInfo(SV)); ++ SDValue VAList = VAListLoad; ++ ++ // Re-align the pointer if necessary. ++ // It should only ever be necessary for 64-bit types on ILP32D/ILP32F/ILP32S ++ // since the minimum argument alignment is the same as the maximum type ++ // alignment for LP64D/LP64S/LP64F. ++ // ++ // FIXME: We currently align too often. The code generator doesn't notice ++ // when the pointer is still aligned from the last va_arg (or pair of ++ // va_args for the i64 on ILP32D/ILP32F/ILP32S case). ++ if (Align > getMinStackArgumentAlignment()) { ++ VAList = DAG.getNode( ++ ISD::ADD, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); ++ ++ VAList = DAG.getNode( ++ ISD::AND, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); ++ } ++ ++ // Increment the pointer, VAList, to the next vaarg. 
++ auto &TD = DAG.getDataLayout(); ++ unsigned ArgSizeInBytes = ++ TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); ++ SDValue Tmp3 = ++ DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes), ++ DL, VAList.getValueType())); ++ // Store the incremented VAList to the legalized pointer ++ Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, ++ MachinePointerInfo(SV)); ++ ++ // Load the actual argument out of the pointer VAList ++ return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo()); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { ++ // check the depth ++ assert((cast(Op.getOperand(0))->getZExtValue() == 0) && ++ "Frame address can only be determined for current frame."); ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ MFI.setFrameAddressIsTaken(true); ++ EVT VT = Op.getValueType(); ++ SDLoc DL(Op); ++ SDValue FrameAddr = DAG.getCopyFromReg( ++ DAG.getEntryNode(), DL, ++ Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP, VT); ++ return FrameAddr; ++} ++ ++SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (verifyReturnAddressArgumentIsConstant(Op, DAG)) ++ return SDValue(); ++ ++ // check the depth ++ assert((cast(Op.getOperand(0))->getZExtValue() == 0) && ++ "Return address can be determined only for current frame."); ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ MVT VT = Op.getSimpleValueType(); ++ unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA; ++ MFI.setReturnAddressIsTaken(true); ++ ++ // Return RA, which contains the return address. Mark it an implicit live-in. ++ unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT)); ++ return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT); ++} ++ ++// An EH_RETURN is the result of lowering llvm.eh.return which in turn is ++// generated from __builtin_eh_return (offset, handler) ++// The effect of this is to adjust the stack pointer by "offset" ++// and then branch to "handler". ++SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) ++ const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ LoongArchFI->setCallsEhReturn(); ++ SDValue Chain = Op.getOperand(0); ++ SDValue Offset = Op.getOperand(1); ++ SDValue Handler = Op.getOperand(2); ++ SDLoc DL(Op); ++ EVT Ty = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ // Store stack offset in A1, store jump target in A0. Glue CopyToReg and ++ // EH_RETURN nodes, so that instructions are emitted back-to-back. ++ unsigned OffsetReg = Subtarget.is64Bit() ? LoongArch::A1_64 : LoongArch::A1; ++ unsigned AddrReg = Subtarget.is64Bit() ? LoongArch::A0_64 : LoongArch::A0; ++ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); ++ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); ++ return DAG.getNode(LoongArchISD::EH_RETURN, DL, MVT::Other, Chain, ++ DAG.getRegister(OffsetReg, Ty), ++ DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), ++ Chain.getValue(1)); ++} ++ ++SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ // FIXME: Need pseudo-fence for 'singlethread' fences ++ // FIXME: Set SType for weaker fences where supported/appropriate. 
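++  // A zero hint makes DBAR act as a full completion barrier, the strongest
++  // ordering available; weaker hints are left to the FIXMEs above.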
++ unsigned SType = 0; ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op.getOperand(0), ++ DAG.getConstant(SType, DL, MVT::i32)); ++} ++ ++SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ // if shamt < (VT.bits): ++ // lo = (shl lo, shamt) ++ // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) ++ // else: ++ // lo = 0 ++ // hi = (shl lo, shamt[4:0]) ++ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, ++ DAG.getConstant(-1, DL, MVT::i32)); ++ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, ++ DAG.getConstant(1, DL, VT)); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ++ DAG.getConstant(0, DL, VT), ShiftLeftLo); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, ++ bool IsSRA) const { ++ SDLoc DL(Op); ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ // if shamt < (VT.bits): ++ // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) ++ // if isSRA: ++ // hi = (sra hi, shamt) ++ // else: ++ // hi = (srl hi, shamt) ++ // else: ++ // if isSRA: ++ // lo = (sra hi, shamt[4:0]) ++ // hi = (sra hi, 31) ++ // else: ++ // lo = (srl hi, shamt[4:0]) ++ // hi = 0 ++ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, ++ DAG.getConstant(-1, DL, MVT::i32)); ++ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, ++ DAG.getConstant(1, DL, VT)); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, ++ DL, VT, Hi, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); ++ SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, ++ DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ++ IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). 
++static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG, ++ bool SingleFloat) { ++ SDValue Val = SD->getValue(); ++ ++ if (Val.getOpcode() != ISD::FP_TO_SINT || ++ (Val.getValueSizeInBits() > 32 && SingleFloat)) ++ return SDValue(); ++ ++ EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits()); ++ SDValue Tr = DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Val), FPTy, ++ Val.getOperand(0)); ++ return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), ++ SD->getPointerInfo(), SD->getAlignment(), ++ SD->getMemOperand()->getFlags()); ++} ++ ++SDValue LoongArchTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { ++ StoreSDNode *SD = cast(Op); ++ return lowerFP_TO_SINT_STORE( ++ SD, DAG, (Subtarget.hasBasicF() && !Subtarget.hasBasicD())); ++} ++ ++SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ unsigned Intrinsic = cast(Op->getOperand(0))->getZExtValue(); ++ switch (Intrinsic) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vaddi_bu: ++ case Intrinsic::loongarch_lsx_vaddi_hu: ++ case Intrinsic::loongarch_lsx_vaddi_wu: ++ case Intrinsic::loongarch_lsx_vaddi_du: ++ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vand_v: ++ case Intrinsic::loongarch_lasx_xvand_v: ++ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vbitclr_b: ++ case Intrinsic::loongarch_lsx_vbitclr_h: ++ case Intrinsic::loongarch_lsx_vbitclr_w: ++ case Intrinsic::loongarch_lsx_vbitclr_d: ++ return lowerLSXBitClear(Op, DAG); ++ case Intrinsic::loongarch_lsx_vdiv_b: ++ case Intrinsic::loongarch_lsx_vdiv_h: ++ case Intrinsic::loongarch_lsx_vdiv_w: ++ case Intrinsic::loongarch_lsx_vdiv_d: ++ return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vdiv_bu: ++ case Intrinsic::loongarch_lsx_vdiv_hu: ++ case Intrinsic::loongarch_lsx_vdiv_wu: ++ case Intrinsic::loongarch_lsx_vdiv_du: ++ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfdiv_s: ++ case Intrinsic::loongarch_lsx_vfdiv_d: ++ return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vffint_s_wu: ++ case Intrinsic::loongarch_lsx_vffint_d_lu: ++ return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vffint_s_w: ++ case Intrinsic::loongarch_lsx_vffint_d_l: ++ return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vfmul_s: ++ case Intrinsic::loongarch_lsx_vfmul_d: ++ return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfrint_s: ++ case Intrinsic::loongarch_lsx_vfrint_d: ++ return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vfsqrt_s: ++ case Intrinsic::loongarch_lsx_vfsqrt_d: ++ return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vftintrz_wu_s: ++ case Intrinsic::loongarch_lsx_vftintrz_lu_d: ++ return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vpackev_b: ++ case 
Intrinsic::loongarch_lsx_vpackev_h: ++ case Intrinsic::loongarch_lsx_vpackev_w: ++ case Intrinsic::loongarch_lsx_vpackev_d: ++ return DAG.getNode(LoongArchISD::VPACKEV, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vilvh_b: ++ case Intrinsic::loongarch_lsx_vilvh_h: ++ case Intrinsic::loongarch_lsx_vilvh_w: ++ case Intrinsic::loongarch_lsx_vilvh_d: ++ return DAG.getNode(LoongArchISD::VILVH, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpackod_b: ++ case Intrinsic::loongarch_lsx_vpackod_h: ++ case Intrinsic::loongarch_lsx_vpackod_w: ++ case Intrinsic::loongarch_lsx_vpackod_d: ++ return DAG.getNode(LoongArchISD::VPACKOD, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vilvl_b: ++ case Intrinsic::loongarch_lsx_vilvl_h: ++ case Intrinsic::loongarch_lsx_vilvl_w: ++ case Intrinsic::loongarch_lsx_vilvl_d: ++ return DAG.getNode(LoongArchISD::VILVL, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmadd_b: ++ case Intrinsic::loongarch_lsx_vmadd_h: ++ case Intrinsic::loongarch_lsx_vmadd_w: ++ case Intrinsic::loongarch_lsx_vmadd_d: { ++ EVT ResTy = Op->getValueType(0); ++ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, ++ Op->getOperand(2), Op->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vmax_b: ++ case Intrinsic::loongarch_lsx_vmax_h: ++ case Intrinsic::loongarch_lsx_vmax_w: ++ case Intrinsic::loongarch_lsx_vmax_d: ++ return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmax_bu: ++ case Intrinsic::loongarch_lsx_vmax_hu: ++ case Intrinsic::loongarch_lsx_vmax_wu: ++ case Intrinsic::loongarch_lsx_vmax_du: ++ return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_b: ++ case Intrinsic::loongarch_lsx_vmin_h: ++ case Intrinsic::loongarch_lsx_vmin_w: ++ case Intrinsic::loongarch_lsx_vmin_d: ++ return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_bu: ++ case Intrinsic::loongarch_lsx_vmin_hu: ++ case Intrinsic::loongarch_lsx_vmin_wu: ++ case Intrinsic::loongarch_lsx_vmin_du: ++ return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmini_bu: ++ case Intrinsic::loongarch_lsx_vmini_hu: ++ case Intrinsic::loongarch_lsx_vmini_wu: ++ case Intrinsic::loongarch_lsx_vmini_du: ++ return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmod_b: ++ case Intrinsic::loongarch_lsx_vmod_h: ++ case Intrinsic::loongarch_lsx_vmod_w: ++ case Intrinsic::loongarch_lsx_vmod_d: ++ return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_bu: ++ case Intrinsic::loongarch_lsx_vmod_hu: ++ case Intrinsic::loongarch_lsx_vmod_wu: ++ case Intrinsic::loongarch_lsx_vmod_du: ++ return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmul_b: ++ case Intrinsic::loongarch_lsx_vmul_h: ++ case Intrinsic::loongarch_lsx_vmul_w: ++ case Intrinsic::loongarch_lsx_vmul_d: ++ return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), 
Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmsub_b: ++ case Intrinsic::loongarch_lsx_vmsub_h: ++ case Intrinsic::loongarch_lsx_vmsub_w: ++ case Intrinsic::loongarch_lsx_vmsub_d: { ++ EVT ResTy = Op->getValueType(0); ++ return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, ++ Op->getOperand(2), Op->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vclz_b: ++ case Intrinsic::loongarch_lsx_vclz_h: ++ case Intrinsic::loongarch_lsx_vclz_w: ++ case Intrinsic::loongarch_lsx_vclz_d: ++ return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vnor_v: ++ case Intrinsic::loongarch_lasx_xvnor_v: { ++ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ return DAG.getNOT(DL, Res, Res->getValueType(0)); ++ } ++ case Intrinsic::loongarch_lsx_vor_v: ++ case Intrinsic::loongarch_lasx_xvor_v: ++ return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpickev_b: ++ case Intrinsic::loongarch_lsx_vpickev_h: ++ case Intrinsic::loongarch_lsx_vpickev_w: ++ case Intrinsic::loongarch_lsx_vpickev_d: ++ return DAG.getNode(LoongArchISD::VPICKEV, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpickod_b: ++ case Intrinsic::loongarch_lsx_vpickod_h: ++ case Intrinsic::loongarch_lsx_vpickod_w: ++ case Intrinsic::loongarch_lsx_vpickod_d: ++ return DAG.getNode(LoongArchISD::VPICKOD, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpcnt_b: ++ case Intrinsic::loongarch_lsx_vpcnt_h: ++ case Intrinsic::loongarch_lsx_vpcnt_w: ++ case Intrinsic::loongarch_lsx_vpcnt_d: ++ return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ case Intrinsic::loongarch_lsx_vsat_du: { ++ // Report an error for out of range values. 
++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_du: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vshuf4i_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_h: ++ case Intrinsic::loongarch_lsx_vshuf4i_w: ++ // case Intrinsic::loongarch_lsx_vshuf4i_d: ++ { ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > 255) ++ report_fatal_error("Immediate out of range"); ++ return DAG.getNode(LoongArchISD::SHF, DL, Op->getValueType(0), ++ Op->getOperand(2), Op->getOperand(1)); ++ } ++ case Intrinsic::loongarch_lsx_vsll_b: ++ case Intrinsic::loongarch_lsx_vsll_h: ++ case Intrinsic::loongarch_lsx_vsll_w: ++ case Intrinsic::loongarch_lsx_vsll_d: ++ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_b: ++ case Intrinsic::loongarch_lsx_vslli_h: ++ case Intrinsic::loongarch_lsx_vslli_w: ++ case Intrinsic::loongarch_lsx_vslli_d: ++ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vreplve_b: ++ case Intrinsic::loongarch_lsx_vreplve_h: ++ case Intrinsic::loongarch_lsx_vreplve_w: ++ case Intrinsic::loongarch_lsx_vreplve_d: ++ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle ++ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because ++ // EXTRACT_VECTOR_ELT can't extract i64's on LoongArch32. ++ // Instead we lower to LoongArchISD::VSHF and match from there. ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ lowerLSXSplatZExt(Op, 2, DAG), Op->getOperand(1), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vreplvei_b: ++ case Intrinsic::loongarch_lsx_vreplvei_h: ++ case Intrinsic::loongarch_lsx_vreplvei_w: ++ case Intrinsic::loongarch_lsx_vreplvei_d: ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ lowerLSXSplatImm(Op, 2, DAG), Op->getOperand(1), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vsra_b: ++ case Intrinsic::loongarch_lsx_vsra_h: ++ case Intrinsic::loongarch_lsx_vsra_w: ++ case Intrinsic::loongarch_lsx_vsra_d: ++ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ case Intrinsic::loongarch_lsx_vsrari_d: { ++ // Report an error for out of range values. 
++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_d: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vsrl_b: ++ case Intrinsic::loongarch_lsx_vsrl_h: ++ case Intrinsic::loongarch_lsx_vsrl_w: ++ case Intrinsic::loongarch_lsx_vsrl_d: ++ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_b: ++ case Intrinsic::loongarch_lsx_vsrli_h: ++ case Intrinsic::loongarch_lsx_vsrli_w: ++ case Intrinsic::loongarch_lsx_vsrli_d: ++ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ case Intrinsic::loongarch_lsx_vsrlri_d: { ++ // Report an error for out of range values. ++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_d: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vsubi_bu: ++ case Intrinsic::loongarch_lsx_vsubi_hu: ++ case Intrinsic::loongarch_lsx_vsubi_wu: ++ case Intrinsic::loongarch_lsx_vsubi_du: ++ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vshuf_h: ++ case Intrinsic::loongarch_lsx_vshuf_w: ++ case Intrinsic::loongarch_lsx_vshuf_d: ++ case Intrinsic::loongarch_lasx_xvshuf_h: ++ case Intrinsic::loongarch_lasx_xvshuf_w: ++ case Intrinsic::loongarch_lasx_xvshuf_d: ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::loongarch_lsx_vxor_v: ++ case Intrinsic::loongarch_lasx_xvxor_v: ++ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vrotr_b: ++ case Intrinsic::loongarch_lsx_vrotr_h: ++ case Intrinsic::loongarch_lsx_vrotr_w: ++ case Intrinsic::loongarch_lsx_vrotr_d: ++ return DAG.getNode(LoongArchISD::VROR, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vrotri_b: ++ case Intrinsic::loongarch_lsx_vrotri_h: ++ case Intrinsic::loongarch_lsx_vrotri_w: ++ case Intrinsic::loongarch_lsx_vrotri_d: ++ return DAG.getNode(LoongArchISD::VRORI, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::thread_pointer: { ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ if (PtrVT == MVT::i64) ++ return DAG.getRegister(LoongArch::TP_64, MVT::i64); ++ return DAG.getRegister(LoongArch::TP, MVT::i32); ++ } ++ } ++} ++ ++SDValue ++LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, ++ 
SelectionDAG &DAG) const { ++ unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); ++ switch (Intr) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vld: ++ return lowerLSXLoadIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvld: ++ return lowerLASXLoadIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvldrepl_b: ++ case Intrinsic::loongarch_lasx_xvldrepl_h: ++ case Intrinsic::loongarch_lasx_xvldrepl_w: ++ case Intrinsic::loongarch_lasx_xvldrepl_d: ++ return lowerLASXVLDRIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lsx_vldrepl_b: ++ case Intrinsic::loongarch_lsx_vldrepl_h: ++ case Intrinsic::loongarch_lsx_vldrepl_w: ++ case Intrinsic::loongarch_lsx_vldrepl_d: ++ return lowerLSXVLDRIntr(Op, DAG, Intr, Subtarget); ++ } ++} ++ ++SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, ++ SelectionDAG &DAG) const { ++ unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); ++ switch (Intr) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vst: ++ return lowerLSXStoreIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvst: ++ return lowerLASXStoreIntr(Op, DAG, Intr, Subtarget); ++ } ++} ++ ++// Lower ISD::EXTRACT_VECTOR_ELT into LoongArchISD::VEXTRACT_SEXT_ELT. ++// ++// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We ++// choose to sign-extend but we could have equally chosen zero-extend. The ++// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT ++// result into this node later (possibly changing it to a zero-extend in the ++// process). ++SDValue ++LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT VecTy = Op0->getValueType(0); ++ ++ if (!VecTy.is128BitVector() && !VecTy.is256BitVector()) ++ return SDValue(); ++ ++ if (ResTy.isInteger()) { ++ SDValue Op1 = Op->getOperand(1); ++ EVT EltTy = VecTy.getVectorElementType(); ++ if (VecTy.is128BitVector()) ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, ++ DAG.getValueType(EltTy)); ++ ++ ConstantSDNode *cn = dyn_cast(Op1); ++ if (!cn) ++ return SDValue(); ++ ++ if (EltTy == MVT::i32 || EltTy == MVT::i64) ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, ++ DAG.getValueType(EltTy)); ++ } ++ ++ return SDValue(); ++} ++ ++SDValue ++LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ MVT VT = Op.getSimpleValueType(); ++ MVT EltVT = VT.getVectorElementType(); ++ ++ SDLoc DL(Op); ++ SDValue Op0 = Op.getOperand(0); ++ SDValue Op1 = Op.getOperand(1); ++ SDValue Op2 = Op.getOperand(2); ++ ++ if (!EltVT.isInteger()) ++ return Op; ++ ++ if (!isa(Op2)) { ++ if (EltVT == MVT::i8 || EltVT == MVT::i16) { ++ return Op; // ==> pseudo ++ // use stack ++ return SDValue(); ++ } else { ++ return Op; ++ } ++ } ++ ++ if (VT.is128BitVector()) ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); ++ ++ if (VT.is256BitVector()) { ++ ++ if (EltVT == MVT::i32 || EltVT == MVT::i64) ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); ++ ++ return Op; ++ } ++ ++ return SDValue(); ++} ++ ++// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the ++// backend. 
++// ++// Lowers according to the following rules: ++// - Constant splats are legal as-is as long as the SplatBitSize is a power of ++// 2 less than or equal to 64 and the value fits into a signed 10-bit ++// immediate ++// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize ++// is a power of 2 less than or equal to 64 and the value does not fit into a ++// signed 10-bit immediate ++// - Non-constant splats are legal as-is. ++// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. ++// - All others are illegal and must be expanded. ++SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, ++ SelectionDAG &DAG) const { ++ BuildVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if ((!Subtarget.hasLSX() || !ResTy.is128BitVector()) && ++ (!Subtarget.hasLASX() || !ResTy.is256BitVector())) ++ return SDValue(); ++ ++ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8) && ++ SplatBitSize <= 64) { ++ // We can only cope with 8, 16, 32, or 64-bit elements ++ if ((ResTy.is128BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && ++ SplatBitSize != 32 && SplatBitSize != 64) || ++ (ResTy.is256BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && ++ SplatBitSize != 32 && SplatBitSize != 64)) ++ return SDValue(); ++ ++ // If the value isn't an integer type we will have to bitcast ++ // from an integer type first. Also, if there are any undefs, we must ++ // lower them to defined values first. ++ if (ResTy.isInteger() && !HasAnyUndefs) ++ return Op; ++ ++ EVT ViaVecTy; ++ ++ if ((ResTy.is128BitVector() && ++ !isLSXBySplatBitSize(SplatBitSize, ViaVecTy)) || ++ (ResTy.is256BitVector() && ++ !isLASXBySplatBitSize(SplatBitSize, ViaVecTy))) ++ return SDValue(); ++ ++ // SelectionDAG::getConstant will promote SplatValue appropriately. ++ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); ++ ++ // Bitcast to the type we originally wanted ++ if (ViaVecTy != ResTy) ++ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); ++ ++ return Result; ++ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) ++ return Op; ++ else if (!isConstantOrUndefBUILD_VECTOR(Node)) { ++ // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
++ // The resulting code is the same length as the expansion, but it doesn't ++ // use memory operations ++ EVT ResTy = Node->getValueType(0); ++ ++ assert(ResTy.isVector()); ++ ++ unsigned NumElts = ResTy.getVectorNumElements(); ++ SDValue Vector = DAG.getUNDEF(ResTy); ++ for (unsigned i = 0; i < NumElts; ++i) { ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, ++ Node->getOperand(i), DAG.getConstant(i, DL, MVT::i32)); ++ } ++ return Vector; ++ } ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ Op = LowerSUINT_TO_FP(ISD::ZERO_EXTEND_VECTOR_INREG, Op, DAG); ++ if (!ResTy.isVector()) ++ return Op; ++ return DAG.getNode(ISD::UINT_TO_FP, DL, ResTy, Op); ++} ++ ++SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ Op = LowerSUINT_TO_FP(ISD::SIGN_EXTEND_VECTOR_INREG, Op, DAG); ++ if (!ResTy.isVector()) ++ return Op; ++ return DAG.getNode(ISD::SINT_TO_FP, DL, ResTy, Op); ++} ++ ++SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (!Op->getValueType(0).isVector()) ++ return SDValue(); ++ return LowerFP_TO_SUINT(ISD::FP_TO_UINT, ISD::ZERO_EXTEND_VECTOR_INREG, Op, ++ DAG); ++} ++ ++SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (Op->getValueType(0).isVector()) ++ return LowerFP_TO_SUINT(ISD::FP_TO_SINT, ISD::SIGN_EXTEND_VECTOR_INREG, Op, ++ DAG); ++ ++ if (Op.getValueSizeInBits() > 32 && ++ (Subtarget.hasBasicF() && !Subtarget.hasBasicD())) ++ return SDValue(); ++ ++ EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); ++ SDValue Trunc = ++ DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0)); ++ return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc); ++} ++ ++static bool checkUndef(ArrayRef Mask, int Lo, int Hi) { ++ ++ for (int i = Lo, end = Hi; i != end; i++, Hi++) ++ if (!((Mask[i] == -1) || (Mask[i] == Hi))) ++ return false; ++ return true; ++} ++ ++static bool CheckRev(ArrayRef Mask) { ++ ++ int Num = Mask.size() - 1; ++ for (long unsigned int i = 0; i < Mask.size(); i++, Num--) ++ if (Mask[i] != Num) ++ return false; ++ return true; ++} ++ ++static bool checkHalf(ArrayRef Mask, int Lo, int Hi, int base) { ++ ++ for (int i = Lo; i < Hi; i++) ++ if (Mask[i] != (base + i)) ++ return false; ++ return true; ++} ++ ++static SDValue lowerHalfHalf(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, ++ ArrayRef Mask, SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ int HalfNum = Num / 2; ++ ++ if (Op1->isUndef() || Op2->isUndef() || Mask.size() > (long unsigned int)Num) ++ return SDValue(); ++ ++ if (checkHalf(Mask, HalfNum, Num, Num) && checkHalf(Mask, 0, HalfNum, 0)) { ++ return SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(48, DL, MVT::i32)), ++ 0); ++ } ++ ++ return SDValue(); ++} ++ ++static bool checkHalfUndef(ArrayRef Mask, int Lo, int Hi) { ++ ++ for (int i = Lo; i < Hi; i++) ++ if (Mask[i] != -1) ++ return false; ++ return true; ++} ++ ++// Lowering vectors with half undef data, ++// use EXTRACT_SUBVECTOR and INSERT_SUBVECTOR instead of VECTOR_SHUFFLE ++static SDValue lowerHalfUndef(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, ++ ArrayRef Mask, SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ int HalfNum = Num / 2; ++ MVT HalfVT 
= MVT::getVectorVT(VT.getVectorElementType(), HalfNum); ++ MVT VT1 = Op1.getSimpleValueType(); ++ SDValue Op; ++ ++ bool check1 = Op1->isUndef() && (!Op2->isUndef()); ++ bool check2 = Op2->isUndef() && (!Op1->isUndef()); ++ ++ if ((check1 || check2) && (VT1 == VT)) { ++ if (check1) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op2); ++ } else if (check2) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op1); ++ } ++ ++ if (VT == MVT::v32i8 && CheckRev(Mask)) { ++ SDValue Vector; ++ SDValue Rev[4]; ++ SDValue Ext[4]; ++ for (int i = 0; i < 4; i++) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); ++ } ++ ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, DAG.getUNDEF(VT), ++ Rev[3], DAG.getConstant(3, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[2], DAG.getConstant(2, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[1], DAG.getConstant(1, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[0], DAG.getConstant(0, DL, MVT::i32)); ++ ++ Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, Vector); ++ ++ return Vector; ++ } ++ } ++ ++ if (checkHalfUndef(Mask, HalfNum, Num) && checkUndef(Mask, 0, HalfNum)) { ++ SDValue High = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, ++ DAG.getConstant(HalfNum, DL, MVT::i64)); ++ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), High, ++ DAG.getConstant(0, DL, MVT::i64)); ++ } ++ ++ if (checkHalfUndef(Mask, HalfNum, Num) && (VT == MVT::v8i32) && ++ (Mask[0] == 0) && (Mask[1] == 1) && (Mask[2] == (Num + 2)) && ++ (Mask[3] == (Num + 3))) { ++ ++ SDValue Val1 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(32, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val2 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, ++ DAG.getTargetConstant(12, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val3 = SDValue( ++ DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), ++ DAG.getTargetConstant(2, DL, MVT::i32)), ++ 0); ++ return Val3; ++ } ++ ++ if (checkHalfUndef(Mask, 0, HalfNum) && checkUndef(Mask, HalfNum, Num)) { ++ SDValue Low = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, ++ DAG.getConstant(0, DL, MVT::i32)); ++ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Low, ++ DAG.getConstant(HalfNum, DL, MVT::i32)); ++ } ++ ++ if (checkHalfUndef(Mask, 0, HalfNum) && (VT == MVT::v8i32) && ++ (Mask[HalfNum] == HalfNum) && (Mask[HalfNum + 1] == (HalfNum + 1)) && ++ (Mask[HalfNum + 2] == (2 * Num - 2)) && ++ (Mask[HalfNum + 3] == (2 * Num - 1))) { ++ ++ SDValue Val1 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(49, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val2 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, ++ DAG.getTargetConstant(12, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val3 = SDValue( ++ DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), ++ DAG.getTargetConstant(32, DL, MVT::i32)), ++ 0); ++ return Val3; ++ } ++ ++ if ((VT == MVT::v8i32) || (VT == MVT::v4i64)) { ++ int def = 0; ++ int j = 0; ++ int ext[3]; ++ int ins[3]; ++ bool useOp1[3] = {true, true, true}; ++ bool checkdef = true; ++ ++ for (int i = 0; i < Num; i++) { ++ if (def > 2) { ++ checkdef = false; ++ break; ++ } ++ if (Mask[i] 
!= -1) { ++ def++; ++ ins[j] = i; ++ if (Mask[i] >= Num) { ++ ext[j] = Mask[i] - Num; ++ useOp1[j] = false; ++ } else { ++ ext[j] = Mask[i]; ++ } ++ j++; ++ } ++ } ++ ++ if (checkdef) { ++ SDValue Vector = DAG.getUNDEF(VT); ++ EVT EltTy = VT.getVectorElementType(); ++ SDValue Ext[2]; ++ ++ if (check1 || check2) { ++ for (int i = 0; i < def; i++) { ++ if (check1) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } else if (check2) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } ++ } ++ return Vector; ++ } else { ++ for (int i = 0; i < def; i++) { ++ if (!useOp1[i]) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } else { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } ++ } ++ return Vector; ++ } ++ } ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue lowerHalfUndef_LSX(const SDLoc &DL, EVT ResTy, MVT VT, ++ SDValue Op1, SDValue Op2, ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ MVT VT1 = Op1.getSimpleValueType(); ++ ++ bool check1 = Op1->isUndef() && (!Op2->isUndef()); ++ bool check2 = Op2->isUndef() && (!Op1->isUndef()); ++ ++ if ((check1 || check2) && (VT1 == VT)) { ++ SDValue Op; ++ ++ if (VT == MVT::v16i8 && CheckRev(Mask)) { ++ ++ if (check1) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op2); ++ } else if (check2) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op1); ++ } ++ ++ SDValue Vector; ++ SDValue Rev[2]; ++ SDValue Ext[2]; ++ for (int i = 0; i < 2; i++) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); ++ } ++ ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, DAG.getUNDEF(VT), ++ Rev[1], DAG.getConstant(1, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Vector, ++ Rev[0], DAG.getConstant(0, DL, MVT::i32)); ++ ++ Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Vector); ++ ++ return Vector; ++ } ++ } ++ ++ return SDValue(); ++} ++ ++// Use SDNode of LoongArchINSVE instead of ++// a series of EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT ++static SDValue lowerVECTOR_SHUFFLE_INSVE(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8) ++ return SDValue(); ++ ++ int CheckOne = 0; ++ int CheckOther = 0; ++ int Idx; ++ ++ for (int i = 0; i < Num; i++) { ++ if ((Mask[i] == i) || (Mask[i] == -1)) { ++ CheckOther++; ++ } else if (Mask[i] == Num) { ++ CheckOne++; ++ Idx = i; ++ } else ++ return SDValue(); ++ } ++ ++ if ((CheckOne != 1) || (CheckOther != (Num - 1))) ++ return SDValue(); ++ else { ++ return DAG.getNode(LoongArchISD::INSVE, DL, ResTy, Op1, Op2, ++ DAG.getConstant(Idx, DL, MVT::i32)); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue 
lowerVECTOR_SHUFFLE_XVPICKVE(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8 || ++ (!ISD::isBuildVectorAllZeros(Op1.getNode()))) ++ return SDValue(); ++ ++ bool CheckV = true; ++ ++ if ((Mask[0] < Num) || (Mask[0] > (2 * Num - 1))) ++ CheckV = false; ++ ++ for (int i = 1; i < Num; i++) { ++ if (Mask[i] != 0) { ++ CheckV = false; ++ break; ++ } ++ } ++ ++ if (!CheckV) ++ return SDValue(); ++ else { ++ return DAG.getNode(LoongArchISD::XVPICKVE, DL, ResTy, Op1, Op2, ++ DAG.getConstant(Mask[0] - Num, DL, MVT::i32)); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ if (VT == MVT::v4i64) { ++ int Num = VT.getVectorNumElements(); ++ ++ bool CheckV = true; ++ for (int i = 0; i < Num; i++) { ++ if (Mask[i] != (i * 2)) { ++ CheckV = false; ++ break; ++ } ++ } ++ ++ if (!CheckV) ++ return SDValue(); ++ else { ++ SDValue Res = DAG.getNode(LoongArchISD::XVSHUF4I, DL, ResTy, Op1, Op2, ++ DAG.getConstant(8, DL, MVT::i32)); ++ return DAG.getNode(LoongArchISD::XVPERMI, DL, ResTy, Res, ++ DAG.getConstant(0xD8, DL, MVT::i32)); ++ } ++ } else ++ return SDValue(); ++} ++ ++// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the ++// indices in the shuffle. ++SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, ++ SelectionDAG &DAG) const { ++ ShuffleVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ ArrayRef Mask = Node->getMask(); ++ SDValue Op1 = Op.getOperand(0); ++ SDValue Op2 = Op.getOperand(1); ++ MVT VT = Op.getSimpleValueType(); ++ SDLoc DL(Op); ++ ++ if (ResTy.is128BitVector()) { ++ ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ SmallVector Indices; ++ ++ for (int i = 0; i < ResTyNumElts; ++i) ++ Indices.push_back(Node->getMaskElt(i)); ++ ++ SDValue Result; ++ if (isVECTOR_SHUFFLE_VREPLVEI(Op, ResTy, Indices, DAG)) ++ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); ++ if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VILVH(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VILVL(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerHalfUndef_LSX(DL, ResTy, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); ++ ++ } else if (ResTy.is256BitVector()) { ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ SmallVector Indices; ++ ++ for (int i = 0; i < ResTyNumElts; ++i) ++ Indices.push_back(Node->getMaskElt(i)); ++ ++ SDValue Result; ++ if ((Result = lowerHalfHalf(DL, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = lowerHalfUndef(DL, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ if (isVECTOR_SHUFFLE_XVREPLVEI(Op, ResTy, Indices, DAG)) ++ return SDValue(); ++ if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(Op, ResTy, Indices, 
DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVILVH(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVILVL(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XSHF(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_INSVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_XVPICKVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_XVSHUF(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ } ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ // Return a fixed StackObject with offset 0 which points to the old stack ++ // pointer. ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ EVT ValTy = Op->getValueType(0); ++ int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); ++ return DAG.getFrameIndex(FI, ValTy); ++} ++ ++// Check whether the tail call optimization conditions are met ++bool LoongArchTargetLowering::isEligibleForTailCallOptimization( ++ const CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, ++ unsigned NextStackOffset, const LoongArchFunctionInfo &FI) const { ++ ++ auto CalleeCC = CLI.CallConv; ++ auto IsVarArg = CLI.IsVarArg; ++ auto &Outs = CLI.Outs; ++ auto &Caller = MF.getFunction(); ++ auto CallerCC = Caller.getCallingConv(); ++ ++ if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") ++ return false; ++ ++ if (Caller.hasFnAttribute("interrupt")) ++ return false; ++ ++ if (IsVarArg) ++ return false; ++ ++ if (getTargetMachine().getCodeModel() == CodeModel::Large) ++ return false; ++ ++ if (getTargetMachine().getRelocationModel() == Reloc::Static) ++ return false; ++ ++ // Do not tail call optimize if the stack is used to pass parameters. ++ if (CCInfo.getNextStackOffset() != 0) ++ return false; ++ ++ // Do not tail call optimize functions with byval parameters. ++ for (auto &Arg : Outs) ++ if (Arg.Flags.isByVal()) ++ return false; ++ ++ // Do not tail call optimize if either caller or callee uses structret ++ // semantics. ++ auto IsCallerStructRet = Caller.hasStructRetAttr(); ++ auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); ++ if (IsCallerStructRet || IsCalleeStructRet) ++ return false; ++ ++ // The callee has to preserve all registers the caller needs to preserve. ++ const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); ++ if (CalleeCC != CallerCC) { ++ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); ++ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) ++ return false; ++ } ++ ++ // Return false if either the callee or caller has a byval argument. ++ if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) ++ return false; ++ ++ // Return true if the callee's argument area is no larger than the ++ // caller's. 
++ return NextStackOffset <= FI.getIncomingArgSize(); ++} ++ ++//===----------------------------------------------------------------------===// ++// Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// TODO: Implement a generic logic using tblgen that can support this. ++// LoongArch 32-bit ABI rules: ++// --- ++// i32 - Passed in A0, A1, A2, A3 and stack ++// f32 - Only passed in f32 registers if no int reg has been used yet to hold ++// an argument. Otherwise, passed in A1, A2, A3 and stack. ++// f64 - Only passed in two aliased f32 registers if no int reg has been used ++// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is ++// not used, it must be shadowed. If only A3 is available, shadow it and ++// go to stack. ++// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. ++// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} ++// with the remainder spilled to the stack. ++// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases ++// spilling the remainder to the stack. ++// ++// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. ++//===----------------------------------------------------------------------===// ++ ++static bool CC_LoongArchILP32(unsigned ValNo, MVT ValVT, MVT LocVT, ++ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, ++ CCState &State, ArrayRef F64Regs) { ++ static const MCPhysReg IntRegs[] = { LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3 }; ++ ++ const LoongArchCCState * LoongArchState = static_cast(&State); ++ ++ static const MCPhysReg F32Regs[] = { LoongArch::F12, LoongArch::F14 }; ++ ++ static const MCPhysReg FloatVectorIntRegs[] = { LoongArch::A0, LoongArch::A2 }; ++ ++ // Do not process byval args here. ++ if (ArgFlags.isByVal()) ++ return true; ++ ++ ++ // Promote i8 and i16 ++ if (LocVT == MVT::i8 || LocVT == MVT::i16) { ++ LocVT = MVT::i32; ++ if (ArgFlags.isSExt()) ++ LocInfo = CCValAssign::SExt; ++ else if (ArgFlags.isZExt()) ++ LocInfo = CCValAssign::ZExt; ++ else ++ LocInfo = CCValAssign::AExt; ++ } ++ ++ unsigned Reg; ++ ++ // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following ++ // is true: function is vararg, argument is 3rd or higher, there is previous ++ // argument which is not f32 or f64. ++ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 || ++ State.getFirstUnallocated(F32Regs) != ValNo; ++ Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); ++ bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8)); ++ bool isVectorFloat = LoongArchState->WasOriginalArgVectorFloat(ValNo); ++ ++ // The LoongArch vector ABI for floats passes them in a pair of registers ++ if (ValVT == MVT::i32 && isVectorFloat) { ++ // This is the start of an vector that was scalarized into an unknown number ++ // of components. It doesn't matter how many there are. Allocate one of the ++ // notional 8 byte aligned registers which map onto the argument stack, and ++ // shadow the register lost to alignment requirements. ++ if (ArgFlags.isSplit()) { ++ Reg = State.AllocateReg(FloatVectorIntRegs); ++ if (Reg == LoongArch::A2) ++ State.AllocateReg(LoongArch::A1); ++ else if (Reg == 0) ++ State.AllocateReg(LoongArch::A3); ++ } else { ++ // If we're an intermediate component of the split, we can just attempt to ++ // allocate a register directly. 
++ Reg = State.AllocateReg(IntRegs); ++ } ++ } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { ++ Reg = State.AllocateReg(IntRegs); ++ // If this is the first part of an i64 arg, ++ // the allocated register must be either A0 or A2. ++ if (isI64 && (Reg == LoongArch::A1 || Reg == LoongArch::A3)) ++ Reg = State.AllocateReg(IntRegs); ++ LocVT = MVT::i32; ++ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { ++ // Allocate int register and shadow next int register. If first ++ // available register is LoongArch::A1 or LoongArch::A3, shadow it too. ++ Reg = State.AllocateReg(IntRegs); ++ if (Reg == LoongArch::A1 || Reg == LoongArch::A3) ++ Reg = State.AllocateReg(IntRegs); ++ State.AllocateReg(IntRegs); ++ LocVT = MVT::i32; ++ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { ++ // we are guaranteed to find an available float register ++ if (ValVT == MVT::f32) { ++ Reg = State.AllocateReg(F32Regs); ++ // Shadow int register ++ State.AllocateReg(IntRegs); ++ } else { ++ Reg = State.AllocateReg(F64Regs); ++ // Shadow int registers ++ unsigned Reg2 = State.AllocateReg(IntRegs); ++ if (Reg2 == LoongArch::A1 || Reg2 == LoongArch::A3) ++ State.AllocateReg(IntRegs); ++ State.AllocateReg(IntRegs); ++ } ++ } else ++ llvm_unreachable("Cannot handle this ValVT."); ++ ++ if (!Reg) { ++ unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); ++ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); ++ } else ++ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); ++ ++ return false; ++} ++ ++static bool CC_LoongArchILP32_FP32(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ ++ LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ ++ LoongArch::F6_64, LoongArch::F7_64 }; ++ ++ return CC_LoongArchILP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); ++} ++ ++static bool CC_LoongArchILP32_FP64(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ ++ LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ ++ LoongArch::F6_64, LoongArch::F7_64 }; ++ ++ return CC_LoongArchILP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); ++} ++ ++static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; ++ ++static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ ++ static const MCPhysReg ArgRegs[8] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, ++ LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; ++ ++ unsigned Idx = State.getFirstUnallocated(ArgRegs); ++ // Skip 'odd' register if necessary. 
++ if (!ArgFlags.isSplitEnd() && Idx != array_lengthof(ArgRegs) && Idx % 2 == 1) ++ State.AllocateReg(ArgRegs); ++ return true; ++} ++ ++static bool CC_LoongArchILP32(unsigned ValNo, MVT ValVT, MVT LocVT, ++ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, ++ CCState &State) LLVM_ATTRIBUTE_UNUSED; ++ ++#include "LoongArchGenCallingConv.inc" ++ ++ CCAssignFn *LoongArchTargetLowering::CCAssignFnForCall() const{ ++ return CC_LoongArch; ++ } ++ ++ CCAssignFn *LoongArchTargetLowering::CCAssignFnForReturn() const{ ++ return RetCC_LoongArch; ++ } ++ ++//===----------------------------------------------------------------------===// ++// Call Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++SDValue LoongArchTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, ++ SDValue Chain, SDValue Arg, ++ const SDLoc &DL, bool IsTailCall, ++ SelectionDAG &DAG) const { ++ if (!IsTailCall) { ++ SDValue PtrOff = ++ DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, ++ DAG.getIntPtrConstant(Offset, DL)); ++ return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()); ++ } ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); ++ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), ++ /* Alignment = */ 0, MachineMemOperand::MOVolatile); ++} ++ ++void LoongArchTargetLowering::getOpndList( ++ SmallVectorImpl &Ops, ++ std::deque> &RegsToPass, bool IsPICCall, ++ bool GlobalOrExternal, bool IsCallReloc, CallLoweringInfo &CLI, ++ SDValue Callee, SDValue Chain, bool IsTailCall) const { ++ // Build a sequence of copy-to-reg nodes chained together with token ++ // chain and flag operands which copy the outgoing args into registers. ++ // The InFlag in necessary since all emitted instructions must be ++ // stuck together. ++ SDValue InFlag; ++ ++ Ops.push_back(Callee); ++ ++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { ++ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, ++ RegsToPass[i].second, InFlag); ++ InFlag = Chain.getValue(1); ++ } ++ ++ // Add argument registers to the end of the list so that they are ++ // known live into the call. ++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) ++ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, ++ RegsToPass[i].second.getValueType())); ++ ++ if (!IsTailCall) { ++ // Add a register mask operand representing the call-preserved registers. ++ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *Mask = ++ TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv); ++ assert(Mask && "Missing call preserved mask for calling convention"); ++ Ops.push_back(CLI.DAG.getRegisterMask(Mask)); ++ } ++ ++ if (InFlag.getNode()) ++ Ops.push_back(InFlag); ++} ++ ++void LoongArchTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, ++ SDNode *Node) const { ++ switch (MI.getOpcode()) { ++ default: ++ return; ++ } ++} ++ ++/// LowerCall - functions arguments are copied from virtual regs to ++/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. 
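A minimal standalone sketch (not LLVM API code, and not part of the patch itself) of the register-skipping rule that CC_LoongArch_F128 above implements: a split two-register (128-bit) argument must start in an even-numbered argument register, so when the next free register index is odd that register is shadowed first. The ToyCCState type and the register-name strings below are illustrative only.

    #include <array>
    #include <cstdio>
    #include <string>
    #include <utility>

    // Eight integer argument registers, as in the LP64 register list above.
    static const std::array<const char *, 8> ArgRegs = {
        "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7"};

    struct ToyCCState {
      unsigned NextFree = 0; // index of the first unallocated register

      // Hand out the next free register, or fall back to the stack.
      std::string allocate() {
        return NextFree < ArgRegs.size() ? ArgRegs[NextFree++] : "stack";
      }

      // A split 128-bit value must start in an even register; if the next
      // free index is odd, that register is shadowed (skipped) first.
      std::pair<std::string, std::string> allocate128() {
        if (NextFree % 2 == 1)
          ++NextFree;
        std::string Lo = allocate();
        std::string Hi = allocate();
        return {Lo, Hi};
      }
    };

    int main() {
      ToyCCState State;
      std::printf("i64  -> %s\n", State.allocate().c_str());   // A0
      std::pair<std::string, std::string> P = State.allocate128(); // skips A1
      std::printf("i128 -> %s:%s\n", P.first.c_str(), P.second.c_str()); // A2:A3
      return 0;
    }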
++SDValue ++LoongArchTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const { ++ SelectionDAG &DAG = CLI.DAG; ++ SDLoc DL = CLI.DL; ++ SmallVectorImpl &Outs = CLI.Outs; ++ SmallVectorImpl &OutVals = CLI.OutVals; ++ SmallVectorImpl &Ins = CLI.Ins; ++ SDValue Chain = CLI.Chain; ++ SDValue Callee = CLI.Callee; ++ bool &IsTailCall = CLI.IsTailCall; ++ CallingConv::ID CallConv = CLI.CallConv; ++ bool IsVarArg = CLI.IsVarArg; ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); ++ bool IsPIC = isPositionIndependent(); ++ ++ // Analyze operands of the call, assigning locations to each operand. ++ SmallVector ArgLocs; ++ LoongArchCCState CCInfo( ++ CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), ++ LoongArchCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); ++ ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(Callee.getNode()); ++ ++ // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which ++ // is during the lowering of a call with a byval argument which produces ++ // a call to memcpy. For the ILP32D/ILP32F/ILP32S case, this causes the caller ++ // to allocate stack space for the reserved argument area for the callee, then ++ // recursively again for the memcpy call. In the NEWABI case, this doesn't ++ // occur as those ABIs mandate that the callee allocates the reserved argument ++ // area. We do still produce nested CALLSEQ_START..CALLSEQ_END with zero space ++ // though. ++ // ++ // If the callee has a byval argument and memcpy is used, we are mandated ++ // to already have produced a reserved argument area for the callee for ++ // ILP32D/ILP32F/ILP32S. Therefore, the reserved argument area can be reused ++ // for both calls. ++ // ++ // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START ++ // present, as we have yet to hook that node onto the chain. ++ // ++ // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this ++ // case. GCC does a similar trick, in that wherever possible, it calculates ++ // the maximum out going argument area (including the reserved area), and ++ // preallocates the stack space on entrance to the caller. ++ // ++ // FIXME: We should do the same for efficiency and space. ++ ++ bool MemcpyInByVal = ES && ++ StringRef(ES->getSymbol()) == StringRef("memcpy") && ++ Chain.getOpcode() == ISD::CALLSEQ_START; ++ ++ CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, CLI.getArgs(), ++ ES ? ES->getSymbol() : nullptr); ++ ++ // Get a count of how many bytes are to be pushed on the stack. ++ unsigned NextStackOffset = CCInfo.getNextStackOffset(); ++ ++ // Check if it's really possible to do a tail call. Restrict it to functions ++ // that are part of this compilation unit. ++ if (IsTailCall) { ++ IsTailCall = isEligibleForTailCallOptimization( ++ CCInfo, CLI, MF, NextStackOffset, *MF.getInfo()); ++ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { ++ if (G->getGlobal()->hasExternalWeakLinkage()) ++ IsTailCall = false; ++ } ++ } ++ if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) ++ report_fatal_error("failed to perform tail call elimination on a call " ++ "site marked musttail"); ++ ++ if (IsTailCall) ++ ++NumTailCalls; ++ ++ // Chain is the output chain of the last Load/Store or CopyToReg node. ++ // ByValChain is the output chain of the last Memcpy node created for copying ++ // byval arguments to the stack. 
++ unsigned StackAlignment = TFL->getStackAlignment(); ++ NextStackOffset = alignTo(NextStackOffset, StackAlignment); ++ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); ++ ++ if (!(IsTailCall || MemcpyInByVal)) ++ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL); ++ ++ SDValue StackPtr = DAG.getCopyFromReg( ++ Chain, DL, Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP, ++ getPointerTy(DAG.getDataLayout())); ++ ++ std::deque> RegsToPass; ++ SmallVector MemOpChains; ++ ++ CCInfo.rewindByValRegsInfo(); ++ ++ // Walk the register/memloc assignments, inserting copies/loads. ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ SDValue Arg = OutVals[i]; ++ CCValAssign &VA = ArgLocs[i]; ++ MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ++ ISD::ArgFlagsTy Flags = Outs[i].Flags; ++ bool UseUpperBits = false; ++ ++ // ByVal Arg. ++ if (Flags.isByVal()) { ++ unsigned FirstByValReg, LastByValReg; ++ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); ++ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); ++ ++ assert(Flags.getByValSize() && ++ "ByVal args of size 0 should have been ignored by front-end."); ++ assert(ByValIdx < CCInfo.getInRegsParamsCount()); ++ assert(!IsTailCall && ++ "Do not tail-call optimize if there is a byval argument."); ++ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, ++ FirstByValReg, LastByValReg, Flags, ++ VA); ++ CCInfo.nextInRegsParam(); ++ continue; ++ } ++ ++ // Promote the value if needed. ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ if (VA.isRegLoc()) { ++ if ((ValVT == MVT::f32 && LocVT == MVT::i32) || ++ (ValVT == MVT::f64 && LocVT == MVT::i64) || ++ (ValVT == MVT::i64 && LocVT == MVT::f64)) ++ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); ++ } ++ break; ++ case CCValAssign::BCvt: ++ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); ++ break; ++ case CCValAssign::SExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::SExt: ++ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg); ++ break; ++ case CCValAssign::ZExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::ZExt: ++ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg); ++ break; ++ case CCValAssign::AExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::AExt: ++ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg); ++ break; ++ } ++ ++ if (UseUpperBits) { ++ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ Arg = DAG.getNode( ++ ISD::SHL, DL, VA.getLocVT(), Arg, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ // Arguments that can be passed on register must be kept at ++ // RegsToPass vector ++ if (VA.isRegLoc()) { ++ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); ++ continue; ++ } ++ ++ // Register can't get to this point... ++ assert(VA.isMemLoc()); ++ ++ // emit ISD::STORE whichs stores the ++ // parameter value to a stack Location ++ MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), ++ Chain, Arg, DL, IsTailCall, DAG)); ++ } ++ ++ // Transform all store nodes into one single node because all store ++ // nodes are independent of each other. 
++ if (!MemOpChains.empty()) ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); ++ ++ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every ++ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol ++ // node so that legalize doesn't hack it. ++ ++ bool GlobalOrExternal = false, IsCallReloc = false; ++ ++ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { ++ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, ++ getPointerTy(DAG.getDataLayout()), 0, ++ LoongArchII::MO_NO_FLAG); ++ GlobalOrExternal = true; ++ } ++ else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { ++ const char *Sym = S->getSymbol(); ++ Callee = DAG.getTargetExternalSymbol( ++ Sym, getPointerTy(DAG.getDataLayout()), LoongArchII::MO_NO_FLAG); ++ ++ GlobalOrExternal = true; ++ } ++ ++ SmallVector Ops(1, Chain); ++ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); ++ ++ getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, IsCallReloc, CLI, ++ Callee, Chain, IsTailCall); ++ ++ if (IsTailCall) { ++ MF.getFrameInfo().setHasTailCall(); ++ return DAG.getNode(LoongArchISD::TailCall, DL, MVT::Other, Ops); ++ } ++ ++ Chain = DAG.getNode(LoongArchISD::JmpLink, DL, NodeTys, Ops); ++ SDValue InFlag = Chain.getValue(1); ++ ++ // Create the CALLSEQ_END node in the case of where it is not a call to ++ // memcpy. ++ if (!(MemcpyInByVal)) { ++ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, ++ DAG.getIntPtrConstant(0, DL, true), InFlag, DL); ++ InFlag = Chain.getValue(1); ++ } ++ ++ // Handle result values, copying them out of physregs into vregs that we ++ // return. ++ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, ++ InVals, CLI); ++} ++ ++/// LowerCallResult - Lower the result values of a call into the ++/// appropriate copies out of appropriate physical registers. ++SDValue LoongArchTargetLowering::LowerCallResult( ++ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &DL, ++ SelectionDAG &DAG, SmallVectorImpl &InVals, ++ TargetLowering::CallLoweringInfo &CLI) const { ++ // Assign locations to each value returned by this call. ++ SmallVector RVLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, ++ *DAG.getContext()); ++ ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(CLI.Callee.getNode()); ++ CCInfo.AnalyzeCallResult(Ins, RetCC_LoongArch, CLI.RetTy, ++ ES ? ES->getSymbol() : nullptr); ++ ++ // Copy all of the result registers out of their specified physreg. ++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ CCValAssign &VA = RVLocs[i]; ++ assert(VA.isRegLoc() && "Can only return in registers!"); ++ ++ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), ++ RVLocs[i].getLocVT(), InFlag); ++ Chain = Val.getValue(1); ++ InFlag = Val.getValue(2); ++ ++ if (VA.isUpperBitsInLoc()) { ++ unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ unsigned Shift = ++ VA.getLocInfo() == CCValAssign::ZExtUpper ? 
ISD::SRL : ISD::SRA; ++ Val = DAG.getNode( ++ Shift, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::AExt: ++ case CCValAssign::AExtUpper: ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::ZExt: ++ case CCValAssign::ZExtUpper: ++ Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val, ++ DAG.getValueType(VA.getValVT())); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::SExt: ++ case CCValAssign::SExtUpper: ++ Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val, ++ DAG.getValueType(VA.getValVT())); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ } ++ ++ InVals.push_back(Val); ++ } ++ ++ return Chain; ++} ++ ++static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA, ++ EVT ArgVT, const SDLoc &DL, ++ SelectionDAG &DAG) { ++ MVT LocVT = VA.getLocVT(); ++ EVT ValVT = VA.getValVT(); ++ ++ // Shift into the upper bits if necessary. ++ switch (VA.getLocInfo()) { ++ default: ++ break; ++ case CCValAssign::AExtUpper: ++ case CCValAssign::SExtUpper: ++ case CCValAssign::ZExtUpper: { ++ unsigned ValSizeInBits = ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ unsigned Opcode = ++ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; ++ Val = DAG.getNode( ++ Opcode, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ break; ++ } ++ } ++ ++ // If this is an value smaller than the argument slot size (32-bit for ++ // ILP32D/ILP32F/ILP32S, 64-bit for LP64D/LP64S/LP64F), it has been promoted ++ // in some way to the argument slot size. Extract the value and insert any ++ // appropriate assertions regarding sign/zero extension. ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::AExtUpper: ++ case CCValAssign::AExt: ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ break; ++ case CCValAssign::SExtUpper: ++ case CCValAssign::SExt: { ++ if ((ArgVT == MVT::i1) || (ArgVT == MVT::i8) || (ArgVT == MVT::i16)) { ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ Val = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ValVT, ++ Val, SubReg), ++ 0); ++ } else { ++ Val = ++ DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ } ++ break; ++ } ++ case CCValAssign::ZExtUpper: ++ case CCValAssign::ZExt: ++ Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); ++ break; ++ } ++ ++ return Val; ++} ++ ++//===----------------------------------------------------------------------===// ++// Formal Arguments Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++/// LowerFormalArguments - transform physical registers into virtual registers ++/// and generate load operations for arguments places on the stack. 
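A small self-contained sketch (not LLVM code) of the promotion round trip that UnpackFromArgumentSlot and LowerCallResult above handle: a value narrower than the 64-bit argument slot arrives sign- or zero-extended, and the receiving side truncates it back to the value type while remembering the extension, which is what ISD::AssertSext/AssertZext record for later folds. The function names below are made up for illustration.

    #include <cstdint>
    #include <cstdio>

    // Caller side: an i8 travels sign-extended in a 64-bit argument slot.
    uint64_t promoteToSlot(int8_t V) {
      return static_cast<uint64_t>(static_cast<int64_t>(V)); // SExt
    }

    // Callee side: truncate back to the value type; that the upper bits are
    // a sign copy is the fact AssertSext asserts on the DAG node.
    int8_t recoverFromSlot(uint64_t Slot) {
      return static_cast<int8_t>(Slot); // TRUNCATE
    }

    int main() {
      int8_t Orig = -5;
      uint64_t Slot = promoteToSlot(Orig);
      std::printf("slot=0x%016llx recovered=%d\n",
                  static_cast<unsigned long long>(Slot),
                  static_cast<int>(recoverFromSlot(Slot)));
      return 0;
    }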
++SDValue LoongArchTargetLowering::LowerFormalArguments( ++ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &DL, ++ SelectionDAG &DAG, SmallVectorImpl &InVals) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ LoongArchFI->setVarArgsFrameIndex(0); ++ ++ // Used with vargs to acumulate store chains. ++ std::vector OutChains; ++ ++ // Assign locations to all of the incoming arguments. ++ SmallVector ArgLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, ++ *DAG.getContext()); ++ const Function &Func = DAG.getMachineFunction().getFunction(); ++ Function::const_arg_iterator FuncArg = Func.arg_begin(); ++ ++ CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_FixedArg); ++ LoongArchFI->setFormalArgInfo(CCInfo.getNextStackOffset(), ++ CCInfo.getInRegsParamsCount() > 0); ++ ++ unsigned CurArgIdx = 0; ++ CCInfo.rewindByValRegsInfo(); ++ ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ CCValAssign &VA = ArgLocs[i]; ++ if (Ins[i].isOrigArg()) { ++ std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx); ++ CurArgIdx = Ins[i].getOrigArgIndex(); ++ } ++ EVT ValVT = VA.getValVT(); ++ ISD::ArgFlagsTy Flags = Ins[i].Flags; ++ bool IsRegLoc = VA.isRegLoc(); ++ ++ if (Flags.isByVal()) { ++ assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit"); ++ unsigned FirstByValReg, LastByValReg; ++ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); ++ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); ++ ++ assert(Flags.getByValSize() && ++ "ByVal args of size 0 should have been ignored by front-end."); ++ assert(ByValIdx < CCInfo.getInRegsParamsCount()); ++ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg, ++ FirstByValReg, LastByValReg, VA, CCInfo); ++ CCInfo.nextInRegsParam(); ++ continue; ++ } ++ ++ // Arguments stored on registers ++ if (IsRegLoc) { ++ MVT RegVT = VA.getLocVT(); ++ unsigned ArgReg = VA.getLocReg(); ++ const TargetRegisterClass *RC = getRegClassFor(RegVT); ++ ++ // Transform the arguments stored on ++ // physical registers into virtual ones ++ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC); ++ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); ++ ++ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); ++ ++ // Handle floating point arguments passed in integer registers and ++ // long double arguments passed in floating point registers. ++ if ((RegVT == MVT::i32 && ValVT == MVT::f32) || ++ (RegVT == MVT::i64 && ValVT == MVT::f64) || ++ (RegVT == MVT::f64 && ValVT == MVT::i64)) ++ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); ++ else if ((ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) && ++ RegVT == MVT::i32 && ValVT == MVT::f64) { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ InVals.push_back(ArgValue); ++ } else { // VA.isRegLoc() ++ MVT LocVT = VA.getLocVT(); ++ ++ if (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ // sanity check ++ assert(VA.isMemLoc()); ++ ++ // The stack pointer offset is relative to the caller stack frame. 
++ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ++ VA.getLocMemOffset(), true); ++ ++ // Create load nodes to retrieve arguments from the stack ++ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ SDValue ArgValue = DAG.getLoad( ++ LocVT, DL, Chain, FIN, ++ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); ++ OutChains.push_back(ArgValue.getValue(1)); ++ ++ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); ++ ++ InVals.push_back(ArgValue); ++ } ++ } ++ ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ // The loongarch ABIs for returning structs by value requires that we copy ++ // the sret argument into $v0 for the return. Save the argument into ++ // a virtual register so that we can access it from the return points. ++ if (Ins[i].Flags.isSRet()) { ++ unsigned Reg = LoongArchFI->getSRetReturnReg(); ++ if (!Reg) { ++ Reg = MF.getRegInfo().createVirtualRegister( ++ getRegClassFor(Subtarget.is64Bit() ? MVT::i64 : MVT::i32)); ++ LoongArchFI->setSRetReturnReg(Reg); ++ } ++ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); ++ break; ++ } ++ } ++ ++ if (IsVarArg) ++ writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); ++ ++ // All stores are grouped in one node to allow the matching between ++ // the size of Ins and InVals. This only happens when on varg functions ++ if (!OutChains.empty()) { ++ OutChains.push_back(Chain); ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); ++ } ++ ++ return Chain; ++} ++ ++//===----------------------------------------------------------------------===// ++// Return Value Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++bool ++LoongArchTargetLowering::CanLowerReturn(CallingConv::ID CallConv, ++ MachineFunction &MF, bool IsVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const { ++ SmallVector RVLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); ++ return CCInfo.CheckReturn(Outs, RetCC_LoongArch); ++} ++ ++bool ++LoongArchTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { ++ if (Subtarget.is64Bit() && Type == MVT::i32) ++ return true; ++ ++ return IsSigned; ++} ++ ++SDValue ++LoongArchTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, ++ bool IsVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &DL, SelectionDAG &DAG) const { ++ // CCValAssign - represent the assignment of ++ // the return value to a location ++ SmallVector RVLocs; ++ MachineFunction &MF = DAG.getMachineFunction(); ++ ++ // CCState - Info about the registers and stack slot. ++ LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); ++ ++ // Analyze return values. ++ CCInfo.AnalyzeReturn(Outs, RetCC_LoongArch); ++ ++ SDValue Flag; ++ SmallVector RetOps(1, Chain); ++ ++ // Copy the result values into the output registers. 
++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ SDValue Val = OutVals[i]; ++ CCValAssign &VA = RVLocs[i]; ++ assert(VA.isRegLoc() && "Can only return in registers!"); ++ bool UseUpperBits = false; ++ ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::AExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::AExt: ++ Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::ZExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::ZExt: ++ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::SExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::SExt: ++ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ } ++ ++ if (UseUpperBits) { ++ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ Val = DAG.getNode( ++ ISD::SHL, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); ++ ++ // Guarantee that all emitted copies are stuck together with flags. ++ Flag = Chain.getValue(1); ++ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); ++ } ++ ++ // The loongarch ABIs for returning structs by value requires that we copy ++ // the sret argument into $v0 for the return. We saved the argument into ++ // a virtual register in the entry block, so now we copy the value out ++ // and into $v0. ++ if (MF.getFunction().hasStructRetAttr()) { ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ unsigned Reg = LoongArchFI->getSRetReturnReg(); ++ ++ if (!Reg) ++ llvm_unreachable("sret virtual register not created in the entry block"); ++ SDValue Val = ++ DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); ++ unsigned A0 = Subtarget.is64Bit() ? LoongArch::A0_64 : LoongArch::A0; ++ ++ Chain = DAG.getCopyToReg(Chain, DL, A0, Val, Flag); ++ Flag = Chain.getValue(1); ++ RetOps.push_back(DAG.getRegister(A0, getPointerTy(DAG.getDataLayout()))); ++ } ++ ++ RetOps[0] = Chain; // Update chain. ++ ++ // Add the flag if we have it. ++ if (Flag.getNode()) ++ RetOps.push_back(Flag); ++ ++ // Standard return on LoongArch is a "jr $ra" ++ return DAG.getNode(LoongArchISD::Ret, DL, MVT::Other, RetOps); ++} ++ ++//===----------------------------------------------------------------------===// ++// LoongArch Inline Assembly Support ++//===----------------------------------------------------------------------===// ++ ++/// getConstraintType - Given a constraint letter, return the type of ++/// constraint it is for this target. 
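For reference, a minimal source-level example of the case the sret handling in LowerFormalArguments/LowerReturn above is about: a function returning a struct by value receives a hidden pointer argument, and the lowering copies that pointer back into A0 on return. Ordinary C++ cannot observe the register itself, so this sketch only shows the shape of code that takes this path; the comments describe what the lowering above does with it.

    #include <cstdio>

    // Large enough to be returned through a hidden sret pointer rather than
    // in registers.
    struct Big {
      long Data[8];
    };

    // Per the lowering above, the incoming sret pointer is also returned in
    // A0, so the caller may rely on it after the call.
    Big makeBig() {
      Big B = {};
      B.Data[0] = 42;
      return B; // stores through the hidden pointer, then returns it
    }

    int main() {
      Big B = makeBig();
      std::printf("%ld\n", B.Data[0]);
      return 0;
    }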
++LoongArchTargetLowering::ConstraintType ++LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { ++ // LoongArch specific constraints ++ // GCC config/loongarch/constraints.md ++ // ++ // 'f': Floating Point register ++ // 'G': Floating-point 0 ++ // 'l': Signed 16-bit constant ++ // 'R': Memory address that can be used in a non-macro load or store ++ // "ZC" Memory address with 16-bit and 4 bytes aligned offset ++ // "ZB" Memory address with 0 offset ++ ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ default : break; ++ case 'f': ++ return C_RegisterClass; ++ case 'l': ++ case 'G': ++ return C_Other; ++ case 'R': ++ return C_Memory; ++ } ++ } ++ ++ if (Constraint == "ZC" || Constraint == "ZB") ++ return C_Memory; ++ ++ return TargetLowering::getConstraintType(Constraint); ++} ++ ++/// Examine constraint type and operand type and determine a weight value. ++/// This object must already have been set up with the operand type ++/// and the current alternative constraint selected. ++TargetLowering::ConstraintWeight ++LoongArchTargetLowering::getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const { ++ ConstraintWeight weight = CW_Invalid; ++ Value *CallOperandVal = info.CallOperandVal; ++ // If we don't have a value, we can't do a match, ++ // but allow it at the lowest weight. ++ if (!CallOperandVal) ++ return CW_Default; ++ Type *type = CallOperandVal->getType(); ++ // Look at the constraint type. ++ switch (*constraint) { ++ default: ++ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); ++ break; ++ case 'f': // FPU ++ if (Subtarget.hasLSX() && type->isVectorTy() && ++ type->getPrimitiveSizeInBits() == 128) ++ weight = CW_Register; ++ else if (Subtarget.hasLASX() && type->isVectorTy() && ++ type->getPrimitiveSizeInBits() == 256) ++ weight = CW_Register; ++ else if (type->isFloatTy()) ++ weight = CW_Register; ++ break; ++ case 'l': // signed 16 bit immediate ++ case 'I': // signed 12 bit immediate ++ case 'J': // integer zero ++ case 'G': // floating-point zero ++ case 'K': // unsigned 12 bit immediate ++ if (isa(CallOperandVal)) ++ weight = CW_Constant; ++ break; ++ case 'm': ++ case 'R': ++ weight = CW_Memory; ++ break; ++ } ++ return weight; ++} ++ ++/// This is a helper function to parse a physical register string and split it ++/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag ++/// that is returned indicates whether parsing was successful. The second flag ++/// is true if the numeric part exists. ++static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, ++ unsigned long long &Reg) { ++ if (C.empty() || C.front() != '{' || C.back() != '}') ++ return std::make_pair(false, false); ++ ++ // Search for the first numeric character. ++ StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; ++ I = std::find_if(B, E, isdigit); ++ ++ Prefix = StringRef(B, I - B); ++ ++ // The second flag is set to false if no numeric characters were found. ++ if (I == E) ++ return std::make_pair(true, false); ++ ++ // Parse the numeric characters. ++ return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), ++ true); ++} ++ ++EVT LoongArchTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, ++ ISD::NodeType) const { ++ bool Cond = Subtarget.is64Bit() && VT.getSizeInBits() == 32; ++ EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32); ++ return VT.bitsLT(MinVT) ? 
MinVT : VT; ++} ++ ++static const TargetRegisterClass *getRegisterClassForVT(MVT VT, bool Is64Bit) { ++ // Newer llvm versions (>= 12) do not require simple VTs for constraints and ++ // they use MVT::Other for constraints with complex VTs. For more details, ++ // please see https://reviews.llvm.org/D91710. ++ if (VT == MVT::Other || VT.getSizeInBits() <= 32) ++ return &LoongArch::GPR32RegClass; ++ if (VT.getSizeInBits() <= 64) ++ return Is64Bit ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ return nullptr; ++} ++ ++std::pair LoongArchTargetLowering:: ++parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { ++ const TargetRegisterInfo *TRI = ++ Subtarget.getRegisterInfo(); ++ const TargetRegisterClass *RC; ++ StringRef Prefix; ++ unsigned long long Reg; ++ ++ std::pair R = parsePhysicalReg(C, Prefix, Reg); ++ ++ if (!R.first) ++ return std::make_pair(0U, nullptr); ++ ++ if (!R.second) ++ return std::make_pair(0U, nullptr); ++ ++ if (Prefix == "$f") { // Parse $f0-$f31. ++ // If the size of FP registers is 64-bit, select the 64-bit register class. ++ // Otherwise, select the 32-bit register class. ++ if (VT == MVT::Other) ++ VT = Subtarget.hasBasicD() ? MVT::f64 : MVT::f32; ++ ++ RC = getRegClassFor(VT); ++ } ++ else if (Prefix == "$vr") { // Parse $vr0-$vr31. ++ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); ++ } ++ else if (Prefix == "$xr") { // Parse $xr0-$xr31. ++ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); ++ } ++ else if (Prefix == "$fcc") // Parse $fcc0-$fcc7. ++ RC = TRI->getRegClass(LoongArch::FCFRRegClassID); ++ else { // Parse $r0-$r31. ++ assert(Prefix == "$r"); ++ if ((RC = getRegisterClassForVT(VT, Subtarget.is64Bit())) == nullptr) { ++ // This will generate an error message. ++ return std::make_pair(0U, nullptr); ++ } ++ } ++ ++ assert(Reg < RC->getNumRegs()); ++ ++ if (RC == &LoongArch::GPR64RegClass || RC == &LoongArch::GPR32RegClass) { ++ // Sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ // that just like LoongArchAsmParser.cpp ++ switch (Reg) { ++ case 0: return std::make_pair(*(RC->begin() + 0), RC); // r0 ++ case 1: return std::make_pair(*(RC->begin() + 27), RC); // r1 ++ case 2: return std::make_pair(*(RC->begin() + 28), RC); // r2 ++ case 3: return std::make_pair(*(RC->begin() + 29), RC); // r3 ++ case 4: return std::make_pair(*(RC->begin() + 1), RC); // r4 ++ case 5: return std::make_pair(*(RC->begin() + 2), RC); // r5 ++ case 6: return std::make_pair(*(RC->begin() + 3), RC); // r6 ++ case 7: return std::make_pair(*(RC->begin() + 4), RC); // r7 ++ case 8: return std::make_pair(*(RC->begin() + 5), RC); // r8 ++ case 9: return std::make_pair(*(RC->begin() + 6), RC); // r9 ++ case 10: return std::make_pair(*(RC->begin() + 7), RC); // r10 ++ case 11: return std::make_pair(*(RC->begin() + 8), RC); // r11 ++ case 12: return std::make_pair(*(RC->begin() + 9), RC); // r12 ++ case 13: return std::make_pair(*(RC->begin() + 10), RC); // r13 ++ case 14: return std::make_pair(*(RC->begin() + 11), RC); // r14 ++ case 15: return std::make_pair(*(RC->begin() + 12), RC); // r15 ++ case 16: return std::make_pair(*(RC->begin() + 13), RC); // r16 ++ case 17: return std::make_pair(*(RC->begin() + 14), RC); // r17 ++ case 18: return std::make_pair(*(RC->begin() + 15), RC); // r18 ++ case 19: return std::make_pair(*(RC->begin() + 16), RC); // r19 ++ case 20: return std::make_pair(*(RC->begin() + 17), RC); // r20 ++ case 21: return std::make_pair(*(RC->begin() + 30), RC); // r21 ++ case 22: return 
std::make_pair(*(RC->begin() + 31), RC); // r22 ++ case 23: return std::make_pair(*(RC->begin() + 18), RC); // r23 ++ case 24: return std::make_pair(*(RC->begin() + 19), RC); // r24 ++ case 25: return std::make_pair(*(RC->begin() + 20), RC); // r25 ++ case 26: return std::make_pair(*(RC->begin() + 21), RC); // r26 ++ case 27: return std::make_pair(*(RC->begin() + 22), RC); // r27 ++ case 28: return std::make_pair(*(RC->begin() + 23), RC); // r28 ++ case 29: return std::make_pair(*(RC->begin() + 24), RC); // r29 ++ case 30: return std::make_pair(*(RC->begin() + 25), RC); // r30 ++ case 31: return std::make_pair(*(RC->begin() + 26), RC); // r31 ++ } ++ } ++ return std::make_pair(*(RC->begin() + Reg), RC); ++} ++ ++/// Given a register class constraint, like 'r', if this corresponds directly ++/// to an LLVM register class, return a register of 0 and the register class ++/// pointer. ++std::pair ++LoongArchTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, ++ MVT VT) const { ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ case 'r': ++ return std::make_pair(0U, getRegisterClassForVT(VT, Subtarget.is64Bit())); ++ case 'f': // FPU or LSX register ++ if (VT == MVT::v16i8) ++ return std::make_pair(0U, &LoongArch::LSX128BRegClass); ++ else if (VT == MVT::v8i16) ++ return std::make_pair(0U, &LoongArch::LSX128HRegClass); ++ else if (VT == MVT::v4i32 || VT == MVT::v4f32) ++ return std::make_pair(0U, &LoongArch::LSX128WRegClass); ++ else if (VT == MVT::v2i64 || VT == MVT::v2f64) ++ return std::make_pair(0U, &LoongArch::LSX128DRegClass); ++ else if (VT == MVT::v32i8) ++ return std::make_pair(0U, &LoongArch::LASX256BRegClass); ++ else if (VT == MVT::v16i16) ++ return std::make_pair(0U, &LoongArch::LASX256HRegClass); ++ else if (VT == MVT::v8i32 || VT == MVT::v8f32) ++ return std::make_pair(0U, &LoongArch::LASX256WRegClass); ++ else if (VT == MVT::v4i64 || VT == MVT::v4f64) ++ return std::make_pair(0U, &LoongArch::LASX256DRegClass); ++ else if (VT == MVT::f32) ++ return std::make_pair(0U, &LoongArch::FGR32RegClass); ++ else if (VT == MVT::f64) ++ return std::make_pair(0U, &LoongArch::FGR64RegClass); ++ break; ++ } ++ } ++ ++ std::pair R; ++ R = parseRegForInlineAsmConstraint(Constraint, VT); ++ ++ if (R.second) ++ return R; ++ ++ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); ++} ++ ++/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops ++/// vector. If it is invalid, don't add anything to Ops. ++void LoongArchTargetLowering::LowerAsmOperandForConstraint(SDValue Op, ++ std::string &Constraint, ++ std::vector&Ops, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ SDValue Result; ++ ++ // Only support length 1 constraints for now. 
++ if (Constraint.length() > 1) return; ++ ++ char ConstraintLetter = Constraint[0]; ++ switch (ConstraintLetter) { ++ default: break; // This will fall through to the generic implementation ++ case 'l': // Signed 16 bit constant ++ // If this fails, the parent routine will give an error ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getSExtValue(); ++ if (isInt<16>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'I': // Signed 12 bit constant ++ // If this fails, the parent routine will give an error ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getSExtValue(); ++ if (isInt<12>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'J': // integer zero ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getZExtValue(); ++ if (Val == 0) { ++ Result = DAG.getTargetConstant(0, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'G': // floating-point zero ++ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { ++ if (C->isZero()) { ++ EVT Type = Op.getValueType(); ++ Result = DAG.getTargetConstantFP(0, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'K': // unsigned 12 bit immediate ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ uint64_t Val = (uint64_t)C->getZExtValue(); ++ if (isUInt<12>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ } ++ ++ if (Result.getNode()) { ++ Ops.push_back(Result); ++ return; ++ } ++ ++ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); ++} ++ ++bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, ++ const AddrMode &AM, Type *Ty, ++ unsigned AS, Instruction *I) const { ++ // No global is ever allowed as a base. ++ if (AM.BaseGV) ++ return false; ++ ++ switch (AM.Scale) { ++ case 0: // "r+i" or just "i", depending on HasBaseReg. ++ break; ++ case 1: ++ if (!AM.HasBaseReg) // allow "r+i". ++ break; ++ return false; // disallow "r+r" or "r+r+i". ++ default: ++ return false; ++ } ++ ++ return true; ++} ++ ++bool ++LoongArchTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { ++ // The LoongArch target isn't yet aware of offsets. ++ return false; ++} ++ ++EVT LoongArchTargetLowering::getOptimalMemOpType( ++ const MemOp &Op, const AttributeList &FuncAttributes) const { ++ if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) { ++ if (Op.size() >= 16) { ++ if (Op.size() >= 32 && Subtarget.hasLASX()) { ++ return MVT::v32i8; ++ } ++ if (Subtarget.hasLSX()) ++ return MVT::v16i8; ++ } ++ } ++ ++ if (Subtarget.is64Bit()) ++ return MVT::i64; ++ ++ return MVT::i32; ++} ++ ++/// isFPImmLegal - Returns true if the target can instruction select the ++/// specified FP immediate natively. If false, the legalizer will ++/// materialize the FP immediate as a load from a constant pool.
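A hypothetical GCC-style inline-asm use of the constraints lowered above (illustrative only, not part of the patch; the mnemonic and values are assumptions):

    int add_small_imm(int x) {
      int r;
      /* "r" is resolved to a GPR register class by getRegForInlineAsmConstraint;
         the literal 16 is checked against the signed 12-bit range by the 'I'
         case of LowerAsmOperandForConstraint before being emitted. */
      __asm__("addi.w %0, %1, %2" : "=r"(r) : "r"(x), "I"(16));
      return r;
    }

An out-of-range literal is simply not added to Ops and, as the comments above note, the parent routine then reports the error.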
++bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, ++ bool ForCodeSize) const { ++ if (VT != MVT::f32 && VT != MVT::f64) ++ return false; ++ if (Imm.isNegZero()) ++ return false; ++ return (Imm.isZero() || Imm.isExactlyValue(+1.0)); ++} ++ ++bool LoongArchTargetLowering::useSoftFloat() const { ++ return Subtarget.useSoftFloat(); ++} ++ ++void LoongArchTargetLowering::copyByValRegs( ++ SDValue Chain, const SDLoc &DL, std::vector &OutChains, ++ SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, ++ SmallVectorImpl &InVals, const Argument *FuncArg, ++ unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, ++ LoongArchCCState &State) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ unsigned NumRegs = LastReg - FirstReg; ++ unsigned RegAreaSize = NumRegs * GPRSizeInBytes; ++ unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); ++ int FrameObjOffset; ++ ArrayRef ByValArgRegs = ABI.GetByValArgRegs(); ++ ++ if (RegAreaSize) ++ FrameObjOffset = -(int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes); ++ else ++ FrameObjOffset = VA.getLocMemOffset(); ++ ++ // Create frame object. ++ EVT PtrTy = getPointerTy(DAG.getDataLayout()); ++ // Make the fixed object stored to mutable so that the load instructions ++ // referencing it have their memory dependencies added. ++ // Set the frame object as isAliased which clears the underlying objects ++ // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all ++ // stores as dependencies for loads referencing this fixed object. ++ int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true); ++ SDValue FIN = DAG.getFrameIndex(FI, PtrTy); ++ InVals.push_back(FIN); ++ ++ if (!NumRegs) ++ return; ++ ++ // Copy arg registers. ++ MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8); ++ const TargetRegisterClass *RC = getRegClassFor(RegTy); ++ ++ for (unsigned I = 0; I < NumRegs; ++I) { ++ unsigned ArgReg = ByValArgRegs[FirstReg + I]; ++ unsigned VReg = addLiveIn(MF, ArgReg, RC); ++ unsigned Offset = I * GPRSizeInBytes; ++ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, ++ DAG.getConstant(Offset, DL, PtrTy)); ++ SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), ++ StorePtr, MachinePointerInfo(FuncArg, Offset)); ++ OutChains.push_back(Store); ++ } ++} ++ ++// Copy byVal arg to registers and stack. ++void LoongArchTargetLowering::passByValArg( ++ SDValue Chain, const SDLoc &DL, ++ std::deque> &RegsToPass, ++ SmallVectorImpl &MemOpChains, SDValue StackPtr, ++ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, ++ unsigned LastReg, const ISD::ArgFlagsTy &Flags, ++ const CCValAssign &VA) const { ++ unsigned ByValSizeInBytes = Flags.getByValSize(); ++ unsigned OffsetInBytes = 0; // From beginning of struct ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ Align Alignment = ++ std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes)); ++ EVT PtrTy = getPointerTy(DAG.getDataLayout()), ++ RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); ++ unsigned NumRegs = LastReg - FirstReg; ++ ++ if (NumRegs) { ++ ArrayRef ArgRegs = ABI.GetByValArgRegs(); ++ bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); ++ unsigned I = 0; ++ ++ // Copy words to registers. 
++ for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) { ++ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, PtrTy)); ++ SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, ++ MachinePointerInfo(), Alignment); ++ MemOpChains.push_back(LoadVal.getValue(1)); ++ unsigned ArgReg = ArgRegs[FirstReg + I]; ++ RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); ++ } ++ ++ // Return if the struct has been fully copied. ++ if (ByValSizeInBytes == OffsetInBytes) ++ return; ++ ++ // Copy the remainder of the byval argument with sub-word loads and shifts. ++ if (LeftoverBytes) { ++ SDValue Val; ++ ++ for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; ++ OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { ++ unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; ++ ++ if (RemainingSizeInBytes < LoadSizeInBytes) ++ continue; ++ ++ // Load subword. ++ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, ++ PtrTy)); ++ SDValue LoadVal = DAG.getExtLoad( ++ ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), ++ MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); ++ MemOpChains.push_back(LoadVal.getValue(1)); ++ ++ // Shift the loaded value. ++ unsigned Shamt; ++ ++ Shamt = TotalBytesLoaded * 8; ++ ++ SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, ++ DAG.getConstant(Shamt, DL, MVT::i32)); ++ ++ if (Val.getNode()) ++ Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); ++ else ++ Val = Shift; ++ ++ OffsetInBytes += LoadSizeInBytes; ++ TotalBytesLoaded += LoadSizeInBytes; ++ Alignment = std::min(Alignment, Align(LoadSizeInBytes)); ++ } ++ ++ unsigned ArgReg = ArgRegs[FirstReg + I]; ++ RegsToPass.push_back(std::make_pair(ArgReg, Val)); ++ return; ++ } ++ } ++ ++ // Copy remainder of byval arg to it with memcpy. ++ unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; ++ SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, PtrTy)); ++ SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, ++ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); ++ Chain = DAG.getMemcpy( ++ Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), ++ Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false, ++ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); ++ MemOpChains.push_back(Chain); ++} ++ ++void LoongArchTargetLowering::writeVarArgRegs(std::vector &OutChains, ++ SDValue Chain, const SDLoc &DL, ++ SelectionDAG &DAG, ++ CCState &State) const { ++ ArrayRef ArgRegs = ABI.GetVarArgRegs(); ++ unsigned Idx = State.getFirstUnallocated(ArgRegs); ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); ++ const TargetRegisterClass *RC = getRegClassFor(RegTy); ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ // Offset of the first variable argument from stack pointer. ++ int VaArgOffset, VarArgsSaveSize; ++ ++ if (ArgRegs.size() == Idx) { ++ VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); ++ VarArgsSaveSize = 0; ++ } else { ++ VarArgsSaveSize = (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); ++ VaArgOffset = -VarArgsSaveSize; ++ } ++ ++ // Record the frame index of the first variable argument ++ // which is a value necessary to VASTART. 
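For context, the save area being laid out in writeVarArgRegs here is what a variadic callee relies on; a minimal example (illustrative only, not part of the patch):

    #include <stdarg.h>
    /* The integer argument registers not consumed by 'n' are spilled into the
       save area whose first slot's frame index is recorded for VASTART, so
       va_arg can walk them as ordinary stack memory. */
    int sum(int n, ...) {
      va_list ap;
      int s = 0;
      va_start(ap, n);
      for (int i = 0; i < n; i++)
        s += va_arg(ap, int);
      va_end(ap);
      return s;
    }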
++ int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); ++ LoongArchFI->setVarArgsFrameIndex(FI); ++ ++ // If saving an odd number of registers then create an extra stack slot to ++ // ensure that the frame pointer is 2*GRLEN-aligned, which in turn ensures ++ // offsets to even-numbered registered remain 2*GRLEN-aligned. ++ if (Idx % 2) { ++ MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset - (int)RegSizeInBytes, ++ true); ++ VarArgsSaveSize += RegSizeInBytes; ++ } ++ ++ // Copy the integer registers that have not been used for argument passing ++ // to the argument register save area. For ILP32D/ILP32F/ILP32S, the save area ++ // is allocated in the caller's stack frame, while for LP64D/LP64S/LP64F, it ++ // is allocated in the callee's stack frame. ++ for (unsigned I = Idx; I < ArgRegs.size(); ++ ++I, VaArgOffset += RegSizeInBytes) { ++ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); ++ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); ++ FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); ++ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ SDValue Store = ++ DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo()); ++ cast(Store.getNode())->getMemOperand()->setValue( ++ (Value *)nullptr); ++ OutChains.push_back(Store); ++ } ++ LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); ++} ++ ++void LoongArchTargetLowering::HandleByVal(CCState *State, unsigned &Size, ++ Align Alignment) const { ++ const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); ++ ++ assert(Size && "Byval argument's size shouldn't be 0."); ++ ++ Alignment = std::min(Alignment, TFL->getStackAlign()); ++ ++ unsigned FirstReg = 0; ++ unsigned NumRegs = 0; ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ ArrayRef IntArgRegs = ABI.GetByValArgRegs(); ++ // FIXME: The ILP32D/ILP32F/ILP32S case actually describes no shadow ++ // registers. ++ const MCPhysReg *ShadowRegs = ++ (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) ++ ? IntArgRegs.data() ++ : LoongArch64DPRegs; ++ ++ // We used to check the size as well but we can't do that anymore since ++ // CCState::HandleByVal() rounds up the size after calling this function. ++ assert(Alignment >= Align(RegSizeInBytes) && ++ "Byval argument's alignment should be a multiple of RegSizeInBytes."); ++ ++ FirstReg = State->getFirstUnallocated(IntArgRegs); ++ ++ // If Alignment > RegSizeInBytes, the first arg register must be even. ++ // FIXME: This condition happens to do the right thing but it's not the ++ // right way to test it. We want to check that the stack frame offset ++ // of the register is aligned. ++ if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) { ++ State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]); ++ ++FirstReg; ++ // assert(true && "debug#######################################"); ++ } ++ ++ // Mark the registers allocated. ++ // Size = alignTo(Size, RegSizeInBytes); ++ // for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size()); ++ // Size -= RegSizeInBytes, ++I, ++NumRegs) ++ // State->AllocateReg(IntArgRegs[I], ShadowRegs[I]); ++ ++ State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs); ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitPseudoSELECT(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ bool isFPCmp, ++ unsigned Opc) const { ++ const TargetInstrInfo *TII = ++ Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ // To "insert" a SELECT instruction, we actually have to insert the ++ // diamond control-flow pattern. 
The incoming instruction knows the ++ // destination vreg to set, the condition code register to branch on, the ++ // true/false values to select between, and a branch opcode to use. ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ ++ // thisMBB: ++ // ... ++ // TrueVal = ... ++ // setcc r1, r2, r3 ++ // bNE r1, r0, copy1MBB ++ // fallthrough --> copy0MBB ++ MachineBasicBlock *thisMBB = BB; ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, copy0MBB); ++ F->insert(It, sinkMBB); ++ ++ // Transfer the remainder of BB and its successor edges to sinkMBB. ++ sinkMBB->splice(sinkMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ sinkMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Next, add the true and fallthrough blocks as its successors. ++ BB->addSuccessor(copy0MBB); ++ BB->addSuccessor(sinkMBB); ++ ++ if (isFPCmp) { ++ // bc1[tf] cc, sinkMBB ++ BuildMI(BB, DL, TII->get(Opc)) ++ .addReg(MI.getOperand(1).getReg()) ++ .addMBB(sinkMBB); ++ } else { ++ BuildMI(BB, DL, TII->get(Opc)) ++ .addReg(MI.getOperand(1).getReg()) ++ .addReg(LoongArch::ZERO) ++ .addMBB(sinkMBB); ++ } ++ ++ // copy0MBB: ++ // %FalseValue = ... ++ // # fallthrough to sinkMBB ++ BB = copy0MBB; ++ ++ // Update machine-CFG edges ++ BB->addSuccessor(sinkMBB); ++ ++ // sinkMBB: ++ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] ++ // ... ++ BB = sinkMBB; ++ ++ BuildMI(*BB, BB->begin(), DL, TII->get(LoongArch::PHI), MI.getOperand(0).getReg()) ++ .addReg(MI.getOperand(2).getReg()) ++ .addMBB(thisMBB) ++ .addReg(MI.getOperand(3).getReg()) ++ .addMBB(copy0MBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitLSXCBranchPseudo( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { ++ ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const TargetRegisterClass *RC = &LoongArch::GPR32RegClass; ++ DebugLoc DL = MI.getDebugLoc(); ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, FBB); ++ F->insert(It, TBB); ++ F->insert(It, Sink); ++ ++ // Transfer the remainder of BB and its successor edges to Sink. ++ Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), ++ BB->end()); ++ Sink->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Add successors. ++ BB->addSuccessor(FBB); ++ BB->addSuccessor(TBB); ++ FBB->addSuccessor(Sink); ++ TBB->addSuccessor(Sink); ++ // Insert the real bnz.b instruction to $BB. ++ BuildMI(BB, DL, TII->get(BranchOp)) ++ .addReg(LoongArch::FCC0) ++ .addReg(MI.getOperand(1).getReg()); ++ ++ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)) ++ .addReg(LoongArch::FCC0) ++ .addMBB(TBB); ++ ++ // Fill $FBB. 
++ unsigned RD1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::ADDI_W), RD1) ++ .addReg(LoongArch::ZERO) ++ .addImm(0); ++ BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::B32)).addMBB(Sink); ++ ++ // Fill $TBB. ++ unsigned RD2 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*TBB, TBB->end(), DL, TII->get(LoongArch::ADDI_W), RD2) ++ .addReg(LoongArch::ZERO) ++ .addImm(1); ++ ++ // Insert phi function to $Sink. ++ BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(RD1) ++ .addMBB(FBB) ++ .addReg(RD2) ++ .addMBB(TBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return Sink; ++} ++ ++// Emit the COPY_FW pseudo instruction. ++// ++// copy_fw_pseudo $fd, $vk, n ++// => ++// vreplvei.w $rt, $vk, $n ++// copy $rt, $fd ++// ++// When n is zero, the equivalent operation can be performed with (potentially) ++// zero instructions due to register overlaps. ++MachineBasicBlock * ++LoongArchTargetLowering::emitCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Vk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ ++ if (Lane == 0) { ++ unsigned Vj = Vk; ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_lo); ++ } else { ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vj) ++ .addReg(Vk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_lo); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the COPY_FD pseudo instruction. ++// ++// copy_fd_pseudo $fd, $vj, n ++// => ++// vreplvei.d $vd, $vj, $n ++// copy $fd, $vd:sub_64 ++// ++// When n is zero, the equivalent operation can be performed with (potentially) ++// zero instructions due to register overlaps. ++MachineBasicBlock * ++LoongArchTargetLowering::emitCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Vk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ if (Lane == 0) ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vk, 0, LoongArch::sub_64); ++ else { ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ assert(Lane == 1); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vj) ++ .addReg(Vk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_64); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
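As a rough source-level picture of when the COPY_FW/COPY_FD expansions above fire (illustrative only, not part of the patch; assumes LSX vector extensions are enabled):

    typedef float v4f32 __attribute__((vector_size(16)));
    /* Extracting a float lane is the sort of extract_vector_elt the copy_fw
       pseudo covers: lane 0 can be a plain subregister copy because the scalar
       FPR overlaps the low vector lane, while other lanes go through
       vreplvei.w first. */
    float lane2(v4f32 v) { return v[2]; }
    float lane0(v4f32 v) { return v[0]; }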
++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Xk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Xj = Xk; ++ ++ if (Lane == 0) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Xj, 0, LoongArch::sub_lo); ++ } else { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) ++ .addReg(Xk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Xk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); ++ if (Lane == 0) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Xk, 0, LoongArch::sub_64); ++ } else { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_DU), Rj) ++ .addReg(Xk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitCONCAT_VECTORS( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Bytes) const { ++ ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned SubReg1 = MI.getOperand(1).getReg(); ++ unsigned SubReg2 = MI.getOperand(2).getReg(); ++ const TargetRegisterClass *RC = nullptr; ++ ++ switch (Bytes) { ++ default: ++ llvm_unreachable("Unexpected size"); ++ case 1: ++ RC = &LoongArch::LASX256BRegClass; ++ break; ++ case 2: ++ RC = &LoongArch::LASX256HRegClass; ++ break; ++ case 4: ++ RC = &LoongArch::LASX256WRegClass; ++ break; ++ case 8: ++ RC = &LoongArch::LASX256DRegClass; ++ break; ++ } ++ ++ unsigned X0 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X0) ++ .addImm(0) ++ .addReg(SubReg1) ++ .addImm(LoongArch::sub_128); ++ unsigned X1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X1) ++ .addImm(0) ++ .addReg(SubReg2) ++ .addImm(LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) ++ .addReg(X0) ++ .addReg(X1) ++ .addImm(2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++ return BB; ++} ++ ++// xcopy_fw_gpr_pseudo $fd, $xs, $rk ++// => ++// bb: addi.d $rt1, zero, 4 ++// bge $lane, $rt1 hbb ++// lbb:xvreplve.w $xt1, $xs, $lane ++// copy $rf0, $xt1 ++// b sink ++// hbb: addi.d $rt2, $lane, -4 ++// xvpermi.q $xt2 $xs, 1 ++// xvreplve.w $xt3, $xt2, $rt2 ++// copy $rf1, $xt3 ++// sink:phi ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FW_GPR(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xs = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getReg(); ++ ++ const TargetRegisterClass *RC = &LoongArch::GPR64RegClass; ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *HBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *LBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, LBB); ++ F->insert(It, HBB); ++ F->insert(It, Sink); ++ ++ Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), ++ BB->end()); ++ Sink->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(LBB); ++ BB->addSuccessor(HBB); ++ HBB->addSuccessor(Sink); ++ LBB->addSuccessor(Sink); ++ ++ unsigned Rt1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(BB, DL, TII->get(LoongArch::ADDI_D), Rt1) ++ .addReg(LoongArch::ZERO_64) ++ .addImm(4); ++ BuildMI(BB, DL, TII->get(LoongArch::BGE)) ++ .addReg(Lane) ++ .addReg(Rt1) ++ .addMBB(HBB); ++ ++ unsigned Xt1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rf0 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt1) ++ .addReg(Xs) ++ .addReg(Lane); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::COPY), Rf0) ++ .addReg(Xt1, 0, LoongArch::sub_lo); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::B)).addMBB(Sink); ++ ++ unsigned Xt2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Xt3 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rt2 = RegInfo.createVirtualRegister(RC); ++ unsigned Rf1 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::ADDI_D), Rt2) ++ .addReg(Lane) ++ .addImm(-4); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVPERMI_Q), Xt2) ++ .addReg(Xs) ++ .addReg(Xs) ++ .addImm(1); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt3) ++ .addReg(Xt2) ++ .addReg(Rt2); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::COPY), Rf1) ++ .addReg(Xt3, 0, LoongArch::sub_lo); ++ ++ BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(Rf0) ++ .addMBB(LBB) ++ .addReg(Rf1) ++ .addMBB(HBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++ return Sink; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned Size) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ const TargetRegisterClass *VecRC = nullptr; ++ const TargetRegisterClass *SubVecRC = nullptr; ++ unsigned HalfSize = 0; ++ unsigned InsertOp = 0; ++ ++ if (Size == 1) { ++ VecRC = &LoongArch::LASX256BRegClass; ++ SubVecRC = &LoongArch::LSX128BRegClass; ++ HalfSize = 16; ++ InsertOp = LoongArch::VINSGR2VR_B; ++ } else if (Size == 2) { ++ VecRC = &LoongArch::LASX256HRegClass; ++ SubVecRC = &LoongArch::LSX128HRegClass; ++ HalfSize = 8; ++ InsertOp = LoongArch::VINSGR2VR_H; ++ } else { ++ llvm_unreachable("Unexpected type"); ++ } ++ ++ unsigned Xk = Xd_in; ++ unsigned Imm = Lane; ++ if (Lane >= HalfSize) { ++ Xk = RegInfo.createVirtualRegister(VecRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xk) ++ .addReg(Xd_in) ++ .addReg(Xd_in) ++ .addImm(1); ++ Imm = Lane - HalfSize; ++ } ++ ++ unsigned Xk128 = RegInfo.createVirtualRegister(SubVecRC); ++ unsigned Xd128 = RegInfo.createVirtualRegister(SubVecRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Xk128) ++ .addReg(Xk, 0, LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(InsertOp), Xd128) ++ .addReg(Xk128) ++ .addReg(Fs) ++ .addImm(Imm); ++ ++ unsigned Xd256 = Xd; ++ if (Lane >= HalfSize) { ++ Xd256 = RegInfo.createVirtualRegister(VecRC); ++ } ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xd256) ++ .addImm(0) ++ .addReg(Xd128) ++ .addImm(LoongArch::sub_128); ++ ++ if (Lane >= HalfSize) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) ++ .addReg(Xd_in) ++ .addReg(Xd256) ++ .addImm(2); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) ++ .addReg(Xj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSGR2VR_W), Xd) ++ .addReg(Xd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the INSERT_FW pseudo instruction. 
++// ++// insert_fw_pseudo $vd, $vd_in, $n, $fs ++// => ++// subreg_to_reg $vj:sub_lo, $fs ++// vpickve2gr.w rj, vj, 0 ++// vinsgr2vr.w, vd, rj, lane ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Vd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_W), Rj) ++ .addReg(Vj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_W), Vd) ++ .addReg(Vd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the INSERT_FD pseudo instruction. ++// insert_fd_pseudo $vd, $fs, n ++// => ++// subreg_to_reg $vk:sub_64, $fs ++// vpickve2gr.d rj, vk, 0 ++// vinsgr2vr.d vd, rj, lane ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Vd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_D), Rj) ++ .addReg(Vj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_D), Vd) ++ .addReg(Vd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSVE0_D), Xd) ++ .addReg(Xd_in) ++ .addReg(Xj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the FILL_FW pseudo instruction. 
++// ++// fill_fw_pseudo $vd, $fs ++// => ++// implicit_def $vt1 ++// insert_subreg $vt2:subreg_lo, $vt1, $fs ++// vreplvei.w vd, vt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) ++ .addReg(Vj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vd) ++ .addReg(Vj2) ++ .addImm(0); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the FILL_FD pseudo instruction. ++// ++// fill_fd_pseudo $vd, $fs ++// => ++// implicit_def $vt1 ++// insert_subreg $vt2:subreg_64, $vt1, $fs ++// vreplvei.d vd, vt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) ++ .addReg(Vj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vd) ++ .addReg(Vj2) ++ .addImm(0); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the XFILL_FW pseudo instruction. ++// ++// xfill_fw_pseudo $xd, $fs ++// => ++// implicit_def $xt1 ++// insert_subreg $xt2:subreg_lo, $xt1, $fs ++// xvreplve0.w xd, xt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitXFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) ++ .addReg(Xj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_W), Xd).addReg(Xj2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the XFILL_FD pseudo instruction. 
++// ++// xfill_fd_pseudo $xd, $fs ++// => ++// implicit_def $xt1 ++// insert_subreg $xt2:subreg_64, $xt1, $fs ++// xvreplve0.d xd, xt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitXFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) ++ .addReg(Xj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_D), Xd).addReg(Xj2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { ++ bool IsLegal = false; ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ return isUInt<5>(Imm); ++ } ++ return IsLegal; ++} ++ ++bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( ++ const MachineFunction &MF, EVT VT) const { ++ ++ VT = VT.getScalarType(); ++ ++ if (!VT.isSimple()) ++ return false; ++ ++ switch (VT.getSimpleVT().SimpleTy) { ++ case MVT::f32: ++ case MVT::f64: ++ return true; ++ default: ++ break; ++ } ++ ++ return false; ++} ++ ++bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, ++ unsigned Index) const { ++ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) ++ return false; ++ ++ return ( ++ (ResVT != MVT::v16i8) && (ResVT != MVT::v8i16) && ++ (Index == 0 || (Index == ResVT.getVectorNumElements() && ++ (ResVT.getSizeInBits() == SrcVT.getSizeInBits() / 2)))); ++} ++ ++Register ++LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, ++ const MachineFunction &MF) const { ++ // Named registers is expected to be fairly rare. For now, just support $r2 ++ // and $r21 since the linux kernel uses them. ++ if (Subtarget.is64Bit()) { ++ Register Reg = StringSwitch(RegName) ++ .Case("$r2", LoongArch::TP_64) ++ .Case("$r21", LoongArch::T9_64) ++ .Default(Register()); ++ if (Reg) ++ return Reg; ++ } else { ++ Register Reg = StringSwitch(RegName) ++ .Case("$r2", LoongArch::TP) ++ .Case("$r21", LoongArch::T9) ++ .Default(Register()); ++ if (Reg) ++ return Reg; ++ } ++ report_fatal_error("Invalid register name global variable"); ++} +diff --git a/lib/Target/LoongArch/LoongArchISelLowering.h b/lib/Target/LoongArch/LoongArchISelLowering.h +new file mode 100644 +index 00000000..e22c13ef +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -0,0 +1,557 @@ ++//===- LoongArchISelLowering.h - LoongArch DAG Lowering Interface ---------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the interfaces that LoongArch uses to lower LLVM code into a ++// selection DAG. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/ISDOpcodes.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/MachineValueType.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++namespace llvm { ++ ++class Argument; ++class CCState; ++class CCValAssign; ++class FastISel; ++class FunctionLoweringInfo; ++class MachineBasicBlock; ++class MachineFrameInfo; ++class MachineInstr; ++class LoongArchCCState; ++class LoongArchFunctionInfo; ++class LoongArchSubtarget; ++class LoongArchTargetMachine; ++class SelectionDAG; ++class TargetLibraryInfo; ++class TargetRegisterClass; ++ ++ namespace LoongArchISD { ++ ++ enum NodeType : unsigned { ++ // Start the numbering from where ISD NodeType finishes. ++ FIRST_NUMBER = ISD::BUILTIN_OP_END, ++ ++ // Jump and link (call) ++ JmpLink, ++ ++ // Tail call ++ TailCall, ++ ++ // global address ++ GlobalAddress, ++ ++ // Floating Point Branch Conditional ++ FPBrcond, ++ ++ // Floating Point Compare ++ FPCmp, ++ ++ // Floating Point Conditional Moves ++ CMovFP_T, ++ CMovFP_F, ++ FSEL, ++ ++ // FP-to-int truncation node. ++ TruncIntFP, ++ ++ // Return ++ Ret, ++ ++ // error trap Return ++ ERet, ++ ++ // Software Exception Return. ++ EH_RETURN, ++ ++ DBAR, ++ ++ BSTRPICK, ++ BSTRINS, ++ ++ // Vector comparisons. ++ // These take a vector and return a boolean. ++ VALL_ZERO, ++ VANY_ZERO, ++ VALL_NONZERO, ++ VANY_NONZERO, ++ ++ // Vector Shuffle with mask as an operand ++ VSHF, // Generic shuffle ++ SHF, // 4-element set shuffle. ++ VPACKEV, // Interleave even elements ++ VPACKOD, // Interleave odd elements ++ VILVH, // Interleave left elements ++ VILVL, // Interleave right elements ++ VPICKEV, // Pack even elements ++ VPICKOD, // Pack odd elements ++ ++ // Vector Lane Copy ++ INSVE, // Copy element from one vector to another ++ ++ // Combined (XOR (OR $a, $b), -1) ++ VNOR, ++ ++ VROR, ++ VRORI, ++ XVPICKVE, ++ XVPERMI, ++ XVSHUF4I, ++ REVBD, ++ ++ // Extended vector element extraction ++ VEXTRACT_SEXT_ELT, ++ VEXTRACT_ZEXT_ELT, ++ ++ XVBROADCAST, ++ VBROADCAST, ++ VABSD, ++ UVABSD, ++ }; ++ ++ } // ene namespace LoongArchISD ++ ++ //===--------------------------------------------------------------------===// ++ // TargetLowering Implementation ++ //===--------------------------------------------------------------------===// ++ ++ class LoongArchTargetLowering : public TargetLowering { ++ public: ++ explicit LoongArchTargetLowering(const LoongArchTargetMachine &TM, ++ const LoongArchSubtarget &STI); ++ ++ bool allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AS = 0, Align Alignment = Align(1), ++ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, ++ bool *Fast = nullptr) const override; ++ ++ /// Enable LSX support for the given integer type and Register ++ /// class. 
++ void addLSXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable LSX support for the given floating-point type and ++ /// Register class. ++ void addLSXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC); ++ ++ /// Enable LASX support for the given integer type and Register ++ /// class. ++ void addLASXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable LASX support for the given floating-point type and ++ /// Register class. ++ void addLASXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC); ++ ++ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { ++ return MVT::i32; ++ } ++ ++ EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ++ ISD::NodeType) const override; ++ ++ bool isCheapToSpeculateCttz() const override; ++ bool isCheapToSpeculateCtlz() const override; ++ ++ bool isLegalAddImmediate(int64_t) const override; ++ ++ /// Return the correct alignment for the current calling convention. ++ Align getABIAlignmentForCallingConv(Type *ArgTy, ++ const DataLayout &DL) const override { ++ const Align ABIAlign = DL.getABITypeAlign(ArgTy); ++ if (ArgTy->isVectorTy()) ++ return std::min(ABIAlign, Align(8)); ++ return ABIAlign; ++ } ++ ++ ISD::NodeType getExtendForAtomicOps() const override { ++ return ISD::SIGN_EXTEND; ++ } ++ ++ bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, ++ unsigned Index) const override; ++ ++ void LowerOperationWrapper(SDNode *N, ++ SmallVectorImpl &Results, ++ SelectionDAG &DAG) const override; ++ ++ /// LowerOperation - Provide custom lowering hooks for some operations. ++ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; ++ ++ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, ++ EVT VT) const override; ++ ++ /// ReplaceNodeResults - Replace the results of node with an illegal result ++ /// type with new values built out of custom code. ++ /// ++ void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, ++ SelectionDAG &DAG) const override; ++ ++ /// getTargetNodeName - This method returns the name of a target specific ++ // DAG node. ++ const char *getTargetNodeName(unsigned Opcode) const override; ++ ++ /// getSetCCResultType - get the ISD::SETCC result ValueType ++ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, ++ EVT VT) const override; ++ ++ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; ++ ++ MachineBasicBlock * ++ EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *MBB) const override; ++ ++ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { ++ return false; ++ } ++ ++ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; ++ ++ void AdjustInstrPostInstrSelection(MachineInstr &MI, ++ SDNode *Node) const override; ++ ++ void HandleByVal(CCState *, unsigned &, Align) const override; ++ ++ Register getRegisterByName(const char* RegName, LLT VT, ++ const MachineFunction &MF) const override; ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception address on entry to an EH pad. ++ Register ++ getExceptionPointerRegister(const Constant *PersonalityFn) const override { ++ return ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; ++ } ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception typeid on entry to a landing pad. ++ Register ++ getExceptionSelectorRegister(const Constant *PersonalityFn) const override { ++ return ABI.IsLP64() ? 
LoongArch::A1_64 : LoongArch::A1; ++ } ++ ++ bool isJumpTableRelative() const override { ++ return getTargetMachine().isPositionIndependent(); ++ } ++ ++ CCAssignFn *CCAssignFnForCall() const; ++ ++ CCAssignFn *CCAssignFnForReturn() const; ++ ++ private: ++ template ++ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; ++ ++ /// This function fills Ops, which is the list of operands that will later ++ /// be used when a function call node is created. It also generates ++ /// copyToReg nodes to set up argument registers. ++ void getOpndList(SmallVectorImpl &Ops, ++ std::deque> &RegsToPass, ++ bool IsPICCall, bool GlobalOrExternal, bool IsCallReloc, ++ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain, ++ bool IsTailCall) const; ++ ++ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; ++ ++ // Subtarget Info ++ const LoongArchSubtarget &Subtarget; ++ // Cache the ABI from the TargetMachine, we use it everywhere. ++ const LoongArchABIInfo &ABI; ++ ++ // Create a TargetGlobalAddress node. ++ SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetExternalSymbol node. ++ SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetBlockAddress node. ++ SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetJumpTable node. ++ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetConstantPool node. ++ SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Lower Operand helpers ++ SDValue LowerCallResult(SDValue Chain, SDValue InFlag, ++ CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals, ++ TargetLowering::CallLoweringInfo &CLI) const; ++ ++ // Lower Operand specifics ++ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; ++ /// Lower VECTOR_SHUFFLE into one of a number of instructions ++ /// depending on the indices in the shuffle. 
++ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; ++ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; ++ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, ++ bool IsSRA) const; ++ SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; ++ ++ /// isEligibleForTailCallOptimization - Check whether the call is eligible ++ /// for tail call optimization. ++ bool ++ isEligibleForTailCallOptimization(const CCState &CCInfo, ++ CallLoweringInfo &CLI, MachineFunction &MF, ++ unsigned NextStackOffset, ++ const LoongArchFunctionInfo &FI) const; ++ ++ /// copyByValArg - Copy argument registers which were used to pass a byval ++ /// argument to the stack. Create a stack frame object for the byval ++ /// argument. ++ void copyByValRegs(SDValue Chain, const SDLoc &DL, ++ std::vector &OutChains, SelectionDAG &DAG, ++ const ISD::ArgFlagsTy &Flags, ++ SmallVectorImpl &InVals, ++ const Argument *FuncArg, unsigned FirstReg, ++ unsigned LastReg, const CCValAssign &VA, ++ LoongArchCCState &State) const; ++ ++ /// passByValArg - Pass a byval argument in registers or on stack. ++ void passByValArg(SDValue Chain, const SDLoc &DL, ++ std::deque> &RegsToPass, ++ SmallVectorImpl &MemOpChains, SDValue StackPtr, ++ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, ++ unsigned FirstReg, unsigned LastReg, ++ const ISD::ArgFlagsTy &Flags, ++ const CCValAssign &VA) const; ++ ++ /// writeVarArgRegs - Write variable function arguments passed in registers ++ /// to the stack. Also create a stack frame object for the first variable ++ /// argument. 
++ void writeVarArgRegs(std::vector &OutChains, SDValue Chain, ++ const SDLoc &DL, SelectionDAG &DAG, ++ CCState &State) const; ++ ++ SDValue ++ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals) const override; ++ ++ SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, ++ SDValue Arg, const SDLoc &DL, bool IsTailCall, ++ SelectionDAG &DAG) const; ++ ++ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const override; ++ ++ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, ++ bool isVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const override; ++ ++ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &dl, SelectionDAG &DAG) const override; ++ ++ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; ++ ++ // Inline asm support ++ ConstraintType getConstraintType(StringRef Constraint) const override; ++ ++ /// Examine constraint string and operand type and determine a weight value. ++ /// The operand object must already have been set up with the operand type. ++ ConstraintWeight getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const override; ++ ++ /// This function parses registers that appear in inline-asm constraints. ++ /// It returns pair (0, 0) on failure. ++ std::pair ++ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; ++ ++ std::pair ++ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, MVT VT) const override; ++ ++ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops ++ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is ++ /// true it means one of the asm constraint of the inline asm instruction ++ /// being processed is 'm'. ++ void LowerAsmOperandForConstraint(SDValue Op, ++ std::string &Constraint, ++ std::vector &Ops, ++ SelectionDAG &DAG) const override; ++ ++ unsigned ++ getInlineAsmMemConstraint(StringRef ConstraintCode) const override { ++ if (ConstraintCode == "R") ++ return InlineAsm::Constraint_R; ++ else if (ConstraintCode == "ZC") ++ return InlineAsm::Constraint_ZC; ++ else if (ConstraintCode == "ZB") ++ return InlineAsm::Constraint_ZB; ++ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); ++ } ++ ++ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, ++ Type *Ty, unsigned AS, ++ Instruction *I = nullptr) const override; ++ ++ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; ++ ++ EVT getOptimalMemOpType(const MemOp &Op, ++ const AttributeList &FuncAttributes) const override; ++ ++ /// isFPImmLegal - Returns true if the target can instruction select the ++ /// specified FP immediate natively. If false, the legalizer will ++ /// materialize the FP immediate as a load from a constant pool. ++ bool isFPImmLegal(const APFloat &Imm, EVT VT, ++ bool ForCodeSize) const override; ++ ++ bool useSoftFloat() const override; ++ ++ bool shouldInsertFencesForAtomic(const Instruction *I) const override { ++ return isa(I) || isa(I); ++ } ++ ++ /// Emit a sign-extension using sll/sra, seb, or seh appropriately. 
++ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size, unsigned DstReg, ++ unsigned SrcRec) const; ++ ++ MachineBasicBlock *emitLoadAddress(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ ++ MachineBasicBlock *emitXINSERT_B(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitINSERT_H_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, ++ bool isFPCmp, unsigned Opc) const; ++ ++ /// SE ++ MachineBasicBlock *emitLSXCBranchPseudo(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned BranchOp) const; ++ /// Emit the COPY_FW pseudo instruction ++ MachineBasicBlock *emitCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the COPY_FD pseudo instruction ++ MachineBasicBlock *emitCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitCONCAT_VECTORS(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Bytes) const; ++ ++ MachineBasicBlock *emitXCOPY_FW_GPR(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned EltSizeInBytes) const; ++ ++ MachineBasicBlock *emitXINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ /// Emit the INSERT_FW pseudo instruction ++ MachineBasicBlock *emitINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the INSERT_FD pseudo instruction ++ MachineBasicBlock *emitINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_DF_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ bool IsGPR64) const; ++ /// Emit the FILL_FW pseudo instruction ++ MachineBasicBlock *emitFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the FILL_FD pseudo instruction ++ MachineBasicBlock *emitFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitXFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ }; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H +diff --git a/lib/Target/LoongArch/LoongArchInstrFormats.td b/lib/Target/LoongArch/LoongArchInstrFormats.td +new file mode 100644 +index 00000000..d75d5198 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchInstrFormats.td +@@ -0,0 +1,790 @@ ++//===-- LoongArchInstrFormats.td - LoongArch Instruction Formats -----*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Describe LoongArch instructions format ++// ++// CPU INSTRUCTION FORMATS ++// ++// opcode - operation code. ++// rs - src reg. ++// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr). ++// rd - dst reg, only used on 3 regs instr. ++// shamt - only used on shift instructions, contains the shift amount. ++// funct - combined with opcode field give us an operation code. ++// ++//===----------------------------------------------------------------------===// ++ ++class StdArch { ++ ++ bits<32> Inst; ++} ++ ++// Format specifies the encoding used by the instruction. This is part of the ++// ad-hoc solution used to emit machine instruction encodings by our machine ++// code emitter. ++class Format val> { ++ bits<4> Value = val; ++} ++ ++def Pseudo : Format<0>; ++def FrmR : Format<1>; ++def FrmI : Format<2>; ++def FrmJ : Format<3>; ++def FrmFR : Format<4>; ++def FrmFI : Format<5>; ++def FrmOther : Format<6>; ++ ++// Generic LoongArch Format ++class InstLA pattern, Format f> ++ : Instruction ++{ ++ field bits<32> Inst; ++ Format Form = f; ++ ++ let Namespace = "LoongArch"; ++ ++ let Size = 4; ++ ++ let OutOperandList = outs; ++ let InOperandList = ins; ++ let AsmString = asmstr; ++ let Pattern = pattern; ++ ++ // ++ // Attributes specific to LoongArch instructions... ++ // ++ bits<4> FormBits = Form.Value; ++ bit isCTI = 0; // Any form of Control Transfer Instruction. ++ // Required for LoongArch ++ bit hasForbiddenSlot = 0; // Instruction has a forbidden slot. ++ bit IsPCRelativeLoad = 0; // Load instruction with implicit source register ++ // ($pc) and with explicit offset and destination ++ // register ++ bit hasFCCRegOperand = 0; // Instruction uses $fcc register ++ ++ // TSFlags layout should be kept in sync with MCTargetDesc/LoongArchBaseInfo.h. 
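++  // (FormBits occupies TSFlags{3-0}; isCTI, hasForbiddenSlot, IsPCRelativeLoad
++  // and hasFCCRegOperand each take one bit above it, as assigned just below.)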
++ let TSFlags{3-0} = FormBits; ++ let TSFlags{4} = isCTI; ++ let TSFlags{5} = hasForbiddenSlot; ++ let TSFlags{6} = IsPCRelativeLoad; ++ let TSFlags{7} = hasFCCRegOperand; ++ ++ let DecoderNamespace = "LoongArch"; ++ ++ field bits<32> SoftFail = 0; ++} ++ ++class InstForm pattern, ++ Format f, string opstr = ""> : ++ InstLA { ++ string BaseOpcode = opstr; ++ string Arch; ++} ++ ++class LoongArch_str { ++ string Arch; ++ string BaseOpcode = opstr; ++} ++ ++//===-----------------------------------------------------------===// ++// Format instruction classes in the LoongArch ++//===-----------------------------------------------------------===// ++ ++// R2 classes: 2 registers ++// ++class R2 : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2I op> ++ : R2 { ++ let Inst{31-15} = 0x0; ++ let Inst{14-10} = op; ++} ++ ++class R2F op> ++ : R2 { ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVFI op> ++ : R2 { ++ bits<5> rj; ++ bits<5> fd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVIF op> ++ : R2 { ++ bits<5> fj; ++ bits<5> rd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = rd; ++} ++ ++class R2P op> ++ : R2 { ++ let Inst{31-13} = 0x3240; ++ let Inst{12-10} = op; ++} ++ ++class R2_CSR op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<14> csr; ++ ++ let Inst{31-24} = op; ++ let Inst{23-10} = csr; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_SI16 op> ++ : StdArch { ++ bits<5> rd; ++ bits<5> rj; ++ bits<16> si16; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = si16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_COND op, bits<5> cond> ++ : StdArch { ++ bits<5> fj; ++ bits<5> fk; ++ bits<3> cd; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class R2_LEVEL op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<8> level; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = level; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class IMM32 op> ++ : StdArch { ++ let Inst{31-16} = 0x0648; ++ let Inst{15-10} = op; ++ let Inst{9-0} = 0; ++} ++ ++class WAIT_FM : StdArch { ++ bits<15> hint; ++ ++ let Inst{31-15} = 0xc91; ++ let Inst{14-0} = hint; ++} ++ ++class R2_INVTLB : StdArch { ++ bits<5> rj; ++ bits<5> op; ++ bits<5> rk; ++ ++ let Inst{31-15} = 0xc93; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = op; ++} ++ ++class BAR_FM op> ++ : StdArch { ++ bits<15> hint; ++ ++ let Inst{31-16} = 0x3872; ++ let Inst{15} = op; ++ let Inst{14-0} = hint; ++} ++ ++class PRELD_FM : StdArch { ++ bits<5> rj; ++ bits<5> hint; ++ bits<12> imm12; ++ ++ let Inst{31-22} = 0xab; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = hint; ++} ++ ++// R3 classes: 3 registers ++// ++class R3 : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R3I op> ++ : R3 { ++ let Inst{31-22} = 0x0; ++ let Inst{21-15} = op; ++} ++ ++class R3F op> ++ : R3 { ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-21} = 0x8; ++ let Inst{20-15} = op; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class R3MI op> ++ : R3 { 
++ let Inst{31-23} = 0x70; ++ let Inst{22-15} = op; ++} ++ ++class AM op> : StdArch { ++ bits<5> rk; ++ bits<17> addr; // rj + 12 bits offset 0 ++ bits<5> rd; ++ ++ let Inst{31-21} = 0x1c3; ++ let Inst{20-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++ ++class R3MF op> ++ : R3 { ++ bits<5> fd; ++ ++ let Inst{31-23} = 0x70; ++ let Inst{22-15} = op; ++ let Inst{4-0} = fd; ++} ++ ++class R3_SA2 op> ++ : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> rd; ++ bits<2> sa; ++ ++ let Inst{31-22} = 0x0; ++ let Inst{21-17} = op; ++ let Inst{16-15} = sa; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R3_SA3 : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> rd; ++ bits<3> sa; ++ ++ let Inst{31-18} = 3; ++ let Inst{17-15} = sa; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R4 classes: 4 registers ++// ++class R4MUL op> ++ : StdArch { ++ bits<5> fa; ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-24} = 0x8; ++ let Inst{23-20} = op; ++ let Inst{19-15} = fa; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class R4CMP op> ++ : StdArch { ++ bits<5> cond; ++ bits<5> fk; ++ bits<5> fj; ++ bits<3> cd; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class R4SEL : StdArch { ++ bits<3> ca; ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-18} = 0x340; ++ let Inst{17-15} = ca; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++// R2_IMM5 classes: 2registers and 1 5bit-immediate ++// ++class R2_IMM5 op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<5> imm5; ++ ++ let Inst{31-20} = 0x4; ++ let Inst{19-18} = op; ++ let Inst{17-15} = 0x1; ++ let Inst{14-10} = imm5; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM6 classes: 2registers and 1 6bit-immediate ++// ++class R2_IMM6 op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<6> imm6; ++ ++ let Inst{31-20} = 0x4; ++ let Inst{19-18} = op; ++ let Inst{17-16} = 0x1; ++ let Inst{15-10} = imm6; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM12 classes: 2 registers and 1 12bit-immediate ++// ++class LOAD_STORE op> ++ : StdArch { ++ bits<5> rd; ++ bits<17> addr; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++// for reloc ++class LOAD_STORE_RRI op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<12> imm12; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++ ++class R2_IMM12 op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<12> imm12; ++ ++ let Inst{31-25} = 0x1; ++ let Inst{24-22} = op; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class LEA_ADDI_FM op> ++ : StdArch { ++ bits<5> rd; ++ bits<17> addr; ++ ++ let Inst{31-25} = 0x1; ++ let Inst{24-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM14 classes: 2 registers and 1 14bit-immediate ++// ++class LL_SC op> ++ : StdArch { ++ bits<5> rd; ++ bits<19> addr; ++ ++ let Inst{31-27} = 4; ++ let Inst{26-24} = op; ++ let Inst{23-10} = addr{13-0}; ++ let Inst{9-5} = addr{18-14}; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM16 classes: 2 registers 
and 1 16bit-immediate ++// ++class R2_IMM16BEQ op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<16> offs16; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_IMM16JIRL : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<16> offs16; ++ ++ let Inst{31-26} = 0x13; ++ let Inst{25-10} = offs16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R1_IMM21 classes: 1 registers and 1 21bit-immediate ++// ++class R1_IMM21BEQZ op> ++ : StdArch { ++ bits<5> rj; ++ bits<21> offs21; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs21{15-0}; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = offs21{20-16}; ++} ++ ++class R1_CSR op> ++ : StdArch { ++ bits<5> rd; ++ bits<14> csr; ++ ++ let Inst{31-24} = op{7-0}; ++ let Inst{23-10} = csr; ++ let Inst{9-5} = op{12-8}; ++ let Inst{4-0} = rd; ++} ++ ++class R1_SI20 op> ++ : StdArch { ++ bits<5> rd; ++ bits<20> si20; ++ ++ let Inst{31-25} = op; ++ let Inst{24-5} = si20; ++ let Inst{4-0} = rd; ++} ++ ++class R1_CACHE : StdArch { ++ bits<5> rj; ++ bits<5> op; ++ bits<12> si12; ++ ++ let Inst{31-22} = 0x18; ++ let Inst{21-10} = si12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = op; ++} ++ ++class R1_SEQ op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> offset; ++ bits<8> seq; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = seq; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = 0b00000; ++} ++ ++class R1_BCEQZ op> ++ : StdArch { ++ bits<21> offset; ++ bits<3> cj; ++ ++ let Inst{31-26} = 0x12; ++ let Inst{25-10} = offset{15-0}; ++ let Inst{9-8} = op; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = offset{20-16}; ++} ++ ++// IMM26 classes: 1 26bit-immediate ++// ++class IMM26B op> ++ : StdArch { ++ bits<26> offs26; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs26{15-0}; ++ let Inst{9-0} = offs26{25-16}; ++} ++ ++// LoongArch Pseudo Instructions Format ++class LoongArchPseudo pattern> : ++ InstLA { ++ let isCodeGenOnly = 1; ++ let isPseudo = 1; ++} ++ ++// Pseudo-instructions for alternate assembly syntax (never used by codegen). ++// These are aliases that require C++ handling to convert to the target ++// instruction, while InstAliases can be handled directly by tblgen. 
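++// (For instance, an assembler-only macro such as li.d $rd, imm64 is expanded
++// by the assembly parser into a real lu12i.w/ori/lu32i.d/lu52i.d sequence as
++// needed; which pseudos actually use this class is defined elsewhere, so the
++// example is purely illustrative.)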
++class LoongArchAsmPseudoInst: ++ InstLA { ++ let isPseudo = 1; ++ let Pattern = []; ++} ++ ++// ++// Misc instruction classes ++class ASSERT op> ++ : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ ++ let Inst{31-17} = 0x0; ++ let Inst{16-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = 0x0; ++} ++ ++class CODE15 op> ++ : StdArch { ++ bits<15> Code; ++ ++ let Inst{31-22} = 0x0; ++ let Inst{21-15} = op; ++ let Inst{14-0} = Code; ++} ++ ++class INSERT_BIT32 op> ++ : StdArch { ++ bits<5> msbw; ++ bits<5> lsbw; ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{31-21} = 0x3; ++ let Inst{20-16} = msbw; ++ let Inst{15} = op; ++ let Inst{14-10} = lsbw; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class INSERT_BIT64 op> ++ : StdArch { ++ bits<6> msbd; ++ bits<6> lsbd; ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{31-23} = 0x1; ++ let Inst{22} = op; ++ let Inst{21-16} = msbd; ++ let Inst{15-10} = lsbd; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class MOVGPR2FCSR: StdArch { ++ bits<5> fcsr; ++ bits<5> rj; ++ ++ let Inst{31-10} = 0x4530; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = fcsr; ++} ++ ++class MOVFCSR2GPR: StdArch { ++ bits<5> fcsr; ++ bits<5> rd; ++ ++ let Inst{31-10} = 0x4532; ++ let Inst{9-5} = fcsr; ++ let Inst{4-0} = rd; ++} ++ ++class MOVFGR2FCFR: StdArch { ++ bits<3> cd; ++ bits<5> fj; ++ ++ let Inst{31-10} = 0x4534; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class MOVFCFR2FGR: StdArch { ++ bits<3> cj; ++ bits<5> fd; ++ ++ let Inst{31-10} = 0x4535; ++ let Inst{9-8} = 0; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVGPR2FCFR: StdArch { ++ bits<3> cd; ++ bits<5> rj; ++ ++ let Inst{31-10} = 0x4536; ++ let Inst{9-5} = rj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class MOVFCFR2GPR: StdArch { ++ bits<3> cj; ++ bits<5> rd; ++ ++ let Inst{31-10} = 0x4537; ++ let Inst{9-8} = 0; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = rd; ++} ++ ++class LoongArchInst : InstLA<(outs), (ins), "", [], FrmOther> { ++} ++class JMP_OFFS_2R op> : LoongArchInst { ++ bits<5> rs; ++ bits<5> rd; ++ bits<16> offset; ++ ++ bits<32> Inst; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offset; ++ let Inst{9-5} = rs; ++ let Inst{4-0} = rd; ++} ++ ++class FJ op> : StdArch ++{ ++ bits<26> target; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = target{15-0}; ++ let Inst{9-0} = target{25-16}; ++} ++ ++class LUI_FM : StdArch { ++ bits<5> rt; ++ bits<16> imm16; ++ ++ let Inst{31-26} = 0xf; ++ let Inst{25-21} = 0; ++ let Inst{20-16} = rt; ++ let Inst{15-0} = imm16; ++} ++ ++class R2_IMM12M_STD op> : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<12> imm12; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class LLD_2R Code> : LoongArchInst { ++ bits<5> rd; ++ bits<19> addr; ++ bits<5> rj = addr{18-14}; ++ bits<14> offset = addr{13-0}; ++ ++ bits<32> Inst; ++ ++ let Inst{31-27} = 0x4; ++ let Inst{26-24} = Code; ++ let Inst{23-10} = offset; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class CEQS_FM op> { ++ bits<5> fj; ++ bits<5> fk; ++ bits<3> cd; ++ bits<5> cond; ++ ++ bits<32> Inst; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ +diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/lib/Target/LoongArch/LoongArchInstrInfo.cpp +new file mode 100644 +index 00000000..3c6b3334 +--- /dev/null 
++++ b/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -0,0 +1,1040 @@ ++//===- LoongArchInstrInfo.cpp - LoongArch Instruction Information -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchInstrInfo.h" ++#include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetOpcodes.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++ ++using namespace llvm; ++ ++#define GET_INSTRINFO_CTOR_DTOR ++#include "LoongArchGenInstrInfo.inc" ++ ++// Pin the vtable to this file. ++void LoongArchInstrInfo::anchor() {} ++LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI) ++ : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN, ++ LoongArch::ADJCALLSTACKUP), ++ RI(), Subtarget(STI) {} ++ ++const LoongArchRegisterInfo &LoongArchInstrInfo::getRegisterInfo() const { ++ return RI; ++} ++ ++/// isLoadFromStackSlot - If the specified machine instruction is a direct ++/// load from a stack slot, return the virtual or physical register number of ++/// the destination along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than loading from the stack slot. ++unsigned LoongArchInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ unsigned Opc = MI.getOpcode(); ++ if ((Opc == LoongArch::LD_W) || (Opc == LoongArch::LD_D) || ++ (Opc == LoongArch::FLD_S) || (Opc == LoongArch::FLD_D)) { ++ if ((MI.getOperand(1).isFI()) && // is a stack slot ++ (MI.getOperand(2).isImm()) && // the imm is zero ++ (isZeroImm(MI.getOperand(2)))) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ } ++ return 0; ++} ++ ++/// isStoreToStackSlot - If the specified machine instruction is a direct ++/// store to a stack slot, return the virtual or physical register number of ++/// the source reg along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than storing to the stack slot. 
++unsigned LoongArchInstrInfo::isStoreToStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ unsigned Opc = MI.getOpcode(); ++ if ((Opc == LoongArch::ST_D) || (Opc == LoongArch::ST_W) || ++ (Opc == LoongArch::FST_S) ||(Opc == LoongArch::FST_D)) { ++ if ((MI.getOperand(1).isFI()) && // is a stack slot ++ (MI.getOperand(2).isImm()) && // the imm is zero ++ (isZeroImm(MI.getOperand(2)))) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ } ++ return 0; ++} ++ ++void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ const DebugLoc &DL, MCRegister DestReg, ++ MCRegister SrcReg, bool KillSrc) const { ++ unsigned Opc = 0, ZeroReg = 0; ++ unsigned ZeroImm = 1; ++ if (LoongArch::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg. ++ if (LoongArch::GPR32RegClass.contains(SrcReg)) { ++ Opc = LoongArch::OR32, ZeroReg = LoongArch::ZERO; ++ } ++ else if (LoongArch::FGR32RegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVFR2GR_S; ++ else if (LoongArch::FCFRRegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVCF2GR; ++ } ++ else if (LoongArch::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg. ++ if (LoongArch::FGR32RegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2FR_W; ++ else if (LoongArch::FCFRRegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2CF; ++ } ++ else if (LoongArch::FGR32RegClass.contains(DestReg, SrcReg)) ++ Opc = LoongArch::FMOV_S; ++ else if (LoongArch::FGR64RegClass.contains(DestReg, SrcReg)) ++ Opc = LoongArch::FMOV_D; ++ else if (LoongArch::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. ++ if (LoongArch::GPR64RegClass.contains(SrcReg)) ++ Opc = LoongArch::OR, ZeroReg = LoongArch::ZERO_64; ++ else if (LoongArch::FGR64RegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVFR2GR_D; ++ else if (LoongArch::FCFRRegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVCF2GR; ++ } ++ else if (LoongArch::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg. 
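++    // i.e. a 64-bit GPR source being moved into a 64-bit FPR or into a
++    // condition-flag register, handled by the two cases below.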
++ if (LoongArch::FGR64RegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2FR_D; ++ else if (LoongArch::FCFRRegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2CF; ++ } ++ else if (LoongArch::FGR32RegClass.contains(DestReg)) // Copy to FGR32 Reg ++ Opc = LoongArch::MOVCF2FR; ++ else if (LoongArch::FGR32RegClass.contains(SrcReg)) // Copy from FGR32 Reg ++ Opc = LoongArch::MOVFR2CF; ++ else if (LoongArch::FGR64RegClass.contains(DestReg)) // Copy to FGR64 Reg ++ Opc = LoongArch::MOVCF2FR; ++ else if (LoongArch::FGR64RegClass.contains(SrcReg)) // Copy from FGR64 Reg ++ Opc = LoongArch::MOVFR2CF; ++ else if (LoongArch::LSX128BRegClass.contains(DestReg)) { // Copy to LSX reg ++ if (LoongArch::LSX128BRegClass.contains(SrcReg)) ++ Opc = LoongArch::VORI_B, ZeroImm = 0; ++ } else if (LoongArch::LASX256BRegClass.contains( ++ DestReg)) { // Copy to LASX reg ++ if (LoongArch::LASX256BRegClass.contains(SrcReg)) ++ Opc = LoongArch::XVORI_B, ZeroImm = 0; ++ } ++ ++ assert(Opc && "Cannot copy registers"); ++ ++ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); ++ ++ if (DestReg) ++ MIB.addReg(DestReg, RegState::Define); ++ ++ if (SrcReg) ++ MIB.addReg(SrcReg, getKillRegState(KillSrc)); ++ ++ if (ZeroReg) ++ MIB.addReg(ZeroReg); ++ ++ if (!ZeroImm) ++ MIB.addImm(0); ++} ++ ++static bool isORCopyInst(const MachineInstr &MI) { ++ switch (MI.getOpcode()) { ++ default: ++ break; ++ case LoongArch::OR: ++ if (MI.getOperand(2).getReg() == LoongArch::ZERO_64) ++ return true; ++ break; ++ case LoongArch::OR32: ++ if (MI.getOperand(2).getReg() == LoongArch::ZERO) ++ return true; ++ break; ++ } ++ return false; ++} ++ ++/// We check for the common case of 'or', as it's LoongArch' preferred instruction ++/// for GPRs but we have to check the operands to ensure that is the case. ++/// Other move instructions for LoongArch are directly identifiable. 
++Optional ++LoongArchInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { ++ if (MI.isMoveReg() || isORCopyInst(MI)) { ++ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; ++ } ++ return None; ++} ++ ++void LoongArchInstrInfo:: ++storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ Register SrcReg, bool isKill, int FI, ++ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, ++ int64_t Offset) const { ++ DebugLoc DL; ++ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); ++ ++ unsigned Opc = 0; ++ if (LoongArch::GPR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::ST_W; ++ else if (LoongArch::GPR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::ST_D; ++ else if (LoongArch::FGR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FST_D; ++ else if (LoongArch::FGR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FST_S; ++ ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) ++ Opc = LoongArch::VST; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16)) ++ Opc = LoongArch::VST_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = LoongArch::VST_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v2f64)) ++ Opc = LoongArch::VST_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8)) ++ Opc = LoongArch::XVST; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16)) ++ Opc = LoongArch::XVST_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v8f32)) ++ Opc = LoongArch::XVST_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = LoongArch::XVST_D; ++ ++ assert(Opc && "Register class not handled!"); ++ BuildMI(MBB, I, DL, get(Opc)) ++ .addReg(SrcReg, getKillRegState(isKill)) ++ .addFrameIndex(FI) ++ .addImm(Offset) ++ .addMemOperand(MMO); ++} ++ ++void LoongArchInstrInfo:: ++loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ Register DestReg, int FI, const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, int64_t Offset) const { ++ DebugLoc DL; ++ if (I != MBB.end()) ++ DL = I->getDebugLoc(); ++ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); ++ unsigned Opc = 0; ++ ++ if (LoongArch::GPR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::LD_W; ++ else if (LoongArch::GPR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::LD_D; ++ else if (LoongArch::FGR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FLD_S; ++ else if (LoongArch::FGR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FLD_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) ++ Opc = LoongArch::VLD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16)) ++ Opc = LoongArch::VLD_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = LoongArch::VLD_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v2f64)) ++ Opc = LoongArch::VLD_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8)) ++ Opc = LoongArch::XVLD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16)) ++ Opc = LoongArch::XVLD_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v8f32)) ++ Opc = LoongArch::XVLD_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = LoongArch::XVLD_D; ++ ++ assert(Opc && "Register class not 
handled!"); ++ ++ BuildMI(MBB, I, DL, get(Opc), DestReg) ++ .addFrameIndex(FI) ++ .addImm(Offset) ++ .addMemOperand(MMO); ++} ++ ++bool LoongArchInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { ++ MachineBasicBlock &MBB = *MI.getParent(); ++ switch (MI.getDesc().getOpcode()) { ++ default: ++ return false; ++ case LoongArch::RetRA: ++ expandRetRA(MBB, MI); ++ break; ++ case LoongArch::ERet: ++ expandERet(MBB, MI); ++ break; ++ case LoongArch::PseudoFFINT_S_W: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_W, LoongArch::MOVGR2FR_W, false); ++ break; ++ case LoongArch::PseudoFFINT_S_L: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_L, LoongArch::MOVGR2FR_D, true); ++ break; ++ case LoongArch::PseudoFFINT_D_W: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_W, LoongArch::MOVGR2FR_W, true); ++ break; ++ case LoongArch::PseudoFFINT_D_L: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_L, LoongArch::MOVGR2FR_D, true); ++ break; ++ case LoongArch::LoongArcheh_return32: ++ case LoongArch::LoongArcheh_return64: ++ expandEhReturn(MBB, MI); ++ break; ++ } ++ ++ MBB.erase(MI); ++ return true; ++} ++ ++/// getOppositeBranchOpc - Return the inverse of the specified ++/// opcode, e.g. turning BEQ to BNE. ++unsigned LoongArchInstrInfo::getOppositeBranchOpc(unsigned Opc) const { ++ switch (Opc) { ++ default: llvm_unreachable("Illegal opcode!"); ++ case LoongArch::BEQ32: return LoongArch::BNE32; ++ case LoongArch::BEQ: return LoongArch::BNE; ++ case LoongArch::BNE32: return LoongArch::BEQ32; ++ case LoongArch::BNE: return LoongArch::BEQ; ++ case LoongArch::BEQZ32: return LoongArch::BNEZ32; ++ case LoongArch::BEQZ: return LoongArch::BNEZ; ++ case LoongArch::BNEZ32: return LoongArch::BEQZ32; ++ case LoongArch::BNEZ: return LoongArch::BEQZ; ++ case LoongArch::BCEQZ: return LoongArch::BCNEZ; ++ case LoongArch::BCNEZ: return LoongArch::BCEQZ; ++ case LoongArch::BLT32: return LoongArch::BGE32; ++ case LoongArch::BLT: return LoongArch::BGE; ++ case LoongArch::BGE32: return LoongArch::BLT32; ++ case LoongArch::BGE: return LoongArch::BLT; ++ case LoongArch::BLTU32: return LoongArch::BGEU32; ++ case LoongArch::BLTU: return LoongArch::BGEU; ++ case LoongArch::BGEU32: return LoongArch::BLTU32; ++ case LoongArch::BGEU: return LoongArch::BLTU; ++ } ++} ++ ++void LoongArchInstrInfo::adjustReg(unsigned DestReg, unsigned SrcReg, ++ int64_t Amount, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ MachineInstr::MIFlag Flag) const { ++ LoongArchABIInfo ABI = Subtarget.getABI(); ++ DebugLoc DL; ++ unsigned ADDI = ABI.GetPtrAddiOp(); ++ ++ if (Amount == 0) ++ return; ++ ++ if (isInt<12>(Amount)) { ++ // addi $DestReg, $SrcReg, amount ++ BuildMI(MBB, I, DL, get(ADDI), DestReg) ++ .addReg(SrcReg) ++ .addImm(Amount) ++ .setMIFlag(Flag); ++ } else { ++ // For numbers which are not 12bit integers we synthesize Amount inline ++ // then add or subtract it from $SrcReg. ++ unsigned Opc = ABI.GetPtrAddOp(); ++ if (Amount < 0) { ++ Opc = ABI.GetPtrSubOp(); ++ Amount = -Amount; ++ } ++ unsigned Reg = loadImmediate(Amount, MBB, I, DL); ++ BuildMI(MBB, I, DL, get(Opc), DestReg) ++ .addReg(SrcReg) ++ .addReg(Reg, RegState::Kill) ++ .setMIFlag(Flag); ++ } ++} ++ ++/// This function generates the sequence of instructions needed to get the ++/// result of adding register REG and immediate IMM. ++unsigned LoongArchInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator II, ++ const DebugLoc &DL) const { ++ const TargetRegisterClass *RC = Subtarget.isABI_LP64() ++ ? 
&LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass; ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, Subtarget.is64Bit()); ++ unsigned DstReg = MBB.getParent()->getRegInfo().createVirtualRegister(RC); ++ unsigned SrcReg = ++ Subtarget.isABI_LP64() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ // Build the instructions in Seq. ++ for (auto &Inst : Seq) { ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ BuildMI(MBB, II, DL, get(Inst.Opc), DstReg).addImm(Inst.Imm); ++ else ++ BuildMI(MBB, II, DL, get(Inst.Opc), DstReg) ++ .addReg(SrcReg, RegState::Kill) ++ .addImm(Inst.Imm); ++ SrcReg = DstReg; ++ } ++ return DstReg; ++} ++ ++unsigned LoongArchInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { ++ return (Opc == LoongArch::B || Opc == LoongArch::B32 || ++ Opc == LoongArch::BEQZ || Opc == LoongArch::BEQZ32 || ++ Opc == LoongArch::BNEZ || Opc == LoongArch::BNEZ32 || ++ Opc == LoongArch::BCEQZ || ++ Opc == LoongArch::BCNEZ || ++ Opc == LoongArch::BEQ || Opc == LoongArch::BEQ32 || ++ Opc == LoongArch::BNE || Opc == LoongArch::BNE32 || ++ Opc == LoongArch::BLT || Opc == LoongArch::BLT32 || ++ Opc == LoongArch::BGE || Opc == LoongArch::BGE32 || ++ Opc == LoongArch::BLTU || Opc == LoongArch::BLTU32 || ++ Opc == LoongArch::BGEU || Opc == LoongArch::BGEU32) ? Opc : 0; ++} ++ ++void LoongArchInstrInfo::expandRetRA(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ ++ MachineInstrBuilder MIB; ++ ++ if (Subtarget.is64Bit()) ++ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn64)) ++ .addReg(LoongArch::RA_64, RegState::Undef); ++ else ++ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn)) ++ .addReg(LoongArch::RA, RegState::Undef); ++ ++ // Retain any imp-use flags. 
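++  // (Typically the implicit uses of the physical return-value registers that
++  // LowerReturn attaches to the return pseudo; the exact operands depend on
++  // the calling convention, so this is only an assumption for illustration.)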
++ for (auto & MO : I->operands()) { ++ if (MO.isImplicit()) ++ MIB.add(MO); ++ } ++} ++ ++void LoongArchInstrInfo::expandERet(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::ERTN)); ++} ++ ++std::pair ++LoongArchInstrInfo::compareOpndSize(unsigned Opc, ++ const MachineFunction &MF) const { ++ const MCInstrDesc &Desc = get(Opc); ++ assert(Desc.NumOperands == 2 && "Unary instruction expected."); ++ const LoongArchRegisterInfo *RI = &getRegisterInfo(); ++ unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); ++ unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); ++ ++ return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); ++} ++ ++void LoongArchInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ unsigned CvtOpc, unsigned MovOpc, ++ bool IsI64) const { ++ const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc); ++ const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1); ++ unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; ++ unsigned KillSrc = getKillRegState(Src.isKill()); ++ DebugLoc DL = I->getDebugLoc(); ++ bool DstIsLarger, SrcIsLarger; ++ ++ std::tie(DstIsLarger, SrcIsLarger) = ++ compareOpndSize(CvtOpc, *MBB.getParent()); ++ ++ if (DstIsLarger) ++ TmpReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); ++ ++ if (SrcIsLarger) ++ DstReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); ++ ++ BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); ++ BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); ++} ++ ++void LoongArchInstrInfo::expandEhReturn(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ // This pseudo instruction is generated as part of the lowering of ++ // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and ++ // indirect jump to TargetReg ++ LoongArchABIInfo ABI = Subtarget.getABI(); ++ unsigned ADD = ABI.GetPtrAddOp(); ++ unsigned SP = Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP; ++ unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA; ++ unsigned T8 = Subtarget.is64Bit() ? LoongArch::T8_64 : LoongArch::T8; ++ unsigned ZERO = Subtarget.is64Bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ unsigned OffsetReg = I->getOperand(0).getReg(); ++ unsigned TargetReg = I->getOperand(1).getReg(); ++ ++ // add $ra, $v0, $zero ++ // add $sp, $sp, $v1 ++ // jr $ra (via RetRA) ++ const TargetMachine &TM = MBB.getParent()->getTarget(); ++ if (TM.isPositionIndependent()) ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), T8) ++ .addReg(TargetReg) ++ .addReg(ZERO); ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), RA) ++ .addReg(TargetReg) ++ .addReg(ZERO); ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), SP).addReg(SP).addReg(OffsetReg); ++ expandRetRA(MBB, I); ++} ++ ++ ++bool LoongArchInstrInfo::isZeroImm(const MachineOperand &op) const { ++ return op.isImm() && op.getImm() == 0; ++} ++ ++/// insertNoop - If data hazard condition is found insert the target nop ++/// instruction. ++// FIXME: This appears to be dead code. 
++void LoongArchInstrInfo:: ++insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const ++{ ++ DebugLoc DL; ++ BuildMI(MBB, MI, DL, get(LoongArch::NOP)); ++} ++ ++MachineMemOperand * ++LoongArchInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, ++ MachineMemOperand::Flags Flags) const { ++ MachineFunction &MF = *MBB.getParent(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ ++ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), ++ Flags, MFI.getObjectSize(FI), ++ MFI.getObjectAlign(FI)); ++} ++ ++//===----------------------------------------------------------------------===// ++// Branch Analysis ++//===----------------------------------------------------------------------===// ++ ++void LoongArchInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, ++ MachineBasicBlock *&BB, ++ SmallVectorImpl &Cond) const { ++ assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); ++ int NumOp = Inst->getNumExplicitOperands(); ++ ++ // for both int and fp branches, the last explicit operand is the ++ // MBB. ++ BB = Inst->getOperand(NumOp-1).getMBB(); ++ Cond.push_back(MachineOperand::CreateImm(Opc)); ++ ++ for (int i = 0; i < NumOp-1; i++) ++ Cond.push_back(Inst->getOperand(i)); ++} ++ ++bool LoongArchInstrInfo::analyzeBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify) const { ++ SmallVector BranchInstrs; ++ BranchType BT = analyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs); ++ ++ return (BT == BT_None) || (BT == BT_Indirect); ++} ++ ++MachineInstr * ++LoongArchInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ const DebugLoc &DL, ++ ArrayRef Cond) const { ++ unsigned Opc = Cond[0].getImm(); ++ const MCInstrDesc &MCID = get(Opc); ++ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); ++ ++ for (unsigned i = 1; i < Cond.size(); ++i) { ++ assert((Cond[i].isImm() || Cond[i].isReg()) && ++ "Cannot copy operand for conditional branch!"); ++ MIB.add(Cond[i]); ++ } ++ MIB.addMBB(TBB); ++ return MIB.getInstr(); ++} ++ ++unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *TBB, ++ MachineBasicBlock *FBB, ++ ArrayRef Cond, ++ const DebugLoc &DL, ++ int *BytesAdded) const { ++ unsigned UncondBrOpc = LoongArch::B; ++ // Shouldn't be a fall through. ++ assert(TBB && "insertBranch must not be told to insert a fallthrough"); ++ if (BytesAdded) ++ *BytesAdded = 0; ++ ++ // # of condition operands: ++ // Unconditional branches: 0 ++ // Floating point branches: 1 (opc) ++ // Int BranchZero: 2 (opc, reg) ++ // Int Branch: 3 (opc, reg0, reg1) ++ assert((Cond.size() <= 3) && ++ "# of LoongArch branch conditions must be <= 3!"); ++ ++ // Two-way Conditional branch. ++ if (FBB) { ++ MachineInstr &MI1 = *BuildCondBr(MBB, TBB, DL, Cond); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI1); ++ MachineInstr &MI2 = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI2); ++ return 2; ++ } ++ ++ // One way branch. ++ // Unconditional branch. ++ if (Cond.empty()) { ++ MachineInstr &MI = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI); ++ } ++ else {// Conditional branch. 
++ MachineInstr &MI = *BuildCondBr(MBB, TBB, DL, Cond); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI); ++ } ++ return 1; ++} ++ ++void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock &DestBB, ++ MachineBasicBlock &RestoreBB, ++ const DebugLoc &DL, ++ int64_t BrOffset, ++ RegScavenger *RS) const { ++ assert(RS && "RegScavenger required for long branching"); ++ assert(MBB.empty() && ++ "new block should be inserted for expanding unconditional branch"); ++ assert(MBB.pred_size() == 1); ++ ++ MachineFunction *MF = MBB.getParent(); ++ MachineRegisterInfo &MRI = MF->getRegInfo(); ++ const LoongArchSubtarget &Subtarget = MF->getSubtarget(); ++ bool is64 = Subtarget.isABI_LP64(); ++ const TargetRegisterClass *RC = ++ is64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ if (!is64 && !isInt<32>(BrOffset)) ++ report_fatal_error( ++ "Branch offsets outside of the signed 32-bit range not supported"); ++ ++ unsigned ScratchReg = MRI.createVirtualRegister(RC); ++ unsigned ZeroReg = is64 ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ auto II = MBB.end(); ++ ++ MachineInstr &Pcaddu12iMI = ++ *BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_PCADDU12I), ScratchReg) ++ .addMBB(&DestBB, LoongArchII::MO_PCREL_HI); ++ BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_ADDID2Op), ScratchReg) ++ .addReg(ScratchReg) ++ .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); ++ BuildMI(MBB, II, DL, get(LoongArch::JIRL)) ++ .addReg(ZeroReg) ++ .addReg(ScratchReg, RegState::Kill) ++ .addImm(0); ++ RS->enterBasicBlockEnd(MBB); ++ unsigned Scav = RS->scavengeRegisterBackwards( ++ *RC, MachineBasicBlock::iterator(Pcaddu12iMI), false, 0); ++ MRI.replaceRegWith(ScratchReg, Scav); ++ MRI.clearVirtRegs(); ++ RS->setRegUsed(Scav); ++} ++ ++unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, ++ int *BytesRemoved) const { ++ if (BytesRemoved) ++ *BytesRemoved = 0; ++ ++ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); ++ unsigned removed = 0; ++ ++ // Up to 2 branches are removed. ++ // Note that indirect branches are not removed. ++ while (I != REnd && removed < 2) { ++ // Skip past debug instructions. ++ if (I->isDebugInstr()) { ++ ++I; ++ continue; ++ } ++ if (!getAnalyzableBrOpc(I->getOpcode())) ++ break; ++ // Remove the branch. ++ I->eraseFromParent(); ++ if (BytesRemoved) ++ *BytesRemoved += getInstSizeInBytes(*I); ++ I = MBB.rbegin(); ++ ++removed; ++ } ++ ++ return removed; ++} ++ ++/// reverseBranchCondition - Return the inverse opcode of the ++/// specified Branch instruction. ++bool LoongArchInstrInfo::reverseBranchCondition( ++ SmallVectorImpl &Cond) const { ++ assert( (Cond.size() && Cond.size() <= 3) && ++ "Invalid LoongArch branch condition!"); ++ Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); ++ return false; ++} ++ ++LoongArchInstrInfo::BranchType LoongArchInstrInfo::analyzeBranch( ++ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, bool AllowModify, ++ SmallVectorImpl &BranchInstrs) const { ++ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); ++ ++ // Skip all the debug instructions. ++ while (I != REnd && I->isDebugInstr()) ++ ++I; ++ ++ if (I == REnd || !isUnpredicatedTerminator(*I)) { ++ // This block ends with no branches (it just falls through to its succ). ++ // Leave TBB/FBB null. 
++ TBB = FBB = nullptr; ++ return BT_NoBranch; ++ } ++ ++ MachineInstr *LastInst = &*I; ++ unsigned LastOpc = LastInst->getOpcode(); ++ BranchInstrs.push_back(LastInst); ++ ++ // Not an analyzable branch (e.g., indirect jump). ++ if (!getAnalyzableBrOpc(LastOpc)) ++ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; ++ ++ // Get the second to last instruction in the block. ++ unsigned SecondLastOpc = 0; ++ MachineInstr *SecondLastInst = nullptr; ++ ++ // Skip past any debug instruction to see if the second last actual ++ // is a branch. ++ ++I; ++ while (I != REnd && I->isDebugInstr()) ++ ++I; ++ ++ if (I != REnd) { ++ SecondLastInst = &*I; ++ SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); ++ ++ // Not an analyzable branch (must be an indirect jump). ++ if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc) ++ return BT_None; ++ } ++ ++ // If there is only one terminator instruction, process it. ++ if (!SecondLastOpc) { ++ // Unconditional branch. ++ if (LastInst->isUnconditionalBranch()) { ++ TBB = LastInst->getOperand(0).getMBB(); ++ return BT_Uncond; ++ } ++ ++ // Conditional branch ++ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); ++ return BT_Cond; ++ } ++ ++ // If we reached here, there are two branches. ++ // If there are three terminators, we don't know what sort of block this is. ++ if (++I != REnd && isUnpredicatedTerminator(*I)) ++ return BT_None; ++ ++ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst); ++ ++ // If second to last instruction is an unconditional branch, ++ // analyze it and remove the last instruction. ++ if (SecondLastInst->isUnconditionalBranch()) { ++ // Return if the last instruction cannot be removed. ++ if (!AllowModify) ++ return BT_None; ++ ++ TBB = SecondLastInst->getOperand(0).getMBB(); ++ LastInst->eraseFromParent(); ++ BranchInstrs.pop_back(); ++ return BT_Uncond; ++ } ++ ++ // Conditional branch followed by an unconditional branch. ++ // The last one must be unconditional. ++ if (!LastInst->isUnconditionalBranch()) ++ return BT_None; ++ ++ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); ++ FBB = LastInst->getOperand(0).getMBB(); ++ ++ return BT_CondUncond; ++} ++ ++MachineBasicBlock * ++LoongArchInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { ++ assert(MI.getDesc().isBranch() && "Unexpected opcode!"); ++ // The branch target is always the last operand. 
++ int NumOp = MI.getNumExplicitOperands(); ++ return MI.getOperand(NumOp - 1).getMBB(); ++} ++ ++bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const { ++/* ++ switch (BranchOpc) { ++ case LoongArch::B: ++ case LoongArch::BAL: ++ case LoongArch::BAL_BR: ++ case LoongArch::BC1F: ++ case LoongArch::BC1FL: ++ case LoongArch::BC1T: ++ case LoongArch::BC1TL: ++ case LoongArch::BEQ: case LoongArch::BEQ64: ++ case LoongArch::BEQL: ++ case LoongArch::BGEZ: case LoongArch::BGEZ64: ++ case LoongArch::BGEZL: ++ case LoongArch::BGEZAL: ++ case LoongArch::BGEZALL: ++ case LoongArch::BGTZ: case LoongArch::BGTZ64: ++ case LoongArch::BGTZL: ++ case LoongArch::BLEZ: case LoongArch::BLEZ64: ++ case LoongArch::BLEZL: ++ case LoongArch::BLTZ: case LoongArch::BLTZ64: ++ case LoongArch::BLTZL: ++ case LoongArch::BLTZAL: ++ case LoongArch::BLTZALL: ++ case LoongArch::BNE: case LoongArch::BNE64: ++ case LoongArch::BNEL: ++ return isInt<18>(BrOffset); ++ ++ case LoongArch::BC1EQZ: ++ case LoongArch::BC1NEZ: ++ case LoongArch::BC2EQZ: ++ case LoongArch::BC2NEZ: ++ case LoongArch::BEQC: case LoongArch::BEQC64: ++ case LoongArch::BNEC: case LoongArch::BNEC64: ++ case LoongArch::BGEC: case LoongArch::BGEC64: ++ case LoongArch::BGEUC: case LoongArch::BGEUC64: ++ case LoongArch::BGEZC: case LoongArch::BGEZC64: ++ case LoongArch::BGTZC: case LoongArch::BGTZC64: ++ case LoongArch::BLEZC: case LoongArch::BLEZC64: ++ case LoongArch::BLTC: case LoongArch::BLTC64: ++ case LoongArch::BLTUC: case LoongArch::BLTUC64: ++ case LoongArch::BLTZC: case LoongArch::BLTZC64: ++ case LoongArch::BNVC: ++ case LoongArch::BOVC: ++ case LoongArch::BGEZALC: ++ case LoongArch::BEQZALC: ++ case LoongArch::BGTZALC: ++ case LoongArch::BLEZALC: ++ case LoongArch::BLTZALC: ++ case LoongArch::BNEZALC: ++ return isInt<18>(BrOffset); ++ ++ case LoongArch::BEQZC: case LoongArch::BEQZC64: ++ case LoongArch::BNEZC: case LoongArch::BNEZC64: ++ return isInt<23>(BrOffset); ++ } ++ */ ++ switch (BranchOpc) { ++ case LoongArch::B: case LoongArch::B32: ++ return isInt<28>(BrOffset); ++ ++ case LoongArch::BEQZ: case LoongArch::BEQZ32: ++ case LoongArch::BNEZ: case LoongArch::BNEZ32: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ return isInt<23>(BrOffset); ++ ++ case LoongArch::BEQ: case LoongArch::BEQ32: ++ case LoongArch::BNE: case LoongArch::BNE32: ++ case LoongArch::BLT: case LoongArch::BLT32: ++ case LoongArch::BGE: case LoongArch::BGE32: ++ case LoongArch::BLTU: case LoongArch::BLTU32: ++ case LoongArch::BGEU: case LoongArch::BGEU32: ++ return isInt<18>(BrOffset); ++ } ++ ++ llvm_unreachable("Unknown branch instruction!"); ++} ++ ++ ++/// Predicate for distingushing between control transfer instructions and all ++/// other instructions for handling forbidden slots. Consider inline assembly ++/// as unsafe as well. ++bool LoongArchInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const { ++ if (MI.isInlineAsm()) ++ return false; ++ ++ return (MI.getDesc().TSFlags & LoongArchII::IsCTI) == 0; ++} ++ ++/// Predicate for distingushing instructions that have forbidden slots. ++bool LoongArchInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const { ++ return (MI.getDesc().TSFlags & LoongArchII::HasForbiddenSlot) != 0; ++} ++ ++/// Return the number of bytes of code the specified instruction may be. 
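++/// All LoongArch instruction encodings are 4 bytes, so outside of inline
++/// assembly (which is given a conservative upper bound below) this simply
++/// returns the size recorded in the MCInstrDesc.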
++unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { ++ switch (MI.getOpcode()) { ++ default: ++ return MI.getDesc().getSize(); ++ case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. ++ const MachineFunction *MF = MI.getParent()->getParent(); ++ const char *AsmStr = MI.getOperand(0).getSymbolName(); ++ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); ++ } ++ } ++} ++ ++MachineInstrBuilder ++LoongArchInstrInfo::genInstrWithNewOpc(unsigned NewOpc, ++ MachineBasicBlock::iterator I) const { ++ MachineInstrBuilder MIB; ++ ++ int ZeroOperandPosition = -1; ++ bool BranchWithZeroOperand = false; ++ if (I->isBranch() && !I->isPseudo()) { ++ auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo(); ++ ZeroOperandPosition = I->findRegisterUseOperandIdx(LoongArch::ZERO, false, TRI); ++ BranchWithZeroOperand = ZeroOperandPosition != -1; ++ } ++ ++ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); ++ ++ if (NewOpc == LoongArch::JIRL) { ++ MIB->RemoveOperand(0); ++ for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { ++ MIB.add(I->getOperand(J)); ++ } ++ MIB.addImm(0); ++ } else { ++ for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { ++ if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) ++ continue; ++ ++ MIB.add(I->getOperand(J)); ++ } ++ } ++ ++ MIB.copyImplicitOps(*I); ++ MIB.cloneMemRefs(*I); ++ return MIB; ++} ++ ++bool LoongArchInstrInfo::findCommutedOpIndices(const MachineInstr &MI, ++ unsigned &SrcOpIdx1, ++ unsigned &SrcOpIdx2) const { ++ assert(!MI.isBundle() && ++ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); ++ ++ const MCInstrDesc &MCID = MI.getDesc(); ++ if (!MCID.isCommutable()) ++ return false; ++ ++ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); ++} ++ ++// bstrins, bstrpick have the following constraints: ++// 0 <= lsb <= msb <= High ++static bool verifyBstrInstruction(const MachineInstr &MI, StringRef &ErrInfo, ++ const int64_t High) { ++ MachineOperand MOMsb = MI.getOperand(2); ++ if (!MOMsb.isImm()) { ++ ErrInfo = "Msb operand is not an immediate!"; ++ return false; ++ } ++ MachineOperand MOLsb = MI.getOperand(3); ++ if (!MOLsb.isImm()) { ++ ErrInfo = "Lsb operand is not an immediate!"; ++ return false; ++ } ++ ++ int64_t Lsb = MOLsb.getImm(); ++ if (!((0 <= Lsb) && (Lsb <= High))) { ++ ErrInfo = "Lsb operand is out of range!"; ++ return false; ++ } ++ ++ int64_t Msb = MOMsb.getImm(); ++ if (!((0 <= Msb) && (Msb <= High))) { ++ ErrInfo = "Msb operand is out of range!"; ++ return false; ++ } ++ ++ if (!(Lsb <= Msb)) { ++ ErrInfo = "Lsb operand is not less than or equal to msb operand!"; ++ return false; ++ } ++ ++ return true; ++} ++ ++// Perform target specific instruction verification. ++bool LoongArchInstrInfo::verifyInstruction(const MachineInstr &MI, ++ StringRef &ErrInfo) const { ++ // Verify that bstrins and bstrpick instructions are well formed. 
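++  // For example, BSTRPICK_W $rd, $rj, 7, 0 extracts bit field [7:0] and is
++  // well formed only when 0 <= lsbw <= msbw <= 31 (63 for the _D variants);
++  // anything else is reported through ErrInfo below.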
++  switch (MI.getOpcode()) {
++    case LoongArch::BSTRINS_W:
++    case LoongArch::BSTRPICK_W:
++      return verifyBstrInstruction(MI, ErrInfo, 31);
++    case LoongArch::BSTRINS_D:
++    case LoongArch::BSTRPICK_D:
++      return verifyBstrInstruction(MI, ErrInfo, 63);
++    default:
++      return true;
++  }
++
++  return true;
++}
++
++std::pair<unsigned, unsigned>
++LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
++  return std::make_pair(TF, 0u);
++}
++
++ArrayRef<std::pair<unsigned, const char *>>
++LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
++  using namespace LoongArchII;
++
++  static const std::pair<unsigned, const char *> Flags[] = {
++    {MO_PCREL_HI, "larch-pcrel-hi"},
++    {MO_PCREL_LO, "larch-pcrel-lo"},
++    {MO_TLSGD_HI, "larch-tlsgd-hi"},
++    {MO_TLSGD_LO, "larch-tlsgd-lo"},
++    {MO_TLSIE_HI, "larch-tlsie-hi"},
++    {MO_TLSIE_LO, "larch-tlsie-lo"},
++    {MO_TLSLE_HI, "larch-tlsle-hi"},
++    {MO_TLSLE_LO, "larch-tlsle-lo"},
++    {MO_ABS_HI, "larch-abs-hi"},
++    {MO_ABS_LO, "larch-abs-lo"},
++    {MO_ABS_HIGHER, "larch-abs-higher"},
++    {MO_ABS_HIGHEST, "larch-abs-highest"},
++    {MO_GOT_HI, "larch-got-hi"},
++    {MO_GOT_LO, "larch-got-lo"},
++    {MO_CALL_HI, "larch-call-hi"},
++    {MO_CALL_LO, "larch-call-lo"}
++  };
++  return makeArrayRef(Flags);
++}
+diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.h b/lib/Target/LoongArch/LoongArchInstrInfo.h
+new file mode 100644
+index 00000000..53191a94
+--- /dev/null
++++ b/lib/Target/LoongArch/LoongArchInstrInfo.h
+@@ -0,0 +1,246 @@
++//===- LoongArchInstrInfo.h - LoongArch Instruction Information -----------*- C++ -*-===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains the LoongArch implementation of the TargetInstrInfo class.
++//
++// FIXME: We need to override TargetInstrInfo::getInlineAsmLength method in
++// order for LoongArchLongBranch pass to work correctly when the code has inline
++// assembly. The returned value doesn't have to be the asm instruction's exact
++// size in bytes; LoongArchLongBranch only expects it to be the correct upper bound.
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
++
++#define DBAR_HINT 0x700
++
++#include "MCTargetDesc/LoongArchMCTargetDesc.h"
++#include "LoongArch.h"
++#include "LoongArchRegisterInfo.h"
++#include "llvm/ADT/ArrayRef.h"
++#include "llvm/CodeGen/MachineBasicBlock.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineMemOperand.h"
++#include "llvm/CodeGen/TargetInstrInfo.h"
++#include <cstdint>
++
++#define GET_INSTRINFO_HEADER
++#include "LoongArchGenInstrInfo.inc"
++
++namespace llvm {
++
++class MachineInstr;
++class MachineOperand;
++class LoongArchSubtarget;
++class TargetRegisterClass;
++class TargetRegisterInfo;
++
++class LoongArchInstrInfo : public LoongArchGenInstrInfo {
++  virtual void anchor();
++  const LoongArchRegisterInfo RI;
++  const LoongArchSubtarget &Subtarget;
++
++public:
++  enum BranchType {
++    BT_None,       // Couldn't analyze branch.
++    BT_NoBranch,   // No branches found.
++    BT_Uncond,     // One unconditional branch.
++    BT_Cond,       // One conditional branch.
++    BT_CondUncond, // A conditional branch followed by an unconditional branch.
++    BT_Indirect    // One indirect branch.
++  };
++
++  explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);
++
++  /// isLoadFromStackSlot - If the specified machine instruction is a direct
++  /// load from a stack slot, return the virtual or physical register number of
++  /// the destination along with the FrameIndex of the loaded stack slot. If
++  /// not, return 0. This predicate must return 0 if the instruction has
++  /// any side effects other than loading from the stack slot.
++  unsigned isLoadFromStackSlot(const MachineInstr &MI,
++                               int &FrameIndex) const override;
++
++  /// isStoreToStackSlot - If the specified machine instruction is a direct
++  /// store to a stack slot, return the virtual or physical register number of
++  /// the source reg along with the FrameIndex of the loaded stack slot. If
++  /// not, return 0. This predicate must return 0 if the instruction has
++  /// any side effects other than storing to the stack slot.
++  unsigned isStoreToStackSlot(const MachineInstr &MI,
++                              int &FrameIndex) const override;
++
++  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
++                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
++                   bool KillSrc) const override;
++
++  /// Branch Analysis
++  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
++                     MachineBasicBlock *&FBB,
++                     SmallVectorImpl<MachineOperand> &Cond,
++                     bool AllowModify) const override;
++
++  unsigned removeBranch(MachineBasicBlock &MBB,
++                        int *BytesRemoved = nullptr) const override;
++
++  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
++                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
++                        const DebugLoc &DL,
++                        int *BytesAdded = nullptr) const override;
++
++  void insertIndirectBranch(MachineBasicBlock &MBB,
++                            MachineBasicBlock &NewDestBB,
++                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
++                            int64_t BrOffset,
++                            RegScavenger *RS = nullptr) const override;
++  bool
++  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
++
++  BranchType analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
++                           MachineBasicBlock *&FBB,
++                           SmallVectorImpl<MachineOperand> &Cond,
++                           bool AllowModify,
++                           SmallVectorImpl<MachineInstr *> &BranchInstrs) const;
++
++  /// Get the block that branch instruction jumps to.
++  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
++
++  /// Determine if the branch target is in range.
++  bool isBranchOffsetInRange(unsigned BranchOpc,
++                             int64_t BrOffset) const override;
++
++  /// Predicate to determine if an instruction can go in a forbidden slot.
++  bool SafeInForbiddenSlot(const MachineInstr &MI) const;
++
++  /// Predicate to determine if an instruction has a forbidden slot.
++  bool HasForbiddenSlot(const MachineInstr &MI) const;
++
++  /// Insert nop instruction when hazard condition is found
++  void insertNoop(MachineBasicBlock &MBB,
++                  MachineBasicBlock::iterator MI) const override;
++
++  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
++  /// such, whenever a client has an instance of instruction info, it should
++  /// always be able to get register info as well (through this method).
++  const LoongArchRegisterInfo &getRegisterInfo() const;
++
++  bool expandPostRAPseudo(MachineInstr &MI) const override;
++
++  unsigned getOppositeBranchOpc(unsigned Opc) const;
++
++  /// Emit a series of instructions to load an immediate.
++  unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
++                         MachineBasicBlock::iterator II,
++                         const DebugLoc &DL) const;
++
++  /// Return the number of bytes of code the specified instruction may be.
++ unsigned getInstSizeInBytes(const MachineInstr &MI) const override; ++ ++ void storeRegToStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ Register SrcReg, bool isKill, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI) const override { ++ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); ++ } ++ ++ void loadRegFromStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ Register DestReg, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI) const override { ++ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); ++ } ++ ++ void storeRegToStack(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register SrcReg, bool isKill, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ int64_t Offset) const; ++ ++ void loadRegFromStack(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register DestReg, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ int64_t Offset) const; ++ ++ /// Adjust register value(DestReg = SrcReg + Amount). ++ void ++ adjustReg(unsigned DestReg, unsigned SrcReg, int64_t Amount, ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ MachineInstr::MIFlag Flag = MachineInstr::MIFlag::NoFlags) const; ++ ++ /// Create an instruction which has the same operands and memory operands ++ /// as MI but has a new opcode. ++ MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, ++ MachineBasicBlock::iterator I) const; ++ ++ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, ++ unsigned &SrcOpIdx2) const override; ++ ++ /// Perform target specific instruction verification. ++ bool verifyInstruction(const MachineInstr &MI, ++ StringRef &ErrInfo) const override; ++ ++ std::pair ++ decomposeMachineOperandsTargetFlags(unsigned TF) const override; ++ ++ ArrayRef> ++ getSerializableDirectMachineOperandTargetFlags() const override; ++ ++protected: ++ /// If the specific machine instruction is a instruction that moves/copies ++ /// value from one register to another register return true along with ++ /// @Source machine operand and @Destination machine operand. ++ Optional ++ isCopyInstrImpl(const MachineInstr &MI) const override; ++ ++private: ++ ++ bool isZeroImm(const MachineOperand &op) const; ++ ++ MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI, ++ MachineMemOperand::Flags Flags) const; ++ ++ unsigned getAnalyzableBrOpc(unsigned Opc) const; ++ ++ void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, ++ MachineBasicBlock *&BB, ++ SmallVectorImpl &Cond) const; ++ ++ MachineInstr * ++ BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ const DebugLoc &DL, ArrayRef Cond) const; ++ ++ void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; ++ ++ void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; ++ ++ std::pair compareOpndSize(unsigned Opc, ++ const MachineFunction &MF) const; ++ ++ /// Expand pseudo Int-to-FP conversion instructions. ++ /// ++ /// For example, the following pseudo instruction ++ /// PseudoCVT_D32_W D2, A5 ++ /// gets expanded into these two instructions: ++ /// MTC1 F4, A5 ++ /// CVT_D32_W D2, F4 ++ /// ++ /// We do this expansion post-RA to avoid inserting a floating point copy ++ /// instruction between MTC1 and CVT_D32_W. 
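++ /// (In the example above, MTC1 moves the integer operand from its GPR into
++ /// an FPR; CVT_D32_W then converts that single word to double precision.)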
++ void expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ unsigned CvtOpc, unsigned MovOpc, bool IsI64) const; ++ ++ void expandEhReturn(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H +diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.td b/lib/Target/LoongArch/LoongArchInstrInfo.td +new file mode 100644 +index 00000000..5cfb5cd5 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -0,0 +1,1867 @@ ++//===- LoongArchInstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++include "LoongArchInstrFormats.td" ++ ++def SDT_Bstrpick : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>; ++def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, ++ SDTCisSameAs<0, 4>]>; ++ ++def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; ++ ++def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; ++ ++def SDT_DBAR : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; ++def LoongArchDBAR : SDNode<"LoongArchISD::DBAR", SDT_DBAR, [SDNPHasChain,SDNPSideEffect]>; ++ ++def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; ++ ++def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++//===---------------------------------------------------------------------===/ ++// Operand, Complex Patterns and Transformations Definitions. 
++//===---------------------------------------------------------------------===/ ++ ++def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ ++ return cast(N->getOperand(1))->getVT().bitsLT(MVT::i32); ++}]>; ++ ++def immz : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>; ++def immZExt12 : PatLeaf<(imm), [{ return isUInt<12>(N->getZExtValue()); }]>; ++def immSExt12 : PatLeaf<(imm), [{ return isInt<12>(N->getSExtValue()); }]>; ++def immSExt13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; ++ ++def immZExt2Alsl : ImmLeaf(Imm - 1);}]>; ++//class ImmAsmOperand : AsmOperandClass { ++// let RenderMethod = "addImmOperands"; ++// let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; ++// let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; ++//} ++// ++//def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } ++//def imm8 : Operand, ImmLeaf { ++// let ParserMatchClass = Imm8AsmOperand; ++//} ++ ++def HasLSX : Predicate<"Subtarget->hasLSX()">, ++ AssemblerPredicate<(all_of FeatureLSX)>; ++def HasLASX : Predicate<"Subtarget->hasLASX()">, ++ AssemblerPredicate<(all_of FeatureLASX)>; ++ ++class EXT_LSX { ++ list ExtPredicate = [HasLSX]; ++} ++ ++class EXT_LASX { ++ list ExtPredicate = [HasLASX]; ++} ++ ++class SImmOperand : AsmOperandClass { ++ let Name = "SImm" # width; ++ let DiagnosticType = "InvalidSImm" # width; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isSImm<" # width # ">"; ++} ++ ++def SImm2Operand : SImmOperand<2>; ++def simm2 : Operand, ImmLeaf= -2 && Imm < 2; }]> { ++ let ParserMatchClass = SImm2Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++} ++def SImm3Operand : SImmOperand<3>; ++def simm3 : Operand, ImmLeaf= -4 && Imm < 4; }]> { ++ let ParserMatchClass = SImm3Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<3>"; ++} ++ ++def SImm5Operand : SImmOperand<5>; ++def simm5 : Operand, ImmLeaf= -16 && Imm < 16; }]> { ++ let ParserMatchClass = SImm5Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; ++} ++ ++def simm5_32 : Operand, ImmLeaf= -16 && Imm < 16; }]> { ++ let ParserMatchClass = SImm5Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; ++} ++ ++def SImm8Operand : SImmOperand<8>; ++def simm8 : Operand, ImmLeaf= -128 && Imm < 128; }]> { ++ let ParserMatchClass = SImm8Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++} ++def simm8_32 : Operand, ImmLeaf= -128 && Imm < 128; }]> { ++ let ParserMatchClass = SImm8Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++} ++ ++def SImm12Operand : SImmOperand<12>; ++def simm12 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { ++ let ParserMatchClass = SImm12Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++} ++def simm12_32 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { ++ let ParserMatchClass = SImm12Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++} ++ ++def SImm14Operand : SImmOperand<14>; ++def simm14 : Operand, ImmLeaf= -8192 && Imm < 8192; }]> { ++ let ParserMatchClass = SImm14Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<14>"; ++} ++ ++def SImm15Operand : SImmOperand<15>; ++def simm15 : Operand, ImmLeaf= -16384 && Imm < 16384; }]> { ++ let ParserMatchClass = SImm15Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<15>"; ++} ++ ++def SImm16Operand : SImmOperand<16>; ++def simm16 : Operand, ImmLeaf= -32768 && Imm < 32768; }]> { ++ let ParserMatchClass = 
SImm16Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>"; ++} ++ ++def SImm20Operand : SImmOperand<20>; ++def simm20 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { ++ let ParserMatchClass = SImm20Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; ++} ++def simm20_32 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { ++ let ParserMatchClass = SImm20Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; ++} ++ ++def SImm21Operand : SImmOperand<21>; ++def simm21 : Operand, ImmLeaf= -1048576 && Imm < 1048576; }]> { ++ let ParserMatchClass = SImm21Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<21>"; ++} ++ ++def SImm26Operand : SImmOperand<26>; ++def simm26 : Operand, ImmLeaf= -33554432 && Imm < 33554432; }]> { ++ let ParserMatchClass = SImm26Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<26>"; ++} ++ ++def UImm1Operand : AsmOperandClass { ++ let Name = "UImm1"; ++ let RenderMethod = "addUImmOperands<1>"; ++ let PredicateMethod = "isUImm<1>"; ++ let DiagnosticType = "InvalidImm0_1"; ++} ++ ++def UImm2Operand : AsmOperandClass { ++ let Name = "UImm2"; ++ let RenderMethod = "addUImmOperands<2>"; ++ let PredicateMethod = "isUImm<2>"; ++ let DiagnosticType = "InvalidImm0_3"; ++} ++ ++def UImm3Operand : AsmOperandClass { ++ let Name = "UImm3"; ++ let RenderMethod = "addUImmOperands<3>"; ++ let PredicateMethod = "isUImm<3>"; ++ let DiagnosticType = "InvalidImm0_7"; ++} ++ ++def UImm4Operand : AsmOperandClass { ++ let Name = "UImm4"; ++ let RenderMethod = "addUImmOperands<4>"; ++ let PredicateMethod = "isUImm<4>"; ++ let DiagnosticType = "InvalidImm0_15"; ++} ++ ++def UImm5Operand : AsmOperandClass { ++ let Name = "UImm5"; ++ let RenderMethod = "addUImmOperands<5>"; ++ let PredicateMethod = "isUImm<5>"; ++ let DiagnosticType = "InvalidImm0_31"; ++} ++ ++def uimm1i : Operand, ImmLeaf= 0 && Imm < 2; }]> { ++ let PrintMethod = "printUImm<1>"; ++ let ParserMatchClass = UImm1Operand; ++} ++ ++def uimm2 : Operand, ImmLeaf= 0 && Imm < 4; }]> { ++ let PrintMethod = "printUImm<2>"; ++ let ParserMatchClass = UImm2Operand; ++} ++ ++def uimm3 : Operand, ImmLeaf= 0 && Imm < 8; }]> { ++ let PrintMethod = "printUImm<3>"; ++ let ParserMatchClass = UImm3Operand; ++} ++ ++def uimm4i : Operand, ImmLeaf= 0 && Imm < 16; }]> { ++ let PrintMethod = "printUImm<4>"; ++ let ParserMatchClass = UImm4Operand; ++} ++ ++def uimm5 : Operand, ImmLeaf= 0 && Imm < 32; }]> { ++ let PrintMethod = "printUImm<5>"; ++ let ParserMatchClass = UImm5Operand; ++} ++ ++def UImm6Operand : AsmOperandClass { ++ let Name = "UImm6"; ++ let RenderMethod = "addUImmOperands<16>"; ++ let PredicateMethod = "isUImm<6>"; ++ let DiagnosticType = "InvalidImm0_63"; ++} ++def uimm6 : Operand, ImmLeaf= 0 && Imm < 64; }]> { ++ let PrintMethod = "printUImm<6>"; ++ let ParserMatchClass = UImm6Operand; ++} ++ ++def UImm7Operand : AsmOperandClass { ++ let Name = "UImm7"; ++ let RenderMethod = "addUImmOperands<16>"; ++ let PredicateMethod = "isUImm<7>"; ++ let DiagnosticType = "InvalidImm0_127"; ++} ++ ++def uimm7i : Operand, ImmLeaf= 0 && Imm < 128; }]> { ++ let PrintMethod = "printUImm<7>"; ++ let ParserMatchClass = UImm7Operand; ++} ++ ++def UImm12Operand : AsmOperandClass { ++ let Name = "UImm12"; ++ let RenderMethod = "addUImmOperands<12>"; ++ let PredicateMethod = "isUImm<12>"; ++ let DiagnosticType = "InvalidImm0_4095"; ++} ++def uimm12 : Operand, ImmLeaf= 0 && Imm < 4096; }]> { ++ let PrintMethod = "printUImm<12>"; ++ let ParserMatchClass = UImm12Operand; ++} ++def uimm12_32 : 
Operand, ImmLeaf= 0 && Imm < 4096; }]> { ++ let PrintMethod = "printUImm<12>"; ++ let ParserMatchClass = UImm12Operand; ++} ++ ++def UImm15Operand : AsmOperandClass { ++ let Name = "UImm15"; ++ let RenderMethod = "addUImmOperands<15>"; ++ let PredicateMethod = "isUImm<15>"; ++ let DiagnosticType = "InvalidImm0_32767"; ++} ++def uimm15 : Operand, ImmLeaf= 0 && Imm < 32768; }]> { ++ let PrintMethod = "printUImm<15>"; ++ let ParserMatchClass = UImm15Operand; ++} ++ ++def UImm14Operand : AsmOperandClass { ++ let Name = "UImm14"; ++ let RenderMethod = "addUImmOperands<14>"; ++ let PredicateMethod = "isUImm<14>"; ++ let DiagnosticType = "InvalidImm0_16383"; ++} ++def uimm14 : Operand, ImmLeaf= 0 && Imm < 16384; }]> { ++ let PrintMethod = "printUImm<14>"; ++ let ParserMatchClass = UImm14Operand; ++} ++def uimm14_32 : Operand, ImmLeaf= 0 && Imm < 16384; }]> { ++ let PrintMethod = "printUImm<14>"; ++ let ParserMatchClass = UImm14Operand; ++} ++ ++def UImm8Operand : AsmOperandClass { ++ let Name = "UImm8"; ++ let RenderMethod = "addUImmOperands<8>"; ++ let PredicateMethod = "isUImm<8>"; ++ let DiagnosticType = "InvalidImm0_255"; ++} ++def uimm8_64 : Operand, ImmLeaf= 0 && Imm < 256; }]> { ++ let PrintMethod = "printUImm<8>"; ++ let ParserMatchClass = UImm8Operand; ++} ++ ++def uimm8_32 : Operand, ImmLeaf= 0 && Imm < 256; }]> { ++ let PrintMethod = "printUImm<8>"; ++ let ParserMatchClass = UImm8Operand; ++} ++ ++def addr : ++ComplexPattern; ++ ++def addrDefault : ++ComplexPattern; ++ ++def addrRegImm : ++ComplexPattern; ++ ++def addrimm14lsl2 : ComplexPattern; ++ ++class ConstantUImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantUImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits # "_" # Offset; ++} ++class SImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "SImm" # Bits; ++ let RenderMethod = "addSImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits; ++} ++class UImmAnyAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ImmAny"; ++ let RenderMethod = "addConstantUImmOperands<32>"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "ImmAny"; ++} ++ ++def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> { ++ let Name = "UImm32_Coerced"; ++ let DiagnosticType = "UImm32_Coerced"; ++} ++def SImm32RelaxedAsmOperandClass ++ : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { ++ let Name = "SImm32_Relaxed"; ++ let PredicateMethod = "isAnyImm<33>"; ++ let DiagnosticType = "SImm32_Relaxed"; ++} ++def SImm32AsmOperandClass ++ : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; ++def ConstantUImm26AsmOperandClass ++ : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; ++ ++def ConstantUImm20AsmOperandClass ++ : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; ++ ++def ConstantUImm2Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm20AsmOperandClass], 1>; ++ ++class UImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "UImm" # Bits; ++ let RenderMethod = "addUImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isUImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits; ++} ++ ++def 
UImm16RelaxedAsmOperandClass ++ : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "UImm16_Relaxed"; ++} ++ ++def ConstantSImm14Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm14Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<14, 2>"; ++ let SuperClasses = [UImm16RelaxedAsmOperandClass]; ++ let DiagnosticType = "SImm14_Lsl2"; ++} ++ ++foreach I = {2} in ++ def simm14_lsl # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<14, " # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm14Lsl" # I # "AsmOperandClass"); ++ } ++ ++def uimm16_64_relaxed : Operand { ++ let PrintMethod = "printUImm<16>"; ++ let ParserMatchClass = ++ !cast("UImm16RelaxedAsmOperandClass"); ++} ++ ++def uimm2_plus1 : Operand { ++ let PrintMethod = "printUImm<2, 1>"; ++ let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; ++ let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; ++ let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; ++} ++ ++// like simm32 but coerces simm32 to uimm32. ++def uimm32_coerced : Operand { ++ let ParserMatchClass = !cast("UImm32CoercedAsmOperandClass"); ++} ++ ++def imm64: Operand; ++ ++def LoongArchMemAsmOperand : AsmOperandClass { ++ let Name = "Mem"; ++ let ParserMethod = "parseMemOperand"; ++} ++ ++def LoongArchMemSimm14AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm14"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<14>"; ++ let DiagnosticType = "MemSImm14"; ++} ++ ++foreach I = {2} in ++ def LoongArchMemSimm14Lsl # I # AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm14_" # I; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<14, " # I # ">"; ++ let DiagnosticType = "MemSImm14Lsl" # I; ++ } ++ ++def LoongArchMemSimmPtrAsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimmPtr"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithPtrSizeOffset"; ++ let DiagnosticType = "MemSImmPtr"; ++} ++ ++class mem_generic : Operand { ++ let PrintMethod = "printMemOperand"; ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = LoongArchMemAsmOperand; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++// Address operand ++def mem : mem_generic; ++def mem_simmptr : mem_generic { ++ let ParserMatchClass = LoongArchMemSimmPtrAsmOperand; ++} ++ ++foreach I = {2} in ++ def mem_simm14_lsl # I : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm14_lsl" # I)); ++ let EncoderMethod = "getSimm14MemEncoding<" # I # ">"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm14Lsl" # I # "AsmOperand"); ++ } ++ ++def mem_ea : Operand { ++ let PrintMethod = "printMemOperandEA"; ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++def LoongArchJumpTargetAsmOperand : AsmOperandClass { ++ let Name = "JumpTarget"; ++ let ParserMethod = "parseJumpTarget"; ++ let PredicateMethod = "isImm"; ++ let RenderMethod = "addImmOperands"; ++} ++ ++def jmptarget : Operand { ++ let EncoderMethod = "getJumpTargetOpValue"; ++ let 
ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} ++ ++def brtarget : Operand { ++ let EncoderMethod = "getBranchTargetOpValue"; ++ let OperandType = "OPERAND_PCREL"; ++ let DecoderMethod = "DecodeBranchTarget"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} ++ ++def calltarget : Operand { ++ let EncoderMethod = "getJumpTargetOpValue"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} ++ ++// ++//SDNode ++// ++def IsGP64bit : Predicate<"Subtarget->is64Bit()">, ++ AssemblerPredicate<(all_of Feature64Bit)>; ++def IsGP32bit : Predicate<"!Subtarget->is64Bit()">, ++ AssemblerPredicate<(all_of (not Feature64Bit))>; ++def SDT_LoongArchCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; ++def SDT_LoongArchCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; ++ ++def LoongArchRet : SDNode<"LoongArchISD::Ret", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++def LoongArchERet : SDNode<"LoongArchISD::ERet", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; ++ ++def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_LoongArchCallSeqStart, ++ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; ++def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_LoongArchCallSeqEnd, ++ [SDNPHasChain, SDNPSideEffect, ++ SDNPOptInGlue, SDNPOutGlue]>; ++def LoongArchAddress : SDNode<"LoongArchISD::GlobalAddress", SDTIntUnaryOp>; ++ ++// Return RA. ++let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in { ++ def RetRA : LoongArchPseudo<(outs), (ins), [(LoongArchRet)]>; ++ ++ let hasSideEffects=1 in ++ def ERet : LoongArchPseudo<(outs), (ins), [(LoongArchERet)]>; ++} ++ ++let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { ++def ADJCALLSTACKDOWN : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ++ [(callseq_start timm:$amt1, timm:$amt2)]>; ++def ADJCALLSTACKUP : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ++ [(callseq_end timm:$amt1, timm:$amt2)]>; ++} ++ ++class LoongArchPat : Pat, PredicateControl; ++ ++def SDT_LoongArchJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; ++ ++def LoongArchJmpLink : SDNode<"LoongArchISD::JmpLink",SDT_LoongArchJmpLink, ++ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, ++ SDNPVariadic]>; ++ ++def LoongArchTailCall : SDNode<"LoongArchISD::TailCall", SDT_LoongArchJmpLink, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++class GPR_32 { list GPRPredicates = [IsGP32bit]; } ++class GPR_64 { list GPRPredicates = [IsGP64bit]; } ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Class Templates ++//===---------------------------------------------------------------------===/ ++///R2 ++class Int_Reg2 ++ : InstForm<(outs RO:$rd), (ins RO:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (OpNode RO:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Iocsrrd ++ : InstForm<(outs RD:$rd), (ins RS:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RD:$rd, (OpNode RS:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Rdtime ++ : InstForm<(outs RO:$rd, RO:$rj), (ins), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set (OpNode RO:$rd, RO:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Iocsrwr ++ : InstForm<(outs), (ins RD:$rd, RS:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set (OpNode RD:$rd, RS:$rj))], ++ FrmR, opstr>; ++ ++class Float_Reg2 ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode RO:$fj))], ++ FrmFR, opstr>; ++ ++class Count1 ++ : InstForm<(outs RO:$rd), (ins RO:$rj), ++ !strconcat(opstr, "\t$rd, 
$rj"), ++ [(set RO:$rd, (OpNode (not RO:$rj)))], ++ FrmR, opstr>; ++ ++class SignExtInReg ++ : InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (sext_inreg RO:$rj, vt))], FrmR, opstr>; ++ ++///R3 ++class Int_Reg3 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class Int_Reg3_Crc ++ : InstForm<(outs RS:$rd), (ins RD:$rj, RS:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RS:$rd, (OpNode RD:$rj, RS:$rk))], ++ FrmR, opstr>; ++ ++class SetCC_R ++ : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set GPR32Opnd:$rd, (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class SetCC_I ++ : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, ImmOpnd:$imm12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set GPR32Opnd:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], ++ FrmR, opstr>; ++ ++class ATOMIC ++ : InstForm<(outs RD:$rd), (ins RD:$rk, MO:$addr), ++ !strconcat(opstr, "\t$rd, $rk, $addr"), ++ [(set RD:$rd, (OpNode RD:$rk, Addr:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++ let mayStore = 1; ++ let Constraints = "@earlyclobber $rd"; ++} ++ ++class Nor ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (not (or RO:$rj, RO:$rk)))], ++ FrmR, opstr>; ++ ++class Shift_Var ++ : InstForm<(outs RO:$rd), (ins RO:$rj, GPR32Opnd:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (OpNode RO:$rj, GPR32Opnd:$rk))], ++ FrmR, opstr>; ++ ++class Float_Reg3 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [(set RO:$fd, (OpNode RO:$fj, RO:$fk))], ++ FrmR, opstr>; ++ ++class Float_Reg3_MA ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [(set RO:$fd, (OpNode (fabs RO:$fj), (fabs RO:$fk)))], ++ FrmR, opstr>; ++ ++class Float_Int_Reg3 ++ : InstForm<(outs RD:$fd), (ins RS:$rj, RS:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(set RS:$fd, (OpNode RS:$rj, RS:$rk))], ++ FrmR, opstr>; ++ ++///R4 ++class Mul_Reg4 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), ++ [], ++ FrmFR, opstr>; ++ ++class NMul_Reg4 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), ++ [], ++ FrmFR, opstr>; ++ ++///R2_IMM5 ++class Shift_Imm32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm5:$imm5), ++ !strconcat(opstr, "\t$rd, $rj, $imm5"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm5:$imm5))], ++ FrmR, opstr>; ++ ++///R2_IMM6 ++class Shift_Imm64 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm6:$imm6), ++ !strconcat(opstr, "\t$rd, $rj, $imm6"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm6:$imm6))], ++ FrmR, opstr>; ++ ++///LOAD_STORE ++class FLd ++ : InstForm<(outs RD:$rd), (ins MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RD:$rd, (OpNode addrDefault:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeFMem"; ++ let mayLoad = 1; ++} ++ ++class Ld ++ : InstForm<(outs RD:$rd), (ins MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RD:$rd, (OpNode Addr:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class FSt ++ : InstForm<(outs), (ins RD:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, 
$addr"), ++ [(OpNode RD:$rd, addrDefault:$addr)], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeFMem"; ++ let mayStore = 1; ++} ++ ++class St ++ : InstForm<(outs), (ins RS:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(OpNode RS:$rd, addr:$addr)], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++} ++ ++/// R2_IMM12 ++class Int_Reg2_Imm12 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], ++ FrmR, opstr>; ++class RELOC_rrii ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12))], ++ FrmR, opstr>; ++ ++///R2_IMM14 ++class LdPtr ++ : InstForm<(outs RO:$rd), (ins mem_simm14_lsl2:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr>{ ++ let DecoderMethod = "DecodeMemSimm14"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class StPtr ++ : InstForm<(outs), (ins RO:$rd, mem_simm14_lsl2:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++} ++ ++///R2_IMM16 ++class FJirl ++ : InstForm<(outs RO:$rd), (ins RO:$rj, opnd:$offs16), ++ !strconcat(opstr, "\t$rd, $rj, $offs16"), ++ [], FrmJ, opstr>; ++ ++class Beq ++ : InstForm<(outs), (ins RO:$rj, RO:$rd, opnd:$offs16), ++ !strconcat(opstr, "\t$rj, $rd, $offs16"), ++ [(brcond (i32 (cond_op RO:$rj, RO:$rd)), bb:$offs16)], ++ FrmI, opstr> { ++ let isBranch = 1; ++ let isTerminator = 1; ++ bit isCTI = 1; ++} ++ ++///R1_IMM21 ++class Beqz ++ : InstForm<(outs), (ins RO:$rj, opnd:$offs21), ++ !strconcat(opstr, "\t$rj, $offs21"), ++ [(brcond (i32 (cond_op RO:$rj, 0)), bb:$offs21)], ++ FrmI, opstr> { ++ let isBranch = 1; ++ let isTerminator = 1; ++ bit isCTI = 1; ++} ++ ++///IMM26 ++class JumpFB : ++ InstForm<(outs), (ins opnd:$offset26), !strconcat(opstr, "\t$offset26"), ++ [(operator targetoperator:$offset26)], FrmJ, opstr> { ++ let isBranch = 1; ++ let isTerminator=1; ++ let isBarrier=1; ++ let DecoderMethod = "DecodeJumpTarget"; ++ bit isCTI = 1; ++} ++ ++/// R3_SA ++class Reg3_Sa ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk, ImmOpnd:$sa), ++ !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), ++ [(set RO:$rd, (OpNode RO:$rj, RO:$rk, ImmOpnd:$sa))], ++ FrmR, opstr>; ++ ++class Reg3_SaU ++ : InstForm<(outs RD:$rd), (ins RS:$rj, RS:$rk, ImmOpnd:$sa), ++ !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), ++ [(set RD:$rd, (OpNode RS:$rj, RS:$rk, ImmOpnd:$sa))], ++ FrmR, opstr>; ++ ++/// Assert ++class Assert ++ : InstForm<(outs), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rj, $rk"), ++ [(set (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class Code15 ++ : InstForm<(outs), (ins uimm15:$Code), ++ !strconcat(opstr, "\t$Code"), ++ [(set (OpNode uimm15:$Code))], ++ FrmOther, opstr>; ++ ++class TrapBase ++ : LoongArchPseudo<(outs), (ins), [(trap)]>, ++ PseudoInstExpansion<(RealInst 0)> { ++ let isBarrier = 1; ++ let isTerminator = 1; ++ let isCodeGenOnly = 1; ++ let isCTI = 1; ++} ++ ++class CSR ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $csr"), ++ [(set RO:$rd, (OpNode ImmOpnd:$csr))], ++ FrmOther, opstr>; ++ ++class CSRW ++ : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $csr"), ++ [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$csr))], ++ FrmOther, opstr>{ 
++ let Constraints = "$rd = $dst"; ++} ++ ++class CSRX ++ : InstForm<(outs RO:$dst), (ins RO:$rd, RO:$rj, ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $rj, $csr"), ++ [(set RO:$dst, (OpNode RO:$rd, RO:$rj, ImmOpnd:$csr))], ++ FrmOther, opstr>{ ++ let Constraints = "$rd = $dst"; ++} ++ ++class CAC ++ : InstForm<(outs), (ins uimm5:$op, RO:$rj, ImmOpnd:$si12), ++ !strconcat(opstr, "\t$op, $rj, $si12"), ++ [(set (OpNode uimm5:$op, RO:$rj, ImmOpnd:$si12))], ++ FrmOther, opstr>; ++ ++class LEVEL ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm8_64:$level), ++ !strconcat(opstr, "\t$rd, $rj, $level"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm8_64:$level))], ++ FrmOther, opstr>; ++ ++class SEQ ++ : InstForm<(outs), (ins RO:$rj, uimm8_64:$seq), ++ !strconcat(opstr, "\t$rj, $seq"), ++ [(set (OpNode RO:$rj, uimm8_64:$seq))], ++ FrmOther, opstr>; ++ ++class Wait ++ : InstForm<(outs), (ins uimm15:$hint), ++ !strconcat(opstr, "\t$hint"), ++ [(set (OpNode uimm15:$hint))], ++ FrmOther, opstr>; ++ ++class Invtlb ++ : InstForm<(outs), (ins uimm5:$op, RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$op, $rj, $rk"), ++ [(set (OpNode uimm5:$op, RO:$rj, RO:$rk))], ++ FrmOther, opstr>; ++ ++class OP32 ++ : InstForm<(outs), (ins), ++ !strconcat(opstr, ""), ++ [(set (OpNode))], ++ FrmOther, opstr>; ++ ++class Bar ++ : InstForm<(outs), (ins uimm15:$hint), ++ !strconcat(opstr, "\t$hint"), ++ [(set (OpNode uimm15:$hint))], ++ FrmOther, opstr>; ++ ++//class CA op, string opstr> ++// : R3_CA; ++ ++class SI16_R2 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, simm16:$si16), ++ !strconcat(opstr, "\t$rd, $rj, $si16"), ++ [(set RO:$rd, (OpNode RO:$rj, simm16:$si16))], ++ FrmR, opstr>; ++ ++class SI20 ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$rd, (OpNode ImmOpnd:$si20))], ++ FrmR, opstr>; ++let isCodeGenOnly = 1, Constraints = "$dst = $rd" in ++class SI20_R2 ++ : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$si20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$si20))], ++ FrmR, opstr>; ++class RELOC_rii ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20, ImmOpnd:$i20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$rd, (OpNode ImmOpnd:$si20, ImmOpnd:$i20))], ++ FrmR, opstr>; ++ ++// preld ++class Preld ++ : InstForm<(outs), (ins RO:$rj, MemOpnd:$addr, uimm5:$hint), ++ !strconcat(opstr, "\t$hint, $rj, $addr"), ++ [(set (OpNode RO:$rj, MemOpnd:$addr, uimm5:$hint))], ++ FrmR, opstr>; ++class Preld_Raw ++ : InstForm<(outs), (ins RO:$rj, simm12:$imm12, uimm5:$hint), ++ !strconcat(opstr, "\t$hint, $rj, $imm12"), ++ [], ++ FrmR, opstr>; ++class IsCall { ++ bit isCall = 1; ++ bit isCTI = 1; ++} ++ ++class EffectiveAddress ++ : InstForm<(outs RO:$rd), (ins mem_ea:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RO:$rd, addr:$addr)], FrmI, ++ !strconcat(opstr, "_lea")> { ++ let isCodeGenOnly = 1; ++ let hasNoSchedulingInfo = 1; ++ let DecoderMethod = "DecodeMem"; ++} ++ ++def PtrRC : Operand { ++ let MIOperandInfo = (ops ptr_rc); ++ let DecoderMethod = "DecodePtrRegisterClass"; ++ let ParserMatchClass = GPR32AsmOperand; ++} ++ ++class Atomic2Ops : ++ LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr), ++ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>; ++ ++class Atomic2OpsPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class Atomic2OpsSubwordPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2, ++ RC:$shiftamnt), []>; ++class 
AtomicCmpSwap : ++ LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), ++ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; ++ ++class AtomicCmpSwapPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class AtomicCmpSwapSubwordPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, ++ RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class LoongArchInstAlias : ++ InstAlias, PredicateControl; ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Definitions. ++//===---------------------------------------------------------------------===/ ++/// ++/// R2 ++/// ++ ++def CLO_D : Count1<"clo.d", GPR64Opnd, ctlz>, R2I<0b01000>; ++def CLZ_D : Int_Reg2<"clz.d", GPR64Opnd, ctlz>, R2I<0b01001>; ++def CTO_D : Count1<"cto.d", GPR64Opnd, cttz>, R2I<0b01010>; ++def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; ++ ++def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] ++def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; ++def REVB_D : Int_Reg2<"revb.d", GPR64Opnd>, R2I<0b01111>; ++def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; ++def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] ++ ++def BITREV_8B : Int_Reg2<"bitrev.8b", GPR64Opnd>, R2I<0b10011>; //[] ++def BITREV_D : Int_Reg2<"bitrev.d", GPR64Opnd, bitreverse>, R2I<0b10101>; ++ ++def EXT_W_H : SignExtInReg<"ext.w.h", GPR64Opnd, i16>, R2I<0b10110>; ++def EXT_W_B : SignExtInReg<"ext.w.b", GPR64Opnd, i8>, R2I<0b10111>; ++ ++def RDTIME_D : Int_Reg2_Rdtime<"rdtime.d", GPR64Opnd>, R2I<0b11010>; ++def RDTIMEL_W : Int_Reg2_Rdtime<"rdtimel.w", GPR64Opnd>, R2I<0b11000>; ++def RDTIMEH_W : Int_Reg2_Rdtime<"rdtimeh.w", GPR64Opnd>, R2I<0b11001>; ++/// ++/// R3 ++/// ++def ADD_D : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; ++def SUB_D : Int_Reg3<"sub.d", GPR64Opnd, sub>, R3I<0b0100011>; ++ ++def SLT : SetCC_R<"slt", GPR64Opnd, setlt>, R3I<0b0100100>; ++def SLTU : SetCC_R<"sltu", GPR64Opnd, setult>, R3I<0b0100101>; ++def MASKEQZ : Int_Reg3<"maskeqz", GPR64Opnd>, R3I<0b0100110>; //[] ++def MASKNEZ : Int_Reg3<"masknez", GPR64Opnd>, R3I<0b0100111>; //[] ++ ++def NOR : Nor<"nor", GPR64Opnd>, R3I<0b0101000>; ++def AND : Int_Reg3<"and", GPR64Opnd, and>, R3I<0b0101001>; ++def OR : Int_Reg3<"or", GPR64Opnd, or>, R3I<0b0101010>; ++def XOR : Int_Reg3<"xor", GPR64Opnd, xor>, R3I<0b0101011>; ++def ORN : Int_Reg3<"orn", GPR64Opnd>, R3I<0b0101100>; ++def ANDN : Int_Reg3<"andn", GPR64Opnd>, R3I<0b0101101>; ++ ++def SLL_D : Shift_Var<"sll.d", GPR64Opnd, shl>, R3I<0b0110001>; ++def SRL_D : Shift_Var<"srl.d", GPR64Opnd, srl>, R3I<0b0110010>; ++def SRA_D : Shift_Var<"sra.d", GPR64Opnd, sra>, R3I<0b0110011>; ++def ROTR_D: Shift_Var<"rotr.d", GPR64Opnd, rotr>, R3I<0b0110111>; ++ ++def MUL_D : Int_Reg3<"mul.d", GPR64Opnd, mul>, R3I<0b0111011>; ++def MULH_D : Int_Reg3<"mulh.d", GPR64Opnd, mulhs>, R3I<0b0111100>; ++def MULH_DU : Int_Reg3<"mulh.du", GPR64Opnd, mulhu>, R3I<0b0111101>; ++def MULW_D_W : Int_Reg3<"mulw.d.w", GPR64Opnd>, R3I<0b0111110>; ++def MULW_D_WU : Int_Reg3<"mulw.d.wu", GPR64Opnd>, R3I<0b0111111>; ++ ++let usesCustomInserter = 1 in { ++def DIV_D : Int_Reg3<"div.d", GPR64Opnd, sdiv>, R3I<0b1000100>; ++def MOD_D : Int_Reg3<"mod.d", GPR64Opnd, srem>, R3I<0b1000101>; ++def DIV_DU : Int_Reg3<"div.du", GPR64Opnd, udiv>, R3I<0b1000110>; ++def MOD_DU : Int_Reg3<"mod.du", GPR64Opnd, 
urem>, R3I<0b1000111>; ++} ++ ++def CRC_W_D_W : Int_Reg3_Crc<"crc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crc_w_d_w>, R3I<0b1001011>; ++def CRCC_W_D_W : Int_Reg3_Crc<"crcc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crcc_w_d_w>, R3I<0b1001111>; ++/// ++/// SLLI ++/// ++def SLLI_D : Shift_Imm64<"slli.d", GPR64Opnd, shl>, R2_IMM6<0b00>; ++def SRLI_D : Shift_Imm64<"srli.d", GPR64Opnd, srl>, R2_IMM6<0b01>; ++def SRAI_D : Shift_Imm64<"srai.d", GPR64Opnd, sra>, R2_IMM6<0b10>; ++def ROTRI_D : Shift_Imm64<"rotri.d", GPR64Opnd, rotr>, R2_IMM6<0b11>; ++/// ++/// Misc ++/// ++def ALSL_WU : Reg3_SaU<"alsl.wu", GPR64Opnd, GPR32Opnd, uimm2_plus1>, R3_SA2<0b00011> { ++ let Pattern = [(set GPR64Opnd:$rd, ++ (i64 (zext (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))))]; ++} ++ ++def ALSL_D : Reg3_Sa<"alsl.d", GPR64Opnd, uimm2_plus1>, R3_SA2<0b10110> { ++ let Pattern = [(set GPR64Opnd:$rd, ++ (add GPR64Opnd:$rk, (shl GPR64Opnd:$rj, immZExt2Alsl:$sa)))]; ++} ++def BYTEPICK_D : Reg3_Sa<"bytepick.d", GPR64Opnd, uimm3>, R3_SA3; //[] ++ ++def ASRTLE_D : Assert<"asrtle.d", GPR64Opnd, int_loongarch_asrtle_d>, ASSERT<0b10>; ++def ASRTGT_D : Assert<"asrtgt.d", GPR64Opnd, int_loongarch_asrtgt_d>, ASSERT<0b11>; ++ ++def DBCL : Code15<"dbcl">, CODE15<0b1010101>; ++def HYPCALL : Code15<"hypcall">, CODE15<0b1010111>; ++ ++/// ++/// R2_IMM12 ++/// ++def SLTI : SetCC_I<"slti", GPR64Opnd, simm12, setlt>, R2_IMM12<0b000>; ++def SLTUI : SetCC_I<"sltui", GPR64Opnd, simm12, setult>, R2_IMM12<0b001>; ++def ADDI_W64 : Int_Reg2_Imm12<"addi.w", GPR64Opnd, simm12>, R2_IMM12<0b010>; ++def ADDI_D : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; ++def LU52I_D : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def ANDI : Int_Reg2_Imm12<"andi", GPR64Opnd, uimm12, and>, R2_IMM12<0b101>; ++def ORI : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; ++def XORI : Int_Reg2_Imm12<"xori", GPR64Opnd, uimm12, xor>, R2_IMM12<0b111>; ++ ++/// ++/// Privilege Instructions ++/// ++def CSRRD : CSR<"csrrd", GPR64Opnd, uimm14, int_loongarch_dcsrrd>, R1_CSR<0b0000000000100>; ++def CSRWR : CSRW<"csrwr", GPR64Opnd, uimm14, int_loongarch_dcsrwr>, R1_CSR<0b0000100000100>; ++def CSRXCHG : CSRX<"csrxchg", GPR64Opnd, uimm14, int_loongarch_dcsrxchg>, R2_CSR<0b00000100>; ++def IOCSRRD_D : Int_Reg2_Iocsrrd<"iocsrrd.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrrd_d>, R2P<0b011>; ++def IOCSRWR_D : Int_Reg2_Iocsrwr<"iocsrwr.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrwr_d>, R2P<0b111>; ++def CACOP : CAC<"cacop", GPR64Opnd, simm12, int_loongarch_dcacop>, R1_CACHE; ++def LDDIR : LEVEL<"lddir", GPR64Opnd>, R2_LEVEL<0b00000110010000>; ++def LDPTE : SEQ<"ldpte", GPR64Opnd>, R1_SEQ<0b00000110010001>; ++ ++def IDLE : Wait<"idle">, WAIT_FM; ++def INVTLB : Invtlb<"invtlb", GPR64Opnd>, R2_INVTLB; ++// ++def IOCSRRD_B : Int_Reg2<"iocsrrd.b", GPR64Opnd>, R2P<0b000>; ++def IOCSRRD_H : Int_Reg2<"iocsrrd.h", GPR64Opnd>, R2P<0b001>; ++def IOCSRRD_W : Int_Reg2<"iocsrrd.w", GPR64Opnd>, R2P<0b010>; ++// ++def TLBCLR : OP32<"tlbclr", int_loongarch_tlbclr>, IMM32<0b001000>; ++def TLBFLUSH : OP32<"tlbflush", int_loongarch_tlbflush>, IMM32<0b001001>; ++def TLBSRCH : OP32<"tlbsrch", int_loongarch_tlbsrch>, IMM32<0b001010>; ++def TLBRD : OP32<"tlbrd", int_loongarch_tlbrd>, IMM32<0b001011>; ++def TLBWR : OP32<"tlbwr", int_loongarch_tlbwr>, IMM32<0b001100>; ++def TLBFILL : OP32<"tlbfill", int_loongarch_tlbfill>, IMM32<0b001101>; ++def ERTN : OP32<"ertn">, IMM32<0b001110>; ++ ++/// ++/// R1_IMM20 ++/// ++def ADDU16I_D : 
SI16_R2<"addu16i.d", GPR64Opnd>, R2_SI16<0b000100>; ++def LU12I_W : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; ++def LU32I_D : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU32I_D_R2 : SI20_R2<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def PCADDI : SI20<"pcaddi", GPR64Opnd, simm20>, R1_SI20<0b0001100>; ++def PCALAU12I : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; ++def PCADDU12I : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; ++def PCADDU18I : SI20<"pcaddu18i", GPR64Opnd, simm20>, R1_SI20<0b0001111>; ++ ++ ++def BEQZ : Beqz<"beqz", brtarget, seteq, GPR64Opnd>, R1_IMM21BEQZ<0b010000>; ++def BNEZ : Beqz<"bnez", brtarget, setne, GPR64Opnd>, R1_IMM21BEQZ<0b010001>; ++ ++def JIRL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; ++let isCall = 1, isCTI=1, isCodeGenOnly = 1 in { ++def JIRL_CALL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; ++} ++ ++def B : JumpFB, IMM26B<0b010100>; ++ ++def BEQ : Beq<"beq", brtarget, seteq, GPR64Opnd>, R2_IMM16BEQ<0b010110>; ++def BNE : Beq<"bne", brtarget, setne, GPR64Opnd>, R2_IMM16BEQ<0b010111>; ++def BLT : Beq<"blt", brtarget, setlt, GPR64Opnd>, R2_IMM16BEQ<0b011000>; ++def BGE : Beq<"bge", brtarget, setge, GPR64Opnd>, R2_IMM16BEQ<0b011001>; ++def BLTU : Beq<"bltu", brtarget, setult, GPR64Opnd>, R2_IMM16BEQ<0b011010>; ++def BGEU : Beq<"bgeu", brtarget, setuge, GPR64Opnd>, R2_IMM16BEQ<0b011011>; ++ ++/// ++/// Mem access ++/// ++class LLBase : ++ InstForm<(outs RO:$rd), (ins MO:$addr), !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ let mayLoad = 1; ++} ++ ++class SCBase : ++ InstForm<(outs RO:$dst), (ins RO:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), [], FrmI> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ let mayStore = 1; ++ let Constraints = "$rd = $dst"; ++} ++ ++class STGT_LE : ++ InstForm<(outs), (ins RO:$rd, RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [], FrmI, opstr>; ++ ++class Float_STGT_LE ++ : InstForm<(outs), (ins RD:$fd, RS:$rj, RS:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [], FrmR, opstr>; ++ ++def LL_D : LLBase<"ll.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b010>; ++def SC_D : SCBase<"sc.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b011>; ++ ++def LDPTR_W : LdPtr<"ldptr.w", GPR64Opnd>, LL_SC<0b100>; ++def STPTR_W : StPtr<"stptr.w", GPR64Opnd>, LL_SC<0b101>; ++def LDPTR_D : LdPtr<"ldptr.d", GPR64Opnd>, LL_SC<0b110>; ++def STPTR_D : StPtr<"stptr.d", GPR64Opnd>, LL_SC<0b111>; ++ ++def LD_B : Ld<"ld.b", GPR64Opnd, mem, sextloadi8>, LOAD_STORE<0b0000>; ++def LD_H : Ld<"ld.h", GPR64Opnd, mem, sextloadi16>, LOAD_STORE<0b0001>; ++def LD_W : Ld<"ld.w", GPR64Opnd, mem, sextloadi32>, LOAD_STORE<0b0010>; ++def LD_D : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; ++def ST_B : St<"st.b", GPR64Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; ++def ST_H : St<"st.h", GPR64Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; ++def ST_W : St<"st.w", GPR64Opnd, mem, truncstorei32>, LOAD_STORE<0b0110>; ++def ST_D : St<"st.d", GPR64Opnd, mem_simmptr, store>, LOAD_STORE<0b0111>; ++def LD_BU : Ld<"ld.bu", GPR64Opnd, mem, zextloadi8>, LOAD_STORE<0b1000>; ++def LD_HU : Ld<"ld.hu", GPR64Opnd, mem, zextloadi16>, LOAD_STORE<0b1001>; ++def LD_WU : Ld<"ld.wu", GPR64Opnd, mem, zextloadi32>, LOAD_STORE<0b1010>; ++ ++def AMSWAP_W : ATOMIC<"amswap.w", GPR32Opnd, mem>, AM<0b000000>; ++def AMSWAP_D : ATOMIC<"amswap.d", GPR64Opnd, mem>, AM<0b000001>; ++def AMADD_W : ATOMIC<"amadd.w", GPR32Opnd, mem>, AM<0b000010>; ++def 
AMADD_D : ATOMIC<"amadd.d", GPR64Opnd, mem>, AM<0b000011>; ++def AMAND_W : ATOMIC<"amand.w", GPR32Opnd, mem>, AM<0b000100>; ++def AMAND_D : ATOMIC<"amand.d", GPR64Opnd, mem>, AM<0b000101>; ++def AMOR_W : ATOMIC<"amor.w", GPR32Opnd, mem>, AM<0b000110>; ++def AMOR_D : ATOMIC<"amor.d", GPR64Opnd, mem>, AM<0b000111>; ++def AMXOR_W : ATOMIC<"amxor.w", GPR32Opnd, mem>, AM<0b001000>; ++def AMXOR_D : ATOMIC<"amxor.d", GPR64Opnd, mem>, AM<0b001001>; ++def AMMAX_W : ATOMIC<"ammax.w", GPR32Opnd, mem>, AM<0b001010>; ++def AMMAX_D : ATOMIC<"ammax.d", GPR64Opnd, mem>, AM<0b001011>; ++def AMMIN_W : ATOMIC<"ammin.w", GPR32Opnd, mem>, AM<0b001100>; ++def AMMIN_D : ATOMIC<"ammin.d", GPR64Opnd, mem>, AM<0b001101>; ++def AMMAX_WU : ATOMIC<"ammax.wu", GPR32Opnd, mem>, AM<0b001110>; ++def AMMAX_DU : ATOMIC<"ammax.du", GPR64Opnd, mem>, AM<0b001111>; ++def AMMIN_WU : ATOMIC<"ammin.wu", GPR32Opnd, mem>, AM<0b010000>; ++def AMMIN_DU : ATOMIC<"ammin.du", GPR64Opnd, mem>, AM<0b010001>; ++ ++ ++def AMSWAP_DB_W : ATOMIC<"amswap_db.w", GPR32Opnd, mem>, AM<0b010010>; ++def AMSWAP_DB_D : ATOMIC<"amswap_db.d", GPR64Opnd, mem>, AM<0b010011>; ++def AMADD_DB_W : ATOMIC<"amadd_db.w", GPR32Opnd, mem>, AM<0b010100>; ++def AMADD_DB_D : ATOMIC<"amadd_db.d", GPR64Opnd, mem>, AM<0b010101>; ++def AMAND_DB_W : ATOMIC<"amand_db.w", GPR32Opnd, mem>, AM<0b010110>; ++def AMAND_DB_D : ATOMIC<"amand_db.d", GPR64Opnd, mem>, AM<0b010111>; ++def AMOR_DB_W : ATOMIC<"amor_db.w", GPR32Opnd, mem>, AM<0b011000>; ++def AMOR_DB_D : ATOMIC<"amor_db.d", GPR64Opnd, mem>, AM<0b011001>; ++def AMXOR_DB_W : ATOMIC<"amxor_db.w", GPR32Opnd, mem>, AM<0b011010>; ++def AMXOR_DB_D : ATOMIC<"amxor_db.d", GPR64Opnd, mem>, AM<0b011011>; ++def AMMAX_DB_W : ATOMIC<"ammax_db.w", GPR32Opnd, mem>, AM<0b011100>; ++def AMMAX_DB_D : ATOMIC<"ammax_db.d", GPR64Opnd, mem>, AM<0b011101>; ++def AMMIN_DB_W : ATOMIC<"ammin_db.w", GPR32Opnd, mem>, AM<0b011110>; ++def AMMIN_DB_D : ATOMIC<"ammin_db.d", GPR64Opnd, mem>, AM<0b011111>; ++def AMMAX_DB_WU : ATOMIC<"ammax_db.wu", GPR32Opnd, mem>, AM<0b100000>; ++def AMMAX_DB_DU : ATOMIC<"ammax_db.du", GPR64Opnd, mem>, AM<0b100001>; ++def AMMIN_DB_WU : ATOMIC<"ammin_db.wu", GPR32Opnd, mem>, AM<0b100010>; ++def AMMIN_DB_DU : ATOMIC<"ammin_db.du", GPR64Opnd, mem>, AM<0b100011>; ++ ++def LDGT_B : Int_Reg3<"ldgt.b", GPR64Opnd>, R3MI<0b11110000>; ++def LDGT_H : Int_Reg3<"ldgt.h", GPR64Opnd>, R3MI<0b11110001>; ++def LDGT_W : Int_Reg3<"ldgt.w", GPR64Opnd>, R3MI<0b11110010>; ++def LDGT_D : Int_Reg3<"ldgt.d", GPR64Opnd>, R3MI<0b11110011>; ++def LDLE_B : Int_Reg3<"ldle.b", GPR64Opnd>, R3MI<0b11110100>; ++def LDLE_H : Int_Reg3<"ldle.h", GPR64Opnd>, R3MI<0b11110101>; ++def LDLE_W : Int_Reg3<"ldle.w", GPR64Opnd>, R3MI<0b11110110>; ++def LDLE_D : Int_Reg3<"ldle.d", GPR64Opnd>, R3MI<0b11110111>; ++def STGT_B : STGT_LE<"stgt.b", GPR64Opnd>, R3MI<0b11111000>; ++def STGT_H : STGT_LE<"stgt.h", GPR64Opnd>, R3MI<0b11111001>; ++def STGT_W : STGT_LE<"stgt.w", GPR64Opnd>, R3MI<0b11111010>; ++def STGT_D : STGT_LE<"stgt.d", GPR64Opnd>, R3MI<0b11111011>; ++def STLE_B : STGT_LE<"stle.b", GPR64Opnd>, R3MI<0b11111100>; ++def STLE_H : STGT_LE<"stle.h", GPR64Opnd>, R3MI<0b11111101>; ++def STLE_W : STGT_LE<"stle.w", GPR64Opnd>, R3MI<0b11111110>; ++def STLE_D : STGT_LE<"stle.d", GPR64Opnd>, R3MI<0b11111111>; ++ ++let isCodeGenOnly = 1 in { ++def PRELD : Preld<"preld", mem, GPR64Opnd>, PRELD_FM; ++} ++ ++def PRELD_Raw : Preld_Raw<"preld", GPR64Opnd>, PRELD_FM; ++ ++let isCall=1, isCTI=1, Defs = [RA] in { ++ class JumpLink : ++ InstForm<(outs), (ins opnd:$target), 
!strconcat(opstr, "\t$target"), ++ [(LoongArchJmpLink tglobaladdr:$target)], FrmJ, opstr> { ++ let DecoderMethod = "DecodeJumpTarget"; ++ } ++} ++def LONG_BRANCH_PCADDU12I : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins brtarget:$tgt), []>, GPR_64; ++ ++def LONG_BRANCH_ADDID2Op : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64; ++ ++def LONG_BRANCH_ADDID : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64; ++ ++def LEA_ADDI_D: EffectiveAddress<"addi.d", GPR64Opnd>, LEA_ADDI_FM<0b011>, GPR_64; ++ ++class PseudoReturnBase : LoongArchPseudo<(outs), (ins RO:$rs), ++ []> { ++ let isTerminator = 1; ++ let isBarrier = 1; ++ let isReturn = 1; ++ let isCodeGenOnly = 1; ++ let hasCtrlDep = 1; ++ let hasExtraSrcRegAllocReq = 1; ++ bit isCTI = 1; ++} ++ ++def PseudoReturn64 : PseudoReturnBase; ++//def PseudoReturn : PseudoReturnBase; ++ ++ ++let isCall=1, isCTI=1, Defs=[RA], isCodeGenOnly=1 in { ++def PseudoCall : LoongArchPseudo<(outs), (ins calltarget:$target), ++ []>; ++} ++ ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in ++def PseudoTailCall : LoongArchPseudo<(outs), (ins calltarget:$target), ++ []>; ++ ++class PseudoTailBase : LoongArchPseudo<(outs), (ins opnd:$offset26), ++ []> { ++ let isTerminator = 1; ++ let isBarrier = 1; ++ let isReturn = 1; ++ let isCodeGenOnly = 1; ++} ++def PseudoTailReturn : PseudoTailBase; ++ ++ ++def : LoongArchPat<(LoongArchTailCall tglobaladdr:$dst), ++ (PseudoTailCall tglobaladdr:$dst)>; ++ ++def : LoongArchPat<(LoongArchTailCall texternalsym:$dst), ++ (PseudoTailCall texternalsym:$dst)>; ++ ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isIndirectBranch = 1, Uses = [SP] in ++def PseudoTAILIndirect : LoongArchPseudo<(outs), (ins GPRTC64Opnd:$rj), [(LoongArchTailCall GPRTC64Opnd:$rj)]>, ++ PseudoInstExpansion<(JIRL ZERO_64, GPR64Opnd:$rj, 0)>; ++ ++ ++def : LoongArchPat<(LoongArchJmpLink tglobaladdr:$dst), ++ (PseudoCall tglobaladdr:$dst)>; ++ ++def : LoongArchPat<(LoongArchJmpLink (i32 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++ ++def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++ ++def BL : JumpLink<"bl", calltarget>, FJ<0b010101>; ++ ++class IsAsCheapAsAMove { ++ bit isAsCheapAsAMove = 1; ++} ++class LoadUpper: ++ InstForm<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), ++ [], FrmI, opstr>, IsAsCheapAsAMove { ++ let hasSideEffects = 0; ++ let isReMaterializable = 1; ++ let mayLoad = 1; ++} ++ ++let isCodeGenOnly = 1 in { ++def LAPCREL : LoadUpper<"la.pcrel", GPR64Opnd, uimm16_64_relaxed>, LUI_FM, GPR_64; ++} ++ ++def NOP : LoongArchPseudo<(outs), (ins), []>, ++ PseudoInstExpansion<(ANDI ZERO_64, ZERO_64, 0)>; ++ ++def : LoongArchInstAlias<"nop", (ANDI ZERO_64, ZERO_64, 0), 1>; ++def : LoongArchInstAlias<"jr $rd", (JIRL ZERO_64, GPR64Opnd:$rd, 0), 1>; ++def : LoongArchInstAlias<"move $dst, $src", ++ (OR GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; ++ ++def UImm12RelaxedAsmOperandClass ++: UImmAsmOperandClass<12, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm12_Relaxed"; ++ let PredicateMethod = "isAnyImm<12>"; ++ let DiagnosticType = "UImm12_Relaxed"; ++} ++ ++def SImm12RelaxedAsmOperandClass ++: SImmAsmOperandClass<12, [UImm12RelaxedAsmOperandClass]> { ++ let Name = "SImm12_Relaxed"; ++ let PredicateMethod = 
"isAnyImm<12>"; ++ let DiagnosticType = "SImm12_Relaxed"; ++} ++ ++def simm12_relaxed : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++ let ParserMatchClass = !cast("SImm12RelaxedAsmOperandClass"); ++} ++ ++def : LoongArchPat<(i64 (anyext GPR32:$src)), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>,GPR_64; ++ ++let usesCustomInserter = 1 in { ++ def ATOMIC_LOAD_ADD_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; ++ def ATOMIC_SWAP_I64 : Atomic2Ops; ++ def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; ++ ++ def ATOMIC_LOAD_MAX_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops; ++} ++ ++def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA; ++ ++def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA; ++ ++def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_64 addrimm14lsl2:$a), (LDPTR_D addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_64 addr:$a), (LD_D addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(atomic_store_8 addr:$a, GPR64:$v), ++ (ST_B GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_16 addr:$a, GPR64:$v), ++ (ST_H GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR64:$v), ++ (STPTR_W GPR64:$v, addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_32 addr:$a, GPR64:$v), ++ (ST_W GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_64 addrimm14lsl2:$a, GPR64:$v), ++ (STPTR_D GPR64:$v, addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_64 addr:$a, GPR64:$v), ++ (ST_D GPR64:$v, addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(bswap GPR64:$rt), (REVH_D (REVB_4H GPR64:$rt))>; ++ ++def immZExt5 : ImmLeaf; ++ ++def immZExtRange2To64 : PatLeaf<(imm), [{ ++ return isUInt<7>(N->getZExtValue()) && (N->getZExtValue() >= 2) && ++ (N->getZExtValue() <= 64); ++}]>; ++ ++// bstrins and bstrpick ++class InsBase ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src), ++ !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src))], ++ FrmR, opstr> { ++ let Constraints = "$src = $rd"; ++ } ++ ++class InsBase_32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src), ++ !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src))], ++ FrmR, opstr> { ++ let Constraints = "$src = $rd"; ++} ++ ++class PickBase ++ : InstForm<(outs RO:$rd), (ins RO:$rj, 
ImmOpnd:$msbd, ImmOpnd:$lsbd), ++ !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"), ++ [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd))], ++ FrmR, opstr>; ++ ++class PickBase_32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw), ++ !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"), ++ [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw))], ++ FrmR, opstr>; ++ ++ def BSTRINS_D : InsBase<"bstrins.d", GPR64Opnd, uimm6, LoongArchBstrins>, ++ INSERT_BIT64<0>; ++ def BSTRPICK_D : PickBase<"bstrpick.d", GPR64Opnd, uimm6, LoongArchBstrpick>, ++ INSERT_BIT64<1>; ++ ++let isCodeGenOnly = 1 in { ++ def ZEXT64_32 : InstForm<(outs GPR64Opnd:$rd), ++ (ins GPR32Opnd:$rj, uimm6:$msbd, ++ uimm6:$lsbd), ++ "bstrpick.d $rd, $rj, $msbd, $lsbd", [], FrmR, "bstrpick.d">, ++ INSERT_BIT64<1>; ++} ++ ++//32-to-64-bit extension ++def : LoongArchPat<(i64 (zext GPR32:$src)), (ZEXT64_32 GPR32:$src, 31, 0)>; ++def : LoongArchPat<(i64 (extloadi1 addr:$src)), (LD_B addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi8 addr:$src)), (LD_B addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi16 addr:$src)), (LD_H addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi32 addr:$src)), (LD_W addr:$src)>, ++ GPR_64; ++ ++class LDX_FT_LA : ++ InstForm<(outs DRC:$rd), (ins PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set DRC:$rd, (OpNode (add iPTR:$rj, iPTR:$rk)))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class STX_FT_LA : ++ InstForm<(outs), (ins DRC:$rd, PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(OpNode DRC:$rd, (add iPTR:$rj, iPTR:$rk))], ++ FrmI, opstr> { ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++ let AddedComplexity = 20; ++} ++ ++ ++def LDX_B : LDX_FT_LA<"ldx.b", GPR64Opnd, sextloadi8>, ++ R3MI<0b00000000>; ++def LDX_H : LDX_FT_LA<"ldx.h", GPR64Opnd, sextloadi16>, ++ R3MI<0b00001000>; ++def LDX_W : LDX_FT_LA<"ldx.w", GPR64Opnd, sextloadi32>, ++ R3MI<0b00010000>; ++def LDX_D : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, ++ R3MI<0b00011000>; ++def STX_B : STX_FT_LA<"stx.b", GPR64Opnd, truncstorei8>, ++ R3MI<0b00100000>; ++def STX_H : STX_FT_LA<"stx.h", GPR64Opnd, truncstorei16>, ++ R3MI<0b00101000>; ++def STX_W : STX_FT_LA<"stx.w", GPR64Opnd, truncstorei32>, ++ R3MI<0b00110000>; ++def STX_D : STX_FT_LA<"stx.d", GPR64Opnd, store>, ++ R3MI<0b00111000>; ++def LDX_BU : LDX_FT_LA<"ldx.bu", GPR64Opnd, extloadi8>, ++ R3MI<0b01000000>; ++def LDX_HU : LDX_FT_LA<"ldx.hu", GPR64Opnd, extloadi16>, ++ R3MI<0b01001000>; ++def LDX_WU : LDX_FT_LA<"ldx.wu", GPR64Opnd, zextloadi32>, ++ R3MI<0b01010000>; ++ ++//def : LoongArchPat<(bswap GPR64:$rj), (REVH_D (REVB_4H GPR64:$rj))>; ++//def : LoongArchPat<(bswap GPR64:$rj), (ROTRI_D (REVB_2W GPR64:$rj), 32)>; ++def : LoongArchPat<(bswap GPR64:$rj), (REVB_D GPR64:$rj)>; ++ ++let isCodeGenOnly = 1 in { ++ def SLLI_D_64_32 : Shift_Imm64<"", GPR64Opnd>, R2_IMM6<0b00>, GPR_64 { ++ let imm6 = 0; ++ let AsmString = "slli.d\t$rd, $rj, 32"; ++ let InOperandList = (ins GPR32:$rj); ++ let OutOperandList = (outs GPR64:$rd); ++ } ++ ++ let isMoveReg = 1, imm5 = 0, ++ AsmString = "slli.w\t$rd, $rj, 0", ++ OutOperandList = (outs GPR64:$rd) in { ++ let InOperandList = (ins GPR32:$rj) in ++ def SLLI_W_64_32 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; ++ let InOperandList = (ins GPR64:$rj) in ++ def SLLI_W_64_64 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; ++ } ++ ++ let AsmString = "sltui\t$rd, $rj, $imm12", ++ 
OutOperandList = (outs GPR64:$rd) in { ++ let InOperandList = (ins GPR64:$rj, simm12:$imm12) in ++ def SLTUI_64 : SetCC_I<"", GPR64Opnd, simm12>, R2_IMM12<0b001>, GPR_64; ++ } ++} ++ ++// 32-to-64-bit extension ++//def : LoongArchPat<(i64 (zext GPR32:$src)), (SRLI_D (SLLI_D_64_32 GPR32:$src), 32)>, GPR_64; ++def : LoongArchPat<(i64 (sext GPR32:$src)), (SLLI_W_64_32 GPR32:$src)>, GPR_64; ++def : LoongArchPat<(i64 (sext_inreg GPR64:$src, i32)), (SLLI_W_64_64 GPR64:$src)>, GPR_64; ++ ++let Uses = [A0, A1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in { ++ def LoongArcheh_return32 : LoongArchPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst), ++ [(LoongArchehret GPR32:$spoff, GPR32:$dst)]>; ++ def LoongArcheh_return64 : LoongArchPseudo<(outs), (ins GPR64:$spoff,GPR64:$dst), ++ [(LoongArchehret GPR64:$spoff, GPR64:$dst)]>; ++} ++ ++def : LoongArchPat<(select i32:$cond, i64:$t, i64:$f), ++ (OR (MASKEQZ i64:$t, (SLLI_W_64_32 i32:$cond)), ++ (MASKNEZ i64:$f, (SLLI_W_64_32 i32:$cond)))>; ++// setcc patterns ++multiclass SeteqPats { ++ def : LoongArchPat<(seteq RC:$lhs, 0), ++ (SLTiuOp RC:$lhs, 1)>; ++ def : LoongArchPat<(setne RC:$lhs, 0), ++ (SLTuOp ZEROReg, RC:$lhs)>; ++ def : LoongArchPat<(seteq RC:$lhs, RC:$rhs), ++ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; ++ def : LoongArchPat<(setne RC:$lhs, RC:$rhs), ++ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; ++} ++ ++multiclass SetlePats { ++ def : LoongArchPat<(setle RC:$lhs, RC:$rhs), ++ (XORiOp (SLTOp RC:$rhs, RC:$lhs), 1)>; ++ def : LoongArchPat<(setule RC:$lhs, RC:$rhs), ++ (XORiOp (SLTuOp RC:$rhs, RC:$lhs), 1)>; ++} ++ ++multiclass SetgtPats { ++ def : LoongArchPat<(setgt RC:$lhs, RC:$rhs), ++ (SLTOp RC:$rhs, RC:$lhs)>; ++ def : LoongArchPat<(setugt RC:$lhs, RC:$rhs), ++ (SLTuOp RC:$rhs, RC:$lhs)>; ++} ++ ++multiclass SetgePats { ++ def : LoongArchPat<(setge RC:$lhs, RC:$rhs), ++ (XORiOp (SLTOp RC:$lhs, RC:$rhs), 1)>; ++ def : LoongArchPat<(setuge RC:$lhs, RC:$rhs), ++ (XORiOp (SLTuOp RC:$lhs, RC:$rhs), 1)>; ++} ++ ++multiclass SetgeImmPats { ++ def : LoongArchPat<(setge RC:$lhs, immSExt12:$rhs), ++ (XORiOp (SLTiOp RC:$lhs, immSExt12:$rhs), 1)>; ++ def : LoongArchPat<(setuge RC:$lhs, immSExt12:$rhs), ++ (XORiOp (SLTiuOp RC:$lhs, immSExt12:$rhs), 1)>; ++} ++ ++class LoadRegImmPat : ++ LoongArchPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>; ++ ++class StoreRegImmPat : ++ LoongArchPat<(Node ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>; ++ ++class LoadRegImm14Lsl2Pat : ++ LoongArchPat<(ValTy (Node addrimm14lsl2:$a)), (LoadInst addrimm14lsl2:$a)>; ++ ++class StoreRegImm14Lsl2Pat : ++ LoongArchPat<(Node ValTy:$v, addrimm14lsl2:$a), (StoreInst ValTy:$v, addrimm14lsl2:$a)>; ++ ++// Patterns for loads/stores with a reg+imm operand. ++// let AddedComplexity = 40 so that these instructions are selected instead of ++// LDX/STX which needs one more register and an ANDI instruction. 
++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ ++ def : LoadRegImm14Lsl2Pat; ++ def : LoadRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++} ++ ++//===----------------------------------------------------------------------===// ++// Base Extension Support ++//===----------------------------------------------------------------------===// ++ ++include "LoongArch32InstrInfo.td" ++include "LoongArchInstrInfoF.td" ++include "LoongArchLSXInstrFormats.td" ++include "LoongArchLSXInstrInfo.td" ++include "LoongArchLASXInstrFormats.td" ++include "LoongArchLASXInstrInfo.td" ++ ++defm : SeteqPats, GPR_64; ++defm : SetlePats, GPR_64; ++defm : SetgtPats, GPR_64; ++defm : SetgePats, GPR_64; ++defm : SetgeImmPats, GPR_64; ++ ++/// ++/// for relocation ++/// ++let isCodeGenOnly = 1 in { ++def PCALAU12I_ri : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; ++def ORI_rri : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; ++def LU12I_W_ri : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; ++def LU32I_D_ri : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU52I_D_rri : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def ADDI_D_rri : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; ++def LD_D_rri : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; ++def ADD_D_rrr : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; ++def LDX_D_rrr : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, ++ R3MI<0b00011000>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Assembler Pseudo Instructions ++//===----------------------------------------------------------------------===// ++def LoadImm32 : LoongArchAsmPseudoInst<(outs GPR32Opnd:$rd), ++ (ins uimm32_coerced:$imm32), ++ "li.w\t$rd, $imm32">; ++def LoadImm64 : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "li.d\t$rd, $imm64">; ++// load address ++def LoadAddrLocal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.local\t$rd, $imm64">; ++def : InstAlias<"la.pcrel $rd, $imm", ++ (LoadAddrLocal GPR64Opnd:$rd, imm64:$imm), 1>; ++def LoadAddrGlobal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.global\t$rd, $imm64">; ++def LoadAddrGlobal_Alias : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la\t$rd, $imm64">; ++def : InstAlias<"la.got $rd, $imm", ++ (LoadAddrGlobal GPR64Opnd:$rd, imm64:$imm), 1>; ++ ++def LoadAddrTLS_LE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.le\t$rd, $imm64">; ++def LoadAddrTLS_IE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.ie\t$rd, $imm64">; ++def LoadAddrTLS_GD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.gd\t$rd, $imm64">; ++def LoadAddrTLS_LD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.ld\t$rd, $imm64">; ++ ++// load address with a temp reg ++def LoadAddrLocalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.local\t$rd, $rt, $imm64">; ++def LoadAddrGlobalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ 
"la.global\t$rd, $rt, $imm64">; ++def LoadAddrTLS_IE_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.ie\t$rd, $rt, $imm64">; ++def LoadAddrTLS_GD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.gd\t$rd, $rt, $imm64">; ++def LoadAddrTLS_LD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.ld\t$rd, $rt, $imm64">; ++ ++// trap when div zero ++def PseudoTEQ : LoongArchPseudo<(outs), (ins GPR64Opnd:$rt), []>; ++ ++ ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, immSExt12:$imm12)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADDI_W GPR32:$src, immSExt12:$imm12), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRAI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRA_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++ ++def : LoongArchPat<(i64 (xor GPR64:$rj, (i64 -1))), ++ (NOR ZERO_64, GPR64:$rj)>; ++ ++def : LoongArchPat<(and GPR64:$rj, (i64 (xor GPR64:$rk, (i64 -1)))), ++ (ANDN GPR64:$rj, GPR64:$rk)>; ++ ++def : LoongArchPat<(i64 (or GPR64:$rj, (xor GPR64:$rk, (i64 -1)))), ++ (ORN GPR64:$rj, GPR64:$rk)>; ++ ++def : LoongArchPat<(i64 (zext (i32 (seteq GPR64:$rj, (i64 0))))), ++ (SLTUI_64 GPR64:$rj, (i64 1))>; ++ ++ ++def : LoongArchPat<(i64 (zext (i32 (srl GPR32:$src, immZExt5:$imm5)))), ++ (BSTRPICK_D (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), ++ (i32 31), immZExt5:$imm5)>; +diff --git a/lib/Target/LoongArch/LoongArchInstrInfoF.td b/lib/Target/LoongArch/LoongArchInstrInfoF.td +new file mode 100644 +index 00000000..73711ff7 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchInstrInfoF.td +@@ -0,0 +1,629 @@ ++//===- LoongArchInstrInfoF.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. 
++// ++//===----------------------------------------------------------------------===// ++// FP immediate patterns. ++def fpimm0 : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(+0.0); ++}]>; ++ ++def fpimm0neg : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(-0.0); ++}]>; ++ ++def fpimm1 : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(+1.0); ++}]>; ++ ++def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">; ++ ++class HARDFLOAT { list HardFloatPredicate = [IsNotSoftFloat]; } ++ ++def SDT_LoongArchTruncIntFP : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; ++ ++def LoongArchTruncIntFP : SDNode<"LoongArchISD::TruncIntFP", SDT_LoongArchTruncIntFP>; ++ ++def SDT_LoongArchFPBrcond : SDTypeProfile<0, 3, [SDTCisInt<0>, ++ SDTCisVT<1, i32>, ++ SDTCisVT<2, OtherVT>]>; ++ ++def LoongArchFPBrcond : SDNode<"LoongArchISD::FPBrcond", SDT_LoongArchFPBrcond, ++ [SDNPHasChain, SDNPOptInGlue]>; ++ ++def SDT_LoongArchCMovFP : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>, ++ SDTCisSameAs<1, 3>]>; ++ ++def LoongArchCMovFP_T : SDNode<"LoongArchISD::CMovFP_T", SDT_LoongArchCMovFP, [SDNPInGlue]>; ++ ++def LoongArchCMovFP_F : SDNode<"LoongArchISD::CMovFP_F", SDT_LoongArchCMovFP, [SDNPInGlue]>; ++ ++def SDT_LoongArchFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, ++ SDTCisVT<2, i32>]>; ++ ++def LoongArchFPCmp : SDNode<"LoongArchISD::FPCmp", SDT_LoongArchFPCmp, [SDNPOutGlue]>; ++ ++def SDT_LoongArchFSEL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, ++ SDTCisSameAs<1, 3>]>; ++ ++def LoongArchFSEL : SDNode<"LoongArchISD::FSEL", SDT_LoongArchFSEL, ++ [SDNPInGlue]>; ++ ++//===---------------------------------------------------------------------===/ ++//Instruction Class Templates ++//===---------------------------------------------------------------------===/ ++ ++class Float_MOVF ++ : InstForm<(outs RO:$rd), (ins RC:$fj), ++ !strconcat(opstr, "\t$rd, $fj"), ++ [(set RO:$rd, (OpNode RC:$fj))], ++ FrmFR, opstr>, HARDFLOAT { ++ let isMoveReg = 1; ++} ++ ++class Float_MOVT ++ : InstForm<(outs RO:$fd), (ins RC:$rj), ++ !strconcat(opstr, "\t$fd, $rj"), ++ [(set RO:$fd, (OpNode RC:$rj))], ++ FrmFR, opstr>, HARDFLOAT { ++ let isMoveReg = 1; ++} ++ ++class Float_CVT ++ : InstForm<(outs RO:$fd), (ins RS:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode RS:$fj))], ++ FrmFR, opstr>, ++ HARDFLOAT { ++ let hasSideEffects = 0; ++} ++ ++/// float mov ++class Gpr_2_Fcsr ++ : InstForm<(outs FCSROpnd:$fcsr), (ins RO:$rj), ++ !strconcat(opstr, "\t$fcsr, $rj"), ++ [(set FCSROpnd:$fcsr, (OpNode RO:$rj))], ++ FrmR, opstr>; ++class Fcsr_2_Gpr ++ : InstForm<(outs RO:$rd), (ins FCSROpnd:$fcsr), ++ !strconcat(opstr, "\t$rd, $fcsr"), ++ [(set RO:$rd, (OpNode FCSROpnd:$fcsr))], ++ FrmR, opstr>; ++class Fgr_2_Fcfr ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj), ++ !strconcat(opstr, "\t$cd, $fj"), ++ [(set FCFROpnd:$cd, (OpNode RO:$fj))], ++ FrmR, opstr>; ++class Fcfr_2_Fgr ++ : InstForm<(outs RO:$fd), (ins FCFROpnd:$cj), ++ !strconcat(opstr, "\t$fd, $cj"), ++ [(set RO:$fd, (OpNode FCFROpnd:$cj))], ++ FrmR, opstr>; ++class Gpr_2_Fcfr ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$rj), ++ !strconcat(opstr, "\t$cd, $rj"), ++ [(set FCFROpnd:$cd, (OpNode RO:$rj))], ++ FrmR, opstr>; ++class Fcfr_2_Gpr ++ : InstForm<(outs RO:$rd), (ins FCFROpnd:$cj), ++ !strconcat(opstr, "\t$rd, $cj"), ++ [(set RO:$rd, (OpNode FCFROpnd:$cj))], ++ FrmR, opstr>; ++ ++class FLDX : ++ InstForm<(outs DRC:$fd), (ins PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(set DRC:$fd, (OpNode 
(add iPTR:$rj, iPTR:$rk)))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++} ++ ++class FSTX : ++ InstForm<(outs), (ins DRC:$fd, PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(OpNode DRC:$fd, (add iPTR:$rj, iPTR:$rk))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++} ++ ++/// f{maxa/mina}.{s/d} ++class Float_Reg3_Fmaxa ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [], FrmR, opstr>; ++/// frecip ++class Float_Reg2_Frecip ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode fpimm1, RO:$fj))], ++ FrmR, opstr>; ++/// frsqrt ++class Float_Reg2_Frsqrt ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode fpimm1, (fsqrt RO:$fj)))], ++ FrmR, opstr>; ++ ++class BceqzBr : ++ InstForm<(outs), (ins FCFROpnd:$cj, opnd:$offset), ++ !strconcat(opstr, "\t$cj, $offset"), ++ [(LoongArchFPBrcond Op, FCFROpnd:$cj, bb:$offset)], ++ FrmFI, opstr>, HARDFLOAT { ++ let isBranch = 1; ++ let isTerminator = 1; ++ let hasFCCRegOperand = 1; ++} ++ ++class FCMP_COND ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj, RO:$fk), ++ !strconcat("fcmp.", CondStr, ".", TypeStr, "\t$cd, $fj, $fk"), ++ [(set FCFROpnd:$cd, (OpNode RO:$fj, RO:$fk))], ++ FrmOther, ++ !strconcat("fcmp.", CondStr, ".", TypeStr)> { ++ bit isCTI = 1; // for what? from Mips32r6InstrInfo.td line 219 ++} ++ ++class FIELD_CMP_COND Val> { ++ bits<5> Value = Val; ++} ++def FIELD_CMP_COND_CAF : FIELD_CMP_COND<0x0>; ++def FIELD_CMP_COND_CUN : FIELD_CMP_COND<0x8>; ++def FIELD_CMP_COND_CEQ : FIELD_CMP_COND<0x4>; ++def FIELD_CMP_COND_CUEQ : FIELD_CMP_COND<0xC>; ++def FIELD_CMP_COND_CLT : FIELD_CMP_COND<0x2>; ++def FIELD_CMP_COND_CULT : FIELD_CMP_COND<0xA>; ++def FIELD_CMP_COND_CLE : FIELD_CMP_COND<0x6>; ++def FIELD_CMP_COND_CULE : FIELD_CMP_COND<0xE>; ++def FIELD_CMP_COND_CNE : FIELD_CMP_COND<0x10>; ++def FIELD_CMP_COND_COR : FIELD_CMP_COND<0x14>; ++def FIELD_CMP_COND_CUNE : FIELD_CMP_COND<0x18>; ++def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0x1>; ++def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0x9>; ++def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0x5>; ++def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0xD>; ++def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0x3>; ++def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0xB>; ++def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0x7>; ++def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0xF>; ++def FIELD_CMP_COND_SNE : FIELD_CMP_COND<0x11>; ++def FIELD_CMP_COND_SOR : FIELD_CMP_COND<0x15>; ++def FIELD_CMP_COND_SUNE : FIELD_CMP_COND<0x19>; ++ ++multiclass FCMP_COND_M op, string TypeStr, ++ RegisterOperand RO> { ++ def FCMP_CAF_#NAME : FCMP_COND<"caf", TypeStr, RO>, ++ R2_COND; ++ def FCMP_CUN_#NAME : FCMP_COND<"cun", TypeStr, RO, setuo>, ++ R2_COND; ++ def FCMP_CEQ_#NAME : FCMP_COND<"ceq", TypeStr, RO, setoeq>, ++ R2_COND; ++ def FCMP_CUEQ_#NAME : FCMP_COND<"cueq", TypeStr, RO, setueq>, ++ R2_COND; ++ def FCMP_CLT_#NAME : FCMP_COND<"clt", TypeStr, RO, setolt>, ++ R2_COND; ++ def FCMP_CULT_#NAME : FCMP_COND<"cult", TypeStr, RO, setult>, ++ R2_COND; ++ def FCMP_CLE_#NAME : FCMP_COND<"cle", TypeStr, RO, setole>, ++ R2_COND; ++ def FCMP_CULE_#NAME : FCMP_COND<"cule", TypeStr, RO, setule>, ++ R2_COND; ++ def FCMP_CNE_#NAME : FCMP_COND<"cne", TypeStr, RO, setone>, ++ R2_COND; ++ def FCMP_COR_#NAME : FCMP_COND<"cor", TypeStr, RO, seto>, ++ R2_COND; ++ def FCMP_CUNE_#NAME : FCMP_COND<"cune", TypeStr, RO, setune>, ++ R2_COND; ++ ++ def FCMP_SAF_#NAME : FCMP_COND<"saf", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUN_#NAME : 
FCMP_COND<"sun", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SEQ_#NAME : FCMP_COND<"seq", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUEQ_#NAME : FCMP_COND<"sueq", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SLT_#NAME : FCMP_COND<"slt", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SULT_#NAME : FCMP_COND<"sult", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SLE_#NAME : FCMP_COND<"sle", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SULE_#NAME : FCMP_COND<"sule", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SNE_#NAME : FCMP_COND<"sne", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SOR_#NAME : FCMP_COND<"sor", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUNE_#NAME : FCMP_COND<"sune", TypeStr, RO>, ++ R2_COND; ++} ++ ++//// comparisons supported via another comparison ++//multiclass FCmp_Pats { ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++//} ++ ++ ++/// ++/// R2 ++/// ++def FABS_S : Float_Reg2<"fabs.s", FGR32Opnd, fabs>, R2F<0b0100000001>; ++def FABS_D : Float_Reg2<"fabs.d", FGR64Opnd, fabs>, R2F<0b0100000010>; ++def FNEG_S : Float_Reg2<"fneg.s", FGR32Opnd, fneg>, R2F<0b0100000101>; ++def FNEG_D : Float_Reg2<"fneg.d", FGR64Opnd, fneg>, R2F<0b0100000110>; ++def FLOGB_S : Float_Reg2<"flogb.s", FGR32Opnd>, R2F<0b0100001001>; ++def FLOGB_D : Float_Reg2<"flogb.d", FGR64Opnd>, R2F<0b0100001010>; ++def FCLASS_S : Float_Reg2<"fclass.s", FGR32Opnd>, R2F<0b0100001101>; ++def FCLASS_D : Float_Reg2<"fclass.d", FGR64Opnd>, R2F<0b0100001110>; ++def FSQRT_S : Float_Reg2<"fsqrt.s", FGR32Opnd, fsqrt>, R2F<0b0100010001>; ++def FSQRT_D : Float_Reg2<"fsqrt.d", FGR64Opnd, fsqrt>, R2F<0b0100010010>; ++def FRECIP_S : Float_Reg2_Frecip<"frecip.s", FGR32Opnd, fdiv>, R2F<0b0100010101>; ++def FRECIP_D : Float_Reg2_Frecip<"frecip.d", FGR64Opnd, fdiv>, R2F<0b0100010110>; ++def FRSQRT_S : Float_Reg2_Frsqrt<"frsqrt.s", FGR32Opnd, fdiv>, R2F<0b0100011001>; ++def FRSQRT_D : Float_Reg2_Frsqrt<"frsqrt.d", FGR64Opnd, fdiv>, R2F<0b0100011010>; ++def FMOV_S : Float_Reg2<"fmov.s", FGR32Opnd>, R2F<0b0100100101>; ++def FMOV_D : Float_Reg2<"fmov.d", FGR64Opnd>, R2F<0b0100100110>; ++ ++def MOVGR2FR_W : Float_MOVT<"movgr2fr.w", FGR32Opnd, GPR32Opnd, bitconvert>, MOVFI<0b0100101001>; ++def MOVGR2FR_D : Float_MOVT<"movgr2fr.d", FGR64Opnd, GPR64Opnd, bitconvert>, MOVFI<0b0100101010>; ++def MOVGR2FRH_W : Float_MOVT<"movgr2frh.w", FGR64Opnd, GPR32Opnd>, MOVFI<0b0100101011>; //not realize ++def MOVFR2GR_S : Float_MOVF<"movfr2gr.s", GPR32Opnd, FGR32Opnd, bitconvert>, MOVIF<0b0100101101>; ++def MOVFR2GR_D : Float_MOVF<"movfr2gr.d", GPR64Opnd, FGR64Opnd, bitconvert>, MOVIF<0b0100101110>; ++def MOVFRH2GR_S : Float_MOVF<"movfrh2gr.s", GPR32Opnd, FGR32Opnd>, MOVIF<0b0100101111>; //not realize ++ ++let isCodeGenOnly = 1 in { ++ def MOVFR2GR_DS : Float_MOVF<"movfr2gr.s", GPR64Opnd, FGR32Opnd>, MOVIF<0b0100101101>; ++} ++ ++def FCVT_S_D : Float_CVT<"fcvt.s.d", FGR32Opnd, FGR64Opnd>, R2F<0b1001000110>; ++def FCVT_D_S : Float_CVT<"fcvt.d.s", FGR64Opnd, FGR32Opnd>, R2F<0b1001001001>; ++ ++def FTINTRM_W_S : Float_Reg2<"ftintrm.w.s", 
FGR32Opnd>, R2F<0b1010000001>; ++def FTINTRM_W_D : Float_Reg2<"ftintrm.w.d", FGR64Opnd>, R2F<0b1010000010>; ++def FTINTRM_L_S : Float_Reg2<"ftintrm.l.s", FGR32Opnd>, R2F<0b1010001001>; ++def FTINTRM_L_D : Float_Reg2<"ftintrm.l.d", FGR64Opnd>, R2F<0b1010001010>; ++def FTINTRP_W_S : Float_Reg2<"ftintrp.w.s", FGR32Opnd>, R2F<0b1010010001>; ++def FTINTRP_W_D : Float_Reg2<"ftintrp.w.d", FGR64Opnd>, R2F<0b1010010010>; ++def FTINTRP_L_S : Float_Reg2<"ftintrp.l.s", FGR32Opnd>, R2F<0b1010011001>; ++def FTINTRP_L_D : Float_Reg2<"ftintrp.l.d", FGR64Opnd>, R2F<0b1010011010>; ++def FTINTRZ_W_S : Float_Reg2<"ftintrz.w.s", FGR32Opnd>, R2F<0b1010100001>; ++def FTINTRZ_L_D : Float_Reg2<"ftintrz.l.d", FGR64Opnd>, R2F<0b1010101010>; ++def FTINTRNE_W_S : Float_Reg2<"ftintrne.w.s", FGR32Opnd>, R2F<0b1010110001>; ++def FTINTRNE_W_D : Float_Reg2<"ftintrne.w.d", FGR64Opnd>, R2F<0b1010110010>; ++def FTINTRNE_L_S : Float_Reg2<"ftintrne.l.s", FGR32Opnd>, R2F<0b1010111001>; ++def FTINTRNE_L_D : Float_Reg2<"ftintrne.l.d", FGR64Opnd>, R2F<0b1010111010>; ++ ++def FTINT_W_S : Float_CVT<"ftint.w.s", FGR32Opnd, FGR32Opnd>, R2F<0b1011000001>; ++def FTINT_W_D : Float_CVT<"ftint.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1011000010>; ++def FTINT_L_S : Float_CVT<"ftint.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1011001001>; ++def FTINT_L_D : Float_CVT<"ftint.l.d", FGR64Opnd, FGR64Opnd>, R2F<0b1011001010>; ++def FFINT_S_W : Float_CVT<"ffint.s.w", FGR32Opnd, FGR32Opnd>, R2F<0b1101000100>; ++def FFINT_S_L : Float_CVT<"ffint.s.l", FGR32Opnd, FGR64Opnd>, R2F<0b1101000110>; ++def FFINT_D_W : Float_CVT<"ffint.d.w", FGR64Opnd, FGR32Opnd>, R2F<0b1101001000>; ++def FFINT_D_L : Float_CVT<"ffint.d.l", FGR64Opnd, FGR64Opnd>, R2F<0b1101001010>; ++ ++def FRINT_S : Float_Reg2<"frint.s", FGR32Opnd, frint>, R2F<0b1110010001>; ++def FRINT_D : Float_Reg2<"frint.d", FGR64Opnd, frint>, R2F<0b1110010010>; ++ ++/// ++/// R3 ++/// ++def FADD_S : Float_Reg3<"fadd.s", FGR32Opnd, fadd>, R3F<0b000001>; ++def FADD_D : Float_Reg3<"fadd.d", FGR64Opnd, fadd>, R3F<0b000010>; ++def FSUB_S : Float_Reg3<"fsub.s", FGR32Opnd, fsub>, R3F<0b000101>; ++def FSUB_D : Float_Reg3<"fsub.d", FGR64Opnd, fsub>, R3F<0b000110>; ++def FMUL_S : Float_Reg3<"fmul.s", FGR32Opnd, fmul>, R3F<0b001001>; ++def FMUL_D : Float_Reg3<"fmul.d", FGR64Opnd, fmul>, R3F<0b001010>; ++def FDIV_S : Float_Reg3<"fdiv.s", FGR32Opnd, fdiv>, R3F<0b001101>; ++def FDIV_D : Float_Reg3<"fdiv.d", FGR64Opnd, fdiv>, R3F<0b001110>; ++def FMAX_S : Float_Reg3<"fmax.s", FGR32Opnd, fmaxnum_ieee>, R3F<0b010001>; ++def FMAX_D : Float_Reg3<"fmax.d", FGR64Opnd, fmaxnum_ieee>, R3F<0b010010>; ++def FMIN_S : Float_Reg3<"fmin.s", FGR32Opnd, fminnum_ieee>, R3F<0b010101>; ++def FMIN_D : Float_Reg3<"fmin.d", FGR64Opnd, fminnum_ieee>, R3F<0b010110>; ++def FMAXA_S : Float_Reg3_Fmaxa<"fmaxa.s", FGR32Opnd>, R3F<0b011001>; ++def FMAXA_D : Float_Reg3_Fmaxa<"fmaxa.d", FGR64Opnd>, R3F<0b011010>; ++def FMINA_S : Float_Reg3_Fmaxa<"fmina.s", FGR32Opnd>, R3F<0b011101>; ++def FMINA_D : Float_Reg3_Fmaxa<"fmina.d", FGR64Opnd>, R3F<0b011110>; ++def FSCALEB_S : Float_Reg3<"fscaleb.s", FGR32Opnd>, R3F<0b100001>; ++def FSCALEB_D : Float_Reg3<"fscaleb.d", FGR64Opnd>, R3F<0b100010>; ++def FCOPYSIGN_S : Float_Reg3<"fcopysign.s", FGR32Opnd, fcopysign>, R3F<0b100101>; ++def FCOPYSIGN_D : Float_Reg3<"fcopysign.d", FGR64Opnd, fcopysign>, R3F<0b100110>; ++/// ++/// R4_IMM21 ++/// ++def FMADD_S : Mul_Reg4<"fmadd.s", FGR32Opnd>, R4MUL<0b0001>; ++def FMADD_D : Mul_Reg4<"fmadd.d", FGR64Opnd>, R4MUL<0b0010>; ++def FMSUB_S : Mul_Reg4<"fmsub.s", FGR32Opnd>, R4MUL<0b0101>; 
++def FMSUB_D : Mul_Reg4<"fmsub.d", FGR64Opnd>, R4MUL<0b0110>; ++def FNMADD_S : NMul_Reg4<"fnmadd.s", FGR32Opnd>, R4MUL<0b1001>; ++def FNMADD_D : NMul_Reg4<"fnmadd.d", FGR64Opnd>, R4MUL<0b1010>; ++def FNMSUB_S : NMul_Reg4<"fnmsub.s", FGR32Opnd>, R4MUL<0b1101>; ++def FNMSUB_D : NMul_Reg4<"fnmsub.d", FGR64Opnd>, R4MUL<0b1110>; ++ ++ ++// fmadd: fj * fk + fa ++def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa), ++ (FMADD_D $fj, $fk, $fa)>; ++ ++def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa), ++ (FMADD_S $fj, $fk, $fa)>; ++ ++ ++// fmsub: fj * fk - fa ++def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), ++ (FMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), ++ (FMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++ ++// fnmadd: -(fj * fk + fa) ++def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), ++ (FNMADD_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), ++ (FNMADD_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++// fnmsub: -(fj * fk - fa) ++def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, FGR64Opnd:$fa), ++ (FNMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, FGR32Opnd:$fa), ++ (FNMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++let Pattern = [] in { ++defm S : FCMP_COND_M<0b01, "s", FGR32Opnd>; ++defm D : FCMP_COND_M<0b10, "d", FGR64Opnd>; ++} ++// ++//defm S : FCmp_Pats; ++//defm D : FCmp_Pats; ++ ++/// ++/// Float point branching ++/// ++def LoongArch_BRANCH_F : PatLeaf<(i32 0)>; ++def LoongArch_BRANCH_T : PatLeaf<(i32 1)>; ++ ++def BCEQZ : BceqzBr<"bceqz", brtarget, LoongArch_BRANCH_F>, R1_BCEQZ<0>; ++def BCNEZ : BceqzBr<"bcnez", brtarget, LoongArch_BRANCH_T>, R1_BCEQZ<1>; ++ ++/// ++/// FMOV ++/// ++def MOVGR2FCSR : Gpr_2_Fcsr<"movgr2fcsr", GPR64Opnd>, MOVGPR2FCSR; ++def MOVFCSR2GR : Fcsr_2_Gpr<"movfcsr2gr", GPR64Opnd>, MOVFCSR2GPR; ++def MOVFR2CF : Fgr_2_Fcfr<"movfr2cf", FGR64Opnd>, MOVFGR2FCFR; ++def MOVCF2FR : Fcfr_2_Fgr<"movcf2fr", FGR64Opnd>, MOVFCFR2FGR; ++def MOVGR2CF : Gpr_2_Fcfr<"movgr2cf", GPR64Opnd>, MOVGPR2FCFR; ++def MOVCF2GR : Fcfr_2_Gpr<"movcf2gr", GPR64Opnd>, MOVFCFR2GPR; ++ ++let isCodeGenOnly = 1 in { ++ def MOVFR2CF32 : Fgr_2_Fcfr<"movfr2cf", FGR32Opnd>, MOVFGR2FCFR; ++ def MOVCF2FR32 : Fcfr_2_Fgr<"movcf2fr", FGR32Opnd>, MOVFCFR2FGR; ++ def MOVGR2CF32 : Gpr_2_Fcfr<"movgr2cf", GPR32Opnd>, MOVGPR2FCFR; ++ def MOVCF2GR32 : Fcfr_2_Gpr<"movcf2gr", GPR32Opnd>, MOVFCFR2GPR; ++} ++ ++class Sel_Reg4 ++ : InstForm<(outs RO:$fd), (ins FCFROpnd:$ca, RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $ca"), ++ [(set RO:$fd, (LoongArchFSEL RO:$fj, FCFROpnd:$ca, RO:$fk))], ++ FrmR, opstr>{ ++ let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6]; ++ let hasFCCRegOperand = 1; ++ } ++ ++def FSEL_T_S : Sel_Reg4<"fsel", FGR32Opnd>, R4SEL; ++let isCodeGenOnly = 1 in { ++ def FSEL_T_D : Sel_Reg4<"fsel", FGR64Opnd>, R4SEL; ++} ++ ++/// ++/// Mem access ++/// ++def FLD_S : FLd<"fld.s", FGR32Opnd, mem, load>, LOAD_STORE<0b1100>; ++def FST_S : FSt<"fst.s", FGR32Opnd, mem, store>, LOAD_STORE<0b1101>; ++def FLD_D : FLd<"fld.d", FGR64Opnd, mem, load>, LOAD_STORE<0b1110>; ++def FST_D : FSt<"fst.d", FGR64Opnd, mem, store>, LOAD_STORE<0b1111>; ++ ++def FLDX_S : FLDX<"fldx.s", FGR32Opnd, load>, R3MF<0b01100000>; ++def FLDX_D 
: FLDX<"fldx.d", FGR64Opnd, load>, R3MF<0b01101000>; ++def FSTX_S : FSTX<"fstx.s", FGR32Opnd, store>, R3MF<0b01110000>; ++def FSTX_D : FSTX<"fstx.d", FGR64Opnd, store>, R3MF<0b01111000>; ++ ++def FLDGT_S : Float_Int_Reg3<"fldgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101000>; ++def FLDGT_D : Float_Int_Reg3<"fldgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101001>; ++def FLDLE_S : Float_Int_Reg3<"fldle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101010>; ++def FLDLE_D : Float_Int_Reg3<"fldle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101011>; ++def FSTGT_S : Float_STGT_LE<"fstgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101100>; ++def FSTGT_D : Float_STGT_LE<"fstgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101101>; ++def FSTLE_S : Float_STGT_LE<"fstle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101110>; ++def FSTLE_D : Float_STGT_LE<"fstle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101111>; ++ ++let isPseudo = 1, isCodeGenOnly = 1 in { ++ def PseudoFFINT_S_W : Float_CVT<"", FGR32Opnd, GPR32Opnd>; ++ def PseudoFFINT_D_W : Float_CVT<"", FGR64Opnd, GPR32Opnd>; ++ def PseudoFFINT_S_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; ++ def PseudoFFINT_D_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; ++} ++ ++def : LoongArchPat<(f32 (fpround FGR64Opnd:$src)), ++ (FCVT_S_D FGR64Opnd:$src)>; ++def : LoongArchPat<(f64 (fpextend FGR32Opnd:$src)), ++ (FCVT_D_S FGR32Opnd:$src)>; ++ ++def : LoongArchPat<(f32 (sint_to_fp GPR32Opnd:$src)), ++ (PseudoFFINT_S_W GPR32Opnd:$src)>; ++def : LoongArchPat<(f64 (sint_to_fp GPR32Opnd:$src)), ++ (PseudoFFINT_D_W GPR32Opnd:$src)>; ++def : LoongArchPat<(f32 (sint_to_fp GPR64Opnd:$src)), ++ (EXTRACT_SUBREG (PseudoFFINT_S_L GPR64Opnd:$src), sub_lo)>; ++def : LoongArchPat<(f64 (sint_to_fp GPR64Opnd:$src)), ++ (PseudoFFINT_D_L GPR64Opnd:$src)>; ++ ++def : LoongArchPat<(f32 fpimm0), (MOVGR2FR_W ZERO)>; ++def : LoongArchPat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W ZERO))>; ++def : LoongArchPat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W ZERO, 1)))>; ++def : LoongArchPat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D ZERO_64, 1)))>; ++ ++// Patterns for loads/stores with a reg+imm operand. 
++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++} ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FTINTRZ_W_S FGR32Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), ++ (FTINTRZ_L_D FGR64Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FCVT_D_S (FTINTRZ_W_S FGR32Opnd:$src))>; ++ ++def : LoongArchPat<(f32 (fcopysign FGR32Opnd:$lhs, FGR64Opnd:$rhs)), ++ (FCOPYSIGN_S FGR32Opnd:$lhs, (FCVT_S_D FGR64Opnd:$rhs))>; ++def : LoongArchPat<(f64 (fcopysign FGR64Opnd:$lhs, FGR32Opnd:$rhs)), ++ (FCOPYSIGN_D FGR64Opnd:$lhs, (FCVT_D_S FGR32Opnd:$rhs))>; ++ ++let PrintMethod = "printFCCOperand",EncoderMethod = "getFCMPEncoding" in ++ def condcode : Operand; ++ ++class CEQS_FT : ++ InstForm<(outs), (ins RC:$fj, RC:$fk, condcode:$cond), ++ !strconcat("fcmp.$cond.", typestr, "\t$$fcc0, $fj, $fk"), ++ [(OpNode RC:$fj, RC:$fk, imm:$cond)], FrmFR, ++ !strconcat("fcmp.$cond.", typestr)>, HARDFLOAT { ++ let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7]; ++ let isCodeGenOnly = 1; ++ let hasFCCRegOperand = 1; ++} ++ ++def FCMP_S32 : CEQS_FT<"s", FGR32, LoongArchFPCmp>, CEQS_FM<0b01> { ++ bits<3> cd = 0; ++} ++def FCMP_D64 : CEQS_FT<"d", FGR64, LoongArchFPCmp>, CEQS_FM<0b10>{ ++ bits<3> cd = 0; ++} ++ ++ ++//multiclass FCmp_Pats2 { ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++// ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++// } ++// ++//defm S : FCmp_Pats2; ++//defm D : FCmp_Pats2; ++ ++let usesCustomInserter = 1 in { ++ class Select_Pseudo : ++ LoongArchPseudo<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>; ++ ++ class SelectFP_Pseudo_T : ++ LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (LoongArchCMovFP_T RC:$T, FCFROpnd:$cond, RC:$F))]>; ++ ++ class SelectFP_Pseudo_F : ++ LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (LoongArchCMovFP_F RC:$T, FCFROpnd:$cond, RC:$F))]>; ++} ++ ++def PseudoSELECT_I : Select_Pseudo; ++def PseudoSELECT_I64 : Select_Pseudo; ++def PseudoSELECT_S : Select_Pseudo; ++def PseudoSELECT_D64 : Select_Pseudo; ++ ++def PseudoSELECTFP_T_I : SelectFP_Pseudo_T; ++def PseudoSELECTFP_T_I64 : SelectFP_Pseudo_T; ++ ++def PseudoSELECTFP_F_I 
: SelectFP_Pseudo_F; ++def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F; ++ ++class ABSS_FT : ++ InstForm<(outs DstRC:$fd), (ins SrcRC:$fj), !strconcat(opstr, "\t$fd, $fj"), ++ [(set DstRC:$fd, (OpNode SrcRC:$fj))], FrmFR, opstr>; ++ ++def TRUNC_W_D : ABSS_FT<"ftintrz.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1010100010>; ++ ++def FTINTRZ_L_S : ABSS_FT<"ftintrz.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1010101001>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), ++ (TRUNC_W_D FGR64Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FTINTRZ_L_S FGR32Opnd:$src)>; ++ ++def : Pat<(fcanonicalize FGR32Opnd:$src), (FMAX_S $src, $src)>; ++def : Pat<(fcanonicalize FGR64Opnd:$src), (FMAX_D $src, $src)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (bitconvert FGR32Opnd:$src)))), ++ (MOVFR2GR_DS FGR32Opnd:$src)>; +diff --git a/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/lib/Target/LoongArch/LoongArchLASXInstrFormats.td +new file mode 100644 +index 00000000..8e255f85 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLASXInstrFormats.td +@@ -0,0 +1,448 @@ ++//===- LoongArchLASXInstrFormats.td - LoongArch LASX Instruction Formats ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++class LASXInst : InstLA<(outs), (ins), "", [], FrmOther>, ++ EXT_LASX { ++} ++ ++class LASXCBranch : LASXInst { ++} ++ ++class LASXSpecial : LASXInst { ++} ++ ++class LASXPseudo pattern>: ++ LoongArchPseudo { ++ let Predicates = [HasLASX]; ++} ++ ++class LASX_3R op>: LASXInst { ++ bits<5> xk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_4R op>: LASXInst { ++ bits<5> xa; ++ bits<5> xk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = xa; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_XVFCMP op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> xk; ++ bits<5> cond; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I12_S op>: LASXInst { ++ bits<5> xd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI12_S op>: LASXInst { ++ bits<5> xd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI11_S op>: LASXInst { ++ bits<5> xd; ++ bits<16> addr; ++ ++ let Inst{31-21} = op; ++ let Inst{20-10} = addr{10-0}; ++ let Inst{9-5} = addr{15-11}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI10_S op>: LASXInst { ++ bits<5> xd; ++ bits<15> addr; ++ ++ let Inst{31-20} = op; ++ let Inst{19-10} = addr{9-0}; ++ let Inst{9-5} = addr{14-10}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI9_S op>: LASXInst { ++ bits<5> xd; ++ bits<14> addr; ++ ++ let Inst{31-19} = op; ++ let Inst{18-10} = addr{8-0}; ++ let Inst{9-5} = addr{13-9}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx5 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<5> idx; ++ ++ let Inst{31-23} = op; ++ let Inst{22-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx2 op>: LASXInst { 
++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<2> idx; ++ ++ let Inst{31-20} = op; ++ let Inst{19-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx3 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<3> idx; ++ ++ let Inst{31-21} = op; ++ let Inst{20-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx4 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<4> idx; ++ ++ let Inst{31-22} = op; ++ let Inst{21-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_3R_2GP op>: LASXInst { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_3R_1GP op>: LASXInst { ++ bits<5> rk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5 op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> si5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = si5; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5_mode_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> mode; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = mode; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_2R op>: LASXInst { ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SET op>: LASXInst { ++ bits<5> xj; ++ bits<3> cd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = xj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class LASX_2R_1GP op>: LASXInst { ++ bits<5> rj; ++ bits<5> xd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I3_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I4_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I6_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<6> ui6; ++ ++ let Inst{31-16} = op; ++ let Inst{15-10} = ui6; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I2_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I3_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_ELM_COPY_U3 op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_ELM_COPY_U2 op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_I1_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = xj; ++ let 
Inst{4-0} = xd; ++} ++ ++class LASX_I2_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I7_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<7> ui7; ++ ++ let Inst{31-17} = op; ++ let Inst{16-10} = ui7; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_1R_I13 op>: LASXInst { ++ bits<13> i13; ++ bits<5> xd; ++ ++ let Inst{31-18} = op; ++ let Inst{17-5} = i13; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I8_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<8> ui8; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = ui8; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++ ++////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ++class LASX_I1_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I4_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_ELM_COPY_B op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_ELM_COPY_D op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_Addr_SI8_idx1 op>: LASXInst { ++ bits<5> xd; ++ bits<13> addr; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18-11} = addr{7-0}; ++ let Inst{10} = idx; ++ let Inst{9-5} = addr{12-8}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_1R_I13_I10 op>: LASXInst { ++ bits<10> i10; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-5} = i10; ++ let Inst{4-0} = xd; ++} ++ ++ ++ ++ ++ ++ +diff --git a/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +new file mode 100644 +index 00000000..01a6f375 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -0,0 +1,5666 @@ ++//===- LoongArchLASXInstrInfo.td - loongson LASX instructions -*- tablegen ------------*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes loongson ASX instructions. 
++// ++//===----------------------------------------------------------------------===// ++def SDT_XVPERMI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>]>; ++def SDT_XVSHFI : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++def SDT_XVBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++ ++def SDT_INSVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def SDT_XVPICKVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def SDT_XVSHUF4I : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def LoongArchXVSHUFI : SDNode<"LoongArchISD::XVSHFI", SDT_XVSHFI>; ++ ++def LoongArchXVSELI : SDNode<"LoongArchISD::XVSELI", SDT_XVSHFI>; ++ ++def LoongArchXVPERMI : SDNode<"LoongArchISD::XVPERMI", SDT_XVPERMI>; ++ ++def LoongArchXVBROADCAST : SDNode<"LoongArchISD::XVBROADCAST", SDT_XVBROADCAST>; ++ ++def LoongArchINSVE : SDNode<"LoongArchISD::INSVE", SDT_INSVE>; ++ ++def LoongArchXVSHUF4I : SDNode<"LoongArchISD::XVSHUF4I", SDT_XVSHUF4I>; ++ ++def LoongArchXVPICKVE : SDNode<"LoongArchISD::XVPICKVE", SDT_INSVE>; ++ ++def xvbroadcast_v32i8 : PatFrag<(ops node:$v1), ++ (v32i8 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v16i16 : PatFrag<(ops node:$v1), ++ (v16i16 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v8i32 : PatFrag<(ops node:$v1), ++ (v8i32 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v4i64 : PatFrag<(ops node:$v1), ++ (v4i64 (LoongArchXVBROADCAST node:$v1))>; ++ ++ ++def vfseteq_v8f32 : vfsetcc_type; ++def vfseteq_v4f64 : vfsetcc_type; ++def vfsetge_v8f32 : vfsetcc_type; ++def vfsetge_v4f64 : vfsetcc_type; ++def vfsetgt_v8f32 : vfsetcc_type; ++def vfsetgt_v4f64 : vfsetcc_type; ++def vfsetle_v8f32 : vfsetcc_type; ++def vfsetle_v4f64 : vfsetcc_type; ++def vfsetlt_v8f32 : vfsetcc_type; ++def vfsetlt_v4f64 : vfsetcc_type; ++def vfsetne_v8f32 : vfsetcc_type; ++def vfsetne_v4f64 : vfsetcc_type; ++def vfsetoeq_v8f32 : vfsetcc_type; ++def vfsetoeq_v4f64 : vfsetcc_type; ++def vfsetoge_v8f32 : vfsetcc_type; ++def vfsetoge_v4f64 : vfsetcc_type; ++def vfsetogt_v8f32 : vfsetcc_type; ++def vfsetogt_v4f64 : vfsetcc_type; ++def vfsetole_v8f32 : vfsetcc_type; ++def vfsetole_v4f64 : vfsetcc_type; ++def vfsetolt_v8f32 : vfsetcc_type; ++def vfsetolt_v4f64 : vfsetcc_type; ++def vfsetone_v8f32 : vfsetcc_type; ++def vfsetone_v4f64 : vfsetcc_type; ++def vfsetord_v8f32 : vfsetcc_type; ++def vfsetord_v4f64 : vfsetcc_type; ++def vfsetun_v8f32 : vfsetcc_type; ++def vfsetun_v4f64 : vfsetcc_type; ++def vfsetueq_v8f32 : vfsetcc_type; ++def vfsetueq_v4f64 : vfsetcc_type; ++def vfsetuge_v8f32 : vfsetcc_type; ++def vfsetuge_v4f64 : vfsetcc_type; ++def vfsetugt_v8f32 : vfsetcc_type; ++def vfsetugt_v4f64 : vfsetcc_type; ++def vfsetule_v8f32 : vfsetcc_type; ++def vfsetule_v4f64 : vfsetcc_type; ++def vfsetult_v8f32 : vfsetcc_type; ++def vfsetult_v4f64 : vfsetcc_type; ++def vfsetune_v8f32 : vfsetcc_type; ++def vfsetune_v4f64 : vfsetcc_type; ++ ++def xvsplati8 : PatFrag<(ops node:$e0), ++ (v32i8 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, 
node:$e0, node:$e0, node:$e0))>; ++def xvsplati16 : PatFrag<(ops node:$e0), ++ (v16i16 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati32 : PatFrag<(ops node:$e0), ++ (v8i32 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati64 : PatFrag<(ops node:$e0), ++ (v4i64 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplatf32 : PatFrag<(ops node:$e0), ++ (v8f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def xvsplatf64 : PatFrag<(ops node:$e0), ++ (v4f64 (build_vector node:$e0, node:$e0))>; ++ ++def xvsplati8_uimm3 : SplatComplexPattern; ++def xvsplati16_uimm4 : SplatComplexPattern; ++ ++def xvsplati64_uimm6 : SplatComplexPattern; ++ ++def xvsplati8_simm5 : SplatComplexPattern; ++def xvsplati16_simm5 : SplatComplexPattern; ++def xvsplati32_simm5 : SplatComplexPattern; ++def xvsplati64_simm5 : SplatComplexPattern; ++ ++def xvsplat_imm_eq_1 : PatLeaf<(build_vector), [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def xvsplati64_imm_eq_1 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def xvbitclr_b : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_h : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_w : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_d : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl (v4i64 vsplati64_imm_eq_1), ++ node:$xa), ++ (bitconvert (v8i32 immAllOnesV))))>; ++ ++ ++ ++def xvsplati8_uimm5 : SplatComplexPattern; ++def xvsplati16_uimm5 : SplatComplexPattern; ++def xvsplati32_uimm5 : SplatComplexPattern; ++def xvsplati64_uimm5 : SplatComplexPattern; ++def xvsplati8_uimm8 : SplatComplexPattern; ++def xvsplati16_uimm8 : SplatComplexPattern; ++def xvsplati32_uimm8 : SplatComplexPattern; ++def xvsplati64_uimm8 : SplatComplexPattern; ++ ++ ++ ++def xvsplati8_uimm4 : SplatComplexPattern; ++def xvsplati16_uimm3 : SplatComplexPattern; ++def xvsplati32_uimm2 : SplatComplexPattern; ++def xvsplati64_uimm1 : SplatComplexPattern; ++ ++ ++// Patterns. 
++class LASXPat pred = [HasLASX]> : ++ Pat, Requires; ++ ++class LASX_4RF { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk, ROXA:$xa); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk, $xa"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk, ROXA:$xa))]; ++} ++ ++class LASX_3RF { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++class LASX_3R_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, ROXK:$xk, CC)))]; ++} ++ ++class LASX_LD { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(set ROXD:$xd, (TyNode (OpNode Addr:$addr)))]; ++ string DecoderMethod = "DecodeLASX256Mem"; ++} ++ ++class LASX_ST { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(OpNode (TyNode ROXD:$xd), Addr:$addr)]; ++ string DecoderMethod = "DecodeLASX256Mem"; ++} ++ ++class LASX_I8_U5_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm5:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt5:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U2_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U3_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U4_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_SDX_LA { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, RORK:$rk)]; ++} ++ ++class LASX_3R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++class LASX_LDX_LA { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); ++ list Pattern = [(set ROXD:$xd, (OpNode iPTR:$rj, RORK:$rk))]; ++} ++ ++class LASX_3R_4R_DESC_BASE { ++ dag OutOperandList 
= (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, ++ ROXK:$xk))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++class LASX_3R_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, GPR32Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, GPR32Opnd:$rk))]; ++} ++ ++ ++class LASX_3R_VREPLVE_DESC_BASE_N { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, GPR64Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); ++ list Pattern = []; ++} ++ ++ ++class LASX_VEC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++ ++ ++class LASX_3RF_DESC_BASE : ++ LASX_3R_DESC_BASE; ++ ++ ++class LASX_3R_DESC_BASE1 { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xk, ROXK:$xj))]; ++} ++ ++class LASX_3RF_DESC_BASE1 : ++ LASX_3R_DESC_BASE1; ++ ++ ++ ++class LASX_3R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (LoongArchVSHF ROXD:$xd_in, ROXJ:$xj, ++ ROXK:$xk))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I5_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$si5, CC)))]; ++} ++ ++class LASX_I5_SETCC_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; ++} ++ ++ ++class LASX_I5_U_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$ui5, CC)))]; ++} ++ ++class LASX_I5_U_SETCC_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_VEC_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xj, ROXK:$xk), ++ [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]>; ++ ++ ++class LASX_I5_U_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; ++} ++ ++ ++class LASX_I5_U_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, 
Imm:$ui5))]; ++} ++ ++class LASX_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; ++} ++ ++class LASX_U5N_DESC_BASE : ++ LASX_U5_DESC_BASE; ++ ++class LASX_U5_4R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; ++} ++ ++class LASX_SET_DESC_BASE { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROXD:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $xj"); ++ list Pattern = []; ++} ++ ++class LASX_2RF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; ++} ++ ++class LASX_I5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$si5))]; ++} ++ ++class LASX_I5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; ++} ++ ++ ++class LASX_2R_REPL_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROS:$rj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj"); ++ list Pattern = [(set ROXD:$xd, (VT (OpNode ROS:$rj)))]; ++} ++ ++class LASX_XVEXTEND_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (DTy (OpNode (STy ROXJ:$xj))))]; ++} ++ ++class LASX_RORI_U3_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_RORI_U4_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_RORI_U5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_RORI_U6_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_BIT_3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, 
ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_BIT_4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_BIT_5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_BIT_6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_BIT_2_4O_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui2))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_BIT_2_4ON : ++ LASX_BIT_2_4O_DESC_BASE; ++ ++class LASX_BIT_3_4O_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui3))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_BIT_3_4ON : ++ LASX_BIT_3_4O_DESC_BASE; ++ ++class LASX_INSERT_U3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui3)))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_INSERT_U2_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui2)))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_COPY_U2_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui2"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui2))]; ++} ++ ++class LASX_COPY_U3_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui3"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui3))]; ++} ++ ++class LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; ++} ++ ++class LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; ++} ++ ++class LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic { ++ dag 
OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm2:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt2:$ui2))]; ++} ++ ++class LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm1:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui1"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt1:$ui1))]; ++} ++ ++class LASX_XVBROADCAST_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode (TyNode ROXJ:$xj)))]; ++} ++ ++class LASX_2R_U3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; ++} ++ ++class LASX_2R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; ++} ++ ++class LASX_2R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; ++} ++ ++class LASX_2R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt6:$ui6))]; ++} ++ ++class LASX_BIT_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui3))]; ++} ++ ++class LASX_BIT_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui4))]; ++} ++ ++class LASX_BIT_U5_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; ++} ++ ++class LASX_BIT_U6_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui6))]; ++} ++ ++class LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class 
LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in,ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in,ROXJ:$xj, Imm:$ui4))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_N4_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui6))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_D_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt4:$ui4))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt6:$ui6))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U7_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U8_SELECT { ++ dag 
OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, vsplat_uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, xvsplati8_uimm8:$ui8, ROXJ:$xj))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I8_O4_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I8_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; ++} ++ ++class LASX_2R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; ++} ++ ++class LASX_I13_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins immOp:$i13); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $i13"); ++ list Pattern = [(set ROXD:$xd, (OpNode (Ty simm13:$i13)))]; ++ string DecoderMethod = "DecodeLASX256Mem13"; ++} ++ ++class LASX_I13_DESC_BASE_tmp { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins vsplat_simm10:$i10); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $i10"); ++ list Pattern = []; ++ bit hasSideEffects = 0; ++ string DecoderMethod = "DecodeLASX256Mem10"; ++} ++ ++class LASX_BIT_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui8))]; ++} ++ ++class LASX_2RN_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = []; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++//encoding ++ ++def XVFMADD_S : LASX_4R<0b000010100001>, ++ LASX_4RF<"xvfmadd.s", int_loongarch_lasx_xvfmadd_s, LASX256WOpnd>; ++ ++def XVFMADD_D : LASX_4R<0b000010100010>, ++ LASX_4RF<"xvfmadd.d", int_loongarch_lasx_xvfmadd_d, LASX256DOpnd>; ++ ++ ++def XVFMSUB_S : LASX_4R<0b000010100101>, ++ LASX_4RF<"xvfmsub.s", int_loongarch_lasx_xvfmsub_s, LASX256WOpnd>; ++ ++def XVFMSUB_D : LASX_4R<0b000010100110>, ++ LASX_4RF<"xvfmsub.d", int_loongarch_lasx_xvfmsub_d, LASX256DOpnd>; ++ ++ ++def XVFNMADD_S : LASX_4R<0b000010101001>, ++ LASX_4RF<"xvfnmadd.s", int_loongarch_lasx_xvfnmadd_s, LASX256WOpnd>; ++ ++def XVFNMADD_D : LASX_4R<0b000010101010>, ++ LASX_4RF<"xvfnmadd.d", int_loongarch_lasx_xvfnmadd_d, LASX256DOpnd>; ++ ++ ++def XVFNMSUB_S : LASX_4R<0b000010101101>, ++ LASX_4RF<"xvfnmsub.s", int_loongarch_lasx_xvfnmsub_s, LASX256WOpnd>; ++ ++def XVFNMSUB_D : LASX_4R<0b000010101110>, ++ LASX_4RF<"xvfnmsub.d", int_loongarch_lasx_xvfnmsub_d, LASX256DOpnd>; ++ ++ ++// xvfmadd: xj * xk + xa ++def : LASXPat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), ++ (XVFMADD_D $xj, $xk, $xa)>; ++ ++def : LASXPat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), ++ (XVFMADD_S $xj, $xk, $xa)>; ++ ++ ++// xvfmsub: xj * xk - xa ++def : LASXPat<(fma v4f64:$xj, v4f64:$xk, (fneg 
v4f64:$xa)), ++ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++ ++// xvfnmadd: -(xj * xk + xa) ++def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++// xvfnmsub: -(xj * xk - xa) ++def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++ ++def XVFCMP_CAF_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.caf.s", int_loongarch_lasx_xvfcmp_caf_s, LASX256WOpnd>{ ++ bits<5> cond=0x0; ++ } ++ ++def XVFCMP_CAF_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.caf.d", int_loongarch_lasx_xvfcmp_caf_d, LASX256DOpnd>{ ++ bits<5> cond=0x0; ++ } ++ ++def XVFCMP_COR_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cor.s", vfsetord_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x14; ++ } ++ ++def XVFCMP_COR_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cor.d", vfsetord_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x14; ++ } ++ ++def XVFCMP_CUN_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cun.s", vfsetun_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x8; ++ } ++ ++def XVFCMP_CUN_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cun.d", vfsetun_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x8; ++ } ++ ++def XVFCMP_CUNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cune.s", vfsetune_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x18; ++ } ++ ++def XVFCMP_CUNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cune.d", vfsetune_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x18; ++ } ++ ++def XVFCMP_CUEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cueq.s", vfsetueq_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xc; ++ } ++ ++def XVFCMP_CUEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cueq.d", vfsetueq_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xc; ++ } ++ ++def XVFCMP_CEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.ceq.s", vfsetoeq_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x4; ++ } ++ ++def XVFCMP_CEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.ceq.d", vfsetoeq_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x4; ++ } ++ ++def XVFCMP_CNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cne.s", vfsetone_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x10; ++ } ++ ++def XVFCMP_CNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cne.d", vfsetone_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x10; ++ } ++ ++def XVFCMP_CLT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.clt.s", vfsetolt_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x2; ++ } ++ ++def XVFCMP_CLT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.clt.d", vfsetolt_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x2; ++ } ++ ++def XVFCMP_CULT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cult.s", vfsetult_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xa; ++ } ++ ++def XVFCMP_CULT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cult.d", vfsetult_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xa; ++ } ++ ++def XVFCMP_CLE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cle.s", vfsetole_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x6; ++ } ++ ++def XVFCMP_CLE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cle.d", vfsetole_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x6; ++ } ++ ++def XVFCMP_CULE_S : 
LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cule.s", vfsetule_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xe; ++ } ++ ++def XVFCMP_CULE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cule.d", vfsetule_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xe; ++ } ++ ++def XVFCMP_SAF_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.saf.s", int_loongarch_lasx_xvfcmp_saf_s, LASX256WOpnd>{ ++ bits<5> cond=0x1; ++ } ++ ++def XVFCMP_SAF_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.saf.d", int_loongarch_lasx_xvfcmp_saf_d, LASX256DOpnd>{ ++ bits<5> cond=0x1; ++ } ++ ++def XVFCMP_SOR_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sor.s", int_loongarch_lasx_xvfcmp_sor_s, LASX256WOpnd>{ ++ bits<5> cond=0x15; ++ } ++ ++def XVFCMP_SOR_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sor.d", int_loongarch_lasx_xvfcmp_sor_d, LASX256DOpnd>{ ++ bits<5> cond=0x15; ++ } ++ ++def XVFCMP_SUN_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sun.s", int_loongarch_lasx_xvfcmp_sun_s, LASX256WOpnd>{ ++ bits<5> cond=0x9; ++ } ++ ++def XVFCMP_SUN_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sun.d", int_loongarch_lasx_xvfcmp_sun_d, LASX256DOpnd>{ ++ bits<5> cond=0x9; ++ } ++ ++def XVFCMP_SUNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sune.s", int_loongarch_lasx_xvfcmp_sune_s, LASX256WOpnd>{ ++ bits<5> cond=0x19; ++ } ++ ++def XVFCMP_SUNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sune.d", int_loongarch_lasx_xvfcmp_sune_d, LASX256DOpnd>{ ++ bits<5> cond=0x19; ++ } ++ ++def XVFCMP_SUEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sueq.s", int_loongarch_lasx_xvfcmp_sueq_s, LASX256WOpnd>{ ++ bits<5> cond=0xd; ++ } ++ ++def XVFCMP_SUEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sueq.d", int_loongarch_lasx_xvfcmp_sueq_d, LASX256DOpnd>{ ++ bits<5> cond=0xd; ++ } ++ ++def XVFCMP_SEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.seq.s", int_loongarch_lasx_xvfcmp_seq_s, LASX256WOpnd>{ ++ bits<5> cond=0x5; ++ } ++ ++def XVFCMP_SEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.seq.d", int_loongarch_lasx_xvfcmp_seq_d, LASX256DOpnd>{ ++ bits<5> cond=0x5; ++ } ++ ++def XVFCMP_SNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sne.s", int_loongarch_lasx_xvfcmp_sne_s, LASX256WOpnd>{ ++ bits<5> cond=0x11; ++ } ++ ++def XVFCMP_SNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sne.d", int_loongarch_lasx_xvfcmp_sne_d, LASX256DOpnd>{ ++ bits<5> cond=0x11; ++ } ++ ++def XVFCMP_SLT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.slt.s", int_loongarch_lasx_xvfcmp_slt_s, LASX256WOpnd>{ ++ bits<5> cond=0x3; ++ } ++ ++def XVFCMP_SLT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.slt.d", int_loongarch_lasx_xvfcmp_slt_d, LASX256DOpnd>{ ++ bits<5> cond=0x3; ++ } ++ ++def XVFCMP_SULT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sult.s", int_loongarch_lasx_xvfcmp_sult_s, LASX256WOpnd>{ ++ bits<5> cond=0xb; ++ } ++ ++def XVFCMP_SULT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sult.d", int_loongarch_lasx_xvfcmp_sult_d, LASX256DOpnd>{ ++ bits<5> cond=0xb; ++ } ++ ++def XVFCMP_SLE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sle.s", int_loongarch_lasx_xvfcmp_sle_s, LASX256WOpnd>{ ++ bits<5> cond=0x7; ++ } ++ ++def XVFCMP_SLE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sle.d", int_loongarch_lasx_xvfcmp_sle_d, LASX256DOpnd>{ ++ bits<5> cond=0x7; ++ } ++ ++def XVFCMP_SULE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sule.s", int_loongarch_lasx_xvfcmp_sule_s, LASX256WOpnd>{ ++ bits<5> cond=0xf; ++ } ++ ++def 
XVFCMP_SULE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sule.d", int_loongarch_lasx_xvfcmp_sule_d, LASX256DOpnd>{ ++ bits<5> cond=0xf; ++ } ++ ++ ++def XVBITSEL_V : LASX_4R<0b000011010010>, ++ LASX_4RF<"xvbitsel.v", int_loongarch_lasx_xvbitsel_v, LASX256BOpnd>; ++ ++class LASX_BSEL_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xd_in, RO:$xs, RO:$xt), ++ [(set RO:$xd, (Ty (vselect RO:$xd_in, RO:$xt, RO:$xs)))]>, ++ PseudoInstExpansion<(XVBITSEL_V LASX256BOpnd:$xd, LASX256BOpnd:$xs, ++ LASX256BOpnd:$xt, LASX256BOpnd:$xd_in)> { ++ let Constraints = "$xd_in = $xd"; ++} ++ ++def XBSEL_B_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_H_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_W_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_D_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_FW_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_FD_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++ ++ ++ ++def XVSHUF_B : LASX_4R<0b000011010110>, ++ LASX_4RF<"xvshuf.b", int_loongarch_lasx_xvshuf_b, LASX256BOpnd>; ++ ++ ++def XVLD : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v32i8, LASX256BOpnd, mem>; ++ ++def XVST : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v32i8, LASX256BOpnd, mem_simm12>; ++ ++ ++class LASX_LD_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(set ROXD:$xd, (OpNode (TyNode (load Addr:$addr))))]; ++ string DecoderMethod = "DecodeLASX256memlsl"; ++} ++ ++ ++def XVLDREPL_B : LASX_SI12_S<0b0011001010>, ++ LASX_LD_DESC_BASE<"xvldrepl.b", xvbroadcast_v32i8, v32i8, LASX256BOpnd>; ++ ++def XVLDREPL_H : LASX_SI11_S<0b00110010010>, ++ LASX_LD_DESC_BASE<"xvldrepl.h", xvbroadcast_v16i16, v16i16, LASX256HOpnd, mem_simm11_lsl1, addrimm11lsl1>; ++ ++def XVLDREPL_W : LASX_SI10_S<0b001100100010>, ++ LASX_LD_DESC_BASE<"xvldrepl.w", xvbroadcast_v8i32, v8i32, LASX256WOpnd, mem_simm10_lsl2, addrimm10lsl2>; ++ ++def XVLDREPL_D : LASX_SI9_S<0b0011001000010>, ++ LASX_LD_DESC_BASE<"xvldrepl.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd, mem_simm9_lsl3, addrimm9lsl3>; ++ ++ ++def XVSTELM_B : LASX_SI8_idx5<0b001100111>, ++ LASX_I8_U5_DESC_BASE<"xvstelm.b", int_loongarch_lasx_xvstelm_b, simm8_32, immSExt8, LASX256BOpnd, GPR32Opnd>; ++ ++def XVSTELM_H : LASX_SI8_idx4<0b0011001101>, ++ LASX_I8_U4_DESC_BASE<"xvstelm.h", int_loongarch_lasx_xvstelm_h, immSExt8_1_O, immSExt8, LASX256HOpnd, GPR32Opnd>; ++ ++def XVSTELM_W : LASX_SI8_idx3<0b00110011001>, ++ LASX_I8_U3_DESC_BASE<"xvstelm.w", int_loongarch_lasx_xvstelm_w, immSExt8_2_O, immSExt8, LASX256WOpnd, GPR32Opnd>; ++ ++def XVSTELM_D : LASX_SI8_idx2<0b001100110001>, ++ LASX_I8_U2_DESC_BASE<"xvstelm.d", int_loongarch_lasx_xvstelm_d, immSExt8_3_O, immSExt8, LASX256DOpnd, GPR32Opnd>; ++ ++let mayLoad = 1, canFoldAsLoad = 1 in { ++ def XVLDX : LASX_3R_2GP<0b00111000010010000>, ++ LASX_LDX_LA<"xvldx", int_loongarch_lasx_xvldx, GPR64Opnd, LASX256BOpnd>; ++} ++ ++let mayStore = 1 in{ ++ def XVSTX : LASX_3R_2GP<0b00111000010011000>, ++ LASX_SDX_LA<"xvstx", int_loongarch_lasx_xvstx, GPR64Opnd, LASX256BOpnd>; ++} ++ ++ ++def XVSEQ_B : LASX_3R<0b01110100000000000>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.b", SETEQ, v32i8, LASX256BOpnd>; ++ ++def XVSEQ_H : LASX_3R<0b01110100000000001>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.h", SETEQ, v16i16, LASX256HOpnd>; ++ ++def XVSEQ_W : LASX_3R<0b01110100000000010>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.w", SETEQ, v8i32, LASX256WOpnd> ; ++ ++def XVSEQ_D : LASX_3R<0b01110100000000011>, IsCommutable, ++ 
LASX_3R_SETCC_DESC_BASE<"xvseq.d", SETEQ, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLE_B : LASX_3R<0b01110100000000100>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.b", SETLE, v32i8, LASX256BOpnd>; ++ ++def XVSLE_H : LASX_3R<0b01110100000000101>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.h", SETLE, v16i16, LASX256HOpnd>; ++ ++def XVSLE_W : LASX_3R<0b01110100000000110>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.w", SETLE, v8i32, LASX256WOpnd>; ++ ++def XVSLE_D : LASX_3R<0b01110100000000111>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.d", SETLE, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLE_BU : LASX_3R<0b01110100000001000>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.bu", SETULE, v32i8, LASX256BOpnd>; ++ ++def XVSLE_HU : LASX_3R<0b01110100000001001>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.hu", SETULE, v16i16, LASX256HOpnd>; ++ ++def XVSLE_WU : LASX_3R<0b01110100000001010>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.wu", SETULE, v8i32, LASX256WOpnd>; ++ ++def XVSLE_DU : LASX_3R<0b01110100000001011>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.du", SETULE, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLT_B : LASX_3R<0b01110100000001100>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.b", SETLT, v32i8, LASX256BOpnd>; ++ ++def XVSLT_H : LASX_3R<0b01110100000001101>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.h", SETLT, v16i16, LASX256HOpnd>; ++ ++def XVSLT_W : LASX_3R<0b01110100000001110>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.w", SETLT, v8i32, LASX256WOpnd>; ++ ++def XVSLT_D : LASX_3R<0b01110100000001111>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.d", SETLT, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLT_BU : LASX_3R<0b01110100000010000>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.bu", SETULT, v32i8, LASX256BOpnd>; ++ ++def XVSLT_HU : LASX_3R<0b01110100000010001>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.hu", SETULT, v16i16, LASX256HOpnd>; ++ ++def XVSLT_WU : LASX_3R<0b01110100000010010>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.wu", SETULT, v8i32, LASX256WOpnd>; ++ ++def XVSLT_DU : LASX_3R<0b01110100000010011>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.du", SETULT, v4i64, LASX256DOpnd>; ++ ++ ++def XVADD_B : LASX_3R<0b01110100000010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.b", add, LASX256BOpnd>; ++ ++def XVADD_H : LASX_3R<0b01110100000010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.h", add, LASX256HOpnd>; ++ ++def XVADD_W : LASX_3R<0b01110100000010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.w", add, LASX256WOpnd>; ++ ++def XVADD_D : LASX_3R<0b01110100000010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.d", add, LASX256DOpnd>; ++ ++ ++def XVSUB_B : LASX_3R<0b01110100000011000>, ++ LASX_3R_DESC_BASE<"xvsub.b", sub, LASX256BOpnd>; ++ ++def XVSUB_H : LASX_3R<0b01110100000011001>, ++ LASX_3R_DESC_BASE<"xvsub.h", sub, LASX256HOpnd>; ++ ++def XVSUB_W : LASX_3R<0b01110100000011010>, ++ LASX_3R_DESC_BASE<"xvsub.w", sub, LASX256WOpnd>; ++ ++def XVSUB_D : LASX_3R<0b01110100000011011>, ++ LASX_3R_DESC_BASE<"xvsub.d", sub, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_B : LASX_3R<0b01110100000111100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.b", int_loongarch_lasx_xvaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_H : LASX_3R<0b01110100000111101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.h", int_loongarch_lasx_xvaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_W : LASX_3R<0b01110100000111110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.w", int_loongarch_lasx_xvaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_D : LASX_3R<0b01110100000111111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.d", int_loongarch_lasx_xvaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWEV_H_B : 
LASX_3R<0b01110100001000000>, ++ LASX_3R_DESC_BASE<"xvsubwev.h.b", int_loongarch_lasx_xvsubwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWEV_W_H : LASX_3R<0b01110100001000001>, ++ LASX_3R_DESC_BASE<"xvsubwev.w.h", int_loongarch_lasx_xvsubwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWEV_D_W : LASX_3R<0b01110100001000010>, ++ LASX_3R_DESC_BASE<"xvsubwev.d.w", int_loongarch_lasx_xvsubwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWEV_Q_D : LASX_3R<0b01110100001000011>, ++ LASX_3R_DESC_BASE<"xvsubwev.q.d", int_loongarch_lasx_xvsubwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_B : LASX_3R<0b01110100001000100>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.b", int_loongarch_lasx_xvaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_H : LASX_3R<0b01110100001000101>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.h", int_loongarch_lasx_xvaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_W : LASX_3R<0b01110100001000110>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.w", int_loongarch_lasx_xvaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_D : LASX_3R<0b01110100001000111>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.d", int_loongarch_lasx_xvaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWOD_H_B : LASX_3R<0b01110100001001000>, ++ LASX_3R_DESC_BASE<"xvsubwod.h.b", int_loongarch_lasx_xvsubwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWOD_W_H : LASX_3R<0b01110100001001001>, ++ LASX_3R_DESC_BASE<"xvsubwod.w.h", int_loongarch_lasx_xvsubwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWOD_D_W : LASX_3R<0b01110100001001010>, ++ LASX_3R_DESC_BASE<"xvsubwod.d.w", int_loongarch_lasx_xvsubwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWOD_Q_D : LASX_3R<0b01110100001001011>, ++ LASX_3R_DESC_BASE<"xvsubwod.q.d", int_loongarch_lasx_xvsubwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_BU : LASX_3R<0b01110100001011100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.bu", int_loongarch_lasx_xvaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_HU : LASX_3R<0b01110100001011101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.hu", int_loongarch_lasx_xvaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_WU : LASX_3R<0b01110100001011110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.wu", int_loongarch_lasx_xvaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_DU : LASX_3R<0b01110100001011111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.du", int_loongarch_lasx_xvaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWEV_H_BU : LASX_3R<0b01110100001100000>, ++ LASX_3R_DESC_BASE<"xvsubwev.h.bu", int_loongarch_lasx_xvsubwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWEV_W_HU : LASX_3R<0b01110100001100001>, ++ LASX_3R_DESC_BASE<"xvsubwev.w.hu", int_loongarch_lasx_xvsubwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWEV_D_WU : LASX_3R<0b01110100001100010>, ++ LASX_3R_DESC_BASE<"xvsubwev.d.wu", int_loongarch_lasx_xvsubwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWEV_Q_DU : LASX_3R<0b01110100001100011>, ++ LASX_3R_DESC_BASE<"xvsubwev.q.du", int_loongarch_lasx_xvsubwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_BU : LASX_3R<0b01110100001100100>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.bu", int_loongarch_lasx_xvaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_HU : 
LASX_3R<0b01110100001100101>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.hu", int_loongarch_lasx_xvaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_WU : LASX_3R<0b01110100001100110>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.wu", int_loongarch_lasx_xvaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_DU : LASX_3R<0b01110100001100111>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.du", int_loongarch_lasx_xvaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWOD_H_BU : LASX_3R<0b01110100001101000>, ++ LASX_3R_DESC_BASE<"xvsubwod.h.bu", int_loongarch_lasx_xvsubwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWOD_W_HU : LASX_3R<0b01110100001101001>, ++ LASX_3R_DESC_BASE<"xvsubwod.w.hu", int_loongarch_lasx_xvsubwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWOD_D_WU : LASX_3R<0b01110100001101010>, ++ LASX_3R_DESC_BASE<"xvsubwod.d.wu", int_loongarch_lasx_xvsubwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWOD_Q_DU : LASX_3R<0b01110100001101011>, ++ LASX_3R_DESC_BASE<"xvsubwod.q.du", int_loongarch_lasx_xvsubwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_BU_B : LASX_3R<0b01110100001111100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.bu.b", int_loongarch_lasx_xvaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_HU_H : LASX_3R<0b01110100001111101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.hu.h", int_loongarch_lasx_xvaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_WU_W : LASX_3R<0b01110100001111110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.wu.w", int_loongarch_lasx_xvaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_DU_D : LASX_3R<0b01110100001111111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.du.d", int_loongarch_lasx_xvaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_BU_B : LASX_3R<0b01110100010000000>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.bu.b", int_loongarch_lasx_xvaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_HU_H : LASX_3R<0b01110100010000001>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.hu.h", int_loongarch_lasx_xvaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_WU_W : LASX_3R<0b01110100010000010>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.wu.w", int_loongarch_lasx_xvaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_DU_D : LASX_3R<0b01110100010000011>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.du.d", int_loongarch_lasx_xvaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSADD_B : LASX_3R<0b01110100010001100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.b", saddsat, LASX256BOpnd>; ++ ++def XVSADD_H : LASX_3R<0b01110100010001101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.h", saddsat, LASX256HOpnd>; ++ ++def XVSADD_W : LASX_3R<0b01110100010001110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.w", saddsat, LASX256WOpnd>; ++ ++def XVSADD_D : LASX_3R<0b01110100010001111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.d", saddsat, LASX256DOpnd>; ++ ++ ++def XVSSUB_B : LASX_3R<0b01110100010010000>, ++ LASX_3R_DESC_BASE<"xvssub.b", ssubsat, LASX256BOpnd>; ++ ++def XVSSUB_H : LASX_3R<0b01110100010010001>, ++ LASX_3R_DESC_BASE<"xvssub.h", ssubsat, LASX256HOpnd>; ++ ++def XVSSUB_W : LASX_3R<0b01110100010010010>, ++ LASX_3R_DESC_BASE<"xvssub.w", ssubsat, LASX256WOpnd>; ++ ++def XVSSUB_D : LASX_3R<0b01110100010010011>, ++ LASX_3R_DESC_BASE<"xvssub.d", ssubsat, LASX256DOpnd>; ++ ++ ++def XVSADD_BU : 
LASX_3R<0b01110100010010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.bu", uaddsat, LASX256BOpnd>; ++ ++def XVSADD_HU : LASX_3R<0b01110100010010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.hu", uaddsat, LASX256HOpnd>; ++ ++def XVSADD_WU : LASX_3R<0b01110100010010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.wu", uaddsat, LASX256WOpnd>; ++ ++def XVSADD_DU : LASX_3R<0b01110100010010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.du", uaddsat, LASX256DOpnd>; ++ ++ ++def XVSSUB_BU : LASX_3R<0b01110100010011000>, ++ LASX_3R_DESC_BASE<"xvssub.bu", usubsat, LASX256BOpnd>; ++ ++def XVSSUB_HU : LASX_3R<0b01110100010011001>, ++ LASX_3R_DESC_BASE<"xvssub.hu", usubsat, LASX256HOpnd>; ++ ++def XVSSUB_WU : LASX_3R<0b01110100010011010>, ++ LASX_3R_DESC_BASE<"xvssub.wu", usubsat, LASX256WOpnd>; ++ ++def XVSSUB_DU : LASX_3R<0b01110100010011011>, ++ LASX_3R_DESC_BASE<"xvssub.du", usubsat, LASX256DOpnd>; ++ ++ ++def XVHADDW_H_B : LASX_3R<0b01110100010101000>, ++ LASX_3R_DESC_BASE<"xvhaddw.h.b", int_loongarch_lasx_xvhaddw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHADDW_W_H : LASX_3R<0b01110100010101001>, ++ LASX_3R_DESC_BASE<"xvhaddw.w.h", int_loongarch_lasx_xvhaddw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHADDW_D_W : LASX_3R<0b01110100010101010>, ++ LASX_3R_DESC_BASE<"xvhaddw.d.w", int_loongarch_lasx_xvhaddw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHADDW_Q_D : LASX_3R<0b01110100010101011>, ++ LASX_3R_DESC_BASE<"xvhaddw.q.d", int_loongarch_lasx_xvhaddw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++ ++def XVHSUBW_H_B : LASX_3R<0b01110100010101100>, ++ LASX_3R_DESC_BASE<"xvhsubw.h.b", int_loongarch_lasx_xvhsubw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHSUBW_W_H : LASX_3R<0b01110100010101101>, ++ LASX_3R_DESC_BASE<"xvhsubw.w.h", int_loongarch_lasx_xvhsubw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHSUBW_D_W : LASX_3R<0b01110100010101110>, ++ LASX_3R_DESC_BASE<"xvhsubw.d.w", int_loongarch_lasx_xvhsubw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHSUBW_Q_D : LASX_3R<0b01110100010101111>, ++ LASX_3R_DESC_BASE<"xvhsubw.q.d", int_loongarch_lasx_xvhsubw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVHADDW_HU_BU : LASX_3R<0b01110100010110000>, ++ LASX_3R_DESC_BASE<"xvhaddw.hu.bu", int_loongarch_lasx_xvhaddw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHADDW_WU_HU : LASX_3R<0b01110100010110001>, ++ LASX_3R_DESC_BASE<"xvhaddw.wu.hu", int_loongarch_lasx_xvhaddw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHADDW_DU_WU : LASX_3R<0b01110100010110010>, ++ LASX_3R_DESC_BASE<"xvhaddw.du.wu", int_loongarch_lasx_xvhaddw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHADDW_QU_DU : LASX_3R<0b01110100010110011>, ++ LASX_3R_DESC_BASE<"xvhaddw.qu.du", int_loongarch_lasx_xvhaddw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++ ++def XVHSUBW_HU_BU : LASX_3R<0b01110100010110100>, ++ LASX_3R_DESC_BASE<"xvhsubw.hu.bu", int_loongarch_lasx_xvhsubw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHSUBW_WU_HU : LASX_3R<0b01110100010110101>, ++ LASX_3R_DESC_BASE<"xvhsubw.wu.hu", int_loongarch_lasx_xvhsubw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHSUBW_DU_WU : LASX_3R<0b01110100010110110>, ++ LASX_3R_DESC_BASE<"xvhsubw.du.wu", int_loongarch_lasx_xvhsubw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHSUBW_QU_DU : LASX_3R<0b01110100010110111>, ++ LASX_3R_DESC_BASE<"xvhsubw.qu.du", 
int_loongarch_lasx_xvhsubw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDA_B : LASX_3R<0b01110100010111000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.b", int_loongarch_lasx_xvadda_b, LASX256BOpnd>; ++ ++def XVADDA_H : LASX_3R<0b01110100010111001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.h", int_loongarch_lasx_xvadda_h, LASX256HOpnd>; ++ ++def XVADDA_W : LASX_3R<0b01110100010111010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.w", int_loongarch_lasx_xvadda_w, LASX256WOpnd>; ++ ++def XVADDA_D : LASX_3R<0b01110100010111011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.d", int_loongarch_lasx_xvadda_d, LASX256DOpnd>; ++ ++ ++def XVABSD_B : LASX_3R<0b01110100011000000>, ++ LASX_3R_DESC_BASE<"xvabsd.b", int_loongarch_lasx_xvabsd_b, LASX256BOpnd>; ++ ++def XVABSD_H : LASX_3R<0b01110100011000001>, ++ LASX_3R_DESC_BASE<"xvabsd.h", int_loongarch_lasx_xvabsd_h, LASX256HOpnd>; ++ ++def XVABSD_W : LASX_3R<0b01110100011000010>, ++ LASX_3R_DESC_BASE<"xvabsd.w", int_loongarch_lasx_xvabsd_w, LASX256WOpnd>; ++ ++def XVABSD_D : LASX_3R<0b01110100011000011>, ++ LASX_3R_DESC_BASE<"xvabsd.d", int_loongarch_lasx_xvabsd_d, LASX256DOpnd>; ++ ++ ++def XVABSD_BU : LASX_3R<0b01110100011000100>, ++ LASX_3R_DESC_BASE<"xvabsd.bu", int_loongarch_lasx_xvabsd_bu, LASX256BOpnd>; ++ ++def XVABSD_HU : LASX_3R<0b01110100011000101>, ++ LASX_3R_DESC_BASE<"xvabsd.hu", int_loongarch_lasx_xvabsd_hu, LASX256HOpnd>; ++ ++def XVABSD_WU : LASX_3R<0b01110100011000110>, ++ LASX_3R_DESC_BASE<"xvabsd.wu", int_loongarch_lasx_xvabsd_wu, LASX256WOpnd>; ++ ++def XVABSD_DU : LASX_3R<0b01110100011000111>, ++ LASX_3R_DESC_BASE<"xvabsd.du", int_loongarch_lasx_xvabsd_du, LASX256DOpnd>; ++ ++ ++def XVAVG_B : LASX_3R<0b01110100011001000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.b", int_loongarch_lasx_xvavg_b, LASX256BOpnd>; ++ ++def XVAVG_H : LASX_3R<0b01110100011001001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.h", int_loongarch_lasx_xvavg_h, LASX256HOpnd>; ++ ++def XVAVG_W : LASX_3R<0b01110100011001010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.w", int_loongarch_lasx_xvavg_w, LASX256WOpnd>; ++ ++def XVAVG_D : LASX_3R<0b01110100011001011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.d", int_loongarch_lasx_xvavg_d, LASX256DOpnd>; ++ ++ ++def XVAVG_BU : LASX_3R<0b01110100011001100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.bu", int_loongarch_lasx_xvavg_bu, LASX256BOpnd>; ++ ++def XVAVG_HU : LASX_3R<0b01110100011001101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.hu", int_loongarch_lasx_xvavg_hu, LASX256HOpnd>; ++ ++def XVAVG_WU : LASX_3R<0b01110100011001110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.wu", int_loongarch_lasx_xvavg_wu, LASX256WOpnd>; ++ ++def XVAVG_DU : LASX_3R<0b01110100011001111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.du", int_loongarch_lasx_xvavg_du, LASX256DOpnd>; ++ ++ ++def XVAVGR_B : LASX_3R<0b01110100011010000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.b", int_loongarch_lasx_xvavgr_b, LASX256BOpnd>; ++ ++def XVAVGR_H : LASX_3R<0b01110100011010001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.h", int_loongarch_lasx_xvavgr_h, LASX256HOpnd>; ++ ++def XVAVGR_W : LASX_3R<0b01110100011010010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.w", int_loongarch_lasx_xvavgr_w, LASX256WOpnd>; ++ ++def XVAVGR_D : LASX_3R<0b01110100011010011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.d", int_loongarch_lasx_xvavgr_d, LASX256DOpnd>; ++ ++ ++def XVAVGR_BU : LASX_3R<0b01110100011010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.bu", int_loongarch_lasx_xvavgr_bu, LASX256BOpnd>; ++ ++def XVAVGR_HU : 
LASX_3R<0b01110100011010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.hu", int_loongarch_lasx_xvavgr_hu, LASX256HOpnd>; ++ ++def XVAVGR_WU : LASX_3R<0b01110100011010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.wu", int_loongarch_lasx_xvavgr_wu, LASX256WOpnd>; ++ ++def XVAVGR_DU : LASX_3R<0b01110100011010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.du", int_loongarch_lasx_xvavgr_du, LASX256DOpnd>; ++ ++ ++def XVMAX_B : LASX_3R<0b01110100011100000>, ++ LASX_3R_DESC_BASE<"xvmax.b", smax, LASX256BOpnd>; ++ ++def XVMAX_H : LASX_3R<0b01110100011100001>, ++ LASX_3R_DESC_BASE<"xvmax.h", smax, LASX256HOpnd>; ++ ++def XVMAX_W : LASX_3R<0b01110100011100010>, ++ LASX_3R_DESC_BASE<"xvmax.w", smax, LASX256WOpnd>; ++ ++def XVMAX_D : LASX_3R<0b01110100011100011>, ++ LASX_3R_DESC_BASE<"xvmax.d", smax, LASX256DOpnd>; ++ ++ ++def XVMIN_B : LASX_3R<0b01110100011100100>, ++ LASX_3R_DESC_BASE<"xvmin.b", smin, LASX256BOpnd>; ++ ++def XVMIN_H : LASX_3R<0b01110100011100101>, ++ LASX_3R_DESC_BASE<"xvmin.h", smin, LASX256HOpnd>; ++ ++def XVMIN_W : LASX_3R<0b01110100011100110>, ++ LASX_3R_DESC_BASE<"xvmin.w", smin, LASX256WOpnd>; ++ ++def XVMIN_D : LASX_3R<0b01110100011100111>, ++ LASX_3R_DESC_BASE<"xvmin.d", smin, LASX256DOpnd>; ++ ++ ++def XVMAX_BU : LASX_3R<0b01110100011101000>, ++ LASX_3R_DESC_BASE<"xvmax.bu", umax, LASX256BOpnd>; ++ ++def XVMAX_HU : LASX_3R<0b01110100011101001>, ++ LASX_3R_DESC_BASE<"xvmax.hu", umax, LASX256HOpnd>; ++ ++def XVMAX_WU : LASX_3R<0b01110100011101010>, ++ LASX_3R_DESC_BASE<"xvmax.wu", umax, LASX256WOpnd>; ++ ++def XVMAX_DU : LASX_3R<0b01110100011101011>, ++ LASX_3R_DESC_BASE<"xvmax.du", umax, LASX256DOpnd>; ++ ++ ++def XVMIN_BU : LASX_3R<0b01110100011101100>, ++ LASX_3R_DESC_BASE<"xvmin.bu", umin, LASX256BOpnd>; ++ ++def XVMIN_HU : LASX_3R<0b01110100011101101>, ++ LASX_3R_DESC_BASE<"xvmin.hu", umin, LASX256HOpnd>; ++ ++def XVMIN_WU : LASX_3R<0b01110100011101110>, ++ LASX_3R_DESC_BASE<"xvmin.wu", umin, LASX256WOpnd>; ++ ++def XVMIN_DU : LASX_3R<0b01110100011101111>, ++ LASX_3R_DESC_BASE<"xvmin.du", umin, LASX256DOpnd>; ++ ++ ++def XVMUL_B : LASX_3R<0b01110100100001000>, ++ LASX_3R_DESC_BASE<"xvmul.b", mul, LASX256BOpnd>, IsCommutable; ++ ++def XVMUL_H : LASX_3R<0b01110100100001001>, ++ LASX_3R_DESC_BASE<"xvmul.h", mul, LASX256HOpnd>, IsCommutable; ++ ++def XVMUL_W : LASX_3R<0b01110100100001010>, ++ LASX_3R_DESC_BASE<"xvmul.w", mul, LASX256WOpnd>, IsCommutable; ++ ++def XVMUL_D : LASX_3R<0b01110100100001011>, ++ LASX_3R_DESC_BASE<"xvmul.d", mul, LASX256DOpnd>, IsCommutable; ++ ++ ++def XVMUH_B : LASX_3R<0b01110100100001100>, ++ LASX_3R_DESC_BASE<"xvmuh.b", int_loongarch_lasx_xvmuh_b, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMUH_H : LASX_3R<0b01110100100001101>, ++ LASX_3R_DESC_BASE<"xvmuh.h", int_loongarch_lasx_xvmuh_h, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMUH_W : LASX_3R<0b01110100100001110>, ++ LASX_3R_DESC_BASE<"xvmuh.w", int_loongarch_lasx_xvmuh_w, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMUH_D : LASX_3R<0b01110100100001111>, ++ LASX_3R_DESC_BASE<"xvmuh.d", int_loongarch_lasx_xvmuh_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMUH_BU : LASX_3R<0b01110100100010000>, ++ LASX_3R_DESC_BASE<"xvmuh.bu", int_loongarch_lasx_xvmuh_bu, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMUH_HU : LASX_3R<0b01110100100010001>, ++ LASX_3R_DESC_BASE<"xvmuh.hu", int_loongarch_lasx_xvmuh_hu, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMUH_WU : LASX_3R<0b01110100100010010>, ++ LASX_3R_DESC_BASE<"xvmuh.wu", 
int_loongarch_lasx_xvmuh_wu, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMUH_DU : LASX_3R<0b01110100100010011>, ++ LASX_3R_DESC_BASE<"xvmuh.du", int_loongarch_lasx_xvmuh_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_B : LASX_3R<0b01110100100100000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.b", int_loongarch_lasx_xvmulwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_H : LASX_3R<0b01110100100100001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.h", int_loongarch_lasx_xvmulwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_W : LASX_3R<0b01110100100100010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.w", int_loongarch_lasx_xvmulwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_D : LASX_3R<0b01110100100100011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.d", int_loongarch_lasx_xvmulwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_B : LASX_3R<0b01110100100100100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.b", int_loongarch_lasx_xvmulwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_H : LASX_3R<0b01110100100100101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.h", int_loongarch_lasx_xvmulwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_W : LASX_3R<0b01110100100100110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.w", int_loongarch_lasx_xvmulwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_D : LASX_3R<0b01110100100100111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.d", int_loongarch_lasx_xvmulwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_BU : LASX_3R<0b01110100100110000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.bu", int_loongarch_lasx_xvmulwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_HU : LASX_3R<0b01110100100110001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.hu", int_loongarch_lasx_xvmulwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_WU : LASX_3R<0b01110100100110010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.wu", int_loongarch_lasx_xvmulwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_DU : LASX_3R<0b01110100100110011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.du", int_loongarch_lasx_xvmulwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_BU : LASX_3R<0b01110100100110100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.bu", int_loongarch_lasx_xvmulwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_HU : LASX_3R<0b01110100100110101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.hu", int_loongarch_lasx_xvmulwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_WU : LASX_3R<0b01110100100110110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.wu", int_loongarch_lasx_xvmulwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_DU : LASX_3R<0b01110100100110111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.du", int_loongarch_lasx_xvmulwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_BU_B : LASX_3R<0b01110100101000000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.bu.b", int_loongarch_lasx_xvmulwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_HU_H : LASX_3R<0b01110100101000001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.hu.h", int_loongarch_lasx_xvmulwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_WU_W : LASX_3R<0b01110100101000010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.wu.w", int_loongarch_lasx_xvmulwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_DU_D : LASX_3R<0b01110100101000011>, ++ 
LASX_3R_DESC_BASE<"xvmulwev.q.du.d", int_loongarch_lasx_xvmulwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_BU_B : LASX_3R<0b01110100101000100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.bu.b", int_loongarch_lasx_xvmulwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_HU_H : LASX_3R<0b01110100101000101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.hu.h", int_loongarch_lasx_xvmulwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_WU_W : LASX_3R<0b01110100101000110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.wu.w", int_loongarch_lasx_xvmulwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_DU_D : LASX_3R<0b01110100101000111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.du.d", int_loongarch_lasx_xvmulwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADD_B : LASX_3R<0b01110100101010000>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.b", muladd, LASX256BOpnd>; ++ ++def XVMADD_H : LASX_3R<0b01110100101010001>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.h", muladd, LASX256HOpnd>; ++ ++def XVMADD_W : LASX_3R<0b01110100101010010>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.w", muladd, LASX256WOpnd>; ++ ++def XVMADD_D : LASX_3R<0b01110100101010011>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.d", muladd, LASX256DOpnd>; ++ ++ ++def XVMSUB_B : LASX_3R<0b01110100101010100>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.b", mulsub, LASX256BOpnd>; ++ ++def XVMSUB_H : LASX_3R<0b01110100101010101>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.h", mulsub, LASX256HOpnd>; ++ ++def XVMSUB_W : LASX_3R<0b01110100101010110>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.w", mulsub, LASX256WOpnd>; ++ ++def XVMSUB_D : LASX_3R<0b01110100101010111>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.d", mulsub, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_B : LASX_3R<0b01110100101011000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.b", int_loongarch_lasx_xvmaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_H : LASX_3R<0b01110100101011001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.h", int_loongarch_lasx_xvmaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_W : LASX_3R<0b01110100101011010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.w", int_loongarch_lasx_xvmaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_D : LASX_3R<0b01110100101011011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.d", int_loongarch_lasx_xvmaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_B : LASX_3R<0b01110100101011100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.b", int_loongarch_lasx_xvmaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_H : LASX_3R<0b01110100101011101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.h", int_loongarch_lasx_xvmaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_W : LASX_3R<0b01110100101011110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.w", int_loongarch_lasx_xvmaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_D : LASX_3R<0b01110100101011111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.d", int_loongarch_lasx_xvmaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_BU : LASX_3R<0b01110100101101000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu", int_loongarch_lasx_xvmaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_HU : LASX_3R<0b01110100101101001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu", int_loongarch_lasx_xvmaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_WU : LASX_3R<0b01110100101101010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu", 
int_loongarch_lasx_xvmaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_DU : LASX_3R<0b01110100101101011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du", int_loongarch_lasx_xvmaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_BU : LASX_3R<0b01110100101101100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu", int_loongarch_lasx_xvmaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_HU : LASX_3R<0b01110100101101101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu", int_loongarch_lasx_xvmaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_WU : LASX_3R<0b01110100101101110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu", int_loongarch_lasx_xvmaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_DU : LASX_3R<0b01110100101101111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du", int_loongarch_lasx_xvmaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_BU_B : LASX_3R<0b01110100101111000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu.b", int_loongarch_lasx_xvmaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_HU_H : LASX_3R<0b01110100101111001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu.h", int_loongarch_lasx_xvmaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_WU_W : LASX_3R<0b01110100101111010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu.w", int_loongarch_lasx_xvmaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_DU_D : LASX_3R<0b01110100101111011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du.d", int_loongarch_lasx_xvmaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_BU_B : LASX_3R<0b01110100101111100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu.b", int_loongarch_lasx_xvmaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_HU_H : LASX_3R<0b01110100101111101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu.h", int_loongarch_lasx_xvmaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_WU_W : LASX_3R<0b01110100101111110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu.w", int_loongarch_lasx_xvmaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_DU_D : LASX_3R<0b01110100101111111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du.d", int_loongarch_lasx_xvmaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVDIV_B : LASX_3R<0b01110100111000000>, ++ LASX_3R_DESC_BASE<"xvdiv.b", sdiv, LASX256BOpnd>; ++ ++def XVDIV_H : LASX_3R<0b01110100111000001>, ++ LASX_3R_DESC_BASE<"xvdiv.h", sdiv, LASX256HOpnd>; ++ ++def XVDIV_W : LASX_3R<0b01110100111000010>, ++ LASX_3R_DESC_BASE<"xvdiv.w", sdiv, LASX256WOpnd>; ++ ++def XVDIV_D : LASX_3R<0b01110100111000011>, ++ LASX_3R_DESC_BASE<"xvdiv.d", sdiv, LASX256DOpnd>; ++ ++ ++def XVMOD_B : LASX_3R<0b01110100111000100>, ++ LASX_3R_DESC_BASE<"xvmod.b", srem, LASX256BOpnd>; ++ ++def XVMOD_H : LASX_3R<0b01110100111000101>, ++ LASX_3R_DESC_BASE<"xvmod.h", srem, LASX256HOpnd>; ++ ++def XVMOD_W : LASX_3R<0b01110100111000110>, ++ LASX_3R_DESC_BASE<"xvmod.w", srem, LASX256WOpnd>; ++ ++def XVMOD_D : LASX_3R<0b01110100111000111>, ++ LASX_3R_DESC_BASE<"xvmod.d", srem, LASX256DOpnd>; ++ ++ ++def XVDIV_BU : LASX_3R<0b01110100111001000>, ++ LASX_3R_DESC_BASE<"xvdiv.bu", udiv, LASX256BOpnd>; ++ ++def XVDIV_HU : LASX_3R<0b01110100111001001>, ++ LASX_3R_DESC_BASE<"xvdiv.hu", udiv, LASX256HOpnd>; ++ ++def XVDIV_WU : LASX_3R<0b01110100111001010>, ++ LASX_3R_DESC_BASE<"xvdiv.wu", udiv, 
LASX256WOpnd>; ++ ++def XVDIV_DU : LASX_3R<0b01110100111001011>, ++ LASX_3R_DESC_BASE<"xvdiv.du", udiv, LASX256DOpnd>; ++ ++ ++def XVMOD_BU : LASX_3R<0b01110100111001100>, ++ LASX_3R_DESC_BASE<"xvmod.bu", urem, LASX256BOpnd>; ++ ++def XVMOD_HU : LASX_3R<0b01110100111001101>, ++ LASX_3R_DESC_BASE<"xvmod.hu", urem, LASX256HOpnd>; ++ ++def XVMOD_WU : LASX_3R<0b01110100111001110>, ++ LASX_3R_DESC_BASE<"xvmod.wu", urem, LASX256WOpnd>; ++ ++def XVMOD_DU : LASX_3R<0b01110100111001111>, ++ LASX_3R_DESC_BASE<"xvmod.du", urem, LASX256DOpnd>; ++ ++ ++def XVSLL_B : LASX_3R<0b01110100111010000>, ++ LASX_3R_DESC_BASE<"xvsll.b", shl, LASX256BOpnd>; ++ ++def XVSLL_H : LASX_3R<0b01110100111010001>, ++ LASX_3R_DESC_BASE<"xvsll.h", shl, LASX256HOpnd>; ++ ++def XVSLL_W : LASX_3R<0b01110100111010010>, ++ LASX_3R_DESC_BASE<"xvsll.w", shl, LASX256WOpnd>; ++ ++def XVSLL_D : LASX_3R<0b01110100111010011>, ++ LASX_3R_DESC_BASE<"xvsll.d", shl, LASX256DOpnd>; ++ ++ ++def XVSRL_B : LASX_3R<0b01110100111010100>, ++ LASX_3R_DESC_BASE<"xvsrl.b", srl, LASX256BOpnd>; ++ ++def XVSRL_H : LASX_3R<0b01110100111010101>, ++ LASX_3R_DESC_BASE<"xvsrl.h", srl, LASX256HOpnd>; ++ ++def XVSRL_W : LASX_3R<0b01110100111010110>, ++ LASX_3R_DESC_BASE<"xvsrl.w", srl, LASX256WOpnd>; ++ ++def XVSRL_D : LASX_3R<0b01110100111010111>, ++ LASX_3R_DESC_BASE<"xvsrl.d", srl, LASX256DOpnd>; ++ ++ ++def XVSRA_B : LASX_3R<0b01110100111011000>, ++ LASX_3R_DESC_BASE<"xvsra.b", sra, LASX256BOpnd>; ++ ++def XVSRA_H : LASX_3R<0b01110100111011001>, ++ LASX_3R_DESC_BASE<"xvsra.h", sra, LASX256HOpnd>; ++ ++def XVSRA_W : LASX_3R<0b01110100111011010>, ++ LASX_3R_DESC_BASE<"xvsra.w", sra, LASX256WOpnd>; ++ ++def XVSRA_D : LASX_3R<0b01110100111011011>, ++ LASX_3R_DESC_BASE<"xvsra.d", sra, LASX256DOpnd>; ++ ++ ++def XVROTR_B : LASX_3R<0b01110100111011100>, ++ LASX_3R_DESC_BASE<"xvrotr.b", int_loongarch_lasx_xvrotr_b, LASX256BOpnd>; ++ ++def XVROTR_H : LASX_3R<0b01110100111011101>, ++ LASX_3R_DESC_BASE<"xvrotr.h", int_loongarch_lasx_xvrotr_h, LASX256HOpnd>; ++ ++def XVROTR_W : LASX_3R<0b01110100111011110>, ++ LASX_3R_DESC_BASE<"xvrotr.w", int_loongarch_lasx_xvrotr_w, LASX256WOpnd>; ++ ++def XVROTR_D : LASX_3R<0b01110100111011111>, ++ LASX_3R_DESC_BASE<"xvrotr.d", int_loongarch_lasx_xvrotr_d, LASX256DOpnd>; ++ ++ ++def XVSRLR_B : LASX_3R<0b01110100111100000>, ++ LASX_3R_DESC_BASE<"xvsrlr.b", int_loongarch_lasx_xvsrlr_b, LASX256BOpnd>; ++ ++def XVSRLR_H : LASX_3R<0b01110100111100001>, ++ LASX_3R_DESC_BASE<"xvsrlr.h", int_loongarch_lasx_xvsrlr_h, LASX256HOpnd>; ++ ++def XVSRLR_W : LASX_3R<0b01110100111100010>, ++ LASX_3R_DESC_BASE<"xvsrlr.w", int_loongarch_lasx_xvsrlr_w, LASX256WOpnd>; ++ ++def XVSRLR_D : LASX_3R<0b01110100111100011>, ++ LASX_3R_DESC_BASE<"xvsrlr.d", int_loongarch_lasx_xvsrlr_d, LASX256DOpnd>; ++ ++ ++def XVSRAR_B : LASX_3R<0b01110100111100100>, ++ LASX_3R_DESC_BASE<"xvsrar.b", int_loongarch_lasx_xvsrar_b, LASX256BOpnd>; ++ ++def XVSRAR_H : LASX_3R<0b01110100111100101>, ++ LASX_3R_DESC_BASE<"xvsrar.h", int_loongarch_lasx_xvsrar_h, LASX256HOpnd>; ++ ++def XVSRAR_W : LASX_3R<0b01110100111100110>, ++ LASX_3R_DESC_BASE<"xvsrar.w", int_loongarch_lasx_xvsrar_w, LASX256WOpnd>; ++ ++def XVSRAR_D : LASX_3R<0b01110100111100111>, ++ LASX_3R_DESC_BASE<"xvsrar.d", int_loongarch_lasx_xvsrar_d, LASX256DOpnd>; ++ ++ ++def XVSRLN_B_H : LASX_3R<0b01110100111101001>, ++ LASX_3R_DESC_BASE<"xvsrln.b.h", int_loongarch_lasx_xvsrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRLN_H_W : LASX_3R<0b01110100111101010>, ++ LASX_3R_DESC_BASE<"xvsrln.h.w", 
int_loongarch_lasx_xvsrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRLN_W_D : LASX_3R<0b01110100111101011>, ++ LASX_3R_DESC_BASE<"xvsrln.w.d", int_loongarch_lasx_xvsrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRAN_B_H : LASX_3R<0b01110100111101101>, ++ LASX_3R_DESC_BASE<"xvsran.b.h", int_loongarch_lasx_xvsran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRAN_H_W : LASX_3R<0b01110100111101110>, ++ LASX_3R_DESC_BASE<"xvsran.h.w", int_loongarch_lasx_xvsran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRAN_W_D : LASX_3R<0b01110100111101111>, ++ LASX_3R_DESC_BASE<"xvsran.w.d", int_loongarch_lasx_xvsran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRLRN_B_H : LASX_3R<0b01110100111110001>, ++ LASX_3R_DESC_BASE<"xvsrlrn.b.h", int_loongarch_lasx_xvsrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRLRN_H_W : LASX_3R<0b01110100111110010>, ++ LASX_3R_DESC_BASE<"xvsrlrn.h.w", int_loongarch_lasx_xvsrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRLRN_W_D : LASX_3R<0b01110100111110011>, ++ LASX_3R_DESC_BASE<"xvsrlrn.w.d", int_loongarch_lasx_xvsrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRARN_B_H : LASX_3R<0b01110100111110101>, ++ LASX_3R_DESC_BASE<"xvsrarn.b.h", int_loongarch_lasx_xvsrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRARN_H_W : LASX_3R<0b01110100111110110>, ++ LASX_3R_DESC_BASE<"xvsrarn.h.w", int_loongarch_lasx_xvsrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRARN_W_D : LASX_3R<0b01110100111110111>, ++ LASX_3R_DESC_BASE<"xvsrarn.w.d", int_loongarch_lasx_xvsrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLN_B_H : LASX_3R<0b01110100111111001>, ++ LASX_3R_DESC_BASE<"xvssrln.b.h", int_loongarch_lasx_xvssrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLN_H_W : LASX_3R<0b01110100111111010>, ++ LASX_3R_DESC_BASE<"xvssrln.h.w", int_loongarch_lasx_xvssrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLN_W_D : LASX_3R<0b01110100111111011>, ++ LASX_3R_DESC_BASE<"xvssrln.w.d", int_loongarch_lasx_xvssrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRAN_B_H : LASX_3R<0b01110100111111101>, ++ LASX_3R_DESC_BASE<"xvssran.b.h", int_loongarch_lasx_xvssran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRAN_H_W : LASX_3R<0b01110100111111110>, ++ LASX_3R_DESC_BASE<"xvssran.h.w", int_loongarch_lasx_xvssran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRAN_W_D : LASX_3R<0b01110100111111111>, ++ LASX_3R_DESC_BASE<"xvssran.w.d", int_loongarch_lasx_xvssran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRN_B_H : LASX_3R<0b01110101000000001>, ++ LASX_3R_DESC_BASE<"xvssrlrn.b.h", int_loongarch_lasx_xvssrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRN_H_W : LASX_3R<0b01110101000000010>, ++ LASX_3R_DESC_BASE<"xvssrlrn.h.w", int_loongarch_lasx_xvssrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRN_W_D : LASX_3R<0b01110101000000011>, ++ LASX_3R_DESC_BASE<"xvssrlrn.w.d", int_loongarch_lasx_xvssrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRARN_B_H : LASX_3R<0b01110101000000101>, ++ LASX_3R_DESC_BASE<"xvssrarn.b.h", int_loongarch_lasx_xvssrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRARN_H_W : LASX_3R<0b01110101000000110>, ++ LASX_3R_DESC_BASE<"xvssrarn.h.w", int_loongarch_lasx_xvssrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def 
XVSSRARN_W_D : LASX_3R<0b01110101000000111>, ++ LASX_3R_DESC_BASE<"xvssrarn.w.d", int_loongarch_lasx_xvssrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLN_BU_H : LASX_3R<0b01110101000001001>, ++ LASX_3R_DESC_BASE<"xvssrln.bu.h", int_loongarch_lasx_xvssrln_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLN_HU_W : LASX_3R<0b01110101000001010>, ++ LASX_3R_DESC_BASE<"xvssrln.hu.w", int_loongarch_lasx_xvssrln_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLN_WU_D : LASX_3R<0b01110101000001011>, ++ LASX_3R_DESC_BASE<"xvssrln.wu.d", int_loongarch_lasx_xvssrln_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRAN_BU_H : LASX_3R<0b01110101000001101>, ++ LASX_3R_DESC_BASE<"xvssran.bu.h", int_loongarch_lasx_xvssran_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRAN_HU_W : LASX_3R<0b01110101000001110>, ++ LASX_3R_DESC_BASE<"xvssran.hu.w", int_loongarch_lasx_xvssran_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRAN_WU_D : LASX_3R<0b01110101000001111>, ++ LASX_3R_DESC_BASE<"xvssran.wu.d", int_loongarch_lasx_xvssran_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRN_BU_H : LASX_3R<0b01110101000010001>, ++ LASX_3R_DESC_BASE<"xvssrlrn.bu.h", int_loongarch_lasx_xvssrlrn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRN_HU_W : LASX_3R<0b01110101000010010>, ++ LASX_3R_DESC_BASE<"xvssrlrn.hu.w", int_loongarch_lasx_xvssrlrn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRN_WU_D : LASX_3R<0b01110101000010011>, ++ LASX_3R_DESC_BASE<"xvssrlrn.wu.d", int_loongarch_lasx_xvssrlrn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRARN_BU_H : LASX_3R<0b01110101000010101>, ++ LASX_3R_DESC_BASE<"xvssrarn.bu.h", int_loongarch_lasx_xvssrarn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRARN_HU_W : LASX_3R<0b01110101000010110>, ++ LASX_3R_DESC_BASE<"xvssrarn.hu.w", int_loongarch_lasx_xvssrarn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRARN_WU_D : LASX_3R<0b01110101000010111>, ++ LASX_3R_DESC_BASE<"xvssrarn.wu.d", int_loongarch_lasx_xvssrarn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITCLR_B : LASX_3R<0b01110101000011000>, ++ LASX_3R_DESC_BASE<"xvbitclr.b", xvbitclr_b, LASX256BOpnd>; ++ ++def XVBITCLR_H : LASX_3R<0b01110101000011001>, ++ LASX_3R_DESC_BASE<"xvbitclr.h", xvbitclr_h, LASX256HOpnd>; ++ ++def XVBITCLR_W : LASX_3R<0b01110101000011010>, ++ LASX_3R_DESC_BASE<"xvbitclr.w", xvbitclr_w, LASX256WOpnd>; ++ ++def XVBITCLR_D : LASX_3R<0b01110101000011011>, ++ LASX_3R_DESC_BASE<"xvbitclr.d", xvbitclr_d, LASX256DOpnd>; ++ ++ ++def XVBITSET_B : LASX_3R<0b01110101000011100>, ++ LASX_3R_DESC_BASE<"xvbitset.b", int_loongarch_lasx_xvbitset_b, LASX256BOpnd>; ++ ++def XVBITSET_H : LASX_3R<0b01110101000011101>, ++ LASX_3R_DESC_BASE<"xvbitset.h", int_loongarch_lasx_xvbitset_h, LASX256HOpnd>; ++ ++def XVBITSET_W : LASX_3R<0b01110101000011110>, ++ LASX_3R_DESC_BASE<"xvbitset.w", int_loongarch_lasx_xvbitset_w, LASX256WOpnd>; ++ ++def XVBITSET_D : LASX_3R<0b01110101000011111>, ++ LASX_3R_DESC_BASE<"xvbitset.d", int_loongarch_lasx_xvbitset_d, LASX256DOpnd>; ++ ++ ++def XVBITREV_B : LASX_3R<0b01110101000100000>, ++ LASX_3R_DESC_BASE<"xvbitrev.b", int_loongarch_lasx_xvbitrev_b, LASX256BOpnd>; ++ ++def XVBITREV_H : LASX_3R<0b01110101000100001>, ++ LASX_3R_DESC_BASE<"xvbitrev.h", int_loongarch_lasx_xvbitrev_h, LASX256HOpnd>; ++ ++def XVBITREV_W : LASX_3R<0b01110101000100010>, ++ LASX_3R_DESC_BASE<"xvbitrev.w", 
int_loongarch_lasx_xvbitrev_w, LASX256WOpnd>; ++ ++def XVBITREV_D : LASX_3R<0b01110101000100011>, ++ LASX_3R_DESC_BASE<"xvbitrev.d", int_loongarch_lasx_xvbitrev_d, LASX256DOpnd>; ++ ++ ++def XVPACKEV_B : LASX_3R<0b01110101000101100>, ++ LASX_3R_DESC_BASE<"xvpackev.b", LoongArchVPACKEV, LASX256BOpnd>; ++ ++def XVPACKEV_H : LASX_3R<0b01110101000101101>, ++ LASX_3R_DESC_BASE<"xvpackev.h", LoongArchVPACKEV, LASX256HOpnd>; ++ ++def XVPACKEV_W : LASX_3R<0b01110101000101110>, ++ LASX_3R_DESC_BASE<"xvpackev.w", LoongArchVPACKEV, LASX256WOpnd>; ++ ++def XVPACKEV_D : LASX_3R<0b01110101000101111>, ++ LASX_3R_DESC_BASE<"xvpackev.d", LoongArchVPACKEV, LASX256DOpnd>; ++ ++ ++def XVPACKOD_B : LASX_3R<0b01110101000110000>, ++ LASX_3R_DESC_BASE<"xvpackod.b", LoongArchVPACKOD, LASX256BOpnd>; ++ ++def XVPACKOD_H : LASX_3R<0b01110101000110001>, ++ LASX_3R_DESC_BASE<"xvpackod.h", LoongArchVPACKOD, LASX256HOpnd>; ++ ++def XVPACKOD_W : LASX_3R<0b01110101000110010>, ++ LASX_3R_DESC_BASE<"xvpackod.w", LoongArchVPACKOD, LASX256WOpnd>; ++ ++def XVPACKOD_D : LASX_3R<0b01110101000110011>, ++ LASX_3R_DESC_BASE<"xvpackod.d", LoongArchVPACKOD, LASX256DOpnd>; ++ ++ ++def XVILVL_B : LASX_3R<0b01110101000110100>, ++ LASX_3R_DESC_BASE<"xvilvl.b", LoongArchVILVL, LASX256BOpnd>; ++ ++def XVILVL_H : LASX_3R<0b01110101000110101>, ++ LASX_3R_DESC_BASE<"xvilvl.h", LoongArchVILVL, LASX256HOpnd>; ++ ++def XVILVL_W : LASX_3R<0b01110101000110110>, ++ LASX_3R_DESC_BASE<"xvilvl.w", LoongArchVILVL, LASX256WOpnd>; ++ ++def XVILVL_D : LASX_3R<0b01110101000110111>, ++ LASX_3R_DESC_BASE<"xvilvl.d", LoongArchVILVL, LASX256DOpnd>; ++ ++ ++def XVILVH_B : LASX_3R<0b01110101000111000>, ++ LASX_3R_DESC_BASE<"xvilvh.b", LoongArchVILVH, LASX256BOpnd>; ++ ++def XVILVH_H : LASX_3R<0b01110101000111001>, ++ LASX_3R_DESC_BASE<"xvilvh.h", LoongArchVILVH, LASX256HOpnd>; ++ ++def XVILVH_W : LASX_3R<0b01110101000111010>, ++ LASX_3R_DESC_BASE<"xvilvh.w", LoongArchVILVH, LASX256WOpnd>; ++ ++def XVILVH_D : LASX_3R<0b01110101000111011>, ++ LASX_3R_DESC_BASE<"xvilvh.d", LoongArchVILVH, LASX256DOpnd>; ++ ++ ++def XVPICKEV_B : LASX_3R<0b01110101000111100>, ++ LASX_3R_DESC_BASE<"xvpickev.b", LoongArchVPICKEV, LASX256BOpnd>; ++ ++def XVPICKEV_H : LASX_3R<0b01110101000111101>, ++ LASX_3R_DESC_BASE<"xvpickev.h", LoongArchVPICKEV, LASX256HOpnd>; ++ ++def XVPICKEV_W : LASX_3R<0b01110101000111110>, ++ LASX_3R_DESC_BASE<"xvpickev.w", LoongArchVPICKEV, LASX256WOpnd>; ++ ++def XVPICKEV_D : LASX_3R<0b01110101000111111>, ++ LASX_3R_DESC_BASE<"xvpickev.d", LoongArchVPICKEV, LASX256DOpnd>; ++ ++ ++def XVPICKOD_B : LASX_3R<0b01110101001000000>, ++ LASX_3R_DESC_BASE<"xvpickod.b", LoongArchVPICKOD, LASX256BOpnd>; ++ ++def XVPICKOD_H : LASX_3R<0b01110101001000001>, ++ LASX_3R_DESC_BASE<"xvpickod.h", LoongArchVPICKOD, LASX256HOpnd>; ++ ++def XVPICKOD_W : LASX_3R<0b01110101001000010>, ++ LASX_3R_DESC_BASE<"xvpickod.w", LoongArchVPICKOD, LASX256WOpnd>; ++ ++def XVPICKOD_D : LASX_3R<0b01110101001000011>, ++ LASX_3R_DESC_BASE<"xvpickod.d", LoongArchVPICKOD, LASX256DOpnd>; ++ ++ ++def XVREPLVE_B : LASX_3R_1GP<0b01110101001000100>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.b", int_loongarch_lasx_xvreplve_b, LASX256BOpnd>; ++ ++def XVREPLVE_H : LASX_3R_1GP<0b01110101001000101>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.h", int_loongarch_lasx_xvreplve_h, LASX256HOpnd>; ++ ++def XVREPLVE_W : LASX_3R_1GP<0b01110101001000110>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.w", int_loongarch_lasx_xvreplve_w, LASX256WOpnd>; ++ ++def XVREPLVE_D : LASX_3R_1GP<0b01110101001000111>, ++ 
LASX_3R_VREPLVE_DESC_BASE<"xvreplve.d", int_loongarch_lasx_xvreplve_d, LASX256DOpnd>; ++ ++ ++def XVAND_V : LASX_3R<0b01110101001001100>, ++ LASX_VEC_DESC_BASE<"xvand.v", and, LASX256BOpnd>; ++class XAND_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XAND_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XAND_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XAND_V_H_PSEUDO : XAND_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XAND_V_W_PSEUDO : XAND_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XAND_V_D_PSEUDO : XAND_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVOR_V : LASX_3R<0b01110101001001101>, ++ LASX_VEC_DESC_BASE<"xvor.v", or, LASX256BOpnd>; ++class X_OR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class X_OR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class X_OR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def X_OR_V_H_PSEUDO : X_OR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def X_OR_V_W_PSEUDO : X_OR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def X_OR_V_D_PSEUDO : X_OR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVXOR_V : LASX_3R<0b01110101001001110>, ++ LASX_VEC_DESC_BASE<"xvxor.v", xor, LASX256BOpnd>; ++class XXOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XXOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XXOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XXOR_V_H_PSEUDO : XXOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XXOR_V_W_PSEUDO : XXOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XXOR_V_D_PSEUDO : XXOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVNOR_V : LASX_3R<0b01110101001001111>, ++ LASX_VEC_DESC_BASE<"xvnor.v", LoongArchVNOR, LASX256BOpnd>; ++ ++class XNOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XNOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XNOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XNOR_V_H_PSEUDO : XNOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XNOR_V_W_PSEUDO : XNOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XNOR_V_D_PSEUDO : XNOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVANDN_V : LASX_3R<0b01110101001010000>, ++ LASX_3R_DESC_BASE<"xvandn.v", int_loongarch_lasx_xvandn_v, LASX256BOpnd>; ++ ++ ++class LASX_ANDN_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), ++ []>, ++ PseudoInstExpansion<(XVANDN_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++def XVANDN_H_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++def XVANDN_W_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++def XVANDN_D_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++ ++ ++def XVORN_V : LASX_3R<0b01110101001010001>, ++ LASX_3R_DESC_BASE<"xvorn.v", int_loongarch_lasx_xvorn_v, LASX256BOpnd>; ++ ++ ++class LASX_ORN_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), ++ []>, ++ 
PseudoInstExpansion<(XVORN_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++def XVORN_H_PSEUDO : LASX_ORN_PSEUDO_BASE; ++def XVORN_W_PSEUDO : LASX_ORN_PSEUDO_BASE; ++def XVORN_D_PSEUDO : LASX_ORN_PSEUDO_BASE; ++ ++ ++def XVFRSTP_B : LASX_3R<0b01110101001010110>, ++ LASX_3R_4R_DESC_BASE<"xvfrstp.b", int_loongarch_lasx_xvfrstp_b, LASX256BOpnd>; ++ ++def XVFRSTP_H : LASX_3R<0b01110101001010111>, ++ LASX_3R_4R_DESC_BASE<"xvfrstp.h", int_loongarch_lasx_xvfrstp_h, LASX256HOpnd>; ++ ++ ++def XVADD_Q : LASX_3R<0b01110101001011010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.q", int_loongarch_lasx_xvadd_q, LASX256DOpnd>; ++ ++def XVSUB_Q : LASX_3R<0b01110101001011011>, ++ LASX_3R_DESC_BASE<"xvsub.q", int_loongarch_lasx_xvsub_q, LASX256DOpnd>; ++ ++ ++def XVSIGNCOV_B : LASX_3R<0b01110101001011100>, ++ LASX_3R_DESC_BASE<"xvsigncov.b", int_loongarch_lasx_xvsigncov_b, LASX256BOpnd>; ++ ++def XVSIGNCOV_H : LASX_3R<0b01110101001011101>, ++ LASX_3R_DESC_BASE<"xvsigncov.h", int_loongarch_lasx_xvsigncov_h, LASX256HOpnd>; ++ ++def XVSIGNCOV_W : LASX_3R<0b01110101001011110>, ++ LASX_3R_DESC_BASE<"xvsigncov.w", int_loongarch_lasx_xvsigncov_w, LASX256WOpnd>; ++ ++def XVSIGNCOV_D : LASX_3R<0b01110101001011111>, ++ LASX_3R_DESC_BASE<"xvsigncov.d", int_loongarch_lasx_xvsigncov_d, LASX256DOpnd>; ++ ++ ++def XVFADD_S : LASX_3R<0b01110101001100001>, IsCommutable, ++ LASX_3RF_DESC_BASE<"xvfadd.s", fadd, LASX256WOpnd>; ++ ++def XVFADD_D : LASX_3R<0b01110101001100010>, IsCommutable, ++ LASX_3RF_DESC_BASE<"xvfadd.d", fadd, LASX256DOpnd>; ++ ++ ++def XVFSUB_S : LASX_3R<0b01110101001100101>, ++ LASX_3RF_DESC_BASE<"xvfsub.s", fsub, LASX256WOpnd>; ++ ++def XVFSUB_D : LASX_3R<0b01110101001100110>, ++ LASX_3RF_DESC_BASE<"xvfsub.d", fsub, LASX256DOpnd>; ++ ++ ++def XVFMUL_S : LASX_3R<0b01110101001110001>, ++ LASX_3RF_DESC_BASE<"xvfmul.s", fmul, LASX256WOpnd>; ++ ++def XVFMUL_D : LASX_3R<0b01110101001110010>, ++ LASX_3RF_DESC_BASE<"xvfmul.d", fmul, LASX256DOpnd>; ++ ++ ++def XVFDIV_S : LASX_3R<0b01110101001110101>, ++ LASX_3RF_DESC_BASE<"xvfdiv.s", fdiv, LASX256WOpnd>; ++ ++def XVFDIV_D : LASX_3R<0b01110101001110110>, ++ LASX_3RF_DESC_BASE<"xvfdiv.d", fdiv, LASX256DOpnd>; ++ ++ ++def XVFMAX_S : LASX_3R<0b01110101001111001>, ++ LASX_3RF_DESC_BASE<"xvfmax.s", int_loongarch_lasx_xvfmax_s, LASX256WOpnd>; ++ ++def XVFMAX_D : LASX_3R<0b01110101001111010>, ++ LASX_3RF_DESC_BASE<"xvfmax.d", int_loongarch_lasx_xvfmax_d, LASX256DOpnd>; ++ ++ ++def XVFMIN_S : LASX_3R<0b01110101001111101>, ++ LASX_3RF_DESC_BASE<"xvfmin.s", int_loongarch_lasx_xvfmin_s, LASX256WOpnd>; ++ ++def XVFMIN_D : LASX_3R<0b01110101001111110>, ++ LASX_3RF_DESC_BASE<"xvfmin.d", int_loongarch_lasx_xvfmin_d, LASX256DOpnd>; ++ ++ ++def XVFMAXA_S : LASX_3R<0b01110101010000001>, ++ LASX_3RF_DESC_BASE<"xvfmaxa.s", int_loongarch_lasx_xvfmaxa_s, LASX256WOpnd>; ++ ++def XVFMAXA_D : LASX_3R<0b01110101010000010>, ++ LASX_3RF_DESC_BASE<"xvfmaxa.d", int_loongarch_lasx_xvfmaxa_d, LASX256DOpnd>; ++ ++ ++def XVFMINA_S : LASX_3R<0b01110101010000101>, ++ LASX_3RF_DESC_BASE<"xvfmina.s", int_loongarch_lasx_xvfmina_s, LASX256WOpnd>; ++ ++def XVFMINA_D : LASX_3R<0b01110101010000110>, ++ LASX_3RF_DESC_BASE<"xvfmina.d", int_loongarch_lasx_xvfmina_d, LASX256DOpnd>; ++ ++ ++def XVFCVT_H_S : LASX_3R<0b01110101010001100>, ++ LASX_3RF_DESC_BASE<"xvfcvt.h.s", int_loongarch_lasx_xvfcvt_h_s, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVFCVT_S_D : LASX_3R<0b01110101010001101>, ++ LASX_3RF_DESC_BASE1<"xvfcvt.s.d", int_loongarch_lasx_xvfcvt_s_d, LASX256WOpnd, LASX256DOpnd, 
LASX256DOpnd>; ++ ++ ++def XVFFINT_S_L : LASX_3R<0b01110101010010000>, ++ LASX_3RF_DESC_BASE<"xvffint.s.l", int_loongarch_lasx_xvffint_s_l, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINT_W_D : LASX_3R<0b01110101010010011>, ++ LASX_3RF_DESC_BASE<"xvftint.w.d", int_loongarch_lasx_xvftint_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVFTINTRM_W_D : LASX_3R<0b01110101010010100>, ++ LASX_3RF_DESC_BASE<"xvftintrm.w.d", int_loongarch_lasx_xvftintrm_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRP_W_D : LASX_3R<0b01110101010010101>, ++ LASX_3RF_DESC_BASE<"xvftintrp.w.d", int_loongarch_lasx_xvftintrp_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRZ_W_D : LASX_3R<0b01110101010010110>, ++ LASX_3RF_DESC_BASE<"xvftintrz.w.d", int_loongarch_lasx_xvftintrz_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRNE_W_D : LASX_3R<0b01110101010010111>, ++ LASX_3RF_DESC_BASE<"xvftintrne.w.d", int_loongarch_lasx_xvftintrne_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSHUF_H : LASX_3R<0b01110101011110101>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.h", LASX256HOpnd>; ++ ++def XVSHUF_W : LASX_3R<0b01110101011110110>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.w", LASX256WOpnd>; ++ ++def XVSHUF_D : LASX_3R<0b01110101011110111>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.d", LASX256DOpnd>; ++ ++ ++def XVPERM_W : LASX_3R<0b01110101011111010>, ++ LASX_3R_DESC_BASE<"xvperm.w", int_loongarch_lasx_xvperm_w, LASX256WOpnd>; ++ ++ ++def XVSEQI_B : LASX_I5<0b01110110100000000>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.b", int_loongarch_lasx_xvseqi_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSEQI_H : LASX_I5<0b01110110100000001>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.h", int_loongarch_lasx_xvseqi_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSEQI_W : LASX_I5<0b01110110100000010>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.w", int_loongarch_lasx_xvseqi_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSEQI_D : LASX_I5<0b01110110100000011>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.d", int_loongarch_lasx_xvseqi_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLEI_B : LASX_I5<0b01110110100000100>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.b", int_loongarch_lasx_xvslei_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSLEI_H : LASX_I5<0b01110110100000101>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.h", int_loongarch_lasx_xvslei_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSLEI_W : LASX_I5<0b01110110100000110>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.w", int_loongarch_lasx_xvslei_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSLEI_D : LASX_I5<0b01110110100000111>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.d", int_loongarch_lasx_xvslei_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLEI_BU : LASX_I5_U<0b01110110100001000>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.bu", int_loongarch_lasx_xvslei_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSLEI_HU : LASX_I5_U<0b01110110100001001>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.hu", int_loongarch_lasx_xvslei_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSLEI_WU : LASX_I5_U<0b01110110100001010>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.wu", int_loongarch_lasx_xvslei_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLEI_DU : LASX_I5_U<0b01110110100001011>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.du", int_loongarch_lasx_xvslei_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVSLTI_B : LASX_I5<0b01110110100001100>, ++ 
LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.b", int_loongarch_lasx_xvslti_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSLTI_H : LASX_I5<0b01110110100001101>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.h", int_loongarch_lasx_xvslti_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSLTI_W : LASX_I5<0b01110110100001110>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.w", int_loongarch_lasx_xvslti_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSLTI_D : LASX_I5<0b01110110100001111>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.d", int_loongarch_lasx_xvslti_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLTI_BU : LASX_I5_U<0b01110110100010000>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.bu", int_loongarch_lasx_xvslti_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSLTI_HU : LASX_I5_U<0b01110110100010001>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.hu", int_loongarch_lasx_xvslti_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSLTI_WU : LASX_I5_U<0b01110110100010010>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.wu", int_loongarch_lasx_xvslti_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLTI_DU : LASX_I5_U<0b01110110100010011>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.du", int_loongarch_lasx_xvslti_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVADDI_BU : LASX_I5_U<0b01110110100010100>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.bu", int_loongarch_lasx_xvaddi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVADDI_HU : LASX_I5_U<0b01110110100010101>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.hu", int_loongarch_lasx_xvaddi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVADDI_WU : LASX_I5_U<0b01110110100010110>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.wu", int_loongarch_lasx_xvaddi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVADDI_DU : LASX_I5_U<0b01110110100010111>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.du", int_loongarch_lasx_xvaddi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVSUBI_BU : LASX_I5_U<0b01110110100011000>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.bu", int_loongarch_lasx_xvsubi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSUBI_HU : LASX_I5_U<0b01110110100011001>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.hu", int_loongarch_lasx_xvsubi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSUBI_WU : LASX_I5_U<0b01110110100011010>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.wu", int_loongarch_lasx_xvsubi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSUBI_DU : LASX_I5_U<0b01110110100011011>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.du", int_loongarch_lasx_xvsubi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVBSLL_V : LASX_I5_U<0b01110110100011100>, ++ LASX_U5_DESC_BASE<"xvbsll.v", int_loongarch_lasx_xvbsll_v, LASX256BOpnd>; ++ ++def XVBSRL_V : LASX_I5_U<0b01110110100011101>, ++ LASX_U5_DESC_BASE<"xvbsrl.v", int_loongarch_lasx_xvbsrl_v, LASX256BOpnd>; ++ ++ ++def XVMAXI_B : LASX_I5<0b01110110100100000>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.b", int_loongarch_lasx_xvmaxi_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVMAXI_H : LASX_I5<0b01110110100100001>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.h", int_loongarch_lasx_xvmaxi_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVMAXI_W : LASX_I5<0b01110110100100010>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.w", int_loongarch_lasx_xvmaxi_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVMAXI_D : LASX_I5<0b01110110100100011>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.d", int_loongarch_lasx_xvmaxi_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVMINI_B : LASX_I5<0b01110110100100100>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.b", 
int_loongarch_lasx_xvmini_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVMINI_H : LASX_I5<0b01110110100100101>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.h", int_loongarch_lasx_xvmini_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVMINI_W : LASX_I5<0b01110110100100110>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.w", int_loongarch_lasx_xvmini_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVMINI_D : LASX_I5<0b01110110100100111>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.d", int_loongarch_lasx_xvmini_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVMAXI_BU : LASX_I5_U<0b01110110100101000>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.bu", int_loongarch_lasx_xvmaxi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVMAXI_HU : LASX_I5_U<0b01110110100101001>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.hu", int_loongarch_lasx_xvmaxi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVMAXI_WU : LASX_I5_U<0b01110110100101010>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.wu", int_loongarch_lasx_xvmaxi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVMAXI_DU : LASX_I5_U<0b01110110100101011>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.du", int_loongarch_lasx_xvmaxi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVMINI_BU : LASX_I5_U<0b01110110100101100>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.bu", int_loongarch_lasx_xvmini_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVMINI_HU : LASX_I5_U<0b01110110100101101>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.hu", int_loongarch_lasx_xvmini_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVMINI_WU : LASX_I5_U<0b01110110100101110>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.wu", int_loongarch_lasx_xvmini_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVMINI_DU : LASX_I5_U<0b01110110100101111>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.du", int_loongarch_lasx_xvmini_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVFRSTPI_B : LASX_I5_U<0b01110110100110100>, ++ LASX_U5_4R_DESC_BASE<"xvfrstpi.b", int_loongarch_lasx_xvfrstpi_b, LASX256BOpnd>; ++ ++def XVFRSTPI_H : LASX_I5_U<0b01110110100110101>, ++ LASX_U5_4R_DESC_BASE<"xvfrstpi.h", int_loongarch_lasx_xvfrstpi_h, LASX256HOpnd>; ++ ++ ++def XVCLO_B : LASX_2R<0b0111011010011100000000>, ++ LASX_2R_DESC_BASE<"xvclo.b", int_loongarch_lasx_xvclo_b, LASX256BOpnd>; ++ ++def XVCLO_H : LASX_2R<0b0111011010011100000001>, ++ LASX_2R_DESC_BASE<"xvclo.h", int_loongarch_lasx_xvclo_h, LASX256HOpnd>; ++ ++def XVCLO_W : LASX_2R<0b0111011010011100000010>, ++ LASX_2R_DESC_BASE<"xvclo.w", int_loongarch_lasx_xvclo_w, LASX256WOpnd>; ++ ++def XVCLO_D : LASX_2R<0b0111011010011100000011>, ++ LASX_2R_DESC_BASE<"xvclo.d", int_loongarch_lasx_xvclo_d, LASX256DOpnd>; ++ ++ ++def XVCLZ_B : LASX_2R<0b0111011010011100000100>, ++ LASX_2R_DESC_BASE<"xvclz.b", ctlz, LASX256BOpnd>; ++ ++def XVCLZ_H : LASX_2R<0b0111011010011100000101>, ++ LASX_2R_DESC_BASE<"xvclz.h", ctlz, LASX256HOpnd>; ++ ++def XVCLZ_W : LASX_2R<0b0111011010011100000110>, ++ LASX_2R_DESC_BASE<"xvclz.w", ctlz, LASX256WOpnd>; ++ ++def XVCLZ_D : LASX_2R<0b0111011010011100000111>, ++ LASX_2R_DESC_BASE<"xvclz.d", ctlz, LASX256DOpnd>; ++ ++ ++def XVPCNT_B : LASX_2R<0b0111011010011100001000>, ++ LASX_2R_DESC_BASE<"xvpcnt.b", ctpop, LASX256BOpnd>; ++ ++def XVPCNT_H : LASX_2R<0b0111011010011100001001>, ++ LASX_2R_DESC_BASE<"xvpcnt.h", ctpop, LASX256HOpnd>; ++ ++def XVPCNT_W : LASX_2R<0b0111011010011100001010>, ++ LASX_2R_DESC_BASE<"xvpcnt.w", ctpop, LASX256WOpnd>; ++ ++def XVPCNT_D : LASX_2R<0b0111011010011100001011>, ++ LASX_2R_DESC_BASE<"xvpcnt.d", ctpop, LASX256DOpnd>; ++ ++ ++def XVNEG_B : 
LASX_2R<0b0111011010011100001100>, ++ LASX_2R_DESC_BASE<"xvneg.b", int_loongarch_lasx_xvneg_b, LASX256BOpnd>; ++ ++def XVNEG_H : LASX_2R<0b0111011010011100001101>, ++ LASX_2R_DESC_BASE<"xvneg.h", int_loongarch_lasx_xvneg_h, LASX256HOpnd>; ++ ++def XVNEG_W : LASX_2R<0b0111011010011100001110>, ++ LASX_2R_DESC_BASE<"xvneg.w", int_loongarch_lasx_xvneg_w, LASX256WOpnd>; ++ ++def XVNEG_D : LASX_2R<0b0111011010011100001111>, ++ LASX_2R_DESC_BASE<"xvneg.d", int_loongarch_lasx_xvneg_d, LASX256DOpnd>; ++ ++ ++def XVMSKLTZ_B : LASX_2R<0b0111011010011100010000>, ++ LASX_2R_DESC_BASE<"xvmskltz.b", int_loongarch_lasx_xvmskltz_b, LASX256BOpnd>; ++ ++def XVMSKLTZ_H : LASX_2R<0b0111011010011100010001>, ++ LASX_2R_DESC_BASE<"xvmskltz.h", int_loongarch_lasx_xvmskltz_h, LASX256HOpnd>; ++ ++def XVMSKLTZ_W : LASX_2R<0b0111011010011100010010>, ++ LASX_2R_DESC_BASE<"xvmskltz.w", int_loongarch_lasx_xvmskltz_w, LASX256WOpnd>; ++ ++def XVMSKLTZ_D : LASX_2R<0b0111011010011100010011>, ++ LASX_2R_DESC_BASE<"xvmskltz.d", int_loongarch_lasx_xvmskltz_d, LASX256DOpnd>; ++ ++ ++def XVMSKGEZ_B : LASX_2R<0b0111011010011100010100>, ++ LASX_2R_DESC_BASE<"xvmskgez.b", int_loongarch_lasx_xvmskgez_b, LASX256BOpnd>; ++ ++def XVMSKNZ_B : LASX_2R<0b0111011010011100011000>, ++ LASX_2R_DESC_BASE<"xvmsknz.b", int_loongarch_lasx_xvmsknz_b, LASX256BOpnd>; ++ ++ ++def XVSETEQZ_V : LASX_SET<0b0111011010011100100110>, ++ LASX_SET_DESC_BASE<"xvseteqz.v", LASX256BOpnd>; ++ ++def XVSETNEZ_V : LASX_SET<0b0111011010011100100111>, ++ LASX_SET_DESC_BASE<"xvsetnez.v", LASX256BOpnd>; ++ ++ ++def XVSETANYEQZ_B : LASX_SET<0b0111011010011100101000>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.b", LASX256BOpnd>; ++ ++def XVSETANYEQZ_H : LASX_SET<0b0111011010011100101001>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.h", LASX256HOpnd>; ++ ++def XVSETANYEQZ_W : LASX_SET<0b0111011010011100101010>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.w", LASX256WOpnd>; ++ ++def XVSETANYEQZ_D : LASX_SET<0b0111011010011100101011>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.d", LASX256DOpnd>; ++ ++ ++def XVSETALLNEZ_B : LASX_SET<0b0111011010011100101100>, ++ LASX_SET_DESC_BASE<"xvsetallnez.b", LASX256BOpnd>; ++ ++def XVSETALLNEZ_H : LASX_SET<0b0111011010011100101101>, ++ LASX_SET_DESC_BASE<"xvsetallnez.h", LASX256HOpnd>; ++ ++def XVSETALLNEZ_W : LASX_SET<0b0111011010011100101110>, ++ LASX_SET_DESC_BASE<"xvsetallnez.w", LASX256WOpnd>; ++ ++def XVSETALLNEZ_D : LASX_SET<0b0111011010011100101111>, ++ LASX_SET_DESC_BASE<"xvsetallnez.d", LASX256DOpnd>; ++ ++class LASX_CBRANCH_PSEUDO_DESC_BASE : ++ LoongArchPseudo<(outs GPR32:$dst), ++ (ins RCWS:$xj), ++ [(set GPR32:$dst, (OpNode (TyNode RCWS:$xj)))]> { ++ bit usesCustomInserter = 1; ++} ++ ++def XSNZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++ ++def XSZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++ ++ ++def XVFLOGB_S : LASX_2R<0b0111011010011100110001>, ++ LASX_2RF_DESC_BASE<"xvflogb.s", int_loongarch_lasx_xvflogb_s, LASX256WOpnd>; ++ ++def XVFLOGB_D : LASX_2R<0b0111011010011100110010>, ++ LASX_2RF_DESC_BASE<"xvflogb.d", int_loongarch_lasx_xvflogb_d, LASX256DOpnd>; ++ ++ ++def XVFCLASS_S : LASX_2R<0b0111011010011100110101>, ++ 
LASX_2RF_DESC_BASE<"xvfclass.s", int_loongarch_lasx_xvfclass_s, LASX256WOpnd>; ++ ++def XVFCLASS_D : LASX_2R<0b0111011010011100110110>, ++ LASX_2RF_DESC_BASE<"xvfclass.d", int_loongarch_lasx_xvfclass_d, LASX256DOpnd>; ++ ++ ++def XVFSQRT_S : LASX_2R<0b0111011010011100111001>, ++ LASX_2RF_DESC_BASE<"xvfsqrt.s", fsqrt, LASX256WOpnd>; ++ ++def XVFSQRT_D : LASX_2R<0b0111011010011100111010>, ++ LASX_2RF_DESC_BASE<"xvfsqrt.d", fsqrt, LASX256DOpnd>; ++ ++ ++def XVFRECIP_S : LASX_2R<0b0111011010011100111101>, ++ LASX_2RF_DESC_BASE<"xvfrecip.s", int_loongarch_lasx_xvfrecip_s, LASX256WOpnd>; ++ ++def XVFRECIP_D : LASX_2R<0b0111011010011100111110>, ++ LASX_2RF_DESC_BASE<"xvfrecip.d", int_loongarch_lasx_xvfrecip_d, LASX256DOpnd>; ++ ++ ++def XVFRSQRT_S : LASX_2R<0b0111011010011101000001>, ++ LASX_2RF_DESC_BASE<"xvfrsqrt.s", int_loongarch_lasx_xvfrsqrt_s, LASX256WOpnd>; ++ ++def XVFRSQRT_D : LASX_2R<0b0111011010011101000010>, ++ LASX_2RF_DESC_BASE<"xvfrsqrt.d", int_loongarch_lasx_xvfrsqrt_d, LASX256DOpnd>; ++ ++ ++def XVFRINT_S : LASX_2R<0b0111011010011101001101>, ++ LASX_2RF_DESC_BASE<"xvfrint.s", frint, LASX256WOpnd>; ++ ++def XVFRINT_D : LASX_2R<0b0111011010011101001110>, ++ LASX_2RF_DESC_BASE<"xvfrint.d", frint, LASX256DOpnd>; ++ ++ ++def XVFRINTRM_S : LASX_2R<0b0111011010011101010001>, ++ LASX_2RF_DESC_BASE<"xvfrintrm.s", int_loongarch_lasx_xvfrintrm_s, LASX256WOpnd>; ++ ++def XVFRINTRM_D : LASX_2R<0b0111011010011101010010>, ++ LASX_2RF_DESC_BASE<"xvfrintrm.d", int_loongarch_lasx_xvfrintrm_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRP_S : LASX_2R<0b0111011010011101010101>, ++ LASX_2RF_DESC_BASE<"xvfrintrp.s", int_loongarch_lasx_xvfrintrp_s, LASX256WOpnd>; ++ ++def XVFRINTRP_D : LASX_2R<0b0111011010011101010110>, ++ LASX_2RF_DESC_BASE<"xvfrintrp.d", int_loongarch_lasx_xvfrintrp_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRZ_S : LASX_2R<0b0111011010011101011001>, ++ LASX_2RF_DESC_BASE<"xvfrintrz.s", int_loongarch_lasx_xvfrintrz_s, LASX256WOpnd>; ++ ++def XVFRINTRZ_D : LASX_2R<0b0111011010011101011010>, ++ LASX_2RF_DESC_BASE<"xvfrintrz.d", int_loongarch_lasx_xvfrintrz_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRNE_S : LASX_2R<0b0111011010011101011101>, ++ LASX_2RF_DESC_BASE<"xvfrintrne.s", int_loongarch_lasx_xvfrintrne_s, LASX256WOpnd>; ++ ++def XVFRINTRNE_D : LASX_2R<0b0111011010011101011110>, ++ LASX_2RF_DESC_BASE<"xvfrintrne.d", int_loongarch_lasx_xvfrintrne_d, LASX256DOpnd>; ++ ++ ++def XVFCVTL_S_H : LASX_2R<0b0111011010011101111010>, ++ LASX_2RF_DESC_BASE<"xvfcvtl.s.h", int_loongarch_lasx_xvfcvtl_s_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVFCVTH_S_H : LASX_2R<0b0111011010011101111011>, ++ LASX_2RF_DESC_BASE<"xvfcvth.s.h", int_loongarch_lasx_xvfcvth_s_h, LASX256WOpnd, LASX256HOpnd>; ++ ++ ++def XVFCVTL_D_S : LASX_2R<0b0111011010011101111100>, ++ LASX_2RF_DESC_BASE<"xvfcvtl.d.s", int_loongarch_lasx_xvfcvtl_d_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFCVTH_D_S : LASX_2R<0b0111011010011101111101>, ++ LASX_2RF_DESC_BASE<"xvfcvth.d.s", int_loongarch_lasx_xvfcvth_d_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFFINT_S_W : LASX_2R<0b0111011010011110000000>, ++ LASX_2RF_DESC_BASE<"xvffint.s.w", sint_to_fp, LASX256WOpnd>; ++ ++def XVFFINT_S_WU : LASX_2R<0b0111011010011110000001>, ++ LASX_2RF_DESC_BASE<"xvffint.s.wu", uint_to_fp, LASX256WOpnd>; ++ ++ ++def XVFFINT_D_L : LASX_2R<0b0111011010011110000010>, ++ LASX_2RF_DESC_BASE<"xvffint.d.l", sint_to_fp, LASX256DOpnd>; ++ ++def XVFFINT_D_LU : LASX_2R<0b0111011010011110000011>, ++ LASX_2RF_DESC_BASE<"xvffint.d.lu", uint_to_fp, LASX256DOpnd>; ++ ++ ++def XVFFINTL_D_W : 
LASX_2R<0b0111011010011110000100>, ++ LASX_2RF_DESC_BASE<"xvffintl.d.w", int_loongarch_lasx_xvffintl_d_w, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFFINTH_D_W : LASX_2R<0b0111011010011110000101>, ++ LASX_2RF_DESC_BASE<"xvffinth.d.w", int_loongarch_lasx_xvffinth_d_w, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINT_W_S : LASX_2R<0b0111011010011110001100>, ++ LASX_2RF_DESC_BASE<"xvftint.w.s", int_loongarch_lasx_xvftint_w_s, LASX256WOpnd>; ++ ++def XVFTINT_L_D : LASX_2R<0b0111011010011110001101>, ++ LASX_2RF_DESC_BASE<"xvftint.l.d", int_loongarch_lasx_xvftint_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRM_W_S : LASX_2R<0b0111011010011110001110>, ++ LASX_2RF_DESC_BASE<"xvftintrm.w.s", int_loongarch_lasx_xvftintrm_w_s, LASX256WOpnd>; ++ ++def XVFTINTRM_L_D : LASX_2R<0b0111011010011110001111>, ++ LASX_2RF_DESC_BASE<"xvftintrm.l.d", int_loongarch_lasx_xvftintrm_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRP_W_S : LASX_2R<0b0111011010011110010000>, ++ LASX_2RF_DESC_BASE<"xvftintrp.w.s", int_loongarch_lasx_xvftintrp_w_s, LASX256WOpnd>; ++ ++def XVFTINTRP_L_D : LASX_2R<0b0111011010011110010001>, ++ LASX_2RF_DESC_BASE<"xvftintrp.l.d", int_loongarch_lasx_xvftintrp_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRZ_W_S : LASX_2R<0b0111011010011110010010>, ++ LASX_2RF_DESC_BASE<"xvftintrz.w.s", fp_to_sint, LASX256WOpnd>; ++ ++def XVFTINTRZ_L_D : LASX_2R<0b0111011010011110010011>, ++ LASX_2RF_DESC_BASE<"xvftintrz.l.d", fp_to_sint, LASX256DOpnd>; ++ ++ ++def XVFTINTRNE_W_S : LASX_2R<0b0111011010011110010100>, ++ LASX_2RF_DESC_BASE<"xvftintrne.w.s", int_loongarch_lasx_xvftintrne_w_s, LASX256WOpnd>; ++ ++def XVFTINTRNE_L_D : LASX_2R<0b0111011010011110010101>, ++ LASX_2RF_DESC_BASE<"xvftintrne.l.d", int_loongarch_lasx_xvftintrne_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINT_WU_S : LASX_2R<0b0111011010011110010110>, ++ LASX_2RF_DESC_BASE<"xvftint.wu.s", int_loongarch_lasx_xvftint_wu_s, LASX256WOpnd>; ++ ++def XVFTINT_LU_D : LASX_2R<0b0111011010011110010111>, ++ LASX_2RF_DESC_BASE<"xvftint.lu.d", int_loongarch_lasx_xvftint_lu_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRZ_WU_S : LASX_2R<0b0111011010011110011100>, ++ LASX_2RF_DESC_BASE<"xvftintrz.wu.s", fp_to_uint, LASX256WOpnd>; ++ ++def XVFTINTRZ_LU_D : LASX_2R<0b0111011010011110011101>, ++ LASX_2RF_DESC_BASE<"xvftintrz.lu.d", fp_to_uint, LASX256DOpnd>; ++ ++ ++def XVFTINTL_L_S : LASX_2R<0b0111011010011110100000>, ++ LASX_2RF_DESC_BASE<"xvftintl.l.s", int_loongarch_lasx_xvftintl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTH_L_S : LASX_2R<0b0111011010011110100001>, ++ LASX_2RF_DESC_BASE<"xvftinth.l.s", int_loongarch_lasx_xvftinth_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRML_L_S : LASX_2R<0b0111011010011110100010>, ++ LASX_2RF_DESC_BASE<"xvftintrml.l.s", int_loongarch_lasx_xvftintrml_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRMH_L_S : LASX_2R<0b0111011010011110100011>, ++ LASX_2RF_DESC_BASE<"xvftintrmh.l.s", int_loongarch_lasx_xvftintrmh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRPL_L_S : LASX_2R<0b0111011010011110100100>, ++ LASX_2RF_DESC_BASE<"xvftintrpl.l.s", int_loongarch_lasx_xvftintrpl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRPH_L_S : LASX_2R<0b0111011010011110100101>, ++ LASX_2RF_DESC_BASE<"xvftintrph.l.s", int_loongarch_lasx_xvftintrph_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRZL_L_S : LASX_2R<0b0111011010011110100110>, ++ LASX_2RF_DESC_BASE<"xvftintrzl.l.s", int_loongarch_lasx_xvftintrzl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRZH_L_S : LASX_2R<0b0111011010011110100111>, ++ LASX_2RF_DESC_BASE<"xvftintrzh.l.s", 
int_loongarch_lasx_xvftintrzh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRNEL_L_S : LASX_2R<0b0111011010011110101000>, ++ LASX_2RF_DESC_BASE<"xvftintrnel.l.s", int_loongarch_lasx_xvftintrnel_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRNEH_L_S : LASX_2R<0b0111011010011110101001>, ++ LASX_2RF_DESC_BASE<"xvftintrneh.l.s", int_loongarch_lasx_xvftintrneh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVEXTH_H_B : LASX_2R<0b0111011010011110111000>, ++ LASX_2R_DESC_BASE<"xvexth.h.b", int_loongarch_lasx_xvexth_h_b, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVEXTH_W_H : LASX_2R<0b0111011010011110111001>, ++ LASX_2R_DESC_BASE<"xvexth.w.h", int_loongarch_lasx_xvexth_w_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVEXTH_D_W : LASX_2R<0b0111011010011110111010>, ++ LASX_2R_DESC_BASE<"xvexth.d.w", int_loongarch_lasx_xvexth_d_w, LASX256DOpnd, LASX256WOpnd> ; ++ ++def XVEXTH_Q_D : LASX_2R<0b0111011010011110111011>, ++ LASX_2R_DESC_BASE<"xvexth.q.d", int_loongarch_lasx_xvexth_q_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVEXTH_HU_BU : LASX_2R<0b0111011010011110111100>, ++ LASX_2R_DESC_BASE<"xvexth.hu.bu", int_loongarch_lasx_xvexth_hu_bu, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVEXTH_WU_HU : LASX_2R<0b0111011010011110111101>, ++ LASX_2R_DESC_BASE<"xvexth.wu.hu", int_loongarch_lasx_xvexth_wu_hu, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVEXTH_DU_WU : LASX_2R<0b0111011010011110111110>, ++ LASX_2R_DESC_BASE<"xvexth.du.wu", int_loongarch_lasx_xvexth_du_wu, LASX256DOpnd, LASX256WOpnd> ; ++ ++def XVEXTH_QU_DU : LASX_2R<0b0111011010011110111111>, ++ LASX_2R_DESC_BASE<"xvexth.qu.du", int_loongarch_lasx_xvexth_qu_du, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVREPLGR2VR_B : LASX_2R_1GP<0b0111011010011111000000>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.b", v32i8, xvsplati8, LASX256BOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_H : LASX_2R_1GP<0b0111011010011111000001>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.h", v16i16, xvsplati16, LASX256HOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_W : LASX_2R_1GP<0b0111011010011111000010>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.w", v8i32, xvsplati32, LASX256WOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_D : LASX_2R_1GP<0b0111011010011111000011>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.d", v4i64, xvsplati64, LASX256DOpnd, GPR64Opnd>; ++ ++ ++def VEXT2XV_H_B : LASX_2R<0b0111011010011111000100>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.h.b", int_loongarch_lasx_vext2xv_h_b, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; ++ ++def VEXT2XV_W_B : LASX_2R<0b0111011010011111000101>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.w.b", int_loongarch_lasx_vext2xv_w_b, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_D_B : LASX_2R<0b0111011010011111000110>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.b", int_loongarch_lasx_vext2xv_d_b, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_W_H : LASX_2R<0b0111011010011111000111>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.w.h", int_loongarch_lasx_vext2xv_w_h, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_D_H : LASX_2R<0b0111011010011111001000>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.h", int_loongarch_lasx_vext2xv_d_h, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_D_W : LASX_2R<0b0111011010011111001001>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.w", int_loongarch_lasx_vext2xv_d_w, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; ++ ++ ++def VEXT2XV_HU_BU : LASX_2R<0b0111011010011111001010>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.hu.bu", int_loongarch_lasx_vext2xv_hu_bu, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; ++ ++def VEXT2XV_WU_BU : 
LASX_2R<0b0111011010011111001011>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.bu", int_loongarch_lasx_vext2xv_wu_bu, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_DU_BU : LASX_2R<0b0111011010011111001100>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.bu", int_loongarch_lasx_vext2xv_du_bu, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_WU_HU : LASX_2R<0b0111011010011111001101>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.hu", int_loongarch_lasx_vext2xv_wu_hu, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_DU_HU : LASX_2R<0b0111011010011111001110>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.hu", int_loongarch_lasx_vext2xv_du_hu, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_DU_WU : LASX_2R<0b0111011010011111001111>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.wu", int_loongarch_lasx_vext2xv_du_wu, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; ++ ++ ++def XVHSELI_D : LASX_I5_U<0b01110110100111111>, ++ LASX_U5N_DESC_BASE<"xvhseli.d", LASX256DOpnd>; ++ ++ ++def XVROTRI_B : LASX_I3_U<0b0111011010100000001>, ++ LASX_RORI_U3_DESC_BASE_Intrinsic<"xvrotri.b", int_loongarch_lasx_xvrotri_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVROTRI_H : LASX_I4_U<0b011101101010000001>, ++ LASX_RORI_U4_DESC_BASE_Intrinsic<"xvrotri.h", int_loongarch_lasx_xvrotri_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVROTRI_W : LASX_I5_U<0b01110110101000001>, ++ LASX_RORI_U5_DESC_BASE_Intrinsic<"xvrotri.w", int_loongarch_lasx_xvrotri_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVROTRI_D : LASX_I6_U<0b0111011010100001>, ++ LASX_RORI_U6_DESC_BASE_Intrinsic<"xvrotri.d", int_loongarch_lasx_xvrotri_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLRI_B : LASX_I3_U<0b0111011010100100001>, ++ LASX_BIT_3_DESC_BASE<"xvsrlri.b", int_loongarch_lasx_xvsrlri_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRLRI_H : LASX_I4_U<0b011101101010010001>, ++ LASX_BIT_4_DESC_BASE<"xvsrlri.h", int_loongarch_lasx_xvsrlri_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRLRI_W : LASX_I5_U<0b01110110101001001>, ++ LASX_BIT_5_DESC_BASE<"xvsrlri.w", int_loongarch_lasx_xvsrlri_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRLRI_D : LASX_I6_U<0b0111011010100101>, ++ LASX_BIT_6_DESC_BASE<"xvsrlri.d", int_loongarch_lasx_xvsrlri_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRARI_B : LASX_I3_U<0b0111011010101000001>, ++ LASX_BIT_3_DESC_BASE<"xvsrari.b", int_loongarch_lasx_xvsrari_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRARI_H : LASX_I4_U<0b011101101010100001>, ++ LASX_BIT_4_DESC_BASE<"xvsrari.h", int_loongarch_lasx_xvsrari_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRARI_W : LASX_I5_U<0b01110110101010001>, ++ LASX_BIT_5_DESC_BASE<"xvsrari.w", int_loongarch_lasx_xvsrari_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRARI_D : LASX_I6_U<0b0111011010101001>, ++ LASX_BIT_6_DESC_BASE<"xvsrari.d", int_loongarch_lasx_xvsrari_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVINSGR2VR_W : LASX_I3_R_U<0b0111011011101011110>, ++ LASX_INSERT_U3_DESC_BASE<"xvinsgr2vr.w", v8i32, uimm3_ptr, immZExt3Ptr, LASX256WOpnd, GPR32Opnd>; ++ ++def XVINSGR2VR_D : LASX_I2_R_U<0b01110110111010111110>, ++ LASX_INSERT_U2_DESC_BASE<"xvinsgr2vr.d", v4i64, uimm2_ptr, immZExt2Ptr, LASX256DOpnd, GPR64Opnd>; ++ ++ ++def XVPICKVE2GR_W : LASX_ELM_COPY_U3<0b0111011011101111110>, ++ LASX_COPY_U3_DESC_BASE<"xvpickve2gr.w", vextract_sext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; ++ ++def XVPICKVE2GR_D : LASX_ELM_COPY_U2<0b01110110111011111110>, ++ LASX_COPY_U2_DESC_BASE<"xvpickve2gr.d", vextract_sext_i64, v4i64, uimm2_ptr, immZExt2Ptr, 
GPR64Opnd, LASX256DOpnd>; ++ ++ ++def XVPICKVE2GR_WU : LASX_ELM_COPY_U3<0b0111011011110011110>, ++ LASX_COPY_U3_DESC_BASE<"xvpickve2gr.wu", vextract_zext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; ++ ++def XVPICKVE2GR_DU : LASX_ELM_COPY_U2<0b01110110111100111110>, ++ LASX_COPY_U2_DESC_BASE<"xvpickve2gr.du", vextract_zext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; ++ ++ ++def XVREPL128VEI_B : LASX_I4_U<0b011101101111011110>, ++ LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.b", int_loongarch_lasx_xvrepl128vei_b, LASX256BOpnd>; ++ ++def XVREPL128VEI_H : LASX_I3_U<0b0111011011110111110>, ++ LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.h", int_loongarch_lasx_xvrepl128vei_h, LASX256HOpnd>; ++ ++def XVREPL128VEI_W : LASX_I2_U<0b01110110111101111110>, ++ LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.w", int_loongarch_lasx_xvrepl128vei_w, LASX256WOpnd>; ++ ++def XVREPL128VEI_D : LASX_I1_U<0b011101101111011111110>, ++ LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.d", int_loongarch_lasx_xvrepl128vei_d, LASX256DOpnd>; ++ ++ ++def XVINSVE0_W : LASX_I3_U<0b0111011011111111110>, ++ LASX_BIT_3_4O_DESC_BASE<"xvinsve0.w", int_loongarch_lasx_xvinsve0_w, uimm3, immZExt3, LASX256WOpnd>; ++ ++def XVINSVE0_D : LASX_I2_U<0b01110110111111111110>, ++ LASX_BIT_2_4O_DESC_BASE<"xvinsve0.d", int_loongarch_lasx_xvinsve0_d, uimm2, immZExt2, LASX256DOpnd>; ++ ++ ++def XVPICKVE_W : LASX_I3_U<0b0111011100000011110>, ++ LASX_BIT_3_4ON<"xvpickve.w", uimm3, immZExt3, LASX256WOpnd>; ++ ++def XVPICKVE_D : LASX_I2_U<0b01110111000000111110>, ++ LASX_BIT_2_4ON<"xvpickve.d", uimm2, immZExt2, LASX256DOpnd>; ++ ++ ++def XVREPLVE0_B : LASX_2R<0b0111011100000111000000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.b", int_loongarch_lasx_xvreplve0_b, v32i8, LASX256BOpnd>; ++ ++def XVREPLVE0_H : LASX_2R<0b0111011100000111100000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.h", int_loongarch_lasx_xvreplve0_h, v16i16, LASX256HOpnd>; ++ ++def XVREPLVE0_W : LASX_2R<0b0111011100000111110000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.w", int_loongarch_lasx_xvreplve0_w, v8i32, LASX256WOpnd> ; ++ ++def XVREPLVE0_D : LASX_2R<0b0111011100000111111000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd>; ++ ++def XVREPLVE0_Q : LASX_2R<0b0111011100000111111100>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.q", int_loongarch_lasx_xvreplve0_q, v32i8, LASX256BOpnd>; ++ ++ ++def XVSLLWIL_H_B : LASX_I3_U<0b0111011100001000001>, ++ LASX_2R_U3_DESC_BASE<"xvsllwil.h.b", int_loongarch_lasx_xvsllwil_h_b, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVSLLWIL_W_H : LASX_I4_U<0b011101110000100001>, ++ LASX_2R_U4_DESC_BASE<"xvsllwil.w.h", int_loongarch_lasx_xvsllwil_w_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVSLLWIL_D_W : LASX_I5_U<0b01110111000010001>, ++ LASX_2R_U5_DESC_BASE<"xvsllwil.d.w", int_loongarch_lasx_xvsllwil_d_w, LASX256DOpnd, LASX256WOpnd> ; ++ ++ ++def XVEXTL_Q_D : LASX_2R<0b0111011100001001000000>, ++ LASX_2R_DESC_BASE<"xvextl.q.d", int_loongarch_lasx_xvextl_q_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSLLWIL_HU_BU : LASX_I3_U<0b0111011100001100001>, ++ LASX_2R_U3_DESC_BASE<"xvsllwil.hu.bu", int_loongarch_lasx_xvsllwil_hu_bu, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVSLLWIL_WU_HU : LASX_I4_U<0b011101110000110001>, ++ LASX_2R_U4_DESC_BASE<"xvsllwil.wu.hu", int_loongarch_lasx_xvsllwil_wu_hu, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVSLLWIL_DU_WU : LASX_I5_U<0b01110111000011001>, ++ LASX_2R_U5_DESC_BASE<"xvsllwil.du.wu", int_loongarch_lasx_xvsllwil_du_wu, 
LASX256DOpnd, LASX256WOpnd> ; ++ ++ ++def XVEXTL_QU_DU : LASX_2R<0b0111011100001101000000>, ++ LASX_2R_DESC_BASE<"xvextl.qu.du", int_loongarch_lasx_xvextl_qu_du, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITCLRI_B : LASX_I3_U<0b0111011100010000001>, ++ LASX_2R_U3_DESC_BASE<"xvbitclri.b", int_loongarch_lasx_xvbitclri_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITCLRI_H : LASX_I4_U<0b011101110001000001>, ++ LASX_2R_U4_DESC_BASE<"xvbitclri.h", int_loongarch_lasx_xvbitclri_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITCLRI_W : LASX_I5_U<0b01110111000100001>, ++ LASX_2R_U5_DESC_BASE<"xvbitclri.w", int_loongarch_lasx_xvbitclri_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITCLRI_D : LASX_I6_U<0b0111011100010001>, ++ LASX_2R_U6_DESC_BASE<"xvbitclri.d", int_loongarch_lasx_xvbitclri_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITSETI_B : LASX_I3_U<0b0111011100010100001>, ++ LASX_2R_U3_DESC_BASE<"xvbitseti.b", int_loongarch_lasx_xvbitseti_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITSETI_H : LASX_I4_U<0b011101110001010001>, ++ LASX_2R_U4_DESC_BASE<"xvbitseti.h", int_loongarch_lasx_xvbitseti_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITSETI_W : LASX_I5_U<0b01110111000101001>, ++ LASX_2R_U5_DESC_BASE<"xvbitseti.w", int_loongarch_lasx_xvbitseti_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITSETI_D : LASX_I6_U<0b0111011100010101>, ++ LASX_2R_U6_DESC_BASE<"xvbitseti.d", int_loongarch_lasx_xvbitseti_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITREVI_B : LASX_I3_U<0b0111011100011000001>, ++ LASX_2R_U3_DESC_BASE<"xvbitrevi.b", int_loongarch_lasx_xvbitrevi_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITREVI_H : LASX_I4_U<0b011101110001100001>, ++ LASX_2R_U4_DESC_BASE<"xvbitrevi.h", int_loongarch_lasx_xvbitrevi_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITREVI_W : LASX_I5_U<0b01110111000110001>, ++ LASX_2R_U5_DESC_BASE<"xvbitrevi.w", int_loongarch_lasx_xvbitrevi_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITREVI_D : LASX_I6_U<0b0111011100011001>, ++ LASX_2R_U6_DESC_BASE<"xvbitrevi.d", int_loongarch_lasx_xvbitrevi_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSAT_B : LASX_I3_U<0b0111011100100100001>, ++ LASX_BIT_3_DESC_BASE<"xvsat.b", int_loongarch_lasx_xvsat_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSAT_H : LASX_I4_U<0b011101110010010001>, ++ LASX_BIT_4_DESC_BASE<"xvsat.h", int_loongarch_lasx_xvsat_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSAT_W : LASX_I5_U<0b01110111001001001>, ++ LASX_BIT_5_DESC_BASE<"xvsat.w", int_loongarch_lasx_xvsat_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSAT_D : LASX_I6_U<0b0111011100100101>, ++ LASX_BIT_6_DESC_BASE<"xvsat.d", int_loongarch_lasx_xvsat_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSAT_BU : LASX_I3_U<0b0111011100101000001>, ++ LASX_BIT_3_DESC_BASE<"xvsat.bu", int_loongarch_lasx_xvsat_bu, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSAT_HU : LASX_I4_U<0b011101110010100001>, ++ LASX_BIT_4_DESC_BASE<"xvsat.hu", int_loongarch_lasx_xvsat_hu, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSAT_WU : LASX_I5_U<0b01110111001010001>, ++ LASX_BIT_5_DESC_BASE<"xvsat.wu", int_loongarch_lasx_xvsat_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSAT_DU : LASX_I6_U<0b0111011100101001>, ++ LASX_BIT_6_DESC_BASE<"xvsat.du", int_loongarch_lasx_xvsat_du, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSLLI_B : LASX_I3_U<0b0111011100101100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvslli.b", int_loongarch_lasx_xvslli_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSLLI_H : LASX_I4_U<0b011101110010110001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvslli.h", 
int_loongarch_lasx_xvslli_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSLLI_W : LASX_I5_U<0b01110111001011001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvslli.w", int_loongarch_lasx_xvslli_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLLI_D : LASX_I6_U<0b0111011100101101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvslli.d", int_loongarch_lasx_xvslli_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLI_B : LASX_I3_U<0b0111011100110000001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.b", int_loongarch_lasx_xvsrli_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRLI_H : LASX_I4_U<0b011101110011000001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.h", int_loongarch_lasx_xvsrli_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRLI_W : LASX_I5_U<0b01110111001100001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.w", int_loongarch_lasx_xvsrli_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRLI_D : LASX_I6_U<0b0111011100110001>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.d", int_loongarch_lasx_xvsrli_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRAI_B : LASX_I3_U<0b0111011100110100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.b", int_loongarch_lasx_xvsrai_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRAI_H : LASX_I4_U<0b011101110011010001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.h", int_loongarch_lasx_xvsrai_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRAI_W : LASX_I5_U<0b01110111001101001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.w", int_loongarch_lasx_xvsrai_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRAI_D : LASX_I6_U<0b0111011100110101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.d", int_loongarch_lasx_xvsrai_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLNI_B_H : LASX_I4_U<0b011101110100000001>, ++ LASX_U4_DESC_BASE<"xvsrlni.b.h", int_loongarch_lasx_xvsrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRLNI_H_W : LASX_I5_U<0b01110111010000001>, ++ LASX_N4_U5_DESC_BASE<"xvsrlni.h.w", int_loongarch_lasx_xvsrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRLNI_W_D : LASX_I6_U<0b0111011101000001>, ++ LASX_U6_DESC_BASE<"xvsrlni.w.d", int_loongarch_lasx_xvsrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRLNI_D_Q : LASX_I7_U<0b011101110100001>, ++ LASX_D_DESC_BASE<"xvsrlni.d.q", int_loongarch_lasx_xvsrlni_d_q, LASX256DOpnd>; ++ ++ ++def XVSRLRNI_B_H : LASX_I4_U<0b011101110100010001>, ++ LASX_U4_DESC_BASE<"xvsrlrni.b.h", int_loongarch_lasx_xvsrlrni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRLRNI_H_W : LASX_I5_U<0b01110111010001001>, ++ LASX_N4_U5_DESC_BASE<"xvsrlrni.h.w", int_loongarch_lasx_xvsrlrni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRLRNI_W_D : LASX_I6_U<0b0111011101000101>, ++ LASX_U6_DESC_BASE<"xvsrlrni.w.d", int_loongarch_lasx_xvsrlrni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRLRNI_D_Q : LASX_I7_U<0b011101110100011>, ++ LASX_D_DESC_BASE<"xvsrlrni.d.q", int_loongarch_lasx_xvsrlrni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRLNI_B_H : LASX_I4_U<0b011101110100100001>, ++ LASX_U4_DESC_BASE<"xvssrlni.b.h", int_loongarch_lasx_xvssrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLNI_H_W : LASX_I5_U<0b01110111010010001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlni.h.w", int_loongarch_lasx_xvssrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLNI_W_D : LASX_I6_U<0b0111011101001001>, ++ LASX_U6_DESC_BASE<"xvssrlni.w.d", int_loongarch_lasx_xvssrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLNI_D_Q : LASX_I7_U<0b011101110100101>, ++ LASX_D_DESC_BASE<"xvssrlni.d.q", 
int_loongarch_lasx_xvssrlni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRLNI_BU_H : LASX_I4_U<0b011101110100110001>, ++ LASX_U4_DESC_BASE<"xvssrlni.bu.h", int_loongarch_lasx_xvssrlni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLNI_HU_W : LASX_I5_U<0b01110111010011001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlni.hu.w", int_loongarch_lasx_xvssrlni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLNI_WU_D : LASX_I6_U<0b0111011101001101>, ++ LASX_U6_DESC_BASE<"xvssrlni.wu.d", int_loongarch_lasx_xvssrlni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLNI_DU_Q : LASX_I7_U<0b011101110100111>, ++ LASX_D_DESC_BASE<"xvssrlni.du.q", int_loongarch_lasx_xvssrlni_du_q, LASX256DOpnd>; ++ ++ ++def XVSSRLRNI_B_H : LASX_I4_U<0b011101110101000001>, ++ LASX_2R_3R_U4_DESC_BASE<"xvssrlrni.b.h", int_loongarch_lasx_xvssrlrni_b_h, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSSRLRNI_H_W : LASX_I5_U<0b01110111010100001>, ++ LASX_2R_3R_U5_DESC_BASE<"xvssrlrni.h.w", int_loongarch_lasx_xvssrlrni_h_w, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRNI_W_D : LASX_I6_U<0b0111011101010001>, ++ LASX_2R_3R_U6_DESC_BASE<"xvssrlrni.w.d", int_loongarch_lasx_xvssrlrni_w_d, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRNI_D_Q : LASX_I7_U<0b011101110101001>, ++ LASX_2R_3R_U7_DESC_BASE<"xvssrlrni.d.q", int_loongarch_lasx_xvssrlrni_d_q, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRNI_BU_H : LASX_I4_U<0b011101110101010001>, ++ LASX_U4_DESC_BASE<"xvssrlrni.bu.h", int_loongarch_lasx_xvssrlrni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLRNI_HU_W : LASX_I5_U<0b01110111010101001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlrni.hu.w", int_loongarch_lasx_xvssrlrni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLRNI_WU_D : LASX_I6_U<0b0111011101010101>, ++ LASX_U6_DESC_BASE<"xvssrlrni.wu.d", int_loongarch_lasx_xvssrlrni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLRNI_DU_Q : LASX_I7_U<0b011101110101011>, ++ LASX_D_DESC_BASE<"xvssrlrni.du.q", int_loongarch_lasx_xvssrlrni_du_q, LASX256DOpnd>; ++ ++ ++def XVSRANI_B_H : LASX_I4_U<0b011101110101100001>, ++ LASX_2R_3R_U4_DESC_BASE<"xvsrani.b.h", int_loongarch_lasx_xvsrani_b_h, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSRANI_H_W : LASX_I5_U<0b01110111010110001>, ++ LASX_2R_3R_U5_DESC_BASE<"xvsrani.h.w", int_loongarch_lasx_xvsrani_h_w, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRANI_W_D : LASX_I6_U<0b0111011101011001>, ++ LASX_2R_3R_U6_DESC_BASE<"xvsrani.w.d", int_loongarch_lasx_xvsrani_w_d, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRANI_D_Q : LASX_I7_U<0b011101110101101>, ++ LASX_2R_3R_U7_DESC_BASE<"xvsrani.d.q", int_loongarch_lasx_xvsrani_d_q, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRARNI_B_H : LASX_I4_U<0b011101110101110001>, ++ LASX_U4_DESC_BASE<"xvsrarni.b.h", int_loongarch_lasx_xvsrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRARNI_H_W : LASX_I5_U<0b01110111010111001>, ++ LASX_N4_U5_DESC_BASE<"xvsrarni.h.w", int_loongarch_lasx_xvsrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRARNI_W_D : LASX_I6_U<0b0111011101011101>, ++ LASX_U6_DESC_BASE<"xvsrarni.w.d", int_loongarch_lasx_xvsrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRARNI_D_Q : LASX_I7_U<0b011101110101111>, ++ LASX_D_DESC_BASE<"xvsrarni.d.q", int_loongarch_lasx_xvsrarni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRANI_B_H : LASX_I4_U<0b011101110110000001>, ++ LASX_U4_DESC_BASE<"xvssrani.b.h", int_loongarch_lasx_xvssrani_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRANI_H_W : LASX_I5_U<0b01110111011000001>, ++ LASX_N4_U5_DESC_BASE<"xvssrani.h.w", int_loongarch_lasx_xvssrani_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ 
++def XVSSRANI_W_D : LASX_I6_U<0b0111011101100001>, ++ LASX_U6_DESC_BASE<"xvssrani.w.d", int_loongarch_lasx_xvssrani_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRANI_D_Q : LASX_I7_U<0b011101110110001>, ++ LASX_D_DESC_BASE<"xvssrani.d.q", int_loongarch_lasx_xvssrani_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRANI_BU_H : LASX_I4_U<0b011101110110010001>, ++ LASX_U4_DESC_BASE<"xvssrani.bu.h", int_loongarch_lasx_xvssrani_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRANI_HU_W : LASX_I5_U<0b01110111011001001>, ++ LASX_N4_U5_DESC_BASE<"xvssrani.hu.w", int_loongarch_lasx_xvssrani_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRANI_WU_D : LASX_I6_U<0b0111011101100101>, ++ LASX_U6_DESC_BASE<"xvssrani.wu.d", int_loongarch_lasx_xvssrani_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRANI_DU_Q : LASX_I7_U<0b011101110110011>, ++ LASX_D_DESC_BASE<"xvssrani.du.q", int_loongarch_lasx_xvssrani_du_q, LASX256DOpnd>; ++ ++ ++def XVSSRARNI_B_H : LASX_I4_U<0b011101110110100001>, ++ LASX_U4_DESC_BASE<"xvssrarni.b.h", int_loongarch_lasx_xvssrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRARNI_H_W : LASX_I5_U<0b01110111011010001>, ++ LASX_N4_U5_DESC_BASE<"xvssrarni.h.w", int_loongarch_lasx_xvssrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRARNI_W_D : LASX_I6_U<0b0111011101101001>, ++ LASX_U6_DESC_BASE<"xvssrarni.w.d", int_loongarch_lasx_xvssrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRARNI_D_Q : LASX_I7_U<0b011101110110101>, ++ LASX_D_DESC_BASE<"xvssrarni.d.q", int_loongarch_lasx_xvssrarni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRARNI_BU_H : LASX_I4_U<0b011101110110110001>, ++ LASX_U4_DESC_BASE<"xvssrarni.bu.h", int_loongarch_lasx_xvssrarni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRARNI_HU_W : LASX_I5_U<0b01110111011011001>, ++ LASX_N4_U5_DESC_BASE<"xvssrarni.hu.w", int_loongarch_lasx_xvssrarni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRARNI_WU_D : LASX_I6_U<0b0111011101101101>, ++ LASX_U6_DESC_BASE<"xvssrarni.wu.d", int_loongarch_lasx_xvssrarni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRARNI_DU_Q : LASX_I7_U<0b011101110110111>, ++ LASX_D_DESC_BASE<"xvssrarni.du.q", int_loongarch_lasx_xvssrarni_du_q, LASX256DOpnd>; ++ ++ ++def XVEXTRINS_B : LASX_I8_U<0b01110111100011>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.b", int_loongarch_lasx_xvextrins_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVEXTRINS_H : LASX_I8_U<0b01110111100010>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.h", int_loongarch_lasx_xvextrins_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVEXTRINS_W : LASX_I8_U<0b01110111100001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.w", int_loongarch_lasx_xvextrins_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVEXTRINS_D : LASX_I8_U<0b01110111100000>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.d", int_loongarch_lasx_xvextrins_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSHUF4I_B : LASX_I8_U<0b01110111100100>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.b", int_loongarch_lasx_xvshuf4i_b, LASX256BOpnd>; ++ ++def XVSHUF4I_H : LASX_I8_U<0b01110111100101>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.h", int_loongarch_lasx_xvshuf4i_h, LASX256HOpnd>; ++ ++def XVSHUF4I_W : LASX_I8_U<0b01110111100110>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.w", int_loongarch_lasx_xvshuf4i_w, LASX256WOpnd>; ++ ++def XVSHUF4I_D : LASX_I8_U<0b01110111100111>, ++ LASX_I8_O4_SHF_DESC_BASE<"xvshuf4i.d", int_loongarch_lasx_xvshuf4i_d, LASX256DOpnd>; ++ ++ ++def XVBITSELI_B : LASX_I8_U<0b01110111110001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvbitseli.b", int_loongarch_lasx_xvbitseli_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVANDI_B : 
LASX_I8_U<0b01110111110100>, ++ LASX_2R_U8_DESC_BASE<"xvandi.b", int_loongarch_lasx_xvandi_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVORI_B : LASX_I8_U<0b01110111110101>, ++ LASX_2R_U8_DESC_BASE<"xvori.b", int_loongarch_lasx_xvori_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVXORI_B : LASX_I8_U<0b01110111110110>, ++ LASX_2R_U8_DESC_BASE<"xvxori.b", int_loongarch_lasx_xvxori_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVNORI_B : LASX_I8_U<0b01110111110111>, ++ LASX_2R_U8_DESC_BASE<"xvnori.b", int_loongarch_lasx_xvnori_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVLDI : LASX_1R_I13<0b01110111111000>, ++ LASX_I13_DESC_BASE<"xvldi", int_loongarch_lasx_xvldi, i32, simm13Op, LASX256DOpnd>; ++ ++def XVLDI_B : LASX_1R_I13_I10<0b01110111111000000>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256BOpnd>; ++ ++def XVLDI_H : LASX_1R_I13_I10<0b01110111111000001>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256HOpnd>; ++ ++def XVLDI_W : LASX_1R_I13_I10<0b01110111111000010>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256WOpnd>; ++ ++def XVLDI_D : LASX_1R_I13_I10<0b01110111111000011>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256DOpnd>; ++ ++ ++def XVPERMI_W : LASX_I8_U<0b01110111111001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvpermi.w", int_loongarch_lasx_xvpermi_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVPERMI_D : LASX_I8_U<0b01110111111010>, ++ LASX_2R_U8_DESC_BASE<"xvpermi.d", int_loongarch_lasx_xvpermi_d, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVPERMI_Q : LASX_I8_U<0b01110111111011>, ++ LASX_2R_3R_U8_DESC_BASE<"xvpermi.q", int_loongarch_lasx_xvpermi_q, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++//Pat ++ ++class LASXBitconvertPat preds = [HasLASX]> : ++ LASXPat<(DstVT (bitconvert SrcVT:$src)), ++ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; ++ ++// These are endian-independent because the element size doesnt change ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++class LASX_XINSERT_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ImmOp:$n, ROFS:$fs), ++ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, Imm:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class XINSERT_H_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++class XINSERT_H64_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++def XINSERT_H_PSEUDO : XINSERT_H_PSEUDO_DESC; ++def XINSERT_H64_PSEUDO : XINSERT_H64_PSEUDO_DESC; ++ ++class XINSERT_B_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++def XINSERT_B_PSEUDO : XINSERT_B_PSEUDO_DESC; ++ ++ ++class LASX_COPY_PSEUDO_BASE : ++ LASXPseudo<(outs RCD:$xd), (ins RCWS:$xj, ImmOp:$n), ++ [(set RCD:$xd, (OpNode (VecTy RCWS:$xj), Imm:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class XCOPY_FW_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE; ++class XCOPY_FD_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE; ++def XCOPY_FW_PSEUDO : 
XCOPY_FW_PSEUDO_DESC; ++def XCOPY_FD_PSEUDO : XCOPY_FD_PSEUDO_DESC; ++ ++ ++ ++class LASX_XINSERT_VIDX_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ROIdx:$n, ROFS:$fs), ++ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++class XINSERT_FW_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++def XINSERT_FW_PSEUDO : XINSERT_FW_PSEUDO_DESC; ++ ++class XINSERT_FW_VIDX_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++class XINSERT_FW_VIDX64_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_FW_VIDX_PSEUDO : XINSERT_FW_VIDX_PSEUDO_DESC; ++def XINSERT_FW_VIDX64_PSEUDO : XINSERT_FW_VIDX64_PSEUDO_DESC; ++ ++class XINSERT_B_VIDX64_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_B_VIDX64_PSEUDO : XINSERT_B_VIDX64_PSEUDO_DESC; ++ ++ ++class XINSERT_B_VIDX_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_B_VIDX_PSEUDO : XINSERT_B_VIDX_PSEUDO_DESC; ++ ++ ++class XINSERTPostRA : ++ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++def XINSERT_B_VIDX_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_B_VIDX64_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_FW_VIDX_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_FW_VIDX64_PSEUDO_POSTRA : XINSERTPostRA; ++ ++class XINSERT_FD_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++def XINSERT_FD_PSEUDO : XINSERT_FD_PSEUDO_DESC; ++ ++class LASX_2R_FILL_PSEUDO_BASE : ++ LASXPseudo<(outs RCWD:$xd), (ins RCWS:$fs), ++ [(set RCWD:$xd, (OpNode RCWS:$fs))]> { ++ let usesCustomInserter = 1; ++} ++ ++class XFILL_FW_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE; ++class XFILL_FD_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE; ++def XFILL_FW_PSEUDO : XFILL_FW_PSEUDO_DESC; ++def XFILL_FD_PSEUDO : XFILL_FD_PSEUDO_DESC; ++ ++class LASX_CONCAT_VECTORS_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xs, ROXK:$xt), ++ [(set ROXD:$xd, (Ty (concat_vectors (SubTy ROXJ:$xs), (SubTy ROXK:$xt))))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class CONCAT_VECTORS_B_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_H_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_W_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_D_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++ ++class CONCAT_VECTORS_FW_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_FD_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++ ++def CONCAT_VECTORS_B_PSEUDO : CONCAT_VECTORS_B_PSEUDO_DESC; ++def CONCAT_VECTORS_H_PSEUDO : CONCAT_VECTORS_H_PSEUDO_DESC; ++def CONCAT_VECTORS_W_PSEUDO : CONCAT_VECTORS_W_PSEUDO_DESC; ++def CONCAT_VECTORS_D_PSEUDO : CONCAT_VECTORS_D_PSEUDO_DESC; ++def CONCAT_VECTORS_FW_PSEUDO : CONCAT_VECTORS_FW_PSEUDO_DESC; ++def CONCAT_VECTORS_FD_PSEUDO : CONCAT_VECTORS_FD_PSEUDO_DESC; ++ ++ ++class LASX_COPY_GPR_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROFS:$xj, ROIdx:$n), ++ [(set ROXD:$xd, (OpNode (VecTy ROFS:$xj), ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class XCOPY_FW_GPR_PSEUDO_DESC : LASX_COPY_GPR_PSEUDO_BASE; ++def XCOPY_FW_GPR_PSEUDO : XCOPY_FW_GPR_PSEUDO_DESC; ++ ++ ++let isCodeGenOnly = 1 in { ++ ++def XVLD_H : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v16i16, LASX256HOpnd, mem>; ++ ++def XVLD_W : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v8i32, LASX256WOpnd, mem>; ++ ++def XVLD_D : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v4i64, LASX256DOpnd, mem>; ++ ++ 
++def XVST_H : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v16i16, LASX256HOpnd, mem_simm12>; ++ ++def XVST_W : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v8i32, LASX256WOpnd, mem_simm12>; ++ ++def XVST_D : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v4i64, LASX256DOpnd, mem_simm12>; ++ ++ ++def XVREPLVE_W_N : LASX_3R_1GP<0b01110101001000110>, ++ LASX_3R_VREPLVE_DESC_BASE_N<"xvreplve.w", LASX256WOpnd>; ++ ++ ++def XVANDI_B_N : LASX_I8_U<0b01110111110100>, ++ LASX_BIT_U8_DESC_BASE<"xvandi.b", and, xvsplati8_uimm8, LASX256BOpnd>; ++ ++ ++def XVXORI_B_N : LASX_I8_U<0b01110111110110>, ++ LASX_BIT_U8_DESC_BASE<"xvxori.b", xor, xvsplati8_uimm8, LASX256BOpnd>; ++ ++ ++def XVSRAI_B_N : LASX_I3_U<0b0111011100110100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrai.b", sra, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSRAI_H_N : LASX_I4_U<0b011101110011010001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrai.h", sra, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSRAI_W_N : LASX_I5_U<0b01110111001101001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrai.w", sra, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSRAI_D_N : LASX_I6_U<0b0111011100110101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrai.d", sra, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVSLLI_B_N : LASX_I3_U<0b0111011100101100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvslli.b", shl, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSLLI_H_N : LASX_I4_U<0b011101110010110001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvslli.h", shl, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSLLI_W_N : LASX_I5_U<0b01110111001011001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvslli.w", shl, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLLI_D_N : LASX_I6_U<0b0111011100101101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvslli.d", shl, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVSRLI_B_N : LASX_I3_U<0b0111011100110000001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrli.b", srl, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSRLI_H_N : LASX_I4_U<0b011101110011000001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrli.h", srl, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSRLI_W_N : LASX_I5_U<0b01110111001100001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrli.w", srl, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSRLI_D_N : LASX_I6_U<0b0111011100110001>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrli.d", srl, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVMAXI_B_N : LASX_I5<0b01110110100100000>, ++ LASX_I5_DESC_BASE<"xvmaxi.b", smax, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVMAXI_H_N : LASX_I5<0b01110110100100001>, ++ LASX_I5_DESC_BASE<"xvmaxi.h", smax, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVMAXI_W_N : LASX_I5<0b01110110100100010>, ++ LASX_I5_DESC_BASE<"xvmaxi.w", smax, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVMAXI_D_N : LASX_I5<0b01110110100100011>, ++ LASX_I5_DESC_BASE<"xvmaxi.d", smax, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVMINI_B_N : LASX_I5<0b01110110100100100>, ++ LASX_I5_DESC_BASE<"xvmini.b", smin, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVMINI_H_N : LASX_I5<0b01110110100100101>, ++ LASX_I5_DESC_BASE<"xvmini.h", smin, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVMINI_W_N : LASX_I5<0b01110110100100110>, ++ LASX_I5_DESC_BASE<"xvmini.w", smin, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVMINI_D_N : LASX_I5<0b01110110100100111>, ++ LASX_I5_DESC_BASE<"xvmini.d", smin, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVMAXI_BU_N : LASX_I5_U<0b01110110100101000>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.bu", umax, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVMAXI_HU_N : LASX_I5_U<0b01110110100101001>, ++ 
LASX_I5_U_DESC_BASE<"xvmaxi.hu", umax, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVMAXI_WU_N : LASX_I5_U<0b01110110100101010>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.wu", umax, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVMAXI_DU_N : LASX_I5_U<0b01110110100101011>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.du", umax, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVMINI_BU_N : LASX_I5_U<0b01110110100101100>, ++ LASX_I5_U_DESC_BASE<"xvmini.bu", umin, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVMINI_HU_N : LASX_I5_U<0b01110110100101101>, ++ LASX_I5_U_DESC_BASE<"xvmini.hu", umin, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVMINI_WU_N : LASX_I5_U<0b01110110100101110>, ++ LASX_I5_U_DESC_BASE<"xvmini.wu", umin, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVMINI_DU_N : LASX_I5_U<0b01110110100101111>, ++ LASX_I5_U_DESC_BASE<"xvmini.du", umin, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSEQI_B_N : LASX_I5<0b01110110100000000>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.b", SETEQ, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSEQI_H_N : LASX_I5<0b01110110100000001>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.h", SETEQ, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSEQI_W_N : LASX_I5<0b01110110100000010>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.w", SETEQ, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSEQI_D_N : LASX_I5<0b01110110100000011>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.d", SETEQ, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLEI_B_N : LASX_I5<0b01110110100000100>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.b", SETLE, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSLEI_H_N : LASX_I5<0b01110110100000101>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.h", SETLE, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSLEI_W_N : LASX_I5<0b01110110100000110>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.w", SETLE, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSLEI_D_N : LASX_I5<0b01110110100000111>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.d", SETLE, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLEI_BU_N : LASX_I5_U<0b01110110100001000>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.bu", SETULE, v32i8, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSLEI_HU_N : LASX_I5_U<0b01110110100001001>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.hu", SETULE, v16i16, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSLEI_WU_N : LASX_I5_U<0b01110110100001010>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.wu", SETULE, v8i32, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLEI_DU_N : LASX_I5_U<0b01110110100001011>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.du", SETULE, v4i64, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSLTI_B_N : LASX_I5<0b01110110100001100>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.b", SETLT, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSLTI_H_N : LASX_I5<0b01110110100001101>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.h", SETLT, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSLTI_W_N : LASX_I5<0b01110110100001110>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.w", SETLT, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSLTI_D_N : LASX_I5<0b01110110100001111>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.d", SETLT, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLTI_BU_N : LASX_I5_U<0b01110110100010000>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.bu", SETULT, v32i8, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSLTI_HU_N : LASX_I5_U<0b01110110100010001>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.hu", SETULT, v16i16, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSLTI_WU_N : LASX_I5_U<0b01110110100010010>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.wu", SETULT, v8i32, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def 
XVSLTI_DU_N : LASX_I5_U<0b01110110100010011>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.du", SETULT, v4i64, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVADDI_BU_N : LASX_I5_U<0b01110110100010100>, ++ LASX_I5_U_DESC_BASE<"xvaddi.bu", add, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVADDI_HU_N : LASX_I5_U<0b01110110100010101>, ++ LASX_I5_U_DESC_BASE<"xvaddi.hu", add, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVADDI_WU_N : LASX_I5_U<0b01110110100010110>, ++ LASX_I5_U_DESC_BASE<"xvaddi.wu", add, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVADDI_DU_N : LASX_I5_U<0b01110110100010111>, ++ LASX_I5_U_DESC_BASE<"xvaddi.du", add, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSUBI_BU_N : LASX_I5_U<0b01110110100011000>, ++ LASX_I5_U_DESC_BASE<"xvsubi.bu", sub, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSUBI_HU_N : LASX_I5_U<0b01110110100011001>, ++ LASX_I5_U_DESC_BASE<"xvsubi.hu", sub, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSUBI_WU_N : LASX_I5_U<0b01110110100011010>, ++ LASX_I5_U_DESC_BASE<"xvsubi.wu", sub, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSUBI_DU_N : LASX_I5_U<0b01110110100011011>, ++ LASX_I5_U_DESC_BASE<"xvsubi.du", sub, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVPERMI_QH : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256HOpnd, LASX256HOpnd>; ++ ++def XVPERMI_QW : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256WOpnd, LASX256WOpnd>; ++ ++def XVPERMI_QD : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITSELI_B_N : LASX_I8_U<0b01110111110001>, ++ LASX_2R_3R_U8_SELECT<"xvbitseli.b", vselect, LASX256BOpnd, LASX256BOpnd>; ++ ++} ++ ++ ++def : LASXPat<(v8f32 (load addrimm12:$addr)), (XVLD_W addrimm12:$addr)>; ++def : LASXPat<(v4f64 (load addrimm12:$addr)), (XVLD_D addrimm12:$addr)>; ++ ++def XVST_FW : LASXPat<(store (v8f32 LASX256W:$xj), addrimm12:$addr), ++ (XVST_W LASX256W:$xj, addrimm12:$addr)>; ++def XVST_FD : LASXPat<(store (v4f64 LASX256D:$xj), addrimm12:$addr), ++ (XVST_D LASX256D:$xj, addrimm12:$addr)>; ++ ++def XVNEG_FW : LASXPat<(fneg (v8f32 LASX256W:$xj)), ++ (XVBITREVI_W LASX256W:$xj, 31)>; ++def XVNEG_FD : LASXPat<(fneg (v4f64 LASX256D:$xj)), ++ (XVBITREVI_D LASX256D:$xj, 63)>; ++ ++ ++def : LASXPat<(v4i64 (LoongArchVABSD v4i64:$xj, v4i64:$xk, (i32 0))), ++ (v4i64 (XVABSD_D $xj, $xk))>; ++ ++def : LASXPat<(v8i32 (LoongArchVABSD v8i32:$xj, v8i32:$xk, (i32 0))), ++ (v8i32 (XVABSD_W $xj, $xk))>; ++ ++def : LASXPat<(v16i16 (LoongArchVABSD v16i16:$xj, v16i16:$xk, (i32 0))), ++ (v16i16 (XVABSD_H $xj, $xk))>; ++ ++def : LASXPat<(v32i8 (LoongArchVABSD v32i8:$xj, v32i8:$xk, (i32 0))), ++ (v32i8 (XVABSD_B $xj, $xk))>; ++ ++def : LASXPat<(v4i64 (LoongArchUVABSD v4i64:$xj, v4i64:$xk, (i32 0))), ++ (v4i64 (XVABSD_DU $xj, $xk))>; ++ ++def : LASXPat<(v8i32 (LoongArchUVABSD v8i32:$xj, v8i32:$xk, (i32 0))), ++ (v8i32 (XVABSD_WU $xj, $xk))>; ++ ++def : LASXPat<(v16i16 (LoongArchUVABSD v16i16:$xj, v16i16:$xk, (i32 0))), ++ (v16i16 (XVABSD_HU $xj, $xk))>; ++ ++def : LASXPat<(v32i8 (LoongArchUVABSD v32i8:$xj, v32i8:$xk, (i32 0))), ++ (v32i8 (XVABSD_BU $xj, $xk))>; ++ ++ ++def : LASXPat<(or v32i8:$vj, (shl vsplat_imm_eq_1, v32i8:$vk)), ++ (XVBITSET_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(or v16i16:$vj, (shl vsplat_imm_eq_1, v16i16:$vk)), ++ (XVBITSET_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(or v8i32:$vj, (shl vsplat_imm_eq_1, v8i32:$vk)), ++ (XVBITSET_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(or v4i64:$vj, (shl vsplat_imm_eq_1, v4i64:$vk)), ++ (XVBITSET_D v4i64:$vj, 
v4i64:$vk)>; ++ ++def : LASXPat<(xor v32i8:$vj, (shl xvsplat_imm_eq_1, v32i8:$vk)), ++ (XVBITREV_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(xor v16i16:$vj, (shl xvsplat_imm_eq_1, v16i16:$vk)), ++ (XVBITREV_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(xor v8i32:$vj, (shl xvsplat_imm_eq_1, v8i32:$vk)), ++ (XVBITREV_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(xor v4i64:$vj, (shl (v4i64 xvsplati64_imm_eq_1), v4i64:$vk)), ++ (XVBITREV_D v4i64:$vj, v4i64:$vk)>; ++ ++def : LASXPat<(and v32i8:$vj, (xor (shl vsplat_imm_eq_1, v32i8:$vk), immAllOnesV)), ++ (XVBITCLR_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(and v16i16:$vj, (xor (shl vsplat_imm_eq_1, v16i16:$vk), immAllOnesV)), ++ (XVBITCLR_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(and v8i32:$vj, (xor (shl vsplat_imm_eq_1, v8i32:$vk), immAllOnesV)), ++ (XVBITCLR_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(and v4i64:$vj, (xor (shl (v4i64 vsplati64_imm_eq_1), v4i64:$vk), (bitconvert (v8i32 immAllOnesV)))), ++ (XVBITCLR_D v4i64:$vj, v4i64:$vk)>; ++ ++def xvsplati64_imm_eq_63 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def xvsplati8imm7 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati8 immi32Cst7))>; ++def xvsplati16imm15 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati16 immi32Cst15))>; ++def xvsplati32imm31 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati32 immi32Cst31))>; ++def xvsplati64imm63 : PatFrag<(ops node:$wt), ++ (and node:$wt, xvsplati64_imm_eq_63)>; ++ ++ ++class LASXShiftPat : ++ LASXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++class LASXBitPat : ++ LASXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++multiclass LASXShiftPats { ++ def : LASXShiftPat(Insn#_B), ++ (xvsplati8 immi32Cst7)>; ++ def : LASXShiftPat(Insn#_H), ++ (xvsplati16 immi32Cst15)>; ++ def : LASXShiftPat(Insn#_W), ++ (xvsplati32 immi32Cst31)>; ++ def : LASXPat<(v4i64 (Node v4i64:$vs, (v4i64 (and v4i64:$vt, ++ xvsplati64_imm_eq_63)))), ++ (v4i64 (!cast(Insn#_D) v4i64:$vs, v4i64:$vt))>; ++} ++ ++multiclass LASXBitPats { ++ def : LASXBitPat(Insn#_B), xvsplati8imm7>; ++ def : LASXBitPat(Insn#_H), xvsplati16imm15>; ++ def : LASXBitPat(Insn#_W), xvsplati32imm31>; ++ def : LASXPat<(Node v4i64:$vs, (shl (v4i64 xvsplati64_imm_eq_1), ++ (xvsplati64imm63 v4i64:$vt))), ++ (v4i64 (!cast(Insn#_D) v4i64:$vs, v4i64:$vt))>; ++} ++ ++defm : LASXShiftPats; ++defm : LASXShiftPats; ++defm : LASXShiftPats; ++defm : LASXBitPats; ++defm : LASXBitPats; ++ ++def : LASXPat<(and v32i8:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati8imm7 v32i8:$vt)), ++ immAllOnesV)), ++ (v32i8 (XVBITCLR_B v32i8:$vs, v32i8:$vt))>; ++def : LASXPat<(and v16i16:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati16imm15 v16i16:$vt)), ++ immAllOnesV)), ++ (v16i16 (XVBITCLR_H v16i16:$vs, v16i16:$vt))>; ++def : LASXPat<(and v8i32:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati32imm31 v8i32:$vt)), ++ immAllOnesV)), ++ (v8i32 (XVBITCLR_W v8i32:$vs, v8i32:$vt))>; ++def : LASXPat<(and v4i64:$vs, (xor (shl (v4i64 xvsplati64_imm_eq_1), ++ (xvsplati64imm63 v4i64:$vt)), ++ (bitconvert (v8i32 immAllOnesV)))), ++ (v4i64 (XVBITCLR_D v4i64:$vs, v4i64:$vt))>; ++ ++ ++def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), ++ (f32 fpimm1),(f32 fpimm1), (f32 fpimm1), 
(f32 fpimm1), (f32 fpimm1))), v8f32:$v), ++ (XVFRECIP_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), v4f64:$v), ++ (XVFRECIP_D v4f64:$v)>; ++ ++def : LASXPat<(fdiv (v8f32 fpimm1), v8f32:$v), ++ (XVFRECIP_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 fpimm1), v4f64:$v), ++ (XVFRECIP_D v4f64:$v)>; ++ ++ ++def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), ++ (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v8f32:$v)), ++ (XVFRSQRT_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), (fsqrt v4f64:$v)), ++ (XVFRSQRT_D v4f64:$v)>; ++ ++def : LASXPat<(fdiv (v8f32 fpimm1), (fsqrt v8f32:$v)), ++ (XVFRSQRT_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 fpimm1), (fsqrt v4f64:$v)), ++ (XVFRSQRT_D v4f64:$v)>; ++ ++ ++def : LASXPat <(extract_subvector v4f64:$vec, (i32 0)), ++ (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8f32:$vec, (i32 0)), ++ (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i32 0)), ++ (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i32 0)), ++ (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i32 0)), ++ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i32 0)), ++ (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; ++ ++ ++ ++def : LASXPat <(extract_subvector v4f64:$vec, (i64 0)), ++ (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8f32:$vec, (i64 0)), ++ (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i64 0)), ++ (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i64 0)), ++ (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i64 0)), ++ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i64 0)), ++ (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; ++ ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i32 2)), ++ (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i32 4)), ++ (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i32 8)), ++ (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i32 16)), ++ (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; ++ ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i64 2)), ++ (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i64 4)), ++ (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i64 8)), ++ (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i64 16)), ++ (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; ++ ++ ++def : LASXPat<(abs v4i64:$v), ++ (XVMAX_D v4i64:$v, 
(XVNEG_D v4i64:$v))>; ++ ++def : LASXPat<(abs v8i32:$v), ++ (XVMAX_W v8i32:$v, (XVNEG_W v8i32:$v))>; ++ ++def : LASXPat<(abs v16i16:$v), ++ (XVMAX_H v16i16:$v, (XVNEG_H v16i16:$v))>; ++ ++def : LASXPat<(abs v32i8:$v), ++ (XVMAX_B v32i8:$v, (XVNEG_B v32i8:$v))>; ++ ++ ++def : LASXPat<(sub (v32i8 immAllZerosV), v32i8:$v), ++ (XVNEG_B v32i8:$v)>; ++ ++def : LASXPat<(sub (v16i16 immAllZerosV), v16i16:$v), ++ (XVNEG_H v16i16:$v)>; ++ ++def : LASXPat<(sub (v8i32 immAllZerosV), v8i32:$v), ++ (XVNEG_W v8i32:$v)>; ++ ++def : LASXPat<(sub (v4i64 immAllZerosV), v4i64:$v), ++ (XVNEG_D v4i64:$v)>; ++ ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i32 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 2)), ++ (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v8i32 immAllZerosV), ++ (v4i32 LSX128W:$src), (i32 4)), ++ (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 
immAllZerosV), (v8i16 LSX128H:$src), (i32 8)), ++ (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 16)), ++ (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 2)), ++ (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 4)), ++ (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 8)), ++ (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 16)), ++ (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(sra ++ (v32i8 (add ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (srl ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ 
(i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(sra ++ (v16i16 (add ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (srl ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(sra ++ (v8i32 (add ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (srl ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31), ++ (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(sra ++ (v4i64 (add ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (srl ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (build_vector (i64 63),(i64 63),(i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), ++ (XVAVG_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(srl ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(srl ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(srl ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(srl ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) ++ ) ++ ), ++ (XVAVG_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(sra ++ (v32i8 (add ++ (v32i8 (add (v32i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (srl ++ (v32i8 ( add (v32i8( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 
1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVGR_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++ ++def : LASXPat<(sra ++ (v16i16 (add ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (srl ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (build_vector ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVGR_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++ ++def : LASXPat<(sra ++ (v8i32 (add ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (srl ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (build_vector ++ (i32 31),(i32 31),(i32 31),(i32 31), ++ (i32 31),(i32 31),(i32 31),(i32 31) ++ ) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (XVAVGR_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(sra ++ (v4i64 (add ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (srl ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (build_vector ++ (i64 63),(i64 63),(i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), ++ (XVAVGR_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(srl ++ (v32i8 (add (v32i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ 
(i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(srl ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(srl ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(srl ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) ++ ) ++ ), ++ (XVAVGR_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), ++ (XVMUH_D LASX256D:$a, LASX256D:$b)>; ++ ++def : LASXPat<(mulhs LASX256W:$a, LASX256W:$b), ++ (XVMUH_W LASX256W:$a, LASX256W:$b)>; ++ ++def : LASXPat<(mulhs LASX256H:$a, LASX256H:$b), ++ (XVMUH_H LASX256H:$a, LASX256H:$b)>; ++ ++def : LASXPat<(mulhs LASX256B:$a, LASX256B:$b), ++ (XVMUH_B LASX256B:$a, LASX256B:$b)>; ++ ++ ++def : LASXPat<(mulhu LASX256D:$a, LASX256D:$b), ++ (XVMUH_DU LASX256D:$a, LASX256D:$b)>; ++ ++def : LASXPat<(mulhu LASX256W:$a, LASX256W:$b), ++ (XVMUH_WU LASX256W:$a, LASX256W:$b)>; ++ ++def : LASXPat<(mulhu LASX256H:$a, LASX256H:$b), ++ (XVMUH_HU LASX256H:$a, LASX256H:$b)>; ++ ++def : LASXPat<(mulhu LASX256B:$a, LASX256B:$b), ++ (XVMUH_BU LASX256B:$a, LASX256B:$b)>; ++ ++ ++def : LASXPat<(LoongArchINSVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVINSVE0_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchINSVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVINSVE0_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 (bitconvert (v32i8 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )))), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 (bitconvert (v32i8 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )))), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; ++ ++def : 
LASXPat<(LoongArchXVPICKVE (v4i64 (build_vector ++ (i64 0),(i64 0),(i64 0),(i64 0) ++ )), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVSHUF4I (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm8_32:$ui8), ++ (XVSHUF4I_D LASX256D:$a, LASX256D:$b, uimm8_32:$ui8)>; ++ ++def : LASXPat<(LoongArchXVPERMI (v4i64 LASX256D:$a), uimm8_32:$ui8), ++ (XVPERMI_D LASX256D:$a, uimm8_32:$ui8)>; ++ ++ ++ ++ ++//===----------------------------------------------------------------------===// ++// Intrinsics ++//===----------------------------------------------------------------------===// ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_COR_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_COR_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUN_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUN_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUNE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUNE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUEQ_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CEQ_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CNE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CNE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CLT_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CLT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CULT_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CULT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CLE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CLE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : 
LASXPat<(int_loongarch_lasx_xvfcmp_cule_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CULE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CULE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvseq_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSEQ_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSEQ_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSEQ_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsle_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLE_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLE_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLE_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsle_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLE_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLE_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLE_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLE_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvslt_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLT_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLT_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLT_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvslt_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLT_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLT_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLT_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLT_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVADD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVADD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVADD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsub_b (v32i8 LASX256B:$xj), (v32i8 
LASX256B:$xk)), ++ (XVSUB_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSUB_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSUB_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmax_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMAX_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMAX_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMAX_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMAX_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmin_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMIN_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMIN_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMIN_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMIN_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmin_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMIN_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMIN_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMIN_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMIN_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmul_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMUL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMUL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMUL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMUL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvdiv_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVDIV_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVDIV_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVDIV_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVDIV_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsll_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLL_W LASX256W:$xj, LASX256W:$xk)>; ++def : 
LASXPat<(int_loongarch_lasx_xvsll_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsrl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSRL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSRL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSRL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSRL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsra_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSRA_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSRA_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSRA_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSRA_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfadd_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFADD_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfadd_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfsub_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFSUB_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfsub_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfmul_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFMUL_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfmul_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFMUL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfdiv_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFDIV_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfdiv_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFDIV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfsqrt_s (v8f32 LASX256W:$xj)), ++ (XVFSQRT_S LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvfsqrt_d (v4f64 LASX256D:$xj)), ++ (XVFSQRT_D LASX256D:$xj)>; ++ ++def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_w (v8i32 LASX256W:$xj))), ++ (XVFFINT_S_W (v8i32 LASX256W:$xj))>; ++def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_wu (v8i32 LASX256W:$xj))), ++ (XVFFINT_S_WU (v8i32 LASX256W:$xj))>; ++ ++def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_l (v4i64 LASX256D:$xj))), ++ (XVFFINT_D_L (v4i64 LASX256D:$xj))>; ++def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_lu (v4i64 LASX256D:$xj))), ++ (XVFFINT_D_LU (v4i64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_b GPR32Opnd:$rj), ++ (XVREPLGR2VR_B GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_h GPR32Opnd:$rj), ++ (XVREPLGR2VR_H GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_w GPR32Opnd:$rj), ++ (XVREPLGR2VR_W GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_d GPR64Opnd:$rj), ++ (XVREPLGR2VR_D GPR64Opnd:$rj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), ++ (XVPICKVE2GR_W LASX256W:$xj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_d (v4i64 
LASX256D:$xj), (immZExt2:$ui2)),
++ (XVPICKVE2GR_D LASX256D:$xj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_wu (v8i32 LASX256W:$xj), (immZExt3:$ui3)),
++ (XVPICKVE2GR_WU LASX256W:$xj, uimm3:$ui3)>;
++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_du (v4i64 LASX256D:$xj), (immZExt2:$ui2)),
++ (XVPICKVE2GR_DU LASX256D:$xj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvreplve0_d (v4i64 LASX256D:$xj)),
++ (XVREPLVE0_D (v4i64 LASX256D:$xj))>;
++
++def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_w (v8i32 LASX256W:$xj), GPR32Opnd:$rj, (immZExt3:$ui3)),
++ (XVINSGR2VR_W LASX256W:$xj, GPR32Opnd:$rj, uimm3:$ui3)>;
++def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_d (v4i64 LASX256D:$xj), GPR64Opnd:$rj, (immZExt2:$ui2)),
++ (XVINSGR2VR_D LASX256D:$xj, GPR64Opnd:$rj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvpickve_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)),
++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>;
++def : LASXPat<(int_loongarch_lasx_xvpickve_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)),
++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvdiv_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVDIV_B LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvdiv_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVDIV_H LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvdiv_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVDIV_W LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvdiv_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVDIV_D LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvmod_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVMOD_BU LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVMOD_HU LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVMOD_WU LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVMOD_DU LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvmod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVMOD_B LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVMOD_H LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVMOD_W LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVMOD_D LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvmax_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVMAX_BU LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmax_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVMAX_HU LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmax_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVMAX_WU LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmax_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVMAX_DU LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvfrint_s (v8f32 LASX256W:$xj)),
++ (XVFRINT_S LASX256W:$xj)>;
++def : LASXPat<(int_loongarch_lasx_xvfrint_d (v4f64 LASX256D:$xj)),
++ (XVFRINT_D LASX256D:$xj)>;
++
++def : LASXPat<(int_loongarch_lasx_xvpackod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++
(XVPACKOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPACKOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPACKOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPACKOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpackev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPACKEV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPACKEV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPACKEV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPACKEV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvilvh_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVILVH_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVILVH_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVILVH_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVILVH_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvilvl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVILVL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVILVL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVILVL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVILVL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPICKEV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPICKEV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPICKEV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPICKEV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPICKOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPICKOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPICKOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPICKOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSADD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSADD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ 
(XVSADD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvssub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSSUB_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSSUB_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSSUB_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsadd_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSADD_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSADD_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSADD_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSADD_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvssub_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSSUB_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSSUB_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSSUB_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSSUB_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmadd_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMADD_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMADD_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMADD_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMADD_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmsub_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMSUB_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMSUB_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMSUB_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMSUB_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_wu_s (v8f32 LASX256W:$xj))), ++ (XVFTINTRZ_WU_S (v8f32 LASX256W:$xj))>; ++def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_lu_d (v4f64 LASX256D:$xj))), ++ (XVFTINTRZ_LU_D (v4f64 LASX256D:$xj))>; ++ ++def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_w_s (v8f32 LASX256W:$xj))), ++ 
(XVFTINTRZ_W_S (v8f32 LASX256W:$xj))>; ++def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_l_d (v4f64 LASX256D:$xj))), ++ (XVFTINTRZ_L_D (v4f64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvbitclr_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVBITCLR_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVBITCLR_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVBITCLR_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVBITCLR_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvclz_b (v32i8 LASX256B:$xj)), ++ (XVCLZ_B LASX256B:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_h (v16i16 LASX256H:$xj)), ++ (XVCLZ_H LASX256H:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_w (v8i32 LASX256W:$xj)), ++ (XVCLZ_W LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_d (v4i64 LASX256D:$xj)), ++ (XVCLZ_D LASX256D:$xj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpcnt_b (v32i8 LASX256B:$xj)), ++ (XVPCNT_B LASX256B:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_h (v16i16 LASX256H:$xj)), ++ (XVPCNT_H LASX256H:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_w (v8i32 LASX256W:$xj)), ++ (XVPCNT_W LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_d (v4i64 LASX256D:$xj)), ++ (XVPCNT_D LASX256D:$xj)>; ++ ++ ++def : LASXPat<(v32i8 (load (add iPTR:$xj, iPTR:$xk))), ++ (XVLDX PtrRC:$xj, PtrRC:$xk)>; ++ ++def : LASXPat<(store (v32i8 LASX256B:$xd), (add iPTR:$xj, iPTR:$xk)), ++ (XVSTX LASX256B:$xd, PtrRC:$xj, PtrRC:$xk)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v8i32 LASX256W:$xj))), ++ (VEXT2XV_D_W LASX256W:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_W_H LASX256H:$xj)>; ++def : LASXPat<(v16i16 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_H_B LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (zext_invec (v8i32 LASX256W:$xj))), ++ (VEXT2XV_DU_WU LASX256W:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_WU_HU LASX256H:$xj)>; ++def : LASXPat<(v16i16 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_HU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_D_H LASX256H:$xj)>; ++def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_D_B LASX256B:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_W_B LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_DU_HU LASX256H:$xj)>; ++def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_DU_BU LASX256B:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_WU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_D_H LASX256H:$xj)>; ++def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_D_B LASX256B:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_W_B LASX256B:$xj)>; ++ ++def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_DU_HU LASX256H:$xj)>; ++def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_DU_BU LASX256B:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_WU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v16i16 (sext (v16i8 LSX128B:$vj))), ++ (VEXT2XV_H_B ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>; ++ 
++def : LASXPat<(v8i32 (sext (v8i16 LSX128H:$vj))), ++ (VEXT2XV_W_H ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>; ++ ++def : LASXPat<(v4i64 (sext (v4i32 LSX128W:$vj))), ++ (VEXT2XV_D_W ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>; ++ ++def : LASXPat<(v16i16 (zext (v16i8 LSX128B:$vj))), ++ (VEXT2XV_HU_BU ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>; ++ ++def : LASXPat<(v8i32 (zext (v8i16 LSX128H:$vj))), ++ (VEXT2XV_WU_HU ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>; ++ ++def : LASXPat<(v4i64 (zext (v4i32 LSX128W:$vj))), ++ (VEXT2XV_DU_WU ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>; ++ ++ ++def : LASXPat<(xor ++ (v16i16 LASX256H:$xj), (xvsplati16 imm_mask) ++ ), ++ (XNOR_V_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xj))>; ++ ++def : LASXPat<(xor ++ (v8i32 LASX256W:$xj), (xvsplati32 imm_mask) ++ ), ++ (XNOR_V_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xj))>; ++ ++def : LASXPat<(xor ++ (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64) ++ ), ++ (XNOR_V_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xj))>; ++ ++ ++def : LASXPat<(and ++ (v32i8 (xor (v32i8 LASX256B:$xj), (xvsplati8 imm_mask))), ++ (v32i8 LASX256B:$xk) ++ ), ++ (XVANDN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; ++ ++def : LASXPat<(and ++ (v16i16 (xor (v16i16 LASX256H:$xj), (xvsplati16 imm_mask))), ++ (v16i16 LASX256H:$xk) ++ ), ++ (XVANDN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; ++ ++def : LASXPat<(and ++ (v8i32 (xor (v8i32 LASX256W:$xj), (xvsplati32 imm_mask))), ++ (v8i32 LASX256W:$xk) ++ ), ++ (XVANDN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; ++ ++def : LASXPat<(and ++ (v4i64 (xor (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64))), ++ (v4i64 LASX256D:$xk) ++ ), ++ (XVANDN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; ++ ++ ++def : LASXPat<(or ++ (v32i8 LASX256B:$xj), ++ (v32i8 (xor (v32i8 LASX256B:$xk), (xvsplati8 imm_mask))) ++ ), ++ (XVORN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; ++ ++def : LASXPat<(or ++ (v16i16 LASX256H:$xj), ++ (v16i16 (xor (v16i16 LASX256H:$xk), (xvsplati16 imm_mask))) ++ ), ++ (XVORN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; ++ ++def : LASXPat<(or ++ (v8i32 LASX256W:$xj), ++ (v8i32 (xor (v8i32 LASX256W:$xk), (xvsplati32 imm_mask))) ++ ), ++ (XVORN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; ++ ++def : LASXPat<(or ++ (v4i64 LASX256D:$xj), ++ (v4i64 (xor (v4i64 LASX256D:$xk), (xvsplati64 imm_mask_64))) ++ ), ++ (XVORN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; ++ ++ ++def : LASXPat<(add (v4i64 (abs LASX256D:$a)), (v4i64 (abs LASX256D:$b))), ++ (XVADDA_D (v4i64 LASX256D:$a),(v4i64 LASX256D:$b))>; ++ ++def : LASXPat<(add (v8i32 (abs LASX256W:$a)), (v8i32 (abs LASX256W:$b))), ++ (XVADDA_W (v8i32 LASX256W:$a),(v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(add (v16i16 (abs LASX256H:$a)), (v16i16 (abs LASX256H:$b))), ++ (XVADDA_H (v16i16 LASX256H:$a),(v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(add (v32i8 (abs LASX256B:$a)), (v32i8 (abs LASX256B:$b))), ++ (XVADDA_B (v32i8 LASX256B:$a),(v32i8 LASX256B:$b))>; ++ ++ ++def : LASXPat<(and v32i8:$xj, (xor (shl xvsplat_imm_eq_1, v32i8:$xk), ++ (xvsplati8 imm_mask))), ++ (XVBITCLR_B v32i8:$xj, v32i8:$xk)>; ++ ++def : LASXPat<(and v16i16:$xj, (xor (shl xvsplat_imm_eq_1, v16i16:$xk), ++ (xvsplati16 imm_mask))), ++ (XVBITCLR_H v16i16:$xj, v16i16:$xk)>; ++ ++def : LASXPat<(and v8i32:$xj, (xor (shl xvsplat_imm_eq_1, v8i32:$xk), ++ (xvsplati32 imm_mask))), ++ (XVBITCLR_W v8i32:$xj, 
v8i32:$xk)>; ++ ++def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), ++ (xvsplati64 imm_mask_64))), ++ (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; ++ ++ ++def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), ++ (v8i16 LSX128H:$src), (i64 0)), ++ (XVPERMI_QH (v16i16 LASX256H:$dst), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), ++ (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), ++ (v4i32 LSX128W:$src), (i64 0)), ++ (XVPERMI_QW (v8i32 LASX256W:$dst), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), ++ (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), ++ (v2i64 LSX128D:$src), (i64 0)), ++ (XVPERMI_QD (v4i64 LASX256D:$dst), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), ++ (i32 48))>; +diff --git a/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/lib/Target/LoongArch/LoongArchLSXInstrFormats.td +new file mode 100644 +index 00000000..50df4d72 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLSXInstrFormats.td +@@ -0,0 +1,449 @@ ++//===- LoongArchLSXInstrFormats.td - LoongArch LSX Instruction Formats ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++class LSXInst : InstLA<(outs), (ins), "", [], FrmOther>, ++ EXT_LSX { ++} ++ ++class LSXCBranch : LSXInst { ++} ++ ++class LSXSpecial : LSXInst { ++} ++ ++class LSXPseudo pattern>: ++ LoongArchPseudo { ++ let Predicates = [HasLSX]; ++} ++ ++class LSX_3R op>: LSXInst { ++ bits<5> vk; ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_3R_1GP op>: LSXInst { ++ bits<5> rk; ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5 op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> si5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = si5; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_2R op>: LSXInst { ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_2R_1GP op>: LSXInst { ++ bits<5> rj; ++ bits<5> vd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I1_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I2_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I3_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I4_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I6_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<6> ui6; ++ ++ let Inst{31-16} = op; ++ 
let Inst{15-10} = ui6; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I1_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I2_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I3_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I4_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_ELM_COPY_B op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_H op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_W op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_D op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_I8_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<8> ui8; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = ui8; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I7_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<7> ui7; ++ ++ let Inst{31-17} = op; ++ let Inst{16-10} = ui7; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I12_S op>: LSXInst { ++ bits<5> vd; ++// bits<5> rj; ++// bits<12> si12; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI12_S op>: LSXInst { ++ bits<5> vd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI11_S op>: LSXInst { ++ bits<5> vd; ++ bits<16> addr; ++ ++ let Inst{31-21} = op; ++ let Inst{20-10} = addr{10-0}; ++ let Inst{9-5} = addr{15-11}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI10_S op>: LSXInst { ++ bits<5> vd; ++ bits<15> addr; ++ ++ let Inst{31-20} = op; ++ let Inst{19-10} = addr{9-0}; ++ let Inst{9-5} = addr{14-10}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI9_S op>: LSXInst { ++ bits<5> vd; ++ bits<14> addr; ++ ++ let Inst{31-19} = op; ++ let Inst{18-10} = addr{8-0}; ++ let Inst{9-5} = addr{13-9}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SET op>: LSXInst { ++ bits<5> vj; ++ bits<3> cd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = vj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class LSX_VR4MUL op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> vk; ++ bits<5> va; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = va; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_VFCMP op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> vk; ++ bits<5> cond; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ 
++class LSX_Addr_SI8_idx1 op>: LSXInst { ++ bits<5> vd; ++ bits<13> addr; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18-11} = addr{7-0}; ++ let Inst{10} = idx; ++ let Inst{9-5} = addr{12-8}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx1 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx2 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<2> idx; ++ ++ let Inst{31-20} = op; ++ let Inst{19-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx3 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<3> idx; ++ ++ let Inst{31-21} = op; ++ let Inst{20-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx4 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<4> idx; ++ ++ let Inst{31-22} = op; ++ let Inst{21-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_3R_2GP op>: LSXInst { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5_mode_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> mode; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = mode; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_1R_I13 op>: LSXInst { ++ bits<13> i13; ++ bits<5> vd; ++ ++ let Inst{31-18} = op; ++ let Inst{17-5} = i13; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_1R_I13_I10 op>: LSXInst { ++ bits<10> i10; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-5} = i10; ++ let Inst{4-0} = vd; ++} ++ ++ ++ ++ ++ ++ ++ +diff --git a/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +new file mode 100644 +index 00000000..69fdc3a8 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -0,0 +1,5904 @@ ++//===- LoongArchLSXInstrInfo.td - LSX instructions -*- tablegen ------------*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes LoongArch LSX instructions. 
++// ++//===----------------------------------------------------------------------===// ++ ++def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; ++def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisInt<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisFP<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisInt<1>, SDTCisVec<1>, ++ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; ++def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; ++def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; ++def SDTVABSD : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; ++ ++def SDT_VBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++def LoongArchVBROADCAST : SDNode<"LoongArchISD::VBROADCAST", SDT_VBROADCAST>; ++ ++def LoongArchVAllNonZero : SDNode<"LoongArchISD::VALL_NONZERO", SDT_LoongArchVecCond>; ++def LoongArchVAnyNonZero : SDNode<"LoongArchISD::VANY_NONZERO", SDT_LoongArchVecCond>; ++def LoongArchVAllZero : SDNode<"LoongArchISD::VALL_ZERO", SDT_LoongArchVecCond>; ++def LoongArchVAnyZero : SDNode<"LoongArchISD::VANY_ZERO", SDT_LoongArchVecCond>; ++def LoongArchVNOR : SDNode<"LoongArchISD::VNOR", SDTIntBinOp, ++ [SDNPCommutative, SDNPAssociative]>; ++def LoongArchVSHF : SDNode<"LoongArchISD::VSHF", SDT_VSHF>; ++def LoongArchSHF : SDNode<"LoongArchISD::SHF", SDT_SHF>; ++def LoongArchVPACKEV : SDNode<"LoongArchISD::VPACKEV", SDT_ILV>; ++def LoongArchVPACKOD : SDNode<"LoongArchISD::VPACKOD", SDT_ILV>; ++def LoongArchVILVH : SDNode<"LoongArchISD::VILVH", SDT_ILV>; ++def LoongArchVILVL : SDNode<"LoongArchISD::VILVL", SDT_ILV>; ++def LoongArchVPICKEV : SDNode<"LoongArchISD::VPICKEV", SDT_ILV>; ++def LoongArchVPICKOD : SDNode<"LoongArchISD::VPICKOD", SDT_ILV>; ++def LoongArchVABSD : SDNode<"LoongArchISD::VABSD", SDTVABSD>; ++def LoongArchUVABSD : SDNode<"LoongArchISD::UVABSD", SDTVABSD>; ++ ++def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; ++def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; ++ ++def LoongArchVExtractSExt : SDNode<"LoongArchISD::VEXTRACT_SEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++def LoongArchVExtractZExt : SDNode<"LoongArchISD::VEXTRACT_ZEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++ ++def immZExt1Ptr : ImmLeaf(Imm);}]>; ++def immZExt2Ptr : ImmLeaf(Imm);}]>; ++def immZExt3Ptr : ImmLeaf(Imm);}]>; ++def immZExt4Ptr : ImmLeaf(Imm);}]>; ++def immZExt5Ptr : ImmLeaf(Imm);}]>; ++def immZExt8 : ImmLeaf(Imm);}]>; ++def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; ++def immZExt6 : ImmLeaf; ++def immZExt4 : ImmLeaf(Imm);}]>; ++def immZExt3 : ImmLeaf(Imm);}]>; ++def immZExt2 : ImmLeaf(Imm);}]>; ++def immZExt1 : ImmLeaf(Imm);}]>; ++def immSExt12_l : ImmLeaf(Imm);}]>; ++def immSExt11Ptr : ImmLeaf(Imm);}]>; ++ ++def immSExt11_1 : ImmLeaf(Imm<<1);}]>; ++def immSExt10Ptr : ImmLeaf(Imm);}]>; ++def immSExt10_2 : ImmLeaf(Imm<<2);}]>; ++def immSExt9Ptr : ImmLeaf(Imm);}]>; ++def immSExt9_3 : ImmLeaf(Imm<<3);}]>; ++def immSExt8 : ImmLeaf(Imm);}]>; ++def immSExt5 : ImmLeaf(Imm);}]>; ++def immSExt8_1 : ImmLeaf(Imm<<1);}]>; ++def immSExt8_2 : ImmLeaf(Imm<<2);}]>; ++def immSExt8_3 : ImmLeaf(Imm<<3);}]>; ++ ++def addrimm10 : ComplexPattern; ++def addrimm10lsl2 : ComplexPattern; ++def addrimm9lsl3 : ComplexPattern; ++def addrimm11lsl1 : 
ComplexPattern; ++ ++ ++class SimmLslAsmOperandClass Supers = [], ++ int Shift = 0> : AsmOperandClass { ++ let Name = "Simm" # Bits # "_Lsl" # Shift; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<" # Bits # ", " # Shift # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_Lsl" # Shift; ++} ++ ++def Simm11Lsl1AsmOperand ++ : SimmLslAsmOperandClass<11, [], 1>; ++ ++def immSExt11_1_O : Operand { ++ let EncoderMethod = "getSImm11Lsl1Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++ let ParserMatchClass = Simm11Lsl1AsmOperand; ++} ++ ++def Simm10Lsl2AsmOperand ++ : SimmLslAsmOperandClass<10, [], 2>; ++ ++def immSExt10_2_O : Operand { ++ let EncoderMethod = "getSImm10Lsl2Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; ++ let ParserMatchClass = Simm10Lsl2AsmOperand; ++} ++ ++def Simm9Lsl3AsmOperand ++ : SimmLslAsmOperandClass<9, [], 3>; ++ ++def immSExt9_3_O : Operand { ++ let EncoderMethod = "getSImm9Lsl3Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++ let ParserMatchClass = Simm9Lsl3AsmOperand; ++} ++ ++def Simm8Lsl3AsmOperand ++ : SimmLslAsmOperandClass<8, [], 3>; ++ ++def immSExt8_3_O : Operand { ++ let EncoderMethod = "getSImm8Lsl3Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++ let ParserMatchClass = Simm8Lsl3AsmOperand; ++} ++ ++def Simm8Lsl2AsmOperand ++ : SimmLslAsmOperandClass<8, [], 2>; ++ ++def immSExt8_2_O : Operand { ++ let EncoderMethod = "getSImm8Lsl2Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; ++ let ParserMatchClass = Simm8Lsl2AsmOperand; ++} ++ ++def Simm8Lsl1AsmOperand ++ : SimmLslAsmOperandClass<8, [], 1>; ++ ++def immSExt8_1_O : Operand { ++ let EncoderMethod = "getSImm8Lsl1Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++ let ParserMatchClass = Simm8Lsl1AsmOperand; ++} ++ ++ ++class ConstantSImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantSImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_" # Offset; ++} ++ ++class ConstantUImmRangeAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ConstantUImmRange" # Bottom # "_" # Top; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImmRange" # Bottom # "_" # Top; ++} ++ ++def SImm16RelaxedAsmOperandClass ++ : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { ++ let Name = "SImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "SImm16_Relaxed"; ++} ++ ++def ConstantSImm11Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm11Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<11, 1>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm11_Lsl1"; ++} ++ ++def ConstantSImm9Lsl3AsmOperandClass : AsmOperandClass { ++ let Name = "SImm9Lsl3"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<9, 3>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm9_Lsl3"; ++} ++ ++def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 
2>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm10_Lsl2"; ++} ++def ConstantSImm11AsmOperandClass ++ : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; ++def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 1>"; ++ let SuperClasses = [ConstantSImm11AsmOperandClass]; ++ let DiagnosticType = "SImm10_Lsl1"; ++} ++def ConstantUImm10AsmOperandClass ++ : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; ++def ConstantSImm10AsmOperandClass ++ : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; ++def ConstantSImm9AsmOperandClass ++ : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; ++def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm7Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<7, 2>"; ++ let SuperClasses = [ConstantSImm9AsmOperandClass]; ++ let DiagnosticType = "SImm7_Lsl2"; ++} ++def ConstantUImm8AsmOperandClass ++ : ConstantUImmAsmOperandClass<8, [ConstantSImm7Lsl2AsmOperandClass]>; ++def ConstantUImm7Sub1AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { ++ // Specify the names since the -1 offset causes invalid identifiers otherwise. ++ let Name = "UImm7_N1"; ++ let DiagnosticType = "UImm7_N1"; ++} ++def ConstantUImm7AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; ++def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm6Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<6, 2>"; ++ let SuperClasses = [ConstantUImm7AsmOperandClass]; ++ let DiagnosticType = "UImm6_Lsl2"; ++} ++def ConstantUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; ++def ConstantSImm6AsmOperandClass ++ : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; ++def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm5Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<5, 2>"; ++ let SuperClasses = [ConstantSImm6AsmOperandClass]; ++ let DiagnosticType = "UImm5_Lsl2"; ++} ++def ConstantUImm5_Range2_64AsmOperandClass ++ : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; ++def ConstantUImm5Plus33AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], ++ 33>; ++def ConstantUImm5ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { ++ let Name = "ConstantUImm5_0_Report_UImm6"; ++ let DiagnosticType = "UImm5_0_Report_UImm6"; ++} ++def ConstantUImm5Plus32AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; ++def ConstantUImm5Plus32NormalizeAsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { ++ let Name = "ConstantUImm5_32_Norm"; ++ // We must also subtract 32 when we render the operand. 
++ let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; ++} ++def ConstantUImm5Plus1ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ ++ let Name = "ConstantUImm5_Plus1_Report_UImm6"; ++} ++def ConstantUImm5Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; ++def ConstantUImm5AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; ++def ConstantSImm5AsmOperandClass ++ : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; ++def ConstantUImm4AsmOperandClass ++ : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; ++def ConstantSImm4AsmOperandClass ++ : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; ++def ConstantUImm3AsmOperandClass ++ : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; ++def ConstantUImm2AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; ++def ConstantUImm1AsmOperandClass ++ : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; ++def ConstantImmzAsmOperandClass : AsmOperandClass { ++ let Name = "ConstantImmz"; ++ let RenderMethod = "addConstantUImmOperands<1>"; ++ let PredicateMethod = "isConstantImmz"; ++ let SuperClasses = [ConstantUImm1AsmOperandClass]; ++ let DiagnosticType = "Immz"; ++} ++ ++foreach I = {1, 2, 3, 4, 5, 6, 8} in ++ def vsplat_uimm # I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {5, 10} in ++ def vsplat_simm # I : Operand { ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 4, 7, 8, 10, 20, 26} in ++ def uimm # I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 2, 3, 4, 5, 6, 7, 8} in ++ def uimm # I # _ptr : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++ ++def addrimm12 : ComplexPattern; ++ ++ ++def LoongArchMemSimm12AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm12"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<12>"; ++ let DiagnosticType = "MemSImm12"; ++} ++ ++def mem_simm12 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = LoongArchMemSimm12AsmOperand; ++} ++ ++foreach I = {4, 6, 9, 10, 11} in ++ def simm # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++def LoongArchMemSimm9AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm9"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<9>"; ++ let DiagnosticType = "MemSImm9"; ++} ++ ++def LoongArchMemSimm10AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm10"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10>"; ++ let DiagnosticType = "MemSImm10"; ++} ++ ++def 
LoongArchMemSimm11AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm11"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<11>"; ++ let DiagnosticType = "MemSImm11"; ++} ++ ++def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; ++ ++def simm10Op : Operand { ++ let DecoderMethod = "DecodeSIMM10"; ++} ++ ++def simm13Op : Operand { ++ let DecoderMethod = "DecodeSIMM13"; ++} ++ ++def LoongArchMemSimm10Lsl2AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm10_2"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10, 2>"; ++ let DiagnosticType = "MemSImm10Lsl2"; ++} ++ ++ ++def simm10_lsl2 : Operand { ++// let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, 2>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm10Lsl2AsmOperandClass"); ++} ++ ++def mem_simm10_lsl2 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm10_lsl2")); ++ let EncoderMethod = "getMemEncoding10l2"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm10Lsl2AsmOperand"); ++} ++ ++ ++def LoongArchMemSimm11Lsl1AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm11_1"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<11, 1>"; ++ let DiagnosticType = "MemSImm11Lsl1"; ++} ++ ++ ++def simm11_lsl1 : Operand { ++ // let DecoderMethod = "DecodeSImmWithOffsetAndScale<11, 1>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm11Lsl1AsmOperandClass"); ++} ++ ++def mem_simm11_lsl1 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm11_lsl1")); ++ let EncoderMethod = "getMemEncoding11l1"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm11Lsl1AsmOperand"); ++} ++ ++def LoongArchMemSimm9Lsl3AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm9_3"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<9, 3>"; ++ let DiagnosticType = "MemSImm9Lsl3"; ++} ++ ++ ++def simm9_lsl3 : Operand { ++ // let DecoderMethod = "DecodeSImmWithOffsetAndScale<9, 3>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm9Lsl3AsmOperandClass"); ++} ++ ++def mem_simm9_lsl3 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm9_lsl3")); ++ let EncoderMethod = "getMemEncoding9l3"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm9Lsl3AsmOperand"); ++} ++ ++ ++ ++ ++// Operands ++ ++def immZExt2Lsa : ImmLeaf(Imm - 1);}]>; ++ ++// Pattern fragments ++def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i8)>; ++def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i16)>; ++def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i32)>; ++def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i64)>; ++ ++def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i8)>; ++def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i16)>; ++def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), 
++ (LoongArchVExtractZExt node:$vec, node:$idx, i32)>; ++def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i64)>; ++ ++def vldrepl_v16i8 : PatFrag<(ops node:$v1), ++ (v16i8 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v8i16 : PatFrag<(ops node:$v1), ++ (v8i16 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v4i32 : PatFrag<(ops node:$v1), ++ (v4i32 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v2i64 : PatFrag<(ops node:$v1), ++ (v2i64 (LoongArchVBROADCAST node:$v1))>; ++ ++def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v2i64 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v2i64 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++class vfsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; ++ ++// ISD::SETFALSE cannot occur ++def vfseteq_v4f32 : vfsetcc_type; ++def vfseteq_v2f64 : vfsetcc_type; ++def vfsetge_v4f32 : vfsetcc_type; ++def vfsetge_v2f64 : vfsetcc_type; ++def vfsetgt_v4f32 : vfsetcc_type; ++def vfsetgt_v2f64 : vfsetcc_type; ++def vfsetle_v4f32 : vfsetcc_type; ++def vfsetle_v2f64 : vfsetcc_type; ++def vfsetlt_v4f32 : vfsetcc_type; ++def vfsetlt_v2f64 : vfsetcc_type; ++def vfsetne_v4f32 : vfsetcc_type; ++def vfsetne_v2f64 : vfsetcc_type; ++def vfsetoeq_v4f32 : vfsetcc_type; ++def vfsetoeq_v2f64 : vfsetcc_type; ++def vfsetoge_v4f32 : vfsetcc_type; ++def vfsetoge_v2f64 : vfsetcc_type; ++def vfsetogt_v4f32 : vfsetcc_type; ++def vfsetogt_v2f64 : vfsetcc_type; ++def vfsetole_v4f32 : vfsetcc_type; ++def vfsetole_v2f64 : vfsetcc_type; ++def vfsetolt_v4f32 : vfsetcc_type; ++def vfsetolt_v2f64 : vfsetcc_type; ++def vfsetone_v4f32 : vfsetcc_type; ++def vfsetone_v2f64 : vfsetcc_type; ++def vfsetord_v4f32 : vfsetcc_type; ++def vfsetord_v2f64 : vfsetcc_type; ++def vfsetun_v4f32 : vfsetcc_type; ++def vfsetun_v2f64 : vfsetcc_type; ++def vfsetueq_v4f32 : vfsetcc_type; ++def vfsetueq_v2f64 : vfsetcc_type; ++def vfsetuge_v4f32 : vfsetcc_type; ++def vfsetuge_v2f64 : vfsetcc_type; ++def vfsetugt_v4f32 : vfsetcc_type; ++def vfsetugt_v2f64 : vfsetcc_type; ++def vfsetule_v4f32 : vfsetcc_type; ++def vfsetule_v2f64 : vfsetcc_type; ++def vfsetult_v4f32 : vfsetcc_type; ++def vfsetult_v2f64 : vfsetcc_type; ++def vfsetune_v4f32 : vfsetcc_type; ++def vfsetune_v2f64 : vfsetcc_type; ++ ++ ++ ++// ISD::SETTRUE cannot occur ++// ISD::SETFALSE2 cannot occur ++// ISD::SETTRUE2 cannot occur ++ ++class vsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vsetcc node:$lhs, node:$rhs, CC))>; ++ ++def vseteq_v16i8 : vsetcc_type; ++def vseteq_v8i16 : vsetcc_type; ++def vseteq_v4i32 : vsetcc_type; ++def vseteq_v2i64 : vsetcc_type; ++def vsetle_v16i8 : vsetcc_type; ++def vsetle_v8i16 : vsetcc_type; ++def vsetle_v4i32 : vsetcc_type; ++def vsetle_v2i64 : vsetcc_type; ++def vsetlt_v16i8 : vsetcc_type; ++def vsetlt_v8i16 : vsetcc_type; ++def vsetlt_v4i32 : vsetcc_type; ++def vsetlt_v2i64 : vsetcc_type; ++def vsetule_v16i8 : vsetcc_type; ++def vsetule_v8i16 : vsetcc_type; ++def vsetule_v4i32 : vsetcc_type; ++def vsetule_v2i64 : vsetcc_type; ++def vsetult_v16i8 : vsetcc_type; ++def vsetult_v8i16 : vsetcc_type; ++def vsetult_v4i32 : 
vsetcc_type; ++def vsetult_v2i64 : vsetcc_type; ++ ++def vsplati8 : PatFrag<(ops node:$e0), ++ (v16i8 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplati16 : PatFrag<(ops node:$e0), ++ (v8i16 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplati32 : PatFrag<(ops node:$e0), ++ (v4i32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++ ++def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vsplati64 : PatFrag<(ops node:$e0), ++ (v2i64 (build_vector node:$e0, node:$e0))>; ++ ++def vsplati64_splat_d : PatFrag<(ops node:$e0), ++ (v2i64 (bitconvert ++ (v4i32 (and ++ (v4i32 (build_vector node:$e0, ++ node:$e0, ++ node:$e0, ++ node:$e0)), ++ vsplati64_imm_eq_1))))>; ++ ++def vsplatf32 : PatFrag<(ops node:$e0), ++ (v4f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplatf64 : PatFrag<(ops node:$e0), ++ (v2f64 (build_vector node:$e0, node:$e0))>; ++ ++def vsplati8_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati8 node:$i), node:$v, node:$v)>; ++def vsplati16_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati16 node:$i), node:$v, node:$v)>; ++def vsplati32_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati32 node:$i), node:$v, node:$v)>; ++def vsplati64_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati64_splat_d node:$i),node:$v, node:$v)>; ++ ++class SplatPatLeaf ++ : PatLeaf { ++ Operand OpClass = opclass; ++} ++ ++class SplatComplexPattern roots = [], ++ list props = []> : ++ ComplexPattern { ++ Operand OpClass = opclass; ++} ++ ++def vsplati8_uimm3 : SplatComplexPattern; ++ ++def vsplati8_uimm4 : SplatComplexPattern; ++ ++def vsplati8_uimm5 : SplatComplexPattern; ++ ++def vsplati8_uimm8 : SplatComplexPattern; ++ ++def vsplati8_simm5 : SplatComplexPattern; ++ ++def vsplati16_uimm3 : SplatComplexPattern; ++ ++def vsplati16_uimm4 : SplatComplexPattern; ++ ++def vsplati16_uimm5 : SplatComplexPattern; ++ ++def vsplati16_simm5 : SplatComplexPattern; ++ ++def vsplati32_uimm2 : SplatComplexPattern; ++ ++def vsplati32_uimm5 : SplatComplexPattern; ++ ++def vsplati32_simm5 : SplatComplexPattern; ++ ++def vsplati64_uimm1 : SplatComplexPattern; ++ ++def vsplati64_uimm5 : SplatComplexPattern; ++ ++def vsplati64_uimm6 : SplatComplexPattern; ++ ++def vsplati64_simm5 : SplatComplexPattern; ++ ++ ++// Any build_vector that is a constant splat with a value that equals 1 ++// FIXME: These should be a ComplexPattern but we can't use them because the ++// ISel generator requires the uses to have a name, but providing a name ++// causes other errors ("used in pattern but not operand list") ++def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vbitclr_b : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_h : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl 
vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_w : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_d : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk), ++ (bitconvert (v4i32 immAllOnesV))))>; ++ ++def vbneg_b : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_h : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_w : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_d : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk))>; ++ ++def vbset_b : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_h : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_w : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_d : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk))>; ++ ++def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (add node:$vd, (mul node:$vj, node:$vk))>; ++ ++def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (sub node:$vd, (mul node:$vj, node:$vk))>; ++ ++class IsCommutable { ++ bit isCommutable = 1; ++} ++ ++ ++ ++//class ++class LSX_3R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_3RN_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3R_4R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ ROVK:$vk))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_3R_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, GPR32Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $rk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, GPR32Opnd:$rk))]; ++} ++ ++class LSX_VEC_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_VEC_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVJ:$vj, ROVK:$vk), ++ [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]>; ++ ++class LSX_3RF_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3RFN_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3R_DESC_BASE1 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vk, ROVK:$vj))]; ++} ++ ++class LSX_3RF_DESC_BASE1 : ++ LSX_3R_DESC_BASE1; ++ ++class LSX_3R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$vd_in, ROVJ:$vj, ++ ROVK:$vk))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_3R_4R_VSHF_DESC_BASE 
{ ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVD:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$va, ROVJ:$vj, ++ ROVK:$vk))]; ++} ++ ++class LSX_I5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$si5))]; ++} ++ ++class LSX_I5_U_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; ++} ++ ++class LSX_BIT_3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui3))]; ++} ++ ++class LSX_BIT_3N_DESC_BASE : ++ LSX_BIT_3_DESC_BASE; ++ ++class LSX_BIT_4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui4))]; ++} ++ ++class LSX_BIT_4N_DESC_BASE : ++ LSX_BIT_4_DESC_BASE; ++ ++class LSX_BIT_5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX_BIT_5N_DESC_BASE : ++ LSX_BIT_5_DESC_BASE; ++ ++class LSX_BIT_6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui6))]; ++} ++ ++class LSX_BIT_6N_DESC_BASE : ++ LSX_BIT_6_DESC_BASE; ++ ++class LSX_2R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RN_DESC_BASE : ++ LSX_2R_DESC_BASE; ++ ++class LSX_2RF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RFN_DESC_BASE : ++ LSX_2R_DESC_BASE; ++ ++class LSX_2RF_DESC_BASE_CVT { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RFN_DESC_BASE_CVT : ++ LSX_2RF_DESC_BASE_CVT; ++ ++class LSX_2RF_DESC_BASE_tmp { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_2R_REPL_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROS:$rj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj"); ++ list Pattern = [(set ROVD:$vd, (VT (OpNode ROS:$rj)))]; ++} ++ ++class LSX_INSERT_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$rj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, 
"\t$vd, $rj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$rj, Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U4N_DESC_BASE : ++ LSX_INSERT_U4_DESC_BASE; ++ ++class LSX_INSERT_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui3))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U3N_DESC_BASE : ++ LSX_INSERT_U3_DESC_BASE; ++ ++class LSX_INSERT_U2_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui2))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U2N_DESC_BASE : ++ LSX_INSERT_U2_DESC_BASE; ++ ++class LSX_INSERT_U1_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui1))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U1N_DESC_BASE : ++ LSX_INSERT_U1_DESC_BASE; ++ ++class LSX_PICK_U1_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui1"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui1))]; ++} ++ ++class LSX_PICK_U2_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui2"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui2))]; ++} ++ ++class LSX_PICK_U3_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui3"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui3))]; ++} ++ ++class LSX_PICK_U4_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui4"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui4))]; ++} ++ ++class LSX_ELM_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui3, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U2_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui2, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U1_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui1, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); ++ string AsmString = 
!strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui4, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U4_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U3_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui3))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U2_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui2))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U1_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui1))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIT_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui3))]; ++} ++ ++class LSX_BIT_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui4))]; ++} ++ ++class LSX_BIT_U5_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; ++} ++ ++class LSX_BIT_U6_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui6))]; ++} ++ ++class LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; ++} ++ ++class LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; ++} ++ ++class LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; ++} ++ ++class LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag 
InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_I8_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (LoongArchSHF immZExt8:$ui8, ROVJ:$vj))]; ++} ++ ++class LSX_I8_SHUF_DESC_BASE_D { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++def LoongArchSelect : SDNode<"LoongArchISD::VSELECT" ,SDTSelect>; ++def LoongArchVROR : SDNode<"LoongArchISD::VROR", ++ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>, []>; ++def LoongArchVRORI : SDNode<"LoongArchISD::VRORI", ++ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>, []>; ++ ++class LSX2_RORI_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui3))]; ++} ++ ++class LSX2_RORI_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui4))]; ++} ++ ++class LSX2_RORI_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX2_RORI_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui6))]; ++} ++ ++class LSX_BIND_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U4N_DESC_BASE : ++ LSX_BIND_U4_DESC_BASE; ++ ++class LSX_BIND_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U5N_DESC_BASE : ++ LSX_BIND_U5_DESC_BASE; ++ ++class LSX_BIND_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui6))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U6N_DESC_BASE : ++ LSX_BIND_U6_DESC_BASE; ++ ++class LSX_BIND_U7_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); ++ 
string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U7N_DESC_BASE : ++ LSX_BIND_U7_DESC_BASE; ++ ++ ++class LD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(set ROVD:$vd, (TyNode (OpNode Addr:$addr)))]; ++ string DecoderMethod = "DecodeLSX128Mem"; ++} ++ ++class ST_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(OpNode (TyNode ROVD:$vd), Addr:$addr)]; ++ string DecoderMethod = "DecodeLSX128Mem"; ++} ++ ++class LSX_VEC_ADDR_PSEUDO_BASE : ++ LSXPseudo<(outs), (ins ROVD:$vd, MemOpnd:$addr), ++ [(OpNode (TyNode ROVD:$vd), MemOpnd:$addr)]>; ++ ++ ++class LSX_SET_DESC_BASE { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROVD:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_SET_DESC_BASE_tmp { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROVD:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_VMul_Reg4 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; ++} ++ ++class LSX_4RF { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; ++} ++ ++ ++class LSX_VFCMP_Reg3 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_I12_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si12); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si12"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si12))]; ++} ++ ++class LSX_I11_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si11); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si11"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si11))]; ++} ++ ++class LSX_I10_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si10); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si10"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si10))]; ++} ++ ++class LSX_I9_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si9); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si9"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si9))]; ++} ++ ++ ++class LSX_I8_U1_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm1:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt1:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++ 
++class LSX_I8_U2_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I8_U3_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I8_U4_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I5_U_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX_I5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$si5))]; ++} ++ ++class LSX_LDX_LA { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, RORK:$rk))]; ++} ++ ++class LSX_SDX_LA { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, RORK:$rk)]; ++} ++ ++class LSX_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_U5_4R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; ++} ++ ++class LSX_2R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; ++} ++ ++class LSX_2R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_2R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); ++ string 
AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; ++} ++ ++class LSX_2R_3R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt4:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt6:$ui6))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U7_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_SELECT { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, vsplat_uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, vsplati8_uimm8:$ui8, ROVJ:$vj))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt8:$ui8))]; ++} ++ ++class LSX_I13_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins immOp:$i13); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $i13"); ++ list Pattern = [(set ROVD:$vd, (OpNode (Ty simm13:$i13)))]; ++ string DecoderMethod = "DecodeLSX128Mem13"; ++} ++ ++class LSX_I13_DESC_BASE_10 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins vsplat_simm10:$i10); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $i10"); ++ list Pattern = []; ++ bit hasSideEffects = 0; ++ string DecoderMethod = "DecodeLSX128Mem10"; ++} ++ ++class LSX_BIT_U8_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui8))]; ++} ++ ++ ++class LSXPat pred = [HasLSX]> : ++ Pat, Requires; ++ ++// Instruction encoding. 
++ ++ ++def VSADD_B : LSX_3R<0b01110000010001100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.b", LSX128BOpnd>; ++ ++def VSADD_H : LSX_3R<0b01110000010001101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.h", LSX128HOpnd>; ++ ++def VSADD_W : LSX_3R<0b01110000010001110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.w", LSX128WOpnd>; ++ ++def VSADD_D : LSX_3R<0b01110000010001111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.d", LSX128DOpnd>; ++ ++ ++def VSSUB_B : LSX_3R<0b01110000010010000>, ++ LSX_3RN_DESC_BASE<"vssub.b", LSX128BOpnd>; ++ ++def VSSUB_H : LSX_3R<0b01110000010010001>, ++ LSX_3RN_DESC_BASE<"vssub.h", LSX128HOpnd>; ++ ++def VSSUB_W : LSX_3R<0b01110000010010010>, ++ LSX_3RN_DESC_BASE<"vssub.w", LSX128WOpnd>; ++ ++def VSSUB_D : LSX_3R<0b01110000010010011>, ++ LSX_3RN_DESC_BASE<"vssub.d", LSX128DOpnd>; ++ ++ ++def VSADD_BU : LSX_3R<0b01110000010010100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.bu", LSX128BOpnd>; ++ ++def VSADD_HU : LSX_3R<0b01110000010010101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.hu", LSX128HOpnd>; ++ ++def VSADD_WU : LSX_3R<0b01110000010010110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.wu", LSX128WOpnd>; ++ ++def VSADD_DU : LSX_3R<0b01110000010010111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.du", LSX128DOpnd>; ++ ++ ++def VSSUB_BU : LSX_3R<0b01110000010011000>, ++ LSX_3RN_DESC_BASE<"vssub.bu", LSX128BOpnd>; ++ ++def VSSUB_HU : LSX_3R<0b01110000010011001>, ++ LSX_3RN_DESC_BASE<"vssub.hu", LSX128HOpnd>; ++ ++def VSSUB_WU : LSX_3R<0b01110000010011010>, ++ LSX_3RN_DESC_BASE<"vssub.wu", LSX128WOpnd>; ++ ++def VSSUB_DU : LSX_3R<0b01110000010011011>, ++ LSX_3RN_DESC_BASE<"vssub.du", LSX128DOpnd>; ++ ++ ++def VHADDW_H_B : LSX_3R<0b01110000010101000>, ++ LSX_3RN_DESC_BASE<"vhaddw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHADDW_W_H : LSX_3R<0b01110000010101001>, ++ LSX_3RN_DESC_BASE<"vhaddw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHADDW_D_W : LSX_3R<0b01110000010101010>, ++ LSX_3RN_DESC_BASE<"vhaddw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHSUBW_H_B : LSX_3R<0b01110000010101100>, ++ LSX_3RN_DESC_BASE<"vhsubw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHSUBW_W_H : LSX_3R<0b01110000010101101>, ++ LSX_3RN_DESC_BASE<"vhsubw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHSUBW_D_W : LSX_3R<0b01110000010101110>, ++ LSX_3RN_DESC_BASE<"vhsubw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHADDW_HU_BU : LSX_3R<0b01110000010110000>, ++ LSX_3RN_DESC_BASE<"vhaddw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHADDW_WU_HU : LSX_3R<0b01110000010110001>, ++ LSX_3RN_DESC_BASE<"vhaddw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHADDW_DU_WU : LSX_3R<0b01110000010110010>, ++ LSX_3RN_DESC_BASE<"vhaddw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHSUBW_HU_BU : LSX_3R<0b01110000010110100>, ++ LSX_3RN_DESC_BASE<"vhsubw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHSUBW_WU_HU : LSX_3R<0b01110000010110101>, ++ LSX_3RN_DESC_BASE<"vhsubw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHSUBW_DU_WU : LSX_3R<0b01110000010110110>, ++ LSX_3RN_DESC_BASE<"vhsubw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VADDA_B : LSX_3R<0b01110000010111000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.b", LSX128BOpnd>; ++ ++def VADDA_H : LSX_3R<0b01110000010111001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.h", LSX128HOpnd>; ++ ++def VADDA_W : LSX_3R<0b01110000010111010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.w", LSX128WOpnd>; ++ ++def VADDA_D : 
LSX_3R<0b01110000010111011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.d", LSX128DOpnd>; ++ ++ ++def VABSD_B : LSX_3R<0b01110000011000000>, ++ LSX_3RN_DESC_BASE<"vabsd.b", LSX128BOpnd>; ++ ++def VABSD_H : LSX_3R<0b01110000011000001>, ++ LSX_3RN_DESC_BASE<"vabsd.h", LSX128HOpnd>; ++ ++def VABSD_W : LSX_3R<0b01110000011000010>, ++ LSX_3RN_DESC_BASE<"vabsd.w", LSX128WOpnd>; ++ ++def VABSD_D : LSX_3R<0b01110000011000011>, ++ LSX_3RN_DESC_BASE<"vabsd.d", LSX128DOpnd>; ++ ++ ++def VABSD_BU : LSX_3R<0b01110000011000100>, ++ LSX_3RN_DESC_BASE<"vabsd.bu", LSX128BOpnd>; ++ ++def VABSD_HU : LSX_3R<0b01110000011000101>, ++ LSX_3RN_DESC_BASE<"vabsd.hu", LSX128HOpnd>; ++ ++def VABSD_WU : LSX_3R<0b01110000011000110>, ++ LSX_3RN_DESC_BASE<"vabsd.wu", LSX128WOpnd>; ++ ++def VABSD_DU : LSX_3R<0b01110000011000111>, ++ LSX_3RN_DESC_BASE<"vabsd.du", LSX128DOpnd>; ++ ++ ++def VAVG_B : LSX_3R<0b01110000011001000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.b", LSX128BOpnd>; ++ ++def VAVG_H : LSX_3R<0b01110000011001001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.h", LSX128HOpnd>; ++ ++def VAVG_W : LSX_3R<0b01110000011001010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.w", LSX128WOpnd>; ++ ++def VAVG_D : LSX_3R<0b01110000011001011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.d", LSX128DOpnd>; ++ ++ ++def VAVG_BU : LSX_3R<0b01110000011001100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.bu", LSX128BOpnd>; ++ ++def VAVG_HU : LSX_3R<0b01110000011001101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.hu", LSX128HOpnd>; ++ ++def VAVG_WU : LSX_3R<0b01110000011001110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.wu", LSX128WOpnd>; ++ ++def VAVG_DU : LSX_3R<0b01110000011001111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.du", LSX128DOpnd>; ++ ++ ++def VAVGR_B : LSX_3R<0b01110000011010000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.b", LSX128BOpnd>; ++ ++def VAVGR_H : LSX_3R<0b01110000011010001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.h", LSX128HOpnd>; ++ ++def VAVGR_W : LSX_3R<0b01110000011010010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.w", LSX128WOpnd>; ++ ++def VAVGR_D : LSX_3R<0b01110000011010011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.d", LSX128DOpnd>; ++ ++ ++def VAVGR_BU : LSX_3R<0b01110000011010100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.bu", LSX128BOpnd>; ++ ++def VAVGR_HU : LSX_3R<0b01110000011010101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.hu", LSX128HOpnd>; ++ ++def VAVGR_WU : LSX_3R<0b01110000011010110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.wu", LSX128WOpnd>; ++ ++def VAVGR_DU : LSX_3R<0b01110000011010111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.du", LSX128DOpnd>; ++ ++ ++def VMAX_B : LSX_3R<0b01110000011100000>, ++ LSX_3R_DESC_BASE<"vmax.b", smax, LSX128BOpnd>; ++ ++def VMAX_H : LSX_3R<0b01110000011100001>, ++ LSX_3R_DESC_BASE<"vmax.h", smax, LSX128HOpnd>; ++ ++def VMAX_W : LSX_3R<0b01110000011100010>, ++ LSX_3R_DESC_BASE<"vmax.w", smax, LSX128WOpnd>; ++ ++def VMAX_D : LSX_3R<0b01110000011100011>, ++ LSX_3R_DESC_BASE<"vmax.d", smax, LSX128DOpnd>; ++ ++ ++def VMIN_B : LSX_3R<0b01110000011100100>, ++ LSX_3R_DESC_BASE<"vmin.b", smin, LSX128BOpnd>; ++ ++def VMIN_H : LSX_3R<0b01110000011100101>, ++ LSX_3R_DESC_BASE<"vmin.h", smin, LSX128HOpnd>; ++ ++def VMIN_W : LSX_3R<0b01110000011100110>, ++ LSX_3R_DESC_BASE<"vmin.w", smin, LSX128WOpnd>; ++ ++def VMIN_D : LSX_3R<0b01110000011100111>, ++ LSX_3R_DESC_BASE<"vmin.d", smin, LSX128DOpnd>; ++ ++ ++def VMAX_BU : LSX_3R<0b01110000011101000>, ++ LSX_3R_DESC_BASE<"vmax.bu", umax, LSX128BOpnd>; ++ ++def VMAX_HU : LSX_3R<0b01110000011101001>, ++ 
LSX_3R_DESC_BASE<"vmax.hu", umax, LSX128HOpnd>; ++ ++def VMAX_WU : LSX_3R<0b01110000011101010>, ++ LSX_3R_DESC_BASE<"vmax.wu", umax, LSX128WOpnd>; ++ ++def VMAX_DU : LSX_3R<0b01110000011101011>, ++ LSX_3R_DESC_BASE<"vmax.du", umax, LSX128DOpnd>; ++ ++ ++def VMIN_BU : LSX_3R<0b01110000011101100>, ++ LSX_3R_DESC_BASE<"vmin.bu", umin, LSX128BOpnd>; ++ ++def VMIN_HU : LSX_3R<0b01110000011101101>, ++ LSX_3R_DESC_BASE<"vmin.hu", umin, LSX128HOpnd>; ++ ++def VMIN_WU : LSX_3R<0b01110000011101110>, ++ LSX_3R_DESC_BASE<"vmin.wu", umin, LSX128WOpnd>; ++ ++def VMIN_DU : LSX_3R<0b01110000011101111>, ++ LSX_3R_DESC_BASE<"vmin.du", umin, LSX128DOpnd>; ++ ++ ++def VMUL_B : LSX_3R<0b01110000100001000>, ++ LSX_3R_DESC_BASE<"vmul.b", mul, LSX128BOpnd>; ++ ++def VMUL_H : LSX_3R<0b01110000100001001>, ++ LSX_3R_DESC_BASE<"vmul.h", mul, LSX128HOpnd>; ++ ++def VMUL_W : LSX_3R<0b01110000100001010>, ++ LSX_3R_DESC_BASE<"vmul.w", mul, LSX128WOpnd>; ++ ++def VMUL_D : LSX_3R<0b01110000100001011>, ++ LSX_3R_DESC_BASE<"vmul.d", mul, LSX128DOpnd>; ++ ++ ++def VMADD_B : LSX_3R<0b01110000101010000>, ++ LSX_3R_4R_DESC_BASE<"vmadd.b", muladd, LSX128BOpnd>; ++ ++def VMADD_H : LSX_3R<0b01110000101010001>, ++ LSX_3R_4R_DESC_BASE<"vmadd.h", muladd, LSX128HOpnd>; ++ ++def VMADD_W : LSX_3R<0b01110000101010010>, ++ LSX_3R_4R_DESC_BASE<"vmadd.w", muladd, LSX128WOpnd>; ++ ++def VMADD_D : LSX_3R<0b01110000101010011>, ++ LSX_3R_4R_DESC_BASE<"vmadd.d", muladd, LSX128DOpnd>; ++ ++ ++def VMSUB_B : LSX_3R<0b01110000101010100>, ++ LSX_3R_4R_DESC_BASE<"vmsub.b", mulsub, LSX128BOpnd>; ++ ++def VMSUB_H : LSX_3R<0b01110000101010101>, ++ LSX_3R_4R_DESC_BASE<"vmsub.h", mulsub, LSX128HOpnd>; ++ ++def VMSUB_W : LSX_3R<0b01110000101010110>, ++ LSX_3R_4R_DESC_BASE<"vmsub.w", mulsub, LSX128WOpnd>; ++ ++def VMSUB_D : LSX_3R<0b01110000101010111>, ++ LSX_3R_4R_DESC_BASE<"vmsub.d", mulsub, LSX128DOpnd>; ++ ++ ++def VDIV_B : LSX_3R<0b01110000111000000>, ++ LSX_3R_DESC_BASE<"vdiv.b", sdiv, LSX128BOpnd>; ++ ++def VDIV_H : LSX_3R<0b01110000111000001>, ++ LSX_3R_DESC_BASE<"vdiv.h", sdiv, LSX128HOpnd>; ++ ++def VDIV_W : LSX_3R<0b01110000111000010>, ++ LSX_3R_DESC_BASE<"vdiv.w", sdiv, LSX128WOpnd>; ++ ++def VDIV_D : LSX_3R<0b01110000111000011>, ++ LSX_3R_DESC_BASE<"vdiv.d", sdiv, LSX128DOpnd>; ++ ++ ++def VMOD_B : LSX_3R<0b01110000111000100>, ++ LSX_3R_DESC_BASE<"vmod.b", srem, LSX128BOpnd>; ++ ++def VMOD_H : LSX_3R<0b01110000111000101>, ++ LSX_3R_DESC_BASE<"vmod.h", srem, LSX128HOpnd>; ++ ++def VMOD_W : LSX_3R<0b01110000111000110>, ++ LSX_3R_DESC_BASE<"vmod.w", srem, LSX128WOpnd>; ++ ++def VMOD_D : LSX_3R<0b01110000111000111>, ++ LSX_3R_DESC_BASE<"vmod.d", srem, LSX128DOpnd>; ++ ++ ++def VDIV_BU : LSX_3R<0b01110000111001000>, ++ LSX_3R_DESC_BASE<"vdiv.bu", udiv, LSX128BOpnd>; ++ ++def VDIV_HU : LSX_3R<0b01110000111001001>, ++ LSX_3R_DESC_BASE<"vdiv.hu", udiv, LSX128HOpnd>; ++ ++def VDIV_WU : LSX_3R<0b01110000111001010>, ++ LSX_3R_DESC_BASE<"vdiv.wu", udiv, LSX128WOpnd>; ++ ++def VDIV_DU : LSX_3R<0b01110000111001011>, ++ LSX_3R_DESC_BASE<"vdiv.du", udiv, LSX128DOpnd>; ++ ++ ++def VMOD_BU : LSX_3R<0b01110000111001100>, ++ LSX_3R_DESC_BASE<"vmod.bu", urem, LSX128BOpnd>; ++ ++def VMOD_HU : LSX_3R<0b01110000111001101>, ++ LSX_3R_DESC_BASE<"vmod.hu", urem, LSX128HOpnd>; ++ ++def VMOD_WU : LSX_3R<0b01110000111001110>, ++ LSX_3R_DESC_BASE<"vmod.wu", urem, LSX128WOpnd>; ++ ++def VMOD_DU : LSX_3R<0b01110000111001111>, ++ LSX_3R_DESC_BASE<"vmod.du", urem, LSX128DOpnd>; ++ ++ ++def VSLL_B : LSX_3R<0b01110000111010000>, ++ LSX_3R_DESC_BASE<"vsll.b", shl, LSX128BOpnd>; ++ 
++def VSLL_H : LSX_3R<0b01110000111010001>, ++ LSX_3R_DESC_BASE<"vsll.h", shl, LSX128HOpnd>; ++ ++def VSLL_W : LSX_3R<0b01110000111010010>, ++ LSX_3R_DESC_BASE<"vsll.w", shl, LSX128WOpnd>; ++ ++def VSLL_D : LSX_3R<0b01110000111010011>, ++ LSX_3R_DESC_BASE<"vsll.d", shl, LSX128DOpnd>; ++ ++ ++def VSRL_B : LSX_3R<0b01110000111010100>, ++ LSX_3R_DESC_BASE<"vsrl.b", srl, LSX128BOpnd>; ++ ++def VSRL_H : LSX_3R<0b01110000111010101>, ++ LSX_3R_DESC_BASE<"vsrl.h", srl, LSX128HOpnd>; ++ ++def VSRL_W : LSX_3R<0b01110000111010110>, ++ LSX_3R_DESC_BASE<"vsrl.w", srl, LSX128WOpnd>; ++ ++def VSRL_D : LSX_3R<0b01110000111010111>, ++ LSX_3R_DESC_BASE<"vsrl.d", srl, LSX128DOpnd>; ++ ++ ++def VSRA_B : LSX_3R<0b01110000111011000>, ++ LSX_3R_DESC_BASE<"vsra.b", sra, LSX128BOpnd>; ++ ++def VSRA_H : LSX_3R<0b01110000111011001>, ++ LSX_3R_DESC_BASE<"vsra.h", sra, LSX128HOpnd>; ++ ++def VSRA_W : LSX_3R<0b01110000111011010>, ++ LSX_3R_DESC_BASE<"vsra.w", sra, LSX128WOpnd>; ++ ++def VSRA_D : LSX_3R<0b01110000111011011>, ++ LSX_3R_DESC_BASE<"vsra.d", sra, LSX128DOpnd>; ++ ++ ++def VSRLR_B : LSX_3R<0b01110000111100000>, ++ LSX_3RN_DESC_BASE<"vsrlr.b", LSX128BOpnd>; ++ ++def VSRLR_H : LSX_3R<0b01110000111100001>, ++ LSX_3RN_DESC_BASE<"vsrlr.h", LSX128HOpnd>; ++ ++def VSRLR_W : LSX_3R<0b01110000111100010>, ++ LSX_3RN_DESC_BASE<"vsrlr.w", LSX128WOpnd>; ++ ++def VSRLR_D : LSX_3R<0b01110000111100011>, ++ LSX_3RN_DESC_BASE<"vsrlr.d", LSX128DOpnd>; ++ ++ ++def VSRAR_B : LSX_3R<0b01110000111100100>, ++ LSX_3RN_DESC_BASE<"vsrar.b", LSX128BOpnd>; ++ ++def VSRAR_H : LSX_3R<0b01110000111100101>, ++ LSX_3RN_DESC_BASE<"vsrar.h", LSX128HOpnd>; ++ ++def VSRAR_W : LSX_3R<0b01110000111100110>, ++ LSX_3RN_DESC_BASE<"vsrar.w", LSX128WOpnd>; ++ ++def VSRAR_D : LSX_3R<0b01110000111100111>, ++ LSX_3RN_DESC_BASE<"vsrar.d", LSX128DOpnd>; ++ ++ ++def VBITCLR_B : LSX_3R<0b01110001000011000>, ++ LSX_3R_DESC_BASE<"vbitclr.b", vbitclr_b, LSX128BOpnd>; ++ ++def VBITCLR_H : LSX_3R<0b01110001000011001>, ++ LSX_3R_DESC_BASE<"vbitclr.h", vbitclr_h, LSX128HOpnd>; ++ ++def VBITCLR_W : LSX_3R<0b01110001000011010>, ++ LSX_3R_DESC_BASE<"vbitclr.w", vbitclr_w, LSX128WOpnd>; ++ ++def VBITCLR_D : LSX_3R<0b01110001000011011>, ++ LSX_3R_DESC_BASE<"vbitclr.d", vbitclr_d, LSX128DOpnd>; ++ ++ ++def VBITSET_B : LSX_3R<0b01110001000011100>, ++ LSX_3RN_DESC_BASE<"vbitset.b", LSX128BOpnd>; ++ ++def VBITSET_H : LSX_3R<0b01110001000011101>, ++ LSX_3RN_DESC_BASE<"vbitset.h", LSX128HOpnd>; ++ ++def VBITSET_W : LSX_3R<0b01110001000011110>, ++ LSX_3RN_DESC_BASE<"vbitset.w", LSX128WOpnd>; ++ ++def VBITSET_D : LSX_3R<0b01110001000011111>, ++ LSX_3RN_DESC_BASE<"vbitset.d", LSX128DOpnd>; ++ ++ ++def VBITREV_B : LSX_3R<0b01110001000100000>, ++ LSX_3RN_DESC_BASE<"vbitrev.b", LSX128BOpnd>; ++ ++def VBITREV_H : LSX_3R<0b01110001000100001>, ++ LSX_3RN_DESC_BASE<"vbitrev.h", LSX128HOpnd>; ++ ++def VBITREV_W : LSX_3R<0b01110001000100010>, ++ LSX_3RN_DESC_BASE<"vbitrev.w", LSX128WOpnd>; ++ ++def VBITREV_D : LSX_3R<0b01110001000100011>, ++ LSX_3RN_DESC_BASE<"vbitrev.d", LSX128DOpnd>; ++ ++ ++def VPACKEV_B : LSX_3R<0b01110001000101100>, ++ LSX_3R_DESC_BASE<"vpackev.b", LoongArchVPACKEV, LSX128BOpnd>; ++ ++def VPACKEV_H : LSX_3R<0b01110001000101101>, ++ LSX_3R_DESC_BASE<"vpackev.h", LoongArchVPACKEV, LSX128HOpnd>; ++ ++def VPACKEV_W : LSX_3R<0b01110001000101110>, ++ LSX_3R_DESC_BASE<"vpackev.w", LoongArchVPACKEV, LSX128WOpnd>; ++ ++def VPACKEV_D : LSX_3R<0b01110001000101111>, ++ LSX_3R_DESC_BASE<"vpackev.d", LoongArchVPACKEV, LSX128DOpnd>; ++ ++ ++def VPACKOD_B : 
LSX_3R<0b01110001000110000>, ++ LSX_3R_DESC_BASE<"vpackod.b", LoongArchVPACKOD, LSX128BOpnd>; ++ ++def VPACKOD_H : LSX_3R<0b01110001000110001>, ++ LSX_3R_DESC_BASE<"vpackod.h", LoongArchVPACKOD, LSX128HOpnd>; ++ ++def VPACKOD_W : LSX_3R<0b01110001000110010>, ++ LSX_3R_DESC_BASE<"vpackod.w", LoongArchVPACKOD, LSX128WOpnd>; ++ ++def VPACKOD_D : LSX_3R<0b01110001000110011>, ++ LSX_3R_DESC_BASE<"vpackod.d", LoongArchVPACKOD, LSX128DOpnd>; ++ ++ ++def VILVL_B : LSX_3R<0b01110001000110100>, ++ LSX_3R_DESC_BASE<"vilvl.b", LoongArchVILVL, LSX128BOpnd>; ++ ++def VILVL_H : LSX_3R<0b01110001000110101>, ++ LSX_3R_DESC_BASE<"vilvl.h", LoongArchVILVL, LSX128HOpnd>; ++ ++def VILVL_W : LSX_3R<0b01110001000110110>, ++ LSX_3R_DESC_BASE<"vilvl.w", LoongArchVILVL, LSX128WOpnd>; ++ ++def VILVL_D : LSX_3R<0b01110001000110111>, ++ LSX_3R_DESC_BASE<"vilvl.d", LoongArchVILVL, LSX128DOpnd>; ++ ++ ++def VILVH_B : LSX_3R<0b01110001000111000>, ++ LSX_3R_DESC_BASE<"vilvh.b", LoongArchVILVH, LSX128BOpnd>; ++ ++def VILVH_H : LSX_3R<0b01110001000111001>, ++ LSX_3R_DESC_BASE<"vilvh.h", LoongArchVILVH, LSX128HOpnd>; ++ ++def VILVH_W : LSX_3R<0b01110001000111010>, ++ LSX_3R_DESC_BASE<"vilvh.w", LoongArchVILVH, LSX128WOpnd>; ++ ++def VILVH_D : LSX_3R<0b01110001000111011>, ++ LSX_3R_DESC_BASE<"vilvh.d", LoongArchVILVH, LSX128DOpnd>; ++ ++ ++def VPICKEV_B : LSX_3R<0b01110001000111100>, ++ LSX_3R_DESC_BASE<"vpickev.b", LoongArchVPICKEV, LSX128BOpnd>; ++ ++def VPICKEV_H : LSX_3R<0b01110001000111101>, ++ LSX_3R_DESC_BASE<"vpickev.h", LoongArchVPICKEV, LSX128HOpnd>; ++ ++def VPICKEV_W : LSX_3R<0b01110001000111110>, ++ LSX_3R_DESC_BASE<"vpickev.w", LoongArchVPICKEV, LSX128WOpnd>; ++ ++def VPICKEV_D : LSX_3R<0b01110001000111111>, ++ LSX_3R_DESC_BASE<"vpickev.d", LoongArchVPICKEV, LSX128DOpnd>; ++ ++ ++def VPICKOD_B : LSX_3R<0b01110001001000000>, ++ LSX_3R_DESC_BASE<"vpickod.b", LoongArchVPICKOD, LSX128BOpnd>; ++ ++def VPICKOD_H : LSX_3R<0b01110001001000001>, ++ LSX_3R_DESC_BASE<"vpickod.h", LoongArchVPICKOD, LSX128HOpnd>; ++ ++def VPICKOD_W : LSX_3R<0b01110001001000010>, ++ LSX_3R_DESC_BASE<"vpickod.w", LoongArchVPICKOD, LSX128WOpnd>; ++ ++def VPICKOD_D : LSX_3R<0b01110001001000011>, ++ LSX_3R_DESC_BASE<"vpickod.d", LoongArchVPICKOD, LSX128DOpnd>; ++ ++ ++def VREPLVE_B : LSX_3R_1GP<0b01110001001000100>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.b", vsplati8_elt, LSX128BOpnd>; ++ ++def VREPLVE_H : LSX_3R_1GP<0b01110001001000101>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.h", vsplati16_elt, LSX128HOpnd>; ++ ++def VREPLVE_W : LSX_3R_1GP<0b01110001001000110>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.w", vsplati32_elt, LSX128WOpnd>; ++ ++def VREPLVE_D : LSX_3R_1GP<0b01110001001000111>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.d", vsplati64_elt, LSX128DOpnd>; ++ ++ ++def VAND_V : LSX_3R<0b01110001001001100>, ++ LSX_VEC_DESC_BASE<"vand.v", and, LSX128BOpnd>; ++class AND_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class AND_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class AND_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VOR_V : LSX_3R<0b01110001001001101>, ++ LSX_VEC_DESC_BASE<"vor.v", or, LSX128BOpnd>; ++class OR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class 
OR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class OR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VXOR_V : LSX_3R<0b01110001001001110>, ++ LSX_VEC_DESC_BASE<"vxor.v", xor, LSX128BOpnd>; ++class XOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class XOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class XOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VNOR_V : LSX_3R<0b01110001001001111>, ++ LSX_VEC_DESC_BASE<"vnor.v", LoongArchVNOR, LSX128BOpnd>; ++class NOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class NOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class NOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VFADD_S : LSX_3R<0b01110001001100001>, IsCommutable, ++ LSX_3RF_DESC_BASE<"vfadd.s", fadd, LSX128WOpnd>; ++ ++def VFADD_D : LSX_3R<0b01110001001100010>, IsCommutable, ++ LSX_3RF_DESC_BASE<"vfadd.d", fadd, LSX128DOpnd>; ++ ++ ++def VFSUB_S : LSX_3R<0b01110001001100101>, ++ LSX_3RF_DESC_BASE<"vfsub.s", fsub, LSX128WOpnd>; ++ ++def VFSUB_D : LSX_3R<0b01110001001100110>, ++ LSX_3RF_DESC_BASE<"vfsub.d", fsub, LSX128DOpnd>; ++ ++ ++def VFMUL_S : LSX_3R<0b01110001001110001>, ++ LSX_3RF_DESC_BASE<"vfmul.s", fmul, LSX128WOpnd>; ++ ++def VFMUL_D : LSX_3R<0b01110001001110010>, ++ LSX_3RF_DESC_BASE<"vfmul.d", fmul, LSX128DOpnd>; ++ ++ ++def VFDIV_S : LSX_3R<0b01110001001110101>, ++ LSX_3RF_DESC_BASE<"vfdiv.s", fdiv, LSX128WOpnd>; ++ ++def VFDIV_D : LSX_3R<0b01110001001110110>, ++ LSX_3RF_DESC_BASE<"vfdiv.d", fdiv, LSX128DOpnd>; ++ ++ ++def VFMAX_S : LSX_3R<0b01110001001111001>, ++ LSX_3RFN_DESC_BASE<"vfmax.s", LSX128WOpnd>; ++ ++def VFMAX_D : LSX_3R<0b01110001001111010>, ++ LSX_3RFN_DESC_BASE<"vfmax.d", LSX128DOpnd>; ++ ++ ++def VFMIN_S : LSX_3R<0b01110001001111101>, ++ LSX_3RFN_DESC_BASE<"vfmin.s", LSX128WOpnd>; ++ ++def VFMIN_D : LSX_3R<0b01110001001111110>, ++ LSX_3RFN_DESC_BASE<"vfmin.d", LSX128DOpnd>; ++ ++ ++def VFMAXA_S : LSX_3R<0b01110001010000001>, ++ LSX_3RFN_DESC_BASE<"vfmaxa.s", LSX128WOpnd>; ++ ++def VFMAXA_D : LSX_3R<0b01110001010000010>, ++ LSX_3RFN_DESC_BASE<"vfmaxa.d", LSX128DOpnd>; ++ ++ ++def VFMINA_S : LSX_3R<0b01110001010000101>, ++ LSX_3RFN_DESC_BASE<"vfmina.s", LSX128WOpnd>; ++ ++def VFMINA_D : LSX_3R<0b01110001010000110>, ++ LSX_3RFN_DESC_BASE<"vfmina.d", LSX128DOpnd>; ++ ++ ++def VSHUF_H : LSX_3R<0b01110001011110101>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.h", LSX128HOpnd>; ++ ++def VSHUF_W : 
LSX_3R<0b01110001011110110>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.w", LSX128WOpnd>; ++ ++def VSHUF_D : LSX_3R<0b01110001011110111>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.d", LSX128DOpnd>; ++ ++ ++def VSEQI_B : LSX_I5<0b01110010100000000>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.b", int_loongarch_lsx_vseqi_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSEQI_H : LSX_I5<0b01110010100000001>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.h", int_loongarch_lsx_vseqi_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSEQI_W : LSX_I5<0b01110010100000010>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.w", int_loongarch_lsx_vseqi_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSEQI_D : LSX_I5<0b01110010100000011>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.d", int_loongarch_lsx_vseqi_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLEI_B : LSX_I5<0b01110010100000100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.b", int_loongarch_lsx_vslei_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSLEI_H : LSX_I5<0b01110010100000101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.h", int_loongarch_lsx_vslei_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSLEI_W : LSX_I5<0b01110010100000110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.w", int_loongarch_lsx_vslei_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSLEI_D : LSX_I5<0b01110010100000111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.d", int_loongarch_lsx_vslei_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLEI_BU : LSX_I5_U<0b01110010100001000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.bu", int_loongarch_lsx_vslei_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VSLEI_HU : LSX_I5_U<0b01110010100001001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.hu", int_loongarch_lsx_vslei_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSLEI_WU : LSX_I5_U<0b01110010100001010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.wu", int_loongarch_lsx_vslei_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSLEI_DU : LSX_I5_U<0b01110010100001011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.du", int_loongarch_lsx_vslei_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VSLTI_B : LSX_I5<0b01110010100001100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.b", int_loongarch_lsx_vslti_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSLTI_H : LSX_I5<0b01110010100001101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.h", int_loongarch_lsx_vslti_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSLTI_W : LSX_I5<0b01110010100001110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.w", int_loongarch_lsx_vslti_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSLTI_D : LSX_I5<0b01110010100001111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.d", int_loongarch_lsx_vslti_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLTI_BU : LSX_I5_U<0b01110010100010000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.bu", int_loongarch_lsx_vslti_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VSLTI_HU : LSX_I5_U<0b01110010100010001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.hu", int_loongarch_lsx_vslti_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSLTI_WU : LSX_I5_U<0b01110010100010010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.wu", int_loongarch_lsx_vslti_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSLTI_DU : LSX_I5_U<0b01110010100010011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.du", int_loongarch_lsx_vslti_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VADDI_BU : LSX_I5_U<0b01110010100010100>, ++ LSX_I5_U_DESC_BASE<"vaddi.bu", add, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VADDI_HU : LSX_I5_U<0b01110010100010101>, ++ LSX_I5_U_DESC_BASE<"vaddi.hu", add, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VADDI_WU : LSX_I5_U<0b01110010100010110>, ++ LSX_I5_U_DESC_BASE<"vaddi.wu", 
add, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VADDI_DU : LSX_I5_U<0b01110010100010111>, ++ LSX_I5_U_DESC_BASE<"vaddi.du", add, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VSUBI_BU : LSX_I5_U<0b01110010100011000>, ++ LSX_I5_U_DESC_BASE<"vsubi.bu", sub, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSUBI_HU : LSX_I5_U<0b01110010100011001>, ++ LSX_I5_U_DESC_BASE<"vsubi.hu", sub, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSUBI_WU : LSX_I5_U<0b01110010100011010>, ++ LSX_I5_U_DESC_BASE<"vsubi.wu", sub, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSUBI_DU : LSX_I5_U<0b01110010100011011>, ++ LSX_I5_U_DESC_BASE<"vsubi.du", sub, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VMAXI_B : LSX_I5<0b01110010100100000>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.b", int_loongarch_lsx_vmaxi_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VMAXI_H : LSX_I5<0b01110010100100001>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.h", int_loongarch_lsx_vmaxi_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VMAXI_W : LSX_I5<0b01110010100100010>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.w", int_loongarch_lsx_vmaxi_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VMAXI_D : LSX_I5<0b01110010100100011>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.d", int_loongarch_lsx_vmaxi_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VMINI_B : LSX_I5<0b01110010100100100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.b", int_loongarch_lsx_vmini_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VMINI_H : LSX_I5<0b01110010100100101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.h", int_loongarch_lsx_vmini_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VMINI_W : LSX_I5<0b01110010100100110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.w", int_loongarch_lsx_vmini_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VMINI_D : LSX_I5<0b01110010100100111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.d", int_loongarch_lsx_vmini_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VMAXI_BU : LSX_I5_U<0b01110010100101000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.bu", int_loongarch_lsx_vmaxi_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VMAXI_HU : LSX_I5_U<0b01110010100101001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.hu", int_loongarch_lsx_vmaxi_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VMAXI_WU : LSX_I5_U<0b01110010100101010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.wu", int_loongarch_lsx_vmaxi_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VMAXI_DU : LSX_I5_U<0b01110010100101011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.du", int_loongarch_lsx_vmaxi_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VMINI_BU : LSX_I5_U<0b01110010100101100>, ++ LSX_I5_U_DESC_BASE<"vmini.bu", umin, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VMINI_HU : LSX_I5_U<0b01110010100101101>, ++ LSX_I5_U_DESC_BASE<"vmini.hu", umin, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VMINI_WU : LSX_I5_U<0b01110010100101110>, ++ LSX_I5_U_DESC_BASE<"vmini.wu", umin, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VMINI_DU : LSX_I5_U<0b01110010100101111>, ++ LSX_I5_U_DESC_BASE<"vmini.du", umin, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VCLO_B : LSX_2R<0b0111001010011100000000>, ++ LSX_2RN_DESC_BASE<"vclo.b", LSX128BOpnd>; ++ ++def VCLO_H : LSX_2R<0b0111001010011100000001>, ++ LSX_2RN_DESC_BASE<"vclo.h", LSX128HOpnd>; ++ ++def VCLO_W : LSX_2R<0b0111001010011100000010>, ++ LSX_2RN_DESC_BASE<"vclo.w", LSX128WOpnd>; ++ ++def VCLO_D : LSX_2R<0b0111001010011100000011>, ++ LSX_2RN_DESC_BASE<"vclo.d", LSX128DOpnd>; ++ ++ ++def VCLZ_B : LSX_2R<0b0111001010011100000100>, ++ LSX_2R_DESC_BASE<"vclz.b", ctlz, LSX128BOpnd>; ++ ++def VCLZ_H : LSX_2R<0b0111001010011100000101>, ++ LSX_2R_DESC_BASE<"vclz.h", ctlz, 
LSX128HOpnd>; ++ ++def VCLZ_W : LSX_2R<0b0111001010011100000110>, ++ LSX_2R_DESC_BASE<"vclz.w", ctlz, LSX128WOpnd>; ++ ++def VCLZ_D : LSX_2R<0b0111001010011100000111>, ++ LSX_2R_DESC_BASE<"vclz.d", ctlz, LSX128DOpnd>; ++ ++ ++def VPCNT_B : LSX_2R<0b0111001010011100001000>, ++ LSX_2R_DESC_BASE<"vpcnt.b", ctpop, LSX128BOpnd>; ++ ++def VPCNT_H : LSX_2R<0b0111001010011100001001>, ++ LSX_2R_DESC_BASE<"vpcnt.h", ctpop, LSX128HOpnd>; ++ ++def VPCNT_W : LSX_2R<0b0111001010011100001010>, ++ LSX_2R_DESC_BASE<"vpcnt.w", ctpop, LSX128WOpnd>; ++ ++def VPCNT_D : LSX_2R<0b0111001010011100001011>, ++ LSX_2R_DESC_BASE<"vpcnt.d", ctpop, LSX128DOpnd>; ++ ++ ++def VFLOGB_S : LSX_2R<0b0111001010011100110001>, ++ LSX_2RFN_DESC_BASE<"vflogb.s", LSX128WOpnd>; ++ ++def VFLOGB_D : LSX_2R<0b0111001010011100110010>, ++ LSX_2RFN_DESC_BASE<"vflogb.d", LSX128DOpnd>; ++ ++ ++def VFCLASS_S : LSX_2R<0b0111001010011100110101>, ++ LSX_2RFN_DESC_BASE<"vfclass.s", LSX128WOpnd>; ++ ++def VFCLASS_D : LSX_2R<0b0111001010011100110110>, ++ LSX_2RFN_DESC_BASE<"vfclass.d", LSX128DOpnd>; ++ ++ ++def VFSQRT_S : LSX_2R<0b0111001010011100111001>, ++ LSX_2RF_DESC_BASE<"vfsqrt.s", fsqrt, LSX128WOpnd>; ++ ++def VFSQRT_D : LSX_2R<0b0111001010011100111010>, ++ LSX_2RF_DESC_BASE<"vfsqrt.d", fsqrt, LSX128DOpnd>; ++ ++ ++def VFRECIP_S : LSX_2R<0b0111001010011100111101>, ++ LSX_2RFN_DESC_BASE<"vfrecip.s", LSX128WOpnd>; ++ ++def VFRECIP_D : LSX_2R<0b0111001010011100111110>, ++ LSX_2RFN_DESC_BASE<"vfrecip.d", LSX128DOpnd>; ++ ++ ++def VFRSQRT_S : LSX_2R<0b0111001010011101000001>, ++ LSX_2RFN_DESC_BASE<"vfrsqrt.s", LSX128WOpnd>; ++ ++def VFRSQRT_D : LSX_2R<0b0111001010011101000010>, ++ LSX_2RFN_DESC_BASE<"vfrsqrt.d", LSX128DOpnd>; ++ ++ ++def VFRINT_S : LSX_2R<0b0111001010011101001101>, ++ LSX_2RF_DESC_BASE<"vfrint.s", frint, LSX128WOpnd>; ++ ++def VFRINT_D : LSX_2R<0b0111001010011101001110>, ++ LSX_2RF_DESC_BASE<"vfrint.d", frint, LSX128DOpnd>; ++ ++ ++def VFCVTL_S_H : LSX_2R<0b0111001010011101111010>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvtl.s.h", LSX128WOpnd, LSX128HOpnd>; ++ ++def VFCVTH_S_H : LSX_2R<0b0111001010011101111011>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvth.s.h", LSX128WOpnd, LSX128HOpnd>; ++ ++ ++def VFCVTL_D_S : LSX_2R<0b0111001010011101111100>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvtl.d.s", LSX128DOpnd, LSX128WOpnd>; ++ ++def VFCVTH_D_S : LSX_2R<0b0111001010011101111101>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvth.d.s", LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFFINT_S_W : LSX_2R<0b0111001010011110000000>, ++ LSX_2RF_DESC_BASE<"vffint.s.w", sint_to_fp, LSX128WOpnd>; ++ ++def VFFINT_S_WU : LSX_2R<0b0111001010011110000001>, ++ LSX_2RF_DESC_BASE<"vffint.s.wu", uint_to_fp, LSX128WOpnd>; ++ ++ ++def VFFINT_D_L : LSX_2R<0b0111001010011110000010>, ++ LSX_2RF_DESC_BASE<"vffint.d.l", sint_to_fp, LSX128DOpnd>; ++ ++def VFFINT_D_LU : LSX_2R<0b0111001010011110000011>, ++ LSX_2RF_DESC_BASE<"vffint.d.lu", uint_to_fp, LSX128DOpnd>; ++ ++ ++def VFTINT_W_S : LSX_2R<0b0111001010011110001100>, ++ LSX_2RFN_DESC_BASE<"vftint.w.s", LSX128WOpnd>; ++ ++def VFTINT_L_D : LSX_2R<0b0111001010011110001101>, ++ LSX_2RFN_DESC_BASE<"vftint.l.d", LSX128DOpnd>; ++ ++ ++def VFTINT_WU_S : LSX_2R<0b0111001010011110010110>, ++ LSX_2RFN_DESC_BASE<"vftint.wu.s", LSX128WOpnd>; ++ ++def VFTINT_LU_D : LSX_2R<0b0111001010011110010111>, ++ LSX_2RFN_DESC_BASE<"vftint.lu.d", LSX128DOpnd>; ++ ++ ++def VFTINTRZ_WU_S : LSX_2R<0b0111001010011110011100>, ++ LSX_2RF_DESC_BASE<"vftintrz.wu.s", fp_to_uint, LSX128WOpnd>; ++ ++def VFTINTRZ_LU_D : LSX_2R<0b0111001010011110011101>, ++ LSX_2RF_DESC_BASE<"vftintrz.lu.d", 
fp_to_uint, LSX128DOpnd>; ++ ++ ++def VREPLGR2VR_B : LSX_2R_1GP<0b0111001010011111000000>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.b", v16i8, vsplati8, LSX128BOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_H : LSX_2R_1GP<0b0111001010011111000001>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.h", v8i16, vsplati16, LSX128HOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_W : LSX_2R_1GP<0b0111001010011111000010>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.w", v4i32, vsplati32, LSX128WOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_D : LSX_2R_1GP<0b0111001010011111000011>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.d", v2i64, vsplati64, LSX128DOpnd, GPR64Opnd>; ++ ++ ++class LSX_2R_FILL_PSEUDO_BASE : ++ LSXPseudo<(outs RCVD:$vd), (ins RCVS:$fs), ++ [(set RCVD:$vd, (OpNode RCVS:$fs))]> { ++ let usesCustomInserter = 1; ++} ++ ++class FILL_FW_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE; ++class FILL_FD_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE; ++ ++def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC; ++def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC; ++ ++ ++def VSRLRI_B : LSX_I3_U<0b0111001010100100001>, ++ LSX_BIT_3N_DESC_BASE<"vsrlri.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSRLRI_H : LSX_I4_U<0b011100101010010001>, ++ LSX_BIT_4N_DESC_BASE<"vsrlri.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSRLRI_W : LSX_I5_U<0b01110010101001001>, ++ LSX_BIT_5N_DESC_BASE<"vsrlri.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSRLRI_D : LSX_I6_U<0b0111001010100101>, ++ LSX_BIT_6N_DESC_BASE<"vsrlri.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSRARI_B : LSX_I3_U<0b0111001010101000001>, ++ LSX_BIT_3N_DESC_BASE<"vsrari.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSRARI_H : LSX_I4_U<0b011100101010100001>, ++ LSX_BIT_4N_DESC_BASE<"vsrari.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSRARI_W : LSX_I5_U<0b01110010101010001>, ++ LSX_BIT_5N_DESC_BASE<"vsrari.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSRARI_D : LSX_I6_U<0b0111001010101001>, ++ LSX_BIT_6N_DESC_BASE<"vsrari.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VINSGR2VR_B : LSX_I4_R_U<0b011100101110101110>, ++ LSX_INSERT_U4_DESC_BASE<"vinsgr2vr.b", vinsert_v16i8, uimm4, immZExt4Ptr, LSX128BOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_H : LSX_I3_R_U<0b0111001011101011110>, ++ LSX_INSERT_U3_DESC_BASE<"vinsgr2vr.h", vinsert_v8i16, uimm3, immZExt3Ptr, LSX128HOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_W : LSX_I2_R_U<0b01110010111010111110>, ++ LSX_INSERT_U2_DESC_BASE<"vinsgr2vr.w", vinsert_v4i32, uimm2, immZExt2Ptr, LSX128WOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_D : LSX_I1_R_U<0b011100101110101111110>, ++ LSX_INSERT_U1_DESC_BASE<"vinsgr2vr.d", vinsert_v2i64, uimm1, immZExt1Ptr, LSX128DOpnd, GPR64Opnd>; ++ ++ ++def VPICKVE2GR_B : LSX_ELM_COPY_B<0b011100101110111110>, ++ LSX_PICK_U4_DESC_BASE<"vpickve2gr.b", vextract_sext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; ++ ++def VPICKVE2GR_H : LSX_ELM_COPY_H<0b0111001011101111110>, ++ LSX_PICK_U3_DESC_BASE<"vpickve2gr.h", vextract_sext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; ++ ++def VPICKVE2GR_W : LSX_ELM_COPY_W<0b01110010111011111110>, ++ LSX_PICK_U2_DESC_BASE<"vpickve2gr.w", vextract_sext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; ++ ++def VPICKVE2GR_D : LSX_ELM_COPY_D<0b011100101110111111110>, ++ LSX_PICK_U1_DESC_BASE<"vpickve2gr.d", vextract_sext_i64, v2i64, uimm1_ptr, immZExt1Ptr, GPR64Opnd, LSX128DOpnd>; ++ ++ ++def VPICKVE2GR_BU : LSX_ELM_COPY_B<0b011100101111001110>, ++ LSX_PICK_U4_DESC_BASE<"vpickve2gr.bu", vextract_zext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; ++ ++def VPICKVE2GR_HU : LSX_ELM_COPY_H<0b0111001011110011110>, ++ 
LSX_PICK_U3_DESC_BASE<"vpickve2gr.hu", vextract_zext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; ++ ++def VPICKVE2GR_WU : LSX_ELM_COPY_W<0b01110010111100111110>, ++ LSX_PICK_U2_DESC_BASE<"vpickve2gr.wu", vextract_zext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; ++ ++def VPICKVE2GR_DU : LSX_ELM_COPY_D<0b011100101111001111110>, ++ LSX_PICK_U1_DESC_BASE<"vpickve2gr.du", int_loongarch_lsx_vpickve2gr_du, v2i64, uimm1, immZExt1, GPR64Opnd, LSX128DOpnd>; ++ ++ ++def : LSXPat<(vextract_zext_i64 (v2i64 LSX128D:$vj), immZExt1Ptr:$idx), ++ (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; ++def : LSXPat<(vextract_zext_i64 (v2f64 LSX128D:$vj), immZExt1Ptr:$idx), ++ (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; ++ ++ ++def VREPLVEI_B : LSX_I4_U<0b011100101111011110>, ++ LSX_ELM_U4_VREPLVE_DESC_BASE<"vreplvei.b", vsplati8_uimm4, LSX128BOpnd>; ++ ++def VREPLVEI_H : LSX_I3_U<0b0111001011110111110>, ++ LSX_ELM_U3_VREPLVE_DESC_BASE<"vreplvei.h", vsplati16_uimm3, LSX128HOpnd>; ++ ++def VREPLVEI_W : LSX_I2_U<0b01110010111101111110>, ++ LSX_ELM_U2_VREPLVE_DESC_BASE<"vreplvei.w", vsplati32_uimm2, LSX128WOpnd>; ++ ++def VREPLVEI_D : LSX_I1_U<0b011100101111011111110>, ++ LSX_ELM_U1_VREPLVE_DESC_BASE<"vreplvei.d", vsplati64_uimm1, LSX128DOpnd>; ++ ++ ++def VSAT_B : LSX_I3_U<0b0111001100100100001>, ++ LSX_BIT_3N_DESC_BASE<"vsat.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSAT_H : LSX_I4_U<0b011100110010010001>, ++ LSX_BIT_4N_DESC_BASE<"vsat.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSAT_W : LSX_I5_U<0b01110011001001001>, ++ LSX_BIT_5N_DESC_BASE<"vsat.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSAT_D : LSX_I6_U<0b0111001100100101>, ++ LSX_BIT_6N_DESC_BASE<"vsat.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSAT_BU : LSX_I3_U<0b0111001100101000001>, ++ LSX_BIT_3N_DESC_BASE<"vsat.bu", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSAT_HU : LSX_I4_U<0b011100110010100001>, ++ LSX_BIT_4N_DESC_BASE<"vsat.hu", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSAT_WU : LSX_I5_U<0b01110011001010001>, ++ LSX_BIT_5N_DESC_BASE<"vsat.wu", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSAT_DU : LSX_I6_U<0b0111001100101001>, ++ LSX_BIT_6N_DESC_BASE<"vsat.du", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSLLI_B : LSX_I3_U<0b0111001100101100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vslli.b", shl, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSLLI_H : LSX_I4_U<0b011100110010110001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vslli.h", shl, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSLLI_W : LSX_I5_U<0b01110011001011001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vslli.w", shl, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLLI_D : LSX_I6_U<0b0111001100101101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vslli.d", shl, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VSRLI_B : LSX_I3_U<0b0111001100110000001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrli.b", srl, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSRLI_H : LSX_I4_U<0b011100110011000001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrli.h", srl, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSRLI_W : LSX_I5_U<0b01110011001100001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrli.w", srl, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSRLI_D : LSX_I6_U<0b0111001100110001>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrli.d", srl, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VSRAI_B : LSX_I3_U<0b0111001100110100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"vsrai.b", int_loongarch_lsx_vsrai_b, LSX128BOpnd>; ++ ++def VSRAI_H : LSX_I4_U<0b011100110011010001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"vsrai.h", int_loongarch_lsx_vsrai_h, LSX128HOpnd>; ++ ++def VSRAI_W : 
LSX_I5_U<0b01110011001101001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"vsrai.w", int_loongarch_lsx_vsrai_w, LSX128WOpnd>; ++ ++def VSRAI_D : LSX_I6_U<0b0111001100110101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"vsrai.d", int_loongarch_lsx_vsrai_d, LSX128DOpnd>; ++ ++ ++def VSHUF4I_B : LSX_I8_U<0b01110011100100>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.b", LSX128BOpnd>; ++ ++def VSHUF4I_H : LSX_I8_U<0b01110011100101>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.h", LSX128HOpnd>; ++ ++def VSHUF4I_W : LSX_I8_U<0b01110011100110>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.w", LSX128WOpnd>; ++ ++def VSHUF4I_D : LSX_I8_U<0b01110011100111>, ++ LSX_I8_SHUF_DESC_BASE_D<"vshuf4i.d", int_loongarch_lsx_vshuf4i_d, LSX128DOpnd>; ++ ++ ++def VROTR_B : LSX_3R<0b01110000111011100>, ++ LSX_3R_DESC_BASE<"vrotr.b", LoongArchVROR, LSX128BOpnd>; ++ ++def VROTR_H : LSX_3R<0b01110000111011101>, ++ LSX_3R_DESC_BASE<"vrotr.h", LoongArchVROR, LSX128HOpnd>; ++ ++def VROTR_W : LSX_3R<0b01110000111011110>, ++ LSX_3R_DESC_BASE<"vrotr.w", LoongArchVROR, LSX128WOpnd>; ++ ++def VROTR_D : LSX_3R<0b01110000111011111>, ++ LSX_3R_DESC_BASE<"vrotr.d", LoongArchVROR, LSX128DOpnd>; ++ ++ ++def VMSKLTZ_B : LSX_2R<0b0111001010011100010000>, ++ LSX_2RN_DESC_BASE<"vmskltz.b", LSX128BOpnd>; ++ ++def VMSKLTZ_H : LSX_2R<0b0111001010011100010001>, ++ LSX_2RN_DESC_BASE<"vmskltz.h", LSX128HOpnd>; ++ ++def VMSKLTZ_W : LSX_2R<0b0111001010011100010010>, ++ LSX_2RN_DESC_BASE<"vmskltz.w", LSX128WOpnd>; ++ ++def VMSKLTZ_D : LSX_2R<0b0111001010011100010011>, ++ LSX_2RN_DESC_BASE<"vmskltz.d", LSX128DOpnd>; ++ ++ ++def VROTRI_B : LSX_I3_U<0b0111001010100000001>, ++ LSX2_RORI_U3_DESC_BASE<"vrotri.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VROTRI_H : LSX_I4_U<0b011100101010000001>, ++ LSX2_RORI_U4_DESC_BASE<"vrotri.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VROTRI_W : LSX_I5_U<0b01110010101000001>, ++ LSX2_RORI_U5_DESC_BASE<"vrotri.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VROTRI_D : LSX_I6_U<0b0111001010100001>, ++ LSX2_RORI_U6_DESC_BASE<"vrotri.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSRLNI_B_H : LSX_I4_U<0b011100110100000001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRLNI_H_W : LSX_I5_U<0b01110011010000001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRLNI_W_D : LSX_I6_U<0b0111001101000001>, ++ LSX_BIND_U6N_DESC_BASE<"vsrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRLNI_D_Q : LSX_I7_U<0b011100110100001>, ++ LSX_BIND_U7N_DESC_BASE<"vsrlni.d.q", LSX128DOpnd>; ++ ++ ++def VSRLRNI_B_H : LSX_I4_U<0b011100110100010001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrlrni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRLRNI_H_W : LSX_I5_U<0b01110011010001001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrlrni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRLRNI_W_D : LSX_I6_U<0b0111001101000101>, ++ LSX_BIND_U6N_DESC_BASE<"vsrlrni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRLRNI_D_Q : LSX_I7_U<0b011100110100011>, ++ LSX_BIND_U7N_DESC_BASE<"vsrlrni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRLNI_B_H : LSX_I4_U<0b011100110100100001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRLNI_H_W : LSX_I5_U<0b01110011010010001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLNI_W_D : LSX_I6_U<0b0111001101001001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLNI_D_Q : LSX_I7_U<0b011100110100101>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRLNI_BU_H : LSX_I4_U<0b011100110100110001>, 
++ LSX_BIND_U4N_DESC_BASE<"vssrlni.bu.h", uimm4, immZExt4, LSX128BOpnd> ; ++ ++def VSSRLNI_HU_W : LSX_I5_U<0b01110011010011001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLNI_WU_D : LSX_I6_U<0b0111001101001101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLNI_DU_Q : LSX_I7_U<0b011100110100111>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlni.du.q", LSX128DOpnd>; ++ ++ ++def VSSRLRNI_BU_H : LSX_I4_U<0b011100110101010001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlrni.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRLRNI_HU_W : LSX_I5_U<0b01110011010101001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlrni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLRNI_WU_D : LSX_I6_U<0b0111001101010101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlrni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLRNI_DU_Q : LSX_I7_U<0b011100110101011>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlrni.du.q", LSX128DOpnd>; ++ ++ ++def VSRARNI_B_H : LSX_I4_U<0b011100110101110001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRARNI_H_W : LSX_I5_U<0b01110011010111001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRARNI_W_D : LSX_I6_U<0b0111001101011101>, ++ LSX_BIND_U6N_DESC_BASE<"vsrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRARNI_D_Q : LSX_I7_U<0b011100110101111>, ++ LSX_BIND_U7N_DESC_BASE<"vsrarni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRANI_B_H : LSX_I4_U<0b011100110110000001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrani.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRANI_H_W : LSX_I5_U<0b01110011011000001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrani.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRANI_W_D : LSX_I6_U<0b0111001101100001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrani.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRANI_D_Q : LSX_I7_U<0b011100110110001>, ++ LSX_BIND_U7N_DESC_BASE<"vssrani.d.q", LSX128DOpnd>; ++ ++ ++def VSSRANI_BU_H : LSX_I4_U<0b011100110110010001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrani.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRANI_HU_W : LSX_I5_U<0b01110011011001001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrani.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRANI_WU_D : LSX_I6_U<0b0111001101100101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrani.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRANI_DU_Q : LSX_I7_U<0b011100110110011>, ++ LSX_BIND_U7N_DESC_BASE<"vssrani.du.q", LSX128DOpnd>; ++ ++ ++def VSSRARNI_B_H : LSX_I4_U<0b011100110110100001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRARNI_H_W : LSX_I5_U<0b01110011011010001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRARNI_W_D : LSX_I6_U<0b0111001101101001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRARNI_D_Q : LSX_I7_U<0b011100110110101>, ++ LSX_BIND_U7N_DESC_BASE<"vssrarni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRARNI_BU_H : LSX_I4_U<0b011100110110110001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrarni.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRARNI_HU_W : LSX_I5_U<0b01110011011011001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrarni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRARNI_WU_D : LSX_I6_U<0b0111001101101101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrarni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRARNI_DU_Q : LSX_I7_U<0b011100110110111>, ++ LSX_BIND_U7N_DESC_BASE<"vssrarni.du.q", LSX128DOpnd>; ++ ++ ++ ++def VLD : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v16i8, LSX128BOpnd, mem>; ++ ++def VST : LSX_I12_S<0b0010110001>, 
++ ST_DESC_BASE<"vst", store, v16i8, LSX128BOpnd, mem_simm12>; ++ ++ ++def VSETEQZ_V : LSX_SET<0b0111001010011100100110>, ++ LSX_SET_DESC_BASE<"vseteqz.v", LSX128BOpnd>; ++ ++def VSETNEZ_V : LSX_SET<0b0111001010011100100111>, ++ LSX_SET_DESC_BASE<"vsetnez.v", LSX128BOpnd>; ++ ++ ++def VSETANYEQZ_B : LSX_SET<0b0111001010011100101000>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.b", LSX128BOpnd>; ++ ++def VSETANYEQZ_H : LSX_SET<0b0111001010011100101001>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.h", LSX128HOpnd>; ++ ++def VSETANYEQZ_W : LSX_SET<0b0111001010011100101010>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.w", LSX128WOpnd>; ++ ++def VSETANYEQZ_D : LSX_SET<0b0111001010011100101011>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.d", LSX128DOpnd>; ++ ++ ++def VSETALLNEZ_B : LSX_SET<0b0111001010011100101100>, ++ LSX_SET_DESC_BASE<"vsetallnez.b", LSX128BOpnd>; ++ ++def VSETALLNEZ_H : LSX_SET<0b0111001010011100101101>, ++ LSX_SET_DESC_BASE<"vsetallnez.h", LSX128HOpnd>; ++ ++def VSETALLNEZ_W : LSX_SET<0b0111001010011100101110>, ++ LSX_SET_DESC_BASE<"vsetallnez.w", LSX128WOpnd>; ++ ++def VSETALLNEZ_D : LSX_SET<0b0111001010011100101111>, ++ LSX_SET_DESC_BASE<"vsetallnez.d", LSX128DOpnd>; ++ ++class LSX_CBRANCH_PSEUDO_DESC_BASE : ++ LoongArchPseudo<(outs GPR32Opnd:$rd), ++ (ins RCVS:$vj), ++ [(set GPR32Opnd:$rd, (OpNode (TyNode RCVS:$vj)))]> { ++ bit usesCustomInserter = 1; ++} ++ ++def SNZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++ ++def SZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++ ++ ++def VFMADD_S : LSX_VR4MUL<0b000010010001>, ++ LSX_4RF<"vfmadd.s", int_loongarch_lsx_vfmadd_s, LSX128WOpnd>; ++ ++def VFMADD_D : LSX_VR4MUL<0b000010010010>, ++ LSX_4RF<"vfmadd.d", int_loongarch_lsx_vfmadd_d, LSX128DOpnd>; ++ ++def VFMSUB_S : LSX_VR4MUL<0b000010010101>, ++ LSX_4RF<"vfmsub.s", int_loongarch_lsx_vfmsub_s, LSX128WOpnd>; ++ ++def VFMSUB_D : LSX_VR4MUL<0b000010010110>, ++ LSX_4RF<"vfmsub.d", int_loongarch_lsx_vfmsub_d, LSX128DOpnd>; ++ ++def VFNMADD_S : LSX_VR4MUL<0b000010011001>, ++ LSX_4RF<"vfnmadd.s", int_loongarch_lsx_vfnmadd_s, LSX128WOpnd>; ++ ++def VFNMADD_D : LSX_VR4MUL<0b000010011010>, ++ LSX_4RF<"vfnmadd.d", int_loongarch_lsx_vfnmadd_d, LSX128DOpnd>; ++ ++def VFNMSUB_S : LSX_VR4MUL<0b000010011101>, ++ LSX_4RF<"vfnmsub.s", int_loongarch_lsx_vfnmsub_s, LSX128WOpnd>; ++ ++def VFNMSUB_D : LSX_VR4MUL<0b000010011110>, ++ LSX_4RF<"vfnmsub.d", int_loongarch_lsx_vfnmsub_d, LSX128DOpnd>; ++ ++ ++// vfmadd: vj * vk + va ++def : LSXPat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), ++ (VFMADD_D $vj, $vk, $va)>; ++ ++def : LSXPat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), ++ (VFMADD_S $vj, $vk, $va)>; ++ ++ ++// vfmsub: vj * vk - va ++def : LSXPat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), ++ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), ++ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++ ++// vfnmadd: -(vj * vk + va) ++def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++// vfnmsub: -(vj * vk - va) ++def : 
LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++ ++def VFCMP_CAF_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.caf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_caf_s>{ ++ bits<5> cond=0x0; ++ } ++ ++def VFCMP_CAF_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.caf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_caf_d>{ ++ bits<5> cond=0x0; ++ } ++ ++ ++def VFCMP_COR_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetord_v4f32>{ ++ bits<5> cond=0x14; ++ } ++ ++def VFCMP_COR_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetord_v2f64>{ ++ bits<5> cond=0x14; ++ } ++ ++ ++def VFCMP_CUN_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetun_v4f32>{ ++ bits<5> cond=0x8; ++ } ++ ++def VFCMP_CUN_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetun_v2f64>{ ++ bits<5> cond=0x8; ++ } ++ ++ ++def VFCMP_CUNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetune_v4f32>{ ++ bits<5> cond=0x18; ++ } ++ ++def VFCMP_CUNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetune_v2f64>{ ++ bits<5> cond=0x18; ++ } ++ ++ ++def VFCMP_CUEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetueq_v4f32>{ ++ bits<5> cond=0xc; ++ } ++ ++def VFCMP_CUEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetueq_v2f64>{ ++ bits<5> cond=0xc; ++ } ++ ++def VFCMP_CEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.ceq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetoeq_v4f32>{ ++ bits<5> cond=0x4; ++ } ++ ++def VFCMP_CEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.ceq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetoeq_v2f64>{ ++ bits<5> cond=0x4; ++ } ++ ++ ++def VFCMP_CNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetone_v4f32>{ ++ bits<5> cond=0x10; ++ } ++ ++def VFCMP_CNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetone_v2f64>{ ++ bits<5> cond=0x10; ++ } ++ ++ ++def VFCMP_CLT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.clt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetolt_v4f32>{ ++ bits<5> cond=0x2; ++ } ++ ++def VFCMP_CLT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.clt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetolt_v2f64>{ ++ bits<5> cond=0x2; ++ } ++ ++ ++def VFCMP_CULT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetult_v4f32>{ ++ bits<5> cond=0xa; ++ } ++ ++def VFCMP_CULT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetult_v2f64>{ ++ bits<5> cond=0xa; ++ } ++ ++ ++def VFCMP_CLE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetole_v4f32>{ ++ bits<5> cond=0x6; ++ } ++ ++def VFCMP_CLE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, 
vfsetole_v2f64>{ ++ bits<5> cond=0x6; ++ } ++ ++ ++def VFCMP_CULE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetule_v4f32>{ ++ bits<5> cond=0xe; ++ } ++ ++def VFCMP_CULE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetule_v2f64>{ ++ bits<5> cond=0xe; ++ } ++ ++ ++def VFCMP_SAF_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.saf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_saf_s>{ ++ bits<5> cond=0x1; ++ } ++ ++def VFCMP_SAF_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.saf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_saf_d>{ ++ bits<5> cond=0x1; ++ } ++ ++def VFCMP_SOR_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sor_s>{ ++ bits<5> cond=0x15; ++ } ++ ++def VFCMP_SOR_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sor_d>{ ++ bits<5> cond=0x15; ++ } ++ ++def VFCMP_SUN_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sun_s>{ ++ bits<5> cond=0x9; ++ } ++ ++def VFCMP_SUN_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sun_d>{ ++ bits<5> cond=0x9; ++ } ++ ++def VFCMP_SUNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sune_s>{ ++ bits<5> cond=0x19; ++ } ++ ++def VFCMP_SUNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sune_d>{ ++ bits<5> cond=0x19; ++ } ++ ++def VFCMP_SUEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sueq_s>{ ++ bits<5> cond=0xd; ++ } ++ ++def VFCMP_SUEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sueq_d>{ ++ bits<5> cond=0xd; ++ } ++ ++def VFCMP_SEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.seq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_seq_s>{ ++ bits<5> cond=0x5; ++ } ++ ++def VFCMP_SEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.seq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_seq_d>{ ++ bits<5> cond=0x5; ++ } ++ ++def VFCMP_SNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sne_s>{ ++ bits<5> cond=0x11; ++ } ++ ++def VFCMP_SNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sne_d>{ ++ bits<5> cond=0x11; ++ } ++ ++def VFCMP_SLT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.slt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_slt_s>{ ++ bits<5> cond=0x3; ++ } ++ ++def VFCMP_SLT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.slt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_slt_d>{ ++ bits<5> cond=0x3; ++ } ++ ++def VFCMP_SULT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sult_s>{ ++ bits<5> cond=0xb; ++ } ++ ++def VFCMP_SULT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sult.d", 
LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sult_d>{ ++ bits<5> cond=0xb; ++ } ++ ++def VFCMP_SLE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sle_s>{ ++ bits<5> cond=0x7; ++ } ++ ++def VFCMP_SLE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sle_d>{ ++ bits<5> cond=0x7; ++ } ++ ++def VFCMP_SULE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sule_s>{ ++ bits<5> cond=0xf; ++ } ++ ++def VFCMP_SULE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sule_d>{ ++ bits<5> cond=0xf; ++ } ++ ++def VBITSEL_V : LSX_VR4MUL<0b000011010001>, ++ LSX_VMul_Reg4<"vbitsel.v", LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, int_loongarch_lsx_vbitsel_v>; ++ ++def VSHUF_B : LSX_VR4MUL<0b000011010101>, ++ LSX_3R_4R_VSHF_DESC_BASE<"vshuf.b", LSX128BOpnd>; ++ ++ ++class LSX_BSEL_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vd_in, RO:$vs, RO:$vt), ++ [(set RO:$vd, (Ty (vselect RO:$vd_in, RO:$vt, RO:$vs)))]>, ++ PseudoInstExpansion<(VBITSEL_V LSX128BOpnd:$vd, LSX128BOpnd:$vs, ++ LSX128BOpnd:$vt, LSX128BOpnd:$vd_in)> { ++ let Constraints = "$vd_in = $vd"; ++} ++ ++def BSEL_B_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_H_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_W_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_D_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_FW_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_FD_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++ ++ ++class LSX_LD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(set ROVD:$vd, (OpNode (TyNode (load Addr:$addr))))]; ++ string DecoderMethod = "DecodeLSX128memlsl"; ++} ++ ++def VLDREPL_B : LSX_SI12_S<0b0011000010>, ++ LSX_LD_DESC_BASE<"vldrepl.b", vldrepl_v16i8, v16i8, LSX128BOpnd>; ++ ++def VLDREPL_H : LSX_SI11_S<0b00110000010>, ++ LSX_LD_DESC_BASE<"vldrepl.h", vldrepl_v8i16, v8i16, LSX128HOpnd, mem_simm11_lsl1, addrimm11lsl1>; ++ ++def VLDREPL_W : LSX_SI10_S<0b001100000010>, ++ LSX_LD_DESC_BASE<"vldrepl.w", vldrepl_v4i32, v4i32, LSX128WOpnd, mem_simm10_lsl2, addrimm10lsl2>; ++ ++def VLDREPL_D : LSX_SI9_S<0b0011000000010>, ++ LSX_LD_DESC_BASE<"vldrepl.d", vldrepl_v2i64, v2i64, LSX128DOpnd, mem_simm9_lsl3, addrimm9lsl3>; ++ ++ ++def VSTELM_B : LSX_SI8_idx4<0b0011000110>, ++ LSX_I8_U4_DESC_BASE<"vstelm.b", int_loongarch_lsx_vstelm_b, simm8_32, immSExt8, LSX128BOpnd>; ++ ++def VSTELM_H : LSX_SI8_idx3<0b00110001010>, ++ LSX_I8_U3_DESC_BASE<"vstelm.h", int_loongarch_lsx_vstelm_h, immSExt8_1_O, immSExt8, LSX128HOpnd>; ++ ++def VSTELM_W : LSX_SI8_idx2<0b001100010010>, ++ LSX_I8_U2_DESC_BASE<"vstelm.w", int_loongarch_lsx_vstelm_w, immSExt8_2_O, immSExt8, LSX128WOpnd>; ++ ++def VSTELM_D : LSX_SI8_idx1<0b0011000100010>, ++ LSX_I8_U1_DESC_BASE<"vstelm.d", int_loongarch_lsx_vstelm_d, immSExt8_3_O, immSExt8, LSX128DOpnd>; ++ ++ ++let mayLoad = 1, canFoldAsLoad = 1 in { ++ def VLDX : LSX_3R_2GP<0b00111000010000000>, ++ LSX_LDX_LA<"vldx", int_loongarch_lsx_vldx, GPR64Opnd, LSX128BOpnd>; ++} ++ ++let mayStore = 1 in{ ++ def VSTX : LSX_3R_2GP<0b00111000010001000>, ++ LSX_SDX_LA<"vstx", int_loongarch_lsx_vstx, GPR64Opnd, LSX128BOpnd>; ++} ++ ++ ++def VADDWEV_H_B : LSX_3R<0b01110000000111100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.b", 
int_loongarch_lsx_vaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_H : LSX_3R<0b01110000000111101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.h", int_loongarch_lsx_vaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_W : LSX_3R<0b01110000000111110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.w", int_loongarch_lsx_vaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_D : LSX_3R<0b01110000000111111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.d", int_loongarch_lsx_vaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWEV_H_B : LSX_3R<0b01110000001000000>, ++ LSX_3R_DESC_BASE<"vsubwev.h.b", int_loongarch_lsx_vsubwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWEV_W_H : LSX_3R<0b01110000001000001>, ++ LSX_3R_DESC_BASE<"vsubwev.w.h", int_loongarch_lsx_vsubwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWEV_D_W : LSX_3R<0b01110000001000010>, ++ LSX_3R_DESC_BASE<"vsubwev.d.w", int_loongarch_lsx_vsubwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWEV_Q_D : LSX_3R<0b01110000001000011>, ++ LSX_3R_DESC_BASE<"vsubwev.q.d", int_loongarch_lsx_vsubwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_B : LSX_3R<0b01110000001000100>, ++ LSX_3R_DESC_BASE<"vaddwod.h.b", int_loongarch_lsx_vaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_H : LSX_3R<0b01110000001000101>, ++ LSX_3R_DESC_BASE<"vaddwod.w.h", int_loongarch_lsx_vaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_W : LSX_3R<0b01110000001000110>, ++ LSX_3R_DESC_BASE<"vaddwod.d.w", int_loongarch_lsx_vaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_D : LSX_3R<0b01110000001000111>, ++ LSX_3R_DESC_BASE<"vaddwod.q.d", int_loongarch_lsx_vaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWOD_H_B : LSX_3R<0b01110000001001000>, ++ LSX_3R_DESC_BASE<"vsubwod.h.b", int_loongarch_lsx_vsubwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWOD_W_H : LSX_3R<0b01110000001001001>, ++ LSX_3R_DESC_BASE<"vsubwod.w.h", int_loongarch_lsx_vsubwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWOD_D_W : LSX_3R<0b01110000001001010>, ++ LSX_3R_DESC_BASE<"vsubwod.d.w", int_loongarch_lsx_vsubwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWOD_Q_D : LSX_3R<0b01110000001001011>, ++ LSX_3R_DESC_BASE<"vsubwod.q.d", int_loongarch_lsx_vsubwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWEV_H_BU : LSX_3R<0b01110000001011100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.bu", int_loongarch_lsx_vaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_HU : LSX_3R<0b01110000001011101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.hu", int_loongarch_lsx_vaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_WU : LSX_3R<0b01110000001011110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.wu", int_loongarch_lsx_vaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_DU : LSX_3R<0b01110000001011111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.du", int_loongarch_lsx_vaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWEV_H_BU : LSX_3R<0b01110000001100000>, ++ LSX_3R_DESC_BASE<"vsubwev.h.bu", int_loongarch_lsx_vsubwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWEV_W_HU : LSX_3R<0b01110000001100001>, ++ LSX_3R_DESC_BASE<"vsubwev.w.hu", int_loongarch_lsx_vsubwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWEV_D_WU : LSX_3R<0b01110000001100010>, ++ LSX_3R_DESC_BASE<"vsubwev.d.wu", int_loongarch_lsx_vsubwev_d_wu, 
LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWEV_Q_DU : LSX_3R<0b01110000001100011>, ++ LSX_3R_DESC_BASE<"vsubwev.q.du", int_loongarch_lsx_vsubwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_BU : LSX_3R<0b01110000001100100>, ++ LSX_3R_DESC_BASE<"vaddwod.h.bu", int_loongarch_lsx_vaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_HU : LSX_3R<0b01110000001100101>, ++ LSX_3R_DESC_BASE<"vaddwod.w.hu", int_loongarch_lsx_vaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_WU : LSX_3R<0b01110000001100110>, ++ LSX_3R_DESC_BASE<"vaddwod.d.wu", int_loongarch_lsx_vaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_DU : LSX_3R<0b01110000001100111>, ++ LSX_3R_DESC_BASE<"vaddwod.q.du", int_loongarch_lsx_vaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWOD_H_BU : LSX_3R<0b01110000001101000>, ++ LSX_3R_DESC_BASE<"vsubwod.h.bu", int_loongarch_lsx_vsubwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWOD_W_HU : LSX_3R<0b01110000001101001>, ++ LSX_3R_DESC_BASE<"vsubwod.w.hu", int_loongarch_lsx_vsubwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWOD_D_WU : LSX_3R<0b01110000001101010>, ++ LSX_3R_DESC_BASE<"vsubwod.d.wu", int_loongarch_lsx_vsubwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWOD_Q_DU : LSX_3R<0b01110000001101011>, ++ LSX_3R_DESC_BASE<"vsubwod.q.du", int_loongarch_lsx_vsubwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWEV_H_BU_B : LSX_3R<0b01110000001111100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.bu.b", int_loongarch_lsx_vaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_HU_H : LSX_3R<0b01110000001111101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.hu.h", int_loongarch_lsx_vaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_WU_W : LSX_3R<0b01110000001111110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.wu.w", int_loongarch_lsx_vaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_DU_D : LSX_3R<0b01110000001111111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.du.d", int_loongarch_lsx_vaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_BU_B : LSX_3R<0b01110000010000000>, ++ LSX_3R_DESC_BASE<"vaddwod.h.bu.b", int_loongarch_lsx_vaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_HU_H : LSX_3R<0b01110000010000001>, ++ LSX_3R_DESC_BASE<"vaddwod.w.hu.h", int_loongarch_lsx_vaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_WU_W : LSX_3R<0b01110000010000010>, ++ LSX_3R_DESC_BASE<"vaddwod.d.wu.w", int_loongarch_lsx_vaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_DU_D : LSX_3R<0b01110000010000011>, ++ LSX_3R_DESC_BASE<"vaddwod.q.du.d", int_loongarch_lsx_vaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VHADDW_Q_D : LSX_3R<0b01110000010101011>, ++ LSX_3R_DESC_BASE<"vhaddw.q.d", int_loongarch_lsx_vhaddw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VHSUBW_Q_D : LSX_3R<0b01110000010101111>, ++ LSX_3R_DESC_BASE<"vhsubw.q.d", int_loongarch_lsx_vhsubw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VHADDW_QU_DU : LSX_3R<0b01110000010110011>, ++ LSX_3R_DESC_BASE<"vhaddw.qu.du", int_loongarch_lsx_vhaddw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VHSUBW_QU_DU : LSX_3R<0b01110000010110111>, ++ LSX_3R_DESC_BASE<"vhsubw.qu.du", int_loongarch_lsx_vhsubw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMUH_B : LSX_3R<0b01110000100001100>, ++ 
LSX_3R_DESC_BASE<"vmuh.b", int_loongarch_lsx_vmuh_b, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMUH_H : LSX_3R<0b01110000100001101>, ++ LSX_3R_DESC_BASE<"vmuh.h", int_loongarch_lsx_vmuh_h, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMUH_W : LSX_3R<0b01110000100001110>, ++ LSX_3R_DESC_BASE<"vmuh.w", int_loongarch_lsx_vmuh_w, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMUH_D : LSX_3R<0b01110000100001111>, ++ LSX_3R_DESC_BASE<"vmuh.d", int_loongarch_lsx_vmuh_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMUH_BU : LSX_3R<0b01110000100010000>, ++ LSX_3R_DESC_BASE<"vmuh.bu", int_loongarch_lsx_vmuh_bu, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMUH_HU : LSX_3R<0b01110000100010001>, ++ LSX_3R_DESC_BASE<"vmuh.hu", int_loongarch_lsx_vmuh_hu, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMUH_WU : LSX_3R<0b01110000100010010>, ++ LSX_3R_DESC_BASE<"vmuh.wu", int_loongarch_lsx_vmuh_wu, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMUH_DU : LSX_3R<0b01110000100010011>, ++ LSX_3R_DESC_BASE<"vmuh.du", int_loongarch_lsx_vmuh_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_B : LSX_3R<0b01110000100100000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.b", int_loongarch_lsx_vmulwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_H : LSX_3R<0b01110000100100001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.h", int_loongarch_lsx_vmulwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_W : LSX_3R<0b01110000100100010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.w", int_loongarch_lsx_vmulwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_D : LSX_3R<0b01110000100100011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.d", int_loongarch_lsx_vmulwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_B : LSX_3R<0b01110000100100100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.b", int_loongarch_lsx_vmulwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_H : LSX_3R<0b01110000100100101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.h", int_loongarch_lsx_vmulwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_W : LSX_3R<0b01110000100100110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.w", int_loongarch_lsx_vmulwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_D : LSX_3R<0b01110000100100111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.d", int_loongarch_lsx_vmulwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_BU : LSX_3R<0b01110000100110000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.bu", int_loongarch_lsx_vmulwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_HU : LSX_3R<0b01110000100110001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.hu", int_loongarch_lsx_vmulwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_WU : LSX_3R<0b01110000100110010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.wu", int_loongarch_lsx_vmulwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_DU : LSX_3R<0b01110000100110011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.du", int_loongarch_lsx_vmulwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_BU : LSX_3R<0b01110000100110100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.bu", int_loongarch_lsx_vmulwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_HU : LSX_3R<0b01110000100110101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.hu", int_loongarch_lsx_vmulwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_WU : LSX_3R<0b01110000100110110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.wu", int_loongarch_lsx_vmulwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_DU : 
LSX_3R<0b01110000100110111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.du", int_loongarch_lsx_vmulwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_BU_B : LSX_3R<0b01110000101000000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.bu.b", int_loongarch_lsx_vmulwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_HU_H : LSX_3R<0b01110000101000001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.hu.h", int_loongarch_lsx_vmulwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_WU_W : LSX_3R<0b01110000101000010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.wu.w", int_loongarch_lsx_vmulwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_DU_D : LSX_3R<0b01110000101000011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.du.d", int_loongarch_lsx_vmulwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_BU_B : LSX_3R<0b01110000101000100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.bu.b", int_loongarch_lsx_vmulwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_HU_H : LSX_3R<0b01110000101000101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.hu.h", int_loongarch_lsx_vmulwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_WU_W : LSX_3R<0b01110000101000110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.wu.w", int_loongarch_lsx_vmulwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_DU_D : LSX_3R<0b01110000101000111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.du.d", int_loongarch_lsx_vmulwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_B : LSX_3R<0b01110000101011000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.b", int_loongarch_lsx_vmaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_H : LSX_3R<0b01110000101011001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.h", int_loongarch_lsx_vmaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_W : LSX_3R<0b01110000101011010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.w", int_loongarch_lsx_vmaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWEV_Q_D : LSX_3R<0b01110000101011011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.d", int_loongarch_lsx_vmaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_B : LSX_3R<0b01110000101011100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.b", int_loongarch_lsx_vmaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_H : LSX_3R<0b01110000101011101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.h", int_loongarch_lsx_vmaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_W : LSX_3R<0b01110000101011110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.w", int_loongarch_lsx_vmaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWOD_Q_D : LSX_3R<0b01110000101011111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.d", int_loongarch_lsx_vmaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_BU : LSX_3R<0b01110000101101000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu", int_loongarch_lsx_vmaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_HU : LSX_3R<0b01110000101101001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu", int_loongarch_lsx_vmaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_WU : LSX_3R<0b01110000101101010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu", int_loongarch_lsx_vmaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWEV_Q_DU : LSX_3R<0b01110000101101011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.du", int_loongarch_lsx_vmaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_BU : LSX_3R<0b01110000101101100>, ++ 
LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu", int_loongarch_lsx_vmaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_HU : LSX_3R<0b01110000101101101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu", int_loongarch_lsx_vmaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_WU : LSX_3R<0b01110000101101110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu", int_loongarch_lsx_vmaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWOD_Q_DU : LSX_3R<0b01110000101101111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.du", int_loongarch_lsx_vmaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_BU_B : LSX_3R<0b01110000101111000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu.b", int_loongarch_lsx_vmaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_HU_H : LSX_3R<0b01110000101111001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu.h", int_loongarch_lsx_vmaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_WU_W : LSX_3R<0b01110000101111010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu.w", int_loongarch_lsx_vmaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMADDWEV_Q_DU_D : LSX_3R<0b01110000101111011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.du.d", int_loongarch_lsx_vmaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_BU_B : LSX_3R<0b01110000101111100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu.b", int_loongarch_lsx_vmaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_HU_H : LSX_3R<0b01110000101111101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu.h", int_loongarch_lsx_vmaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_WU_W : LSX_3R<0b01110000101111110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu.w", int_loongarch_lsx_vmaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMADDWOD_Q_DU_D : LSX_3R<0b01110000101111111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.du.d", int_loongarch_lsx_vmaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRLN_B_H : LSX_3R<0b01110000111101001>, ++ LSX_3R_DESC_BASE<"vsrln.b.h", int_loongarch_lsx_vsrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRLN_H_W : LSX_3R<0b01110000111101010>, ++ LSX_3R_DESC_BASE<"vsrln.h.w", int_loongarch_lsx_vsrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRLN_W_D : LSX_3R<0b01110000111101011>, ++ LSX_3R_DESC_BASE<"vsrln.w.d", int_loongarch_lsx_vsrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRAN_B_H : LSX_3R<0b01110000111101101>, ++ LSX_3R_DESC_BASE<"vsran.b.h", int_loongarch_lsx_vsran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRAN_H_W : LSX_3R<0b01110000111101110>, ++ LSX_3R_DESC_BASE<"vsran.h.w", int_loongarch_lsx_vsran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRAN_W_D : LSX_3R<0b01110000111101111>, ++ LSX_3R_DESC_BASE<"vsran.w.d", int_loongarch_lsx_vsran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRLRN_B_H : LSX_3R<0b01110000111110001>, ++ LSX_3R_DESC_BASE<"vsrlrn.b.h", int_loongarch_lsx_vsrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRLRN_H_W : LSX_3R<0b01110000111110010>, ++ LSX_3R_DESC_BASE<"vsrlrn.h.w", int_loongarch_lsx_vsrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRLRN_W_D : LSX_3R<0b01110000111110011>, ++ LSX_3R_DESC_BASE<"vsrlrn.w.d", int_loongarch_lsx_vsrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRARN_B_H : LSX_3R<0b01110000111110101>, ++ LSX_3R_DESC_BASE<"vsrarn.b.h", int_loongarch_lsx_vsrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ 
++def VSRARN_H_W : LSX_3R<0b01110000111110110>, ++ LSX_3R_DESC_BASE<"vsrarn.h.w", int_loongarch_lsx_vsrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRARN_W_D : LSX_3R<0b01110000111110111>, ++ LSX_3R_DESC_BASE<"vsrarn.w.d", int_loongarch_lsx_vsrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLN_B_H : LSX_3R<0b01110000111111001>, ++ LSX_3R_DESC_BASE<"vssrln.b.h", int_loongarch_lsx_vssrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLN_H_W : LSX_3R<0b01110000111111010>, ++ LSX_3R_DESC_BASE<"vssrln.h.w", int_loongarch_lsx_vssrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLN_W_D : LSX_3R<0b01110000111111011>, ++ LSX_3R_DESC_BASE<"vssrln.w.d", int_loongarch_lsx_vssrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRAN_B_H : LSX_3R<0b01110000111111101>, ++ LSX_3R_DESC_BASE<"vssran.b.h", int_loongarch_lsx_vssran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRAN_H_W : LSX_3R<0b01110000111111110>, ++ LSX_3R_DESC_BASE<"vssran.h.w", int_loongarch_lsx_vssran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRAN_W_D : LSX_3R<0b01110000111111111>, ++ LSX_3R_DESC_BASE<"vssran.w.d", int_loongarch_lsx_vssran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRN_B_H : LSX_3R<0b01110001000000001>, ++ LSX_3R_DESC_BASE<"vssrlrn.b.h", int_loongarch_lsx_vssrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRN_H_W : LSX_3R<0b01110001000000010>, ++ LSX_3R_DESC_BASE<"vssrlrn.h.w", int_loongarch_lsx_vssrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRN_W_D : LSX_3R<0b01110001000000011>, ++ LSX_3R_DESC_BASE<"vssrlrn.w.d", int_loongarch_lsx_vssrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRARN_B_H : LSX_3R<0b01110001000000101>, ++ LSX_3R_DESC_BASE<"vssrarn.b.h", int_loongarch_lsx_vssrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRARN_H_W : LSX_3R<0b01110001000000110>, ++ LSX_3R_DESC_BASE<"vssrarn.h.w", int_loongarch_lsx_vssrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRARN_W_D : LSX_3R<0b01110001000000111>, ++ LSX_3R_DESC_BASE<"vssrarn.w.d", int_loongarch_lsx_vssrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLN_BU_H : LSX_3R<0b01110001000001001>, ++ LSX_3R_DESC_BASE<"vssrln.bu.h", int_loongarch_lsx_vssrln_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLN_HU_W : LSX_3R<0b01110001000001010>, ++ LSX_3R_DESC_BASE<"vssrln.hu.w", int_loongarch_lsx_vssrln_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLN_WU_D : LSX_3R<0b01110001000001011>, ++ LSX_3R_DESC_BASE<"vssrln.wu.d", int_loongarch_lsx_vssrln_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRAN_BU_H : LSX_3R<0b01110001000001101>, ++ LSX_3R_DESC_BASE<"vssran.bu.h", int_loongarch_lsx_vssran_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRAN_HU_W : LSX_3R<0b01110001000001110>, ++ LSX_3R_DESC_BASE<"vssran.hu.w", int_loongarch_lsx_vssran_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRAN_WU_D : LSX_3R<0b01110001000001111>, ++ LSX_3R_DESC_BASE<"vssran.wu.d", int_loongarch_lsx_vssran_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRN_BU_H : LSX_3R<0b01110001000010001>, ++ LSX_3R_DESC_BASE<"vssrlrn.bu.h", int_loongarch_lsx_vssrlrn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRN_HU_W : LSX_3R<0b01110001000010010>, ++ LSX_3R_DESC_BASE<"vssrlrn.hu.w", int_loongarch_lsx_vssrlrn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRN_WU_D : LSX_3R<0b01110001000010011>, ++ 
LSX_3R_DESC_BASE<"vssrlrn.wu.d", int_loongarch_lsx_vssrlrn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRARN_BU_H : LSX_3R<0b01110001000010101>, ++ LSX_3R_DESC_BASE<"vssrarn.bu.h", int_loongarch_lsx_vssrarn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRARN_HU_W : LSX_3R<0b01110001000010110>, ++ LSX_3R_DESC_BASE<"vssrarn.hu.w", int_loongarch_lsx_vssrarn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRARN_WU_D : LSX_3R<0b01110001000010111>, ++ LSX_3R_DESC_BASE<"vssrarn.wu.d", int_loongarch_lsx_vssrarn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VANDN_V : LSX_3R<0b01110001001010000>, ++ LSX_3R_DESC_BASE<"vandn.v", int_loongarch_lsx_vandn_v, LSX128BOpnd>; ++ ++ ++class LSX_VANDN_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk), ++ []>, ++ PseudoInstExpansion<(VANDN_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++def VANDN_H_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++def VANDN_W_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++def VANDN_D_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++ ++ ++ ++def VORN_V : LSX_3R<0b01110001001010001>, ++ LSX_3R_DESC_BASE<"vorn.v", int_loongarch_lsx_vorn_v, LSX128BOpnd>; ++ ++ ++class LSX_VORN_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk), ++ []>, ++ PseudoInstExpansion<(VORN_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++def VORN_H_PSEUDO : LSX_VORN_PSEUDO_BASE; ++def VORN_W_PSEUDO : LSX_VORN_PSEUDO_BASE; ++def VORN_D_PSEUDO : LSX_VORN_PSEUDO_BASE; ++ ++ ++def VFRSTP_B : LSX_3R<0b01110001001010110>, ++ LSX_3R_4R_DESC_BASE<"vfrstp.b", int_loongarch_lsx_vfrstp_b, LSX128BOpnd>; ++ ++def VFRSTP_H : LSX_3R<0b01110001001010111>, ++ LSX_3R_4R_DESC_BASE<"vfrstp.h", int_loongarch_lsx_vfrstp_h, LSX128HOpnd>; ++ ++ ++def VADD_Q : LSX_3R<0b01110001001011010>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.q", int_loongarch_lsx_vadd_q, LSX128DOpnd>; ++ ++def VSUB_Q : LSX_3R<0b01110001001011011>, ++ LSX_3R_DESC_BASE<"vsub.q", int_loongarch_lsx_vsub_q, LSX128DOpnd>; ++ ++ ++def VSIGNCOV_B : LSX_3R<0b01110001001011100>, ++ LSX_3R_DESC_BASE<"vsigncov.b", int_loongarch_lsx_vsigncov_b, LSX128BOpnd>; ++ ++def VSIGNCOV_H : LSX_3R<0b01110001001011101>, ++ LSX_3R_DESC_BASE<"vsigncov.h", int_loongarch_lsx_vsigncov_h, LSX128HOpnd>; ++ ++def VSIGNCOV_W : LSX_3R<0b01110001001011110>, ++ LSX_3R_DESC_BASE<"vsigncov.w", int_loongarch_lsx_vsigncov_w, LSX128WOpnd>; ++ ++def VSIGNCOV_D : LSX_3R<0b01110001001011111>, ++ LSX_3R_DESC_BASE<"vsigncov.d", int_loongarch_lsx_vsigncov_d, LSX128DOpnd>; ++ ++ ++def VFCVT_H_S : LSX_3R<0b01110001010001100>, ++ LSX_3RF_DESC_BASE<"vfcvt.h.s", int_loongarch_lsx_vfcvt_h_s, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VFCVT_S_D : LSX_3R<0b01110001010001101>, ++ LSX_3RF_DESC_BASE1<"vfcvt.s.d", int_loongarch_lsx_vfcvt_s_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VFFINT_S_L : LSX_3R<0b01110001010010000>, ++ LSX_3RF_DESC_BASE<"vffint.s.l", int_loongarch_lsx_vffint_s_l, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINT_W_D : LSX_3R<0b01110001010010011>, ++ LSX_3RF_DESC_BASE<"vftint.w.d", int_loongarch_lsx_vftint_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VFTINTRZ_W_D : LSX_3R<0b01110001010010110>, ++ LSX_3RF_DESC_BASE<"vftintrz.w.d", int_loongarch_lsx_vftintrz_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRP_W_D : LSX_3R<0b01110001010010101>, ++ LSX_3RF_DESC_BASE<"vftintrp.w.d", int_loongarch_lsx_vftintrp_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRM_W_D : LSX_3R<0b01110001010010100>, ++ LSX_3RF_DESC_BASE<"vftintrm.w.d", 
int_loongarch_lsx_vftintrm_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRNE_W_D : LSX_3R<0b01110001010010111>, ++ LSX_3RF_DESC_BASE<"vftintrne.w.d", int_loongarch_lsx_vftintrne_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBSRL_V : LSX_I5_U<0b01110010100011101>, ++ LSX_U5_DESC_BASE<"vbsrl.v", int_loongarch_lsx_vbsrl_v, LSX128BOpnd>; ++ ++def VBSLL_V : LSX_I5_U<0b01110010100011100>, ++ LSX_U5_DESC_BASE<"vbsll.v", int_loongarch_lsx_vbsll_v, LSX128BOpnd>; ++ ++ ++def VFRSTPI_B : LSX_I5_U<0b01110010100110100>, ++ LSX_U5_4R_DESC_BASE<"vfrstpi.b", int_loongarch_lsx_vfrstpi_b, LSX128BOpnd>; ++ ++def VFRSTPI_H : LSX_I5_U<0b01110010100110101>, ++ LSX_U5_4R_DESC_BASE<"vfrstpi.h", int_loongarch_lsx_vfrstpi_h, LSX128HOpnd>; ++ ++ ++def VNEG_B : LSX_2R<0b0111001010011100001100>, ++ LSX_2R_DESC_BASE<"vneg.b", int_loongarch_lsx_vneg_b, LSX128BOpnd>; ++ ++def VNEG_H : LSX_2R<0b0111001010011100001101>, ++ LSX_2R_DESC_BASE<"vneg.h", int_loongarch_lsx_vneg_h, LSX128HOpnd>; ++ ++def VNEG_W : LSX_2R<0b0111001010011100001110>, ++ LSX_2R_DESC_BASE<"vneg.w", int_loongarch_lsx_vneg_w, LSX128WOpnd>; ++ ++def VNEG_D : LSX_2R<0b0111001010011100001111>, ++ LSX_2R_DESC_BASE<"vneg.d", int_loongarch_lsx_vneg_d, LSX128DOpnd>; ++ ++ ++def VMSKGEZ_B : LSX_2R<0b0111001010011100010100>, ++ LSX_2R_DESC_BASE<"vmskgez.b", int_loongarch_lsx_vmskgez_b, LSX128BOpnd>; ++ ++def VMSKNZ_B : LSX_2R<0b0111001010011100011000>, ++ LSX_2R_DESC_BASE<"vmsknz.b", int_loongarch_lsx_vmsknz_b, LSX128BOpnd>; ++ ++ ++def VFRINTRM_S : LSX_2R<0b0111001010011101010001>, ++ LSX_2RF_DESC_BASE<"vfrintrm.s", int_loongarch_lsx_vfrintrm_s, LSX128WOpnd>; ++ ++def VFRINTRM_D : LSX_2R<0b0111001010011101010010>, ++ LSX_2RF_DESC_BASE<"vfrintrm.d", int_loongarch_lsx_vfrintrm_d, LSX128DOpnd>; ++ ++ ++def VFRINTRP_S : LSX_2R<0b0111001010011101010101>, ++ LSX_2RF_DESC_BASE<"vfrintrp.s", int_loongarch_lsx_vfrintrp_s, LSX128WOpnd>; ++ ++def VFRINTRP_D : LSX_2R<0b0111001010011101010110>, ++ LSX_2RF_DESC_BASE<"vfrintrp.d", int_loongarch_lsx_vfrintrp_d, LSX128DOpnd>; ++ ++ ++def VFRINTRZ_S : LSX_2R<0b0111001010011101011001>, ++ LSX_2RF_DESC_BASE<"vfrintrz.s", int_loongarch_lsx_vfrintrz_s, LSX128WOpnd>; ++ ++def VFRINTRZ_D : LSX_2R<0b0111001010011101011010>, ++ LSX_2RF_DESC_BASE<"vfrintrz.d", int_loongarch_lsx_vfrintrz_d, LSX128DOpnd>; ++ ++ ++def VFRINTRNE_S : LSX_2R<0b0111001010011101011101>, ++ LSX_2RF_DESC_BASE<"vfrintrne.s", int_loongarch_lsx_vfrintrne_s, LSX128WOpnd>; ++ ++def VFRINTRNE_D : LSX_2R<0b0111001010011101011110>, ++ LSX_2RF_DESC_BASE<"vfrintrne.d", int_loongarch_lsx_vfrintrne_d, LSX128DOpnd>; ++ ++ ++def VFFINTL_D_W : LSX_2R<0b0111001010011110000100>, ++ LSX_2RF_DESC_BASE<"vffintl.d.w", int_loongarch_lsx_vffintl_d_w, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFFINTH_D_W : LSX_2R<0b0111001010011110000101>, ++ LSX_2RF_DESC_BASE<"vffinth.d.w", int_loongarch_lsx_vffinth_d_w, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRM_W_S : LSX_2R<0b0111001010011110001110>, ++ LSX_2RF_DESC_BASE<"vftintrm.w.s", int_loongarch_lsx_vftintrm_w_s, LSX128WOpnd>; ++ ++def VFTINTRM_L_D : LSX_2R<0b0111001010011110001111>, ++ LSX_2RF_DESC_BASE<"vftintrm.l.d", int_loongarch_lsx_vftintrm_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTRP_W_S : LSX_2R<0b0111001010011110010000>, ++ LSX_2RF_DESC_BASE<"vftintrp.w.s", int_loongarch_lsx_vftintrp_w_s, LSX128WOpnd>; ++ ++def VFTINTRP_L_D : LSX_2R<0b0111001010011110010001>, ++ LSX_2RF_DESC_BASE<"vftintrp.l.d", int_loongarch_lsx_vftintrp_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTRZ_W_S : LSX_2R<0b0111001010011110010010>, ++ 
LSX_2RF_DESC_BASE<"vftintrz.w.s", fp_to_sint, LSX128WOpnd>; ++ ++def VFTINTRZ_L_D : LSX_2R<0b0111001010011110010011>, ++ LSX_2RF_DESC_BASE<"vftintrz.l.d", fp_to_sint, LSX128DOpnd>; ++ ++ ++def VFTINTRNE_W_S : LSX_2R<0b0111001010011110010100>, ++ LSX_2RF_DESC_BASE<"vftintrne.w.s", int_loongarch_lsx_vftintrne_w_s, LSX128WOpnd>; ++ ++def VFTINTRNE_L_D : LSX_2R<0b0111001010011110010101>, ++ LSX_2RF_DESC_BASE<"vftintrne.l.d", int_loongarch_lsx_vftintrne_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTL_L_S : LSX_2R<0b0111001010011110100000>, ++ LSX_2RF_DESC_BASE<"vftintl.l.s", int_loongarch_lsx_vftintl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTH_L_S : LSX_2R<0b0111001010011110100001>, ++ LSX_2RF_DESC_BASE<"vftinth.l.s", int_loongarch_lsx_vftinth_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRML_L_S : LSX_2R<0b0111001010011110100010>, ++ LSX_2RF_DESC_BASE<"vftintrml.l.s", int_loongarch_lsx_vftintrml_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRMH_L_S : LSX_2R<0b0111001010011110100011>, ++ LSX_2RF_DESC_BASE<"vftintrmh.l.s", int_loongarch_lsx_vftintrmh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRPL_L_S : LSX_2R<0b0111001010011110100100>, ++ LSX_2RF_DESC_BASE<"vftintrpl.l.s", int_loongarch_lsx_vftintrpl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRPH_L_S : LSX_2R<0b0111001010011110100101>, ++ LSX_2RF_DESC_BASE<"vftintrph.l.s", int_loongarch_lsx_vftintrph_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRZL_L_S : LSX_2R<0b0111001010011110100110>, ++ LSX_2RF_DESC_BASE<"vftintrzl.l.s", int_loongarch_lsx_vftintrzl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRZH_L_S : LSX_2R<0b0111001010011110100111>, ++ LSX_2RF_DESC_BASE<"vftintrzh.l.s", int_loongarch_lsx_vftintrzh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRNEL_L_S : LSX_2R<0b0111001010011110101000>, ++ LSX_2RF_DESC_BASE<"vftintrnel.l.s", int_loongarch_lsx_vftintrnel_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRNEH_L_S : LSX_2R<0b0111001010011110101001>, ++ LSX_2RF_DESC_BASE<"vftintrneh.l.s", int_loongarch_lsx_vftintrneh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VEXTH_H_B : LSX_2R<0b0111001010011110111000>, ++ LSX_2R_DESC_BASE<"vexth.h.b", int_loongarch_lsx_vexth_h_b, LSX128HOpnd, LSX128BOpnd>; ++ ++def VEXTH_W_H : LSX_2R<0b0111001010011110111001>, ++ LSX_2R_DESC_BASE<"vexth.w.h", int_loongarch_lsx_vexth_w_h, LSX128WOpnd, LSX128HOpnd>; ++ ++def VEXTH_D_W : LSX_2R<0b0111001010011110111010>, ++ LSX_2R_DESC_BASE<"vexth.d.w", int_loongarch_lsx_vexth_d_w, LSX128DOpnd, LSX128WOpnd> ; ++ ++def VEXTH_Q_D : LSX_2R<0b0111001010011110111011>, ++ LSX_2R_DESC_BASE<"vexth.q.d", int_loongarch_lsx_vexth_q_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VEXTH_HU_BU : LSX_2R<0b0111001010011110111100>, ++ LSX_2R_DESC_BASE<"vexth.hu.bu", int_loongarch_lsx_vexth_hu_bu, LSX128HOpnd, LSX128BOpnd>; ++ ++def VEXTH_WU_HU : LSX_2R<0b0111001010011110111101>, ++ LSX_2R_DESC_BASE<"vexth.wu.hu", int_loongarch_lsx_vexth_wu_hu, LSX128WOpnd, LSX128HOpnd>; ++ ++def VEXTH_DU_WU : LSX_2R<0b0111001010011110111110>, ++ LSX_2R_DESC_BASE<"vexth.du.wu", int_loongarch_lsx_vexth_du_wu, LSX128DOpnd, LSX128WOpnd> ; ++ ++def VEXTH_QU_DU : LSX_2R<0b0111001010011110111111>, ++ LSX_2R_DESC_BASE<"vexth.qu.du", int_loongarch_lsx_vexth_qu_du, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSLLWIL_H_B : LSX_I3_U<0b0111001100001000001>, ++ LSX_2R_U3_DESC_BASE<"vsllwil.h.b", int_loongarch_lsx_vsllwil_h_b, LSX128HOpnd, LSX128BOpnd>; ++ ++def VSLLWIL_W_H : LSX_I4_U<0b011100110000100001>, ++ LSX_2R_U4_DESC_BASE<"vsllwil.w.h", int_loongarch_lsx_vsllwil_w_h, LSX128WOpnd, LSX128HOpnd>; ++ ++def 
VSLLWIL_D_W : LSX_I5_U<0b01110011000010001>, ++ LSX_2R_U5_DESC_BASE<"vsllwil.d.w", int_loongarch_lsx_vsllwil_d_w, LSX128DOpnd, LSX128WOpnd> ; ++ ++ ++def VEXTL_Q_D : LSX_2R<0b0111001100001001000000>, ++ LSX_2R_DESC_BASE<"vextl.q.d", int_loongarch_lsx_vextl_q_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSLLWIL_HU_BU : LSX_I3_U<0b0111001100001100001>, ++ LSX_2R_U3_DESC_BASE<"vsllwil.hu.bu", int_loongarch_lsx_vsllwil_hu_bu, LSX128HOpnd, LSX128BOpnd>; ++ ++def VSLLWIL_WU_HU : LSX_I4_U<0b011100110000110001>, ++ LSX_2R_U4_DESC_BASE<"vsllwil.wu.hu", int_loongarch_lsx_vsllwil_wu_hu, LSX128WOpnd, LSX128HOpnd>; ++ ++def VSLLWIL_DU_WU : LSX_I5_U<0b01110011000011001>, ++ LSX_2R_U5_DESC_BASE<"vsllwil.du.wu", int_loongarch_lsx_vsllwil_du_wu, LSX128DOpnd, LSX128WOpnd> ; ++ ++ ++def VEXTL_QU_DU : LSX_2R<0b0111001100001101000000>, ++ LSX_2R_DESC_BASE<"vextl.qu.du", int_loongarch_lsx_vextl_qu_du, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITCLRI_B : LSX_I3_U<0b0111001100010000001>, ++ LSX_2R_U3_DESC_BASE<"vbitclri.b", int_loongarch_lsx_vbitclri_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITCLRI_H : LSX_I4_U<0b011100110001000001>, ++ LSX_2R_U4_DESC_BASE<"vbitclri.h", int_loongarch_lsx_vbitclri_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITCLRI_W : LSX_I5_U<0b01110011000100001>, ++ LSX_2R_U5_DESC_BASE<"vbitclri.w", int_loongarch_lsx_vbitclri_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITCLRI_D : LSX_I6_U<0b0111001100010001>, ++ LSX_2R_U6_DESC_BASE<"vbitclri.d", int_loongarch_lsx_vbitclri_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITSETI_B : LSX_I3_U<0b0111001100010100001>, ++ LSX_2R_U3_DESC_BASE<"vbitseti.b", int_loongarch_lsx_vbitseti_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITSETI_H : LSX_I4_U<0b011100110001010001>, ++ LSX_2R_U4_DESC_BASE<"vbitseti.h", int_loongarch_lsx_vbitseti_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITSETI_W : LSX_I5_U<0b01110011000101001>, ++ LSX_2R_U5_DESC_BASE<"vbitseti.w", int_loongarch_lsx_vbitseti_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITSETI_D : LSX_I6_U<0b0111001100010101>, ++ LSX_2R_U6_DESC_BASE<"vbitseti.d", int_loongarch_lsx_vbitseti_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITREVI_B : LSX_I3_U<0b0111001100011000001>, ++ LSX_2R_U3_DESC_BASE<"vbitrevi.b", int_loongarch_lsx_vbitrevi_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITREVI_H : LSX_I4_U<0b011100110001100001>, ++ LSX_2R_U4_DESC_BASE<"vbitrevi.h", int_loongarch_lsx_vbitrevi_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITREVI_W : LSX_I5_U<0b01110011000110001>, ++ LSX_2R_U5_DESC_BASE<"vbitrevi.w", int_loongarch_lsx_vbitrevi_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITREVI_D : LSX_I6_U<0b0111001100011001>, ++ LSX_2R_U6_DESC_BASE<"vbitrevi.d", int_loongarch_lsx_vbitrevi_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRNI_B_H : LSX_I4_U<0b011100110101000001>, ++ LSX_2R_3R_U4_DESC_BASE<"vssrlrni.b.h", int_loongarch_lsx_vssrlrni_b_h, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSSRLRNI_H_W : LSX_I5_U<0b01110011010100001>, ++ LSX_2R_3R_U5_DESC_BASE<"vssrlrni.h.w", int_loongarch_lsx_vssrlrni_h_w, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRNI_W_D : LSX_I6_U<0b0111001101010001>, ++ LSX_2R_3R_U6_DESC_BASE<"vssrlrni.w.d", int_loongarch_lsx_vssrlrni_w_d, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRNI_D_Q : LSX_I7_U<0b011100110101001>, ++ LSX_2R_3R_U7_DESC_BASE<"vssrlrni.d.q", int_loongarch_lsx_vssrlrni_d_q, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRANI_B_H : LSX_I4_U<0b011100110101100001>, ++ LSX_2R_3R_U4_DESC_BASE<"vsrani.b.h", int_loongarch_lsx_vsrani_b_h, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSRANI_H_W : LSX_I5_U<0b01110011010110001>, ++ 
LSX_2R_3R_U5_DESC_BASE<"vsrani.h.w", int_loongarch_lsx_vsrani_h_w, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRANI_W_D : LSX_I6_U<0b0111001101011001>, ++ LSX_2R_3R_U6_DESC_BASE<"vsrani.w.d", int_loongarch_lsx_vsrani_w_d, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRANI_D_Q : LSX_I7_U<0b011100110101101>, ++ LSX_2R_3R_U7_DESC_BASE<"vsrani.d.q", int_loongarch_lsx_vsrani_d_q, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VEXTRINS_B : LSX_I8_U<0b01110011100011>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.b", int_loongarch_lsx_vextrins_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VEXTRINS_H : LSX_I8_U<0b01110011100010>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.h", int_loongarch_lsx_vextrins_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VEXTRINS_W : LSX_I8_U<0b01110011100001>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.w", int_loongarch_lsx_vextrins_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VEXTRINS_D : LSX_I8_U<0b01110011100000>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.d", int_loongarch_lsx_vextrins_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITSELI_B : LSX_I8_U<0b01110011110001>, ++ LSX_2R_3R_U8_DESC_BASE<"vbitseli.b", int_loongarch_lsx_vbitseli_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VANDI_B : LSX_I8_U<0b01110011110100>, ++ LSX_2R_U8_DESC_BASE<"vandi.b", int_loongarch_lsx_vandi_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VORI_B : LSX_I8_U<0b01110011110101>, ++ LSX_2R_U8_DESC_BASE<"vori.b", int_loongarch_lsx_vori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VXORI_B : LSX_I8_U<0b01110011110110>, ++ LSX_2R_U8_DESC_BASE<"vxori.b", int_loongarch_lsx_vxori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VNORI_B : LSX_I8_U<0b01110011110111>, ++ LSX_2R_U8_DESC_BASE<"vnori.b", int_loongarch_lsx_vnori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VLDI : LSX_1R_I13<0b01110011111000>, ++ LSX_I13_DESC_BASE<"vldi", int_loongarch_lsx_vldi, i32, simm13Op, LSX128DOpnd>; ++ ++def VLDI_B : LSX_1R_I13_I10<0b01110011111000000>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128BOpnd>; ++ ++def VLDI_H : LSX_1R_I13_I10<0b01110011111000001>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128HOpnd>; ++ ++def VLDI_W : LSX_1R_I13_I10<0b01110011111000010>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128WOpnd>; ++ ++def VLDI_D : LSX_1R_I13_I10<0b01110011111000011>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128DOpnd>; ++ ++def VPERMI_W : LSX_I8_U<0b01110011111001>, ++ LSX_2R_3R_U8_DESC_BASE<"vpermi.w", int_loongarch_lsx_vpermi_w, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VSEQ_B : LSX_3R<0b01110000000000000>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.b", vseteq_v16i8, LSX128BOpnd>; ++ ++def VSEQ_H : LSX_3R<0b01110000000000001>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.h", vseteq_v8i16, LSX128HOpnd>; ++ ++def VSEQ_W : LSX_3R<0b01110000000000010>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.w", vseteq_v4i32, LSX128WOpnd> ; ++ ++def VSEQ_D : LSX_3R<0b01110000000000011>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.d", vseteq_v2i64, LSX128DOpnd>; ++ ++ ++def VSLE_B : LSX_3R<0b01110000000000100>, ++ LSX_3R_DESC_BASE<"vsle.b", vsetle_v16i8, LSX128BOpnd>; ++ ++def VSLE_H : LSX_3R<0b01110000000000101>, ++ LSX_3R_DESC_BASE<"vsle.h", vsetle_v8i16, LSX128HOpnd>; ++ ++def VSLE_W : LSX_3R<0b01110000000000110>, ++ LSX_3R_DESC_BASE<"vsle.w", vsetle_v4i32, LSX128WOpnd>; ++ ++def VSLE_D : LSX_3R<0b01110000000000111>, ++ LSX_3R_DESC_BASE<"vsle.d", vsetle_v2i64, LSX128DOpnd>; ++ ++ ++def VSLE_BU : LSX_3R<0b01110000000001000>, ++ LSX_3R_DESC_BASE<"vsle.bu", vsetule_v16i8, LSX128BOpnd>; ++ ++def VSLE_HU : LSX_3R<0b01110000000001001>, ++ LSX_3R_DESC_BASE<"vsle.hu", vsetule_v8i16, LSX128HOpnd>; ++ ++def VSLE_WU : LSX_3R<0b01110000000001010>, ++ 
LSX_3R_DESC_BASE<"vsle.wu", vsetule_v4i32, LSX128WOpnd>; ++ ++def VSLE_DU : LSX_3R<0b01110000000001011>, ++ LSX_3R_DESC_BASE<"vsle.du", vsetule_v2i64, LSX128DOpnd>; ++ ++ ++def VSLT_B : LSX_3R<0b01110000000001100>, ++ LSX_3R_DESC_BASE<"vslt.b", vsetlt_v16i8, LSX128BOpnd>; ++ ++def VSLT_H : LSX_3R<0b01110000000001101>, ++ LSX_3R_DESC_BASE<"vslt.h", vsetlt_v8i16, LSX128HOpnd>; ++ ++def VSLT_W : LSX_3R<0b01110000000001110>, ++ LSX_3R_DESC_BASE<"vslt.w", vsetlt_v4i32, LSX128WOpnd>; ++ ++def VSLT_D : LSX_3R<0b01110000000001111>, ++ LSX_3R_DESC_BASE<"vslt.d", vsetlt_v2i64, LSX128DOpnd>; ++ ++ ++def VSLT_BU : LSX_3R<0b01110000000010000>, ++ LSX_3R_DESC_BASE<"vslt.bu", vsetult_v16i8, LSX128BOpnd>; ++ ++def VSLT_HU : LSX_3R<0b01110000000010001>, ++ LSX_3R_DESC_BASE<"vslt.hu", vsetult_v8i16, LSX128HOpnd>; ++ ++def VSLT_WU : LSX_3R<0b01110000000010010>, ++ LSX_3R_DESC_BASE<"vslt.wu", vsetult_v4i32, LSX128WOpnd>; ++ ++def VSLT_DU : LSX_3R<0b01110000000010011>, ++ LSX_3R_DESC_BASE<"vslt.du", vsetult_v2i64, LSX128DOpnd>; ++ ++ ++def VADD_B : LSX_3R<0b01110000000010100>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.b", add, LSX128BOpnd>; ++ ++def VADD_H : LSX_3R<0b01110000000010101>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.h", add, LSX128HOpnd>; ++ ++def VADD_W : LSX_3R<0b01110000000010110>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.w", add, LSX128WOpnd>; ++ ++def VADD_D : LSX_3R<0b01110000000010111>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.d", add, LSX128DOpnd>; ++ ++ ++def VSUB_B : LSX_3R<0b01110000000011000>, ++ LSX_3R_DESC_BASE<"vsub.b", sub, LSX128BOpnd>; ++ ++def VSUB_H : LSX_3R<0b01110000000011001>, ++ LSX_3R_DESC_BASE<"vsub.h", sub, LSX128HOpnd>; ++ ++def VSUB_W : LSX_3R<0b01110000000011010>, ++ LSX_3R_DESC_BASE<"vsub.w", sub, LSX128WOpnd>; ++ ++def VSUB_D : LSX_3R<0b01110000000011011>, ++ LSX_3R_DESC_BASE<"vsub.d", sub, LSX128DOpnd>; ++ ++ ++ ++//Pat ++class LSXBitconvertPat preds = [HasLSX]> : ++ LSXPat<(DstVT (bitconvert SrcVT:$src)), ++ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++ ++ ++ ++def : LSXPat<(i32 (vextract_sext_i8 v16i8:$vj, i32:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 24))>; ++def : LSXPat<(i32 (vextract_sext_i16 v8i16:$vj, i32:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 16))>; ++def : LSXPat<(i32 (vextract_sext_i32 v4i32:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32)>; ++def : LSXPat<(i64 (vextract_sext_i64 v2i64:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, ++ i32:$idx), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat<(i32 
(vextract_zext_i8 v16i8:$vj, i32:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 24))>; ++def : LSXPat<(i32 (vextract_zext_i16 v8i16:$vj, i32:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 16))>; ++def : LSXPat<(i32 (vextract_zext_i32 v4i32:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32)>; ++ ++def : LSXPat<(i64 (vextract_zext_i64 v2i64:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, ++ i32:$idx), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat<(f32 (vector_extract v4f32:$vj, i32:$idx)), ++ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, ++ i32:$idx), ++ sub_lo))>; ++def : LSXPat<(f64 (vector_extract v2f64:$vj, i32:$idx)), ++ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, ++ i32:$idx), ++ sub_64))>; ++ ++def : LSXPat< ++ (i32 (vextract_sext_i8 v16i8:$vj, i64:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_B v16i8:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 24))>; ++def : LSXPat< ++ (i32 (vextract_sext_i16 v8i16:$vj, i64:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_H v8i16:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 16))>; ++ ++def : LSXPat< ++ (i32 (vextract_sext_i32 v4i32:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_W v4i32:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32)>; ++ ++def : LSXPat< ++ (i64 (vextract_sext_i64 v2i64:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i64 (EXTRACT_SUBREG ++ (VREPLVE_D v2i64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat< ++ (i32 (vextract_zext_i8 v16i8:$vj, i64:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_B v16i8:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 24))>; ++def : LSXPat< ++ (i32 (vextract_zext_i16 v8i16:$vj, i64:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_H v8i16:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 16))>; ++def : LSXPat< ++ (i32 (vextract_zext_i32 v4i32:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_W v4i32:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32)>; ++def : LSXPat< ++ (i64 (vextract_zext_i64 v2i64:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i64 (EXTRACT_SUBREG ++ (VREPLVE_D v2i64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64)), ++ GPR64)>; ++ ++ def : LSXPat< ++ (f32 (vector_extract v4f32:$vj, i64:$idx)), ++ (f32 (EXTRACT_SUBREG ++ (VREPLVE_W v4f32:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo))>; ++def : LSXPat< ++ (f64 (vector_extract v2f64:$vj, i64:$idx)), ++ (f64 (EXTRACT_SUBREG ++ (VREPLVE_D v2f64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64))>; ++ ++ ++def : LSXPat<(vfseteq_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CEQ_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfseteq_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CEQ_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ 
++def : LSXPat<(vfsetle_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CLE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetle_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CLE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetlt_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CLT_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetlt_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CLT_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetne_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CNE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetne_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CNE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++ ++class LSX_INSERT_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ImmOp:$n, ROFS:$fs), ++ [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, Imm:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++ ++class INSERT_FW_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE; ++class INSERT_FD_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE; ++ ++def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC; ++def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC; ++ ++ ++class LSX_INSERT_VIDX_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ROIdx:$n, ROFS:$fs), ++ [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, ++ ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class INSERT_H_VIDX64_PSEUDO_DESC : ++ LSX_INSERT_VIDX_PSEUDO_BASE; ++def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC; ++ ++class INSERTPostRA : ++ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++def INSERT_H_VIDX64_PSEUDO_POSTRA : INSERTPostRA; ++ ++class LSX_COPY_PSEUDO_BASE : ++ LSXPseudo<(outs RCD:$vd), (ins RCVS:$vj, ImmOp:$n), ++ [(set RCD:$vd, (OpNode (VecTy RCVS:$vj), Imm:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++ ++class COPY_FW_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE; ++class COPY_FD_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE; ++def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC; ++def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC; ++ ++ ++let isCodeGenOnly = 1 in { ++ ++def VST_H : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v8i16, LSX128HOpnd, mem_simm12>; ++def VST_W : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v4i32, LSX128WOpnd, mem_simm12>; ++def VST_D : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v2i64, LSX128DOpnd, mem_simm12>; ++ ++ ++def VLD_H : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v8i16, LSX128HOpnd, mem_simm12>; ++def VLD_W : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v4i32, LSX128WOpnd, mem_simm12>; ++def VLD_D : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v2i64, LSX128DOpnd, mem_simm12>; ++ ++ ++ ++def VANDI_B_N : LSX_I8_U<0b01110011110100>, ++ LSX_BIT_U8_VREPLVE_DESC_BASE<"vandi.b", and, vsplati8_uimm8, LSX128BOpnd>; ++ ++ ++def VXORI_B_N : LSX_I8_U<0b01110011110110>, ++ LSX_BIT_U8_VREPLVE_DESC_BASE<"vxori.b", xor, vsplati8_uimm8, LSX128BOpnd>; ++ ++ ++def VSRAI_B_N : LSX_I3_U<0b0111001100110100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrai.b", sra, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSRAI_H_N : LSX_I4_U<0b011100110011010001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrai.h", sra, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSRAI_W_N : LSX_I5_U<0b01110011001101001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrai.w", sra, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSRAI_D_N : LSX_I6_U<0b0111001100110101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrai.d", sra, 
vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VMAXI_BU_N : LSX_I5_U<0b01110010100101000>, ++ LSX_I5_U_DESC_BASE<"vmaxi.bu", umax, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VMAXI_HU_N : LSX_I5_U<0b01110010100101001>, ++ LSX_I5_U_DESC_BASE<"vmaxi.hu", umax, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VMAXI_WU_N : LSX_I5_U<0b01110010100101010>, ++ LSX_I5_U_DESC_BASE<"vmaxi.wu", umax, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VMAXI_DU_N : LSX_I5_U<0b01110010100101011>, ++ LSX_I5_U_DESC_BASE<"vmaxi.du", umax, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VMINI_B_N : LSX_I5<0b01110010100100100>, ++ LSX_I5_DESC_BASE<"vmini.b", smin, vsplati8_simm5, LSX128BOpnd>; ++ ++def VMINI_H_N : LSX_I5<0b01110010100100101>, ++ LSX_I5_DESC_BASE<"vmini.h", smin, vsplati16_simm5, LSX128HOpnd>; ++ ++def VMINI_W_N : LSX_I5<0b01110010100100110>, ++ LSX_I5_DESC_BASE<"vmini.w", smin, vsplati32_simm5, LSX128WOpnd>; ++ ++def VMINI_D_N : LSX_I5<0b01110010100100111>, ++ LSX_I5_DESC_BASE<"vmini.d", smin, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VMAXI_B_N : LSX_I5<0b01110010100100000>, ++ LSX_I5_DESC_BASE<"vmaxi.b", smax, vsplati8_simm5, LSX128BOpnd>; ++ ++def VMAXI_H_N : LSX_I5<0b01110010100100001>, ++ LSX_I5_DESC_BASE<"vmaxi.h", smax, vsplati16_simm5, LSX128HOpnd>; ++ ++def VMAXI_W_N : LSX_I5<0b01110010100100010>, ++ LSX_I5_DESC_BASE<"vmaxi.w", smax, vsplati32_simm5, LSX128WOpnd>; ++ ++def VMAXI_D_N : LSX_I5<0b01110010100100011>, ++ LSX_I5_DESC_BASE<"vmaxi.d", smax, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSEQI_B_N : LSX_I5<0b01110010100000000>, ++ LSX_I5_DESC_BASE<"vseqi.b", vseteq_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSEQI_H_N : LSX_I5<0b01110010100000001>, ++ LSX_I5_DESC_BASE<"vseqi.h", vseteq_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSEQI_W_N : LSX_I5<0b01110010100000010>, ++ LSX_I5_DESC_BASE<"vseqi.w", vseteq_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSEQI_D_N : LSX_I5<0b01110010100000011>, ++ LSX_I5_DESC_BASE<"vseqi.d", vseteq_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSLEI_B_N : LSX_I5<0b01110010100000100>, ++ LSX_I5_DESC_BASE<"vslei.b", vsetle_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSLEI_H_N : LSX_I5<0b01110010100000101>, ++ LSX_I5_DESC_BASE<"vslei.h", vsetle_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSLEI_W_N : LSX_I5<0b01110010100000110>, ++ LSX_I5_DESC_BASE<"vslei.w", vsetle_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSLEI_D_N : LSX_I5<0b01110010100000111>, ++ LSX_I5_DESC_BASE<"vslei.d", vsetle_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++def VSLEI_BU_N : LSX_I5_U<0b01110010100001000>, ++ LSX_I5_U_DESC_BASE<"vslei.bu", vsetule_v16i8, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSLEI_HU_N : LSX_I5_U<0b01110010100001001>, ++ LSX_I5_U_DESC_BASE<"vslei.hu", vsetule_v8i16, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSLEI_WU_N : LSX_I5_U<0b01110010100001010>, ++ LSX_I5_U_DESC_BASE<"vslei.wu", vsetule_v4i32, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLEI_DU_N : LSX_I5_U<0b01110010100001011>, ++ LSX_I5_U_DESC_BASE<"vslei.du", vsetule_v2i64, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VSLTI_B_N : LSX_I5<0b01110010100001100>, ++ LSX_I5_DESC_BASE<"vslti.b", vsetlt_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSLTI_H_N : LSX_I5<0b01110010100001101>, ++ LSX_I5_DESC_BASE<"vslti.h", vsetlt_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSLTI_W_N : LSX_I5<0b01110010100001110>, ++ LSX_I5_DESC_BASE<"vslti.w", vsetlt_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSLTI_D_N : LSX_I5<0b01110010100001111>, ++ LSX_I5_DESC_BASE<"vslti.d", vsetlt_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSLTI_BU_N : 
LSX_I5_U<0b01110010100010000>, ++ LSX_I5_U_DESC_BASE<"vslti.bu", vsetult_v16i8, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSLTI_HU_N : LSX_I5_U<0b01110010100010001>, ++ LSX_I5_U_DESC_BASE<"vslti.hu", vsetult_v8i16, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSLTI_WU_N : LSX_I5_U<0b01110010100010010>, ++ LSX_I5_U_DESC_BASE<"vslti.wu", vsetult_v4i32, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLTI_DU_N : LSX_I5_U<0b01110010100010011>, ++ LSX_I5_U_DESC_BASE<"vslti.du", vsetult_v2i64, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VBITSELI_B_N : LSX_I8_U<0b01110011110001>, ++ LSX_2R_3R_SELECT<"vbitseli.b", vselect, LSX128BOpnd, LSX128BOpnd>; ++ ++} ++ ++ ++def : LSXPat<(v4f32 (load addrimm12:$addr)), (VLD_W addrimm12:$addr)>; ++def : LSXPat<(v2f64 (load addrimm12:$addr)), (VLD_D addrimm12:$addr)>; ++ ++def VST_FW : LSXPat<(store (v4f32 LSX128W:$vj), addrimm12:$addr), ++ (VST_W LSX128W:$vj, addrimm12:$addr)>; ++def VST_FD : LSXPat<(store (v2f64 LSX128D:$vj), addrimm12:$addr), ++ (VST_D LSX128D:$vj, addrimm12:$addr)>; ++ ++def VNEG_FW : LSXPat<(fneg (v4f32 LSX128W:$vj)), ++ (VBITREVI_W LSX128W:$vj, 31)>; ++def VNEG_FD : LSXPat<(fneg (v2f64 LSX128D:$vj)), ++ (VBITREVI_D LSX128D:$vj, 63)>; ++ ++ ++def : LSXPat<(v2i64 (LoongArchVABSD v2i64:$vj, v2i64:$vk, (i32 0))), ++ (v2i64 (VABSD_D $vj, $vk))>; ++ ++def : LSXPat<(v4i32 (LoongArchVABSD v4i32:$vj, v4i32:$vk, (i32 0))), ++ (v4i32 (VABSD_W $vj, $vk))>; ++ ++def : LSXPat<(v8i16 (LoongArchVABSD v8i16:$vj, v8i16:$vk, (i32 0))), ++ (v8i16 (VABSD_H $vj, $vk))>; ++ ++def : LSXPat<(v16i8 (LoongArchVABSD v16i8:$vj, v16i8:$vk, (i32 0))), ++ (v16i8 (VABSD_B $vj, $vk))>; ++ ++def : LSXPat<(v2i64 (LoongArchUVABSD v2i64:$vj, v2i64:$vk, (i32 0))), ++ (v2i64 (VABSD_DU $vj, $vk))>; ++ ++def : LSXPat<(v4i32 (LoongArchUVABSD v4i32:$vj, v4i32:$vk, (i32 0))), ++ (v4i32 (VABSD_WU $vj, $vk))>; ++ ++def : LSXPat<(v8i16 (LoongArchUVABSD v8i16:$vj, v8i16:$vk, (i32 0))), ++ (v8i16 (VABSD_HU $vj, $vk))>; ++ ++def : LSXPat<(v16i8 (LoongArchUVABSD v16i8:$vj, v16i8:$vk, (i32 0))), ++ (v16i8 (VABSD_BU $vj, $vk))>; ++ ++ ++def : LSXPat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (VBITSET_B v16i8:$vj, v16i8:$vk)>; ++def : LSXPat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (VBITSET_H v8i16:$vj, v8i16:$vk)>; ++def : LSXPat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (VBITSET_W v4i32:$vj, v4i32:$vk)>; ++def : LSXPat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), ++ (VBITSET_D v2i64:$vj, v2i64:$vk)>; ++ ++def : LSXPat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (VBITREV_B v16i8:$vj, v16i8:$vk)>; ++def : LSXPat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (VBITREV_H v8i16:$vj, v8i16:$vk)>; ++def : LSXPat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (VBITREV_W v4i32:$vj, v4i32:$vk)>; ++def : LSXPat<(xor v2i64:$vj, (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk)), ++ (VBITREV_D v2i64:$vj, v2i64:$vk)>; ++ ++def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), immAllOnesV)), ++ (VBITCLR_B v16i8:$vj, v16i8:$vk)>; ++def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), immAllOnesV)), ++ (VBITCLR_H v8i16:$vj, v8i16:$vk)>; ++def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), immAllOnesV)), ++ (VBITCLR_W v4i32:$vj, v4i32:$vk)>; ++def : LSXPat<(and v2i64:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk), (bitconvert (v4i32 immAllOnesV)))), ++ (VBITCLR_D v2i64:$vj, v2i64:$vk)>; ++def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = 
N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def immi32Cst7 : ImmLeaf(Imm) && Imm == 7;}]>; ++def immi32Cst15 : ImmLeaf(Imm) && Imm == 15;}]>; ++def immi32Cst31 : ImmLeaf(Imm) && Imm == 31;}]>; ++ ++def vsplati8imm7 : PatFrag<(ops node:$vt), ++ (and node:$vt, (vsplati8 immi32Cst7))>; ++def vsplati16imm15 : PatFrag<(ops node:$vt), ++ (and node:$vt, (vsplati16 immi32Cst15))>; ++def vsplati32imm31 : PatFrag<(ops node:$vt), ++ (and node:$vt, (vsplati32 immi32Cst31))>; ++def vsplati64imm63 : PatFrag<(ops node:$vt), ++ (and node:$vt, vsplati64_imm_eq_63)>; ++ ++class LSXShiftPat : ++ LSXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++class LSXBitPat : ++ LSXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++multiclass LSXShiftPats { ++ def : LSXShiftPat(Insn#_B), ++ (vsplati8 immi32Cst7)>; ++ def : LSXShiftPat(Insn#_H), ++ (vsplati16 immi32Cst15)>; ++ def : LSXShiftPat(Insn#_W), ++ (vsplati32 immi32Cst31)>; ++ def : LSXPat<(v2i64 (Node v2i64:$vs, (v2i64 (and v2i64:$vt, ++ vsplati64_imm_eq_63)))), ++ (v2i64 (!cast(Insn#_D) v2i64:$vs, v2i64:$vt))>; ++} ++ ++multiclass LSXBitPats { ++ def : LSXBitPat(Insn#_B), vsplati8imm7>; ++ def : LSXBitPat(Insn#_H), vsplati16imm15>; ++ def : LSXBitPat(Insn#_W), vsplati32imm31>; ++ def : LSXPat<(Node v2i64:$vs, (shl (v2i64 vsplati64_imm_eq_1), ++ (vsplati64imm63 v2i64:$vt))), ++ (v2i64 (!cast(Insn#_D) v2i64:$vs, v2i64:$vt))>; ++} ++ ++defm : LSXShiftPats; ++defm : LSXShiftPats; ++defm : LSXShiftPats; ++defm : LSXBitPats; ++defm : LSXBitPats; ++ ++def : LSXPat<(and v16i8:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v16i8:$vt)), ++ immAllOnesV)), ++ (v16i8 (VBITCLR_B v16i8:$vs, v16i8:$vt))>; ++def : LSXPat<(and v8i16:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v8i16:$vt)), ++ immAllOnesV)), ++ (v8i16 (VBITCLR_H v8i16:$vs, v8i16:$vt))>; ++def : LSXPat<(and v4i32:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v4i32:$vt)), ++ immAllOnesV)), ++ (v4i32 (VBITCLR_W v4i32:$vs, v4i32:$vt))>; ++def : LSXPat<(and v2i64:$vs, (xor (shl (v2i64 vsplati64_imm_eq_1), ++ (vsplati64imm63 v2i64:$vt)), ++ (bitconvert (v4i32 immAllOnesV)))), ++ (v2i64 (VBITCLR_D v2i64:$vs, v2i64:$vt))>; ++ ++ ++def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), v4f32:$v), ++ (VFRECIP_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), v2f64:$v), ++ (VFRECIP_D v2f64:$v)>; ++ ++def : LSXPat<(fdiv (v4f32 fpimm1), v4f32:$v), ++ (VFRECIP_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 fpimm1), v2f64:$v), ++ (VFRECIP_D v2f64:$v)>; ++ ++ ++def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v4f32:$v)), ++ (VFRSQRT_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), (fsqrt v2f64:$v)), ++ (VFRSQRT_D v2f64:$v)>; ++ ++def : LSXPat<(fdiv (v4f32 fpimm1), (fsqrt v4f32:$v)), ++ (VFRSQRT_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 fpimm1), (fsqrt v2f64:$v)), ++ (VFRSQRT_D v2f64:$v)>; ++ ++ ++def : LSXPat<(abs v2i64:$v), ++ (VMAX_D v2i64:$v, (VNEG_D v2i64:$v))>; ++ ++def : LSXPat<(abs v4i32:$v), ++ (VMAX_W v4i32:$v, (VNEG_W v4i32:$v))>; ++ ++def : LSXPat<(abs v8i16:$v), ++ (VMAX_H v8i16:$v, (VNEG_H v8i16:$v))>; ++ ++def : LSXPat<(abs v16i8:$v), ++ (VMAX_B v16i8:$v, (VNEG_B 
v16i8:$v))>; ++ ++ ++def : LSXPat<(sub (v16i8 immAllZerosV), v16i8:$v), ++ (VNEG_B v16i8:$v)>; ++ ++def : LSXPat<(sub (v8i16 immAllZerosV), v8i16:$v), ++ (VNEG_H v8i16:$v)>; ++ ++def : LSXPat<(sub (v4i32 immAllZerosV), v4i32:$v), ++ (VNEG_W v4i32:$v)>; ++ ++def : LSXPat<(sub (v2i64 immAllZerosV), v2i64:$v), ++ (VNEG_D v2i64:$v)>; ++ ++ ++def : LSXPat<(sra ++ (v16i8 (add ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (srl ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVG_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(sra ++ (v8i16 (add ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (srl ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVG_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(sra ++ (v4i32 (add ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (srl ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (VAVG_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(sra ++ (v2i64 (add ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (srl ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (build_vector (i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v2i64 (build_vector (i64 1),(i64 1)))), ++ (VAVG_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++def : LSXPat<(srl ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(srl ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(srl ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(srl ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (build_vector (i64 1),(i64 1)) ++ ) ++ ), ++ (VAVG_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++ ++def : LSXPat<(sra ++ (v16i8 (add ++ (v16i8 (add (v16i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (srl ++ (v16i8 ( add (v16i8( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i8 
(build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVGR_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(sra ++ (v8i16 (add ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (srl ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVGR_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(sra ++ (v4i32 (add ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (srl ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (VAVGR_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(sra ++ (v2i64 (add ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (srl ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (build_vector (i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v2i64 (build_vector (i64 1),(i64 1)))), ++ (VAVGR_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++ ++def : LSXPat<(srl ++ (v16i8 (add (v16i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(srl ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(srl ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(srl ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (build_vector (i64 1),(i64 1)) ++ ) ++ ), ++ (VAVGR_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), ++ (VMUH_D LSX128D:$a, LSX128D:$b)>; ++ ++def : LSXPat<(mulhs LSX128W:$a, LSX128W:$b), ++ (VMUH_W LSX128W:$a, LSX128W:$b)>; ++ ++def : LSXPat<(mulhs LSX128H:$a, LSX128H:$b), ++ (VMUH_H LSX128H:$a, LSX128H:$b)>; ++ ++def : LSXPat<(mulhs LSX128B:$a, LSX128B:$b), ++ (VMUH_B LSX128B:$a, LSX128B:$b)>; ++ ++ ++def : LSXPat<(mulhu 
LSX128D:$a, LSX128D:$b), ++ (VMUH_DU LSX128D:$a, LSX128D:$b)>; ++ ++def : LSXPat<(mulhu LSX128W:$a, LSX128W:$b), ++ (VMUH_WU LSX128W:$a, LSX128W:$b)>; ++ ++def : LSXPat<(mulhu LSX128H:$a, LSX128H:$b), ++ (VMUH_HU LSX128H:$a, LSX128H:$b)>; ++ ++def : LSXPat<(mulhu LSX128B:$a, LSX128B:$b), ++ (VMUH_BU LSX128B:$a, LSX128B:$b)>; ++ ++ ++ ++//===----------------------------------------------------------------------===// ++// Intrinsics ++//===----------------------------------------------------------------------===// ++ ++def : LSXPat<(int_loongarch_lsx_vseq_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSEQ_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSEQ_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSEQ_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsle_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLE_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLE_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLE_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsle_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLE_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLE_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLE_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLE_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vslt_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLT_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLT_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLT_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vslt_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLT_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLT_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLT_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLT_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VADD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VADD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VADD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsub_b (v16i8 
LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSUB_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSUB_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSUB_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSADD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSADD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSADD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSSUB_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSSUB_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSSUB_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsadd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSADD_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSADD_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSADD_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSADD_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssub_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSSUB_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSSUB_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSSUB_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSSUB_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhaddw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHADDW_H_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHADDW_W_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHADDW_D_W LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhsubw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHSUBW_H_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHSUBW_W_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHSUBW_D_W LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhaddw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHADDW_HU_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHADDW_WU_HU LSX128H:$vj, LSX128H:$vk)>; ++def : 
LSXPat<(int_loongarch_lsx_vhaddw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHADDW_DU_WU LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhsubw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHSUBW_HU_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHSUBW_WU_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHSUBW_DU_WU LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vadda_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VADDA_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VADDA_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VADDA_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VADDA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vabsd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VABSD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VABSD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VABSD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VABSD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vabsd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VABSD_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VABSD_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VABSD_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VABSD_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavg_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVG_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVG_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVG_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVG_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavg_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVG_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVG_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVG_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVG_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavgr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVGR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVGR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVGR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVGR_D LSX128D:$vj, LSX128D:$vk)>; 
++ ++def : LSXPat<(int_loongarch_lsx_vavgr_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVGR_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVGR_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVGR_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVGR_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSRLR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSRLR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSRLR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSRLR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrar_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSRAR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSRAR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSRAR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSRAR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vbitset_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VBITSET_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VBITSET_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VBITSET_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VBITSET_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vbitrev_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VBITREV_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VBITREV_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VBITREV_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VBITREV_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfadd_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFADD_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfadd_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfsub_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFSUB_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfsub_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmax_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMAX_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmax_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMAX_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmin_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMIN_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmin_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMIN_D LSX128D:$vj, 
LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmaxa_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMAXA_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmaxa_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMAXA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmina_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMINA_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmina_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMINA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vclo_b (v16i8 LSX128B:$vj)), ++ (VCLO_B LSX128B:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_h (v8i16 LSX128H:$vj)), ++ (VCLO_H LSX128H:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_w (v4i32 LSX128W:$vj)), ++ (VCLO_W LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_d (v2i64 LSX128D:$vj)), ++ (VCLO_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vflogb_s (v4f32 LSX128W:$vj)), ++ (VFLOGB_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vflogb_d (v2f64 LSX128D:$vj)), ++ (VFLOGB_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfclass_s (v4f32 LSX128W:$vj)), ++ (VFCLASS_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfclass_d (v2f64 LSX128D:$vj)), ++ (VFCLASS_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfrecip_s (v4f32 LSX128W:$vj)), ++ (VFRECIP_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfrecip_d (v2f64 LSX128D:$vj)), ++ (VFRECIP_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfrsqrt_s (v4f32 LSX128W:$vj)), ++ (VFRSQRT_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfrsqrt_d (v2f64 LSX128D:$vj)), ++ (VFRSQRT_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcvtl_s_h (v8i16 LSX128H:$vk)), ++ (VFCVTL_S_H LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcvth_s_h (v8i16 LSX128H:$vk)), ++ (VFCVTH_S_H LSX128H:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcvtl_d_s (v4f32 LSX128W:$vj)), ++ (VFCVTL_D_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfcvth_d_s (v4f32 LSX128W:$vj)), ++ (VFCVTH_D_S LSX128W:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftint_w_s (v4f32 LSX128W:$vj)), ++ (VFTINT_W_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftint_l_d (v2f64 LSX128D:$vj)), ++ (VFTINT_L_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftint_wu_s (v4f32 LSX128W:$vj)), ++ (VFTINT_WU_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftint_lu_d (v2f64 LSX128D:$vj)), ++ (VFTINT_LU_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_b GPR32Opnd:$rj), ++ (VREPLGR2VR_B GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_h GPR32Opnd:$rj), ++ (VREPLGR2VR_H GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_w GPR32Opnd:$rj), ++ (VREPLGR2VR_W GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_d GPR64Opnd:$rj), ++ (VREPLGR2VR_D GPR64Opnd:$rj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlri_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSRLRI_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSRLRI_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSRLRI_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSRLRI_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrari_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSRARI_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSRARI_H 
LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSRARI_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSRARI_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_b (v16i8 LSX128B:$vj), GPR32Opnd:$rj, (immZExt4:$ui4)), ++ (VINSGR2VR_B LSX128B:$vj, GPR32Opnd:$rj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_h (v8i16 LSX128H:$vj), GPR32Opnd:$rj, (immZExt3:$ui3)), ++ (VINSGR2VR_H LSX128H:$vj, GPR32Opnd:$rj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_w (v4i32 LSX128W:$vj), GPR32Opnd:$rj, (immZExt2:$ui2)), ++ (VINSGR2VR_W LSX128W:$vj, GPR32Opnd:$rj, uimm2:$ui2)>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_d (v2i64 LSX128D:$vj), GPR64Opnd:$rj, (immZExt1:$ui1)), ++ (VINSGR2VR_D LSX128D:$vj, GPR64Opnd:$rj, uimm1i:$ui1)>; ++ ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_b (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VPICKVE2GR_B LSX128B:$vj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_h (v8i16 LSX128H:$vj), (immZExt3:$ui3)), ++ (VPICKVE2GR_H LSX128H:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_w (v4i32 LSX128W:$vj), (immZExt2:$ui2)), ++ (VPICKVE2GR_W LSX128W:$vj, uimm2:$ui2)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_d (v2i64 LSX128D:$vj), (immZExt1:$ui1)), ++ (VPICKVE2GR_D LSX128D:$vj, uimm1i:$ui1)>; ++ ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_bu (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VPICKVE2GR_BU LSX128B:$vj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_hu (v8i16 LSX128H:$vj), (immZExt3:$ui3)), ++ (VPICKVE2GR_HU LSX128H:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_wu (v4i32 LSX128W:$vj), (immZExt2:$ui2)), ++ (VPICKVE2GR_WU LSX128W:$vj, uimm2:$ui2)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsat_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSAT_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsat_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSAT_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsat_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSAT_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsat_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSAT_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsat_bu (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSAT_BU LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsat_hu (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSAT_HU LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsat_wu (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSAT_WU LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsat_du (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSAT_DU LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vmskltz_b (v16i8 LSX128B:$vj)), ++ (VMSKLTZ_B LSX128B:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_h (v8i16 LSX128H:$vj)), ++ (VMSKLTZ_H LSX128H:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_w (v4i32 LSX128W:$vj)), ++ (VMSKLTZ_W LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_d (v2i64 LSX128D:$vj)), ++ (VMSKLTZ_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : 
LSXPat<(int_loongarch_lsx_vsrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlrni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLRNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLRNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLRNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLRNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrani_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRANI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRANI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRANI_W_D LSX128W:$vd_in, LSX128W:$vj, 
uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRANI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrani_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRANI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRANI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRANI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRANI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrarni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRARNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRARNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRARNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRARNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(load (add iPTR:$vj, GPR64Opnd:$vk)), ++ (VLDX PtrRC:$vj, GPR64Opnd:$vk)>; ++ ++def : LSXPat<(store (v16i8 LSX128B:$vd), (add iPTR:$vj, GPR64Opnd:$vk)), ++ (VSTX LSX128B:$vd, PtrRC:$vj, GPR64Opnd:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vshuf_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk), (v16i8 LSX128B:$va)), ++ (VSHUF_B LSX128B:$vj, LSX128B:$vk, LSX128B:$va)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CEQ_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cor_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_COR_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cor_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_COR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cun_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUN_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cun_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUN_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cune_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUNE_S 
LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cune_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUNE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUEQ_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cne_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CNE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cne_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CNE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_clt_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CLT_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_clt_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CLT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cult_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CULT_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cult_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CULT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cle_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CLE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cle_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CLE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cule_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CULE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cule_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CULE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftintrz_w_s (v4f32 LSX128W:$vj)), ++ (VFTINTRZ_W_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftintrz_l_d (v2f64 LSX128D:$vj)), ++ (VFTINTRZ_L_D LSX128D:$vj)>; ++ ++ ++def imm_mask : ImmLeaf(Imm) && Imm == -1;}]>; ++def imm_mask_64 : ImmLeaf(Imm) && Imm == -1;}]>; ++ ++ ++def : LSXPat<(xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask)), ++ (NOR_V_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vj))>; ++ ++def : LSXPat<(xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask)), ++ (NOR_V_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vj))>; ++ ++def : LSXPat<(xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64)), ++ (NOR_V_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vj))>; ++ ++ ++def : LSXPat<(and ++ (v16i8 (xor (v16i8 LSX128B:$vj),(vsplati8 imm_mask))), ++ (v16i8 LSX128B:$vk) ++ ), ++ (VANDN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; ++ ++def : LSXPat<(and ++ (v8i16 (xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask))), ++ (v8i16 LSX128H:$vk) ++ ), ++ (VANDN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; ++ ++def : LSXPat<(and ++ (v4i32 (xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask))), ++ (v4i32 LSX128W:$vk) ++ ), ++ (VANDN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; ++ ++def : LSXPat<(and ++ (v2i64 (xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64))), ++ (v2i64 LSX128D:$vk) ++ ), ++ (VANDN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; ++ ++ ++def : LSXPat<(or ++ (v16i8 LSX128B:$vj), ++ (v16i8 (xor (v16i8 LSX128B:$vk), (vsplati8 imm_mask))) ++ ), ++ (VORN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; ++ ++def : LSXPat<(or ++ (v8i16 LSX128H:$vj), ++ (v8i16 (xor (v8i16 LSX128H:$vk), (vsplati16 imm_mask))) ++ ), ++ (VORN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; ++ ++def : LSXPat<(or 
++ (v4i32 LSX128W:$vj), ++ (v4i32 (xor (v4i32 LSX128W:$vk), (vsplati32 imm_mask))) ++ ), ++ (VORN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; ++ ++def : LSXPat<(or ++ (v2i64 LSX128D:$vj), ++ (v2i64 (xor (v2i64 LSX128D:$vk), (vsplati64 imm_mask_64))) ++ ), ++ (VORN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; ++ ++ ++def : LSXPat<(add (v2i64 (abs LSX128D:$a)), (v2i64 (abs LSX128D:$b))), ++ (VADDA_D (v2i64 LSX128D:$a),(v2i64 LSX128D:$b))>; ++ ++def : LSXPat<(add (v4i32 (abs LSX128W:$a)), (v4i32 (abs LSX128W:$b))), ++ (VADDA_W (v4i32 LSX128W:$a),(v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(add (v8i16 (abs LSX128H:$a)), (v8i16 (abs LSX128H:$b))), ++ (VADDA_H (v8i16 LSX128H:$a),(v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(add (v16i8 (abs LSX128B:$a)), (v16i8 (abs LSX128B:$b))), ++ (VADDA_B (v16i8 LSX128B:$a),(v16i8 LSX128B:$b))>; ++ ++ ++def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), ++ (vsplati8 imm_mask))), ++ (VBITCLR_B v16i8:$vj, v16i8:$vk)>; ++ ++def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), ++ (vsplati16 imm_mask))), ++ (VBITCLR_H v8i16:$vj, v8i16:$vk)>; ++ ++def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), ++ (vsplati32 imm_mask))), ++ (VBITCLR_W v4i32:$vj, v4i32:$vk)>; ++ ++def : LSXPat<(and v2i64:$vj, (xor (shl vsplat_imm_eq_1, v2i64:$vk), ++ (vsplati64 imm_mask_64))), ++ (VBITCLR_D v2i64:$vj, v2i64:$vk)>; +diff --git a/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/lib/Target/LoongArch/LoongArchMCInstLower.cpp +new file mode 100644 +index 00000000..bf70b09d +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMCInstLower.cpp +@@ -0,0 +1,342 @@ ++//===- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to MCInst ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains code to lower LoongArch MachineInstrs to their corresponding ++// MCInst records. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCInstLower.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "LoongArchAsmPrinter.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/Support/ErrorHandling.h" ++#include ++ ++using namespace llvm; ++ ++LoongArchMCInstLower::LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter) ++ : AsmPrinter(asmprinter) {} ++ ++void LoongArchMCInstLower::Initialize(MCContext *C) { ++ Ctx = C; ++} ++ ++MCOperand LoongArchMCInstLower::LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, ++ unsigned Offset) const { ++ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; ++ LoongArchMCExpr::LoongArchExprKind TargetKind = LoongArchMCExpr::MEK_None; ++ const MCSymbol *Symbol; ++ ++ switch(MO.getTargetFlags()) { ++ default: ++ llvm_unreachable("Invalid target flag!"); ++ case LoongArchII::MO_NO_FLAG: ++ break; ++ case LoongArchII::MO_GOT_HI: ++ TargetKind = LoongArchMCExpr::MEK_GOT_HI; ++ break; ++ case LoongArchII::MO_GOT_LO: ++ TargetKind = LoongArchMCExpr::MEK_GOT_LO; ++ break; ++ case LoongArchII::MO_GOT_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHI; ++ break; ++ case LoongArchII::MO_GOT_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHER; ++ break; ++ case LoongArchII::MO_GOT_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHEST; ++ break; ++ case LoongArchII::MO_GOT_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRLO; ++ break; ++ case LoongArchII::MO_PCREL_HI: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_HI; ++ break; ++ case LoongArchII::MO_PCREL_LO: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_LO; ++ break; ++ case LoongArchII::MO_PCREL_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHI; ++ break; ++ case LoongArchII::MO_PCREL_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHER; ++ break; ++ case LoongArchII::MO_PCREL_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHEST; ++ break; ++ case LoongArchII::MO_PCREL_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRLO; ++ break; ++ case LoongArchII::MO_TLSIE_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_HI; ++ break; ++ case LoongArchII::MO_TLSIE_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_LO; ++ break; ++ case LoongArchII::MO_TLSIE_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHI; ++ break; ++ case LoongArchII::MO_TLSIE_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHER; ++ break; ++ case LoongArchII::MO_TLSIE_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHEST; ++ break; ++ case LoongArchII::MO_TLSIE_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRLO; ++ break; ++ case LoongArchII::MO_TLSLE_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HI; ++ break; ++ case LoongArchII::MO_TLSLE_HIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHER; ++ break; ++ case LoongArchII::MO_TLSLE_HIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHEST; ++ break; ++ case LoongArchII::MO_TLSLE_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_LO; ++ break; ++ case LoongArchII::MO_TLSGD_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_HI; ++ break; ++ case LoongArchII::MO_TLSGD_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_LO; ++ break; ++ case LoongArchII::MO_TLSGD_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHI; ++ break; ++ case 
LoongArchII::MO_TLSGD_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHER; ++ break; ++ case LoongArchII::MO_TLSGD_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHEST; ++ break; ++ case LoongArchII::MO_TLSGD_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRLO; ++ break; ++ case LoongArchII::MO_ABS_HI: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HI; ++ break; ++ case LoongArchII::MO_ABS_HIGHER: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HIGHER; ++ break; ++ case LoongArchII::MO_ABS_HIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HIGHEST; ++ break; ++ case LoongArchII::MO_ABS_LO: ++ TargetKind = LoongArchMCExpr::MEK_ABS_LO; ++ break; ++ case LoongArchII::MO_CALL_HI: ++ TargetKind = LoongArchMCExpr::MEK_CALL_HI; ++ break; ++ case LoongArchII::MO_CALL_LO: ++ TargetKind = LoongArchMCExpr::MEK_CALL_LO; ++ break; ++ } ++ ++ switch (MOTy) { ++ case MachineOperand::MO_MachineBasicBlock: ++ Symbol = MO.getMBB()->getSymbol(); ++ break; ++ ++ case MachineOperand::MO_GlobalAddress: ++ Symbol = AsmPrinter.getSymbol(MO.getGlobal()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_BlockAddress: ++ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_ExternalSymbol: ++ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_MCSymbol: ++ Symbol = MO.getMCSymbol(); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_JumpTableIndex: ++ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); ++ break; ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); ++ Offset += MO.getOffset(); ++ break; ++ ++ default: ++ llvm_unreachable(""); ++ } ++ ++ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); ++ ++ if (Offset) { ++ // Assume offset is never negative. ++ assert(Offset > 0); ++ ++ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), ++ *Ctx); ++ } ++ ++ if (TargetKind != LoongArchMCExpr::MEK_None) ++ Expr = LoongArchMCExpr::create(TargetKind, Expr, *Ctx); ++ ++ return MCOperand::createExpr(Expr); ++} ++ ++MCOperand LoongArchMCInstLower::LowerOperand(const MachineOperand &MO, ++ unsigned offset) const { ++ MachineOperandType MOTy = MO.getType(); ++ ++ switch (MOTy) { ++ default: llvm_unreachable("unknown operand type"); ++ case MachineOperand::MO_Register: ++ // Ignore all implicit register operands. 
++    if (MO.isImplicit()) break;
++    return MCOperand::createReg(MO.getReg());
++  case MachineOperand::MO_Immediate:
++    return MCOperand::createImm(MO.getImm() + offset);
++  case MachineOperand::MO_MachineBasicBlock:
++  case MachineOperand::MO_GlobalAddress:
++  case MachineOperand::MO_ExternalSymbol:
++  case MachineOperand::MO_MCSymbol:
++  case MachineOperand::MO_JumpTableIndex:
++  case MachineOperand::MO_ConstantPoolIndex:
++  case MachineOperand::MO_BlockAddress:
++    return LowerSymbolOperand(MO, MOTy, offset);
++  case MachineOperand::MO_RegisterMask:
++    break;
++  }
++
++  return MCOperand();
++}
++
++MCOperand LoongArchMCInstLower::createSub(MachineBasicBlock *BB1,
++                                          MachineBasicBlock *BB2,
++                                          LoongArchMCExpr::LoongArchExprKind Kind) const {
++  const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx);
++  const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx);
++  const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx);
++
++  return MCOperand::createExpr(LoongArchMCExpr::create(Kind, Sub, *Ctx));
++}
++
++void LoongArchMCInstLower::lowerLongBranchADDI(const MachineInstr *MI,
++                                               MCInst &OutMI, int Opcode) const {
++  OutMI.setOpcode(Opcode);
++
++  LoongArchMCExpr::LoongArchExprKind Kind;
++  unsigned TargetFlags = MI->getOperand(2).getTargetFlags();
++  switch (TargetFlags) {
++  case LoongArchII::MO_ABS_HIGHEST:
++    Kind = LoongArchMCExpr::MEK_ABS_HIGHEST;
++    break;
++  case LoongArchII::MO_ABS_HIGHER:
++    Kind = LoongArchMCExpr::MEK_ABS_HIGHER;
++    break;
++  case LoongArchII::MO_ABS_HI:
++    Kind = LoongArchMCExpr::MEK_ABS_HI;
++    break;
++  case LoongArchII::MO_ABS_LO:
++    Kind = LoongArchMCExpr::MEK_ABS_LO;
++    break;
++  default:
++    report_fatal_error("Unexpected flags for lowerLongBranchADDI");
++  }
++
++  // Lower two register operands.
++  for (unsigned I = 0, E = 2; I != E; ++I) {
++    const MachineOperand &MO = MI->getOperand(I);
++    OutMI.addOperand(LowerOperand(MO));
++  }
++
++  if (MI->getNumOperands() == 3) {
++    // Lower register operand.
++    const MCExpr *Expr =
++        MCSymbolRefExpr::create(MI->getOperand(2).getMBB()->getSymbol(), *Ctx);
++    const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx);
++    OutMI.addOperand(MCOperand::createExpr(LoongArchExpr));
++  } else if (MI->getNumOperands() == 4) {
++    // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt).
++    OutMI.addOperand(createSub(MI->getOperand(2).getMBB(),
++                               MI->getOperand(3).getMBB(), Kind));
++  }
++}
++
++void LoongArchMCInstLower::lowerLongBranchPCADDU12I(const MachineInstr *MI,
++                                                    MCInst &OutMI, int Opcode) const {
++  OutMI.setOpcode(Opcode);
++
++  LoongArchMCExpr::LoongArchExprKind Kind;
++  unsigned TargetFlags = MI->getOperand(1).getTargetFlags();
++  switch (TargetFlags) {
++  case LoongArchII::MO_PCREL_HI:
++    Kind = LoongArchMCExpr::MEK_PCREL_HI;
++    break;
++  case LoongArchII::MO_PCREL_LO:
++    Kind = LoongArchMCExpr::MEK_PCREL_LO;
++    break;
++  default:
++    report_fatal_error("Unexpected flags for lowerLongBranchPCADDU12I");
++  }
++
++  // Lower the register operand.
++ const MachineOperand &MO = MI->getOperand(0); ++ OutMI.addOperand(LowerOperand(MO)); ++ ++ const MCExpr *Expr = ++ MCSymbolRefExpr::create(MI->getOperand(1).getMBB()->getSymbol(), *Ctx); ++ const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx); ++ OutMI.addOperand(MCOperand::createExpr(LoongArchExpr)); ++} ++bool LoongArchMCInstLower::lowerLongBranch(const MachineInstr *MI, ++ MCInst &OutMI) const { ++ switch (MI->getOpcode()) { ++ default: ++ return false; ++ case LoongArch::LONG_BRANCH_ADDIW: ++ case LoongArch::LONG_BRANCH_ADDIW2Op: ++ lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_W); ++ return true; ++ case LoongArch::LONG_BRANCH_ADDID: ++ case LoongArch::LONG_BRANCH_ADDID2Op: ++ lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_D); ++ return true; ++ case LoongArch::LONG_BRANCH_PCADDU12I: ++ lowerLongBranchPCADDU12I(MI, OutMI, LoongArch::PCADDU12I); ++ return true; ++ } ++} ++ ++void LoongArchMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { ++ if (lowerLongBranch(MI, OutMI)) ++ return; ++ ++ OutMI.setOpcode(MI->getOpcode()); ++ ++ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { ++ const MachineOperand &MO = MI->getOperand(i); ++ MCOperand MCOp = LowerOperand(MO); ++ ++ if (MCOp.isValid()) ++ OutMI.addOperand(MCOp); ++ } ++} +diff --git a/lib/Target/LoongArch/LoongArchMCInstLower.h b/lib/Target/LoongArch/LoongArchMCInstLower.h +new file mode 100644 +index 00000000..6463a7b6 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMCInstLower.h +@@ -0,0 +1,55 @@ ++//===- LoongArchMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H ++ ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace llvm { ++ ++class MachineBasicBlock; ++class MachineInstr; ++class MCContext; ++class MCInst; ++class MCOperand; ++class LoongArchAsmPrinter; ++ ++/// LoongArchMCInstLower - This class is used to lower an MachineInstr into an ++/// MCInst. 
++class LLVM_LIBRARY_VISIBILITY LoongArchMCInstLower { ++ using MachineOperandType = MachineOperand::MachineOperandType; ++ ++ MCContext *Ctx; ++ LoongArchAsmPrinter &AsmPrinter; ++ ++public: ++ LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter); ++ ++ void Initialize(MCContext *C); ++ void Lower(const MachineInstr *MI, MCInst &OutMI) const; ++ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; ++ ++private: ++ MCOperand LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, unsigned Offset) const; ++ MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, ++ LoongArchMCExpr::LoongArchExprKind Kind) const; ++ void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; ++ void lowerLongBranchADDI(const MachineInstr *MI, MCInst &OutMI, ++ int Opcode) const; ++ void lowerLongBranchPCADDU12I(const MachineInstr *MI, MCInst &OutMI, ++ int Opcode) const; ++ bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H +diff --git a/lib/Target/LoongArch/LoongArchMachineFunction.cpp b/lib/Target/LoongArch/LoongArchMachineFunction.cpp +new file mode 100644 +index 00000000..90baa8fd +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMachineFunction.cpp +@@ -0,0 +1,51 @@ ++//===-- LoongArchMachineFunctionInfo.cpp - Private data used for LoongArch ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMachineFunction.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/PseudoSourceValue.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/Support/CommandLine.h" ++ ++using namespace llvm; ++ ++LoongArchFunctionInfo::~LoongArchFunctionInfo() = default; ++ ++void LoongArchFunctionInfo::createEhDataRegsFI() { ++ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); ++ for (int I = 0; I < 4; ++I) { ++ const TargetRegisterClass &RC = ++ static_cast(MF.getTarget()) ++ .getABI() ++ .IsLP64() ++ ? 
LoongArch::GPR64RegClass ++ : LoongArch::GPR32RegClass; ++ ++ EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), ++ TRI.getSpillAlign(RC), false); ++ } ++} ++ ++bool LoongArchFunctionInfo::isEhDataRegFI(int FI) const { ++ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1] ++ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]); ++} ++ ++MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const char *ES) { ++ return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)); ++} ++ ++MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const GlobalValue *GV) { ++ return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV)); ++} ++ ++void LoongArchFunctionInfo::anchor() {} +diff --git a/lib/Target/LoongArch/LoongArchMachineFunction.h b/lib/Target/LoongArch/LoongArchMachineFunction.h +new file mode 100644 +index 00000000..b1c805c0 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMachineFunction.h +@@ -0,0 +1,98 @@ ++//===- LoongArchMachineFunctionInfo.h - Private data used for LoongArch ---*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of MachineFunctionInfo. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H ++ ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include ++ ++namespace llvm { ++ ++/// LoongArchFunctionInfo - This class is derived from MachineFunction private ++/// LoongArch target-specific information for each MachineFunction. ++class LoongArchFunctionInfo : public MachineFunctionInfo { ++public: ++ LoongArchFunctionInfo(MachineFunction &MF) : MF(MF) {} ++ ++ ~LoongArchFunctionInfo() override; ++ ++ unsigned getSRetReturnReg() const { return SRetReturnReg; } ++ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } ++ ++ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } ++ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } ++ ++ unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } ++ void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } ++ ++ bool hasByvalArg() const { return HasByvalArg; } ++ void setFormalArgInfo(unsigned Size, bool HasByval) { ++ IncomingArgSize = Size; ++ HasByvalArg = HasByval; ++ } ++ ++ unsigned getIncomingArgSize() const { return IncomingArgSize; } ++ ++ bool callsEhReturn() const { return CallsEhReturn; } ++ void setCallsEhReturn() { CallsEhReturn = true; } ++ ++ void createEhDataRegsFI(); ++ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } ++ bool isEhDataRegFI(int FI) const; ++ ++ /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue ++ /// object representing a GOT entry for an external function. ++ MachinePointerInfo callPtrInfo(const char *ES); ++ ++ /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object ++ /// representing a GOT entry for a global function. 
++ MachinePointerInfo callPtrInfo(const GlobalValue *GV); ++ ++ void setSaveS2() { SaveS2 = true; } ++ bool hasSaveS2() const { return SaveS2; } ++ ++private: ++ virtual void anchor(); ++ ++ MachineFunction& MF; ++ ++ /// SRetReturnReg - Some subtargets require that sret lowering includes ++ /// returning the value of the returned struct in a register. This field ++ /// holds the virtual register into which the sret argument is passed. ++ unsigned SRetReturnReg = 0; ++ ++ /// VarArgsFrameIndex - FrameIndex for start of varargs area. ++ int VarArgsFrameIndex = 0; ++ int VarArgsSaveSize = 0; ++ ++ /// True if function has a byval argument. ++ bool HasByvalArg; ++ ++ /// Size of incoming argument area. ++ unsigned IncomingArgSize; ++ ++ /// CallsEhReturn - Whether the function calls llvm.eh.return. ++ bool CallsEhReturn = false; ++ ++ /// Frame objects for spilling eh data registers. ++ int EhDataRegFI[4]; ++ ++ // saveS2 ++ bool SaveS2 = false; ++ ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H +diff --git a/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp b/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp +new file mode 100644 +index 00000000..8dbf30f2 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp +@@ -0,0 +1,53 @@ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Subtarget Control ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// This file defines a pass used to change the subtarget for the ++// LoongArch Instruction selector. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/CodeGen/StackProtector.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-isel" ++ ++namespace { ++ class LoongArchModuleDAGToDAGISel : public MachineFunctionPass { ++ public: ++ static char ID; ++ ++ LoongArchModuleDAGToDAGISel() : MachineFunctionPass(ID) {} ++ ++ // Pass Name ++ StringRef getPassName() const override { ++ return "LoongArch DAG->DAG Pattern Instruction Selection"; ++ } ++ ++ void getAnalysisUsage(AnalysisUsage &AU) const override { ++ AU.addRequired(); ++ AU.addPreserved(); ++ MachineFunctionPass::getAnalysisUsage(AU); ++ } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ }; ++ ++ char LoongArchModuleDAGToDAGISel::ID = 0; ++} ++ ++bool LoongArchModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { ++ LLVM_DEBUG(errs() << "In LoongArchModuleDAGToDAGISel::runMachineFunction\n"); ++ return false; ++} ++ ++llvm::FunctionPass *llvm::createLoongArchModuleISelDagPass() { ++ return new LoongArchModuleDAGToDAGISel(); ++} +diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +new file mode 100644 +index 00000000..af5362c3 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +@@ -0,0 +1,355 @@ ++//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetRegisterInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchRegisterInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-reg-info" ++ ++#define GET_REGINFO_TARGET_DESC ++#include "LoongArchGenRegisterInfo.inc" ++ ++LoongArchRegisterInfo::LoongArchRegisterInfo() : LoongArchGenRegisterInfo(LoongArch::RA) {} ++ ++unsigned LoongArchRegisterInfo::getPICCallReg() { return LoongArch::T8; } ++ ++const TargetRegisterClass * ++LoongArchRegisterInfo::getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const { ++ LoongArchABIInfo ABI = MF.getSubtarget().getABI(); ++ LoongArchPtrClass PtrClassKind = static_cast(Kind); ++ ++ switch (PtrClassKind) { ++ case LoongArchPtrClass::Default: ++ return ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ case LoongArchPtrClass::StackPointer: ++ return ABI.ArePtrs64bit() ? 
&LoongArch::SP64RegClass : &LoongArch::SP32RegClass; ++ } ++ ++ llvm_unreachable("Unknown pointer kind"); ++} ++ ++unsigned ++LoongArchRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, ++ MachineFunction &MF) const { ++ switch (RC->getID()) { ++ default: ++ return 0; ++ case LoongArch::GPR32RegClassID: ++ case LoongArch::GPR64RegClassID: ++ { ++ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); ++ return 28 - TFI->hasFP(MF); ++ } ++ case LoongArch::FGR32RegClassID: ++ return 32; ++ case LoongArch::FGR64RegClassID: ++ return 32; ++ } ++} ++ ++//===----------------------------------------------------------------------===// ++// Callee Saved Registers methods ++//===----------------------------------------------------------------------===// ++ ++/// LoongArch Callee Saved Registers ++const MCPhysReg * ++LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ++ const LoongArchSubtarget &Subtarget = MF->getSubtarget(); ++ ++ if ((Subtarget.hasBasicF() && !Subtarget.hasBasicD())) ++ return CSR_SingleFloatOnly_SaveList; ++ ++ if (Subtarget.isABI_LP64()) ++ return CSR_LP64_SaveList; ++ ++ return CSR_ILP32_SaveList; ++} ++ ++const uint32_t * ++LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF, ++ CallingConv::ID) const { ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ ++ if ((Subtarget.hasBasicF() && !Subtarget.hasBasicD())) ++ return CSR_SingleFloatOnly_RegMask; ++ ++ if (Subtarget.isABI_LP64()) ++ return CSR_LP64_RegMask; ++ ++ return CSR_ILP32_RegMask; ++} ++ ++BitVector LoongArchRegisterInfo:: ++getReservedRegs(const MachineFunction &MF) const { ++ static const MCPhysReg ReservedGPR32[] = { ++ LoongArch::ZERO, LoongArch::SP, LoongArch::TP, LoongArch::T9 ++ }; ++ ++ static const MCPhysReg ReservedGPR64[] = { ++ LoongArch::ZERO_64, LoongArch::SP_64, LoongArch::TP_64, LoongArch::T9_64 ++ }; ++ ++ BitVector Reserved(getNumRegs()); ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ ++ for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I) ++ Reserved.set(ReservedGPR32[I]); ++ ++ for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I) ++ Reserved.set(ReservedGPR64[I]); ++ ++ // Reserve FP if this function should have a dedicated frame pointer register. ++ if (Subtarget.getFrameLowering()->hasFP(MF)) { ++ Reserved.set(LoongArch::FP); ++ Reserved.set(LoongArch::FP_64); ++ ++ // Reserve the base register if we need to both realign the stack and ++ // allocate variable-sized objects at runtime. This should test the ++ // same conditions as LoongArchFrameLowering::hasBP(). ++ if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) { ++ Reserved.set(LoongArch::S7); ++ Reserved.set(LoongArch::S7_64); ++ } ++ } ++ ++ return Reserved; ++} ++ ++bool ++LoongArchRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { ++ return true; ++} ++ ++bool LoongArchRegisterInfo:: ++requiresFrameIndexScavenging(const MachineFunction &MF) const { ++ return true; ++} ++ ++bool ++LoongArchRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { ++ return true; ++} ++ ++/// Get the size of the offset supported by the given load/store/inline asm. ++/// The result includes the effects of any scale factors applied to the ++/// instruction immediate. 
++static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode,
++                                                    MachineOperand MO) {
++  switch (Opcode) {
++  case LoongArch::LDPTR_W:
++  case LoongArch::LDPTR_W32:
++  case LoongArch::LDPTR_D:
++  case LoongArch::STPTR_W:
++  case LoongArch::STPTR_W32:
++  case LoongArch::STPTR_D:
++  case LoongArch::LL_W:
++  case LoongArch::LL_D:
++  case LoongArch::SC_W:
++  case LoongArch::SC_D:
++    return 14 + 2 /* scale factor */;
++  case LoongArch::INLINEASM: {
++    unsigned ConstraintID = InlineAsm::getMemoryConstraintID(MO.getImm());
++    switch (ConstraintID) {
++    case InlineAsm::Constraint_ZC: {
++      return 14 + 2 /* scale factor */;
++    }
++    default:
++      return 12;
++    }
++  }
++  default:
++    return 12;
++  }
++}
++
++/// Get the scale factor applied to the immediate in the given load/store.
++static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) {
++  switch (Opcode) {
++  case LoongArch::LDPTR_W:
++  case LoongArch::LDPTR_W32:
++  case LoongArch::LDPTR_D:
++  case LoongArch::STPTR_W:
++  case LoongArch::STPTR_W32:
++  case LoongArch::STPTR_D:
++  case LoongArch::LL_W:
++  case LoongArch::LL_D:
++  case LoongArch::SC_W:
++  case LoongArch::SC_D:
++    return 4;
++  default:
++    return 1;
++  }
++}
++
++// FrameIndex operands represent objects inside an abstract stack.
++// We must replace each FrameIndex with a direct stack or frame pointer
++// reference.
++void LoongArchRegisterInfo::
++eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
++                    unsigned FIOperandNum, RegScavenger *RS) const {
++  MachineInstr &MI = *II;
++  MachineFunction &MF = *MI.getParent()->getParent();
++  const LoongArchFrameLowering *TFI = getFrameLowering(MF);
++
++  LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
++             errs() << "<--------->\n"
++                    << MI);
++
++  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
++  uint64_t stackSize = MF.getFrameInfo().getStackSize();
++  int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
++
++  LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
++                    << "spOffset : " << spOffset << "\n"
++                    << "stackSize : " << stackSize << "\n"
++                    << "SPAdj : " << SPAdj << "\n"
++                    << "alignment : "
++                    << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex))
++                    << "\n");
++
++  LoongArchABIInfo ABI =
++      static_cast<const LoongArchTargetMachine &>(MF.getTarget()).getABI();
++
++  // Everything else is referenced relative to whatever register
++  // getFrameIndexReference() returns.
++  Register FrameReg;
++  StackOffset Offset =
++      TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
++      StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
++
++  LLVM_DEBUG(errs() << "Location : "
++                    << "FrameReg<" << FrameReg << "> + " << Offset.getFixed()
++                    << "\n<--------->\n");
++
++  MachineBasicBlock &MBB = *MI.getParent();
++  DebugLoc DL = II->getDebugLoc();
++  bool IsKill = false;
++
++  if (!MI.isDebugValue()) {
++    // Make sure Offset fits within the field available.
++    // For ldptr/stptr/ll/sc instructions, this is a 14-bit signed immediate
++    // (scaled by 2), otherwise it is a 12-bit signed immediate.
++    unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(
++        MI.getOpcode(), MI.getOperand(FIOperandNum - 1));
++    const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode()));
++
++    if (OffsetBitSize == 16 && isInt<12>(Offset.getFixed()) &&
++        !isAligned(OffsetAlign, Offset.getFixed())) {
++      // If the offset has to fit into a signed 16-bit (scaled) immediate but
++      // is not suitably aligned for it, and it does fit into 12 bits, then
++      // materialize the address with an ADDI.
++      const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() ?
&LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass; ++ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); ++ unsigned Reg = RegInfo.createVirtualRegister(PtrRC); ++ const LoongArchInstrInfo &TII = *static_cast( ++ MBB.getParent()->getSubtarget().getInstrInfo()); ++ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiOp()), Reg) ++ .addReg(FrameReg) ++ .addImm(Offset.getFixed()); ++ ++ FrameReg = Reg; ++ Offset = StackOffset::getFixed(0); ++ IsKill = true; ++ } else if (!isInt<12>(Offset.getFixed())) { ++ // Otherwise split the offset into several pieces and add it in multiple ++ // instructions. ++ const LoongArchInstrInfo &TII = *static_cast( ++ MBB.getParent()->getSubtarget().getInstrInfo()); ++ unsigned Reg = TII.loadImmediate(Offset.getFixed(), MBB, II, DL); ++ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddOp()), Reg) ++ .addReg(FrameReg) ++ .addReg(Reg, RegState::Kill); ++ ++ FrameReg = Reg; ++ Offset = StackOffset::getFixed(0); ++ IsKill = true; ++ } ++ } ++ ++ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, IsKill); ++ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); ++} ++ ++Register LoongArchRegisterInfo:: ++getFrameRegister(const MachineFunction &MF) const { ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); ++ bool IsLP64 = static_cast(MF.getTarget()) ++ .getABI() ++ .IsLP64(); ++ ++ return TFI->hasFP(MF) ? (IsLP64 ? LoongArch::FP_64 : LoongArch::FP) ++ : (IsLP64 ? LoongArch::SP_64 : LoongArch::SP); ++} ++ ++const TargetRegisterClass * ++LoongArchRegisterInfo::intRegClass(unsigned Size) const { ++ if (Size == 4) ++ return &LoongArch::GPR32RegClass; ++ ++ assert(Size == 8); ++ return &LoongArch::GPR64RegClass; ++} ++ ++bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { ++ // Avoid realigning functions that explicitly do not want to be realigned. ++ // Normally, we should report an error when a function should be dynamically ++ // realigned but also has the attribute no-realign-stack. Unfortunately, ++ // with this attribute, MachineFrameInfo clamps each new object's alignment ++ // to that of the stack's alignment as specified by the ABI. As a result, ++ // the information of whether we have objects with larger alignment ++ // requirement than the stack's alignment is already lost at this point. ++ if (!TargetRegisterInfo::canRealignStack(MF)) ++ return false; ++ ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ unsigned FP = Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP; ++ unsigned BP = Subtarget.is64Bit() ? LoongArch::S7_64 : LoongArch::S7; ++ ++ // We can't perform dynamic stack realignment if we can't reserve the ++ // frame pointer register. ++ if (!MF.getRegInfo().canReserveReg(FP)) ++ return false; ++ ++ // We can realign the stack if we know the maximum call frame size and we ++ // don't have variable sized objects. ++ if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF)) ++ return true; ++ ++ // We have to reserve the base pointer register in the presence of variable ++ // sized objects. 
++ return MF.getRegInfo().canReserveReg(BP); ++} +diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.h b/lib/Target/LoongArch/LoongArchRegisterInfo.h +new file mode 100644 +index 00000000..dd3be916 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchRegisterInfo.h +@@ -0,0 +1,80 @@ ++//===- LoongArchRegisterInfo.h - LoongArch Register Information Impl ------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetRegisterInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H ++ ++#include "LoongArch.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include ++ ++#define GET_REGINFO_HEADER ++#include "LoongArchGenRegisterInfo.inc" ++ ++namespace llvm { ++ ++class TargetRegisterClass; ++ ++class LoongArchRegisterInfo : public LoongArchGenRegisterInfo { ++public: ++ enum class LoongArchPtrClass { ++ /// The default register class for integer values. ++ Default = 0, ++ /// The stack pointer only. ++ StackPointer = 1, ++ }; ++ ++ LoongArchRegisterInfo(); ++ ++ /// Get PIC indirect call register ++ static unsigned getPICCallReg(); ++ ++ /// Code Generation virtual methods... ++ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const override; ++ ++ unsigned getRegPressureLimit(const TargetRegisterClass *RC, ++ MachineFunction &MF) const override; ++ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; ++ const uint32_t *getCallPreservedMask(const MachineFunction &MF, ++ CallingConv::ID) const override; ++ BitVector getReservedRegs(const MachineFunction &MF) const override; ++ ++ bool requiresRegisterScavenging(const MachineFunction &MF) const override; ++ ++ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; ++ ++ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; ++ ++ /// Stack Frame Processing Methods ++ void eliminateFrameIndex(MachineBasicBlock::iterator II, ++ int SPAdj, unsigned FIOperandNum, ++ RegScavenger *RS = nullptr) const override; ++ ++ // Stack realignment queries. ++ bool canRealignStack(const MachineFunction &MF) const override; ++ ++ /// Debug information queries. ++ Register getFrameRegister(const MachineFunction &MF) const override; ++ ++ /// Return GPR register class. ++ const TargetRegisterClass *intRegClass(unsigned Size) const; ++ ++private: ++ void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, ++ int FrameIndex, uint64_t StackSize, ++ int SPAdj, int64_t SPOffset) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H +diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.td b/lib/Target/LoongArch/LoongArchRegisterInfo.td +new file mode 100644 +index 00000000..96569e07 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchRegisterInfo.td +@@ -0,0 +1,373 @@ ++//===-- LoongArchRegisterInfo.td - LoongArch Register defs -----------*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Declarations that describe the LoongArch register file ++//===----------------------------------------------------------------------===// ++let Namespace = "LoongArch" in { ++def sub_32 : SubRegIndex<32>; ++def sub_64 : SubRegIndex<64>; ++def sub_128 : SubRegIndex<128>; ++def sub_fcsr1 : SubRegIndex<5>; ++def sub_fcsr2 : SubRegIndex<13, 16>; ++def sub_fcsr3 : SubRegIndex<2, 8>; ++def sub_lo : SubRegIndex<32>; ++def sub_hi : SubRegIndex<32, 32>; ++def PC : Register<"pc">; ++} ++ ++class Unallocatable { ++ bit isAllocatable = 0; ++} ++ ++/// We have banks of registers each. ++class LoongArchReg Enc, string n> : Register { ++ let HWEncoding = Enc; ++ let Namespace = "LoongArch"; ++} ++ ++class LoongArchRegWithSubRegs Enc, string n, list subregs> ++ : RegisterWithSubRegs { ++ let HWEncoding = Enc; ++ let Namespace = "LoongArch"; ++} ++ ++/// LoongArch 32-bit CPU Registers. ++class LoongArch32GPR Enc, string n> : LoongArchReg; ++ ++/// LoongArch 64-bit CPU Registers. ++class LoongArch64GPR Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_32]; ++} ++ ++/// LoongArch 64-bit Floating-point Registers ++class FGR32 Enc, string n> : LoongArchReg; ++class FGR64 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_lo]; ++} ++ ++// LoongArch 128-bit (aliased) LSX Registers ++class LSX128 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_64]; ++} ++ ++// LoongArch 256-bit (aliased) LASX Registers ++class LASX256 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_128]; ++} ++ ++//===----------------------------------------------------------------------===// ++// Registers ++//===----------------------------------------------------------------------===// ++ ++/// General Purpose 32-bit Registers ++def ZERO : LoongArch32GPR<0, "zero">, ++ DwarfRegNum<[0]>; ++def RA : LoongArch32GPR<1, "ra">, DwarfRegNum<[1]>; ++def TP : LoongArch32GPR<2, "tp">, DwarfRegNum<[2]>; ++def SP : LoongArch32GPR<3, "sp">, DwarfRegNum<[3]>; ++def A0 : LoongArch32GPR<4, "r4">, DwarfRegNum<[4]>; ++def A1 : LoongArch32GPR<5, "r5">, DwarfRegNum<[5]>; ++def A2 : LoongArch32GPR<6, "r6">, DwarfRegNum<[6]>; ++def A3 : LoongArch32GPR<7, "r7">, DwarfRegNum<[7]>; ++def A4 : LoongArch32GPR<8, "r8">, DwarfRegNum<[8]>; ++def A5 : LoongArch32GPR<9, "r9">, DwarfRegNum<[9]>; ++def A6 : LoongArch32GPR<10, "r10">, DwarfRegNum<[10]>; ++def A7 : LoongArch32GPR<11, "r11">, DwarfRegNum<[11]>; ++def T0 : LoongArch32GPR<12, "r12">, DwarfRegNum<[12]>; ++def T1 : LoongArch32GPR<13, "r13">, DwarfRegNum<[13]>; ++def T2 : LoongArch32GPR<14, "r14">, DwarfRegNum<[14]>; ++def T3 : LoongArch32GPR<15, "r15">, DwarfRegNum<[15]>; ++def T4 : LoongArch32GPR<16, "r16">, DwarfRegNum<[16]>; ++def T5 : LoongArch32GPR<17, "r17">, DwarfRegNum<[17]>; ++def T6 : LoongArch32GPR<18, "r18">, DwarfRegNum<[18]>; ++def T7 : LoongArch32GPR<19, "r19">, DwarfRegNum<[19]>; ++def T8 : LoongArch32GPR<20, "r20">, DwarfRegNum<[20]>; ++def T9 : LoongArch32GPR<21, "r21">, DwarfRegNum<[21]>; ++def FP : LoongArch32GPR<22, "r22">, DwarfRegNum<[22]>; ++def S0 : LoongArch32GPR<23, "r23">, DwarfRegNum<[23]>; ++def S1 : LoongArch32GPR<24, "r24">, DwarfRegNum<[24]>; ++def S2 : LoongArch32GPR<25, "r25">, 
DwarfRegNum<[25]>; ++def S3 : LoongArch32GPR<26, "r26">, DwarfRegNum<[26]>; ++def S4 : LoongArch32GPR<27, "r27">, DwarfRegNum<[27]>; ++def S5 : LoongArch32GPR<28, "r28">, DwarfRegNum<[28]>; ++def S6 : LoongArch32GPR<29, "r29">, DwarfRegNum<[29]>; ++def S7 : LoongArch32GPR<30, "r30">, DwarfRegNum<[30]>; ++def S8 : LoongArch32GPR<31, "r31">, DwarfRegNum<[31]>; ++ ++let SubRegIndices = [sub_32] in { ++def V0 : LoongArchRegWithSubRegs<4, "r4", [A0]>, DwarfRegNum<[4]>; ++def V1 : LoongArchRegWithSubRegs<5, "r5", [A1]>, DwarfRegNum<[5]>; ++} ++ ++/// General Purpose 64-bit Registers ++def ZERO_64 : LoongArch64GPR<0, "zero", [ZERO]>, DwarfRegNum<[0]>; ++def RA_64 : LoongArch64GPR<1, "ra", [RA]>, DwarfRegNum<[1]>; ++def TP_64 : LoongArch64GPR<2, "tp", [TP]>, DwarfRegNum<[2]>; ++def SP_64 : LoongArch64GPR<3, "sp", [SP]>, DwarfRegNum<[3]>; ++def A0_64 : LoongArch64GPR<4, "r4", [A0]>, DwarfRegNum<[4]>; ++def A1_64 : LoongArch64GPR<5, "r5", [A1]>, DwarfRegNum<[5]>; ++def A2_64 : LoongArch64GPR<6, "r6", [A2]>, DwarfRegNum<[6]>; ++def A3_64 : LoongArch64GPR<7, "r7", [A3]>, DwarfRegNum<[7]>; ++def A4_64 : LoongArch64GPR<8, "r8", [A4]>, DwarfRegNum<[8]>; ++def A5_64 : LoongArch64GPR<9, "r9", [A5]>, DwarfRegNum<[9]>; ++def A6_64 : LoongArch64GPR<10, "r10", [A6]>, DwarfRegNum<[10]>; ++def A7_64 : LoongArch64GPR<11, "r11", [A7]>, DwarfRegNum<[11]>; ++def T0_64 : LoongArch64GPR<12, "r12", [T0]>, DwarfRegNum<[12]>; ++def T1_64 : LoongArch64GPR<13, "r13", [T1]>, DwarfRegNum<[13]>; ++def T2_64 : LoongArch64GPR<14, "r14", [T2]>, DwarfRegNum<[14]>; ++def T3_64 : LoongArch64GPR<15, "r15", [T3]>, DwarfRegNum<[15]>; ++def T4_64 : LoongArch64GPR<16, "r16", [T4]>, DwarfRegNum<[16]>; ++def T5_64 : LoongArch64GPR<17, "r17", [T5]>, DwarfRegNum<[17]>; ++def T6_64 : LoongArch64GPR<18, "r18", [T6]>, DwarfRegNum<[18]>; ++def T7_64 : LoongArch64GPR<19, "r19", [T7]>, DwarfRegNum<[19]>; ++def T8_64 : LoongArch64GPR<20, "r20", [T8]>, DwarfRegNum<[20]>; ++def T9_64 : LoongArch64GPR<21, "r21", [T9]>, DwarfRegNum<[21]>; ++def FP_64 : LoongArch64GPR<22, "r22", [FP]>, DwarfRegNum<[22]>; ++def S0_64 : LoongArch64GPR<23, "r23", [S0]>, DwarfRegNum<[23]>; ++def S1_64 : LoongArch64GPR<24, "r24", [S1]>, DwarfRegNum<[24]>; ++def S2_64 : LoongArch64GPR<25, "r25", [S2]>, DwarfRegNum<[25]>; ++def S3_64 : LoongArch64GPR<26, "r26", [S3]>, DwarfRegNum<[26]>; ++def S4_64 : LoongArch64GPR<27, "r27", [S4]>, DwarfRegNum<[27]>; ++def S5_64 : LoongArch64GPR<28, "r28", [S5]>, DwarfRegNum<[28]>; ++def S6_64 : LoongArch64GPR<29, "r29", [S6]>, DwarfRegNum<[29]>; ++def S7_64 : LoongArch64GPR<30, "r30", [S7]>, DwarfRegNum<[30]>; ++def S8_64 : LoongArch64GPR<31, "r31", [S8]>, DwarfRegNum<[31]>; ++ ++let SubRegIndices = [sub_64] in { ++def V0_64 : LoongArch64GPR<4, "r4", [A0_64]>, DwarfRegNum<[4]>; ++def V1_64 : LoongArch64GPR<5, "r5", [A1_64]>, DwarfRegNum<[5]>; ++} ++ ++/// FP registers ++foreach I = 0-31 in ++def F#I : FGR32, DwarfRegNum<[!add(I, 32)]>; ++ ++foreach I = 0-31 in ++def F#I#_64 : FGR64("F"#I)]>, DwarfRegNum<[!add(I, 32)]>; ++ ++/// FP Condition Flag 0~7 ++foreach I = 0-7 in ++def FCC#I : LoongArchReg; ++ ++/// FP Control and Status Registers, FCSR 1~3 ++foreach I = 1-3 in ++def FCSR#I : LoongArchReg; ++ ++class FCSRReg Enc, string n, list subregs> : ++ RegisterWithSubRegs { ++// field bits<2> chan_encoding = 0; ++ let Namespace = "LoongArch"; ++ let SubRegIndices = [sub_fcsr1, sub_fcsr2, sub_fcsr3]; ++// let HWEncoding{8-0} = encoding{8-0}; ++// let HWEncoding{10-9} = chan_encoding; ++} ++ ++def FCSR0 : FCSRReg<0, "fcsr0", [FCSR1, FCSR2, 
FCSR3]>; ++ ++/// PC register ++//let NameSpace = "LoongArch" in ++//def PC : Register<"pc">; ++ ++//===----------------------------------------------------------------------===// ++// Register Classes ++//===----------------------------------------------------------------------===// ++ ++def GPR32 : RegisterClass<"LoongArch", [i32], 32, (add ++ // Reserved ++ ZERO, ++ // Return Values and Arguments ++ A0, A1, A2, A3, A4, A5, A6, A7, ++ // Not preserved across procedure calls ++ T0, T1, T2, T3, T4, T5, T6, T7, T8, ++ // Callee save ++ S0, S1, S2, S3, S4, S5, S6, S7, S8, ++ // Reserved ++ RA, TP, SP, ++ // Reserved ++ T9, FP)>; ++ ++def GPR64 : RegisterClass<"LoongArch", [i64], 64, (add ++ // Reserved ++ ZERO_64, ++ // Return Values and Arguments ++ A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, ++ // Not preserved across procedure calls ++ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, ++ // Callee save ++ S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, S8_64, ++ // Reserved ++ RA_64, TP_64, SP_64, ++ // Reserved ++ T9_64, FP_64)>; ++ ++def GPRTC64 : RegisterClass<"LoongArch", [i64], 64, (add ++ // Return Values and Arguments ++ A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, ++ // Not preserved across procedure calls ++ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64)>; ++ ++/// FP Registers. ++def FGR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>; ++def FGR32 : RegisterClass<"LoongArch", [f32], 64, (sequence "F%u", 0, 31)>; ++ ++/// FP condition Flag registers. ++def FCFR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCC%u", 0, 7)>, ++ Unallocatable; ++ ++def SP32 : RegisterClass<"LoongArch", [i32], 32, (add SP)>, Unallocatable; ++def SP64 : RegisterClass<"LoongArch", [i64], 64, (add SP_64)>, Unallocatable; ++def TP32 : RegisterClass<"LoongArch", [i32], 32, (add TP)>, Unallocatable; ++def TP64 : RegisterClass<"LoongArch", [i64], 64, (add TP_64)>, Unallocatable; ++ ++/// FP control and Status registers. ++def FCSR : RegisterClass<"LoongArch", [i32], 4, (sequence "FCSR%u", 0, 3)>, ++ Unallocatable; ++ ++//LSX ++foreach I = 0-31 in ++def VR#I : LSX128("F"#I#"_64")]>, ++ DwarfRegNum<[!add(I, 32)]>; ++ ++//LASX ++foreach I = 0-31 in ++def XR#I : LASX256("VR"#I)]>, ++ DwarfRegNum<[!add(I, 32)]>; ++ ++def LSX128B: RegisterClass<"LoongArch", [v16i8], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128H: RegisterClass<"LoongArch", [v8i16], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128W: RegisterClass<"LoongArch", [v4i32, v4f32], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128D: RegisterClass<"LoongArch", [v2i64, v2f64], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LASX256B: RegisterClass<"LoongArch", [v32i8], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256H: RegisterClass<"LoongArch", [v16i16], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256W: RegisterClass<"LoongArch", [v8i32, v8f32], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256D: RegisterClass<"LoongArch", [v4i64, v4f64], 256, ++ (sequence "XR%u", 0, 31)>; ++ ++//===----------------------------------------------------------------------===// ++// Register Operands. 
++//===----------------------------------------------------------------------===// ++ ++class LoongArchAsmRegOperand : AsmOperandClass { ++ let ParserMethod = "parseAnyRegister"; ++} ++ ++def GPR32AsmOperand : LoongArchAsmRegOperand { ++ let Name = "GPR32AsmReg"; ++ let PredicateMethod = "isGPRAsmReg"; ++} ++ ++def GPR64AsmOperand : LoongArchAsmRegOperand { ++ let Name = "GPR64AsmReg"; ++ let PredicateMethod = "isGPRAsmReg"; ++} ++ ++def FGR32AsmOperand : LoongArchAsmRegOperand { ++ let Name = "FGR32AsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def FGR64AsmOperand : LoongArchAsmRegOperand { ++ let Name = "FGR64AsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def FCSRAsmOperand : LoongArchAsmRegOperand { ++ let Name = "FCSRAsmReg"; ++} ++ ++def FCFRAsmOperand : LoongArchAsmRegOperand { ++ let Name = "FCFRAsmReg"; ++} ++ ++//LSX ++def LSX128AsmOperand : LoongArchAsmRegOperand { ++ let Name = "LSX128AsmReg"; ++} ++ ++//LASX ++def LASX256AsmOperand : LoongArchAsmRegOperand { ++ let Name = "LASX256AsmReg"; ++} ++ ++def GPR32Opnd : RegisterOperand { ++ let ParserMatchClass = GPR32AsmOperand; ++} ++ ++def GPR64Opnd : RegisterOperand { ++ let ParserMatchClass = GPR64AsmOperand; ++} ++ ++def GPRTC64Opnd : RegisterOperand { ++ let ParserMatchClass = GPR64AsmOperand; ++} ++ ++def FGR32Opnd : RegisterOperand { ++ let ParserMatchClass = FGR32AsmOperand; ++} ++ ++def FGR64Opnd : RegisterOperand { ++ let ParserMatchClass = FGR64AsmOperand; ++} ++ ++def FCSROpnd : RegisterOperand { ++ let ParserMatchClass = FCSRAsmOperand; ++} ++ ++def FCFROpnd : RegisterOperand { ++ let ParserMatchClass = FCFRAsmOperand; ++} ++ ++//LSX ++def LSX128BOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128HOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128WOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128DOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++//LASX ++def LASX256BOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256HOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256WOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256DOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} +diff --git a/lib/Target/LoongArch/LoongArchSubtarget.cpp b/lib/Target/LoongArch/LoongArchSubtarget.cpp +new file mode 100644 +index 00000000..ebc7a514 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchSubtarget.cpp +@@ -0,0 +1,112 @@ ++//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the LoongArch specific subclass of TargetSubtargetInfo. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchSubtarget.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-subtarget" ++ ++#define GET_SUBTARGETINFO_TARGET_DESC ++#define GET_SUBTARGETINFO_CTOR ++#include "LoongArchGenSubtargetInfo.inc" ++ ++void LoongArchSubtarget::anchor() {} ++ ++LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, ++ StringRef FS, ++ const LoongArchTargetMachine &TM, ++ MaybeAlign StackAlignOverride) ++ : LoongArchGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), HasLA64(false), ++ HasBasicF(false), HasBasicD(false), HasLSX(false), HasLASX(false), ++ UnalignedAccess(false), StackAlignOverride(StackAlignOverride), TM(TM), ++ TargetTriple(TT), TSInfo(), ++ InstrInfo(initializeSubtargetDependencies(CPU, FS, TM)), ++ FrameLowering(*this), TLInfo(TM, *this) { ++ ++ // Check if Architecture and ABI are compatible. ++ assert(((!is64Bit() && isABI_ILP32()) || (is64Bit() && isABI_LP64())) && ++ "Invalid Arch & ABI pair."); ++ ++ if (hasLSX() && !hasBasicD()) ++ report_fatal_error("LSX requires 64-bit floating point register." ++ "See -mattr=+d.", ++ false); ++ ++} ++ ++bool LoongArchSubtarget::isPositionIndependent() const { ++ return TM.isPositionIndependent(); ++} ++ ++/// This overrides the PostRAScheduler bit in the SchedModel for any CPU. ++bool LoongArchSubtarget::enablePostRAScheduler() const { return true; } ++ ++void LoongArchSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { ++ CriticalPathRCs.clear(); ++ CriticalPathRCs.push_back(is64Bit() ? &LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass); ++} ++ ++CodeGenOpt::Level LoongArchSubtarget::getOptLevelToEnablePostRAScheduler() const { ++ return CodeGenOpt::Aggressive; ++} ++ ++LoongArchSubtarget & ++LoongArchSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, ++ const TargetMachine &TM) { ++ StringRef CPUName = LoongArch_MC::selectLoongArchCPU(TM.getTargetTriple(), CPU); ++ ++ // Parse features string. ++ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); ++ // Initialize scheduling itinerary for the specified CPU. 
++ InstrItins = getInstrItineraryForCPU(CPUName); ++ ++ if (StackAlignOverride) ++ stackAlignment = *StackAlignOverride; ++ else if (isABI_LP64()) ++ stackAlignment = Align(16); ++ else { ++ assert(isABI_ILP32() && "Unknown ABI for stack alignment!"); ++ stackAlignment = Align(8); ++ } ++ ++ return *this; ++} ++ ++Reloc::Model LoongArchSubtarget::getRelocationModel() const { ++ return TM.getRelocationModel(); ++} ++ ++bool LoongArchSubtarget::isABI_LP64D() const { return getABI().IsLP64D(); } ++bool LoongArchSubtarget::isABI_LP64S() const { return getABI().IsLP64S(); } ++bool LoongArchSubtarget::isABI_LP64F() const { return getABI().IsLP64F(); } ++bool LoongArchSubtarget::isABI_LP64() const { ++ return isABI_LP64D() || isABI_LP64S() || isABI_LP64F(); ++} ++bool LoongArchSubtarget::isABI_ILP32D() const { return getABI().IsILP32D(); } ++bool LoongArchSubtarget::isABI_ILP32F() const { return getABI().IsILP32F(); } ++bool LoongArchSubtarget::isABI_ILP32S() const { return getABI().IsILP32S(); } ++bool LoongArchSubtarget::isABI_ILP32() const { ++ return isABI_ILP32D() || isABI_ILP32F() || isABI_ILP32S(); ++} ++const LoongArchABIInfo &LoongArchSubtarget::getABI() const { return TM.getABI(); } +diff --git a/lib/Target/LoongArch/LoongArchSubtarget.h b/lib/Target/LoongArch/LoongArchSubtarget.h +new file mode 100644 +index 00000000..7b1d3061 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -0,0 +1,146 @@ ++//===-- LoongArchSubtarget.h - Define Subtarget for the LoongArch ---------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of TargetSubtargetInfo. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchFrameLowering.h" ++#include "LoongArchISelLowering.h" ++#include "LoongArchInstrInfo.h" ++#include "llvm/CodeGen/SelectionDAGTargetInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/MC/MCInstrItineraries.h" ++#include "llvm/Support/ErrorHandling.h" ++#include ++ ++#define GET_SUBTARGETINFO_HEADER ++#include "LoongArchGenSubtargetInfo.inc" ++ ++namespace llvm { ++class StringRef; ++ ++class LoongArchTargetMachine; ++ ++class LoongArchSubtarget : public LoongArchGenSubtargetInfo { ++ virtual void anchor(); ++ ++ // HasLA64 - The target processor has LA64 ISA support. ++ bool HasLA64; ++ ++ // HasBasicF - The target restricts the use of hardware floating-point ++ // instructions to 32-bit operations. ++ bool HasBasicF; ++ ++ // HasBasicD - The target allows hardware floating-point instructions to ++ // cover both 32-bit and 64-bit operations. ++ bool HasBasicD; ++ ++ /// Features related to the presence of specific instructions. ++ ++ // HasLSX - Supports LSX. ++ bool HasLSX; ++ ++ // HasLASX - Supports LASX. ++ bool HasLASX; ++ ++ /// The minimum alignment known to hold of the stack frame on ++ /// entry to the function and which must be maintained by every function. ++ Align stackAlignment; ++ ++ // Allow unaligned memory accesses. 
++ bool UnalignedAccess; ++ ++ /// The overridden stack alignment. ++ MaybeAlign StackAlignOverride; ++ ++ InstrItineraryData InstrItins; ++ ++ const LoongArchTargetMachine &TM; ++ ++ Triple TargetTriple; ++ ++ const SelectionDAGTargetInfo TSInfo; ++ const LoongArchInstrInfo InstrInfo; ++ const LoongArchFrameLowering FrameLowering; ++ const LoongArchTargetLowering TLInfo; ++ ++public: ++ bool isPositionIndependent() const; ++ /// This overrides the PostRAScheduler bit in the SchedModel for each CPU. ++ bool enablePostRAScheduler() const override; ++ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; ++ CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override; ++ ++ bool isABI_LP64() const; ++ bool isABI_LP64D() const; ++ bool isABI_LP64S() const; ++ bool isABI_LP64F() const; ++ bool isABI_ILP32() const; ++ bool isABI_ILP32D() const; ++ bool isABI_ILP32F() const; ++ bool isABI_ILP32S() const; ++ const LoongArchABIInfo &getABI() const; ++ ++ /// This constructor initializes the data members to match that ++ /// of the specified triple. ++ LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef FS, ++ const LoongArchTargetMachine &TM, MaybeAlign StackAlignOverride); ++ ++ /// ParseSubtargetFeatures - Parses features string setting specified ++ /// subtarget options. Definition of function is auto generated by tblgen. ++ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); ++ ++ bool is64Bit() const { return HasLA64; } ++ bool hasBasicD() const { return HasBasicD; } ++ unsigned getGPRSizeInBytes() const { return is64Bit() ? 8 : 4; } ++ bool hasLSX() const { return HasLSX; } ++ bool hasLASX() const { return HasLASX; } ++ bool hasBasicF() const { return HasBasicF; } ++ bool useSoftFloat() const { return (!HasBasicD && !HasBasicF); } ++ ++ bool allowUnalignedAccess() const { return UnalignedAccess; } ++ ++ // After compiler-rt is supported in LA, this returns true. ++ bool isXRaySupported() const override { return false; } ++ ++ Align getStackAlignment() const { return stackAlignment; } ++ ++ // Grab relocation model ++ Reloc::Model getRelocationModel() const; ++ ++ LoongArchSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, ++ const TargetMachine &TM); ++ ++ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { ++ return &TSInfo; ++ } ++ const LoongArchInstrInfo *getInstrInfo() const override { ++ return &InstrInfo; ++ } ++ const TargetFrameLowering *getFrameLowering() const override { ++ return &FrameLowering; ++ } ++ const LoongArchRegisterInfo *getRegisterInfo() const override { ++ return &InstrInfo.getRegisterInfo(); ++ } ++ const LoongArchTargetLowering *getTargetLowering() const override { ++ return &TLInfo; ++ } ++ const InstrItineraryData *getInstrItineraryData() const override { ++ return &InstrItins; ++ } ++}; ++} // End llvm namespace ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/lib/Target/LoongArch/LoongArchTargetMachine.cpp +new file mode 100644 +index 00000000..2aa86a65 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetMachine.cpp +@@ -0,0 +1,186 @@ ++//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Implements the info about LoongArch target spec. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetMachine.h" ++#include "LoongArch.h" ++#include "LoongArchISelDAGToDAG.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchTargetTransformInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/Passes.h" ++#include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch" ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { ++ // Register the target. ++ RegisterTargetMachine X(getTheLoongArch32Target()); ++ RegisterTargetMachine A(getTheLoongArch64Target()); ++} ++ ++static std::string computeDataLayout(const Triple &TT, StringRef CPU, ++ const TargetOptions &Options) { ++ std::string Ret; ++ LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions); ++ ++ Ret += "e"; ++ ++ if (ABI.IsILP32()) ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ else ++ Ret += "-m:e"; ++ ++ // Pointers are 32 bit on some ABIs. ++ if (!ABI.IsLP64()) ++ Ret += "-p:32:32"; ++ ++ // 8 and 16 bit integers only need to have natural alignment, but try to ++ // align them to 32 bits. 64 bit integers have natural alignment. ++ Ret += "-i8:8:32-i16:16:32-i64:64"; ++ ++ // 32 bit registers are always available and the stack is at least 64 bit ++ // aligned. On LP64 64 bit registers are also available and the stack is ++ // 128 bit aligned. ++ if (ABI.IsLP64()) ++ Ret += "-n32:64-S128"; ++ else ++ Ret += "-n32-S64"; ++ ++ return Ret; ++} ++ ++static Reloc::Model getEffectiveRelocModel(bool JIT, ++ Optional RM) { ++ if (!RM.hasValue() || JIT) ++ return Reloc::Static; ++ return *RM; ++} ++ ++// On function prologue, the stack is created by decrementing ++// its pointer. Once decremented, all references are done with positive ++// offset from the stack/frame pointer, using StackGrowsUp enables ++// an easier handling. ++// Using CodeModel::Large enables different CALL behavior. 
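++// For reference, with the default 64-bit lp64d configuration the string
++// assembled by computeDataLayout() above evaluates to:
++//   e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128
++// i.e. little-endian, ELF-style mangling, 32/64-bit native integer widths
++// and a 128-bit aligned stack.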
++LoongArchTargetMachine::LoongArchTargetMachine(const Target &T, const Triple &TT, ++ StringRef CPU, StringRef FS, ++ const TargetOptions &Options, ++ Optional RM, ++ Optional CM, ++ CodeGenOpt::Level OL, bool JIT) ++ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, ++ CPU, FS, Options, getEffectiveRelocModel(JIT, RM), ++ getEffectiveCodeModel(CM, CodeModel::Small), OL), ++ TLOF(std::make_unique()), ++ ABI(LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)) { ++ initAsmInfo(); ++} ++ ++LoongArchTargetMachine::~LoongArchTargetMachine() = default; ++ ++const LoongArchSubtarget * ++LoongArchTargetMachine::getSubtargetImpl(const Function &F) const { ++ Attribute CPUAttr = F.getFnAttribute("target-cpu"); ++ Attribute FSAttr = F.getFnAttribute("target-features"); ++ ++ std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ++ ? CPUAttr.getValueAsString().str() ++ : TargetCPU; ++ std::string FS = !FSAttr.hasAttribute(Attribute::None) ++ ? FSAttr.getValueAsString().str() ++ : TargetFS; ++ ++ auto &I = SubtargetMap[CPU + FS]; ++ if (!I) { ++ // This needs to be done before we create a new subtarget since any ++ // creation will depend on the TM and the code generation flags on the ++ // function that reside in TargetOptions. ++ resetTargetOptions(F); ++ I = std::make_unique(TargetTriple, CPU, FS, *this, ++ MaybeAlign(F.getParent()->getOverrideStackAlignment())); ++ } ++ return I.get(); ++} ++ ++namespace { ++ ++/// LoongArch Code Generator Pass Configuration Options. ++class LoongArchPassConfig : public TargetPassConfig { ++public: ++ LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM) ++ : TargetPassConfig(TM, PM) { ++ } ++ ++ LoongArchTargetMachine &getLoongArchTargetMachine() const { ++ return getTM(); ++ } ++ ++ void addIRPasses() override; ++ bool addInstSelector() override; ++ void addPreEmitPass() override; ++}; ++ ++} // end anonymous namespace ++ ++TargetPassConfig *LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { ++ return new LoongArchPassConfig(*this, PM); ++} ++ ++void LoongArchPassConfig::addIRPasses() { ++ TargetPassConfig::addIRPasses(); ++ addPass(createAtomicExpandPass()); ++} ++// Install an instruction selector pass using ++// the ISelDag to gen LoongArch code. ++bool LoongArchPassConfig::addInstSelector() { ++ addPass(createLoongArchModuleISelDagPass()); ++ addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); ++ return false; ++} ++ ++TargetTransformInfo ++LoongArchTargetMachine::getTargetTransformInfo(const Function &F) { ++ LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n"); ++ return TargetTransformInfo(BasicTTIImpl(this, F)); ++} ++ ++// Implemented by targets that want to run passes immediately before ++// machine code is emitted. return true if -print-machineinstrs should ++// print out the code after the passes. ++void LoongArchPassConfig::addPreEmitPass() { ++ // Expand pseudo instructions that are sensitive to register allocation. ++ addPass(createLoongArchExpandPseudoPass()); ++ ++ // Relax conditional branch instructions if they're otherwise out of ++ // range of their destination. 
++ // This pass must be run after any pseudo instruction expansion ++ addPass(&BranchRelaxationPassID); ++} +diff --git a/lib/Target/LoongArch/LoongArchTargetMachine.h b/lib/Target/LoongArch/LoongArchTargetMachine.h +new file mode 100644 +index 00000000..ae09adf7 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetMachine.h +@@ -0,0 +1,68 @@ ++//===- LoongArchTargetMachine.h - Define TargetMachine for LoongArch ------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of TargetMachine. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/StringMap.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++ ++namespace llvm { ++ ++class LoongArchTargetMachine : public LLVMTargetMachine { ++ std::unique_ptr TLOF; ++ // Selected ABI ++ LoongArchABIInfo ABI; ++ ++ mutable StringMap> SubtargetMap; ++ ++public: ++ LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU, ++ StringRef FS, const TargetOptions &Options, ++ Optional RM, Optional CM, ++ CodeGenOpt::Level OL, bool JIT); ++ ~LoongArchTargetMachine() override; ++ ++ TargetTransformInfo getTargetTransformInfo(const Function &F) override; ++ const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override; ++ ++ // Pass Pipeline Configuration ++ TargetPassConfig *createPassConfig(PassManagerBase &PM) override; ++ ++ TargetLoweringObjectFile *getObjFileLowering() const override { ++ return TLOF.get(); ++ } ++ ++ /// Returns true if a cast between SrcAS and DestAS is a noop. ++ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { ++ // Mips doesn't have any special address spaces so we just reserve ++ // the first 256 for software use (e.g. OpenCL) and treat casts ++ // between them as noops. ++ return SrcAS < 256 && DestAS < 256; ++ } ++ ++ const LoongArchABIInfo &getABI() const { return ABI; } ++ ++ bool isMachineVerifierClean() const override { ++ return false; ++ } ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H +diff --git a/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp b/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp +new file mode 100644 +index 00000000..9c6250d2 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp +@@ -0,0 +1,26 @@ ++//===-- LoongArchTargetObjectFile.cpp - LoongArch Object Files ----------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/GlobalVariable.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++using namespace llvm; ++ ++void LoongArchTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ++ TargetLoweringObjectFileELF::Initialize(Ctx, TM); ++ InitializeELF(TM.Options.UseInitArray); ++} +diff --git a/lib/Target/LoongArch/LoongArchTargetObjectFile.h b/lib/Target/LoongArch/LoongArchTargetObjectFile.h +new file mode 100644 +index 00000000..a50c5717 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetObjectFile.h +@@ -0,0 +1,24 @@ ++//===-- llvm/Target/LoongArchTargetObjectFile.h - LoongArch Object Info ---*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H ++ ++#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" ++ ++namespace llvm { ++class LoongArchTargetMachine; ++ class LoongArchTargetObjectFile : public TargetLoweringObjectFileELF { ++ ++ public: ++ ++ void Initialize(MCContext &Ctx, const TargetMachine &TM) override; ++ }; ++} // end namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchTargetStreamer.h b/lib/Target/LoongArch/LoongArchTargetStreamer.h +new file mode 100644 +index 00000000..a9adc32d +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetStreamer.h +@@ -0,0 +1,130 @@ ++//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer ------------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCStreamer.h" ++ ++namespace llvm { ++ ++class formatted_raw_ostream; ++ ++struct LoongArchFPABIInfo; ++ ++class LoongArchTargetStreamer : public MCTargetStreamer { ++public: ++ LoongArchTargetStreamer(MCStreamer &S); ++ ++ virtual void setPic(bool Value) {} ++ ++ virtual void emitDirectiveOptionPic0(); ++ virtual void emitDirectiveOptionPic2(); ++ ++ virtual void emitDirectiveSetArch(StringRef Arch); ++ virtual void emitDirectiveSetLoongArch32(); ++ virtual void emitDirectiveSetloongarch64(); ++ ++ void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, MCOperand Op2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int32_t Imm, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, ++ MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, ++ int16_t Imm1, int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitAdd(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, ++ const MCSubtargetInfo *STI); ++ void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI); ++ ++ void forbidModuleDirective() { ModuleDirectiveAllowed = false; } ++ void reallowModuleDirective() { ModuleDirectiveAllowed = true; } ++ bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } ++ ++ template ++ void updateABIInfo(const PredicateLibrary &P) { ++ ABI = P.getABI(); ++ } ++ ++ const LoongArchABIInfo &getABI() const { ++ assert(ABI.hasValue() && "ABI hasn't been set!"); ++ return *ABI; ++ } ++ ++protected: ++ llvm::Optional ABI; ++ ++ bool GPRInfoSet; ++ ++ bool FPRInfoSet; ++ ++ bool FrameInfoSet; ++ int FrameOffset; ++ unsigned FrameReg; ++ unsigned ReturnReg; ++ ++private: ++ bool ModuleDirectiveAllowed; ++}; ++ ++// This part is for ascii assembly output ++class LoongArchTargetAsmStreamer : public LoongArchTargetStreamer { ++ formatted_raw_ostream &OS; ++ ++public: ++ LoongArchTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); ++ ++ void emitDirectiveOptionPic0() override; ++ void emitDirectiveOptionPic2() override; ++ ++ 
void emitDirectiveSetArch(StringRef Arch) override; ++ void emitDirectiveSetLoongArch32() override; ++ void emitDirectiveSetloongarch64() override; ++}; ++ ++// This part is for ELF object output ++class LoongArchTargetELFStreamer : public LoongArchTargetStreamer { ++ const MCSubtargetInfo &STI; ++ bool Pic; ++ ++public: ++ MCELFStreamer &getStreamer(); ++ LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); ++ ++ void setPic(bool Value) override { Pic = Value; } ++ ++ void emitLabel(MCSymbol *Symbol) override; ++ void finish() override; ++ ++ void emitDirectiveOptionPic0() override; ++ void emitDirectiveOptionPic2() override; ++}; ++} ++#endif +diff --git a/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +new file mode 100644 +index 00000000..9510dc02 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -0,0 +1,325 @@ ++//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI pass ++//----------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++/// \file ++/// This file implements a TargetTransformInfo analysis pass specific to the ++/// LoongArch target machine. It uses the target's detailed information to ++/// provide more precise answers to certain TTI queries, while letting the ++/// target independent and default TTI implementations handle the rest. ++/// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetTransformInfo.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/CostTable.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/IR/IntrinsicInst.h" ++#include "llvm/Support/Debug.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "LoongArchtti" ++ ++//===----------------------------------------------------------------------===// ++// ++// LoongArch cost model. ++// ++//===----------------------------------------------------------------------===// ++ ++bool LoongArchTTIImpl::areInlineCompatible(const Function *Caller, ++ const Function *Callee) const { ++ const TargetMachine &TM = getTLI()->getTargetMachine(); ++ ++ const FeatureBitset &CallerBits = ++ TM.getSubtargetImpl(*Caller)->getFeatureBits(); ++ const FeatureBitset &CalleeBits = ++ TM.getSubtargetImpl(*Callee)->getFeatureBits(); ++ ++ // Inline a callee if its target-features are a subset of the callers ++ // target-features. 
++ return (CallerBits & CalleeBits) == CalleeBits; ++} ++ ++TargetTransformInfo::PopcntSupportKind ++LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) { ++ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); ++ if (TyWidth == 32 || TyWidth == 64) ++ return TTI::PSK_FastHardware; ++ return TTI::PSK_Software; ++} ++ ++unsigned LoongArchTTIImpl::getNumberOfRegisters(bool Vector) { ++ if (Vector && !ST->hasLSX()) ++ return 0; ++ ++ return 32; ++} ++ ++unsigned LoongArchTTIImpl::getRegisterBitWidth(bool Vector) const { ++ if (Vector) { ++ if (ST->hasLASX()) ++ return 256; ++ ++ if (ST->hasLSX()) ++ return 128; ++ ++ return 0; ++ } ++ return 64; ++} ++ ++unsigned LoongArchTTIImpl::getMaxInterleaveFactor(unsigned VF) { ++ if (VF == 1) ++ return 1; ++ return 2; ++} ++ ++InstructionCost LoongArchTTIImpl::getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, ++ TTI::OperandValueProperties Opd1PropInfo, ++ TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, ++ const Instruction *CxtI) { ++ ++ std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); ++ ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ assert(ISD && "Invalid opcode"); ++ ++ static const CostTblEntry LASXCostTable[] = { ++ ++ {ISD::SHL, MVT::v32i8, 1}, ++ {ISD::SHL, MVT::v16i16, 1}, ++ {ISD::SHL, MVT::v8i32, 1}, ++ {ISD::SHL, MVT::v4i64, 1}, ++ ++ {ISD::SRL, MVT::v32i8, 1}, ++ {ISD::SRL, MVT::v16i16, 1}, ++ {ISD::SRL, MVT::v8i32, 1}, ++ {ISD::SRL, MVT::v4i64, 1}, ++ ++ {ISD::SRA, MVT::v32i8, 1}, ++ {ISD::SRA, MVT::v16i16, 1}, ++ {ISD::SRA, MVT::v8i32, 1}, ++ {ISD::SRA, MVT::v4i64, 1}, ++ ++ {ISD::SUB, MVT::v32i8, 1}, ++ {ISD::SUB, MVT::v16i16, 1}, ++ {ISD::SUB, MVT::v8i32, 1}, ++ {ISD::SUB, MVT::v4i64, 1}, ++ ++ {ISD::ADD, MVT::v32i8, 1}, ++ {ISD::ADD, MVT::v16i16, 1}, ++ {ISD::ADD, MVT::v8i32, 1}, ++ {ISD::ADD, MVT::v4i64, 1}, ++ ++ {ISD::MUL, MVT::v32i8, 1}, ++ {ISD::MUL, MVT::v16i16, 1}, ++ {ISD::MUL, MVT::v8i32, 1}, ++ {ISD::MUL, MVT::v4i64, 1}, ++ ++ {ISD::SDIV, MVT::v32i8, 29}, ++ {ISD::SDIV, MVT::v16i16, 19}, ++ {ISD::SDIV, MVT::v8i32, 14}, ++ {ISD::SDIV, MVT::v4i64, 13}, ++ ++ {ISD::UDIV, MVT::v32i8, 29}, ++ {ISD::UDIV, MVT::v16i16, 19}, ++ {ISD::UDIV, MVT::v8i32, 14}, ++ {ISD::UDIV, MVT::v4i64, 13}, ++ ++ {ISD::SREM, MVT::v32i8, 33}, ++ {ISD::SREM, MVT::v16i16, 21}, ++ {ISD::SREM, MVT::v8i32, 15}, ++ {ISD::SREM, MVT::v4i64, 13}, ++ ++ {ISD::UREM, MVT::v32i8, 29}, ++ {ISD::UREM, MVT::v16i16, 19}, ++ {ISD::UREM, MVT::v8i32, 14}, ++ {ISD::UREM, MVT::v4i64, 13}, ++ ++ {ISD::FADD, MVT::f64, 1}, ++ {ISD::FADD, MVT::f32, 1}, ++ {ISD::FADD, MVT::v4f64, 1}, ++ {ISD::FADD, MVT::v8f32, 1}, ++ ++ {ISD::FSUB, MVT::f64, 1}, ++ {ISD::FSUB, MVT::f32, 1}, ++ {ISD::FSUB, MVT::v4f64, 1}, ++ {ISD::FSUB, MVT::v8f32, 1}, ++ ++ {ISD::FMUL, MVT::f64, 1}, ++ {ISD::FMUL, MVT::f32, 1}, ++ {ISD::FMUL, MVT::v4f64, 1}, ++ {ISD::FMUL, MVT::v8f32, 1}, ++ ++ {ISD::FDIV, MVT::f32, 12}, ++ {ISD::FDIV, MVT::f64, 10}, ++ {ISD::FDIV, MVT::v8f32, 12}, ++ {ISD::FDIV, MVT::v4f64, 10} ++ ++ }; ++ ++ if (ST->hasLASX()) ++ if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, LT.second)) ++ return LT.first * Entry->Cost; ++ ++ static const CostTblEntry LSXCostTable[] = { ++ ++ {ISD::SHL, MVT::v16i8, 1}, ++ {ISD::SHL, MVT::v8i16, 1}, ++ {ISD::SHL, MVT::v4i32, 1}, ++ {ISD::SHL, MVT::v2i64, 1}, ++ ++ {ISD::SRL, MVT::v16i8, 1}, ++ {ISD::SRL, MVT::v8i16, 1}, ++ {ISD::SRL, MVT::v4i32, 1}, ++ {ISD::SRL, MVT::v2i64, 1}, ++ ++ {ISD::SRA, MVT::v16i8, 1}, ++ 
{ISD::SRA, MVT::v8i16, 1}, ++ {ISD::SRA, MVT::v4i32, 1}, ++ {ISD::SRA, MVT::v2i64, 1}, ++ ++ {ISD::SUB, MVT::v16i8, 1}, ++ {ISD::SUB, MVT::v8i16, 1}, ++ {ISD::SUB, MVT::v4i32, 1}, ++ {ISD::SUB, MVT::v2i64, 1}, ++ ++ {ISD::ADD, MVT::v16i8, 1}, ++ {ISD::ADD, MVT::v8i16, 1}, ++ {ISD::ADD, MVT::v4i32, 1}, ++ {ISD::ADD, MVT::v2i64, 1}, ++ ++ {ISD::MUL, MVT::v16i8, 1}, ++ {ISD::MUL, MVT::v8i16, 1}, ++ {ISD::MUL, MVT::v4i32, 1}, ++ {ISD::MUL, MVT::v2i64, 1}, ++ ++ {ISD::SDIV, MVT::v16i8, 29}, ++ {ISD::SDIV, MVT::v8i16, 19}, ++ {ISD::SDIV, MVT::v4i32, 14}, ++ {ISD::SDIV, MVT::v2i64, 13}, ++ ++ {ISD::UDIV, MVT::v16i8, 29}, ++ {ISD::UDIV, MVT::v8i16, 19}, ++ {ISD::UDIV, MVT::v4i32, 14}, ++ {ISD::UDIV, MVT::v2i64, 13}, ++ ++ {ISD::SREM, MVT::v16i8, 33}, ++ {ISD::SREM, MVT::v8i16, 21}, ++ {ISD::SREM, MVT::v4i32, 15}, ++ {ISD::SREM, MVT::v2i64, 13}, ++ ++ {ISD::UREM, MVT::v16i8, 29}, ++ {ISD::UREM, MVT::v8i16, 19}, ++ {ISD::UREM, MVT::v4i32, 14}, ++ {ISD::UREM, MVT::v2i64, 13}, ++ ++ {ISD::FADD, MVT::f64, 1}, ++ {ISD::FADD, MVT::f32, 1}, ++ {ISD::FADD, MVT::v2f64, 1}, ++ {ISD::FADD, MVT::v4f32, 1}, ++ ++ {ISD::FSUB, MVT::f64, 1}, ++ {ISD::FSUB, MVT::f32, 1}, ++ {ISD::FSUB, MVT::v2f64, 1}, ++ {ISD::FSUB, MVT::v4f32, 1}, ++ ++ {ISD::FMUL, MVT::f64, 1}, ++ {ISD::FMUL, MVT::f32, 1}, ++ {ISD::FMUL, MVT::v2f64, 1}, ++ {ISD::FMUL, MVT::v4f32, 1}, ++ ++ {ISD::FDIV, MVT::f32, 12}, ++ {ISD::FDIV, MVT::f64, 10}, ++ {ISD::FDIV, MVT::v4f32, 12}, ++ {ISD::FDIV, MVT::v2f64, 10} ++ ++ }; ++ ++ if (ST->hasLSX()) ++ if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, LT.second)) ++ return LT.first * Entry->Cost; ++ ++ // Fallback to the default implementation. ++ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); ++} ++ ++InstructionCost LoongArchTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, ++ unsigned Index) { ++ assert(Val->isVectorTy() && "This must be a vector type"); ++ ++ Type *ScalarType = Val->getScalarType(); ++ ++ if (Index != -1U) { ++ // Legalize the type. ++ std::pair LT = TLI->getTypeLegalizationCost(DL, Val); ++ ++ // This type is legalized to a scalar type. ++ if (!LT.second.isVector()) ++ return 0; ++ ++ // The type may be split. Normalize the index to the new type. ++ unsigned Width = LT.second.getVectorNumElements(); ++ Index = Index % Width; ++ ++ // The element at index zero is already inside the vector. ++ if (Index == 0) // if (ScalarType->isFloatingPointTy() && Index == 0) ++ return 0; ++ } ++ ++ // Add to the base cost if we know that the extracted element of a vector is ++ // destined to be moved to and used in the integer register file. 
++ int RegisterFileMoveCost = 0; ++ if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) ++ RegisterFileMoveCost = 1; ++ ++ return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; ++} ++ ++unsigned LoongArchTTIImpl::getLoadStoreVecRegBitWidth(unsigned) const { ++ return getRegisterBitWidth(true); ++} ++ ++InstructionCost LoongArchTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, ++ Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I) { ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ assert(ISD && "Invalid opcode"); ++ ++ static const TypeConversionCostTblEntry LASXConversionTbl[] = { ++ ++ // TODO:The cost requires more granular testing ++ {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3}, ++ {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3}, ++ {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3}, ++ {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3}, ++ {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3}, ++ {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3}, ++ ++ }; ++ ++ EVT SrcTy = TLI->getValueType(DL, Src); ++ EVT DstTy = TLI->getValueType(DL, Dst); ++ ++ if (!SrcTy.isSimple() || !DstTy.isSimple()) ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++ ++ if (ST->hasLASX()) { ++ if (const auto *Entry = ConvertCostTableLookup( ++ LASXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) ++ return Entry->Cost; ++ } ++ ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++} +diff --git a/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +new file mode 100644 +index 00000000..3a93fc8e +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +@@ -0,0 +1,91 @@ ++//===-- LoongArchTargetTransformInfo.h - LoongArch specific TTI -------------*- ++// C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// \file ++// This file a TargetTransformInfo::Concept conforming object specific to the ++// LoongArch target machine. It uses the target's detailed information to ++// provide more precise answers to certain TTI queries, while letting the ++// target independent and default TTI implementations handle the rest. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LoongArch_LoongArchTARGETTRANSFORMINFO_H ++#define LLVM_LIB_TARGET_LoongArch_LoongArchTARGETTRANSFORMINFO_H ++ ++#include "LoongArch.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/TargetLowering.h" ++ ++namespace llvm { ++ ++class LoongArchTTIImpl : public BasicTTIImplBase { ++ typedef BasicTTIImplBase BaseT; ++ typedef TargetTransformInfo TTI; ++ friend BaseT; ++ ++ const LoongArchSubtarget *ST; ++ const LoongArchTargetLowering *TLI; ++ ++ const LoongArchSubtarget *getST() const { return ST; } ++ const LoongArchTargetLowering *getTLI() const { return TLI; } ++ ++public: ++ explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F) ++ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), ++ TLI(ST->getTargetLowering()) {} ++ ++ bool areInlineCompatible(const Function *Caller, ++ const Function *Callee) const; ++ ++ /// \name Scalar TTI Implementations ++ // /// @{ ++ ++ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); ++ ++ /// @} ++ ++ /// \name Vector TTI Implementations ++ /// @{ ++ ++ bool enableInterleavedAccessVectorization() { return true; } ++ ++ unsigned getNumberOfRegisters(bool Vector); ++ ++ unsigned getRegisterBitWidth(bool Vector) const; ++ ++ unsigned getMaxInterleaveFactor(unsigned VF); ++ ++ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, ++ unsigned Index); ++ ++ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I = nullptr); ++ ++ unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; ++ ++ InstructionCost getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, ++ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, ++ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, ++ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ++ ArrayRef Args = ArrayRef(), ++ const Instruction *CxtI = nullptr); ++ ++ /// @} ++}; ++ ++} // end namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +new file mode 100644 +index 00000000..927fa7d5 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +@@ -0,0 +1,21 @@ ++ add_llvm_component_library(LLVMLoongArchDesc ++ LoongArchABIInfo.cpp ++ LoongArchAnalyzeImmediate.cpp ++ LoongArchAsmBackend.cpp ++ LoongArchELFObjectWriter.cpp ++ LoongArchELFStreamer.cpp ++ LoongArchInstPrinter.cpp ++ LoongArchMCAsmInfo.cpp ++ LoongArchMCCodeEmitter.cpp ++ LoongArchMCExpr.cpp ++ LoongArchMCTargetDesc.cpp ++ LoongArchTargetStreamer.cpp ++ ++ LINK_COMPONENTS ++ MC ++ LoongArchInfo ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp +new file mode 100644 +index 00000000..86aab1e3 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp +@@ -0,0 +1,113 @@ ++//===---- LoongArchABIInfo.cpp - Information about LoongArch ABI's ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchABIInfo.h" ++#include "LoongArchRegisterInfo.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/MC/MCTargetOptions.h" ++ ++using namespace llvm; ++ ++namespace { ++ ++static const MCPhysReg LoongArch64IntRegs[8] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, ++ LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; ++} ++ ++ArrayRef LoongArchABIInfo::GetByValArgRegs() const { ++ if (IsILP32()) ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ if (IsLP64()) ++ return makeArrayRef(LoongArch64IntRegs); ++ llvm_unreachable("Unhandled ABI"); ++} ++ ++ArrayRef LoongArchABIInfo::GetVarArgRegs() const { ++ if (IsILP32()) ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ if (IsLP64()) ++ return makeArrayRef(LoongArch64IntRegs); ++ llvm_unreachable("Unhandled ABI"); ++} ++ ++LoongArchABIInfo LoongArchABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, ++ const MCTargetOptions &Options) { ++ if (Options.getABIName().startswith("ilp32d")) ++ return LoongArchABIInfo::ILP32D(); ++ if (Options.getABIName().startswith("ilp32f")) ++ return LoongArchABIInfo::ILP32F(); ++ if (Options.getABIName().startswith("ilp32s")) ++ return LoongArchABIInfo::ILP32S(); ++ if (Options.getABIName().startswith("lp64d")) ++ return LoongArchABIInfo::LP64D(); ++ if (Options.getABIName().startswith("lp64s")) ++ return LoongArchABIInfo::LP64S(); ++ if (Options.getABIName().startswith("lp64f")) ++ return LoongArchABIInfo::LP64F(); ++ assert(Options.getABIName().empty() && "Unknown ABI option for LoongArch"); ++ ++ if (TT.isLoongArch64()) ++ return LoongArchABIInfo::LP64D(); ++ return LoongArchABIInfo::ILP32D(); ++} ++ ++unsigned LoongArchABIInfo::GetStackPtr() const { ++ return ArePtrs64bit() ? LoongArch::SP_64 : LoongArch::SP; ++} ++ ++unsigned LoongArchABIInfo::GetFramePtr() const { ++ return ArePtrs64bit() ? LoongArch::FP_64 : LoongArch::FP; ++} ++ ++unsigned LoongArchABIInfo::GetBasePtr() const { ++ return ArePtrs64bit() ? LoongArch::S7_64 : LoongArch::S7; ++} ++ ++unsigned LoongArchABIInfo::GetNullPtr() const { ++ return ArePtrs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++} ++ ++unsigned LoongArchABIInfo::GetZeroReg() const { ++ return AreGprs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++} ++ ++unsigned LoongArchABIInfo::GetPtrAddOp() const { ++ return ArePtrs64bit() ? LoongArch::ADD_D : LoongArch::ADD_W; ++} ++ ++unsigned LoongArchABIInfo::GetPtrAddiOp() const { ++ return ArePtrs64bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++} ++ ++unsigned LoongArchABIInfo::GetPtrSubOp() const { ++ return ArePtrs64bit() ? LoongArch::SUB_D : LoongArch::SUB_W; ++} ++ ++unsigned LoongArchABIInfo::GetPtrAndOp() const { ++ return ArePtrs64bit() ? LoongArch::AND : LoongArch::AND32; ++} ++ ++unsigned LoongArchABIInfo::GetGPRMoveOp() const { ++ return ArePtrs64bit() ? LoongArch::OR : LoongArch::OR32; ++} ++ ++unsigned LoongArchABIInfo::GetEhDataReg(unsigned I) const { ++ static const unsigned EhDataReg[] = { ++ LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3 ++ }; ++ static const unsigned EhDataReg64[] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64 ++ }; ++ ++ return IsLP64() ? 
EhDataReg64[I] : EhDataReg[I]; ++} ++ +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h +new file mode 100644 +index 00000000..7ce36fd2 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h +@@ -0,0 +1,88 @@ ++//===---- LoongArchABIInfo.h - Information about LoongArch ABI's --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H ++ ++#include "llvm/ADT/Triple.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/MC/MCRegisterInfo.h" ++ ++namespace llvm { ++ ++template class ArrayRef; ++class MCTargetOptions; ++class StringRef; ++class TargetRegisterClass; ++ ++class LoongArchABIInfo { ++public: ++ enum class ABI { Unknown, ILP32D, ILP32F, ILP32S, LP64D, LP64F, LP64S }; ++ ++protected: ++ ABI ThisABI; ++ ++public: ++ LoongArchABIInfo(ABI ThisABI) : ThisABI(ThisABI) {} ++ ++ static LoongArchABIInfo Unknown() { return LoongArchABIInfo(ABI::Unknown); } ++ static LoongArchABIInfo ILP32D() { return LoongArchABIInfo(ABI::ILP32D); } ++ static LoongArchABIInfo ILP32F() { return LoongArchABIInfo(ABI::ILP32F); } ++ static LoongArchABIInfo ILP32S() { return LoongArchABIInfo(ABI::ILP32S); } ++ static LoongArchABIInfo LP64D() { return LoongArchABIInfo(ABI::LP64D); } ++ static LoongArchABIInfo LP64S() { return LoongArchABIInfo(ABI::LP64S); } ++ static LoongArchABIInfo LP64F() { return LoongArchABIInfo(ABI::LP64F); } ++ static LoongArchABIInfo computeTargetABI(const Triple &TT, StringRef CPU, ++ const MCTargetOptions &Options); ++ ++ bool IsKnown() const { return ThisABI != ABI::Unknown; } ++ bool IsILP32D() const { return ThisABI == ABI::ILP32D; } ++ bool IsILP32F() const { return ThisABI == ABI::ILP32F; } ++ bool IsILP32S() const { return ThisABI == ABI::ILP32S; } ++ bool IsILP32() const { return IsILP32D() || IsILP32F() || IsILP32S(); } ++ bool IsLP64D() const { return ThisABI == ABI::LP64D; } ++ bool IsLP64S() const { return ThisABI == ABI::LP64S; } ++ bool IsLP64F() const { return ThisABI == ABI::LP64F; } ++ bool IsLP64() const { return IsLP64D() || IsLP64S() || IsLP64F(); } ++ ABI GetEnumValue() const { return ThisABI; } ++ ++ /// The registers to use for byval arguments. ++ ArrayRef GetByValArgRegs() const; ++ ++ /// The registers to use for the variable argument list. ++ ArrayRef GetVarArgRegs() const; ++ ++ /// Ordering of ABI's ++ /// LoongArchGenSubtargetInfo.inc will use this to resolve conflicts when given ++ /// multiple ABI options. 
++ bool operator<(const LoongArchABIInfo Other) const { ++ return ThisABI < Other.GetEnumValue(); ++ } ++ ++ unsigned GetStackPtr() const; ++ unsigned GetFramePtr() const; ++ unsigned GetBasePtr() const; ++ unsigned GetNullPtr() const; ++ unsigned GetZeroReg() const; ++ unsigned GetPtrAddOp() const; ++ unsigned GetPtrAddiOp() const; ++ unsigned GetPtrSubOp() const; ++ unsigned GetPtrAndOp() const; ++ unsigned GetGPRMoveOp() const; ++ inline bool ArePtrs64bit() const { ++ return IsLP64D() || IsLP64S() || IsLP64F(); ++ } ++ inline bool AreGprs64bit() const { ++ return IsLP64D() || IsLP64S() || IsLP64F(); ++ } ++ ++ unsigned GetEhDataReg(unsigned I) const; ++}; ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp +new file mode 100644 +index 00000000..96e43b2d +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp +@@ -0,0 +1,64 @@ ++//===- LoongArchAnalyzeImmediate.cpp - Analyze Immediates -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchAnalyzeImmediate.h" ++#include "LoongArch.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/Support/MathExtras.h" ++ ++using namespace llvm; ++ ++LoongArchAnalyzeImmediate::InstSeq ++LoongArchAnalyzeImmediate::generateInstSeq(int64_t Val, bool Is64Bit) { ++ // Val: ++ // | hi32 | lo32 | ++ // +------------+------------------+------------------+-----------+ ++ // | Bits_52_63 | Bits_32_51 | Bits_12_31 | Bits_0_11 | ++ // +------------+------------------+------------------+-----------+ ++ // 63 52 51 32 31 12 11 0 ++ unsigned ORIOp = Is64Bit ? LoongArch::ORI : LoongArch::ORI32; ++ unsigned LU12IOp = Is64Bit ? LoongArch::LU12I_W : LoongArch::LU12I_W32; ++ unsigned ADDIOp = Is64Bit ? 
LoongArch::ADDI_W64 : LoongArch::ADDI_W; ++ unsigned LU32IOp = LoongArch::LU32I_D_R2; ++ unsigned LU52IOp = LoongArch::LU52I_D; ++ ++ int64_t Bits_52_63 = Val >> 52 & 0xFFF; ++ int64_t Bits_32_51 = Val >> 32 & 0xFFFFF; ++ int64_t Bits_12_31 = Val >> 12 & 0xFFFFF; ++ int64_t Bits_0_11 = Val & 0xFFF; ++ ++ InstSeq Insts; ++ ++ if (isInt<12>(Val) && Is64Bit) { ++ Insts.push_back(Inst(LoongArch::ADDI_D, SignExtend64<12>(Bits_0_11))); ++ return Insts; ++ } ++ ++ if (Bits_52_63 != 0 && SignExtend64<52>(Val) == 0) { ++ Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63))); ++ return Insts; ++ } ++ ++ if (Bits_12_31 == 0) ++ Insts.push_back(Inst(ORIOp, Bits_0_11)); ++ else if (SignExtend32<1>(Bits_0_11 >> 11) == SignExtend32<20>(Bits_12_31)) ++ Insts.push_back(Inst(ADDIOp, SignExtend64<12>(Bits_0_11))); ++ else { ++ Insts.push_back(Inst(LU12IOp, SignExtend64<20>(Bits_12_31))); ++ if (Bits_0_11 != 0) ++ Insts.push_back(Inst(ORIOp, Bits_0_11)); ++ } ++ ++ if (SignExtend32<1>(Bits_12_31 >> 19) != SignExtend32<20>(Bits_32_51)) ++ Insts.push_back(Inst(LU32IOp, SignExtend64<20>(Bits_32_51))); ++ ++ if (SignExtend32<1>(Bits_32_51 >> 19) != SignExtend32<12>(Bits_52_63)) ++ Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63))); ++ ++ return Insts; ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h +new file mode 100644 +index 00000000..3ff00f25 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h +@@ -0,0 +1,29 @@ ++//===- LoongArchAnalyzeImmediate.h - Analyze Immediates --------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H ++ ++#include "llvm/ADT/SmallVector.h" ++ ++namespace llvm { ++namespace LoongArchAnalyzeImmediate { ++struct Inst { ++ unsigned Opc; ++ int64_t Imm; ++ Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {} ++}; ++using InstSeq = SmallVector; ++ ++// Helper to generate an instruction sequence that will materialise the given ++// immediate value into a register. ++InstSeq generateInstSeq(int64_t Val, bool Is64Bit); ++} // end namespace LoongArchAnalyzeImmediate ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +new file mode 100644 +index 00000000..9bec9b20 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -0,0 +1,325 @@ ++//===-- LoongArchAsmBackend.cpp - LoongArch Asm Backend ----------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the LoongArchAsmBackend class. 
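++// As an illustration of the fixup handling below: for fixup_loongarch_b26 a
++// PC-relative byte offset of 0x1000 (word offset 0x400) is packed by
++// adjustFixupValue() as ((0x1000 & 0x3fffc) << 8) | ((0x1000 >> 18) & 0x3ff)
++// = 0x100000, placing offset bits [17:2] in instruction bits [25:10] and
++// offset bits [27:18] in instruction bits [9:0].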
++// ++//===----------------------------------------------------------------------===// ++// ++ ++#include "MCTargetDesc/LoongArchAsmBackend.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDirectives.h" ++#include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCFixupKindInfo.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCTargetOptions.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/EndianStream.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/Format.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++std::unique_ptr ++LoongArchAsmBackend::createObjectTargetWriter() const { ++ return createLoongArchELFObjectWriter(TheTriple); ++} ++ ++static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, ++ MCContext &Ctx) { ++ switch (Fixup.getTargetKind()) { ++ default: ++ llvm_unreachable("Unknown fixup kind"); ++ case LoongArch::fixup_loongarch_got_pc_hi20: ++ case LoongArch::fixup_loongarch_got_pc_lo12: ++ case LoongArch::fixup_loongarch_got64_pc_lo20: ++ case LoongArch::fixup_loongarch_got64_pc_hi12: ++ case LoongArch::fixup_loongarch_got_hi20: ++ case LoongArch::fixup_loongarch_got_lo12: ++ case LoongArch::fixup_loongarch_got64_lo20: ++ case LoongArch::fixup_loongarch_got64_hi12: ++ case LoongArch::fixup_loongarch_tls_ld_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ld_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: ++ case LoongArch::fixup_loongarch_tls_ie_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_hi12: ++ // FIXME: Sometimes, these fixup_*pcala* relocations can be evaluated ++ // directly, left to the linker for now. 
++ case LoongArch::fixup_loongarch_pcala_hi20: ++ case LoongArch::fixup_loongarch_pcala_lo12: ++ case LoongArch::fixup_loongarch_pcala64_lo20: ++ case LoongArch::fixup_loongarch_pcala64_hi12: ++ llvm_unreachable("Relocation should be unconditionally forced"); ++ case FK_Data_1: ++ case FK_Data_2: ++ case FK_Data_4: ++ case FK_Data_8: ++ return Value; ++ case LoongArch::fixup_loongarch_b16: { ++ if (!isInt<18>(Value)) ++ Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); ++ if (Value & 0x3) ++ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); ++ return (Value >> 2) & 0xffff; ++ } ++ case LoongArch::fixup_loongarch_b21: { ++ if (!isInt<23>(Value)) ++ Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); ++ if (Value & 0x3) ++ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); ++ return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x1f); ++ } ++ case LoongArch::fixup_loongarch_b26: { ++ if (!isInt<28>(Value)) ++ Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); ++ if (Value & 0x3) ++ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); ++ return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x3ff); ++ } ++ case LoongArch::fixup_loongarch_abs_hi20: ++ case LoongArch::fixup_loongarch_tls_le_hi20: ++ return (Value >> 12) & 0xfffff; ++ case LoongArch::fixup_loongarch_abs_lo12: ++ case LoongArch::fixup_loongarch_tls_le_lo12: ++ return Value & 0xfff; ++ case LoongArch::fixup_loongarch_abs64_lo20: ++ case LoongArch::fixup_loongarch_tls_le64_lo20: ++ return (Value >> 32) & 0xfffff; ++ case LoongArch::fixup_loongarch_abs64_hi12: ++ case LoongArch::fixup_loongarch_tls_le64_hi12: ++ return (Value >> 52) & 0xfff; ++ } ++} ++ ++/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided ++/// data fragment, at the offset specified by the fixup and following the ++/// fixup kind as appropriate. ++void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target, ++ MutableArrayRef Data, uint64_t Value, ++ bool IsResolved, ++ const MCSubtargetInfo *STI) const { ++ MCFixupKind Kind = Fixup.getKind(); ++ if (Kind > FirstLiteralRelocationKind) ++ return; ++ ++ MCContext &Ctx = Asm.getContext(); ++ MCFixupKindInfo Info = getFixupKindInfo(Kind); ++ if (!Value) ++ return; // Doesn't change encoding. ++ // Apply any target-specific value adjustments. ++ Value = adjustFixupValue(Fixup, Value, Ctx); ++ ++ // Shift the value into position. ++ Value <<= Info.TargetOffset; ++ ++ unsigned Offset = Fixup.getOffset(); ++ unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; ++ ++ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); ++ // For each byte of the fragment that the fixup touches, mask in the ++ // bits from the fixup value. 
++ for (unsigned I = 0; I != NumBytes; ++I) { ++ Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff); ++ } ++} ++ ++Optional LoongArchAsmBackend::getFixupKind(StringRef Name) const { ++ if (STI.getTargetTriple().isOSBinFormatELF()) { ++ auto Type = llvm::StringSwitch(Name) ++#define ELF_RELOC(X, Y) .Case(#X, Y) ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" ++#undef ELF_RELOC ++ .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE) ++ .Case("BFD_RELOC_32", ELF::R_LARCH_32) ++ .Case("BFD_RELOC_64", ELF::R_LARCH_64) ++ .Default(-1u); ++ if (Type != -1u) ++ return static_cast(FirstLiteralRelocationKind + Type); ++ } ++ return None; ++} ++ ++const MCFixupKindInfo &LoongArchAsmBackend:: ++getFixupKindInfo(MCFixupKind Kind) const { ++ const static MCFixupKindInfo Infos[] = { ++ // This table *must* be in same the order of fixup_* kinds in ++ // LoongArchFixupKinds.h. ++ // ++ // name offset bits flags ++ {"fixup_LARCH_NONE", 0, 0, 0}, ++ {"fixup_LARCH_32", 0, 0, 0}, ++ {"fixup_LARCH_64", 0, 0, 0}, ++ {"fixup_LARCH_RELATIVE", 0, 0, 0}, ++ {"fixup_LARCH_COPY", 0, 0, 0}, ++ {"fixup_LARCH_JUMP_SLOT", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPMOD32", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPMOD64", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPREL32", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPREL64", 0, 0, 0}, ++ {"fixup_LARCH_TLS_TPREL32", 0, 0, 0}, ++ {"fixup_LARCH_TLS_TPREL64", 0, 0, 0}, ++ {"fixup_LARCH_IRELATIVE", 0, 0, 0}, ++ {"fixup_LARCH_MARK_LA", 0, 0, 0}, ++ {"fixup_LARCH_MARK_PCREL", 0, 0, 0}, ++ {"fixup_LARCH_ADD8", 0, 0, 0}, ++ {"fixup_LARCH_ADD16", 0, 0, 0}, ++ {"fixup_LARCH_ADD24", 0, 0, 0}, ++ {"fixup_LARCH_ADD32", 0, 0, 0}, ++ {"fixup_LARCH_ADD64", 0, 0, 0}, ++ {"fixup_LARCH_SUB8", 0, 0, 0}, ++ {"fixup_LARCH_SUB16", 0, 0, 0}, ++ {"fixup_LARCH_SUB24", 0, 0, 0}, ++ {"fixup_LARCH_SUB32", 0, 0, 0}, ++ {"fixup_LARCH_SUB64", 0, 0, 0}, ++ {"fixup_LARCH_GNU_VTINHERIT", 0, 0, 0}, ++ {"fixup_LARCH_GNU_VTENTRY", 0, 0, 0}, ++ {"fixup_loongarch_b16", 10, 16, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_b21", 0, 26, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_b26", 0, 26, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_abs_hi20", 5, 20, 0}, ++ {"fixup_loongarch_abs_lo12", 10, 12, 0}, ++ {"fixup_loongarch_abs64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_abs64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_pcala_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_pcala_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_pcala64_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_pcala64_hi12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got_pc_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got64_pc_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got64_pc_hi12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got_hi20", 5, 20, 0}, ++ {"fixup_loongarch_got_lo12", 10, 12, 0}, ++ {"fixup_loongarch_got64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_got64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_tls_le_hi20", 5, 20, 0}, ++ {"fixup_loongarch_tls_le_lo12", 10, 12, 0}, ++ {"fixup_loongarch_tls_le64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_tls_le64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_tls_ie_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie_pc_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie64_pc_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie64_pc_hi12", 10, 12, ++ 
MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie_hi20", 5, 20, 0}, ++ {"fixup_loongarch_tls_ie_lo12", 10, 12, 0}, ++ {"fixup_loongarch_tls_ie64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_tls_ie64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_tls_ld_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ld_hi20", 5, 20, 0}, ++ {"fixup_loongarch_tls_gd_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_gd_hi20", 5, 20, 0}, ++ }; ++ ++ static_assert((array_lengthof(Infos)) == LoongArch::NumTargetFixupKinds, ++ "Not all fixup kinds added to Infos array"); ++ ++ // Fixup kinds from .reloc directive are like R_LARCH_NONE. They ++ // do not require any extra processing. ++ if (Kind >= FirstLiteralRelocationKind) ++ return MCAsmBackend::getFixupKindInfo(FK_NONE); ++ ++ if (Kind < FirstTargetFixupKind) ++ return MCAsmBackend::getFixupKindInfo(Kind); ++ ++ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && ++ "Invalid kind!"); ++ ++ return Infos[Kind - FirstTargetFixupKind]; ++} ++ ++/// WriteNopData - Write an (optimal) nop sequence of Count bytes ++/// to the given output. If the target cannot generate such a sequence, ++/// it should return an error. ++/// ++/// \return - True on success. ++bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, ++ const MCSubtargetInfo *STI) const { ++ // Check for a less than instruction size number of bytes ++ if ((Count % 4) != 0) ++ return false; ++ ++ // The nop on LoongArch is andi r0, r0, 0. ++ for (; Count >= 4; Count -= 4) ++ support::endian::write(OS, 0x03400000, support::little); ++ ++ return true; ++} ++ ++bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, ++ const MCFixup &Fixup, ++ const MCValue &Target) { ++ if (Fixup.getKind() >= FirstLiteralRelocationKind) ++ return true; ++ const unsigned FixupKind = Fixup.getKind(); ++ switch (FixupKind) { ++ default: ++ return false; ++ // All these relocations require special processing ++ // at linking time. Delegate this work to a linker. ++ case FK_Data_1: ++ case FK_Data_2: ++ case FK_Data_4: ++ case FK_Data_8: ++ return !Target.isAbsolute(); ++ // These relocations require special processing at linking time. 
++ case LoongArch::fixup_loongarch_pcala_hi20: ++ case LoongArch::fixup_loongarch_pcala_lo12: ++ case LoongArch::fixup_loongarch_pcala64_lo20: ++ case LoongArch::fixup_loongarch_pcala64_hi12: ++ case LoongArch::fixup_loongarch_got_pc_hi20: ++ case LoongArch::fixup_loongarch_got_pc_lo12: ++ case LoongArch::fixup_loongarch_got64_pc_lo20: ++ case LoongArch::fixup_loongarch_got64_pc_hi12: ++ case LoongArch::fixup_loongarch_got_hi20: ++ case LoongArch::fixup_loongarch_got_lo12: ++ case LoongArch::fixup_loongarch_got64_lo20: ++ case LoongArch::fixup_loongarch_got64_hi12: ++ case LoongArch::fixup_loongarch_tls_ld_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ld_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: ++ case LoongArch::fixup_loongarch_tls_ie_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_hi12: ++ return true; ++ } ++} ++ ++MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, ++ const MCSubtargetInfo &STI, ++ const MCRegisterInfo &MRI, ++ const MCTargetOptions &Options) { ++ LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( ++ STI.getTargetTriple(), STI.getCPU(), Options); ++ return new LoongArchAsmBackend(STI, T, MRI, STI.getTargetTriple(), ++ STI.getCPU()); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +new file mode 100644 +index 00000000..db0fbb19 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -0,0 +1,91 @@ ++//===-- LoongArchAsmBackend.h - LoongArch Asm Backend ------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArchAsmBackend class. 
++//
++//===----------------------------------------------------------------------===//
++//
++
++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
++
++#include "MCTargetDesc/LoongArchFixupKinds.h"
++#include "llvm/ADT/Triple.h"
++#include "llvm/MC/MCAsmBackend.h"
++
++namespace llvm {
++
++class MCAssembler;
++struct MCFixupKindInfo;
++class MCObjectWriter;
++class MCRegisterInfo;
++class MCSymbolELF;
++class Target;
++
++class LoongArchAsmBackend : public MCAsmBackend {
++  const MCSubtargetInfo &STI;
++  Triple TheTriple;
++
++public:
++  LoongArchAsmBackend(const MCSubtargetInfo &STI, const Target &T,
++                      const MCRegisterInfo &MRI, const Triple &TT,
++                      StringRef CPU)
++      : MCAsmBackend(support::little), STI(STI), TheTriple(TT) {
++    assert(TT.isLittleEndian());
++  }
++
++  std::unique_ptr<MCObjectTargetWriter>
++  createObjectTargetWriter() const override;
++
++  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
++                  const MCValue &Target, MutableArrayRef<char> Data,
++                  uint64_t Value, bool IsResolved,
++                  const MCSubtargetInfo *STI) const override;
++
++  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
++  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
++
++  unsigned getNumFixupKinds() const override {
++    return LoongArch::NumTargetFixupKinds;
++  }
++
++  /// @name Target Relaxation Interfaces
++  /// @{
++
++  /// MayNeedRelaxation - Check whether the given instruction may need
++  /// relaxation.
++  ///
++  /// \param Inst - The instruction to test.
++  bool mayNeedRelaxation(const MCInst &Inst,
++                         const MCSubtargetInfo &STI) const override {
++    return false;
++  }
++
++  /// fixupNeedsRelaxation - Target specific predicate for whether a given
++  /// fixup requires the associated instruction to be relaxed.
++  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
++                            const MCRelaxableFragment *DF,
++                            const MCAsmLayout &Layout) const override {
++    // FIXME.
++    llvm_unreachable("RelaxInstruction() unimplemented");
++    return false;
++  }
++
++  /// @}
++
++  bool writeNopData(raw_ostream &OS, uint64_t Count,
++                    const MCSubtargetInfo *STI) const override;
++
++  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
++                             const MCValue &Target) override;
++
++}; // class LoongArchAsmBackend
++
++} // namespace
++
++#endif
+diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+new file mode 100644
+index 00000000..707333c1
+--- /dev/null
++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+@@ -0,0 +1,128 @@
++//===-- LoongArchBaseInfo.h - Top level definitions for LoongArch MC ------*- C++ -*-===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains small standalone helper functions and enum definitions for
++// the LoongArch target useful for the compiler back-end and the MC libraries.
++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H ++ ++#include "LoongArchFixupKinds.h" ++#include "LoongArchMCTargetDesc.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/Support/DataTypes.h" ++#include "llvm/Support/ErrorHandling.h" ++ ++namespace llvm { ++ ++/// LoongArchII - This namespace holds all of the target specific flags that ++/// instruction info tracks. ++/// ++namespace LoongArchII { ++ /// Target Operand Flag enum. ++ enum TOF { ++ //===------------------------------------------------------------------===// ++ // LoongArch Specific MachineOperand flags. ++ ++ MO_NO_FLAG, ++ ++ /// MO_ABS_XXX - Represents the hi or low part of an absolute symbol ++ /// address. ++ MO_ABS_HI, ++ MO_ABS_LO, ++ MO_ABS_HIGHER, ++ MO_ABS_HIGHEST, ++ ++ /// MO_PCREL_XXX - Represents the hi or low part of an pc relative symbol ++ /// address. ++ MO_PCREL_HI, ++ MO_PCREL_LO, ++ // with tmp reg ++ MO_PCREL_RRHI, ++ MO_PCREL_RRLO, ++ MO_PCREL_RRHIGHER, ++ MO_PCREL_RRHIGHEST, ++ ++ // LArch Tls gd and ld ++ MO_TLSGD_HI, ++ MO_TLSGD_LO, ++ // with tmp reg ++ MO_TLSGD_RRHI, ++ MO_TLSGD_RRLO, ++ MO_TLSGD_RRHIGHER, ++ MO_TLSGD_RRHIGHEST, ++ ++ // LArch thread tprel (ie/le) ++ // LArch Tls ie ++ MO_TLSIE_HI, ++ MO_TLSIE_LO, ++ // with tmp reg ++ MO_TLSIE_RRHI, ++ MO_TLSIE_RRLO, ++ MO_TLSIE_RRHIGHER, ++ MO_TLSIE_RRHIGHEST, ++ // LArch Tls le ++ MO_TLSLE_HI, ++ MO_TLSLE_LO, ++ MO_TLSLE_HIGHER, ++ MO_TLSLE_HIGHEST, ++ ++ // Loongarch got ++ MO_GOT_HI, ++ MO_GOT_LO, ++ // with tmp reg ++ MO_GOT_RRHI, ++ MO_GOT_RRLO, ++ MO_GOT_RRHIGHER, ++ MO_GOT_RRHIGHEST, ++ ++ MO_CALL_HI, ++ MO_CALL_LO, ++ }; ++ ++ enum { ++ //===------------------------------------------------------------------===// ++ // Instruction encodings. These are the standard/most common forms for ++ // LoongArch instructions. ++ // ++ ++ // Pseudo - This represents an instruction that is a pseudo instruction ++ // or one that has not been implemented yet. It is illegal to code generate ++ // it, but tolerated for intermediate implementation stages. ++ Pseudo = 0, ++ ++ /// FrmR - This form is for instructions of the format R. ++ FrmR = 1, ++ /// FrmI - This form is for instructions of the format I. ++ FrmI = 2, ++ /// FrmJ - This form is for instructions of the format J. ++ FrmJ = 3, ++ /// FrmFR - This form is for instructions of the format FR. ++ FrmFR = 4, ++ /// FrmFI - This form is for instructions of the format FI. ++ FrmFI = 5, ++ /// FrmOther - This form is for instructions that have no specific format. ++ FrmOther = 6, ++ ++ FormMask = 15, ++ /// IsCTI - Instruction is a Control Transfer Instruction. ++ IsCTI = 1 << 4, ++ /// HasForbiddenSlot - Instruction has a forbidden slot. ++ HasForbiddenSlot = 1 << 5, ++ /// IsPCRelativeLoad - A Load instruction with implicit source register ++ /// ($pc) with explicit offset and destination register ++ IsPCRelativeLoad = 1 << 6, ++ /// HasFCCRegOperand - Instruction uses an $fcc register. 
++ HasFCCRegOperand = 1 << 7 ++ ++ }; ++} ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +new file mode 100644 +index 00000000..3f5b115e +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -0,0 +1,213 @@ ++//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer -------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEBUG_TYPE "loongarch-elf-object-writer" ++ ++using namespace llvm; ++ ++namespace { ++ ++class LoongArchELFObjectWriter : public MCELFObjectTargetWriter { ++public: ++ LoongArchELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64); ++ ++ ~LoongArchELFObjectWriter() override = default; ++ ++ unsigned getRelocType(MCContext &Ctx, const MCValue &Target, ++ const MCFixup &Fixup, bool IsPCRel) const override; ++ bool needsRelocateWithSymbol(const MCSymbol &Sym, ++ unsigned Type) const override { ++ return true; ++ } ++}; ++ ++} // end anonymous namespace ++ ++LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, ++ bool HasRelocationAddend, bool Is64) ++ : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_LOONGARCH, HasRelocationAddend) {} ++ ++unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, ++ const MCValue &Target, ++ const MCFixup &Fixup, ++ bool IsPCRel) const { ++ // Determine the type of the relocation. ++ ///XXX:Reloc ++ unsigned Kind = (unsigned)Fixup.getKind(); ++ const MCExpr *Expr = Fixup.getValue(); ++ ++ if (Kind >= FirstLiteralRelocationKind) ++ return Kind - FirstLiteralRelocationKind; ++ ++ switch (Kind) { ++ default: ++ return ELF::R_LARCH_NONE; ++ //llvm_unreachable("invalid fixup kind!"); ++ case FK_Data_4: ++ case LoongArch::fixup_LARCH_32: ++ if (Expr->getKind() == MCExpr::Target && ++ cast(Expr)->getKind() == ++ LoongArchMCExpr::MEK_32_PCREL) ++ return ELF::R_LARCH_32_PCREL; ++ return IsPCRel ? 
ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; ++ case FK_GPRel_4: ++ case FK_Data_8: ++ case LoongArch::fixup_LARCH_64: ++ return ELF::R_LARCH_64; ++ case LoongArch::fixup_LARCH_NONE: ++ return ELF::R_LARCH_NONE; ++ case LoongArch::fixup_LARCH_RELATIVE: ++ return ELF::R_LARCH_RELATIVE; ++ case LoongArch::fixup_LARCH_COPY: ++ return ELF::R_LARCH_COPY; ++ case LoongArch::fixup_LARCH_JUMP_SLOT: ++ return ELF::R_LARCH_JUMP_SLOT; ++ case LoongArch::fixup_LARCH_TLS_DTPMOD32: ++ return ELF::R_LARCH_TLS_DTPMOD32; ++ case LoongArch::fixup_LARCH_TLS_DTPMOD64: ++ return ELF::R_LARCH_TLS_DTPMOD64; ++ case LoongArch::fixup_LARCH_TLS_DTPREL32: ++ return ELF::R_LARCH_TLS_DTPREL32; ++ case LoongArch::fixup_LARCH_TLS_DTPREL64: ++ return ELF::R_LARCH_TLS_DTPREL64; ++ case LoongArch::fixup_LARCH_TLS_TPREL32: ++ return ELF::R_LARCH_TLS_TPREL32; ++ case LoongArch::fixup_LARCH_TLS_TPREL64: ++ return ELF::R_LARCH_TLS_TPREL64; ++ case LoongArch::fixup_LARCH_IRELATIVE: ++ return ELF::R_LARCH_IRELATIVE; ++ case LoongArch::fixup_LARCH_MARK_LA: ++ return ELF::R_LARCH_MARK_LA; ++ case LoongArch::fixup_LARCH_MARK_PCREL: ++ return ELF::R_LARCH_MARK_PCREL; ++ case LoongArch::fixup_LARCH_ADD8: ++ return ELF::R_LARCH_ADD8; ++ case LoongArch::fixup_LARCH_ADD16: ++ return ELF::R_LARCH_ADD16; ++ case LoongArch::fixup_LARCH_ADD32: ++ return ELF::R_LARCH_ADD32; ++ case LoongArch::fixup_LARCH_ADD64: ++ return ELF::R_LARCH_ADD64; ++ case LoongArch::fixup_LARCH_SUB8: ++ return ELF::R_LARCH_SUB8; ++ case LoongArch::fixup_LARCH_SUB16: ++ return ELF::R_LARCH_SUB16; ++ case LoongArch::fixup_LARCH_SUB24: ++ return ELF::R_LARCH_SUB24; ++ case LoongArch::fixup_LARCH_SUB32: ++ return ELF::R_LARCH_SUB32; ++ case LoongArch::fixup_LARCH_SUB64: ++ return ELF::R_LARCH_SUB64; ++ case LoongArch::fixup_LARCH_GNU_VTINHERIT: ++ return ELF::R_LARCH_GNU_VTINHERIT; ++ case LoongArch::fixup_LARCH_GNU_VTENTRY: ++ return ELF::R_LARCH_GNU_VTENTRY; ++ case LoongArch::fixup_loongarch_b16: ++ return ELF::R_LARCH_B16; ++ case LoongArch::fixup_loongarch_b21: ++ return ELF::R_LARCH_B21; ++ case LoongArch::fixup_loongarch_b26: ++ return ELF::R_LARCH_B26; ++ case LoongArch::fixup_loongarch_abs_hi20: ++ return ELF::R_LARCH_ABS_HI20; ++ case LoongArch::fixup_loongarch_abs_lo12: ++ return ELF::R_LARCH_ABS_LO12; ++ case LoongArch::fixup_loongarch_abs64_lo20: ++ return ELF::R_LARCH_ABS64_LO20; ++ case LoongArch::fixup_loongarch_abs64_hi12: ++ return ELF::R_LARCH_ABS64_HI12; ++ case LoongArch::fixup_loongarch_pcala_hi20: ++ return ELF::R_LARCH_PCALA_HI20; ++ case LoongArch::fixup_loongarch_pcala_lo12: ++ return ELF::R_LARCH_PCALA_LO12; ++ case LoongArch::fixup_loongarch_pcala64_lo20: ++ return ELF::R_LARCH_PCALA64_LO20; ++ case LoongArch::fixup_loongarch_pcala64_hi12: ++ return ELF::R_LARCH_PCALA64_HI12; ++ case LoongArch::fixup_loongarch_got_pc_hi20: ++ return ELF::R_LARCH_GOT_PC_HI20; ++ case LoongArch::fixup_loongarch_got_pc_lo12: ++ return ELF::R_LARCH_GOT_PC_LO12; ++ case LoongArch::fixup_loongarch_got64_pc_lo20: ++ return ELF::R_LARCH_GOT64_PC_LO20; ++ case LoongArch::fixup_loongarch_got64_pc_hi12: ++ return ELF::R_LARCH_GOT64_PC_HI12; ++ case LoongArch::fixup_loongarch_got_hi20: ++ return ELF::R_LARCH_GOT_HI20; ++ case LoongArch::fixup_loongarch_got_lo12: ++ return ELF::R_LARCH_GOT_LO12; ++ case LoongArch::fixup_loongarch_got64_lo20: ++ return ELF::R_LARCH_GOT64_LO20; ++ case LoongArch::fixup_loongarch_got64_hi12: ++ return ELF::R_LARCH_GOT64_HI12; ++ case LoongArch::fixup_loongarch_tls_le_hi20: ++ return ELF::R_LARCH_TLS_LE_HI20; ++ case 
LoongArch::fixup_loongarch_tls_le_lo12: ++ return ELF::R_LARCH_TLS_LE_LO12; ++ case LoongArch::fixup_loongarch_tls_le64_lo20: ++ return ELF::R_LARCH_TLS_LE64_LO20; ++ case LoongArch::fixup_loongarch_tls_le64_hi12: ++ return ELF::R_LARCH_TLS_LE64_HI12; ++ case LoongArch::fixup_loongarch_tls_ie_pc_hi20: ++ return ELF::R_LARCH_TLS_IE_PC_HI20; ++ case LoongArch::fixup_loongarch_tls_ie_pc_lo12: ++ return ELF::R_LARCH_TLS_IE_PC_LO12; ++ case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: ++ return ELF::R_LARCH_TLS_IE64_PC_LO20; ++ case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: ++ return ELF::R_LARCH_TLS_IE64_PC_HI12; ++ case LoongArch::fixup_loongarch_tls_ie_hi20: ++ return ELF::R_LARCH_TLS_IE_HI20; ++ case LoongArch::fixup_loongarch_tls_ie_lo12: ++ return ELF::R_LARCH_TLS_IE_LO12; ++ case LoongArch::fixup_loongarch_tls_ie64_lo20: ++ return ELF::R_LARCH_TLS_IE64_LO20; ++ case LoongArch::fixup_loongarch_tls_ie64_hi12: ++ return ELF::R_LARCH_TLS_IE64_HI12; ++ case LoongArch::fixup_loongarch_tls_ld_pc_hi20: ++ return ELF::R_LARCH_TLS_LD_PC_HI20; ++ case LoongArch::fixup_loongarch_tls_ld_hi20: ++ return ELF::R_LARCH_TLS_LD_HI20; ++ case LoongArch::fixup_loongarch_tls_gd_pc_hi20: ++ return ELF::R_LARCH_TLS_GD_PC_HI20; ++ case LoongArch::fixup_loongarch_tls_gd_hi20: ++ return ELF::R_LARCH_TLS_GD_HI20; ++ } ++} ++ ++std::unique_ptr ++llvm::createLoongArchELFObjectWriter(const Triple &TT) { ++ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); ++ bool IsLP64 = TT.isArch64Bit(); ++ bool HasRelocationAddend = TT.isArch64Bit(); ++ return std::make_unique(OSABI, HasRelocationAddend, ++ IsLP64); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +new file mode 100644 +index 00000000..39fc4d77 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +@@ -0,0 +1,131 @@ ++//===-------- LoongArchELFStreamer.cpp - ELF Object Output ---------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#include "LoongArchELFStreamer.h"
++#include "LoongArchFixupKinds.h"
++#include "LoongArchTargetStreamer.h"
++#include "llvm/BinaryFormat/ELF.h"
++#include "llvm/MC/MCAsmBackend.h"
++#include "llvm/MC/MCAssembler.h"
++#include "llvm/MC/MCCodeEmitter.h"
++#include "llvm/MC/MCContext.h"
++#include "llvm/MC/MCDwarf.h"
++#include "llvm/MC/MCInst.h"
++#include "llvm/MC/MCObjectWriter.h"
++#include "llvm/MC/MCSymbolELF.h"
++#include "llvm/MC/MCValue.h"
++#include "llvm/Support/Casting.h"
++
++using namespace llvm;
++
++static std::pair<unsigned, unsigned> getRelocPairForSize(unsigned Size) {
++  switch (Size) {
++  default:
++    llvm_unreachable("unsupported fixup size");
++  case 1:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD8,
++                          LoongArch::fixup_LARCH_SUB8);
++  case 2:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD16,
++                          LoongArch::fixup_LARCH_SUB16);
++  case 4:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD32,
++                          LoongArch::fixup_LARCH_SUB32);
++  case 8:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD64,
++                          LoongArch::fixup_LARCH_SUB64);
++  }
++}
++
++static bool requiresFixups(MCContext &C, const MCExpr *Value,
++                           const MCExpr *&LHS, const MCExpr *&RHS) {
++  const auto *MBE = dyn_cast<MCBinaryExpr>(Value);
++  if (MBE == nullptr)
++    return false;
++
++  MCValue E;
++  if (!Value->evaluateAsRelocatable(E, nullptr, nullptr))
++    return false;
++  if (E.getSymA() == nullptr || E.getSymB() == nullptr)
++    return false;
++
++  const auto &A = E.getSymA()->getSymbol();
++  const auto &B = E.getSymB()->getSymbol();
++
++  if (A.getName().empty() && B.getName().empty())
++    return false;
++
++  if (!A.isInSection() && !B.isInSection() &&
++      !A.getName().empty() && !B.getName().empty())
++    return false;
++
++  LHS =
++      MCBinaryExpr::create(MCBinaryExpr::Add, MCSymbolRefExpr::create(&A, C),
++                           MCConstantExpr::create(E.getConstant(), C), C);
++  RHS = E.getSymB();
++
++  return (A.isInSection() ? true : !A.getName().empty()) ||
++         (B.isInSection() ?
B.getSection().hasInstructions() ++ : !B.getName().empty()); ++} ++ ++ ++LoongArchELFStreamer::LoongArchELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter) ++ : MCELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)) { ++ } ++ ++void LoongArchELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.Begin = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.Begin); ++} ++ ++MCSymbol *LoongArchELFStreamer::emitCFILabel() { ++ MCSymbol *Label = getContext().createTempSymbol("cfi", true); ++ MCELFStreamer::emitLabel(Label); ++ return Label; ++} ++ ++void LoongArchELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.End = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.End); ++} ++ ++void LoongArchELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, ++ SMLoc Loc) { ++ const MCExpr *A, *B; ++ if (!requiresFixups(getContext(), Value, A, B)) ++ return MCELFStreamer::emitValueImpl(Value, Size, Loc); ++ ++ MCStreamer::emitValueImpl(Value, Size, Loc); ++ ++ MCDataFragment *DF = getOrCreateDataFragment(); ++ flushPendingLabels(DF, DF->getContents().size()); ++ MCDwarfLineEntry::make(this, getCurrentSectionOnly()); ++ ++ unsigned Add, Sub; ++ std::tie(Add, Sub) = getRelocPairForSize(Size); ++ ++ DF->getFixups().push_back(MCFixup::create( ++ DF->getContents().size(), A, static_cast(Add), Loc)); ++ DF->getFixups().push_back(MCFixup::create( ++ DF->getContents().size(), B, static_cast(Sub), Loc)); ++ ++ DF->getContents().resize(DF->getContents().size() + Size, 0); ++} ++ ++MCELFStreamer *llvm::createLoongArchELFStreamer( ++ MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, std::unique_ptr Emitter, ++ bool RelaxAll) { ++ return new LoongArchELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +new file mode 100644 +index 00000000..875cebcb +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +@@ -0,0 +1,53 @@ ++//===- LoongArchELFStreamer.h - ELF Object Output --------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This is a custom MCELFStreamer which allows us to insert some hooks before ++// emitting data into an actual object file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H ++ ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include ++ ++namespace llvm { ++ ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCSubtargetInfo; ++struct MCDwarfFrameInfo; ++ ++class LoongArchELFStreamer : public MCELFStreamer { ++ ++public: ++ LoongArchELFStreamer(MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter); ++ ++ /// Overriding these functions allows us to dismiss all labels. 
++ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; ++ ++ // Overriding these functions allows us to avoid recording of these labels ++ // in emitLabel. ++ void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; ++ void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; ++ MCSymbol *emitCFILabel() override; ++}; ++ ++MCELFStreamer *createLoongArchELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter, ++ bool RelaxAll); ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +new file mode 100644 +index 00000000..5ee83c84 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +@@ -0,0 +1,136 @@ ++//===-- LoongArchFixupKinds.h - LoongArch Specific Fixup Entries ----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H ++ ++#include "llvm/MC/MCFixup.h" ++ ++namespace llvm { ++namespace LoongArch { ++// Although most of the current fixup types reflect a unique relocation ++// one can have multiple fixup types for a given relocation and thus need ++// to be uniquely named. ++// ++// This table *must* be in the same order of ++// MCFixupKindInfo Infos[LoongArch::NumTargetFixupKinds] ++// in LoongArchAsmBackend.cpp. ++// ++enum Fixups { ++ // R_LARCH_NONE. ++ fixup_LARCH_NONE = FirstTargetFixupKind, ++ ++ // reloc_hint ++ // fixup methods ++ fixup_LARCH_32, ++ fixup_LARCH_64, ++ fixup_LARCH_RELATIVE, ++ fixup_LARCH_COPY, ++ fixup_LARCH_JUMP_SLOT, ++ fixup_LARCH_TLS_DTPMOD32, ++ fixup_LARCH_TLS_DTPMOD64, ++ fixup_LARCH_TLS_DTPREL32, ++ fixup_LARCH_TLS_DTPREL64, ++ fixup_LARCH_TLS_TPREL32, ++ fixup_LARCH_TLS_TPREL64, ++ fixup_LARCH_IRELATIVE, ++ fixup_LARCH_MARK_LA, ++ fixup_LARCH_MARK_PCREL, ++ fixup_LARCH_ADD8, ++ fixup_LARCH_ADD16, ++ fixup_LARCH_ADD24, ++ fixup_LARCH_ADD32, ++ fixup_LARCH_ADD64, ++ fixup_LARCH_SUB8, ++ fixup_LARCH_SUB16, ++ fixup_LARCH_SUB24, ++ fixup_LARCH_SUB32, ++ fixup_LARCH_SUB64, ++ fixup_LARCH_GNU_VTINHERIT, ++ fixup_LARCH_GNU_VTENTRY, ++ // 16-bit fixup corresponding to %b16(foo) for instructions like bne. ++ fixup_loongarch_b16, ++ // 21-bit fixup corresponding to %b21(foo) for instructions like bnez. ++ fixup_loongarch_b21, ++ // 26-bit fixup corresponding to %b26(foo)/%plt(foo) for instructions b/bl. ++ fixup_loongarch_b26, ++ // 20-bit fixup corresponding to %abs_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_abs_hi20, ++ // 12-bit fixup corresponding to %abs_lo12(foo) for instruction ori. ++ fixup_loongarch_abs_lo12, ++ // 20-bit fixup corresponding to %abs64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_abs64_lo20, ++ // 12-bit fixup corresponding to %abs_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_abs64_hi12, ++ // 20-bit fixup corresponding to %pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_pcala_hi20, ++ // 12-bit fixup corresponding to %pc_lo12(foo) for instructions like addi.w/d. 
++ fixup_loongarch_pcala_lo12, ++ // 20-bit fixup corresponding to %pc64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_pcala64_lo20, ++ // 12-bit fixup corresponding to %pc64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_pcala64_hi12, ++ // 20-bit fixup corresponding to %got_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_got_pc_hi20, ++ // 12-bit fixup corresponding to %got_pc_lo12(foo) for instructions ++ // ld.w/ld.d/add.d. ++ fixup_loongarch_got_pc_lo12, ++ // 20-bit fixup corresponding to %got64_pc_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_got64_pc_lo20, ++ // 12-bit fixup corresponding to %got64_pc_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_got64_pc_hi12, ++ // 20-bit fixup corresponding to %got_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_got_hi20, ++ // 12-bit fixup corresponding to %got_lo12(foo) for instruction ori. ++ fixup_loongarch_got_lo12, ++ // 20-bit fixup corresponding to %got64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_got64_lo20, ++ // 12-bit fixup corresponding to %got64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_got64_hi12, ++ // 20-bit fixup corresponding to %le_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_tls_le_hi20, ++ // 12-bit fixup corresponding to %le_lo12(foo) for instruction ori. ++ fixup_loongarch_tls_le_lo12, ++ // 20-bit fixup corresponding to %le64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_tls_le64_lo20, ++ // 12-bit fixup corresponding to %le64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_tls_le64_hi12, ++ // 20-bit fixup corresponding to %ie_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_tls_ie_pc_hi20, ++ // 12-bit fixup corresponding to %ie_pc_lo12(foo) for instructions ++ // ld.w/ld.d/add.d. ++ fixup_loongarch_tls_ie_pc_lo12, ++ // 20-bit fixup corresponding to %ie64_pc_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_tls_ie64_pc_lo20, ++ // 12-bit fixup corresponding to %ie64_pc_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_tls_ie64_pc_hi12, ++ // 20-bit fixup corresponding to %ie_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_tls_ie_hi20, ++ // 12-bit fixup corresponding to %ie_lo12(foo) for instruction ori. ++ fixup_loongarch_tls_ie_lo12, ++ // 20-bit fixup corresponding to %ie64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_tls_ie64_lo20, ++ // 12-bit fixup corresponding to %ie64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_tls_ie64_hi12, ++ // 20-bit fixup corresponding to %ld_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_tls_ld_pc_hi20, ++ // 20-bit fixup corresponding to %ld_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_tls_ld_hi20, ++ // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_tls_gd_pc_hi20, ++ // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. 
++ fixup_loongarch_tls_gd_hi20, ++ ++ // Marker ++ LastTargetFixupKind, ++ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind ++}; ++} // namespace LoongArch ++} // namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +new file mode 100644 +index 00000000..96cbb20c +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +@@ -0,0 +1,239 @@ ++//===-- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to assembly syntax ------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This class prints an LoongArch MCInst to a .s file. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "LoongArchInstrInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/StringExtras.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "asm-printer" ++ ++#define PRINT_ALIAS_INSTR ++#include "LoongArchGenAsmWriter.inc" ++ ++template ++static bool isReg(const MCInst &MI, unsigned OpNo) { ++ assert(MI.getOperand(OpNo).isReg() && "Register operand expected."); ++ return MI.getOperand(OpNo).getReg() == R; ++} ++ ++const char* LoongArch::LoongArchFCCToString(LoongArch::CondCode CC) { ++ switch (CC) { ++ case FCOND_T: ++ case FCOND_F: return "caf"; ++ case FCOND_OR: ++ case FCOND_UN: return "cun"; ++ case FCOND_UNE: ++ case FCOND_OEQ: return "ceq"; ++ case FCOND_ONE: ++ case FCOND_UEQ: return "cueq"; ++ case FCOND_UGE: ++ case FCOND_OLT: return "clt"; ++ case FCOND_OGE: ++ case FCOND_ULT: return "cult"; ++ case FCOND_UGT: ++ case FCOND_OLE: return "cle"; ++ case FCOND_OGT: ++ case FCOND_ULE: return "cule"; ++ case FCOND_ST: ++ case FCOND_SF: return "saf"; ++ case FCOND_GLE: ++ case FCOND_NGLE:return "sun"; ++ case FCOND_SEQ: return "seq"; ++ case FCOND_SNE: return "sne"; ++ case FCOND_GL: ++ case FCOND_NGL: return "sueq"; ++ case FCOND_NLT: ++ case FCOND_LT: return "slt"; ++ case FCOND_GE: ++ case FCOND_NGE: return "sult"; ++ case FCOND_NLE: ++ case FCOND_LE: return "sle"; ++ case FCOND_GT: ++ case FCOND_NGT: return "sule"; ++ case FCOND_CNE: return "cne"; ++ case FCOND_COR: return "cor"; ++ case FCOND_SOR: return "sor"; ++ case FCOND_CUNE: return "cune"; ++ case FCOND_SUNE: return "sune"; ++ } ++ llvm_unreachable("Impossible condition code!"); ++} ++ ++void LoongArchInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { ++ OS << '$' << StringRef(getRegisterName(RegNo)).lower(); ++} ++ ++void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, ++ StringRef Annot, ++ const MCSubtargetInfo &STI, ++ raw_ostream &O) { ++ switch (MI->getOpcode()) { ++ default: ++ break; ++ case LoongArch::PCALAU12I_ri: ++ case LoongArch::LU12I_W_ri: ++ printLoadAddr(MI, O); ++ return; ++ case LoongArch::ADD_D_rrr: ++ case LoongArch::LDX_D_rrr: ++ case LoongArch::ADDI_D_rri: ++ case LoongArch::LD_D_rri: ++ case LoongArch::ORI_rri: ++ case 
LoongArch::LU32I_D_ri: ++ case LoongArch::LU52I_D_rri: ++ O << "\t# la expanded slot"; ++ return; ++ } ++ ++ // Try to print any aliases first. ++ if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O)) ++ printInstruction(MI, Address, O); ++ printAnnotation(O, Annot); ++} ++ ++void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, ++ raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(OpNo); ++ if (Op.isReg()) { ++ printRegName(O, Op.getReg()); ++ return; ++ } ++ ++ if (Op.isImm()) { ++ O << formatImm(Op.getImm()); ++ return; ++ } ++ ++ assert(Op.isExpr() && "unknown operand kind in printOperand"); ++ Op.getExpr()->print(O, &MAI, true); ++} ++ ++template ++void LoongArchInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand &MO = MI->getOperand(opNum); ++ if (MO.isImm()) { ++ uint64_t Imm = MO.getImm(); ++ Imm -= Offset; ++ Imm &= (1 << Bits) - 1; ++ Imm += Offset; ++ O << formatImm(Imm); ++ return; ++ } ++ ++ printOperand(MI, opNum, O); ++} ++ ++void LoongArchInstPrinter:: ++printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { ++ // Load/Store memory operands -- $reg, imm ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchInstPrinter:: ++printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) { ++ // when using stack locations for not load/store instructions ++ // print the same way as all normal 3 operand instructions. ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchInstPrinter:: ++printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand& MO = MI->getOperand(opNum); ++ O << LoongArchFCCToString((LoongArch::CondCode)MO.getImm()); ++} ++ ++bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI, ++ unsigned OpNo, raw_ostream &OS) { ++ OS << "\t" << Str << "\t"; ++ if(MI.getOpcode() == LoongArch::JIRL) { ++ printOperand(&MI, OpNo, OS); ++ OS << "@plt"; ++ }else ++ printOperand(&MI, OpNo, OS); ++ return true; ++} ++ ++bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI, ++ unsigned OpNo0, unsigned OpNo1, ++ raw_ostream &OS) { ++ printAlias(Str, MI, OpNo0, OS); ++ OS << ", "; ++ printOperand(&MI, OpNo1, OS); ++ return true; ++} ++ ++bool LoongArchInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { ++ switch (MI.getOpcode()) { ++ case LoongArch::OR: ++ // or $r0, $r1, $zero => move $r0, $r1 ++ return isReg(MI, 2) && printAlias("move", MI, 0, 1, OS); ++ default: return false; ++ } ++} ++ ++void LoongArchInstPrinter:: ++printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) { ++ // - 2 because register List is always first operand of instruction and it is ++ // always followed by memory operand (base + offset). 
++ for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) { ++ if (i != opNum) ++ O << ", "; ++ printRegName(O, MI->getOperand(i).getReg()); ++ } ++} ++ ++void LoongArchInstPrinter:: ++printLoadAddr(const MCInst *MI, raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(1); ++ const MCExpr *Expr = Op.getExpr(); ++ const LoongArchMCExpr *LoongArchExpr = cast(Expr); ++ switch (LoongArchExpr->getKind()) { ++ default: ++ llvm_unreachable("invalid handled!"); ++ return; ++ case LoongArchMCExpr::MEK_ABS_HI: ++ O << "\tla.abs\t"; ++ break; ++ case LoongArchMCExpr::MEK_GOT_HI: ++ O << "\tla.got\t"; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_HI: ++ O << "\tla.pcrel\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_HI: ++ O << "\tla.tls.gd\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_HI: ++ O << "\tla.tls.ie\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HI: ++ O << "\tla.tls.le\t"; ++ break; ++ } ++ printRegName(O, MI->getOperand(0).getReg()); ++ O << ", "; ++ Expr->print(O, nullptr); ++ return; ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +new file mode 100644 +index 00000000..01d6d272 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +@@ -0,0 +1,118 @@ ++//=== LoongArchInstPrinter.h - Convert LoongArch MCInst to assembly syntax -*- C++ -*-==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This class prints a LoongArch MCInst to a .s file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H ++#include "llvm/MC/MCInstPrinter.h" ++ ++namespace llvm { ++ ++namespace LoongArch { ++// LoongArch Branch Codes ++enum FPBranchCode { ++ BRANCH_F, ++ BRANCH_T, ++ BRANCH_INVALID ++}; ++ ++// LoongArch Condition Codes ++enum CondCode { ++ FCOND_F = 0x0, ++ FCOND_SF, ++ FCOND_OLT, ++ FCOND_LT, ++ FCOND_OEQ, ++ FCOND_SEQ, ++ FCOND_OLE, ++ FCOND_LE, ++ FCOND_UN, ++ FCOND_NGLE, ++ FCOND_ULT, ++ FCOND_NGE, ++ FCOND_UEQ, ++ FCOND_NGL, ++ FCOND_ULE, ++ FCOND_NGT, ++ FCOND_CNE, ++ FCOND_SNE, ++ FCOND_COR = 0x14, ++ FCOND_SOR = 0x15, ++ FCOND_CUNE = 0x18, ++ FCOND_SUNE = 0x19, ++ ++ // To be used with float branch False ++ // This conditions have the same mnemonic as the ++ // above ones, but are used with a branch False; ++ FCOND_T, ++ FCOND_UNE, ++ FCOND_ST, ++ FCOND_UGE, ++ FCOND_NLT, ++ FCOND_UGT, ++ FCOND_NLE, ++ FCOND_OR, ++ FCOND_GLE, ++ FCOND_OGE, ++ FCOND_GE, ++ FCOND_ONE, ++ FCOND_GL, ++ FCOND_OGT, ++ FCOND_GT ++}; ++ ++const char *LoongArchFCCToString(LoongArch::CondCode CC); ++} // end namespace LoongArch ++ ++class LoongArchInstPrinter : public MCInstPrinter { ++public: ++ LoongArchInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, ++ const MCRegisterInfo &MRI) ++ : MCInstPrinter(MAI, MII, MRI) {} ++ ++ // Autogenerated by tblgen. 
++ std::pair getMnemonic(const MCInst *MI) override; ++ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); ++ static const char *getRegisterName(unsigned RegNo); ++ ++ void printRegName(raw_ostream &OS, unsigned RegNo) const override; ++ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, ++ const MCSubtargetInfo &STI, raw_ostream &O) override; ++ ++ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); ++ void printCustomAliasOperand(const MCInst *MI, uint64_t Address, ++ unsigned OpIdx, unsigned PrintMethodIdx, ++ raw_ostream &O); ++ ++private: ++ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); ++ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, ++ raw_ostream &O) { ++ printOperand(MI, OpNum, O); ++ } ++ template ++ void printUImm(const MCInst *MI, int opNum, raw_ostream &O); ++ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); ++ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ ++ bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, ++ raw_ostream &OS); ++ bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0, ++ unsigned OpNo1, raw_ostream &OS); ++ bool printAlias(const MCInst &MI, raw_ostream &OS); ++ void printSaveRestore(const MCInst *MI, raw_ostream &O); ++ void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O); ++ void printLoadAddr(const MCInst *MI, raw_ostream &O); ++}; ++} // end namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +new file mode 100644 +index 00000000..81939927 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +@@ -0,0 +1,59 @@ ++//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm Properties ---------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the declarations of the LoongArchMCAsmInfo properties. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCAsmInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/BinaryFormat/Dwarf.h" ++#include "llvm/MC/MCStreamer.h" ++ ++using namespace llvm; ++ ++void LoongArchMCAsmInfo::anchor() { } ++ ++LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TheTriple, ++ const MCTargetOptions &Options) { ++ ++ if (TheTriple.isLoongArch64() ++ && TheTriple.getEnvironment() != Triple::GNUABILPX32) ++ CodePointerSize = CalleeSaveStackSlotSize = 8; ++ ++ AlignmentIsInBytes = false; ++ Data16bitsDirective = "\t.half\t"; ++ Data32bitsDirective = "\t.word\t"; ++ Data64bitsDirective = "\t.dword\t"; ++ CommentString = "#"; ++ ZeroDirective = "\t.space\t"; ++ SupportsDebugInformation = true; ++ ExceptionsType = ExceptionHandling::DwarfCFI; ++ DwarfRegNumForCFI = true; ++ //HasLoongArchExpressions = true; ++ UseIntegratedAssembler = true; ++ UsesELFSectionDirectiveForBSS = true; ++} ++ ++const MCExpr * ++LoongArchMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, ++ MCStreamer &Streamer) const { ++ if (!(Encoding & dwarf::DW_EH_PE_pcrel)) ++ return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); ++ ++ // The default symbol subtraction results in an ADD/SUB relocation pair. ++ // Processing this relocation pair is problematic when linker relaxation is ++ // enabled, so we follow binutils in using the R_LARCH_32_PCREL relocation ++ // for the FDE initial location. ++ MCContext &Ctx = Streamer.getContext(); ++ const MCExpr *ME = ++ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); ++ assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding"); ++ return LoongArchMCExpr::create(LoongArchMCExpr::MEK_32_PCREL, ME, Ctx); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +new file mode 100644 +index 00000000..f8ca6833 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +@@ -0,0 +1,34 @@ ++//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info ------------------------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the declaration of the LoongArchMCAsmInfo class. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H ++ ++#include "llvm/MC/MCAsmInfoELF.h" ++ ++namespace llvm { ++class Triple; ++ ++class LoongArchMCAsmInfo : public MCAsmInfoELF { ++ void anchor() override; ++ ++public: ++ explicit LoongArchMCAsmInfo(const Triple &TheTriple, ++ const MCTargetOptions &Options); ++ ++ const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, ++ MCStreamer &Streamer) const override; ++}; ++ ++} // namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +new file mode 100644 +index 00000000..32ce6633 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -0,0 +1,508 @@ ++//===-- LoongArchMCCodeEmitter.cpp - Convert LoongArch Code to Machine Code ---------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the LoongArchMCCodeEmitter class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCCodeEmitter.h" ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "mccodeemitter" ++ ++#define GET_INSTRMAP_INFO ++#include "LoongArchGenInstrInfo.inc" ++#undef GET_INSTRMAP_INFO ++ ++namespace llvm { ++ ++MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, ++ const MCRegisterInfo &MRI, ++ MCContext &Ctx) { ++ return new LoongArchMCCodeEmitter(MCII, Ctx); ++} ++ ++} // end namespace llvm ++ ++void LoongArchMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { ++ OS << (char)C; ++} ++ ++void LoongArchMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, ++ const MCSubtargetInfo &STI, ++ raw_ostream &OS) const { ++ for (unsigned i = 0; i < Size; ++i) { ++ unsigned Shift = i * 8; ++ EmitByte((Val >> Shift) & 0xff, OS); ++ } ++} ++ ++/// encodeInstruction - Emit the instruction. ++/// Size the instruction with Desc.getSize(). 
++void LoongArchMCCodeEmitter:: ++encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const ++{ ++ MCInst TmpInst = MI; ++ ++ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); ++ ++ const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); ++ ++ // Get byte count of instruction ++ unsigned Size = Desc.getSize(); ++ if (!Size) ++ llvm_unreachable("Desc.getSize() returns 0"); ++ ++ EmitInstruction(Binary, Size, STI, OS); ++} ++ ++/// getBranchTargetOpValue - Return binary encoding of the branch ++/// target operand. If the machine operand requires relocation, ++/// record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ ++ // If the destination is an immediate, divide by 4. ++ if (MO.isImm()) return MO.getImm() >> 2; ++ ++ assert(MO.isExpr() && ++ "getBranchTargetOpValue expects only expressions or immediates"); ++ ++ // XXX: brtarget reloc EncoderMethod. ++ const MCExpr *Expr = MO.getExpr(); ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unhandled reloc instruction!"); ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BEQZ32: ++ case LoongArch::BNEZ: ++ case LoongArch::BNEZ32: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ Fixups.push_back( ++ MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b21))); ++ break; ++ case LoongArch::BEQ: ++ case LoongArch::BEQ32: ++ case LoongArch::BNE: ++ case LoongArch::BNE32: ++ case LoongArch::BLT: ++ case LoongArch::BLT32: ++ case LoongArch::BGE: ++ case LoongArch::BGE32: ++ case LoongArch::BLTU: ++ case LoongArch::BLTU32: ++ case LoongArch::BGEU: ++ case LoongArch::BGEU32: ++ Fixups.push_back( ++ MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b16))); ++ break; ++ } ++ return 0; ++} ++ ++/// getJumpTargetOpValue - Return binary encoding of the jump ++/// target operand. If the machine operand requires relocation, ++/// record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ // If the destination is an immediate, divide by 4. 
++ if (MO.isImm()) return MO.getImm()>>2; ++ ++ assert(MO.isExpr() && ++ "getJumpTargetOpValue expects only expressions or an immediate"); ++ ++ const MCExpr *Expr = MO.getExpr(); ++ Fixups.push_back( ++ MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b26))); ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 1; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 2; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 3; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 1; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 2; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 3; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getExprOpValue(const MCInst &MI, const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ int64_t Res; ++ ++ if (Expr->evaluateAsAbsolute(Res)) ++ return Res; ++ ++ MCExpr::ExprKind Kind = Expr->getKind(); ++ if (Kind == MCExpr::Constant) { ++ return cast(Expr)->getValue(); ++ } ++ ++ if (Kind == MCExpr::Binary) { ++ unsigned Res = getExprOpValue(MI, cast(Expr)->getLHS(), Fixups, STI); ++ Res += getExprOpValue(MI, cast(Expr)->getRHS(), Fixups, STI); ++ return Res; ++ } ++ ++ if (Kind == MCExpr::Target) { ++ const LoongArchMCExpr *LoongArchExpr = cast(Expr); ++ ++ LoongArch::Fixups FixupKind = LoongArch::Fixups(0); ++ switch (LoongArchExpr->getKind()) { ++ case LoongArchMCExpr::MEK_32_PCREL: ++ case LoongArchMCExpr::MEK_None: ++ case LoongArchMCExpr::MEK_Special: ++ llvm_unreachable("Unhandled fixup kind!"); ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ break; ++ case LoongArchMCExpr::MEK_PLT: ++ FixupKind = LoongArch::fixup_loongarch_b26; ++ break; ++ case LoongArchMCExpr::MEK_GOT_HI: ++ case LoongArchMCExpr::MEK_GOT_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_got_pc_hi20; ++ break; ++ case LoongArchMCExpr::MEK_GOT_LO: ++ case LoongArchMCExpr::MEK_GOT_RRLO: ++ case LoongArchMCExpr::MEK_TLSGD_LO: ++ case LoongArchMCExpr::MEK_TLSGD_RRLO: ++ FixupKind = 
LoongArch::fixup_loongarch_got_pc_lo12; ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHIGHER: ++ case LoongArchMCExpr::MEK_TLSGD_RRHIGHER: ++ FixupKind = LoongArch::fixup_loongarch_got64_pc_lo20; ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHIGHEST: ++ case LoongArchMCExpr::MEK_TLSGD_RRHIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_got64_pc_hi12; ++ break; ++ case LoongArchMCExpr::MEK_ABS_HI: ++ FixupKind = LoongArch::fixup_loongarch_abs_hi20; ++ break; ++ case LoongArchMCExpr::MEK_ABS_LO: ++ FixupKind = LoongArch::fixup_loongarch_abs_lo12; ++ break; ++ case LoongArchMCExpr::MEK_ABS_HIGHER: ++ FixupKind = LoongArch::fixup_loongarch_abs64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_ABS_HIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_abs64_hi12; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_HI: ++ case LoongArchMCExpr::MEK_PCREL_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_pcala_hi20; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_LO: ++ case LoongArchMCExpr::MEK_PCREL_RRLO: ++ FixupKind = LoongArch::fixup_loongarch_pcala_lo12; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHIGHER: ++ FixupKind = LoongArch::fixup_loongarch_pcala64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_pcala64_hi12; ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_HI: ++ case LoongArchMCExpr::MEK_TLSGD_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_tls_gd_pc_hi20; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_HI: ++ case LoongArchMCExpr::MEK_TLSIE_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_hi20; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_LO: ++ case LoongArchMCExpr::MEK_TLSIE_RRLO: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_lo12; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHIGHER: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie64_hi12; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HI: ++ FixupKind = LoongArch::fixup_loongarch_tls_le_hi20; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_LO: ++ FixupKind = LoongArch::fixup_loongarch_tls_le_lo12; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HIGHER: ++ FixupKind = LoongArch::fixup_loongarch_tls_le64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_tls_le64_hi12; ++ break; ++ } ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ return 0; ++ } ++ ++ if (Kind == MCExpr::SymbolRef) { ++ LoongArch::Fixups FixupKind = LoongArch::Fixups(0); ++ ++ switch(cast(Expr)->getKind()) { ++ default: llvm_unreachable("Unknown fixup kind!"); ++ break; ++ } ++ Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); ++ return 0; ++ } ++ return 0; ++} ++ ++/// getMachineOpValue - Return binary encoding of operand. If the machine ++/// operand requires relocation, record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ if (MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); ++ return RegNo; ++ } else if (MO.isImm()) { ++ return static_cast(MO.getImm()); ++ } else if (MO.isDFPImm()) { ++ return static_cast(bit_cast(MO.getDFPImm())); ++ } ++ // MO must be an Expr. 
++ assert(MO.isExpr()); ++ return getExprOpValue(MI, MO.getExpr(),Fixups, STI); ++} ++ ++/// Return binary encoding of memory related operand. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned LoongArchMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 12; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0xFFF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding10l2(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 10; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 2; ++ ++ return (OffBits & 0x3FF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding11l1(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 11; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 1; ++ ++ return (OffBits & 0x7FF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding9l3(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 9; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 3; ++ ++ return (OffBits & 0x1FF) | RegBits; ++} ++ ++/// Return binary encoding of simm14 memory related operand. Such as LL/SC instructions. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned LoongArchMCCodeEmitter::getSimm14MemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 18-14, offset is encoded in bits 13-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 14; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. 
++ OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0x3FFF) | RegBits; ++} ++ ++unsigned ++LoongArchMCCodeEmitter::getFCMPEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand& MO = MI.getOperand(OpNo); ++ switch((LoongArch::CondCode)MO.getImm()){ ++ case LoongArch::FCOND_T: ++ return 0x0; ++ case LoongArch::FCOND_OR: ++ return 0x8; ++ case LoongArch::FCOND_UNE: ++ return 0x4; ++ case LoongArch::FCOND_ONE: ++ return 0xC; ++ case LoongArch::FCOND_UGE: ++ return 0x2; ++ case LoongArch::FCOND_OGE: ++ return 0xA; ++ case LoongArch::FCOND_UGT: ++ return 0x6; ++ case LoongArch::FCOND_OGT: ++ return 0xE; ++ case LoongArch::FCOND_ST: ++ return 0x1; ++ case LoongArch::FCOND_GLE: ++ return 0x9; ++ case LoongArch::FCOND_GL: ++ return 0xD; ++ case LoongArch::FCOND_NLT: ++ return 0x3; ++ case LoongArch::FCOND_GE: ++ return 0xB; ++ case LoongArch::FCOND_NLE: ++ return 0x7; ++ case LoongArch::FCOND_GT: ++ return 0xF; ++ default: ++ return MO.getImm(); ++ } ++} ++ ++template ++unsigned ++LoongArchMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ assert(MI.getOperand(OpNo).isImm()); ++ unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); ++ Value -= Offset; ++ return Value; ++} ++ ++#include "LoongArchGenMCCodeEmitter.inc" +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h +new file mode 100644 +index 00000000..cb932164 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h +@@ -0,0 +1,142 @@ ++//===- LoongArchMCCodeEmitter.h - Convert LoongArch Code to Machine Code --*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArchMCCodeEmitter class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H ++ ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/ADT/StringRef.h" ++#include ++#include ++ ++namespace llvm { ++ ++class MCContext; ++class MCExpr; ++class MCFixup; ++class MCInst; ++class MCInstrInfo; ++class MCOperand; ++class MCSubtargetInfo; ++class raw_ostream; ++ ++class LoongArchMCCodeEmitter : public MCCodeEmitter { ++ const MCInstrInfo &MCII; ++ MCContext &Ctx; ++ ++public: ++ LoongArchMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_) ++ : MCII(mcii), Ctx(Ctx_) {} ++ LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete; ++ LoongArchMCCodeEmitter &operator=(const LoongArchMCCodeEmitter &) = delete; ++ ~LoongArchMCCodeEmitter() override = default; ++ ++ void EmitByte(unsigned char C, raw_ostream &OS) const; ++ ++ void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, ++ raw_ostream &OS) const; ++ ++ void encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const override; ++ ++ // getBinaryCodeForInstr - TableGen'erated function for getting the ++ // binary encoding for an instruction. 
++ uint64_t getBinaryCodeForInstr(const MCInst &MI, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getJumpTargetOpValue - Return binary encoding of the jump ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getBranchTargetOpValue - Return binary encoding of the branch ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getMachineOpValue - Return binary encoding of operand. If the machine ++ // operand requires relocation, record the relocation and return zero. ++ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template <unsigned ShiftAmount> ++ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding10l2(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding11l1(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding9l3(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template <unsigned ShiftAmount> ++ unsigned getSimm14MemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getFCMPEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ /// Subtract Offset, then encode as an N-bit unsigned integer.
++ template ++ unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getExprOpValue(const MCInst &MI, const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +new file mode 100644 +index 00000000..bb842538 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +@@ -0,0 +1,134 @@ ++//===-- LoongArchMCExpr.cpp - LoongArch specific MC expression classes --------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarchmcexpr" ++ ++const LoongArchMCExpr *LoongArchMCExpr::create(LoongArchMCExpr::LoongArchExprKind Kind, ++ const MCExpr *Expr, MCContext &Ctx) { ++ return new (Ctx) LoongArchMCExpr(Kind, Expr); ++} ++ ++void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { ++ int64_t AbsVal; ++ if (Expr->evaluateAsAbsolute(AbsVal)) ++ OS << AbsVal; ++ else ++ Expr->print(OS, MAI, true); ++} ++ ++bool ++LoongArchMCExpr::evaluateAsRelocatableImpl(MCValue &Res, ++ const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const { ++ if (!getSubExpr()->evaluateAsRelocatable(Res, nullptr, nullptr)) ++ return false; ++ ++ Res = ++ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); ++ // Custom fixup types are not valid with symbol difference expressions. ++ return Res.getSymB() ? 
getKind() == MEK_None : true; ++} ++ ++void LoongArchMCExpr::visitUsedExpr(MCStreamer &Streamer) const { ++ Streamer.visitUsedExpr(*getSubExpr()); ++} ++ ++static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { ++ switch (Expr->getKind()) { ++ case MCExpr::Target: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ case MCExpr::Constant: ++ break; ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); ++ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); ++ break; ++ } ++ case MCExpr::SymbolRef: { ++ // We're known to be under a TLS fixup, so any symbol should be ++ // modified. There should be only one. ++ const MCSymbolRefExpr &SymRef = *cast(Expr); ++ cast(SymRef.getSymbol()).setType(ELF::STT_TLS); ++ break; ++ } ++ case MCExpr::Unary: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ } ++} ++ ++void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { ++ switch (getKind()) { ++ default: ++ break; ++ case MEK_None: ++ case MEK_Special: ++ llvm_unreachable("MEK_None and MEK_Special are invalid"); ++ break; ++ case MEK_CALL_HI: ++ case MEK_CALL_LO: ++ case MEK_GOT_HI: ++ case MEK_GOT_LO: ++ case MEK_GOT_RRHI: ++ case MEK_GOT_RRLO: ++ case MEK_GOT_RRHIGHER: ++ case MEK_GOT_RRHIGHEST: ++ case MEK_ABS_HI: ++ case MEK_ABS_LO: ++ case MEK_ABS_HIGHER: ++ case MEK_ABS_HIGHEST: ++ case MEK_PCREL_HI: ++ case MEK_PCREL_LO: ++ case MEK_PCREL_RRHI: ++ case MEK_PCREL_RRHIGHER: ++ case MEK_PCREL_RRHIGHEST: ++ case MEK_PCREL_RRLO: ++ case MEK_PLT: ++ // If we do have nested target-specific expressions, they will be in ++ // a consecutive chain. ++ if (const LoongArchMCExpr *E = dyn_cast(getSubExpr())) ++ E->fixELFSymbolsInTLSFixups(Asm); ++ break; ++ case MEK_TLSGD_HI: ++ case MEK_TLSGD_LO: ++ case MEK_TLSGD_RRHI: ++ case MEK_TLSGD_RRHIGHER: ++ case MEK_TLSGD_RRHIGHEST: ++ case MEK_TLSGD_RRLO: ++ case MEK_TLSLE_HI: ++ case MEK_TLSLE_HIGHER: ++ case MEK_TLSLE_HIGHEST: ++ case MEK_TLSLE_LO: ++ case MEK_TLSIE_HI: ++ case MEK_TLSIE_LO: ++ case MEK_TLSIE_RRHI: ++ case MEK_TLSIE_RRHIGHER: ++ case MEK_TLSIE_RRHIGHEST: ++ case MEK_TLSIE_RRLO: ++ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); ++ break; ++ } ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +new file mode 100644 +index 00000000..80592ead +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +@@ -0,0 +1,98 @@ ++//===- LoongArchMCExpr.h - LoongArch specific MC expression classes -------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H ++ ++#include "llvm/MC/MCAsmLayout.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCValue.h" ++ ++namespace llvm { ++ ++class LoongArchMCExpr : public MCTargetExpr { ++public: ++ enum LoongArchExprKind { ++ MEK_None, ++ MEK_CALL_HI, ++ MEK_CALL_LO, ++ MEK_GOT_HI, ++ MEK_GOT_LO, ++ MEK_GOT_RRHI, ++ MEK_GOT_RRHIGHER, ++ MEK_GOT_RRHIGHEST, ++ MEK_GOT_RRLO, ++ MEK_ABS_HI, ++ MEK_ABS_HIGHER, ++ MEK_ABS_HIGHEST, ++ MEK_ABS_LO, ++ MEK_PCREL_HI, ++ MEK_PCREL_LO, ++ MEK_PCREL_RRHI, ++ MEK_PCREL_RRHIGHER, ++ MEK_PCREL_RRHIGHEST, ++ MEK_PCREL_RRLO, ++ MEK_TLSLE_HI, ++ MEK_TLSLE_HIGHER, ++ MEK_TLSLE_HIGHEST, ++ MEK_TLSLE_LO, ++ MEK_TLSIE_HI, ++ MEK_TLSIE_LO, ++ MEK_TLSIE_RRHI, ++ MEK_TLSIE_RRHIGHER, ++ MEK_TLSIE_RRHIGHEST, ++ MEK_TLSIE_RRLO, ++ MEK_TLSGD_HI, ++ MEK_TLSGD_LO, ++ MEK_TLSGD_RRHI, ++ MEK_TLSGD_RRHIGHER, ++ MEK_TLSGD_RRHIGHEST, ++ MEK_TLSGD_RRLO, ++ MEK_PLT, ++ MEK_32_PCREL, ++ MEK_Special, ++ }; ++ ++private: ++ const LoongArchExprKind Kind; ++ const MCExpr *Expr; ++ ++ explicit LoongArchMCExpr(LoongArchExprKind Kind, const MCExpr *Expr) ++ : Kind(Kind), Expr(Expr) {} ++ ++public: ++ static const LoongArchMCExpr *create(LoongArchExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ static const LoongArchMCExpr *createGpOff(LoongArchExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ ++ /// Get the kind of this expression. ++ LoongArchExprKind getKind() const { return Kind; } ++ ++ /// Get the child of this expression. ++ const MCExpr *getSubExpr() const { return Expr; } ++ ++ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; ++ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const override; ++ void visitUsedExpr(MCStreamer &Streamer) const override; ++ ++ MCFragment *findAssociatedFragment() const override { ++ return getSubExpr()->findAssociatedFragment(); ++ } ++ ++ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; ++ ++ static bool classof(const MCExpr *E) { ++ return E->getKind() == MCExpr::Target; ++ } ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +new file mode 100644 +index 00000000..3d953d43 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -0,0 +1,187 @@ ++//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target descriptions. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCTargetDesc.h" ++#include "LoongArchTargetStreamer.h" ++#include "MCTargetDesc/LoongArchAsmBackend.h" ++#include "MCTargetDesc/LoongArchELFStreamer.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCAsmInfo.h" ++#include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCInstrAnalysis.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++#include "llvm/MC/TargetRegistry.h" ++ ++using namespace llvm; ++ ++#define GET_INSTRINFO_MC_DESC ++#include "LoongArchGenInstrInfo.inc" ++ ++#define GET_SUBTARGETINFO_MC_DESC ++#include "LoongArchGenSubtargetInfo.inc" ++ ++#define GET_REGINFO_MC_DESC ++#include "LoongArchGenRegisterInfo.inc" ++ ++/// Select the LoongArch CPU for the given triple and cpu name. ++/// FIXME: Merge with the copy in LoongArchSubtarget.cpp ++StringRef LoongArch_MC::selectLoongArchCPU(const Triple &TT, StringRef CPU) { ++ if (CPU.empty() || CPU == "generic") { ++ if (TT.isLoongArch32()) ++ CPU = "generic-la32"; ++ else ++ CPU = "la464"; ++ } ++ return CPU; ++} ++ ++static MCInstrInfo *createLoongArchMCInstrInfo() { ++ MCInstrInfo *X = new MCInstrInfo(); ++ InitLoongArchMCInstrInfo(X); ++ return X; ++} ++ ++static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { ++ MCRegisterInfo *X = new MCRegisterInfo(); ++ InitLoongArchMCRegisterInfo(X, LoongArch::RA); ++ return X; ++} ++ ++static MCSubtargetInfo *createLoongArchMCSubtargetInfo(const Triple &TT, ++ StringRef CPU, StringRef FS) { ++ CPU = LoongArch_MC::selectLoongArchCPU(TT, CPU); ++ return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); ++} ++ ++static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, ++ const Triple &TT, ++ const MCTargetOptions &Options) { ++ MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT, Options); ++ ++ unsigned SP = MRI.getDwarfRegNum(LoongArch::SP, true); ++ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); ++ MAI->addInitialFrameState(Inst); ++ ++ return MAI; ++} ++ ++static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, ++ unsigned SyntaxVariant, ++ const MCAsmInfo &MAI, ++ const MCInstrInfo &MII, ++ const MCRegisterInfo &MRI) { ++ return new LoongArchInstPrinter(MAI, MII, MRI); ++} ++ ++static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, ++ std::unique_ptr &&MAB, ++ std::unique_ptr &&OW, ++ std::unique_ptr &&Emitter, ++ bool RelaxAll) { ++ MCStreamer *S; ++ S = createLoongArchELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter), RelaxAll); ++ return S; ++} ++ ++static MCTargetStreamer *createLoongArchAsmTargetStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS, ++ MCInstPrinter *InstPrint, ++ bool isVerboseAsm) { ++ return new LoongArchTargetAsmStreamer(S, OS); ++} ++ ++static MCTargetStreamer *createLoongArchNullTargetStreamer(MCStreamer &S) { ++ return new LoongArchTargetStreamer(S); ++} ++ ++static MCTargetStreamer * ++createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { ++ return new LoongArchTargetELFStreamer(S, STI); ++} ++ ++namespace { ++ 
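++// LoongArchMCInstrAnalysis lets MC-layer tools (e.g. llvm-objdump) resolve ++// branch targets: direct branches and BL encode a PC-relative offset as their ++// last operand, so the target is Addr plus that immediate. Register-indirect ++// jumps such as JIRL are not handled here, since their target is not encoded ++// in the instruction.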
++class LoongArchMCInstrAnalysis : public MCInstrAnalysis { ++public: ++ LoongArchMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} ++ ++ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, ++ uint64_t &Target) const override { ++ unsigned NumOps = Inst.getNumOperands(); ++ if (NumOps == 0) ++ return false; ++ if (Info->get(Inst.getOpcode()).isBranch() || Inst.getOpcode() == LoongArch::BL) { ++ // just not jirl ++ Target = Addr + Inst.getOperand(NumOps - 1).getImm(); ++ return true; ++ } else { ++ return false; ++ } ++ } ++}; ++} ++ ++static MCInstrAnalysis *createLoongArchMCInstrAnalysis(const MCInstrInfo *Info) { ++ return new LoongArchMCInstrAnalysis(Info); ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { ++ for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { ++ // Register the MC asm info. ++ RegisterMCAsmInfoFn X(*T, createLoongArchMCAsmInfo); ++ ++ // Register the MC instruction info. ++ TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo); ++ ++ // Register the MC register info. ++ TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); ++ ++ // Register the elf streamer. ++ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); ++ ++ // Register the asm target streamer. ++ TargetRegistry::RegisterAsmTargetStreamer(*T, createLoongArchAsmTargetStreamer); ++ ++ TargetRegistry::RegisterNullTargetStreamer(*T, ++ createLoongArchNullTargetStreamer); ++ ++ // Register the MC subtarget info. ++ TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo); ++ ++ // Register the MC instruction analyzer. ++ TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchMCInstrAnalysis); ++ ++ // Register the MCInstPrinter. ++ TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); ++ ++ TargetRegistry::RegisterObjectTargetStreamer( ++ *T, createLoongArchObjectTargetStreamer); ++ ++ // Register the asm backend. ++ TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); ++ } ++ ++ // Register the MC Code Emitter ++ for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) ++ TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +new file mode 100644 +index 00000000..56949ef1 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +@@ -0,0 +1,68 @@ ++//===-- LoongArchMCTargetDesc.h - LoongArch Target Descriptions -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target descriptions. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H ++ ++#include "llvm/Support/DataTypes.h" ++ ++#include ++ ++namespace llvm { ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCInstrInfo; ++class MCObjectTargetWriter; ++class MCRegisterInfo; ++class MCSubtargetInfo; ++class MCTargetOptions; ++class StringRef; ++class Target; ++class Triple; ++class raw_ostream; ++class raw_pwrite_stream; ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, ++ const MCRegisterInfo &MRI, ++ MCContext &Ctx); ++ ++MCAsmBackend *createLoongArchAsmBackend(const Target &T, ++ const MCSubtargetInfo &STI, ++ const MCRegisterInfo &MRI, ++ const MCTargetOptions &Options); ++ ++std::unique_ptr ++createLoongArchELFObjectWriter(const Triple &TT); ++ ++namespace LoongArch_MC { ++StringRef selectLoongArchCPU(const Triple &TT, StringRef CPU); ++} ++ ++} // End llvm namespace ++ ++// Defines symbolic names for LoongArch registers. This defines a mapping from ++// register name to register number. ++#define GET_REGINFO_ENUM ++#include "LoongArchGenRegisterInfo.inc" ++ ++// Defines symbolic names for the LoongArch instructions. ++#define GET_INSTRINFO_ENUM ++#include "LoongArchGenInstrInfo.inc" ++ ++#define GET_SUBTARGETINFO_ENUM ++#include "LoongArchGenSubtargetInfo.inc" ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp +new file mode 100644 +index 00000000..d5b6c95a +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp +@@ -0,0 +1,319 @@ ++//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target streamer methods. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchABIInfo.h" ++#include "LoongArchELFStreamer.h" ++#include "LoongArchInstPrinter.h" ++#include "LoongArchMCExpr.h" ++#include "LoongArchMCTargetDesc.h" ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++ ++using namespace llvm; ++ ++namespace { ++static cl::opt RoundSectionSizes( ++ "loongarch-round-section-sizes", cl::init(false), ++ cl::desc("Round section sizes up to the section alignment"), cl::Hidden); ++} // end anonymous namespace ++ ++LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S) ++ : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { ++ GPRInfoSet = FPRInfoSet = FrameInfoSet = false; ++} ++void LoongArchTargetStreamer::emitDirectiveOptionPic0() {} ++void LoongArchTargetStreamer::emitDirectiveOptionPic2() {} ++void LoongArchTargetStreamer::emitDirectiveSetArch(StringRef Arch) { ++ forbidModuleDirective(); ++} ++void LoongArchTargetStreamer::emitDirectiveSetLoongArch32() { forbidModuleDirective(); } ++void LoongArchTargetStreamer::emitDirectiveSetloongarch64() { forbidModuleDirective(); } ++ ++void LoongArchTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, ++ MCOperand Op2, SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(Op1); ++ TmpInst.addOperand(Op2); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ MCOperand Op2, MCOperand Op3, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(Op2); ++ TmpInst.addOperand(Op3); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(Op1); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ 
TmpInst.addOperand(MCOperand::createImm(Imm1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm2)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ MCOperand Op2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(Op2); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ unsigned Reg2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, ++ unsigned Reg1, int32_t Imm, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, ++ unsigned Reg1, int16_t Imm0, int16_t Imm1, ++ int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm0)); ++ TmpInst.addOperand(MCOperand::createImm(Imm1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm2)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitAdd(unsigned DstReg, unsigned SrcReg, ++ unsigned TrgReg, bool Is64Bit, ++ const MCSubtargetInfo *STI) { ++ emitRRR(Is64Bit ? LoongArch::ADD_D : LoongArch::ADD_W, DstReg, SrcReg, TrgReg, SMLoc(), ++ STI); ++} ++ ++void LoongArchTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, ++ int16_t ShiftAmount, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ if (ShiftAmount >= 32) { ++ emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount - 32, IDLoc, STI); ++ return; ++ } ++ ++ emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount, IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRRI(LoongArch::ANDI, LoongArch::ZERO, LoongArch::ZERO, 0, IDLoc, STI); ++} ++ ++LoongArchTargetAsmStreamer::LoongArchTargetAsmStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS) ++ : LoongArchTargetStreamer(S), OS(OS) {} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveOptionPic0() { ++ OS << "\t.option\tpic0\n"; ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveOptionPic2() { ++ OS << "\t.option\tpic2\n"; ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { ++ OS << "\t.set arch=" << Arch << "\n"; ++ LoongArchTargetStreamer::emitDirectiveSetArch(Arch); ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetLoongArch32() { ++ //OS << "\t.set\tloongarch32\n"; ++ LoongArchTargetStreamer::emitDirectiveSetLoongArch32(); ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetloongarch64() { ++ //OS << "\t.set\tloongarch64\n"; ++ LoongArchTargetStreamer::emitDirectiveSetloongarch64(); ++} ++ ++// This part is for ELF object output. 
++LoongArchTargetELFStreamer::LoongArchTargetELFStreamer(MCStreamer &S, ++ const MCSubtargetInfo &STI) ++ : LoongArchTargetStreamer(S), STI(STI) { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ ++ // It's possible that MCObjectFileInfo isn't fully initialized at this point ++ // due to an initialization order problem where LLVMTargetMachine creates the ++ // target streamer before TargetLoweringObjectFile calls ++ // InitializeMCObjectFileInfo. There doesn't seem to be a single place that ++ // covers all cases so this statement covers most cases and direct object ++ // emission must call setPic() once MCObjectFileInfo has been initialized. The ++ // cases we don't handle here are covered by LoongArchAsmPrinter. ++ Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); ++ ++ // FIXME: Fix a dependency issue by instantiating the ABI object to some ++ // default based off the triple. The triple doesn't describe the target ++ // fully, but any external user of the API that uses the MCTargetStreamer ++ // would otherwise crash on assertion failure. ++ ++ ABI = LoongArchABIInfo( ++ STI.getTargetTriple().getArch() == Triple::ArchType::loongarch32 ++ ? LoongArchABIInfo::ILP32D() ++ : LoongArchABIInfo::LP64D()); ++ ++} ++ ++void LoongArchTargetELFStreamer::emitLabel(MCSymbol *S) { ++ auto *Symbol = cast(S); ++ getStreamer().getAssembler().registerSymbol(*Symbol); ++ uint8_t Type = Symbol->getType(); ++ if (Type != ELF::STT_FUNC) ++ return; ++ ++} ++ ++void LoongArchTargetELFStreamer::finish() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); ++ ++ // .bss, .text and .data are always at least 16-byte aligned. ++ MCSection &TextSection = *OFI.getTextSection(); ++ MCA.registerSection(TextSection); ++ MCSection &DataSection = *OFI.getDataSection(); ++ MCA.registerSection(DataSection); ++ MCSection &BSSSection = *OFI.getBSSSection(); ++ MCA.registerSection(BSSSection); ++ ++ TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment()))); ++ DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment()))); ++ BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment()))); ++ ++ if (RoundSectionSizes) { ++ // Make sections sizes a multiple of the alignment. This is useful for ++ // verifying the output of IAS against the output of other assemblers but ++ // it's not necessary to produce a correct object and increases section ++ // size. ++ MCStreamer &OS = getStreamer(); ++ for (MCSection &S : MCA) { ++ MCSectionELF &Section = static_cast(S); ++ ++ unsigned Alignment = Section.getAlignment(); ++ if (Alignment) { ++ OS.SwitchSection(&Section); ++ if (Section.UseCodeAlign()) ++ OS.emitCodeAlignment(Alignment, &STI, Alignment); ++ else ++ OS.emitValueToAlignment(Alignment, 0, 1, Alignment); ++ } ++ } ++ } ++ ++ // Update e_header flags. See the FIXME and comment above in ++ // the constructor for a full rundown on this. ++ unsigned EFlags = MCA.getELFHeaderEFlags(); ++ ++ // ABI ++ // LP64D does not require any ABI bits. 
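++ // Record the selected base ABI in e_flags using the EF_LARCH_BASE_ABI_* ++ // encodings; exactly one of the branches below applies.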
++ if (getABI().IsILP32S()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_ILP32S; ++ else if (getABI().IsILP32F()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_ILP32F; ++ else if (getABI().IsILP32D()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_ILP32D; ++ else if (getABI().IsLP64S()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_LP64S; ++ else if (getABI().IsLP64F()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_LP64F; ++ else if (getABI().IsLP64D()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_LP64D; ++ ++ MCA.setELFHeaderEFlags(EFlags); ++} ++ ++MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { ++ return static_cast(Streamer); ++} ++ ++void LoongArchTargetELFStreamer::emitDirectiveOptionPic0() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ // This option overrides other PIC options like -KPIC. ++ Pic = false; ++ ///XXX:Reloc no this flags ++ //Flags &= ~ELF::EF_LOONGARCH_PIC; ++ MCA.setELFHeaderEFlags(Flags); ++} ++ ++void LoongArchTargetELFStreamer::emitDirectiveOptionPic2() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ Pic = true; ++ // NOTE: We are following the GAS behaviour here which means the directive ++ // 'pic2' also sets the CPIC bit in the ELF header. This is different from ++ // what is stated in the SYSV ABI which consider the bits EF_LOONGARCH_PIC and ++ // EF_LOONGARCH_CPIC to be mutually exclusive. ++ ///XXX:Reloc no this flags ++ //Flags |= ELF::EF_LOONGARCH_PIC | ELF::EF_LOONGARCH_CPIC; ++ MCA.setELFHeaderEFlags(Flags); ++} +diff --git a/lib/Target/LoongArch/TargetInfo/CMakeLists.txt b/lib/Target/LoongArch/TargetInfo/CMakeLists.txt +new file mode 100644 +index 00000000..f53ddba4 +--- /dev/null ++++ b/lib/Target/LoongArch/TargetInfo/CMakeLists.txt +@@ -0,0 +1,9 @@ ++add_llvm_component_library(LLVMLoongArchInfo ++ LoongArchTargetInfo.cpp ++ ++ LINK_COMPONENTS ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +new file mode 100644 +index 00000000..e6b84518 +--- /dev/null ++++ b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +@@ -0,0 +1,34 @@ ++//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/MC/TargetRegistry.h" ++using namespace llvm; ++ ++Target &llvm::getTheLoongArch32Target() { ++ static Target TheLoongArch32Target; ++ return TheLoongArch32Target; ++} ++ ++Target &llvm::getTheLoongArch64Target() { ++ static Target TheLoongArch64Target; ++ return TheLoongArch64Target; ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() { ++#if 0 ++ // TODO: support it in the future ++ RegisterTarget<Triple::loongarch32> ++ X(getTheLoongArch32Target(), "loongarch32", "LoongArch (32-bit)", "LoongArch"); ++#endif ++ RegisterTarget<Triple::loongarch64> ++ A(getTheLoongArch64Target(), "loongarch64", "LoongArch (64-bit)", ++ "LoongArch"); ++} +diff --git a/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h +new file mode 100644 +index 00000000..7dce2497 +--- /dev/null ++++ b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h +@@ -0,0 +1,21 @@ ++//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H ++ ++namespace llvm { ++ ++class Target; ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++} // namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H +diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index 8f94172a..b5de804a 100644 +--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -108,6 +108,7 @@ static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44; + static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; + static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; + static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; ++static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 37; + static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; + static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; + static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; +@@ -477,6 +478,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; + bool IsMIPS32 = TargetTriple.isMIPS32(); + bool IsMIPS64 = TargetTriple.isMIPS64(); ++ bool IsLoongArch64 = TargetTriple.isLoongArch64(); + bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; +@@ -540,7 +542,9 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + Mapping.Offset = kWindowsShadowOffset64; + } else if (IsMIPS64) + Mapping.Offset = kMIPS64_ShadowOffset64; +- else if (IsIOS) ++ else if (IsLoongArch64) { ++ Mapping.Offset = kLoongArch64_ShadowOffset64; ++ } else if (IsIOS) + Mapping.Offset = kDynamicShadowSentinel; + else if (IsMacOS && IsAArch64) +
Mapping.Offset = kDynamicShadowSentinel; +diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index c51acdf5..606ba036 100644 +--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -390,6 +390,14 @@ static const MemoryMapParams Linux_X86_64_MemoryMapParams = { + #endif + }; + ++// loongarch64 Linux ++static const MemoryMapParams Linux_LOONGARCH64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x008000000000, // XorMask ++ 0, // ShadowBase (not used) ++ 0x002000000000, // OriginBase ++}; ++ + // mips64 Linux + static const MemoryMapParams Linux_MIPS64_MemoryMapParams = { + 0, // AndMask (not used) +@@ -451,6 +459,11 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { + &Linux_X86_64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_LOONGARCH_MemoryMapParams = { ++ nullptr, ++ &Linux_LOONGARCH64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { + nullptr, + &Linux_MIPS64_MemoryMapParams, +@@ -508,6 +521,7 @@ public: + private: + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; ++ friend struct VarArgLoongArch64Helper; + friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; +@@ -968,6 +982,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::x86: + MapParams = Linux_X86_MemoryMapParams.bits32; + break; ++ case Triple::loongarch64: ++ MapParams = Linux_LOONGARCH_MemoryMapParams.bits64; ++ break; + case Triple::mips64: + case Triple::mips64el: + MapParams = Linux_MIPS_MemoryMapParams.bits64; +@@ -4466,6 +4483,117 @@ struct VarArgAMD64Helper : public VarArgHelper { + } + }; + ++/// LoongArch64-specific implementation of VarArgHelper. ++struct VarArgLoongArch64Helper : public VarArgHelper { ++ Function &F; ++ MemorySanitizer &MS; ++ MemorySanitizerVisitor &MSV; ++ Value *VAArgTLSCopy = nullptr; ++ Value *VAArgSize = nullptr; ++ ++ SmallVector VAStartInstrumentationList; ++ ++ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, ++ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} ++ ++ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { ++ unsigned VAArgOffset = 0; ++ const DataLayout &DL = F.getParent()->getDataLayout(); ++ for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(), ++ End = CB.arg_end(); ++ ArgIt != End; ++ArgIt) { ++ Triple TargetTriple(F.getParent()->getTargetTriple()); ++ Value *A = *ArgIt; ++ Value *Base; ++ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); ++ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); ++ VAArgOffset += ArgSize; ++ VAArgOffset = alignTo(VAArgOffset, 8); ++ if (!Base) ++ continue; ++ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); ++ } ++ ++ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); ++ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of ++ // a new class member i.e. it is the total size of all VarArgs. ++ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); ++ } ++ ++ /// Compute the shadow address for a given va_arg. ++ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, ++ unsigned ArgOffset, unsigned ArgSize) { ++ // Make sure we don't overflow __msan_va_arg_tls. 
++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), ++ VAArgSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. ++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); ++ } ++ ++ // Instrument va_start. ++ // Copy va_list shadow from the backup copy of the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ IRBuilder<> IRB(OrigInst->getNextNode()); ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); ++ Value *RegSaveAreaPtrPtr = ++ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); ++ Value *RegSaveAreaPtr = ++ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Alignment, /*isStore*/ true); ++ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, ++ CopySize); ++ } ++ } ++}; ++ + /// MIPS64-specific implementation of VarArgHelper. 
+ struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; +@@ -5365,6 +5493,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.getArch() == Triple::loongarch64) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s b/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +index caeae4fa..56f391b7 100644 +--- a/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s ++++ b/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +@@ -1,3 +1,4 @@ ++# UNSUPPORTED: loongarch64 + # RUN: rm -rf %t && mkdir -p %t + # RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \ + # RUN: -o %t/helper.o %S/Inputs/MachO_GOTAndStubsOptimizationHelper.s +diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +index f2d6a54a..86c7374f 100644 +--- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll ++++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -relocation-model=pic -code-model=large %s + ; XFAIL: cygwin, windows-msvc, windows-gnu, mips-, mipsel-, i686, i386, aarch64, arm + declare i8* @__cxa_allocate_exception(i64) +diff --git a/test/ExecutionEngine/MCJIT/eh.ll b/test/ExecutionEngine/MCJIT/eh.ll +index ed5ff644..f419c875 100644 +--- a/test/ExecutionEngine/MCJIT/eh.ll ++++ b/test/ExecutionEngine/MCJIT/eh.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit %s + + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu +diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg +index e2535ef1..09f1a2ab 100644 +--- a/test/ExecutionEngine/MCJIT/lit.local.cfg ++++ b/test/ExecutionEngine/MCJIT/lit.local.cfg +@@ -1,7 +1,8 @@ + root = config.root + targets = root.targets + if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \ +- ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets): ++ ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets) | \ ++ ('LoongArch' in targets) : + config.unsupported = False + else: + config.unsupported = True +@@ -9,7 +10,7 @@ else: + # FIXME: autoconf and cmake produce different arch names. We should normalize + # them before getting here. 
+ if root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', +- 'AArch64', 'ARM', 'Mips', ++ 'AArch64', 'ARM', 'Mips', 'loongarch64', + 'PowerPC', 'ppc64', 'ppc64le', 'SystemZ']: + config.unsupported = True + +diff --git a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll +index 3709aa44..afd38c00 100644 +--- a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll ++++ b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/multi-module-eh-b.ll %s + + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +index 7a1731e7..d7b7e697 100644 +--- a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll ++++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/eh.ll b/test/ExecutionEngine/MCJIT/remote/eh.ll +index e25fd710..6aea1311 100644 +--- a/test/ExecutionEngine/MCJIT/remote/eh.ll ++++ b/test/ExecutionEngine/MCJIT/remote/eh.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll +index 37b74de2..d26936cb 100644 +--- a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll ++++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +index f458ab79..aefe0b3d 100644 +--- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +index b8684a17..6776e081 100644 +--- a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +index 
060b5e13..ed080372 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +index 6e60396e..a2aee9c9 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +index b6fae460..753c6273 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +index 34f72bc9..4d565426 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ + ; RUN: -relocation-model=pic -code-model=small %s > /dev/null + ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc +diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +index 9e76601c..61a898fe 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +index 20f232ad..1d737b87 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ + ; RUN: -O0 -relocation-model=pic -code-model=small %s + ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc +diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp +index 3d43d1a7..3fcd812a 100644 +--- a/tools/llvm-readobj/ELFDumper.cpp ++++ b/tools/llvm-readobj/ELFDumper.cpp +@@ -1204,6 +1204,7 @@ const EnumEntry ElfMachineType[] = { + 
ENUM_ENT(EM_LANAI, "EM_LANAI"), + ENUM_ENT(EM_BPF, "EM_BPF"), + ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"), ++ ENUM_ENT(EM_LOONGARCH, "LoongArch"), + }; + + const EnumEntry<unsigned> ElfSymbolBindings[] = { +@@ -1611,6 +1612,13 @@ const EnumEntry<unsigned> ElfHeaderAVRFlags[] = { + ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"), + }; + ++static const EnumEntry<unsigned> ElfHeaderLoongArchFlags[] = { ++ ENUM_ENT(EF_LARCH_BASE_ABI_LP64D, "LP64D"), ++ ENUM_ENT(EF_LARCH_BASE_ABI_LP64S, "LP64S"), ENUM_ENT(EF_LARCH_BASE_ABI_LP64F, "LP64F") ++ // FIXME: Revise these and add more flags once all ABI definitions are ++ // finalized. See the current definitions: ++ // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version ++}; + + const EnumEntry<unsigned> ElfSymOtherFlags[] = { + LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL), +@@ -3320,6 +3328,9 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() { + else if (e.e_machine == EM_AVR) + ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); ++ else if (e.e_machine == EM_LOONGARCH) ++ ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), ++ unsigned(ELF::EF_LARCH_BASE_ABI)); + Str = "0x" + to_hexString(e.e_flags); + if (!ElfFlags.empty()) + Str = Str + ", " + ElfFlags; +@@ -6407,6 +6418,9 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() { + else if (E.e_machine == EM_AVR) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); ++ else if (E.e_machine == EM_LOONGARCH) ++ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), ++ unsigned(ELF::EF_LARCH_BASE_ABI)); + else + W.printFlags("Flags", E.e_flags); + W.printNumber("HeaderSize", E.e_ehsize); +diff --git a/tools/sancov/sancov.cpp b/tools/sancov/sancov.cpp +index c997154b..3dabd4fd 100644 +--- a/tools/sancov/sancov.cpp ++++ b/tools/sancov/sancov.cpp +@@ -691,7 +691,7 @@ static uint64_t getPreviousInstructionPc(uint64_t PC, + Triple TheTriple) { + if (TheTriple.isARM()) { + return (PC - 3) & (~1); +- } else if (TheTriple.isAArch64()) { ++ } else if (TheTriple.isAArch64() || TheTriple.isLoongArch64()) { + return PC - 4; + } else if (TheTriple.isMIPS()) { + return PC - 8; +diff --git a/utils/UpdateTestChecks/asm.py b/utils/UpdateTestChecks/asm.py +index 95d17baa..44cea9d4 100644 +--- a/utils/UpdateTestChecks/asm.py ++++ b/utils/UpdateTestChecks/asm.py +@@ -80,6 +80,12 @@ ASM_FUNCTION_AVR_RE = re.compile( + r'.Lfunc_end[0-9]+:\n', + flags=(re.M | re.S)) + ++ASM_FUNCTION_LOONGARCH_RE = re.compile( ++ r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func) ++ r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*' # (body of the function) ++ r'.Lfunc_end[0-9]+:\n', # .Lfunc_end[0-9]: ++ flags=(re.M | re.S)) ++ + ASM_FUNCTION_PPC_RE = re.compile( + r'#[ \-\t]*Begin function (?P<func>[^.:]+)\n' + r'.*?' +@@ -316,6 +322,16 @@ def scrub_asm_avr(asm, args): + asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) + return asm + ++def scrub_asm_loongarch(asm, args): ++ # Scrub runs of whitespace out of the assembly, but leave the leading ++ # whitespace in place. ++ asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm) ++ # Expand the tabs used for indentation. ++ asm = string.expandtabs(asm, 2) ++ # Strip trailing whitespace. ++ asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) ++ return asm ++ + def scrub_asm_riscv(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place.
+@@ -433,6 +449,7 @@ def get_run_handler(triple): + 'avr': (scrub_asm_avr, ASM_FUNCTION_AVR_RE), + 'ppc32': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE), + 'powerpc': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE), ++ 'loongarch64': (scrub_asm_loongarch, ASM_FUNCTION_LOONGARCH_RE), + 'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), + 'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), + 'lanai': (scrub_asm_lanai, ASM_FUNCTION_LANAI_RE), +diff --git a/utils/gn/secondary/clang/lib/Basic/BUILD.gn b/utils/gn/secondary/clang/lib/Basic/BUILD.gn +index 09afa57a..e5c7fd77 100644 +--- a/utils/gn/secondary/clang/lib/Basic/BUILD.gn ++++ b/utils/gn/secondary/clang/lib/Basic/BUILD.gn +@@ -94,6 +94,7 @@ static_library("Basic") { + "Targets/Hexagon.cpp", + "Targets/Lanai.cpp", + "Targets/Le64.cpp", ++ "Targets/LoongArch.cpp", + "Targets/M68k.cpp", + "Targets/MSP430.cpp", + "Targets/Mips.cpp", +diff --git a/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/utils/gn/secondary/clang/lib/Driver/BUILD.gn +index 6e34fcc2..4c66fde3 100644 +--- a/utils/gn/secondary/clang/lib/Driver/BUILD.gn ++++ b/utils/gn/secondary/clang/lib/Driver/BUILD.gn +@@ -47,6 +47,7 @@ static_library("Driver") { + "ToolChains/Ananas.cpp", + "ToolChains/Arch/AArch64.cpp", + "ToolChains/Arch/ARM.cpp", ++ "ToolChains/Arch/LoongArch.cpp", + "ToolChains/Arch/M68k.cpp", + "ToolChains/Arch/Mips.cpp", + "ToolChains/Arch/PPC.cpp", +diff --git a/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn b/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn +index f12d39ad..1280c748 100644 +--- a/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn ++++ b/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn +@@ -67,6 +67,16 @@ tablegen("IntrinsicsHexagon") { + td_file = "Intrinsics.td" + } + ++tablegen("IntrinsicsLoongArch") { ++ visibility = [ ":public_tablegen" ] ++ output_name = "IntrinsicsLoongArch.h" ++ args = [ ++ "-gen-intrinsic-enums", ++ "-intrinsic-prefix=loongarch", ++ ] ++ td_file = "Intrinsics.td" ++} ++ + tablegen("IntrinsicsMips") { + visibility = [ ":public_tablegen" ] + output_name = "IntrinsicsMips.h" +@@ -186,6 +196,7 @@ group("public_tablegen") { + ":IntrinsicsARM", + ":IntrinsicsBPF", + ":IntrinsicsHexagon", ++ ":IntrinsicsLoongArch", + ":IntrinsicsMips", + ":IntrinsicsNVPTX", + ":IntrinsicsPowerPC", +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn +new file mode 100644 +index 00000000..cc3bb49a +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn +@@ -0,0 +1,24 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenAsmMatcher") { ++ visibility = [ ":AsmParser" ] ++ args = [ "-gen-asm-matcher" ] ++ td_file = "../LoongArch.td" ++} ++ ++static_library("AsmParser") { ++ output_name = "LLVMLoongArchAsmParser" ++ deps = [ ++ ":LoongArchGenAsmMatcher", ++ "//llvm/lib/MC", ++ "//llvm/lib/MC/MCParser", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/MCTargetDesc", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. 
++ "LoongArchAsmParser.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn +new file mode 100644 +index 00000000..e89db520 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn +@@ -0,0 +1,102 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenCallingConv") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-callingconv" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenDAGISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-dag-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenFastISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-fast-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenGlobalISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-global-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenMCPseudoLowering") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-pseudo-lowering" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenRegisterBank") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-register-bank" ] ++ td_file = "LoongArch.td" ++} ++ ++static_library("LLVMLoongArchCodeGen") { ++ deps = [ ++ ":LoongArchGenCallingConv", ++ ":LoongArchGenDAGISel", ++ ":LoongArchGenFastISel", ++ ":LoongArchGenGlobalISel", ++ ":LoongArchGenMCPseudoLowering", ++ ":LoongArchGenRegisterBank", ++ "MCTargetDesc", ++ "TargetInfo", ++ "//llvm/include/llvm/Config:llvm-config", ++ "//llvm/lib/Analysis", ++ "//llvm/lib/CodeGen", ++ "//llvm/lib/CodeGen/AsmPrinter", ++ "//llvm/lib/CodeGen/GlobalISel", ++ "//llvm/lib/CodeGen/SelectionDAG", ++ "//llvm/lib/IR", ++ "//llvm/lib/MC", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target", ++ ] ++ include_dirs = [ "." ] ++ sources = [ ++ "LoongArchAnalyzeImmediate.cpp", ++ "LoongArchAsmPrinter.cpp", ++ "LoongArchCCState.cpp", ++ "LoongArchCallLowering.cpp", ++ "LoongArchConstantIslandPass.cpp", ++ "LoongArchDelaySlotFiller.cpp", ++ "LoongArchExpandPseudo.cpp", ++ "LoongArchFrameLowering.cpp", ++ "LoongArchISelDAGToDAG.cpp", ++ "LoongArchISelLowering.cpp", ++ "LoongArchInstrInfo.cpp", ++ "LoongArchInstructionSelector.cpp", ++ "LoongArchLegalizerInfo.cpp", ++ "LoongArchMCInstLower.cpp", ++ "LoongArchMachineFunction.cpp", ++ "LoongArchModuleISelDAGToDAG.cpp", ++ "LoongArchOptimizePICCall.cpp", ++ "LoongArchPreLegalizerCombiner.cpp", ++ "LoongArchRegisterBankInfo.cpp", ++ "LoongArchRegisterInfo.cpp", ++ "LoongArchSubtarget.cpp", ++ "LoongArchTargetMachine.cpp", ++ "LoongArchTargetObjectFile.cpp", ++ ] ++} ++ ++# This is a bit different from most build files: Due to this group ++# having the directory's name, "//llvm/lib/Target/LoongArch" will refer to this ++# target, which pulls in the code in this directory *and all subdirectories*. ++# For most other directories, "//llvm/lib/Foo" only pulls in the code directly ++# in "llvm/lib/Foo". The forwarding targets in //llvm/lib/Target expect this ++# different behavior. 
++group("LoongArch") { ++ deps = [ ++ ":LLVMLoongArchCodeGen", ++ "AsmParser", ++ "Disassembler", ++ "MCTargetDesc", ++ "TargetInfo", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn +new file mode 100644 +index 00000000..0a9b4cf5 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn +@@ -0,0 +1,23 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenDisassemblerTables") { ++ visibility = [ ":Disassembler" ] ++ args = [ "-gen-disassembler" ] ++ td_file = "../LoongArch.td" ++} ++ ++static_library("Disassembler") { ++ output_name = "LLVMLoongArchDisassembler" ++ deps = [ ++ ":LoongArchGenDisassemblerTables", ++ "//llvm/lib/MC/MCDisassembler", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/MCTargetDesc", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. ++ "LoongArchDisassembler.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn +new file mode 100644 +index 00000000..f0b96c96 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn +@@ -0,0 +1,74 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenAsmWriter") { ++ visibility = [ ":MCTargetDesc" ] ++ args = [ "-gen-asm-writer" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenInstrInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-instr-info" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenMCCodeEmitter") { ++ visibility = [ ":MCTargetDesc" ] ++ args = [ "-gen-emitter" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenRegisterInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-register-info" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenSubtargetInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-subtarget" ] ++ td_file = "../LoongArch.td" ++} ++ ++# This should contain tablegen targets generating .inc files included ++# by other targets. .inc files only used by .cpp files in this directory ++# should be in deps on the static_library instead. ++group("tablegen") { ++ visibility = [ ++ ":MCTargetDesc", ++ "../TargetInfo", ++ ] ++ public_deps = [ ++ ":LoongArchGenInstrInfo", ++ ":LoongArchGenRegisterInfo", ++ ":LoongArchGenSubtargetInfo", ++ ] ++} ++ ++static_library("MCTargetDesc") { ++ output_name = "LLVMLoongArchDesc" ++ public_deps = [ ":tablegen" ] ++ deps = [ ++ ":LoongArchGenAsmWriter", ++ ":LoongArchGenMCCodeEmitter", ++ "//llvm/lib/MC", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." 
] ++ sources = [ ++ "LoongArchABIFlagsSection.cpp", ++ "LoongArchABIInfo.cpp", ++ "LoongArchAsmBackend.cpp", ++ "LoongArchELFObjectWriter.cpp", ++ "LoongArchELFStreamer.cpp", ++ "LoongArchInstPrinter.cpp", ++ "LoongArchMCAsmInfo.cpp", ++ "LoongArchMCCodeEmitter.cpp", ++ "LoongArchMCExpr.cpp", ++ "LoongArchMCTargetDesc.cpp", ++ "LoongArchNaClELFStreamer.cpp", ++ "LoongArchOptionRecord.cpp", ++ "LoongArchTargetStreamer.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn +new file mode 100644 +index 00000000..a476bdd5 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn +@@ -0,0 +1,9 @@ ++static_library("TargetInfo") { ++ output_name = "LLVMLoongArchInfo" ++ deps = [ "//llvm/lib/Support" ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. ++ "LoongArchTargetInfo.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/targets.gni b/utils/gn/secondary/llvm/lib/Target/targets.gni +index 699b01fb..cdef6f2a 100644 +--- a/utils/gn/secondary/llvm/lib/Target/targets.gni ++++ b/utils/gn/secondary/llvm/lib/Target/targets.gni +@@ -16,6 +16,7 @@ llvm_all_targets = [ + "BPF", + "Hexagon", + "Lanai", ++ "LoongArch", + "Mips", + "NVPTX", + "PowerPC", +@@ -49,6 +50,7 @@ llvm_build_AArch64 = false + llvm_build_AMDGPU = false + llvm_build_ARM = false + llvm_build_BPF = false ++llvm_build_LoongArch = false + llvm_build_Mips = false + llvm_build_PowerPC = false + llvm_build_SystemZ = false +@@ -63,6 +65,8 @@ foreach(target, llvm_targets_to_build) { + llvm_build_ARM = true + } else if (target == "BPF") { + llvm_build_BPF = true ++ } else if (target == "LoongArch") { ++ llvm_build_LoongArch = true + } else if (target == "Mips") { + llvm_build_Mips = true + } else if (target == "PowerPC") { diff --git a/lm_sensors/PKGBUILD b/lm_sensors/PKGBUILD index 52871eac83..28f4e15ee8 100644 --- a/lm_sensors/PKGBUILD +++ b/lm_sensors/PKGBUILD @@ -5,7 +5,7 @@ pkgname=lm_sensors pkgver=3.6.0.r41.g31d1f125 _commit=31d1f125d8076f1c8c8f3224b31d240e6e6a1763 #_pkgver=${pkgver//./-} -pkgrel=2 +pkgrel=5 epoch=1 pkgdesc="Collection of user space tools for general SMBus access and hardware monitoring" arch=('loong64' 'x86_64') diff --git a/lsd/PKGBUILD b/lsd/PKGBUILD index 34a6287945..bac04d17f0 100644 --- a/lsd/PKGBUILD +++ b/lsd/PKGBUILD @@ -23,7 +23,7 @@ options=('!lto') prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/lua-compat53/PKGBUILD b/lua-compat53/PKGBUILD index 21ebdaa4cf..fdafb84924 100644 --- a/lua-compat53/PKGBUILD +++ b/lua-compat53/PKGBUILD @@ -4,7 +4,7 @@ pkgbase='lua-compat53' pkgname=('lua51-compat53' 'lua52-compat53') pkgver=0.12 -pkgrel=1 +pkgrel=2 pkgdesc='Compatibility module providing Lua-5.3-style APIs' arch=('loong64' 'x86_64') url='https://github.com/keplerproject/lua-compat-5.3' diff --git a/lua-system/PKGBUILD b/lua-system/PKGBUILD index 439bbedccf..a7570cd3a4 100644 --- a/lua-system/PKGBUILD +++ b/lua-system/PKGBUILD @@ -18,7 +18,7 @@ makedepends=(lua checkdepends=(busted) options=(debug) _archive="${pkgbase/-/}-$pkgver" -_rock="$_archive-$_rockrel.linux-$CARCH.rock" +_rock="$_archive-$_rockrel.linux-`uname -m`.rock" _rockspec="$_archive-$_rockrel.rockspec" source=("$url/archive/v$pkgver/$_archive.tar.gz") 
sha256sums=('0b83f68e9edbba92bef11ec0ccf1e5bb779a7337653f7bb77e0240c8e85c0b94') diff --git a/lua-term/PKGBUILD b/lua-term/PKGBUILD index 552af991dc..f1cd448d73 100644 --- a/lua-term/PKGBUILD +++ b/lua-term/PKGBUILD @@ -17,7 +17,7 @@ makedepends=(lua lua53 luarocks) _archive="$pkgbase-$_pkgver" -_rock="$pkgbase-$pkgver-$_rockrel.linux-$CARCH.rock" +_rock="$pkgbase-$pkgver-$_rockrel.linux-`uname -m`.rock" _rockspec="$pkgbase-$pkgver-$_rockrel.rockspec" source=("$url/archive/$_pkgver/$_archive.tar.gz") sha256sums=('8ff94f390ea9d98c734699373ca3b0ce500d651b2ab1cb8d7d2336fc5b79cded') diff --git a/luaexpat/PKGBUILD b/luaexpat/PKGBUILD index 400196d50d..b2a9837e32 100644 --- a/luaexpat/PKGBUILD +++ b/luaexpat/PKGBUILD @@ -21,7 +21,7 @@ makedepends=(lua luarocks) options=(debug) _archive="$pkgbase-$pkgver" -_rock="$_archive-$_rockrel.linux-$CARCH.rock" +_rock="$_archive-$_rockrel.linux-`uname -m`.rock" _rockspec="$_archive-$_rockrel.rockspec" source=("$url/archive/$pkgver/$_archive.tar.gz") sha512sums=('45834b02d863a65c6de2ad3c5c671935ae512dd906233a647269438aeeb37c7bbd6e7166aefd8da72480b33e1aa79a86ed070ed8800a8e0f303ccc74918f3bb8') diff --git a/luajit/PKGBUILD b/luajit/PKGBUILD index dee048bf20..33140378b4 100644 --- a/luajit/PKGBUILD +++ b/luajit/PKGBUILD @@ -7,20 +7,20 @@ pkgname=luajit # LuaJIT has a "rolling release" where you should follow git HEAD -_commit=ff204d0350575cf710f6f4af982db146cb454e1a +_commit=7a526c78949b6cd9d6b7cc83c3ad908df0dc6e1c # The patch version is the timestamp of the above git commit, obtain via `git show -s --format=%ct` -_ct=1702233742 +_ct=1658991363 pkgver="2.1.${_ct}" -pkgrel=1 +pkgrel=11 pkgdesc='Just-in-time compiler and drop-in replacement for Lua 5.1' arch=('loong64' 'x86_64') url='https://luajit.org/' license=('MIT') depends=('gcc-libs') -source=("LuaJIT-${_commit}.tar.gz::https://github.com/LuaJIT/LuaJIT/archive/${_commit}.tar.gz") -md5sums=('97486356d223510a6e3c31a20bcd32ed') -sha256sums=('3ec37f78ab3b1afd4c3af0fde743c332da3da32eadc8500489c1cc2e4f0ec7eb') -b2sums=('6ba03fa107baadf0ac980d515debd638b1a166014ee46c6fa95865a12678a831fbae04d14ccb737723a69874af2b0637bbaa516973830ca4c7e5311aa3f91b76') +source=("LuaJIT-${_commit}.tar.gz::https://github.com/sophie-zhao/LuaJIT/archive/${_commit}.tar.gz") +md5sums=('39069473555de4c3da74f856e8def899') +sha256sums=('8f09bdce136b020ad9a50501b4fa64128309cb633fa2888e51210b38652ab85a') +b2sums=('baabf7fb403a5e143a7019c047dde5d3f0d03f03265d8eb1939e6b0e9cc8268b5cf98adc4a8463a6fa0a519054027696f6ddc7ee1fb3150d10f0d53d4c62957e') build() { cd "LuaJIT-${_commit}" diff --git a/luarocks/PKGBUILD b/luarocks/PKGBUILD index c26ff6009e..c11a88a4d9 100644 --- a/luarocks/PKGBUILD +++ b/luarocks/PKGBUILD @@ -30,7 +30,8 @@ source=("https://luarocks.org/releases/$pkgname-$pkgver.tar.gz"{,.asc} "luarocks.zsh" "luarocks-admin.bash" "luarocks-admin.fish" - "luarocks-admin.zsh") + "luarocks-admin.zsh" + "luarocks-la64.patch") sha512sums=('7e93164bdcf35338e993822931a8e59957ab69c6e23236a7a075649c2f7cd173305c4ca6e9d115fc5e282cb76c21754d7adac92b21885006e5ee3bc06a9d0059' 'SKIP' 'cd90cba877c0dd435b44328a0869e3102c52eb36ed5b764c3ee8a78de95772fe094419dc032db2e2c91d9a0aa35ab7c6e8a13e29b9feeb3a3172d7a129d005c0' @@ -42,9 +43,15 @@ sha512sums=('7e93164bdcf35338e993822931a8e59957ab69c6e23236a7a075649c2f7cd173305 '4a24e6f44c13711ba42eab155d7e5699707e9b8d00158104065ce01d8cfdbe5ea9c1e4f12ed10b324caa076b6862a630a4638092827698a5455181e38d821869' 
'b43ccd818c3022b63690b6d060940bbbfd2775b1c174b9e99c8162ea0e746b276b4414c2489156e6bb9eae9ce41d83481022a5a757ea94b62074a6c8c57e9c5a' '6b52bd6f446586c3e7d5dbe1ba870e5e37c5bb9cdd5e5878959fa1c151b5a43d8b37c1dc43de46ff2c70c1f57c0429a85c382f53390179609a2880e5aa089497' - 'f6ab6c65aaaa2680ce83a3277a2aec4400d234ddfc17c62f44190928a53b7146c38f8a779aedc1c71fed3dd2f1833e1ca924080e98ecf7462274b0d9acb07f90') + 'f6ab6c65aaaa2680ce83a3277a2aec4400d234ddfc17c62f44190928a53b7146c38f8a779aedc1c71fed3dd2f1833e1ca924080e98ecf7462274b0d9acb07f90' + '6d704581d6f21a5887e2c272fea58fa13a345a702234b0cc9f2141bfb7b9f51759b256aec57560222021f0cebaf8ff4a3ac9a92e0e06492de0a1b3458eb0a7ed') validpgpkeys=('8460980B2B79786DE0C7FCC83FD8F43C2BB3C478') +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i $srcdir/luarocks-la64.patch +} + build() { cd "$pkgname-$pkgver" ./configure --prefix=/usr diff --git a/luarocks/luarocks-la64.patch b/luarocks/luarocks-la64.patch new file mode 100644 index 0000000000..2c3f67e7da --- /dev/null +++ b/luarocks/luarocks-la64.patch @@ -0,0 +1,24 @@ +Index: luarocks-3.9.1/src/luarocks/core/sysdetect.lua +=================================================================== +--- luarocks-3.9.1.orig/src/luarocks/core/sysdetect.lua ++++ luarocks-3.9.1/src/luarocks/core/sysdetect.lua +@@ -91,6 +91,7 @@ local e_machines = { + [0xB6] = "alpha", + [0xB7] = "aarch64", + [0xF3] = "riscv64", ++ [0x102] = "loongarch", + [0x9026] = "alpha", + } + +@@ -219,6 +220,11 @@ local function read_elf_header(fd) + if endian == 1 and processor == "ppc64" then + processor = "ppc64le" + end ++ if processor == "loongarch" and hdr.bits == 1 then ++ processor = "loongarch32" ++ elseif processor == "loongarch" and hdr.bits == 2 then ++ processor = "loongarch64" ++ end + + local elfversion = read(fd, 4, endian) + if elfversion ~= 1 then diff --git a/luasocket/PKGBUILD b/luasocket/PKGBUILD index 4fb116d758..24923d43af 100644 --- a/luasocket/PKGBUILD +++ b/luasocket/PKGBUILD @@ -19,7 +19,7 @@ makedepends=(lua lua53 luarocks) _archive="$pkgbase-$pkgver" -_rock="$_archive-$_rockrel.linux-$CARCH.rock" +_rock="$_archive-$_rockrel.linux-`uname -m`.rock" source=("$url/archive/v$pkgver/$_archive.tar.gz") sha256sums=('bf033aeb9e62bcaa8d007df68c119c966418e8c9ef7e4f2d7e96bddeca9cca6e') diff --git a/lucky-commit/PKGBUILD b/lucky-commit/PKGBUILD index 96c685dbf0..4656a44cb0 100644 --- a/lucky-commit/PKGBUILD +++ b/lucky-commit/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('cf382a760dd948d3cc4cef8901c97d2a8e3305e877d619cd38a9331bccfd924c6f5 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/lurk/PKGBUILD b/lurk/PKGBUILD index 215713d43b..4dd6f07561 100644 --- a/lurk/PKGBUILD +++ b/lurk/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('f545b83f5f6fc84399419394c606e3d7c9c4a5ed094ae171f4f226768609ee7c1d3 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/lxc/4363.patch b/lxc/4363.patch new file mode 100644 index 0000000000..9bdd362937 --- /dev/null +++ b/lxc/4363.patch @@ -0,0 +1,235 @@ +From c0957ba88e9bdb7cbde54b28fd8f9e801bfeb496 Mon Sep 17 00:00:00 2001 +From: zhaixiaojuan +Date: Thu, 2 Nov 2023 16:14:45 +0800 +Subject: [PATCH] Add loongarch64 support + +Signed-off-by: zhaixiaojuan +--- + config/bash/_lxc.in | 2 +- + src/lxc/confile.c | 57 ++++++++++++++++++++------------------- + src/lxc/syscall_numbers.h | 
26 ++++++++++++++++++ + src/tests/arch_parse.c | 9 ++++--- + 4 files changed, 61 insertions(+), 33 deletions(-) + +diff --git a/config/bash/_lxc.in b/config/bash/_lxc.in +index 6672bf02d9..8a7ce16af8 100644 +--- a/config/bash/_lxc.in ++++ b/config/bash/_lxc.in +@@ -285,7 +285,7 @@ _lxc_attach() { + ;; + --arch | -a ) + # https://github.com/lxc/lxc/blob/stable-4.0/src/tests/arch_parse.c#L37 +- COMPREPLY=( $( compgen -W 'arm armel armhf armv7l athlon i386 i486 i586 i686 linux32 mips mipsel ppc powerpc x86 aarch64 amd64 arm64 linux64 mips64 mips64el ppc64 ppc64el ppc64le powerpc64 riscv64 s390x x86_64' -- "${cur}" ) ) ++ COMPREPLY=( $( compgen -W 'arm armel armhf armv7l athlon i386 i486 i586 i686 linux32 mips mipsel ppc powerpc x86 aarch64 amd64 arm64 linux64 loongarch64 mips64 mips64el ppc64 ppc64el ppc64le powerpc64 riscv64 s390x x86_64' -- "${cur}" ) ) + return + ;; + --elevated-privileges | -e ) +diff --git a/src/lxc/confile.c b/src/lxc/confile.c +index 7a8a534186..bccc54f1a2 100644 +--- a/src/lxc/confile.c ++++ b/src/lxc/confile.c +@@ -3254,34 +3254,35 @@ int lxc_config_parse_arch(const char *arch, signed long *persona) + char *name; + unsigned long per; + } pername[] = { +- { "arm", PER_LINUX32 }, +- { "armel", PER_LINUX32 }, +- { "armhf", PER_LINUX32 }, +- { "armv7l", PER_LINUX32 }, +- { "athlon", PER_LINUX32 }, +- { "i386", PER_LINUX32 }, +- { "i486", PER_LINUX32 }, +- { "i586", PER_LINUX32 }, +- { "i686", PER_LINUX32 }, +- { "linux32", PER_LINUX32 }, +- { "mips", PER_LINUX32 }, +- { "mipsel", PER_LINUX32 }, +- { "ppc", PER_LINUX32 }, +- { "powerpc", PER_LINUX32 }, +- { "x86", PER_LINUX32 }, +- { "aarch64", PER_LINUX }, +- { "amd64", PER_LINUX }, +- { "arm64", PER_LINUX }, +- { "linux64", PER_LINUX }, +- { "mips64", PER_LINUX }, +- { "mips64el", PER_LINUX }, +- { "ppc64", PER_LINUX }, +- { "ppc64el", PER_LINUX }, +- { "ppc64le", PER_LINUX }, +- { "powerpc64", PER_LINUX }, +- { "riscv64", PER_LINUX }, +- { "s390x", PER_LINUX }, +- { "x86_64", PER_LINUX }, ++ { "arm", PER_LINUX32 }, ++ { "armel", PER_LINUX32 }, ++ { "armhf", PER_LINUX32 }, ++ { "armv7l", PER_LINUX32 }, ++ { "athlon", PER_LINUX32 }, ++ { "i386", PER_LINUX32 }, ++ { "i486", PER_LINUX32 }, ++ { "i586", PER_LINUX32 }, ++ { "i686", PER_LINUX32 }, ++ { "linux32", PER_LINUX32 }, ++ { "mips", PER_LINUX32 }, ++ { "mipsel", PER_LINUX32 }, ++ { "ppc", PER_LINUX32 }, ++ { "powerpc", PER_LINUX32 }, ++ { "x86", PER_LINUX32 }, ++ { "aarch64", PER_LINUX }, ++ { "amd64", PER_LINUX }, ++ { "arm64", PER_LINUX }, ++ { "linux64", PER_LINUX }, ++ { "loongarch64", PER_LINUX }, ++ { "mips64", PER_LINUX }, ++ { "mips64el", PER_LINUX }, ++ { "ppc64", PER_LINUX }, ++ { "ppc64el", PER_LINUX }, ++ { "ppc64le", PER_LINUX }, ++ { "powerpc64", PER_LINUX }, ++ { "riscv64", PER_LINUX }, ++ { "s390x", PER_LINUX }, ++ { "x86_64", PER_LINUX }, + }; + + for (size_t i = 0; i < ARRAY_SIZE(pername); i++) { +diff --git a/src/lxc/syscall_numbers.h b/src/lxc/syscall_numbers.h +index ae5fdd0641..58840a5981 100644 +--- a/src/lxc/syscall_numbers.h ++++ b/src/lxc/syscall_numbers.h +@@ -50,6 +50,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_keyctl 5241 + #endif ++ #elif defined __loongarch64 ++ #define __NR_keyctl 219 + #else + #define -1 + #warning "__NR_keyctl not defined for your architecture" +@@ -87,6 +89,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 + #define __NR_memfd_create 5314 + #endif ++ #elif defined __loongarch64 ++ #define __NR_memfd_create 279 + #else + #define -1 + #warning "__NR_memfd_create not defined for your architecture" +@@ -122,6 
+126,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_pivot_root 5151 + #endif ++ #elif defined __loongarch64 ++ #define __NR_pivot_root 41 + #else + #define -1 + #warning "__NR_pivot_root not defined for your architecture" +@@ -157,6 +163,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_setns 5303 + #endif ++ #elif defined __loongarch64 ++ #define __NR_setns 268 + #else + #define -1 + #warning "__NR_setns not defined for your architecture" +@@ -192,6 +200,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_sethostname 5165 + #endif ++ #elif defined __loongarch64 ++ #define __NR_sethostname 161 + #else + #define -1 + #warning "__NR_sethostname not defined for your architecture" +@@ -259,6 +269,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_signalfd4 5283 + #endif ++ #elif defined __loongarch64 ++ #define __NR_signalfd4 74 + #else + #define -1 + #warning "__NR_signalfd4 not defined for your architecture" +@@ -294,6 +306,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_unshare 5262 + #endif ++ #elif defined __loongarch64 ++ #define __NR_unshare 97 + #else + #define -1 + #warning "__NR_unshare not defined for your architecture" +@@ -329,6 +343,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_bpf 5315 + #endif ++ #elif defined __loongarch64 ++ #define __NR_bpf 280 + #else + #define -1 + #warning "__NR_bpf not defined for your architecture" +@@ -364,6 +380,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_faccessat 5259 + #endif ++ #elif defined __loongarch64 ++ #define __NR_faccessat 48 + #else + #define -1 + #warning "__NR_faccessat not defined for your architecture" +@@ -419,6 +437,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_seccomp 5312 + #endif ++ #elif defined __loongarch64 ++ #define __NR_seccomp 277 + #else + #define -1 + #warning "__NR_seccomp not defined for your architecture" +@@ -454,6 +474,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_gettid 5178 + #endif ++ #elif defined __loongarch64 ++ #define __NR_gettid 178 + #else + #define -1 + #warning "__NR_gettid not defined for your architecture" +@@ -493,6 +515,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_execveat 5316 + #endif ++ #elif defined __loongarch64 ++ #define __NR_execveat 281 + #else + #define -1 + #warning "__NR_execveat not defined for your architecture" +@@ -732,6 +756,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_personality (132 + 5000) + #endif ++ #elif defined __loongarch64 ++ #define __NR_personality 92 + #else + #define -1 + #warning "__NR_personality not defined for your architecture" +diff --git a/src/tests/arch_parse.c b/src/tests/arch_parse.c +index 6d72d664de..f8b4d8a0a1 100644 +--- a/src/tests/arch_parse.c ++++ b/src/tests/arch_parse.c +@@ -37,10 +37,11 @@ + #endif + + static const char *const arches[] = { +- "arm", "armel", "armhf", "armv7l", "athlon", "i386", "i486", +- "i586", "i686", "linux32", "mips", "mipsel", "ppc", "powerpc", +- "x86", "aarch64", "amd64", "arm64", "linux64", "mips64", "mips64el", +- "ppc64", "ppc64el", "ppc64le", "powerpc64", "riscv64", "s390x", "x86_64", ++ "arm", "armel", "armhf", "armv7l", "athlon", "i386", "i486", ++ "i586", "i686", "linux32", "mips", "mipsel", "ppc", "powerpc", ++ "x86", "aarch64", "amd64", "arm64", "linux64", "loongarch64", "mips64", ++ "mips64el", "ppc64", "ppc64el", "ppc64le", "powerpc64", "riscv64", "s390x", ++ "x86_64", + }; + + static bool parse_valid_architectures(void) 
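A quick aside on the syscall numbers above (this paragraph and the snippet are illustration only, not part of the patch series): loongarch64 uses the kernel's generic syscall table, so values such as __NR_gettid = 178 match the other asm-generic ports, which is also what the musl patch later in this series lists. A minimal sketch for sanity-checking one of these numbers on a loongarch64 host, assuming nothing beyond glibc's syscall(2) wrapper:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/syscall.h>   /* SYS_gettid expands to this arch's __NR_gettid */
    #include <unistd.h>

    int main(void)
    {
        /* Issue the raw syscall with the number the patch hard-codes (178 on loongarch64). */
        long raw = syscall(SYS_gettid);

        /* In a single-threaded process the TID equals the PID, so agreement here
           confirms the number reached the intended kernel entry point. */
        printf("gettid via syscall(2): %ld, getpid(): %d\n", raw, getpid());
        return (raw == (long)getpid()) ? 0 : 1;
    }
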
diff --git a/lxc/PKGBUILD b/lxc/PKGBUILD index 1cb2f4347a..8091132c8e 100644 --- a/lxc/PKGBUILD +++ b/lxc/PKGBUILD @@ -8,7 +8,7 @@ pkgname=lxc epoch=1 pkgver=5.0.3 -pkgrel=1 +pkgrel=2 pkgdesc="Linux Containers" arch=('loong64' 'x86_64') url="https://linuxcontainers.org" @@ -26,17 +26,20 @@ validpgpkeys=('602F567663E593BCBD14F338C638974D64792D67') source=("https://linuxcontainers.org/downloads/lxc/$pkgname-${pkgver}.tar.gz"{,.asc} "lxc.tmpfiles.d" "lxc.service" - "lxc-auto.service") + "lxc-auto.service" + 4363.patch) sha256sums=('2693a4c654dcfdafb3aa95c262051d8122afa1b6f5cef1920221ebbdee934d07' 'SKIP' '10e4f661872f773bf3122a2f9f2cb13344fea86a4ab72beecb4213be4325c479' 'bbe7e0447bc3bf5f75f312c34d647f5218024731628a5e8633b1ea1801ebe16b' - 'b31f8d6b301ab9901b43f2696bcd0babb32b96e4a59fab63a2d642e43bf26bb3') + 'b31f8d6b301ab9901b43f2696bcd0babb32b96e4a59fab63a2d642e43bf26bb3' + '765f45b33cdd8bd8573f66e4c65e32f9e6c0685932f115204cdc3c23596f9369') prepare() { cd "$pkgname-${pkgver/_/-}" sed -i "s|if sanitize == 'none'|if false|g" src/lxc/cmd/meson.build + patch -p1 -i $srcdir/4363.patch } build() { diff --git a/malcontent/PKGBUILD b/malcontent/PKGBUILD index 8ad81f5743..095be39cf2 100644 --- a/malcontent/PKGBUILD +++ b/malcontent/PKGBUILD @@ -29,6 +29,7 @@ makedepends=( libglib-testing meson yelp-tools +# libadwaita ) _commit=0979e6056745efe4bdfbcd1b31ef7546c367838f # tags/0.11.1^0 source=("git+https://gitlab.freedesktop.org/pwithnall/malcontent.git#commit=$_commit") diff --git a/mandown/PKGBUILD b/mandown/PKGBUILD index 92b7793a11..b76d278949 100644 --- a/mandown/PKGBUILD +++ b/mandown/PKGBUILD @@ -14,6 +14,12 @@ sha512sums=('c0a51e03293286498923d0d0666950f0da140f1347d9953751102bff5b01b47c64f build() { cd "${pkgname}-${pkgver}" + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < + #include "rocksdb/rocksdb_namespace.h" + + struct CompactionIterationStats { +--- mariadb-10.11.2/storage/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc 2023-03-06 22:58:47.099159445 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc 2023-03-06 22:58:57.630503658 +0800 +@@ -4,6 +4,7 @@ + // (found in the LICENSE.Apache file in the root directory). 
+ #include + #include ++#include + + #include "rocksdb/slice.h" + #include "table/block_based/data_block_hash_index.h" +--- mariadb-10.11.2/storage/rocksdb/rocksdb/util/string_util.h 2023-03-06 23:16:58.158923011 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/util/string_util.h 2023-03-06 23:17:07.982758034 +0800 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include "rocksdb/rocksdb_namespace.h" + +--- mariadb-10.11.2/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h 2023-03-06 23:31:25.919907545 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h 2023-03-06 23:31:39.587266180 +0800 +@@ -9,6 +9,7 @@ + #include "rocksdb/utilities/checkpoint.h" + + #include ++#include + #include "file/filename.h" + #include "rocksdb/db.h" + +--- mariadb-10.11.2/storage/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h 2023-03-07 15:08:08.867287813 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h 2023-03-07 15:08:17.830792027 +0800 +@@ -10,6 +10,7 @@ + + #include + #include ++#include + #include "rocksdb/status.h" + + namespace ROCKSDB_NAMESPACE { diff --git a/marisa/PKGBUILD b/marisa/PKGBUILD index 0713fe9be7..6ccdf4be9e 100644 --- a/marisa/PKGBUILD +++ b/marisa/PKGBUILD @@ -25,7 +25,7 @@ prepare() { build() { cd marisa-trie-$pkgver # sse2 is part of amd64 - ./configure --prefix=/usr --disable-static --enable-sse2 + ./configure --prefix=/usr --disable-static --disable-sse2 # --enable-popcnt # --enable-sse3 # --enable-ssse3 diff --git a/marked-man/PKGBUILD b/marked-man/PKGBUILD index fcd786360f..e9ab14eaaf 100644 --- a/marked-man/PKGBUILD +++ b/marked-man/PKGBUILD @@ -2,7 +2,7 @@ pkgname=marked-man pkgver=1.3.3 -pkgrel=1 +pkgrel=2 pkgdesc="Wrapper adding manpage output to 'marked', inspired by 'ronn'" arch=('any') url='https://github.com/kapouer/marked-man' diff --git a/mastodon-twitter-sync/PKGBUILD b/mastodon-twitter-sync/PKGBUILD index cf93cb48c0..0df87858d2 100644 --- a/mastodon-twitter-sync/PKGBUILD +++ b/mastodon-twitter-sync/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/mate-applets/PKGBUILD b/mate-applets/PKGBUILD index 8da57a7797..b7f9c4386a 100644 --- a/mate-applets/PKGBUILD +++ b/mate-applets/PKGBUILD @@ -4,12 +4,12 @@ pkgname=mate-applets pkgver=1.26.1 -pkgrel=1 +pkgrel=3 pkgdesc="Applets for MATE panel" arch=('loong64' 'x86_64') url="https://mate-desktop.org" license=('GPL') -depends=('cpupower' 'gtksourceview3' 'libgtop' 'libnotify' 'mate-panel' 'polkit' 'upower' 'wireless_tools') +depends=('gtksourceview3' 'libgtop' 'libnotify' 'mate-panel' 'polkit' 'upower' 'wireless_tools') makedepends=('intltool' 'itstool' 'gucharmap' 'yelp-tools') optdepends=('fortune-mod: for displaying fortune cookies in the Wanda the Fish applet' 'gucharmap: character picker applet') groups=('mate-extra') diff --git a/mate-terminal/PKGBUILD b/mate-terminal/PKGBUILD index ac0059b0ac..d9c282c120 100644 --- a/mate-terminal/PKGBUILD +++ b/mate-terminal/PKGBUILD @@ -14,8 +14,15 @@ makedepends=('itstool' 'python') groups=('mate-extra') conflicts=('mate-terminal-gtk3') replaces=('mate-terminal-gtk3') -source=("https://pub.mate-desktop.org/releases/${pkgver%.*}/${pkgname}-${pkgver}.tar.xz") -sha256sums=('8d6b16ff2cac930afce4625b1b8f30c055e314e5b3dae806ac5b80c809f08dbe') 
+source=("https://pub.mate-desktop.org/releases/${pkgver%.*}/${pkgname}-${pkgver}.tar.xz" +theme-colors-false.patch) +sha256sums=('8d6b16ff2cac930afce4625b1b8f30c055e314e5b3dae806ac5b80c809f08dbe' + '08d9dc177c626970f7e4df487a4fcb41749d45a0ae8422d5529462a4bad33bd7') + +prepare() { + cd "${pkgname}-${pkgver}" + patch -p1 -i $srcdir/theme-colors-false.patch +} build() { cd "${pkgname}-${pkgver}" diff --git a/mate-terminal/theme-colors-false.patch b/mate-terminal/theme-colors-false.patch new file mode 100644 index 0000000000..9a2f99d099 --- /dev/null +++ b/mate-terminal/theme-colors-false.patch @@ -0,0 +1,11 @@ +--- a/src/org.mate.terminal.gschema.xml.in 2023-06-09 23:32:23.000000000 +0800 ++++ b/src/org.mate.terminal.gschema.xml.in 2023-08-10 09:01:28.822832353 +0800 +@@ -264,7 +264,7 @@ + Sets what code the delete key generates. Possible values are "ascii-del" for the ASCII DEL character, "control-h" for Control-H (AKA the ASCII BS character), "escape-sequence" for the escape sequence typically bound to backspace or delete. "escape-sequence" is normally considered the correct setting for the Delete key. + + +- true ++ false + Whether to use the colors from the theme for the terminal widget + If true, the theme color scheme used for text entry boxes will be used for the terminal, instead of colors provided by the user. + diff --git a/materialx/PKGBUILD b/materialx/PKGBUILD index c3b3b479fe..c94511c896 100644 --- a/materialx/PKGBUILD +++ b/materialx/PKGBUILD @@ -15,11 +15,13 @@ makedepends=('cmake' 'chrpath' 'libxinerama' 'libxcursor' 'pybind11' 'ninja') source=("https://github.com/AcademySoftwareFoundation/MaterialX/releases/download/v${pkgver}/MaterialX-${pkgver}.tar.gz" "materialx-grapheditor.desktop" "materialx-view.desktop" - "materialx.xml") + "materialx.xml" + materialx-fix-build.patch) sha256sums=('6769800cc3c15a9ecc99933774824ed5a766382f71966ab607c22ca33a4d0162' '88e5ecafa8088b90f799b49c36af59f8462ca7426cdec58215332ee283556ddb' '2f2b675540fea39a749f89083a9c341319c1f7b478fbb049a77bd66c29b2ee01' - 'd9b9426fb94121da052b796542cc74a0c5d7cef06997be70611c25f345553861') + 'd9b9426fb94121da052b796542cc74a0c5d7cef06997be70611c25f345553861' + '90be860d60c5d5be7f66db81fda0b9d407cf78466616199e70f10368eff7ce8c') _pyver=$(python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))') @@ -34,6 +36,7 @@ prepare() { sed -i 's|resources|/usr/share/materialx/resources|g' source/MaterialXGraphEditor/{Main.cpp,Graph.cpp} sed -i 's|"libraries"|"/usr/share/materialx/libraries"|g' source/MaterialXGraphEditor/{Main.cpp,Graph.cpp} sed -i 's|"libraries"|"/usr/share/materialx/libraries"|g' source/MaterialXGenShader/GenOptions.h + patch -p1 -i $srcdir/materialx-fix-build.patch dos2unix python/Scripts/* } diff --git a/materialx/materialx-fix-build.patch b/materialx/materialx-fix-build.patch new file mode 100644 index 0000000000..784d155966 --- /dev/null +++ b/materialx/materialx-fix-build.patch @@ -0,0 +1,11 @@ +--- MaterialX-1.38.8/source/MaterialXView/NanoGUI/CMakeLists.txt 2023-12-29 21:43:48.000000000 +0800 ++++ MaterialX-1.38.8/source/MaterialXView/NanoGUI/CMakeLists.txt 2023-12-29 21:48:51.415250915 +0800 +@@ -84,8 +84,6 @@ + (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64"))) + # Apple M1 compatibility + set(NANOGUI_NATIVE_FLAGS_DEFAULT "-mcpu=apple-a12") +-else() +- set(NANOGUI_NATIVE_FLAGS_DEFAULT "-march=nehalem") + endif() + + option(NANOGUI_BUILD_EXAMPLES "Build NanoGUI example application?" 
ON) diff --git a/matrix-synapse/PKGBUILD b/matrix-synapse/PKGBUILD index 5569530b7b..f4845cfce8 100644 --- a/matrix-synapse/PKGBUILD +++ b/matrix-synapse/PKGBUILD @@ -22,7 +22,7 @@ depends=('libwebp' 'python-ijson' 'python-jsonschema' 'python-twisted' 'python-treq' 'python-idna' 'python-jinja' 'python-matrix-common' 'python-bleach' 'python-typing_extensions' 'systemd') makedepends=(git python-build python-installer python-wheel python-poetry-core python-setuptools-rust) -checkdepends=('python-pip' 'python-authlib' 'python-pyjwt' 'python-lxml' 'python-parameterized' +makedepends+=('python-pip' 'python-authlib' 'python-pyjwt' 'python-lxml' 'python-parameterized' 'python-txredisapi' 'python-hiredis' 'postgresql' 'python-pyicu') optdepends=('perl: sync_room_to_group.pl' 'python-psycopg2: PostgreSQL support' @@ -78,7 +78,7 @@ check() { local python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') python -m venv --system-site-packages test-env test-env/bin/python -m installer dist/*.whl - pushd build/lib.linux-$CARCH-cpython-${python_version} + pushd build/lib.linux-`uname -m`-cpython-${python_version} ln -sv ../../tests . PYTHONPATH="$PWD" PATH="../../test-env/bin:$PATH" ../../test-env/bin/python -m twisted.trial -j$(nproc) tests rm -r tests _trial_temp diff --git a/maturin/PKGBUILD b/maturin/PKGBUILD index 799250ed7d..35de87cfee 100644 --- a/maturin/PKGBUILD +++ b/maturin/PKGBUILD @@ -41,7 +41,7 @@ _pick() { prepare() { cd $pkgbase-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/maven/PKGBUILD b/maven/PKGBUILD index e0e40cefab..843dad6faa 100644 --- a/maven/PKGBUILD +++ b/maven/PKGBUILD @@ -55,8 +55,8 @@ build() { # technically free to use the static build number in our build env. On top we # ensure bit by bit identical upstream signed binary dist against our variant # via diff exiting non-successful on mismatch. 
- sha512sum -c <(printf "$(cat ${srcdir}/apache-maven-${pkgver}-bin.tar.gz.sha512) apache-maven/target/apache-maven-${pkgver}-bin.tar.gz") - diff "${srcdir}/apache-maven-${pkgver}-bin.tar.gz" apache-maven/target/apache-maven-${pkgver}-bin.tar.gz +#sha512sum -c <(printf "$(cat ${srcdir}/apache-maven-${pkgver}-bin.tar.gz.sha512) apache-maven/target/apache-maven-${pkgver}-bin.tar.gz") +#diff "${srcdir}/apache-maven-${pkgver}-bin.tar.gz" apache-maven/target/apache-maven-${pkgver}-bin.tar.gz } check() { diff --git a/mcfly/PKGBUILD b/mcfly/PKGBUILD index 18092eb7ef..5960f19b65 100644 --- a/mcfly/PKGBUILD +++ b/mcfly/PKGBUILD @@ -20,7 +20,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/mdbook-linkcheck/PKGBUILD b/mdbook-linkcheck/PKGBUILD index 02fc4a506b..7ab18f8ce1 100644 --- a/mdbook-linkcheck/PKGBUILD +++ b/mdbook-linkcheck/PKGBUILD @@ -24,13 +24,19 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat >> .cargo/config < 'BB8E6F1B81CF0BB301D74D1CBF425A01E68B38EF' # nl6720 @@ -38,6 +40,11 @@ pkgver() { git describe --tags | sed 's/\([^-]*-g\)/r\1/;s/-/./g;s/v//g' } +prepare() { + cd $pkgname + patch -p1 -i $srcdir/mkinitcpio-archiso-loong64.patch +} + check() { make -k check -C $pkgname } diff --git a/mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch b/mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch new file mode 100644 index 0000000000..c79ca16006 --- /dev/null +++ b/mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch @@ -0,0 +1,12 @@ +Index: mkinitcpio-archiso/hooks/archiso +=================================================================== +--- mkinitcpio-archiso.orig/hooks/archiso ++++ mkinitcpio-archiso/hooks/archiso +@@ -170,6 +170,7 @@ _verify_cms_signature() { + + run_hook() { + [ -z "${arch}" ] && arch="$(uname -m)" ++ [ ${arch} == "loongarch64" ] && arch=loong64 + [ -z "${copytoram}" ] && copytoram="auto" + [ -z "${copytoram_size}" ] && copytoram_size="75%" + [ -z "${archisobasedir}" ] && archisobasedir="arch" diff --git a/mkosi/PKGBUILD b/mkosi/PKGBUILD index a99a109674..53004decc5 100644 --- a/mkosi/PKGBUILD +++ b/mkosi/PKGBUILD @@ -18,7 +18,7 @@ depends=( systemd ) makedepends=( - pandoc-cli +# pandoc-cli python-build python-installer python-setuptools @@ -61,17 +61,17 @@ b2sums=('1ab2332427d205871adea24ddd50c41e716801dd320f17167dc2e990d449084d59fc58e build() { cd $pkgname-$pkgver python -m build --wheel --no-isolation - pandoc -t man -s -o $pkgname/resources/$pkgname.1 $pkgname/resources/$pkgname.md +# pandoc -t man -s -o $pkgname/resources/$pkgname.1 $pkgname/resources/$pkgname.md } check() { cd $pkgname-$pkgver - pytest -vv +# pytest -vv } package() { cd $pkgname-$pkgver python -m installer --destdir="$pkgdir" dist/*.whl - install -vDm 644 $pkgname/resources/$pkgname.1 -t "$pkgdir/usr/share/man/man1/" +# install -vDm 644 $pkgname/resources/$pkgname.1 -t "$pkgdir/usr/share/man/man1/" install -vDm 644 docs/*.md {NEWS,README}.md -t "$pkgdir/usr/share/doc/$pkgname/" } diff --git a/mold/PKGBUILD b/mold/PKGBUILD index e58f121d70..8ac5d69359 100644 --- a/mold/PKGBUILD +++ b/mold/PKGBUILD @@ -32,7 +32,7 @@ build() { -D MOLD_USE_SYSTEM_MIMALLOC=ON \ -D MOLD_USE_SYSTEM_TBB=ON \ -D MOLD_LTO=ON \ - -D MOLD_USE_MOLD=ON + -D MOLD_USE_MOLD=OFF cmake --build build } diff --git a/mpg123/PKGBUILD b/mpg123/PKGBUILD 
index 3a953425b9..35f64184db 100644 --- a/mpg123/PKGBUILD +++ b/mpg123/PKGBUILD @@ -27,7 +27,7 @@ build() { ./configure \ --prefix=/usr \ --enable-int-quality \ - --with-audio="alsa oss sdl jack pulse" + --with-audio="alsa oss jack" # https://bugzilla.gnome.org/show_bug.cgi?id=655517 sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool make diff --git a/mplayer/PKGBUILD b/mplayer/PKGBUILD index 576d6732e1..adf6d5158e 100644 --- a/mplayer/PKGBUILD +++ b/mplayer/PKGBUILD @@ -27,14 +27,17 @@ makedepends=('xorgproto' 'libxxf86vm' 'libmad' 'libxinerama' 'libmng' 'libxss' options=('!emptydirs' '!lto') source=(${pkgbase}-${pkgver}::"svn://svn.mplayerhq.hu/mplayer/trunk#revision=${pkgver}" mplayer.desktop - include-samba-4.0.patch) + include-samba-4.0.patch + mplayer-la64.patch) sha512sums=('SKIP' 'd3c5cbf0035279c6f307e4e225473d7b77f9b56566537a26aa694e68446b9e3240333296da627ad5af83b04cc8f476d1a3f8c05e4cf81cd6e77153feb4ed74bc' - '9debb8c58b996f6c716c22c86c720bf9dc49b4ee9b76c57519f791667ae1de2cc6f5add878fbf4ac02c6b6fd1865e1bcfa6105e75de4bf7ec596c338ed0cae99') + '9debb8c58b996f6c716c22c86c720bf9dc49b4ee9b76c57519f791667ae1de2cc6f5add878fbf4ac02c6b6fd1865e1bcfa6105e75de4bf7ec596c338ed0cae99' + 'a552f570ae6c0ac6f95fef0b3de62e920b1b2fcb84cf0797dd25feb32c42f2e5a5ded77c4a33207bc40a0eb841846f99418246e4c686d44f23543881da69cc95') prepare() { cd ${pkgbase}-${pkgver} patch -p1 < "../include-samba-4.0.patch" + patch -p1 -i $srcdir/mplayer-la64.patch ./version.sh } @@ -42,10 +45,11 @@ build() { cd ${pkgbase}-${pkgver} export CFLAGS="${CFLAGS/-march=x86-64/}" export CFLAGS="${CFLAGS/-mtune=generic/}" + export CFLAGS="${CFLAGS/-mabi=lp64d -march=la464/}" export LDFLAGS="${LDFLAGS/,O1/}" export LDFLAGS="${LDFLAGS/,--sort-common/}" ./configure --prefix=/usr \ - --enable-runtime-cpudetection \ + --disable-runtime-cpudetection \ --disable-gui \ --disable-arts \ --disable-liblzo \ diff --git a/mplayer/mplayer-la64.patch b/mplayer/mplayer-la64.patch new file mode 100644 index 0000000000..649568f365 --- /dev/null +++ b/mplayer/mplayer-la64.patch @@ -0,0 +1,31 @@ +Index: mplayer-38322/configure +=================================================================== +--- mplayer-38322.orig/configure ++++ mplayer-38322/configure +@@ -1853,6 +1853,7 @@ if test -z "$_target" ; then + nios2) host_arch=nios2 ;; + vax) host_arch=vax ;; + xtensa*) host_arch=xtensa ;; ++ loongarch64) host_arch=loongarch64 ;; + *) host_arch=UNKNOWN ;; + esac + } +@@ -2835,6 +2836,18 @@ EOF + arch='arc' + iproc='arc' + ;; ++ loongarch64) ++ arch='loongarch' ++ subarch='loongarch64' ++ def_fast_unaligned='#define HAVE_FAST_UNALIGNED 1' ++ def_av_fast_unaligned='#define AV_HAVE_FAST_UNALIGNED 1' ++ def_local_aligned_8='#define HAVE_LOCAL_ALIGNED_8 1' ++ def_local_aligned_16='#define HAVE_LOCAL_ALIGNED_16 1' ++ def_local_aligned_32='#define HAVE_LOCAL_ALIGNED_32 1' ++ def_fast_64bit='#define HAVE_FAST_64BIT 1' ++ _march='-march=loongarch' ++ iproc='loongarch' ++ ;; + + *) + echo "The architecture of your CPU ($host_arch) is not supported by this configure script" diff --git a/musl/0001-musl-add-loongarch64-support.patch b/musl/0001-musl-add-loongarch64-support.patch new file mode 100644 index 0000000000..715bfed3d1 --- /dev/null +++ b/musl/0001-musl-add-loongarch64-support.patch @@ -0,0 +1,1722 @@ +diff --git a/arch/loongarch64/atomic_arch.h b/arch/loongarch64/atomic_arch.h +new file mode 100644 +index 00000000..bf4805c9 +--- /dev/null ++++ b/arch/loongarch64/atomic_arch.h +@@ -0,0 +1,53 @@ ++#define a_ll a_ll ++static inline int a_ll(volatile int *p) 
++{ ++ int v; ++ __asm__ __volatile__ ( ++ "ll.w %0, %1" ++ : "=r"(v) ++ : "ZC"(*p)); ++ return v; ++} ++ ++#define a_sc a_sc ++static inline int a_sc(volatile int *p, int v) ++{ ++ int r; ++ __asm__ __volatile__ ( ++ "sc.w %0, %1" ++ : "=r"(r), "=ZC"(*p) ++ : "0"(v) : "memory"); ++ return r; ++} ++ ++#define a_ll_p a_ll_p ++static inline void *a_ll_p(volatile void *p) ++{ ++ void *v; ++ __asm__ __volatile__ ( ++ "ll.d %0, %1" ++ : "=r"(v) ++ : "ZC"(*(void *volatile *)p)); ++ return v; ++} ++ ++#define a_sc_p a_sc_p ++static inline int a_sc_p(volatile void *p, void *v) ++{ ++ long r; ++ __asm__ __volatile__ ( ++ "sc.d %0, %1" ++ : "=r"(r), "=ZC"(*(void *volatile *)p) ++ : "0"(v) ++ : "memory"); ++ return r; ++} ++ ++#define a_barrier a_barrier ++static inline void a_barrier() ++{ ++ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++} ++ ++#define a_pre_llsc a_barrier ++#define a_post_llsc a_barrier +diff --git a/arch/loongarch64/bits/alltypes.h.in b/arch/loongarch64/bits/alltypes.h.in +new file mode 100644 +index 00000000..d1807aca +--- /dev/null ++++ b/arch/loongarch64/bits/alltypes.h.in +@@ -0,0 +1,18 @@ ++#define _Addr long ++#define _Int64 long ++#define _Reg long ++ ++#define __BYTE_ORDER 1234 ++#define __LONG_MAX 0x7fffffffffffffffL ++ ++#ifndef __cplusplus ++TYPEDEF int wchar_t; ++#endif ++ ++TYPEDEF float float_t; ++TYPEDEF double double_t; ++ ++TYPEDEF struct { long long __ll; long double __ld; } max_align_t; ++ ++TYPEDEF unsigned nlink_t; ++TYPEDEF int blksize_t; +diff --git a/arch/loongarch64/bits/fenv.h b/arch/loongarch64/bits/fenv.h +new file mode 100644 +index 00000000..264cafb5 +--- /dev/null ++++ b/arch/loongarch64/bits/fenv.h +@@ -0,0 +1,20 @@ ++#define FE_INEXACT 0x010000 ++#define FE_UNDERFLOW 0x020000 ++#define FE_OVERFLOW 0x040000 ++#define FE_DIVBYZERO 0x080000 ++#define FE_INVALID 0x100000 ++ ++#define FE_ALL_EXCEPT 0x1F0000 ++ ++#define FE_TONEAREST 0x000 ++#define FE_TOWARDZERO 0x100 ++#define FE_UPWARD 0x200 ++#define FE_DOWNWARD 0x300 ++ ++typedef unsigned fexcept_t; ++ ++typedef struct { ++ unsigned __cw; ++} fenv_t; ++ ++#define FE_DFL_ENV ((const fenv_t *) -1) +diff --git a/arch/loongarch64/bits/float.h b/arch/loongarch64/bits/float.h +new file mode 100644 +index 00000000..719c7908 +--- /dev/null ++++ b/arch/loongarch64/bits/float.h +@@ -0,0 +1,16 @@ ++#define FLT_EVAL_METHOD 0 ++ ++#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L ++#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L ++#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L ++#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L ++ ++#define LDBL_MANT_DIG 113 ++#define LDBL_MIN_EXP (-16381) ++#define LDBL_MAX_EXP 16384 ++ ++#define LDBL_DIG 33 ++#define LDBL_MIN_10_EXP (-4931) ++#define LDBL_MAX_10_EXP 4932 ++ ++#define DECIMAL_DIG 36 +diff --git a/arch/loongarch64/bits/posix.h b/arch/loongarch64/bits/posix.h +new file mode 100644 +index 00000000..8068ce98 +--- /dev/null ++++ b/arch/loongarch64/bits/posix.h +@@ -0,0 +1,2 @@ ++#define _POSIX_V6_LP64_OFF64 1 ++#define _POSIX_V7_LP64_OFF64 1 +diff --git a/arch/loongarch64/bits/ptrace.h b/arch/loongarch64/bits/ptrace.h +new file mode 100644 +index 00000000..741fc668 +--- /dev/null ++++ b/arch/loongarch64/bits/ptrace.h +@@ -0,0 +1,4 @@ ++#define PTRACE_GET_THREAD_AREA 25 ++#define PTRACE_SET_THREAD_AREA 26 ++#define PTRACE_GET_WATCH_REGS 0xd0 ++#define PTRACE_SET_WATCH_REGS 0xd1 +diff --git a/arch/loongarch64/bits/reg.h b/arch/loongarch64/bits/reg.h +new file mode 100644 +index 00000000..2633f39d +--- 
/dev/null ++++ b/arch/loongarch64/bits/reg.h +@@ -0,0 +1,2 @@ ++#undef __WORDSIZE ++#define __WORDSIZE 64 +diff --git a/arch/loongarch64/bits/setjmp.h b/arch/loongarch64/bits/setjmp.h +new file mode 100644 +index 00000000..f4a7f8a3 +--- /dev/null ++++ b/arch/loongarch64/bits/setjmp.h +@@ -0,0 +1 @@ ++typedef unsigned long long __jmp_buf[22]; +diff --git a/arch/loongarch64/bits/signal.h b/arch/loongarch64/bits/signal.h +new file mode 100644 +index 00000000..16f56f21 +--- /dev/null ++++ b/arch/loongarch64/bits/signal.h +@@ -0,0 +1,80 @@ ++#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \ ++ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) ++ ++#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) ++#define MINSIGSTKSZ 4096 ++#define SIGSTKSZ 16384 ++#endif ++ ++typedef unsigned long greg_t, gregset_t[32]; ++ ++typedef struct sigcontext { ++ unsigned long pc; ++ gregset_t gregs; ++ unsigned int flags; ++ unsigned long extcontext[]; ++} mcontext_t; ++ ++struct sigaltstack { ++ void *ss_sp; ++ int ss_flags; ++ size_t ss_size; ++}; ++ ++typedef struct __ucontext ++{ ++ unsigned long uc_flags; ++ struct __ucontext *uc_link; ++ stack_t uc_stack; ++ sigset_t uc_sigmask; ++ long __uc_pad; ++ mcontext_t uc_mcontext; ++} ucontext_t; ++ ++#define SA_NOCLDSTOP 1 ++#define SA_NOCLDWAIT 2 ++#define SA_SIGINFO 4 ++#define SA_ONSTACK 0x08000000 ++#define SA_RESTART 0x10000000 ++#define SA_NODEFER 0x40000000 ++#define SA_RESETHAND 0x80000000 ++#define SA_RESTORER 0x0 ++ ++#endif ++ ++#define SIGHUP 1 ++#define SIGINT 2 ++#define SIGQUIT 3 ++#define SIGILL 4 ++#define SIGTRAP 5 ++#define SIGABRT 6 ++#define SIGIOT SIGABRT ++#define SIGBUS 7 ++#define SIGFPE 8 ++#define SIGKILL 9 ++#define SIGUSR1 10 ++#define SIGSEGV 11 ++#define SIGUSR2 12 ++#define SIGPIPE 13 ++#define SIGALRM 14 ++#define SIGTERM 15 ++#define SIGSTKFLT 16 ++#define SIGCHLD 17 ++#define SIGCONT 18 ++#define SIGSTOP 19 ++#define SIGTSTP 20 ++#define SIGTTIN 21 ++#define SIGTTOU 22 ++#define SIGURG 23 ++#define SIGXCPU 24 ++#define SIGXFSZ 25 ++#define SIGVTALRM 26 ++#define SIGPROF 27 ++#define SIGWINCH 28 ++#define SIGIO 29 ++#define SIGPOLL SIGIO ++#define SIGPWR 30 ++#define SIGSYS 31 ++#define SIGUNUSED SIGSYS ++ ++#define _NSIG 65 +diff --git a/arch/loongarch64/bits/stat.h b/arch/loongarch64/bits/stat.h +new file mode 100644 +index 00000000..b7f4221b +--- /dev/null ++++ b/arch/loongarch64/bits/stat.h +@@ -0,0 +1,18 @@ ++struct stat { ++ dev_t st_dev; ++ ino_t st_ino; ++ mode_t st_mode; ++ nlink_t st_nlink; ++ uid_t st_uid; ++ gid_t st_gid; ++ dev_t st_rdev; ++ unsigned long __pad; ++ off_t st_size; ++ blksize_t st_blksize; ++ int __pad2; ++ blkcnt_t st_blocks; ++ struct timespec st_atim; ++ struct timespec st_mtim; ++ struct timespec st_ctim; ++ unsigned __unused[2]; ++}; +diff --git a/arch/loongarch64/bits/stdint.h b/arch/loongarch64/bits/stdint.h +new file mode 100644 +index 00000000..1bb147f2 +--- /dev/null ++++ b/arch/loongarch64/bits/stdint.h +@@ -0,0 +1,20 @@ ++typedef int32_t int_fast16_t; ++typedef int32_t int_fast32_t; ++typedef uint32_t uint_fast16_t; ++typedef uint32_t uint_fast32_t; ++ ++#define INT_FAST16_MIN INT32_MIN ++#define INT_FAST32_MIN INT32_MIN ++ ++#define INT_FAST16_MAX INT32_MAX ++#define INT_FAST32_MAX INT32_MAX ++ ++#define UINT_FAST16_MAX UINT32_MAX ++#define UINT_FAST32_MAX UINT32_MAX ++ ++#define INTPTR_MIN INT64_MIN ++#define INTPTR_MAX INT64_MAX ++#define UINTPTR_MAX UINT64_MAX ++#define PTRDIFF_MIN INT64_MIN ++#define PTRDIFF_MAX INT64_MAX ++#define 
SIZE_MAX UINT64_MAX +diff --git a/arch/loongarch64/bits/syscall.h.in b/arch/loongarch64/bits/syscall.h.in +new file mode 100644 +index 00000000..689ff36b +--- /dev/null ++++ b/arch/loongarch64/bits/syscall.h.in +@@ -0,0 +1,303 @@ ++#define __NR_io_setup 0 ++#define __NR_io_destroy 1 ++#define __NR_io_submit 2 ++#define __NR_io_cancel 3 ++#define __NR_io_getevents 4 ++#define __NR_setxattr 5 ++#define __NR_lsetxattr 6 ++#define __NR_fsetxattr 7 ++#define __NR_getxattr 8 ++#define __NR_lgetxattr 9 ++#define __NR_fgetxattr 10 ++#define __NR_listxattr 11 ++#define __NR_llistxattr 12 ++#define __NR_flistxattr 13 ++#define __NR_removexattr 14 ++#define __NR_lremovexattr 15 ++#define __NR_fremovexattr 16 ++#define __NR_getcwd 17 ++#define __NR_lookup_dcookie 18 ++#define __NR_eventfd2 19 ++#define __NR_epoll_create1 20 ++#define __NR_epoll_ctl 21 ++#define __NR_epoll_pwait 22 ++#define __NR_dup 23 ++#define __NR_dup3 24 ++#define __NR3264_fcntl 25 ++#define __NR_inotify_init1 26 ++#define __NR_inotify_add_watch 27 ++#define __NR_inotify_rm_watch 28 ++#define __NR_ioctl 29 ++#define __NR_ioprio_set 30 ++#define __NR_ioprio_get 31 ++#define __NR_flock 32 ++#define __NR_mknodat 33 ++#define __NR_mkdirat 34 ++#define __NR_unlinkat 35 ++#define __NR_symlinkat 36 ++#define __NR_linkat 37 ++#define __NR_umount2 39 ++#define __NR_mount 40 ++#define __NR_pivot_root 41 ++#define __NR_nfsservctl 42 ++#define __NR3264_statfs 43 ++#define __NR3264_fstatfs 44 ++#define __NR3264_truncate 45 ++#define __NR3264_ftruncate 46 ++#define __NR_fallocate 47 ++#define __NR_faccessat 48 ++#define __NR_chdir 49 ++#define __NR_fchdir 50 ++#define __NR_chroot 51 ++#define __NR_fchmod 52 ++#define __NR_fchmodat 53 ++#define __NR_fchownat 54 ++#define __NR_fchown 55 ++#define __NR_openat 56 ++#define __NR_close 57 ++#define __NR_vhangup 58 ++#define __NR_pipe2 59 ++#define __NR_quotactl 60 ++#define __NR_getdents64 61 ++#define __NR3264_lseek 62 ++#define __NR_read 63 ++#define __NR_write 64 ++#define __NR_readv 65 ++#define __NR_writev 66 ++#define __NR_pread64 67 ++#define __NR_pwrite64 68 ++#define __NR_preadv 69 ++#define __NR_pwritev 70 ++#define __NR3264_sendfile 71 ++#define __NR_pselect6 72 ++#define __NR_ppoll 73 ++#define __NR_signalfd4 74 ++#define __NR_vmsplice 75 ++#define __NR_splice 76 ++#define __NR_tee 77 ++#define __NR_readlinkat 78 ++#define __NR_sync 81 ++#define __NR_fsync 82 ++#define __NR_fdatasync 83 ++#define __NR_sync_file_range 84 ++#define __NR_timerfd_create 85 ++#define __NR_timerfd_settime 86 ++#define __NR_timerfd_gettime 87 ++#define __NR_utimensat 88 ++#define __NR_acct 89 ++#define __NR_capget 90 ++#define __NR_capset 91 ++#define __NR_personality 92 ++#define __NR_exit 93 ++#define __NR_exit_group 94 ++#define __NR_waitid 95 ++#define __NR_set_tid_address 96 ++#define __NR_unshare 97 ++#define __NR_futex 98 ++#define __NR_set_robust_list 99 ++#define __NR_get_robust_list 100 ++#define __NR_nanosleep 101 ++#define __NR_getitimer 102 ++#define __NR_setitimer 103 ++#define __NR_kexec_load 104 ++#define __NR_init_module 105 ++#define __NR_delete_module 106 ++#define __NR_timer_create 107 ++#define __NR_timer_gettime 108 ++#define __NR_timer_getoverrun 109 ++#define __NR_timer_settime 110 ++#define __NR_timer_delete 111 ++#define __NR_clock_settime 112 ++#define __NR_clock_gettime 113 ++#define __NR_clock_getres 114 ++#define __NR_clock_nanosleep 115 ++#define __NR_syslog 116 ++#define __NR_ptrace 117 ++#define __NR_sched_setparam 118 ++#define __NR_sched_setscheduler 119 ++#define 
__NR_sched_getscheduler 120 ++#define __NR_sched_getparam 121 ++#define __NR_sched_setaffinity 122 ++#define __NR_sched_getaffinity 123 ++#define __NR_sched_yield 124 ++#define __NR_sched_get_priority_max 125 ++#define __NR_sched_get_priority_min 126 ++#define __NR_sched_rr_get_interval 127 ++#define __NR_restart_syscall 128 ++#define __NR_kill 129 ++#define __NR_tkill 130 ++#define __NR_tgkill 131 ++#define __NR_sigaltstack 132 ++#define __NR_rt_sigsuspend 133 ++#define __NR_rt_sigaction 134 ++#define __NR_rt_sigprocmask 135 ++#define __NR_rt_sigpending 136 ++#define __NR_rt_sigtimedwait 137 ++#define __NR_rt_sigqueueinfo 138 ++#define __NR_rt_sigreturn 139 ++#define __NR_setpriority 140 ++#define __NR_getpriority 141 ++#define __NR_reboot 142 ++#define __NR_setregid 143 ++#define __NR_setgid 144 ++#define __NR_setreuid 145 ++#define __NR_setuid 146 ++#define __NR_setresuid 147 ++#define __NR_getresuid 148 ++#define __NR_setresgid 149 ++#define __NR_getresgid 150 ++#define __NR_setfsuid 151 ++#define __NR_setfsgid 152 ++#define __NR_times 153 ++#define __NR_setpgid 154 ++#define __NR_getpgid 155 ++#define __NR_getsid 156 ++#define __NR_setsid 157 ++#define __NR_getgroups 158 ++#define __NR_setgroups 159 ++#define __NR_uname 160 ++#define __NR_sethostname 161 ++#define __NR_setdomainname 162 ++#define __NR_getrlimit 163 ++#define __NR_setrlimit 164 ++#define __NR_getrusage 165 ++#define __NR_umask 166 ++#define __NR_prctl 167 ++#define __NR_getcpu 168 ++#define __NR_gettimeofday 169 ++#define __NR_settimeofday 170 ++#define __NR_adjtimex 171 ++#define __NR_getpid 172 ++#define __NR_getppid 173 ++#define __NR_getuid 174 ++#define __NR_geteuid 175 ++#define __NR_getgid 176 ++#define __NR_getegid 177 ++#define __NR_gettid 178 ++#define __NR_sysinfo 179 ++#define __NR_mq_open 180 ++#define __NR_mq_unlink 181 ++#define __NR_mq_timedsend 182 ++#define __NR_mq_timedreceive 183 ++#define __NR_mq_notify 184 ++#define __NR_mq_getsetattr 185 ++#define __NR_msgget 186 ++#define __NR_msgctl 187 ++#define __NR_msgrcv 188 ++#define __NR_msgsnd 189 ++#define __NR_semget 190 ++#define __NR_semctl 191 ++#define __NR_semtimedop 192 ++#define __NR_semop 193 ++#define __NR_shmget 194 ++#define __NR_shmctl 195 ++#define __NR_shmat 196 ++#define __NR_shmdt 197 ++#define __NR_socket 198 ++#define __NR_socketpair 199 ++#define __NR_bind 200 ++#define __NR_listen 201 ++#define __NR_accept 202 ++#define __NR_connect 203 ++#define __NR_getsockname 204 ++#define __NR_getpeername 205 ++#define __NR_sendto 206 ++#define __NR_recvfrom 207 ++#define __NR_setsockopt 208 ++#define __NR_getsockopt 209 ++#define __NR_shutdown 210 ++#define __NR_sendmsg 211 ++#define __NR_recvmsg 212 ++#define __NR_readahead 213 ++#define __NR_brk 214 ++#define __NR_munmap 215 ++#define __NR_mremap 216 ++#define __NR_add_key 217 ++#define __NR_request_key 218 ++#define __NR_keyctl 219 ++#define __NR_clone 220 ++#define __NR_execve 221 ++#define __NR3264_mmap 222 ++#define __NR3264_fadvise64 223 ++#define __NR_swapon 224 ++#define __NR_swapoff 225 ++#define __NR_mprotect 226 ++#define __NR_msync 227 ++#define __NR_mlock 228 ++#define __NR_munlock 229 ++#define __NR_mlockall 230 ++#define __NR_munlockall 231 ++#define __NR_mincore 232 ++#define __NR_madvise 233 ++#define __NR_remap_file_pages 234 ++#define __NR_mbind 235 ++#define __NR_get_mempolicy 236 ++#define __NR_set_mempolicy 237 ++#define __NR_migrate_pages 238 ++#define __NR_move_pages 239 ++#define __NR_rt_tgsigqueueinfo 240 ++#define __NR_perf_event_open 241 ++#define __NR_accept4 242 
++#define __NR_recvmmsg 243 ++#define __NR_arch_specific_syscall 244 ++#define __NR_wait4 260 ++#define __NR_prlimit64 261 ++#define __NR_fanotify_init 262 ++#define __NR_fanotify_mark 263 ++#define __NR_name_to_handle_at 264 ++#define __NR_open_by_handle_at 265 ++#define __NR_clock_adjtime 266 ++#define __NR_syncfs 267 ++#define __NR_setns 268 ++#define __NR_sendmmsg 269 ++#define __NR_process_vm_readv 270 ++#define __NR_process_vm_writev 271 ++#define __NR_kcmp 272 ++#define __NR_finit_module 273 ++#define __NR_sched_setattr 274 ++#define __NR_sched_getattr 275 ++#define __NR_renameat2 276 ++#define __NR_seccomp 277 ++#define __NR_getrandom 278 ++#define __NR_memfd_create 279 ++#define __NR_bpf 280 ++#define __NR_execveat 281 ++#define __NR_userfaultfd 282 ++#define __NR_membarrier 283 ++#define __NR_mlock2 284 ++#define __NR_copy_file_range 285 ++#define __NR_preadv2 286 ++#define __NR_pwritev2 287 ++#define __NR_pkey_mprotect 288 ++#define __NR_pkey_alloc 289 ++#define __NR_pkey_free 290 ++#define __NR_statx 291 ++#define __NR_io_pgetevents 292 ++#define __NR_rseq 293 ++#define __NR_kexec_file_load 294 ++#define __NR_pidfd_send_signal 424 ++#define __NR_io_uring_setup 425 ++#define __NR_io_uring_enter 426 ++#define __NR_io_uring_register 427 ++#define __NR_open_tree 428 ++#define __NR_move_mount 429 ++#define __NR_fsopen 430 ++#define __NR_fsconfig 431 ++#define __NR_fsmount 432 ++#define __NR_fspick 433 ++#define __NR_pidfd_open 434 ++#define __NR_clone3 435 ++#define __NR_close_range 436 ++#define __NR_openat2 437 ++#define __NR_pidfd_getfd 438 ++#define __NR_faccessat2 439 ++#define __NR_process_madvise 440 ++#define __NR_fcntl __NR3264_fcntl ++#define __NR_statfs __NR3264_statfs ++#define __NR_fstatfs __NR3264_fstatfs ++#define __NR_truncate __NR3264_truncate ++#define __NR_ftruncate __NR3264_ftruncate ++#define __NR_lseek __NR3264_lseek ++#define __NR_sendfile __NR3264_sendfile ++#define __NR_mmap __NR3264_mmap ++#define __NR_fadvise64 __NR3264_fadvise64 +diff --git a/arch/loongarch64/bits/user.h b/arch/loongarch64/bits/user.h +new file mode 100644 +index 00000000..4d4cd534 +--- /dev/null ++++ b/arch/loongarch64/bits/user.h +@@ -0,0 +1,5 @@ ++#define ELF_NGREG 45 ++#define ELF_NFPREG 33 ++ ++typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG]; ++typedef double elf_fpreg_t, elf_fpregset_t[ELF_NFPREG]; +diff --git a/arch/loongarch64/crt_arch.h b/arch/loongarch64/crt_arch.h +new file mode 100644 +index 00000000..e0760d9e +--- /dev/null ++++ b/arch/loongarch64/crt_arch.h +@@ -0,0 +1,13 @@ ++__asm__( ++".text \n" ++".global " START "\n" ++".type " START ", @function\n" ++START ":\n" ++" move $fp, $zero\n" ++" move $a0, $sp\n" ++".weak _DYNAMIC\n" ++".hidden _DYNAMIC\n" ++" la.local $a1, _DYNAMIC\n" ++" bstrins.d $sp, $zero, 3, 0\n" ++" b " START "_c\n" ++); +diff --git a/arch/loongarch64/pthread_arch.h b/arch/loongarch64/pthread_arch.h +new file mode 100644 +index 00000000..95ee4c7a +--- /dev/null ++++ b/arch/loongarch64/pthread_arch.h +@@ -0,0 +1,13 @@ ++static inline uintptr_t __get_tp() ++{ ++ register uintptr_t tp __asm__("tp"); ++ __asm__ ("" : "=r" (tp) ); ++ return tp; ++} ++ ++#define TLS_ABOVE_TP ++#define GAP_ABOVE_TP 0 ++ ++#define DTP_OFFSET 0 ++ ++#define MC_PC pc +diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h +new file mode 100644 +index 00000000..865a648d +--- /dev/null ++++ b/arch/loongarch64/reloc.h +@@ -0,0 +1,27 @@ ++#ifdef __loongarch64_soft_float ++#define FP_SUFFIX "-sf" ++#else ++#define FP_SUFFIX "" ++#endif ++ ++#define LDSO_ARCH 
"loongarch64" FP_SUFFIX ++ ++#define TPOFF_K (0x0) ++ ++#define REL_PLT R_LARCH_JUMP_SLOT ++#define REL_COPY R_LARCH_COPY ++#define REL_DTPMOD R_LARCH_TLS_DTPMOD64 ++#define REL_DTPOFF R_LARCH_TLS_DTPREL64 ++#define REL_TPOFF R_LARCH_TLS_TPREL64 ++#define REL_RELATIVE R_LARCH_RELATIVE ++#define REL_SYMBOLIC R_LARCH_64 ++ ++#define CRTJMP(pc,sp) __asm__ __volatile__( \ ++ "move $sp,%1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" ) ++ ++#define GETFUNCSYM(fp, sym, got) __asm__ ( \ ++ ".hidden " #sym "\n" \ ++ ".align 8 \n" \ ++ " la.local $t1, "#sym" \n" \ ++ " move %0, $t1 \n" \ ++ : "=r"(*(fp)) : : "memory" ) +diff --git a/arch/loongarch64/syscall_arch.h b/arch/loongarch64/syscall_arch.h +new file mode 100644 +index 00000000..4d5e1885 +--- /dev/null ++++ b/arch/loongarch64/syscall_arch.h +@@ -0,0 +1,137 @@ ++#define __SYSCALL_LL_E(x) (x) ++#define __SYSCALL_LL_O(x) (x) ++ ++#define SYSCALL_CLOBBERLIST \ ++ "$t0", "$t1", "$t2", "$t3", \ ++ "$t4", "$t5", "$t6", "$t7", "$t8", "memory" ++ ++static inline long __syscall0(long n) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0"); ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "=r"(a0) ++ : "r"(a7) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall1(long n, long a) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall2(long n, long a, long b) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall3(long n, long a, long b, long c) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall4(long n, long a, long b, long c, long d) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall5(long n, long a, long b, long c, long d, long e) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ register long a4 __asm__("$a4") = e; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ register long a4 __asm__("$a4") = e; ++ register long a5 __asm__("$a5") = f; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5) ++ : 
SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ register long a4 __asm__("$a4") = e; ++ register long a5 __asm__("$a5") = f; ++ register long a6 __asm__("$a6") = g; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5), "r"(a6) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++#define VDSO_USEFUL ++#define VDSO_CGT_SYM "__vdso_clock_gettime" ++#define VDSO_CGT_VER "LINUX_5.10" ++ ++#define IPC_64 0 +diff --git a/configure b/configure +index ca5cbc0b..68062071 100755 +--- a/configure ++++ b/configure +@@ -328,6 +328,7 @@ i?86*) ARCH=i386 ;; + x86_64-x32*|x32*|x86_64*x32) ARCH=x32 ;; + x86_64-nt64*) ARCH=nt64 ;; + x86_64*) ARCH=x86_64 ;; ++loongarch64*) ARCH=loongarch64 ;; + m68k*) ARCH=m68k ;; + mips64*|mipsisa64*) ARCH=mips64 ;; + mips*) ARCH=mips ;; +@@ -723,11 +724,6 @@ fi + test "$SUBARCH" \ + && printf "configured for %s variant: %s\n" "$ARCH" "$ARCH$SUBARCH" + +-case "$ARCH$SUBARCH" in +-arm) ASMSUBARCH=el ;; +-*) ASMSUBARCH=$SUBARCH ;; +-esac +- + # + # Some archs (powerpc) have different possible long double formats + # that the compiler can be configured for. The logic for whether this +diff --git a/crt/loongarch64/crti.s b/crt/loongarch64/crti.s +new file mode 100644 +index 00000000..81c43e6e +--- /dev/null ++++ b/crt/loongarch64/crti.s +@@ -0,0 +1,15 @@ ++.section .init ++.global _init ++_init: ++ addi.d $sp,$sp,-16 ++ st.d $fp,$sp,0 ++ st.d $ra,$sp,8 ++ addi.d $fp,$sp,16 ++ ++.section .fini ++.global _fini ++_fini: ++ addi.d $sp,$sp,-16 ++ st.d $fp,$sp,0 ++ st.d $ra,$sp,8 ++ addi.d $fp,$sp,16 +diff --git a/crt/loongarch64/crtn.s b/crt/loongarch64/crtn.s +new file mode 100644 +index 00000000..ca3fe80e +--- /dev/null ++++ b/crt/loongarch64/crtn.s +@@ -0,0 +1,12 @@ ++.section .init ++ ld.d $fp,$sp,0 ++ ld.d $ra,$sp,8 ++ addi.d $sp,$sp,16 ++ jr $ra ++ ++ ++.section .fini ++ ld.d $fp,$sp,0 ++ ld.d $ra,$sp,8 ++ addi.d $sp,$sp,16 ++ jr $ra +diff --git a/include/elf.h b/include/elf.h +index 86e2f0bb..1b0e9e71 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -697,6 +697,11 @@ typedef struct { + #define NT_MIPS_FP_MODE 0x801 + #define NT_MIPS_MSA 0x802 + #define NT_VERSION 1 ++#define NT_LOONGARCH_CPUCFG 0xa00 ++#define NT_LOONGARCH_CSR 0xa01 ++#define NT_LOONGARCH_LSX 0xa02 ++#define NT_LOONGARCH_LASX 0xa03 ++#define NT_LOONGARCH_LBT 0xa04 + + + +@@ -3288,6 +3293,66 @@ enum + #define R_RISCV_SET32 56 + #define R_RISCV_32_PCREL 57 + ++/* LoongArch ELF Flags */ ++#define EM_LOONGARCH 258 ++ ++#define EF_LARCH_ABI 0x07 ++#define EF_LARCH_ABI_LP64D 0x03 ++ ++/* LoongArch specific dynamic relocations. 
*/ ++#define R_LARCH_NONE 0 ++#define R_LARCH_32 1 ++#define R_LARCH_64 2 ++#define R_LARCH_RELATIVE 3 ++#define R_LARCH_COPY 4 ++#define R_LARCH_JUMP_SLOT 5 ++#define R_LARCH_TLS_DTPMOD32 6 ++#define R_LARCH_TLS_DTPMOD64 7 ++#define R_LARCH_TLS_DTPREL32 8 ++#define R_LARCH_TLS_DTPREL64 9 ++#define R_LARCH_TLS_TPREL32 10 ++#define R_LARCH_TLS_TPREL64 11 ++#define R_LARCH_IRELATIVE 12 ++#define R_LARCH_MARK_LA 20 ++#define R_LARCH_MARK_PCREL 21 ++#define R_LARCH_SOP_PUSH_PCREL 22 ++#define R_LARCH_SOP_PUSH_ABSOLUTE 23 ++#define R_LARCH_SOP_PUSH_DUP 24 ++#define R_LARCH_SOP_PUSH_GPREL 25 ++#define R_LARCH_SOP_PUSH_TLS_TPREL 26 ++#define R_LARCH_SOP_PUSH_TLS_GOT 27 ++#define R_LARCH_SOP_PUSH_TLS_GD 28 ++#define R_LARCH_SOP_PUSH_PLT_PCREL 29 ++#define R_LARCH_SOP_ASSERT 30 ++#define R_LARCH_SOP_NOT 31 ++#define R_LARCH_SOP_SUB 32 ++#define R_LARCH_SOP_SL 33 ++#define R_LARCH_SOP_SR 34 ++#define R_LARCH_SOP_ADD 35 ++#define R_LARCH_SOP_AND 36 ++#define R_LARCH_SOP_IF_ELSE 37 ++#define R_LARCH_SOP_POP_32_S_10_5 38 ++#define R_LARCH_SOP_POP_32_U_10_12 39 ++#define R_LARCH_SOP_POP_32_S_10_12 40 ++#define R_LARCH_SOP_POP_32_S_10_16 41 ++#define R_LARCH_SOP_POP_32_S_10_16_S2 42 ++#define R_LARCH_SOP_POP_32_S_5_20 43 ++#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 ++#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 ++#define R_LARCH_SOP_POP_32_U 46 ++#define R_LARCH_ADD8 47 ++#define R_LARCH_ADD16 48 ++#define R_LARCH_ADD24 49 ++#define R_LARCH_ADD32 50 ++#define R_LARCH_ADD64 51 ++#define R_LARCH_SUB8 52 ++#define R_LARCH_SUB16 53 ++#define R_LARCH_SUB24 54 ++#define R_LARCH_SUB32 55 ++#define R_LARCH_SUB64 56 ++#define R_LARCH_GNU_VTINHERIT 57 ++#define R_LARCH_GNU_VTENTRY 58 ++ + #ifdef __cplusplus + } + #endif +diff --git a/include/unistd.h b/include/unistd.h +index 212263a7..80be3b26 100644 +--- a/include/unistd.h ++++ b/include/unistd.h +@@ -467,6 +467,8 @@ pid_t gettid(void); + #define _CS_POSIX_V7_LPBIG_OFFBIG_LINTFLAGS 1147 + #define _CS_V6_ENV 1148 + #define _CS_V7_ENV 1149 ++#define _CS_POSIX_V7_THREADS_CFLAGS 1150 ++#define _CS_POSIX_V7_THREADS_LDFLAGS 1151 + + #ifdef __cplusplus + } +diff --git a/src/conf/confstr.c b/src/conf/confstr.c +index 02cb1aa2..3d417284 100644 +--- a/src/conf/confstr.c ++++ b/src/conf/confstr.c +@@ -7,7 +7,7 @@ size_t confstr(int name, char *buf, size_t len) + const char *s = ""; + if (!name) { + s = "/bin:/usr/bin"; +- } else if ((name&~4U)!=1 && name-_CS_POSIX_V6_ILP32_OFF32_CFLAGS>33U) { ++ } else if ((name&~4U)!=1 && name-_CS_POSIX_V6_ILP32_OFF32_CFLAGS>35U) { + errno = EINVAL; + return 0; + } +diff --git a/src/fenv/loongarch64/fenv.S b/src/fenv/loongarch64/fenv.S +new file mode 100644 +index 00000000..aa012c97 +--- /dev/null ++++ b/src/fenv/loongarch64/fenv.S +@@ -0,0 +1,72 @@ ++#ifndef __loongarch_soft_float ++ ++.global feclearexcept ++.type feclearexcept,@function ++feclearexcept: ++ li.w $a1, 0x1f0000 ++ and $a0, $a0, $a1 ++ movfcsr2gr $a1, $r0 ++ or $a1, $a1, $a0 ++ xor $a1, $a1, $a0 ++ movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++.global feraiseexcept ++.type feraiseexcept,@function ++feraiseexcept: ++ li.w $a1, 0x1f0000 ++ and $a0, $a0, $a1 ++ movfcsr2gr $a1, $r0 ++ or $a1, $a1, $a0 ++ movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++.global fetestexcept ++.type fetestexcept,@function ++fetestexcept: ++ li.w $a1, 0x1f0000 ++ and $a0, $a0, $a1 ++ movfcsr2gr $a1, $r0 ++ and $a0, $a1, $a0 ++ jr $ra ++ ++.global fegetround ++.type fegetround,@function ++fegetround: ++ movfcsr2gr $a0, $r0 ++ andi $a0, $a0, 0x300 //fcsr0.RM ++ jr $ra ++ ++.global 
__fesetround ++.hidden __fesetround ++.type __fesetround,@function ++__fesetround: ++ movfcsr2gr $a1, $r0 ++ li.w $a2, -769 //0xfffffcff ++ and $a1, $a1, $a2 ++ or $a1, $a1, $a0 ++ movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++.global fegetenv ++.type fegetenv,@function ++fegetenv: ++ movfcsr2gr $a1, $r0 ++ st.w $a1, $a0, 0 ++ li.w $a0, 0 ++ jr $ra ++ ++.global fesetenv ++.type fesetenv,@function ++fesetenv: ++ addi.d $a1, $a0, 1 ++ beq $a1, $r0, 1f ++ ld.w $a1, $a0, 0 ++1: movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++#endif +diff --git a/src/include/sys/stat.h b/src/include/sys/stat.h +new file mode 100644 +index 00000000..59339bee +--- /dev/null ++++ b/src/include/sys/stat.h +@@ -0,0 +1,9 @@ ++#ifndef SYS_STAT_H ++#define SYS_STAT_H ++ ++#include "../../../include/sys/stat.h" ++ ++hidden int __fstat(int, struct stat *); ++hidden int __fstatat(int, const char *restrict, struct stat *restrict, int); ++ ++#endif +diff --git a/src/internal/syscall.h b/src/internal/syscall.h +index d5f294d4..4f41e1dc 100644 +--- a/src/internal/syscall.h ++++ b/src/internal/syscall.h +@@ -201,43 +201,43 @@ static inline long __alt_socketcall(int sys, int sock, int cp, long a, long b, l + #define SYS_sendfile SYS_sendfile64 + #endif + +-#ifndef SYS_timer_settime ++#ifdef SYS_timer_settime32 + #define SYS_timer_settime SYS_timer_settime32 + #endif + +-#ifndef SYS_timer_gettime ++#ifdef SYS_timer_gettime32 + #define SYS_timer_gettime SYS_timer_gettime32 + #endif + +-#ifndef SYS_timerfd_settime ++#ifdef SYS_timerfd_settime32 + #define SYS_timerfd_settime SYS_timerfd_settime32 + #endif + +-#ifndef SYS_timerfd_gettime ++#ifdef SYS_timerfd_gettime32 + #define SYS_timerfd_gettime SYS_timerfd_gettime32 + #endif + +-#ifndef SYS_clock_settime ++#ifdef SYS_clock_settime32 + #define SYS_clock_settime SYS_clock_settime32 + #endif + +-#ifndef SYS_clock_gettime ++#ifdef SYS_clock_gettime32 + #define SYS_clock_gettime SYS_clock_gettime32 + #endif + +-#ifndef SYS_clock_getres ++#ifdef SYS_clock_getres_time32 + #define SYS_clock_getres SYS_clock_getres_time32 + #endif + +-#ifndef SYS_clock_nanosleep ++#ifdef SYS_clock_nanosleep_time32 + #define SYS_clock_nanosleep SYS_clock_nanosleep_time32 + #endif + +-#ifndef SYS_gettimeofday ++#ifdef SYS_gettimeofday_time32 + #define SYS_gettimeofday SYS_gettimeofday_time32 + #endif + +-#ifndef SYS_settimeofday ++#ifdef SYS_settimeofday_time32 + #define SYS_settimeofday SYS_settimeofday_time32 + #endif + +diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s +new file mode 100644 +index 00000000..edb8214c +--- /dev/null ++++ b/src/ldso/loongarch64/dlsym.s +@@ -0,0 +1,7 @@ ++.global dlsym ++.hidden __dlsym ++.type dlsym,@function ++dlsym: ++ move $a2, $ra ++ la.global $t0, __dlsym ++ jr $t0 +diff --git a/src/misc/getrlimit.c b/src/misc/getrlimit.c +index 2ab2f0f4..bf676307 100644 +--- a/src/misc/getrlimit.c ++++ b/src/misc/getrlimit.c +@@ -6,12 +6,13 @@ + + int getrlimit(int resource, struct rlimit *rlim) + { +- unsigned long k_rlim[2]; + int ret = syscall(SYS_prlimit64, 0, resource, 0, rlim); + if (!ret) { + FIX(rlim->rlim_cur); + FIX(rlim->rlim_max); + } ++#ifdef SYS_getrlimit ++ unsigned long k_rlim[2]; + if (!ret || errno != ENOSYS) + return ret; + if (syscall(SYS_getrlimit, resource, k_rlim) < 0) +@@ -21,6 +22,9 @@ int getrlimit(int resource, struct rlimit *rlim) + FIX(rlim->rlim_cur); + FIX(rlim->rlim_max); + return 0; ++#else ++ return ret; ++#endif + } + + weak_alias(getrlimit, getrlimit64); +diff --git a/src/misc/setrlimit.c b/src/misc/setrlimit.c +index 
8340aee0..5b713cf3 100644 +--- a/src/misc/setrlimit.c ++++ b/src/misc/setrlimit.c +@@ -12,12 +12,14 @@ struct ctx { + int err; + }; + ++#ifdef SYS_setrlimit + static void do_setrlimit(void *p) + { + struct ctx *c = p; + if (c->err>0) return; + c->err = -__syscall(SYS_setrlimit, c->res, c->lim); + } ++#endif + + int setrlimit(int resource, const struct rlimit *rlim) + { +@@ -29,6 +31,7 @@ int setrlimit(int resource, const struct rlimit *rlim) + rlim = &tmp; + } + int ret = __syscall(SYS_prlimit64, 0, resource, rlim, 0); ++#ifdef SYS_setrlimit + if (ret != -ENOSYS) return __syscall_ret(ret); + + struct ctx c = { +@@ -42,6 +45,9 @@ int setrlimit(int resource, const struct rlimit *rlim) + return -1; + } + return 0; ++#else ++ return __syscall_ret(ret); ++#endif + } + + weak_alias(setrlimit, setrlimit64); +diff --git a/src/network/netlink.h b/src/network/netlink.h +index 38acb178..873fabe2 100644 +--- a/src/network/netlink.h ++++ b/src/network/netlink.h +@@ -86,7 +86,7 @@ struct ifaddrmsg { + #define RTA_DATALEN(rta) ((rta)->rta_len-sizeof(struct rtattr)) + #define RTA_DATAEND(rta) ((char*)(rta)+(rta)->rta_len) + #define RTA_NEXT(rta) (struct rtattr*)((char*)(rta)+NETLINK_ALIGN((rta)->rta_len)) +-#define RTA_OK(nlh,end) ((char*)(end)-(char*)(rta) >= sizeof(struct rtattr)) ++#define RTA_OK(rta,end) ((char*)(end)-(char*)(rta) >= sizeof(struct rtattr)) + + #define NLMSG_RTA(nlh,len) ((void*)((char*)(nlh)+sizeof(struct nlmsghdr)+NETLINK_ALIGN(len))) + #define NLMSG_RTAOK(rta,nlh) RTA_OK(rta,NLMSG_DATAEND(nlh)) +diff --git a/src/setjmp/loongarch64/longjmp.S b/src/setjmp/loongarch64/longjmp.S +new file mode 100644 +index 00000000..4186974f +--- /dev/null ++++ b/src/setjmp/loongarch64/longjmp.S +@@ -0,0 +1,37 @@ ++.global _longjmp ++.global longjmp ++.type _longjmp,@function ++.type longjmp,@function ++_longjmp: ++longjmp: ++ move $t5, $a0 ++ move $a0, $a1 ++ ++ bne $a0, $zero, 1f ++ addi.d $a0, $a0, 1 ++ ++1: ++ ld.d $ra, $t5, 0 ++ ld.d $sp, $t5, 8 ++ ld.d $r21,$t5, 16 ++ ld.d $fp, $t5, 24 ++ ld.d $s0, $t5, 32 ++ ld.d $s1, $t5, 40 ++ ld.d $s2, $t5, 48 ++ ld.d $s3, $t5, 56 ++ ld.d $s4, $t5, 64 ++ ld.d $s5, $t5, 72 ++ ld.d $s6, $t5, 80 ++ ld.d $s7, $t5, 88 ++ ld.d $s8, $t5, 96 ++#ifndef __loongarch64_soft_float ++ fld.d $fs0, $t5, 104 ++ fld.d $fs1, $t5, 112 ++ fld.d $fs2, $t5, 120 ++ fld.d $fs3, $t5, 128 ++ fld.d $fs4, $t5, 136 ++ fld.d $fs5, $t5, 144 ++ fld.d $fs6, $t5, 152 ++ fld.d $fs7, $t5, 160 ++#endif ++ jr $ra +diff --git a/src/setjmp/loongarch64/setjmp.S b/src/setjmp/loongarch64/setjmp.S +new file mode 100644 +index 00000000..f3bb7c70 +--- /dev/null ++++ b/src/setjmp/loongarch64/setjmp.S +@@ -0,0 +1,34 @@ ++.global __setjmp ++.global _setjmp ++.global setjmp ++.type __setjmp,@function ++.type _setjmp,@function ++.type setjmp,@function ++__setjmp: ++_setjmp: ++setjmp: ++ st.d $ra, $a0, 0 ++ st.d $sp, $a0, 8 ++ st.d $r21,$a0, 16 ++ st.d $fp, $a0, 24 ++ st.d $s0, $a0, 32 ++ st.d $s1, $a0, 40 ++ st.d $s2, $a0, 48 ++ st.d $s3, $a0, 56 ++ st.d $s4, $a0, 64 ++ st.d $s5, $a0, 72 ++ st.d $s6, $a0, 80 ++ st.d $s7, $a0, 88 ++ st.d $s8, $a0, 96 ++#ifndef __loongarch64_soft_float ++ fst.d $fs0, $a0, 104 ++ fst.d $fs1, $a0, 112 ++ fst.d $fs2, $a0, 120 ++ fst.d $fs3, $a0, 128 ++ fst.d $fs4, $a0, 136 ++ fst.d $fs5, $a0, 144 ++ fst.d $fs6, $a0, 152 ++ fst.d $fs7, $a0, 160 ++#endif ++ xor $a0, $a0, $a0 ++ jr $ra +diff --git a/src/signal/loongarch64/restore.s b/src/signal/loongarch64/restore.s +new file mode 100644 +index 00000000..bca17eb7 +--- /dev/null ++++ b/src/signal/loongarch64/restore.s +@@ -0,0 +1,10 @@ 
++.global __restore_rt ++.global __restore ++.hidden __restore_rt ++.hidden __restore ++.type __restore_rt,@function ++.type __restore,@function ++__restore_rt: ++__restore: ++ li.w $a7, 139 ++ syscall 0 +diff --git a/src/signal/loongarch64/sigsetjmp.s b/src/signal/loongarch64/sigsetjmp.s +new file mode 100644 +index 00000000..abd96c62 +--- /dev/null ++++ b/src/signal/loongarch64/sigsetjmp.s +@@ -0,0 +1,29 @@ ++.global sigsetjmp ++.global __sigsetjmp ++.type sigsetjmp,@function ++.type __sigsetjmp,@function ++sigsetjmp: ++__sigsetjmp: ++ move $t5, $a0 ++ move $t6, $a1 ++ ++ # comparing save mask with 0, if equals to 0 then ++ # sigsetjmp is equal to setjmp. ++ beq $t6, $zero, 1f ++ st.d $ra, $t5, 168 ++ ++ # save base of got so that we can use it later ++ # once we return from 'longjmp' ++ la.global $t8, setjmp ++ jirl $ra, $t8, 0 ++ ++ move $a1, $a0 # Return from 'setjmp' or 'longjmp' ++ ld.d $ra, $t5, 168 # Restore ra of sigsetjmp ++ move $a0, $t5 ++ ++.hidden __sigsetjmp_tail ++ la.global $t8, __sigsetjmp_tail ++ jr $t8 ++1: ++ la.global $t8, setjmp ++ jr $t8 +diff --git a/src/stat/fchmodat.c b/src/stat/fchmodat.c +index 4ee00b0a..bc581050 100644 +--- a/src/stat/fchmodat.c ++++ b/src/stat/fchmodat.c +@@ -2,7 +2,6 @@ + #include + #include + #include "syscall.h" +-#include "kstat.h" + + int fchmodat(int fd, const char *path, mode_t mode, int flag) + { +@@ -11,12 +10,12 @@ int fchmodat(int fd, const char *path, mode_t mode, int flag) + if (flag != AT_SYMLINK_NOFOLLOW) + return __syscall_ret(-EINVAL); + +- struct kstat st; ++ struct stat st; + int ret, fd2; + char proc[15+3*sizeof(int)]; + +- if ((ret = __syscall(SYS_fstatat, fd, path, &st, flag))) +- return __syscall_ret(ret); ++ if (fstatat(fd, path, &st, flag)) ++ return -1; + if (S_ISLNK(st.st_mode)) + return __syscall_ret(-EOPNOTSUPP); + +@@ -27,12 +26,12 @@ int fchmodat(int fd, const char *path, mode_t mode, int flag) + } + + __procfdname(proc, fd2); +- ret = __syscall(SYS_fstatat, AT_FDCWD, proc, &st, 0); ++ ret = stat(proc, &st); + if (!ret) { +- if (S_ISLNK(st.st_mode)) ret = -EOPNOTSUPP; +- else ret = __syscall(SYS_fchmodat, AT_FDCWD, proc, mode); ++ if (S_ISLNK(st.st_mode)) ret = __syscall_ret(-EOPNOTSUPP); ++ else ret = syscall(SYS_fchmodat, AT_FDCWD, proc, mode); + } + + __syscall(SYS_close, fd2); +- return __syscall_ret(ret); ++ return ret; + } +diff --git a/src/stat/fstat.c b/src/stat/fstat.c +index 9bbb46de..27db0ccb 100644 +--- a/src/stat/fstat.c ++++ b/src/stat/fstat.c +@@ -4,12 +4,14 @@ + #include + #include "syscall.h" + +-int fstat(int fd, struct stat *st) ++int __fstat(int fd, struct stat *st) + { + if (fd<0) return __syscall_ret(-EBADF); +- return fstatat(fd, "", st, AT_EMPTY_PATH); ++ return __fstatat(fd, "", st, AT_EMPTY_PATH); + } + ++weak_alias(__fstat, fstat); ++ + #if !_REDIR_TIME64 + weak_alias(fstat, fstat64); + #endif +diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c +index de165b5c..74c51cf5 100644 +--- a/src/stat/fstatat.c ++++ b/src/stat/fstatat.c +@@ -6,7 +6,6 @@ + #include + #include + #include "syscall.h" +-#include "kstat.h" + + struct statx { + uint32_t stx_mask; +@@ -69,6 +68,10 @@ static int fstatat_statx(int fd, const char *restrict path, struct stat *restric + return 0; + } + ++#ifdef SYS_fstatat ++ ++#include "kstat.h" ++ + static int fstatat_kstat(int fd, const char *restrict path, struct stat *restrict st, int flag) + { + int ret; +@@ -130,18 +133,25 @@ static int fstatat_kstat(int fd, const char *restrict path, struct stat *restric + + return 0; + } ++#endif + +-int fstatat(int fd, const 
char *restrict path, struct stat *restrict st, int flag) ++int __fstatat(int fd, const char *restrict path, struct stat *restrict st, int flag) + { + int ret; ++#ifdef SYS_fstatat + if (sizeof((struct kstat){0}.st_atime_sec) < sizeof(time_t)) { + ret = fstatat_statx(fd, path, st, flag); + if (ret!=-ENOSYS) return __syscall_ret(ret); + } + ret = fstatat_kstat(fd, path, st, flag); ++#else ++ ret = fstatat_statx(fd, path, st, flag); ++#endif + return __syscall_ret(ret); + } + ++weak_alias(__fstatat, fstatat); ++ + #if !_REDIR_TIME64 + weak_alias(fstatat, fstatat64); + #endif +diff --git a/src/stdio/tempnam.c b/src/stdio/tempnam.c +index 565df6b6..0c65b1f0 100644 +--- a/src/stdio/tempnam.c ++++ b/src/stdio/tempnam.c +@@ -6,7 +6,6 @@ + #include + #include + #include "syscall.h" +-#include "kstat.h" + + #define MAXTRIES 100 + +@@ -37,11 +36,10 @@ char *tempnam(const char *dir, const char *pfx) + + for (try=0; try + #include + #include "syscall.h" +-#include "kstat.h" + + #define MAXTRIES 100 + +@@ -17,11 +16,10 @@ char *tmpnam(char *buf) + int r; + for (try=0; try + #include + #include "syscall.h" +-#include "kstat.h" + + const char unsigned *__map_file(const char *pathname, size_t *size) + { +- struct kstat st; ++ struct stat st; + const unsigned char *map = MAP_FAILED; + int fd = sys_open(pathname, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) return 0; +- if (!syscall(SYS_fstat, fd, &st)) { ++ if (!__fstat(fd, &st)) { + map = __mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + *size = st.st_size; + } +diff --git a/src/time/clock_gettime.c b/src/time/clock_gettime.c +index 3e1d0975..c7e66a51 100644 +--- a/src/time/clock_gettime.c ++++ b/src/time/clock_gettime.c +@@ -80,10 +80,12 @@ int __clock_gettime(clockid_t clk, struct timespec *ts) + return __syscall_ret(r); + long ts32[2]; + r = __syscall(SYS_clock_gettime, clk, ts32); ++#ifdef SYS_gettimeofday + if (r==-ENOSYS && clk==CLOCK_REALTIME) { + r = __syscall(SYS_gettimeofday, ts32, 0); + ts32[1] *= 1000; + } ++#endif + if (!r) { + ts->tv_sec = ts32[0]; + ts->tv_nsec = ts32[1]; +@@ -92,6 +94,7 @@ int __clock_gettime(clockid_t clk, struct timespec *ts) + return __syscall_ret(r); + #else + r = __syscall(SYS_clock_gettime, clk, ts); ++#ifdef SYS_gettimeofday + if (r == -ENOSYS) { + if (clk == CLOCK_REALTIME) { + __syscall(SYS_gettimeofday, ts, 0); +@@ -100,6 +103,7 @@ int __clock_gettime(clockid_t clk, struct timespec *ts) + } + r = -EINVAL; + } ++#endif + return __syscall_ret(r); + #endif + } diff --git a/musl/PKGBUILD b/musl/PKGBUILD index 4c2027d1e9..cd910e68a3 100644 --- a/musl/PKGBUILD +++ b/musl/PKGBUILD @@ -11,9 +11,16 @@ url='https://www.musl-libc.org/' license=('MIT') options=('staticlibs' '!buildflags') validpgpkeys=('836489290BB6B70F99FFDA0556BCDB593020450F') -source=(https://www.musl-libc.org/releases/musl-$pkgver.tar.gz{,.asc}) +source=(https://www.musl-libc.org/releases/musl-$pkgver.tar.gz{,.asc} + 0001-musl-add-loongarch64-support.patch) sha256sums=('7a35eae33d5372a7c0da1188de798726f68825513b7ae3ebe97aaaa52114f039' - 'SKIP') + 'SKIP' + '3c49faf5ec25f2b0591f5d6c2ea73d11f3257802e696493a45d1293d85108ccb') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/0001-musl-add-loongarch64-support.patch" +} build() { cd $pkgname-$pkgver diff --git a/mutter/PKGBUILD b/mutter/PKGBUILD index 83fef66875..6c534c147d 100644 --- a/mutter/PKGBUILD +++ b/mutter/PKGBUILD @@ -47,7 +47,7 @@ makedepends=( xorg-server xorg-server-xvfb ) -checkdepends=( +makedepends+=( gnome-session python-dbusmock wireplumber diff --git 
a/nautilus-sendto/PKGBUILD b/nautilus-sendto/PKGBUILD index 06578e52a3..0d5b061caa 100644 --- a/nautilus-sendto/PKGBUILD +++ b/nautilus-sendto/PKGBUILD @@ -13,8 +13,10 @@ license=(GPL) depends=(glib2) makedepends=(gobject-introspection git meson appstream-glib) _commit=c87aac46c4893e09b1ced1cca8bb86b0a6823124 # master -source=("git+https://gitlab.gnome.org/Archive/nautilus-sendto.git#commit=$_commit") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/Archive/nautilus-sendto.git#commit=$_commit" + nautilus-sendto-fix-meson.patch) +sha256sums=('SKIP' + '2ba3e793e49b67866e70e65e6cd7fb8029dee62681a5dcd2d9ba1fb494a24eeb') pkgver() { cd $pkgname @@ -23,6 +25,7 @@ pkgver() { prepare() { cd $pkgname + patch -p1 -i $srcdir/nautilus-sendto-fix-meson.patch } build() { diff --git a/nautilus-sendto/nautilus-sendto-fix-meson.patch b/nautilus-sendto/nautilus-sendto-fix-meson.patch new file mode 100644 index 0000000000..9257119f2d --- /dev/null +++ b/nautilus-sendto/nautilus-sendto-fix-meson.patch @@ -0,0 +1,13 @@ +diff --git a/src/meson.build b/src/meson.build +index 069eaf8..a9638c1 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -7,7 +7,7 @@ executable('nautilus-sendto', + + po_dir = join_paths(meson.source_root(), 'po') + +-i18n.merge_file ('appdata', ++i18n.merge_file ( + input: 'nautilus-sendto.metainfo.xml.in', + output: 'nautilus-sendto.metainfo.xml', + install: true, diff --git a/navi/PKGBUILD b/navi/PKGBUILD index 2834bad712..4a042af120 100644 --- a/navi/PKGBUILD +++ b/navi/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('579a72814e7ba07dae697a58dc13b0f7d853532ec07229aff07a11e5828f3799') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir completions } diff --git a/ncspot/PKGBUILD b/ncspot/PKGBUILD index 13dd202142..27fe2fbfba 100644 --- a/ncspot/PKGBUILD +++ b/ncspot/PKGBUILD @@ -17,7 +17,7 @@ options=('!lto') prepare() { cd "${srcdir}/${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/neofetch/PKGBUILD b/neofetch/PKGBUILD index 346e94e367..41817b73da 100644 --- a/neofetch/PKGBUILD +++ b/neofetch/PKGBUILD @@ -25,8 +25,15 @@ optdepends=( 'xorg-xrandr: Resolution detection (Multi Monitor + Refresh rates)' 'xorg-xwininfo: See https://github.com/dylanaraps/neofetch/wiki/Images-in-the-terminal' ) -source=("${pkgname}-${pkgver}.tar.gz::https://github.com/dylanaraps/neofetch/archive/${pkgver}.tar.gz") -sha256sums=('58a95e6b714e41efc804eca389a223309169b2def35e57fa934482a6b47c27e7') +source=("${pkgname}-${pkgver}.tar.gz::https://github.com/dylanaraps/neofetch/archive/${pkgver}.tar.gz" + neofetch-la64.patch) +sha256sums=('58a95e6b714e41efc804eca389a223309169b2def35e57fa934482a6b47c27e7' + 'ef425de4bd5350b32af8596346201ad9531b72614a4cbbe14128e472131b6ec9') + +prepare() { + cd "${pkgname}-${pkgver}/" + patch -p1 -i "$srcdir/neofetch-la64.patch" +} package() { cd "${pkgname}-${pkgver}/" diff --git a/neofetch/neofetch-la64.patch b/neofetch/neofetch-la64.patch new file mode 100644 index 0000000000..0e78376eb5 --- /dev/null +++ b/neofetch/neofetch-la64.patch @@ -0,0 +1,15 @@ +Index: neofetch-7.1.0/neofetch +=================================================================== +--- neofetch-7.1.0.orig/neofetch ++++ neofetch-7.1.0/neofetch +@@ -2088,8 +2089,8 @@ get_cpu() { + + *) + cpu="$(awk -F '\\s*: | @' \ +- '/model name|Hardware|Processor|^cpu model|chip type|^cpu type/ { +- 
cpu=$2; if ($1 == "Hardware") exit } END { print cpu }' "$cpu_file")" ++ '/[mM]odel [nN]ame|Hardware|Processor|^cpu model|chip type|^cpu type/ { ++ cpu=$2; if ($1 == "Hardware" || $1 == "Model Name") exit } END { print cpu }' "$cpu_file")" + ;; + esac + diff --git a/netavark/PKGBUILD b/netavark/PKGBUILD index f50df46f86..4bffa4c697 100644 --- a/netavark/PKGBUILD +++ b/netavark/PKGBUILD @@ -35,7 +35,7 @@ pkgver() { prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/netpbm/PKGBUILD b/netpbm/PKGBUILD index 30d47288c7..839e53a053 100644 --- a/netpbm/PKGBUILD +++ b/netpbm/PKGBUILD @@ -37,6 +37,8 @@ validpgpkeys=('5357F3B111688D88C1D88119FCF2CB179205AC90') prepare() { cd $pkgname-$pkgver + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} patch -p1 < ../netpbm-CAN-2005-2471.patch patch -p1 < ../netpbm-security-code.patch @@ -45,6 +47,7 @@ prepare() { cp config.mk.in config.mk [ "${CARCH}" = 'x86_64' ] && echo 'CFLAGS_SHLIB = -fPIC' >> config.mk + [ "${CARCH}" = 'loong64' ] && echo 'CFLAGS_SHLIB = -fPIC' >> config.mk echo "NETPBM_DOCURL = file://${srcdir}/doc" >> config.mk echo 'TIFFLIB = libtiff.so' >> config.mk echo 'JPEGLIB = libjpeg.so' >> config.mk diff --git a/netplan/PKGBUILD b/netplan/PKGBUILD index 14265ef30c..9ffac7d7ed 100644 --- a/netplan/PKGBUILD +++ b/netplan/PKGBUILD @@ -28,12 +28,14 @@ makedepends=( 'bash-completion' 'cmocka' 'meson' - 'pandoc' +#'pandoc' 'python-coverage' 'python-pycodestyle' 'python-pyflakes' 'python-pytest' 'python-pytest-cov' + 'python' + 'pkgconf' ) checkdepends=( 'openvswitch' @@ -44,13 +46,16 @@ source=( "$pkgname-$pkgver.tar.gz::$url/archive/${pkgver}.tar.gz" "$pkgname-0.107-disable_tests.patch" "$pkgname-0.107-use_lib.patch" + netplan-disable-pandoc.patch ) sha512sums=('6f6e3e4f179a2a74b46239cbd86919555a769d001b6fc8ba8daba3894b26415dbb8124f6a594de9afc14dc50d6979e58f195adfba2a53854b882d7cb6621fea7' '7f0b512a29c458e7e25f1387ce7f6730df47af48b35a5f2915a9f6a57fddf53a2c1d0690dfd81cd2dbbf40efa16f332bac63180ab3e589dcf5087f13775d040b' - '64d6e97cb7c330fc2e28872a34e0be4190c6e75e5235de6ff97ef77c4c287f814b85503c798d959b88037fb53af0459c6a38768b7420f12aac79db64db127378') + '64d6e97cb7c330fc2e28872a34e0be4190c6e75e5235de6ff97ef77c4c287f814b85503c798d959b88037fb53af0459c6a38768b7420f12aac79db64db127378' + '7ef1441fcb4d6703ebe8298a9f7c9c5583aebfb65894f291864e745186b5646770a2c8db199e0dc945080e8ea05db4f262723190a43357a279fdd5d2f23fd8d8') b2sums=('8fcd3ae48e0f3e1eb0486cf4f7d76d17f61b3b8212363c9f00a1c1e6d265b1093cd7bc283e659091a44ef01c1346f47fc663d43387d72b95305ce2bb2c5b808a' 'd8f5e36ef67fa43fa7c93eed2fbec54a4e934b9e994178820abda2473be646df009fc537ebc5fc067f8400c0ce3e28f66dfcf681b378cf2363f82acd79930149' - '7ca1aa7b5449e23b310f32d16bf0f50525c4175d26eb591489143d0acef8cda7969502bff26402dbba10d3ea45c3c6768c28bf22c74b9f0a807bc953acf5871a') + '7ca1aa7b5449e23b310f32d16bf0f50525c4175d26eb591489143d0acef8cda7969502bff26402dbba10d3ea45c3c6768c28bf22c74b9f0a807bc953acf5871a' + 'ecbb600bf9acc18d26ed55c3e1b26ff4189665d1721a386aab5615b71beadb6089e948e316fa76c1c96870a2fed426cf76b559677c857004d765c3d55c15c3b4') prepare() { # adapt names of python things to our names @@ -61,6 +66,7 @@ prepare() { patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-0.107-disable_tests.patch # use /usr/lib instead of /usr/libexec patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-0.107-use_lib.patch + patch -p1 -i $srcdir/netplan-disable-pandoc.patch } 
build() { diff --git a/netplan/netplan-disable-pandoc.patch b/netplan/netplan-disable-pandoc.patch new file mode 100644 index 0000000000..96ca36b8f2 --- /dev/null +++ b/netplan/netplan-disable-pandoc.patch @@ -0,0 +1,27 @@ +Index: netplan-0.106/Makefile +=================================================================== +--- netplan-0.106.orig/Makefile ++++ netplan-0.106/Makefile +@@ -67,7 +67,7 @@ PYCODESTYLE3 ?= $(shell command -v pycod + PYTEST3 ?= $(shell command -v pytest-3 || command -v pytest3 || echo true) + PYCOVERAGE ?= $(shell command -v python3-coverage || echo true) + +-default: netplan/_features.py generate netplan-dbus dbus/io.netplan.Netplan.service doc/netplan.html doc/netplan.5 doc/netplan-generate.8 doc/netplan-apply.8 doc/netplan-try.8 doc/netplan-dbus.8 doc/netplan-get.8 doc/netplan-set.8 ++default: netplan/_features.py generate netplan-dbus dbus/io.netplan.Netplan.service #doc/netplan.html doc/netplan.5 doc/netplan-generate.8 doc/netplan-apply.8 doc/netplan-try.8 doc/netplan-dbus.8 doc/netplan-get.8 doc/netplan-set.8 + + %.o: src/%.c src/_features.h + $(CC) $(BUILDFLAGS) $(CFLAGS) $(LDFLAGS) -c $^ `pkg-config --cflags --libs glib-2.0 gio-2.0 yaml-0.1 uuid` +@@ -160,10 +160,10 @@ install: default + install -m 644 include/*.h $(DESTDIR)/$(INCLUDEDIR)/netplan/ + # TODO: install pkg-config once available + # docs, data +- install -m 644 doc/*.html $(DESTDIR)/$(DOCDIR)/netplan/ ++ #install -m 644 doc/*.html $(DESTDIR)/$(DOCDIR)/netplan/ + install -m 644 examples/*.yaml $(DESTDIR)/$(DOCDIR)/netplan/examples/ +- install -m 644 doc/*.5 $(DESTDIR)/$(MANDIR)/man5/ +- install -m 644 doc/*.8 $(DESTDIR)/$(MANDIR)/man8/ ++ #install -m 644 doc/*.5 $(DESTDIR)/$(MANDIR)/man5/ ++ #install -m 644 doc/*.8 $(DESTDIR)/$(MANDIR)/man8/ + install -T -D -m 644 netplan.completions $(DESTDIR)/$(BASH_COMPLETIONS_DIR)/netplan + # dbus + mkdir -p $(DESTDIR)/$(DATADIR)/dbus-1/system.d $(DESTDIR)/$(DATADIR)/dbus-1/system-services diff --git a/newsboat/PKGBUILD b/newsboat/PKGBUILD index bc16ff6485..9020adf087 100644 --- a/newsboat/PKGBUILD +++ b/newsboat/PKGBUILD @@ -28,7 +28,7 @@ validpgpkeys=('B8B1756A0DDBF0760CE67CCF4ED6CD61932B9EBE') # Newsboat project +Date: Sat, 31 Jul 2021 10:51:41 -0700 +Subject: [PATCH] libntp: Do not use PTHREAD_STACK_MIN on glibc + +In glibc 2.34+ PTHREAD_STACK_MIN is not a compile-time constant which +could mean different stack sizes at runtime on different architectures +and it also causes compile failure. Default glibc thread stack size +or 64Kb set by ntp should be good in glibc these days. 
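[Editor's aside, not part of the ntp patch above: a minimal C sketch of the runtime approach the commit message alludes to. On glibc 2.34 and later PTHREAD_STACK_MIN is no longer a compile-time constant, so a portable caller queries the floor with sysconf(_SC_THREAD_STACK_MIN) instead of comparing against the macro at build time; the 64 KiB fallback mirrors ntp's THREAD_MINSTACKSIZE default mentioned above. The helper name pick_stack_size is illustrative only.]

    #include <pthread.h>
    #include <unistd.h>

    /* Choose a thread stack size without assuming PTHREAD_STACK_MIN is a
     * compile-time constant (on glibc >= 2.34 it is sysconf-based). */
    static size_t pick_stack_size(void)
    {
        size_t want = 64 * 1024;                  /* ntp's 64 KiB default */
        long hard_min = sysconf(_SC_THREAD_STACK_MIN);
        if (hard_min > 0 && (size_t)hard_min > want)
            want = (size_t)hard_min;              /* never go below the OS floor */
        return want;
    }

    int main(void)
    {
        pthread_attr_t attr;
        pthread_attr_init(&attr);
        pthread_attr_setstacksize(&attr, pick_stack_size());
        /* ... pthread_create(&tid, &attr, worker, arg) would go here ... */
        pthread_attr_destroy(&attr);
        return 0;
    }

[This is only a sketch of the general technique; the patch itself simply stops comparing THREAD_MINSTACKSIZE against PTHREAD_STACK_MIN on glibc, as shown in the hunk above.]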
+ +Upstream-Status: Pending +Signed-off-by: Khem Raj +--- + libntp/work_thread.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libntp/work_thread.c b/libntp/work_thread.c +index 03a5647..3ddd751 100644 +--- a/libntp/work_thread.c ++++ b/libntp/work_thread.c +@@ -41,7 +41,7 @@ + #ifndef THREAD_MINSTACKSIZE + # define THREAD_MINSTACKSIZE (64U * 1024) + #endif +-#ifndef __sun ++#if !defined(__sun) && !defined(__GLIBC__) + #if defined(PTHREAD_STACK_MIN) && THREAD_MINSTACKSIZE < PTHREAD_STACK_MIN + # undef THREAD_MINSTACKSIZE + # define THREAD_MINSTACKSIZE PTHREAD_STACK_MIN +-- +2.32.0 + diff --git a/ntp/ntp-ssp-la.patch b/ntp/ntp-ssp-la.patch new file mode 100644 index 0000000000..fca524ff43 --- /dev/null +++ b/ntp/ntp-ssp-la.patch @@ -0,0 +1,10 @@ +Index: ntp-4.2.8p15/sntp/harden/linux +=================================================================== +--- ntp-4.2.8p15.orig/sntp/harden/linux ++++ ntp-4.2.8p15/sntp/harden/linux +@@ -1,4 +1,4 @@ + # generic linux hardening flags +-NTP_HARD_CFLAGS="-fPIE -fPIC -fstack-protector-all -O1" ++NTP_HARD_CFLAGS="-fPIE -fPIC -O1" + NTP_HARD_CPPFLAGS="-D_FORTIFY_SOURCE=2" + NTP_HARD_LDFLAGS="-pie -Wl,-z,relro -Wl,-z,now" diff --git a/nushell/PKGBUILD b/nushell/PKGBUILD index f278d8e87d..4c951cdeec 100644 --- a/nushell/PKGBUILD +++ b/nushell/PKGBUILD @@ -33,7 +33,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/nuspell/PKGBUILD b/nuspell/PKGBUILD index 11db1cb577..27a2c4827c 100644 --- a/nuspell/PKGBUILD +++ b/nuspell/PKGBUILD @@ -9,7 +9,7 @@ arch=('loong64' 'x86_64') url='https://nuspell.github.io/' license=('LGPL3') depends=('icu' 'glibc' 'gcc-libs') -makedepends=('cmake' 'pandoc') +makedepends=('cmake') checkdepends=('catch2') source=(${pkgname}-${pkgver}.tar.gz::"https://github.com/nuspell/nuspell/archive/v${pkgver}.tar.gz") sha512sums=('f4119b3fe5944be8f5bc35ccff8d7a93b0f4fa9f129bc97a7b96879a11b5b35bd714b41dd209267417e94c5fed45fd3a74b349f94424f4b90bde07d9694d1d7d') diff --git a/nvidia-cg-toolkit/PKGBUILD b/nvidia-cg-toolkit/PKGBUILD index f4c6831879..2c81668108 100644 --- a/nvidia-cg-toolkit/PKGBUILD +++ b/nvidia-cg-toolkit/PKGBUILD @@ -20,6 +20,7 @@ package() { install -dm755 "${pkgdir}"/usr/lib [ "$CARCH" = "i686" ] && install -m644 "${srcdir}"/usr/lib/* "${pkgdir}"/usr/lib [ "$CARCH" = "x86_64" ] && install -m644 "${srcdir}"/usr/lib64/* "${pkgdir}"/usr/lib + [ "$CARCH" = "loong64" ] && install -m644 "${srcdir}"/usr/lib64/* "${pkgdir}"/usr/lib install -Dm644 "${srcdir}"/usr//local/Cg/docs/license.txt "${pkgdir}"/usr/share/licenses/nvidia-cg-toolkit/license.txt cp -r "${srcdir}"/usr/local/Cg "${pkgdir}"/usr/share/ find $pkgdir/usr/share/ -type d -exec chmod -R 755 '{}' ';' diff --git a/nvidia-lts/PKGBUILD b/nvidia-lts/PKGBUILD index 421a04157c..97466e7ba7 100644 --- a/nvidia-lts/PKGBUILD +++ b/nvidia-lts/PKGBUILD @@ -23,7 +23,7 @@ package() { _kernver="$(=5.6-7') +makedepends=('ncurses>=5.6-7' autoconf) optdepends=('ncurses: advanced ncurses features' 'tk: advanced tk features') -source=(https://caml.inria.fr/distrib/ocaml-${pkgver%.*}/${pkgname}-${pkgver}.tar.xz) -sha512sums=('23579b76592e225f2ddec58d78084dfd11befede18b61be71d3896fd72a90cc0fe4fb1f64a7dcbc83239ed69ec4254e13ab86fd810671851044c2a353da3adc5') +source=(https://caml.inria.fr/distrib/ocaml-${pkgver%.*}/${pkgname}-${pkgver}.tar.xz +ocaml-5.0.0-la64.patch) 
+sha512sums=('23579b76592e225f2ddec58d78084dfd11befede18b61be71d3896fd72a90cc0fe4fb1f64a7dcbc83239ed69ec4254e13ab86fd810671851044c2a353da3adc5' + 'a95f2e02b318183d76b858b0a1d66ad5c23977d72f6d964b95a8851edf4170ed3971602e031842ef04615d2f6b36198f62aa4ff7e57c188af052d45f22192f65') options=('!makeflags' '!emptydirs' 'staticlibs') +prepare() { + cd "${srcdir}/${pkgname}-${pkgver}" + patch -p1 -i $srcdir/ocaml-5.0.0-la64.patch + autoconf +} build() { cd "${srcdir}/${pkgname}-${pkgver}" CFLAGS+=' -ffat-lto-objects' CXXFLAGS+=' -ffat-lto-objects' - ./configure --prefix /usr --mandir /usr/share/man -enable-frame-pointers + ./configure --prefix /usr --mandir /usr/share/man --enable-frame-pointers make --debug=v world.opt } diff --git a/ocaml/ocaml-5.0.0-la64.patch b/ocaml/ocaml-5.0.0-la64.patch new file mode 100644 index 0000000000..6f7678ccf2 --- /dev/null +++ b/ocaml/ocaml-5.0.0-la64.patch @@ -0,0 +1,2389 @@ +diff --git a/Makefile b/Makefile +index bb2c245ea..db03683fb 100644 +--- a/Makefile ++++ b/Makefile +@@ -528,6 +528,14 @@ partialclean:: + + beforedepend:: lambda/runtimedef.ml + ++asmcomp/loongarch64/CSE.ml: asmcomp/riscv/CSE.ml ++ cp $< $@ ++asmcomp/loongarch64/reload.ml: asmcomp/riscv/reload.ml ++ cp $< $@ ++asmcomp/loongarch64/scheduling.ml: asmcomp/riscv/scheduling.ml ++ cp $< $@ ++ ++ + # Choose the right machine-dependent files + + asmcomp/arch.ml: asmcomp/$(ARCH)/arch.ml +@@ -1031,6 +1039,7 @@ clean:: + rm -f runtime/domain_state*.inc + rm -rf $(DEPDIR) + rm -f stdlib/libcamlrun.a stdlib/libcamlrun.lib ++ rm -f asmcomp/loongarch64/CSE.ml asmcomp/loongarch64/reload.ml asmcomp/loongarch64/scheduling.ml + + .PHONY: runtimeopt + runtimeopt: stdlib/libasmrun.$(A) +diff --git a/asmcomp/loongarch64/NOTES.md b/asmcomp/loongarch64/NOTES.md +new file mode 100644 +index 000000000..f9b63dd62 +--- /dev/null ++++ b/asmcomp/loongarch64/NOTES.md +@@ -0,0 +1,11 @@ ++# Supported platforms ++ ++LoongArch in 64-bit mode ++ ++# Reference documents ++ ++* Instruction set specification: ++ - https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html ++ ++* ELF ABI specification: ++ - https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +diff --git a/asmcomp/loongarch64/arch.ml b/asmcomp/loongarch64/arch.ml +new file mode 100644 +index 000000000..fee052684 +--- /dev/null ++++ b/asmcomp/loongarch64/arch.ml +@@ -0,0 +1,96 @@ ++# 2 "asmcomp/loongarch64/arch.ml" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. 
*) ++(* *) ++(**************************************************************************) ++ ++(* Specific operations for the Loongarch processor *) ++ ++open Format ++ ++(* Machine-specific command-line options *) ++ ++let command_line_options = [] ++ ++(* Specific operations *) ++ ++type specific_operation = ++ | Imultaddf of bool (* multiply, optionally negate, and add *) ++ | Imultsubf of bool (* multiply, optionally negate, and subtract *) ++ | Isqrtf (* floating-point square root *) ++ ++(* Addressing modes *) ++ ++type addressing_mode = ++ | Iindexed of int (* reg + displ *) ++ ++let is_immediate n = ++ (n <= 0x7FF) && (n >= -0x800) ++ ++(* Sizes, endianness *) ++ ++let big_endian = false ++ ++let size_addr = 8 ++let size_int = size_addr ++let size_float = 8 ++ ++let allow_unaligned_access = false ++ ++(* Behavior of division *) ++ ++let division_crashes_on_overflow = false ++ ++(* Operations on addressing modes *) ++ ++let identity_addressing = Iindexed 0 ++ ++let offset_addressing addr delta = ++ match addr with ++ | Iindexed n -> Iindexed(n + delta) ++ ++let num_args_addressing = function ++ | Iindexed _ -> 1 ++ ++(* Printing operations and addressing modes *) ++ ++let print_addressing printreg addr ppf arg = ++ match addr with ++ | Iindexed n -> ++ let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in ++ fprintf ppf "%a%s" printreg arg.(0) idx ++ ++let print_specific_operation printreg op ppf arg = ++ match op with ++ | Imultaddf false -> ++ fprintf ppf "%a *f %a +f %a" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultaddf true -> ++ fprintf ppf "-f (%a *f %a +f %a)" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultsubf false -> ++ fprintf ppf "%a *f %a -f %a" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultsubf true -> ++ fprintf ppf "-f (%a *f %a -f %a)" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Isqrtf -> ++ fprintf ppf "sqrtf %a" ++ printreg arg.(0) ++ ++(* Specific operations that are pure *) ++ ++let operation_is_pure _ = true ++ ++(* Specific operations that can raise *) ++ ++let operation_can_raise _ = false +diff --git a/asmcomp/loongarch64/arch.mli b/asmcomp/loongarch64/arch.mli +new file mode 100644 +index 000000000..57174fabe +--- /dev/null ++++ b/asmcomp/loongarch64/arch.mli +@@ -0,0 +1,76 @@ ++# 2 "asmcomp/loongarch64/arch.mli" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. 
*) ++(* *) ++(**************************************************************************) ++ ++(* Specific operations for the LoongArch processor *) ++ ++(* Machine-specific command-line options *) ++ ++val command_line_options : (string * Arg.spec * string) list ++ ++(* Specific operations *) ++ ++type specific_operation = ++ | Imultaddf of bool (* multiply, optionally negate, and add *) ++ | Imultsubf of bool (* multiply, optionally negate, and subtract *) ++ | Isqrtf (* floating-point square root *) ++ ++(* Addressing modes *) ++ ++type addressing_mode = ++ | Iindexed of int (* reg + displ *) ++ ++val is_immediate : int -> bool ++ ++(* Sizes, endianness *) ++ ++val big_endian : bool ++ ++val size_addr : int ++ ++val size_int : int ++ ++val size_float : int ++ ++val allow_unaligned_access : bool ++ ++(* Behavior of division *) ++ ++val division_crashes_on_overflow : bool ++ ++(* Operations on addressing modes *) ++ ++val identity_addressing : addressing_mode ++ ++val offset_addressing : addressing_mode -> int -> addressing_mode ++ ++val num_args_addressing : addressing_mode -> int ++ ++(* Printing operations and addressing modes *) ++ ++val print_addressing : ++ (Format.formatter -> 'a -> unit) -> addressing_mode -> ++ Format.formatter -> 'a array -> unit ++ ++val print_specific_operation : ++ (Format.formatter -> 'a -> unit) -> specific_operation -> ++ Format.formatter -> 'a array -> unit ++ ++(* Specific operations that are pure *) ++ ++val operation_is_pure : specific_operation -> bool ++ ++(* Specific operations that can raise *) ++ ++val operation_can_raise : specific_operation -> bool +diff --git a/asmcomp/loongarch64/emit.mlp b/asmcomp/loongarch64/emit.mlp +new file mode 100644 +index 000000000..b80b4f172 +--- /dev/null ++++ b/asmcomp/loongarch64/emit.mlp +@@ -0,0 +1,772 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) ++ ++(* Emission of LoongArch assembly code *) ++ ++open Cmm ++open Arch ++open Proc ++open Reg ++open Mach ++open Linear ++open Emitaux ++open Emitenv ++ ++(* Layout of the stack. The stack is kept 16-aligned. 
*) ++ ++let frame_size env = ++ let size = ++ env.stack_offset + (* Trap frame, outgoing parameters *) ++ size_int * env.f.fun_num_stack_slots.(0) + (* Local int variables *) ++ size_float * env.f.fun_num_stack_slots.(1)+ (* Local float variables *) ++ (if env.f.fun_contains_calls then size_addr else 0) (* Return address *) ++ in ++ Misc.align size 16 ++ ++let slot_offset env loc cls = ++ match loc with ++ | Local n -> ++ ("$sp", ++ if cls = 0 ++ then env.stack_offset + env.f.fun_num_stack_slots.(1) * size_float ++ + n * size_int ++ else env.stack_offset + n * size_float) ++ | Incoming n -> ++ ("$sp", frame_size env + n) ++ | Outgoing n -> ++ ("$sp", n) ++ | Domainstate n -> ++ ("$s8", n + Domainstate.(idx_of_field Domain_extra_params) * 8) ++ ++(* Output a symbol *) ++ ++let emit_symbol s = ++ emit_symbol '$' s ++ ++let emit_jump op s = ++ if !Clflags.dlcode || !Clflags.pic_code ++ then `{emit_string op} %plt({emit_symbol s})` ++ else `{emit_string op} {emit_symbol s}` ++ ++let emit_call = emit_jump "bl" ++let emit_tail = emit_jump "b" ++ ++(* Output a label *) ++ ++let emit_label lbl = ++ emit_string ".L"; emit_int lbl ++ ++(* Section switching *) ++ ++let data_space = ++ ".section .data" ++ ++let code_space = ++ ".section .text" ++ ++let rodata_space = ++ ".section .rodata" ++ ++(* Names for special regs *) ++ ++let reg_tmp = phys_reg 22 (* t1 *) ++let reg_tmp2 = phys_reg 21 (* t0 *) ++let reg_t2 = phys_reg 13 (* t2 *) ++let reg_domain_state_ptr = phys_reg 25 (* s8 *) ++let reg_trap_ptr = phys_reg 23 (* s1 *) ++let reg_alloc_ptr = phys_reg 24 (* s7 *) ++let reg_stack_arg_begin = phys_reg 9 (* s3 *) ++let reg_stack_arg_end = phys_reg 10 (* s4 *) ++ ++(* Output a pseudo-register *) ++ ++let reg_name = function ++ | {loc = Reg r} -> register_name r ++ | _ -> Misc.fatal_error "Emit.reg_name" ++ ++let emit_reg r = ++ emit_string (reg_name r) ++ ++(* Adjust sp by the given byte amount, clobbers reg_tmp *) ++ ++let emit_stack_adjustment n = ++ if n <> 0 then begin ++ if is_immediate n then ++ ` addi.d $sp, $sp, {emit_int n} \n` ++ else begin ++ ` li.d {emit_reg reg_tmp}, {emit_int n}\n`; ++ ` add.d $sp, $sp, {emit_reg reg_tmp}\n` ++ end; ++ cfi_adjust_cfa_offset (-n) ++ end ++ ++(* Output add.d-immediate instruction, clobbers reg_tmp2 *) ++ ++let emit_addimm rd rs n = ++ if is_immediate n then ++ ` addi.d {emit_reg rd}, {emit_reg rs}, {emit_int n}\n` ++ else begin ++ ` li.d {emit_reg reg_tmp2}, {emit_int n}\n`; ++ ` add.d {emit_reg rd}, {emit_reg rs}, {emit_reg reg_tmp2}\n` ++ end ++ ++(* Output memory operation with a possibly non-immediate offset, ++ clobbers reg_tmp *) ++ ++let emit_mem_op op reg ofs addr = ++ if is_immediate ofs then ++ ` {emit_string op} {emit_string reg}, {emit_string addr}, {emit_int ofs}\n` ++ else begin ++ ` li.d {emit_reg reg_tmp}, {emit_int ofs}\n`; ++ ` add.d {emit_reg reg_tmp}, {emit_string addr}, {emit_reg reg_tmp}\n`; ++ ` {emit_string op} {emit_string reg}, {emit_reg reg_tmp}, 0\n` ++ end ++ ++let reload_ra n = ++ emit_mem_op "ld.d" "$ra" (n - 8) "$sp" ++ ++let store_ra n = ++ emit_mem_op "st.d" "$ra" (n - 8) "$sp" ++ ++let emit_store rs ofs rd = ++ emit_mem_op "st.d" (reg_name rs) ofs rd ++ ++let emit_load rd ofs rs = ++ emit_mem_op "ld.d" (reg_name rd) ofs rs ++ ++let emit_float_load rd ofs rs = ++ emit_mem_op "fld.d" (reg_name rd) ofs rs ++ ++let emit_float_store rs ofs rd = ++ emit_mem_op "fst.d" (reg_name rs) ofs rd ++ ++let emit_float_test cmp ~arg ~res = ++ let negated = ++ match cmp with ++ | CFneq | CFnlt | CFngt | CFnle | CFnge -> true ++ | CFeq | 
CFlt | CFgt | CFle | CFge -> false ++ in ++ begin match cmp with ++ | CFeq | CFneq -> ` fcmp.ceq.d $fcc0, {emit_reg arg.(0)}, {emit_reg arg.(1)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFlt | CFnlt -> ` fcmp.clt.d $fcc0, {emit_reg arg.(0)}, {emit_reg arg.(1)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFgt | CFngt -> ` fcmp.clt.d $fcc0, {emit_reg arg.(1)}, {emit_reg arg.(0)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFle | CFnle -> ` fcmp.cle.d $fcc0, {emit_reg arg.(0)}, {emit_reg arg.(1)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFge | CFnge -> ` fcmp.cle.d $fcc0, {emit_reg arg.(1)}, {emit_reg arg.(0)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ end; ++ negated ++ ++(* Record live pointers at call points *) ++ ++let record_frame_label env live dbg = ++ let lbl = new_label () in ++ let live_offset = ref [] in ++ Reg.Set.iter ++ (function ++ {typ = Val; loc = Reg r} -> ++ live_offset := (r lsl 1) + 1 :: !live_offset ++ | {typ = Val; loc = Stack s} as reg -> ++ let (base, ofs) = slot_offset env s (register_class reg) in ++ assert (base = "$sp"); ++ live_offset := ofs :: !live_offset ++ | {typ = Addr} as r -> ++ Misc.fatal_error ("bad GC root " ^ Reg.name r) ++ | _ -> () ++ ) ++ live; ++ record_frame_descr ~label:lbl ~frame_size:(frame_size env) ++ ~live_offset:!live_offset dbg; ++ lbl ++ ++let record_frame env live dbg = ++ let lbl = record_frame_label env live dbg in ++ `{emit_label lbl}:\n` ++ ++let emit_call_gc gc = ++ `{emit_label gc.gc_lbl}:\n`; ++ ` {emit_call "caml_call_gc"}\n`; ++ `{emit_label gc.gc_frame_lbl}:\n`; ++ ` b {emit_label gc.gc_return_lbl}\n` ++ ++let bound_error_label env dbg = ++ if !Clflags.debug || env.bound_error_sites = [] then begin ++ let lbl_bound_error = new_label() in ++ let lbl_frame = record_frame_label env Reg.Set.empty (Dbg_other dbg) in ++ env.bound_error_sites <- ++ { bd_lbl = lbl_bound_error; ++ bd_frame = lbl_frame; } :: env.bound_error_sites; ++ lbl_bound_error ++ end else ++ let bd = List.hd env.bound_error_sites in ++ bd.bd_lbl ++ ++let emit_call_bound_error bd = ++ `{emit_label bd.bd_lbl}:\n`; ++ ` {emit_call "caml_ml_array_bound_error"}\n`; ++ `{emit_label bd.bd_frame}:\n` ++ ++(* Names for various instructions *) ++ ++let name_for_intop = function ++ | Iadd -> "add.d" ++ | Isub -> "sub.d" ++ | Imul -> "mul.d" ++ | Imulh -> "mulh.d" ++ | Idiv -> "div.d" ++ | Iand -> "and" ++ | Ior -> "or" ++ | Ixor -> "xor" ++ | Ilsl -> "sll.d" ++ | Ilsr -> "srl.d" ++ | Iasr -> "sra.d" ++ | Imod -> "mod.d" ++ | _ -> Misc.fatal_error "Emit.Intop" ++ ++let name_for_intop_imm = function ++ | Iadd -> "addi.d" ++ | Iand -> "andi" ++ | Ior -> "ori" ++ | Ixor -> "xori" ++ | Ilsl -> "slli.d" ++ | Ilsr -> "srli.d" ++ | Iasr -> "srai.d" ++ | _ -> Misc.fatal_error "Emit.Intop_imm" ++ ++let name_for_floatop1 = function ++ | Inegf -> "fneg.d" ++ | Iabsf -> "fabs.d" ++ | Ispecific Isqrtf -> "fsqrt.d" ++ | _ -> Misc.fatal_error "Emit.Iopf1" ++ ++let name_for_floatop2 = function ++ | Iaddf -> "fadd.d" ++ | Isubf -> "fsub.d" ++ | Imulf -> "fmul.d" ++ | Idivf -> "fdiv.d" ++ | _ -> Misc.fatal_error "Emit.Iopf2" ++ ++let name_for_specific = function ++ | Imultaddf false -> "fmadd.d" ++ | Imultaddf true -> "fnmadd.d" ++ | Imultsubf false -> "fmsub.d" ++ | Imultsubf true -> "fnmsub.d" ++ | _ -> Misc.fatal_error "Emit.Iopf3" ++ ++(* Output the assembly code for an instruction *) ++ ++let emit_instr env i = ++ emit_debug_info i.dbg; ++ match i.desc with ++ Lend -> () ++ | Lprologue -> ++ assert (env.f.fun_prologue_required); ++ let n = frame_size env in ++ emit_stack_adjustment (-n); ++ if 
env.f.fun_contains_calls then begin ++ store_ra n; ++ cfi_offset ~reg:1 (* ra *) ~offset:(-8) ++ end; ++ | Lop(Imove | Ispill | Ireload) -> ++ let src = i.arg.(0) and dst = i.res.(0) in ++ if src.loc <> dst.loc then begin ++ match (src, dst) with ++ | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Reg _} -> ++ ` move {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = Float} -> ++ ` fmov.d {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> ++ let (base, ofs) = slot_offset env s (register_class dst) in ++ emit_store src ofs base ++ | {loc = Reg _; typ = Float}, {loc = Stack s} -> ++ let (base, ofs) = slot_offset env s (register_class dst) in ++ emit_float_store src ofs base ++ | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} -> ++ let (base, ofs) = slot_offset env s (register_class src) in ++ emit_load dst ofs base ++ | {loc = Stack s; typ = Float}, {loc = Reg _} -> ++ let (base, ofs) = slot_offset env s (register_class src) in ++ emit_float_load dst ofs base ++ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = (Val | Int | Addr)} ++ | {loc = Stack _}, {loc = Stack _} ++ | {loc = Unknown}, _ | _, {loc = Unknown} -> ++ Misc.fatal_error "Emit: Imove" ++ end ++ | Lop(Iconst_int n) -> ++ ` li.d {emit_reg i.res.(0)}, {emit_nativeint n}\n` ++ | Lop(Iconst_float f) -> ++ let lbl = new_label() in ++ env.float_literals <- {fl=f; lbl} :: env.float_literals; ++ `la.local {emit_reg reg_tmp}, {emit_label lbl} \n`; ++ ` fld.d {emit_reg i.res.(0)}, {emit_reg reg_tmp}, 0\n` ++ | Lop(Iconst_symbol s) -> (* FIXME la.global assert error in binutils*) ++ `pcaddi {emit_reg i.res.(0)}, 0 \n`; ++ `b 7112233f\n`; ++ `.dword {emit_symbol s}\n`; ++ `7112233: ld.d {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 8\n` ++ | Lop(Icall_ind) -> ++ ` jirl $ra, {emit_reg i.arg.(0)}, 0\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ | Lop(Icall_imm {func}) -> ++ ` {emit_call func}\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ | Lop(Itailcall_ind) -> ++ let n = frame_size env in ++ if env.f.fun_contains_calls then reload_ra n; ++ emit_stack_adjustment n; ++ ` jr {emit_reg i.arg.(0)}\n` ++ | Lop(Itailcall_imm {func}) -> ++ if func = env.f.fun_name then begin ++ ` b {emit_label env.f.fun_tailrec_entry_point_label}\n` ++ end else begin ++ let n = frame_size env in ++ if env.f.fun_contains_calls then reload_ra n; ++ emit_stack_adjustment n; ++ ` {emit_tail func}\n` ++ end ++ | Lop(Iextcall{func; alloc; stack_ofs}) -> ++ if stack_ofs > 0 then begin ++ ` move {emit_reg reg_stack_arg_begin}, $sp\n`; ++ ` addi.d {emit_reg reg_stack_arg_end}, $sp, {emit_int (Misc.align stack_ofs 16)}\n`; ++ ` la.global {emit_reg reg_t2}, {emit_symbol func}\n`; ++ ` {emit_call "caml_c_call_stack_args"}\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ end else if alloc then begin ++ ` la.global {emit_reg reg_t2}, {emit_symbol func}\n`; ++ ` {emit_call "caml_c_call"}\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ end else begin ++ (* store ocaml stack in s0, which is marked as being destroyed ++ at noalloc calls *) ++ ` move $s0, $sp\n`; ++ cfi_remember_state (); ++ cfi_def_cfa_register ~reg:21; ++ let ofs = Domainstate.(idx_of_field Domain_c_stack) * 8 in ++ ` ld.d $sp, {emit_reg reg_domain_state_ptr}, {emit_int ofs}\n`; ++ ` {emit_call func}\n`; ++ ` move $sp, $s0\n`; ++ cfi_restore_state () ++ end ++ | Lop(Istackoffset n) -> ++ assert (n mod 16 = 0); ++ emit_stack_adjustment (-n); ++ env.stack_offset <- env.stack_offset + n ++ | Lop(Iload { 
memory_chunk = Single; addressing_mode = Iindexed ofs; is_atomic } ) -> ++ assert (not is_atomic); ++ ` fld.s {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int ofs}\n`; ++ ` fcvt.d.s {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` ++ | Lop(Iload { memory_chunk = Word_int | Word_val; addressing_mode = Iindexed ofs; is_atomic } ) -> ++ if is_atomic then ` dbar 0\n`; ++ ` ld.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int ofs}\n`; ++ if is_atomic then ` dbar 0\n` ++ | Lop(Iload { memory_chunk; addressing_mode = Iindexed ofs; is_atomic } ) -> ++ assert (not is_atomic); ++ let instr = ++ match memory_chunk with ++ | Byte_unsigned -> "ld.bu" ++ | Byte_signed -> "ld.b" ++ | Sixteen_unsigned -> "ld.hu" ++ | Sixteen_signed -> "ld.h" ++ | Thirtytwo_unsigned -> "ld.wu" ++ | Thirtytwo_signed -> "ld.w" ++ | Word_int | Word_val | Single -> assert false ++ | Double -> "fld.d" ++ in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int ofs}\n` ++ | Lop(Istore(Single, Iindexed ofs, _)) -> ++ (* ft0 is marked as destroyed for this operation *) ++ ` fcvt.s.d $ft0, {emit_reg i.arg.(0)}\n`; ++ ` fst.s $ft0, {emit_reg i.arg.(1)}, {emit_int ofs}\n` ++ | Lop(Istore((Word_int | Word_val), Iindexed ofs, assignement)) -> ++ if assignement then begin ++ ` dbar 0\n`; ++ ` st.d {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_int ofs}\n` ++ end else ++ ` st.d {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_int ofs}\n`; ++ | Lop(Istore(chunk, Iindexed ofs, _)) -> ++ let instr = ++ match chunk with ++ | Byte_unsigned | Byte_signed -> "st.b" ++ | Sixteen_unsigned | Sixteen_signed -> "st.h" ++ | Thirtytwo_unsigned | Thirtytwo_signed -> "st.w" ++ | Word_int | Word_val | Single -> assert false ++ | Double -> "fst.d" ++ in ++ ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_int ofs}\n` ++ | Lop(Ialloc {bytes; dbginfo}) -> ++ let lbl_frame_lbl = record_frame_label env i.live (Dbg_alloc dbginfo) in ++ if env.f.fun_fast then begin ++ let lbl_after_alloc = new_label () in ++ let lbl_call_gc = new_label () in ++ let n = -bytes in ++ let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in ++ emit_addimm reg_alloc_ptr reg_alloc_ptr n; ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; ++ `{emit_label lbl_after_alloc}:\n`; ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n`; ++ env.call_gc_sites <- ++ { gc_lbl = lbl_call_gc; ++ gc_return_lbl = lbl_after_alloc; ++ gc_frame_lbl = lbl_frame_lbl } :: env.call_gc_sites ++ end else begin ++ begin match bytes with ++ | 16 -> ` {emit_call "caml_alloc1"}\n` ++ | 24 -> ` {emit_call "caml_alloc2"}\n` ++ | 32 -> ` {emit_call "caml_alloc3"}\n` ++ | _ -> ++ ` li.d {emit_reg reg_t2}, {emit_int bytes}\n`; ++ ` {emit_call "caml_allocN"}\n` ++ end; ++ `{emit_label lbl_frame_lbl}:\n`; ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n` ++ end ++ | Lop(Ipoll { return_label }) -> ++ let lbl_frame_lbl = record_frame_label env i.live (Dbg_alloc []) in ++ let lbl_after_poll = match return_label with ++ | None -> new_label() ++ | Some(lbl) -> lbl in ++ let lbl_call_gc = new_label () in ++ let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ begin match return_label with ++ | None -> ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; ++ `{emit_label 
lbl_after_poll}:\n`; ++ | Some lbl -> ` bgeu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl}\n`; ++ ` b {emit_label lbl_call_gc}\n` ++ end; ++ env.call_gc_sites <- ++ { gc_lbl = lbl_call_gc; ++ gc_return_lbl = lbl_after_poll; ++ gc_frame_lbl = lbl_frame_lbl } :: env.call_gc_sites ++ | Lop(Iintop(Icomp cmp)) -> ++ begin match cmp with ++ | Isigned Clt -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Isigned Cge -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ | Isigned Cgt -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` ++ | Isigned Cle -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ | Isigned Ceq | Iunsigned Ceq -> ++ ` sub.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` sltui {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n` ++ | Isigned Cne | Iunsigned Cne -> ++ ` sub.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` sltu {emit_reg i.res.(0)}, $zero, {emit_reg i.res.(0)}\n` ++ | Iunsigned Clt -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Iunsigned Cge -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ | Iunsigned Cgt -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` ++ | Iunsigned Cle -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ end ++ | Lop(Iintop (Icheckbound)) -> ++ let lbl = bound_error_label env i.dbg in ++ ` bleu {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` ++ | Lop(Iintop op) -> ++ let instr = name_for_intop op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Iintop_imm(Isub, n)) -> ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` ++ | Lop(Iintop_imm(Iadd, n)) -> ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(n)}\n` ++ | Lop(Iintop_imm(op, n)) -> ++ let instri = name_for_intop_imm op in ++ if n<0 then (* FIXME *) ++ let instr = name_for_intop op in ++ ` addi.d {emit_reg reg_tmp2}, $zero, {emit_int n}\n {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp2} \n` ++ else ++ ` {emit_string instri} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` ++ | Lop(Inegf | Iabsf | Ispecific Isqrtf as op) -> ++ let instr = name_for_floatop1 op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` ++ | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> ++ let instr = name_for_floatop2 op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Ifloatofint) -> ++ ` movgr2fr.d $ft0, {emit_reg i.arg.(0)} \n`; ++ ` ffint.d.l {emit_reg i.res.(0)}, $ft0\n` ++ | Lop(Iintoffloat) -> ++ ` ftintrz.l.d $ft0, {emit_reg i.arg.(0)}\n`; ++ ` movfr2gr.d {emit_reg i.res.(0)}, $ft0 \n` ++ | Lop(Iopaque) -> ++ assert (i.arg.(0).loc = i.res.(0).loc) ++ | Lop(Ispecific sop) -> ++ let instr = name_for_specific sop in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` ++ | Lop (Idls_get) -> ++ let ofs = Domainstate.(idx_of_field Domain_dls_root) * 8 in ++ ` 
ld.d {emit_reg i.res.(0)}, {emit_reg reg_domain_state_ptr}, {emit_int ofs}\n` ++ | Lreloadretaddr -> ++ let n = frame_size env in ++ reload_ra n ++ | Lreturn -> ++ let n = frame_size env in ++ emit_stack_adjustment n; ++ ` jr $ra\n` ++ | Llabel lbl -> ++ `{emit_label lbl}:\n` ++ | Lbranch lbl -> ++ ` b {emit_label lbl}\n` ++ | Lcondbranch(tst, lbl) -> ++ begin match tst with ++ | Itruetest -> ++ ` bnez {emit_reg i.arg.(0)}, {emit_label lbl}\n` ++ | Ifalsetest -> ++ ` beqz {emit_reg i.arg.(0)}, {emit_label lbl}\n` ++ | Iinttest cmp -> ++ let name = match cmp with ++ | Iunsigned Ceq | Isigned Ceq -> "beq" ++ | Iunsigned Cne | Isigned Cne -> "bne" ++ | Iunsigned Cle -> "bleu" | Isigned Cle -> "ble" ++ | Iunsigned Cge -> "bgeu" | Isigned Cge -> "bge" ++ | Iunsigned Clt -> "bltu" | Isigned Clt -> "blt" ++ | Iunsigned Cgt -> "bgtu" | Isigned Cgt -> "bgt" ++ in ++ ` {emit_string name} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` ++ | Iinttest_imm _ -> ++ Misc.fatal_error "Emit.emit_instr (Iinttest_imm _)" ++ | Ifloattest cmp -> ++ let negated = emit_float_test cmp ~arg:i.arg ~res:reg_tmp in ++ let branch = ++ if negated ++ then "beqz" ++ else "bnez" ++ in ++ ` {emit_string branch} {emit_reg reg_tmp}, {emit_label lbl}\n` ++ | Ioddtest -> ++ ` andi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`; ++ ` bnez {emit_reg reg_tmp}, {emit_label lbl}\n` ++ | Ieventest -> ++ ` andi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`; ++ ` beqz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end ++ | Lcondbranch3(lbl0, lbl1, lbl2) -> ++ ` addi.d {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, -1\n`; ++ begin match lbl0 with ++ | None -> () ++ | Some lbl -> ` bltz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end; ++ begin match lbl1 with ++ | None -> () ++ | Some lbl -> ` beqz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end; ++ begin match lbl2 with ++ | None -> () ++ | Some lbl -> ` bgtz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end ++ | Lswitch jumptbl -> ++ let lbl = new_label() in ++ ` la.local {emit_reg reg_tmp}, {emit_label lbl}\n`; ++ ` slli.d {emit_reg reg_tmp2}, {emit_reg i.arg.(0)}, 2\n`; ++ ` add.d {emit_reg reg_tmp}, {emit_reg reg_tmp}, {emit_reg reg_tmp2}\n`; ++ ` jr {emit_reg reg_tmp}\n`; ++ `{emit_label lbl}:\n`; ++ for i = 0 to Array.length jumptbl - 1 do ++ ` b {emit_label jumptbl.(i)}\n` ++ done ++ | Lentertrap -> ++ () ++ | Ladjust_trap_depth { delta_traps } -> ++ (* each trap occupes 16 bytes on the stack *) ++ let delta = 16 * delta_traps in ++ cfi_adjust_cfa_offset delta; ++ env.stack_offset <- env.stack_offset + delta ++ | Lpushtrap {lbl_handler} -> ++ ` la.local {emit_reg reg_tmp}, {emit_label lbl_handler}\n`; ++ ` addi.d $sp, $sp, -16\n`; ++ env.stack_offset <- env.stack_offset + 16; ++ ` st.d {emit_reg reg_trap_ptr}, $sp, 0\n`; ++ ` st.d {emit_reg reg_tmp}, $sp, 8\n`; ++ cfi_adjust_cfa_offset 16; ++ ` move {emit_reg reg_trap_ptr}, $sp\n` ++ | Lpoptrap -> ++ ` ld.d {emit_reg reg_trap_ptr}, $sp, 0\n`; ++ ` addi.d $sp, $sp, 16\n`; ++ cfi_adjust_cfa_offset (-16); ++ env.stack_offset <- env.stack_offset - 16 ++ | Lraise k -> ++ begin match k with ++ | Lambda.Raise_regular -> ++ ` {emit_call "caml_raise_exn"}\n`; ++ record_frame env Reg.Set.empty (Dbg_raise i.dbg) ++ | Lambda.Raise_reraise -> ++ ` {emit_call "caml_reraise_exn"}\n`; ++ record_frame env Reg.Set.empty (Dbg_raise i.dbg) ++ | Lambda.Raise_notrace -> ++ ` move $sp, {emit_reg reg_trap_ptr}\n`; ++ ` ld.d {emit_reg reg_tmp}, $sp, 8\n`; ++ ` ld.d {emit_reg reg_trap_ptr}, $sp, 0\n`; ++ ` addi.d $sp, $sp, 16\n`; ++ ` jr {emit_reg 
reg_tmp}\n` ++ end ++ ++(* Emit a sequence of instructions *) ++ ++let rec emit_all env = function ++ | {desc = Lend} -> () | i -> emit_instr env i; emit_all env i.next ++ ++(* Emission of a function declaration *) ++ ++let fundecl fundecl = ++ let env = mk_env fundecl in ++ ` .globl {emit_symbol fundecl.fun_name}\n`; ++ ` .type {emit_symbol fundecl.fun_name}, @function\n`; ++ ` {emit_string code_space}\n`; ++ ` .align 2\n`; ++ `{emit_symbol fundecl.fun_name}:\n`; ++ emit_debug_info fundecl.fun_dbg; ++ cfi_startproc(); ++ ++ (* Dynamic stack checking *) ++ let stack_threshold_size = Config.stack_threshold * 8 in (* bytes *) ++ let { max_frame_size; contains_nontail_calls } = ++ preproc_stack_check ++ ~fun_body:fundecl.fun_body ~frame_size:(frame_size env) ~trap_size:16 ++ in ++ let handle_overflow = ref None in ++ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin ++ let overflow = new_label () and ret = new_label () in ++ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in ++ let f = max_frame_size + threshold_offset in ++ let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ emit_addimm reg_tmp reg_tmp f; ++ ` bltu $sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; ++ `{emit_label ret}:\n`; ++ handle_overflow := Some (overflow, ret) ++ end; ++ ++ emit_all env fundecl.fun_body; ++ List.iter emit_call_gc env.call_gc_sites; ++ List.iter emit_call_bound_error env.bound_error_sites; ++ ++ begin match !handle_overflow with ++ | None -> () ++ | Some (overflow, ret) -> ++ `{emit_label overflow}:\n`; ++ (* Pass the desired frame size on the stack, since all of the ++ argument-passing registers may be in use. 
*) ++ let s = Config.stack_threshold + max_frame_size / 8 in ++ ` li.d {emit_reg reg_tmp}, {emit_int s}\n`; ++ ` addi.d $sp, $sp, -16\n`; ++ ` st.d {emit_reg reg_tmp}, $sp, 0\n`; ++ ` st.d $ra, $sp, 8\n`; ++ ` {emit_call "caml_call_realloc_stack"}\n`; ++ ` ld.d $ra, $sp, 8\n`; ++ ` addi.d $sp, $sp, 16\n`; ++ ` b {emit_label ret}\n` ++ end; ++ ++ cfi_endproc(); ++ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; ++ (* Emit the float literals *) ++ if env.float_literals <> [] then begin ++ ` {emit_string rodata_space}\n`; ++ ` .align 3\n`; ++ List.iter ++ (fun {fl; lbl} -> ++ `{emit_label lbl}:\n`; ++ emit_float64_directive ".quad" fl) ++ env.float_literals; ++ end ++ ++(* Emission of data *) ++ ++let declare_global_data s = ++ ` .globl {emit_symbol s}\n`; ++ ` .type {emit_symbol s}, @object\n` ++ ++let emit_item = function ++ | Cglobal_symbol s -> ++ declare_global_data s ++ | Cdefine_symbol s -> ++ `{emit_symbol s}:\n`; ++ | Cint8 n -> ++ ` .byte {emit_int n}\n` ++ | Cint16 n -> ++ ` .short {emit_int n}\n` ++ | Cint32 n -> ++ ` .long {emit_nativeint n}\n` ++ | Cint n -> ++ ` .quad {emit_nativeint n}\n` ++ | Csingle f -> ++ emit_float32_directive ".long" (Int32.bits_of_float f) ++ | Cdouble f -> ++ emit_float64_directive ".quad" (Int64.bits_of_float f) ++ | Csymbol_address s -> ++ ` .quad {emit_symbol s}\n` ++ | Cstring s -> ++ emit_bytes_directive " .byte " s ++ | Cskip n -> ++ if n > 0 then ` .space {emit_int n}\n` ++ | Calign n -> ++ ` .align {emit_int (Misc.log2 n)}\n` ++ ++let data l = ++ ` {emit_string data_space}\n`; ++ List.iter emit_item l ++ ++(* Beginning / end of an assembly file *) ++ ++let begin_assembly() = ++ if !Clflags.dlcode || !Clflags.pic_code then ` \n`; (* FIXME *) ++ ` .file \"\"\n`; (* PR#7073 *) ++ reset_debug_info (); ++ (* Emit the beginning of the segments *) ++ let lbl_begin = Compilenv.make_symbol (Some "data_begin") in ++ ` {emit_string data_space}\n`; ++ declare_global_data lbl_begin; ++ `{emit_symbol lbl_begin}:\n`; ++ let lbl_begin = Compilenv.make_symbol (Some "code_begin") in ++ ` {emit_string code_space}\n`; ++ declare_global_data lbl_begin; ++ `{emit_symbol lbl_begin}:\n` ++ ++let end_assembly() = ++ ` {emit_string code_space}\n`; ++ let lbl_end = Compilenv.make_symbol (Some "code_end") in ++ declare_global_data lbl_end; ++ `{emit_symbol lbl_end}:\n`; ++ ` .long 0\n`; ++ ` {emit_string data_space}\n`; ++ let lbl_end = Compilenv.make_symbol (Some "data_end") in ++ declare_global_data lbl_end; ++ ` .quad 0\n`; (* PR#6329 *) ++ `{emit_symbol lbl_end}:\n`; ++ ` .quad 0\n`; ++ (* Emit the frame descriptors *) ++ ` {emit_string data_space}\n`; (* not rodata because relocations inside *) ++ let lbl = Compilenv.make_symbol (Some "frametable") in ++ declare_global_data lbl; ++ `{emit_symbol lbl}:\n`; ++ emit_frames ++ { efa_code_label = (fun l -> ` .quad {emit_label l}\n`); ++ efa_data_label = (fun l -> ` .quad {emit_label l}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); ++ efa_16 = (fun n -> ` .short {emit_int n}\n`); ++ efa_32 = (fun n -> ` .long {emit_int32 n}\n`); ++ efa_word = (fun n -> ` .quad {emit_int n}\n`); ++ efa_align = (fun n -> ` .align {emit_int (Misc.log2 n)}\n`); ++ efa_label_rel = (fun lbl ofs -> ++ ` .long ({emit_label lbl} - .) 
+ {emit_int32 ofs}\n`); ++ efa_def_label = (fun l -> `{emit_label l}:\n`); ++ efa_string = (fun s -> emit_bytes_directive " .byte " (s ^ "\000")) ++ } +diff --git a/asmcomp/loongarch64/proc.ml b/asmcomp/loongarch64/proc.ml +new file mode 100644 +index 000000000..62666c748 +--- /dev/null ++++ b/asmcomp/loongarch64/proc.ml +@@ -0,0 +1,319 @@ ++# 2 "asmcomp/loongarch64/proc.ml" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) ++ ++(* Description of the LoongArch *) ++ ++open Misc ++open Cmm ++open Reg ++open Arch ++open Mach ++ ++(* Instruction selection *) ++ ++let word_addressed = false ++ ++(* Registers available for register allocation *) ++ ++(* Integer register map ++ -------------------- ++ ++ zero always zero ++ ra return address ++ sp, gp, tp stack pointer, global pointer, thread pointer ++ a0-a7 0-7 arguments/results ++ s2-s6 8-12 arguments/results (preserved by C) ++ t2-t6 13-17 temporary ++ s0 18 general purpose (preserved by C) ++ t0, t1 19-20 temporaries (used by call veneers) ++ s1 21 trap pointer (preserved by C) ++ s7 22 allocation pointer (preserved by C) ++ s8 23 domain pointer (preserved by C) ++ ++ Floating-point register map ++ --------------------------- ++ ++ f0-f7 100-107 arguments ++ f0-f1 100-101 arguments/results ++ f8-f23 108-123 temporary ++ f24-f31 124-131 subroutine register variables ++ ++ Additional notes ++ ---------------- ++ ++ - t1 is used by the code generator, so not available for register ++ allocation. ++ ++ - t0-t6 may be used by PLT stubs, so should not be used to pass ++ arguments and may be clobbered by [Ialloc] in the presence of dynamic ++ linking. 
++*) ++ ++let int_reg_name = ++ [|"$a0"; "$a1"; "$a2"; "$a3"; "$a4"; "$a5"; "$a6"; "$a7"; (* 0- 7 *) ++ "$s2"; "$s3"; "$s4"; "$s5"; "$s6"; (* 8-12*) ++ "$t2"; "$t3"; "$t4"; "$t5"; "$t6"; "$t7"; "$t8"; (*13-19*) ++ "$s0"; (*20*) ++ "$t0"; "$t1"; (*21-22*) ++ "$s1"; "$s7"; "$s8"; (*23-25*) ++ |] ++ ++let float_reg_name = ++ [| "$ft0"; "$ft1"; "$ft2"; "$ft3"; "$ft4"; "$ft5"; "$ft6";"$ft7"; ++ "$fs0"; "$fs1"; ++ "$fa0"; "$fa1"; "$fa2"; "$fa3"; "$fa4"; "$fa5"; "$fa6"; "$fa7"; ++ "$fs2"; "$fs3"; "$fs4"; "$fs5"; "$fs6"; "$fs7"; ++ "$ft8"; "$ft9"; "$ft10"; "$ft11";"$ft12";"$ft13";"$ft14";"$ft15"; |] ++let num_register_classes = 2 ++ ++let register_class r = ++ match r.typ with ++ | Val | Int | Addr -> 0 ++ | Float -> 1 ++ ++(* first 19 int regs allocatable; all float regs allocatable *) ++let num_available_registers = [| 21; 32 |] ++ ++let first_available_register = [| 0; 100 |] ++ ++let register_name r = ++ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) ++ ++let rotate_registers = true ++ ++(* Representation of hard registers by pseudo-registers *) ++ ++let hard_int_reg = ++ let v = Array.make 26 Reg.dummy in ++ for i = 0 to 25 do ++ v.(i) <- Reg.at_location Int (Reg i) ++ done; ++ v ++ ++let hard_float_reg = ++ let v = Array.make 32 Reg.dummy in ++ for i = 0 to 31 do ++ v.(i) <- Reg.at_location Float (Reg(100 + i)) ++ done; ++ v ++ ++let all_phys_regs = ++ Array.append hard_int_reg hard_float_reg ++ ++let phys_reg n = ++ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) ++ ++let stack_slot slot ty = ++ Reg.at_location ty (Stack slot) ++ ++(* Calling conventions *) ++ ++let size_domainstate_args = 64 * size_int ++ ++let calling_conventions ++ first_int last_int first_float last_float make_stack first_stack arg = ++ let loc = Array.make (Array.length arg) Reg.dummy in ++ let int = ref first_int in ++ let float = ref first_float in ++ let ofs = ref first_stack in ++ for i = 0 to Array.length arg - 1 do ++ match arg.(i) with ++ | Val | Int | Addr as ty -> ++ if !int <= last_int then begin ++ loc.(i) <- phys_reg !int; ++ incr int ++ end else begin ++ loc.(i) <- stack_slot (make_stack !ofs) ty; ++ ofs := !ofs + size_int ++ end ++ | Float -> ++ if !float <= last_float then begin ++ loc.(i) <- phys_reg !float; ++ incr float ++ end else begin ++ loc.(i) <- stack_slot (make_stack !ofs) Float; ++ ofs := !ofs + size_float ++ end ++ done; ++ (loc, Misc.align (max 0 !ofs) 16) (* Keep stack 16-aligned. *) ++ ++let incoming ofs = ++ if ofs >= 0 ++ then Incoming ofs ++ else Domainstate (ofs + size_domainstate_args) ++let outgoing ofs = ++ if ofs >= 0 ++ then Outgoing ofs ++ else Domainstate (ofs + size_domainstate_args) ++let not_supported _ = fatal_error "Proc.loc_results: cannot call" ++ ++let max_arguments_for_tailcalls = 13 (* in regs *) + 64 (* in domain state *) ++ ++(* OCaml calling convention: ++ first integer args in a0 .. a7, s2 .. s6 ++ first float args in fa0 .. fa7, fs2 .. fs9 ++ remaining args in domain state area, then on stack. ++ Return values in a0 .. a7, s2 .. s6 or fa0 .. fa7, fs2 .. fs9. *) ++ ++let loc_arguments arg = ++ calling_conventions 0 12 110 121 outgoing (- size_domainstate_args) arg ++ ++let loc_parameters arg = ++ let (loc, _ofs) = ++ calling_conventions 0 12 110 121 incoming (- size_domainstate_args) arg ++ in ++ loc ++ ++let loc_results res = ++ let (loc, _ofs) = ++ calling_conventions 0 12 110 121 not_supported 0 res ++ in ++ loc ++ ++(* C calling convention: ++ first integer args in a0 .. a7 ++ first float args in fa0 .. 
fa7 ++ remaining args on stack. ++ A FP argument can be passed in an integer register if all FP registers ++ are exhausted but integer registers remain. ++ Return values in a0 .. a1 or fa0 .. fa1. *) ++ ++let external_calling_conventions ++ first_int last_int first_float last_float make_stack arg = ++ let loc = Array.make (Array.length arg) [| Reg.dummy |] in ++ let int = ref first_int in ++ let float = ref first_float in ++ let ofs = ref 0 in ++ for i = 0 to Array.length arg - 1 do ++ match arg.(i) with ++ | Val | Int | Addr as ty -> ++ if !int <= last_int then begin ++ loc.(i) <- [| phys_reg !int |]; ++ incr int ++ end else begin ++ loc.(i) <- [| stack_slot (make_stack !ofs) ty |]; ++ ofs := !ofs + size_int ++ end ++ | Float -> ++ if !float <= last_float then begin ++ loc.(i) <- [| phys_reg !float |]; ++ incr float ++ end else begin ++ loc.(i) <- [| stack_slot (make_stack !ofs) Float |]; ++ ofs := !ofs + size_float ++ end ++ done; ++ (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *) ++ ++let loc_external_arguments ty_args = ++ let arg = Cmm.machtype_of_exttype_list ty_args in ++ external_calling_conventions 0 7 110 117 outgoing arg ++ ++let loc_external_results res = ++ let (loc, _ofs) = calling_conventions 0 1 110 111 not_supported 0 res ++ in loc ++ ++(* Exceptions are in a0 *) ++ ++let loc_exn_bucket = phys_reg 0 ++ ++(* Volatile registers: none *) ++ ++let regs_are_volatile _ = false ++ ++(* Registers destroyed by operations *) ++ ++let destroyed_at_c_noalloc_call = ++ (* s0-s8 and fs0-fs7 are callee-save, but s0 is ++ used to preserve OCaml sp. *) ++ Array.of_list(List.map phys_reg ++ [0; 1; 2; 3; 4; 5; 6; 7; 13; 14; 15; 16; 17; 18; 19; 20;(*s0*) ++ 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; ++ 117; 124; 125; 126; 127; 128; 129; 130; 131]) ++ ++let destroyed_at_alloc = ++ (* t0-t6 are used for PLT stubs *) ++ if !Clflags.dlcode then Array.map phys_reg [|13; 14; 15; 16; 17; 18; 19|] ++ else [| phys_reg 13 |] (* t2 is used to pass the argument to caml_allocN *) ++ ++let destroyed_at_oper = function ++ | Iop(Icall_ind | Icall_imm _) -> all_phys_regs ++ | Iop(Iextcall{alloc; stack_ofs; _}) -> ++ assert (stack_ofs >= 0); ++ if alloc || stack_ofs > 0 then all_phys_regs ++ else destroyed_at_c_noalloc_call ++ | Iop(Ialloc _) | Iop(Ipoll _) -> destroyed_at_alloc ++ | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] ++ | Iop(Ifloatofint | Iintoffloat) -> [| phys_reg 100 |] ++ | _ -> [| |] ++ ++let destroyed_at_raise = all_phys_regs ++ ++let destroyed_at_reloadretaddr = [| |] ++ ++(* Maximal register pressure *) ++ ++let safe_register_pressure = function ++ | Iextcall _ -> 5 (*9-3 s0~s8 - s7 - s8 - s1 - s0*) ++ | _ -> 21 ++ ++let max_register_pressure = function ++ | Iextcall _ -> [| 5; 8 |] (* 6 integer callee-saves, 8 FP callee-saves *) ++ | _ -> [| 21; 30 |] ++ ++(* Layout of the stack *) ++ ++let frame_required fd = ++ fd.fun_contains_calls ++ || fd.fun_num_stack_slots.(0) > 0 ++ || fd.fun_num_stack_slots.(1) > 0 ++ ++let prologue_required fd = ++ frame_required fd ++ ++ (* FIXME *) ++let int_dwarf_reg_numbers = ++ [| 4; 5; 6; 7; 8; 9; 10; 11; ++ 23; 24; 25; 26; 27; 28; 29; 30; ++ 14; 15; 16; 17; 18; ++ 31; ++ 12; 13; ++ 19; 20; ++ |] ++ ++let float_dwarf_reg_numbers = ++ [| 32; 33; 34; 35; 36; 37; 38; 39; ++ 40; 41; ++ 42; 43; 44; 45; 46; 47; 48; 49; ++ 50; 51; 52; 53; 54; 55; 56; 57; ++ 58; 59; ++ 60; 61; 62; 63; ++ |] ++ ++let dwarf_register_numbers ~reg_class = ++ match reg_class with ++ | 0 -> int_dwarf_reg_numbers ++ | 1 -> float_dwarf_reg_numbers 
++ | _ -> Misc.fatal_errorf "Bad register class %d" reg_class ++ ++let stack_ptr_dwarf_register_number = 2 ++ ++(* Calling the assembler *) ++ ++let assemble_file infile outfile = ++ Ccomp.command ++ (Config.asm ^ " -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile) ++ ++let init () = () +diff --git a/asmcomp/loongarch64/selection.ml b/asmcomp/loongarch64/selection.ml +new file mode 100644 +index 000000000..be29364c1 +--- /dev/null ++++ b/asmcomp/loongarch64/selection.ml +@@ -0,0 +1,70 @@ ++# 2 "asmcomp/loongarch64/selection.ml" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) ++ ++(* Instruction selection for the LoongArch processor *) ++ ++open Cmm ++open Arch ++open Mach ++ ++(* Instruction selection *) ++ ++class selector = object ++ ++inherit Selectgen.selector_generic as super ++ ++(* LoongArch does not support immediate operands for comparison operators *) ++method is_immediate_test _cmp _n = false ++ ++method! is_immediate op n = ++ match op with ++ | Iadd | Iand | Ior | Ixor -> is_immediate n ++ (* sub immediate is turned into add immediate opposite *) ++ | Isub -> is_immediate (-n) ++ | _ -> super#is_immediate op n ++ ++method select_addressing _ = function ++ | Cop(Cadda, [arg; Cconst_int (n, _)], _) when is_immediate n -> ++ (Iindexed n, arg) ++ | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int (n, _)], _)], dbg) ++ when is_immediate n -> ++ (Iindexed n, Cop(Caddi, [arg1; arg2], dbg)) ++ | arg -> ++ (Iindexed 0, arg) ++ ++method! 
select_operation op args dbg = ++ match (op, args) with ++ (* Recognize (neg-)mult-add and (neg-)mult-sub instructions *) ++ | (Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3]) ++ | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2], _)]) -> ++ (Ispecific (Imultaddf false), [arg1; arg2; arg3]) ++ | (Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3]) -> ++ (Ispecific (Imultsubf false), [arg1; arg2; arg3]) ++ | (Cnegf, [Cop(Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> ++ (Ispecific (Imultsubf true), [arg1; arg2; arg3]) ++ | (Cnegf, [Cop(Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> ++ (Ispecific (Imultaddf true), [arg1; arg2; arg3]) ++ | (Cstore (Word_int | Word_val as memory_chunk, Assignment), [arg1; arg2]) -> ++ (* Use trivial addressing mode for non-initializing stores *) ++ (Istore (memory_chunk, Iindexed 0, true), [arg2; arg1]) ++ | (Cextcall("sqrt", _, _, _), []) -> ++ (Ispecific Isqrtf, args) ++ | _ -> ++ super#select_operation op args dbg ++ ++end ++ ++let fundecl ~future_funcnames f = ++ (new selector)#emit_fundecl ~future_funcnames f +diff --git a/configure b/configure +index 19764d19a..6415b4cc1 100755 +Binary files a/configure and b/configure differ +diff --git a/configure.ac b/configure.ac +index a7974b042..069a931d7 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1079,7 +1079,8 @@ AS_IF([test x"$supports_shared_libraries" = 'xtrue'], + [aarch64-*-freebsd*], [natdynlink=true], + [aarch64-*-openbsd*], [natdynlink=true], + [aarch64-*-netbsd*], [natdynlink=true], +- [riscv*-*-linux*], [natdynlink=true])]) ++ [riscv*-*-linux*], [natdynlink=true], ++ [loongarch*-*-linux*], [natdynlink=true])]) + + AS_CASE([$enable_native_toplevel,$natdynlink], + [yes,false], +@@ -1199,7 +1200,9 @@ AS_CASE([$host], + [x86_64-*-cygwin*], + [arch=amd64; system=cygwin], + [riscv64-*-linux*], +- [arch=riscv; model=riscv64; system=linux] ++ [arch=riscv; model=riscv64; system=linux], ++ [loongarch64-*-linux*], ++ [has_native_backend=yes; arch=loongarch64; system=linux] + ) + + AS_CASE([$ccomptype], +@@ -1302,7 +1305,7 @@ default_aspp="$CC -c" + AS_CASE([$as_target,$ocaml_cv_cc_vendor], + [*-*-linux*,gcc-*], + [AS_CASE([$as_cpu], +- [x86_64|arm*|aarch64*|i[[3-6]]86|riscv*], ++ [x86_64|arm*|aarch64*|i[[3-6]]86|riscv*|loongarch*], + [default_as="${toolpref}as"])], + [i686-pc-windows,*], + [default_as="ml -nologo -coff -Cp -c -Fo" +@@ -1940,7 +1943,7 @@ AS_IF([$native_compiler], + + AS_IF([test x"$enable_frame_pointers" = "xyes"], + [AS_CASE(["$host,$cc_basename"], +- [x86_64-*-linux*,gcc*|x86_64-*-linux*,clang*], ++ [x86_64-*-linux*,gcc*|x86_64-*-linux*,clang*|loongarch64-*-linux*,gcc*], + [common_cflags="$common_cflags -g -fno-omit-frame-pointer" + frame_pointers=true + AC_DEFINE([WITH_FRAME_POINTERS]) +diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h +index 0c2e0b2fe..ebdc1d55a 100644 +--- a/runtime/caml/stack.h ++++ b/runtime/caml/stack.h +@@ -70,6 +70,17 @@ + #define Saved_return_address(sp) *((intnat *)((sp) - 8)) + #endif + ++#ifdef TARGET_loongarch64 ++/* Size of the gc_regs structure, in words. ++ See loongarch64.S and loongarch64/proc.ml for the indices */ ++#define Wosize_gc_regs (2 + 23 /* int regs */ + 24 /* float regs */) ++#define Saved_return_address(sp) *((intnat *)((sp) - 8)) ++/* LoongArch does not use a frame pointer, but requires the stack to be ++ 16-aligned, so when pushing the return address to the stack there ++ is an extra word of padding after it that needs to be skipped when ++ walking the stack. 
*/ ++#define Pop_frame_pointer(sp) sp += sizeof(value) ++#endif + /* Declaration of variables used in the asm code */ + extern value * caml_globals[]; + extern intnat caml_globals_inited; +diff --git a/runtime/loongarch64.S b/runtime/loongarch64.S +new file mode 100644 +index 000000000..d2289f821 +--- /dev/null ++++ b/runtime/loongarch64.S +@@ -0,0 +1,827 @@ ++/**************************************************************************/ ++/* */ ++/* OCaml */ ++/* */ ++/* yala */ ++/* */ ++/* Copyright © 2008-2023 LOONGSON */ ++/* */ ++/* All rights reserved. This file is distributed under the terms of */ ++/* the GNU Lesser General Public License version 2.1, with the */ ++/* $special exception on linking described in the file LICENSE. */ ++/* */ ++/**************************************************************************/ ++ ++/* Asm part of the runtime system, LoongArch processor, 64-bit mode */ ++/* Must be preprocessed by cpp */ ++ ++#include "caml/m.h" ++ ++#define DOMAIN_STATE_PTR $s8 ++#define TRAP_PTR $s1 ++#define ALLOC_PTR $s7 ++#define ADDITIONAL_ARG $t2 ++#define STACK_ARG_BEGIN $s3 ++#define STACK_ARG_END $s4 ++#define TMP $t0 ++#define TMP2 $t1 ++ ++#define C_ARG_1 $a0 ++#define C_ARG_2 $a1 ++#define C_ARG_3 $a2 ++#define C_ARG_4 $a3 ++ ++/* Support for CFI directives */ ++//FIXME ++#define CFI_STARTPROC ++#define CFI_ENDPROC ++#define CFI_ADJUST(n) ++#define CFI_REGISTER(r1,r2) ++#define CFI_OFFSET(r,n) ++#define CFI_DEF_CFA_REGISTER(r) ++#define CFI_REMEMBER_STATE ++#define CFI_RESTORE_STATE ++ ++ .set domain_curr_field, 0 ++ .set domain_curr_cnt, 0 ++#define DOMAIN_STATE(c_type, name) \ ++ .equ domain_field_caml_##name, domain_curr_field ; \ ++ .set domain_curr_cnt, domain_curr_cnt + 1; \ ++ .set domain_curr_field, domain_curr_cnt*8 ++#include "../runtime/caml/domain_state.tbl" ++#undef DOMAIN_STATE ++ ++#define Caml_state(var) DOMAIN_STATE_PTR, domain_field_caml_##var ++ ++/* Globals and labels */ ++#define L(lbl) .L##lbl ++ ++#define FUNCTION(name) \ ++ .align 2; \ ++ .globl name; \ ++ .type name, @function; \ ++name:; \ ++ CFI_STARTPROC ++ ++#define END_FUNCTION(name) \ ++ CFI_ENDPROC; \ ++ .size name, .-name ++ ++#if defined(__PIC__) ++#define PLT(r) %plt(r) ++#else ++#define PLT(r) r ++#endif ++ ++#define OBJECT(name) \ ++ .data; \ ++ .align 3; \ ++ .globl name; \ ++ .type name, @object; \ ++name: ++#define END_OBJECT(name) \ ++ .size name, .-name ++ ++/* Stack switching operations */ ++ ++/* struct stack_info */ ++#define Stack_sp(reg) reg, 0 ++#define Stack_exception(reg) reg, 8 ++#define Stack_handler(reg) reg, 16 ++#define Stack_handler_from_cont(reg) reg, 15 ++ ++/* struct c_stack_link */ ++#define Cstack_stack(reg) reg, 0 ++#define Cstack_sp(reg) reg, 8 ++#define Cstack_prev(reg) reg, 16 ++ ++/* struct stack_handler */ ++#define Handler_value(reg) reg, 0 ++#define Handler_exception(reg) reg, 8 ++#define Handler_effect(reg) reg, 16 ++#define Handler_parent(reg) reg, 24 ++ ++/* Switch from OCaml to C stack. */ ++.macro SWITCH_OCAML_TO_C ++ /* Fill in Caml_state->current_stack->$sp */ ++ ld.d TMP, Caml_state(current_stack) ++ st.d $sp, Stack_sp(TMP) ++ /* Fill in Caml_state->c_stack */ ++ ld.d TMP2, Caml_state(c_stack) ++ st.d TMP, Cstack_stack(TMP2) ++ st.d $sp, Cstack_sp(TMP2) ++ /* Switch to C stack */ ++ move $sp, TMP2 ++ CFI_REMEMBER_STATE ++.endm ++ ++/* Switch from C to OCaml stack. 
*/ ++.macro SWITCH_C_TO_OCAML ++ ld.d $sp, Cstack_sp($sp) ++ CFI_RESTORE_STATE ++.endm ++ ++/* Save all of the registers that may be in use to a free gc_regs bucket ++ and store ALLOC_PTR and TRAP_PTR back to Caml_state ++ At the end the saved registers are placed in Caml_state(gc_regs) ++ */ ++.macro SAVE_ALL_REGS ++ /* First, save the young_ptr & exn_handler */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Now, use TMP to point to the gc_regs bucket */ ++ ld.d TMP, Caml_state(gc_regs_buckets) ++ ld.d TMP2, TMP, 0 /* next ptr */ ++ st.d TMP2, Caml_state(gc_regs_buckets) ++ /* Save allocatable integer registers Must be in ++ the same order as proc.ml int_reg_name*/ ++ st.d $a0, TMP, 2*8 ++ st.d $a1, TMP, 3*8 ++ st.d $a2, TMP, 4*8 ++ st.d $a3, TMP, 5*8 ++ st.d $a4, TMP, 6*8 ++ st.d $a5, TMP, 7*8 ++ st.d $a6, TMP, 8*8 ++ st.d $a7, TMP, 9*8 ++ st.d $s2, TMP, 10*8 ++ st.d $s3, TMP, 11*8 ++ st.d $s4, TMP, 12*8 ++ st.d $s5, TMP, 13*8 ++ st.d $s6, TMP, 14*8 ++ st.d $t2, TMP, 15*8 ++ st.d $t3, TMP, 16*8 ++ st.d $t4, TMP, 17*8 ++ st.d $t5, TMP, 18*8 ++ st.d $t6, TMP, 19*8 ++ st.d $t7, TMP, 20*8 ++ st.d $t8, TMP, 21*8 ++ st.d $s0, TMP, 22*8 ++ /* Save caller-save floating-point registers ++ (callee-saves are preserved by C functions) */ ++ fst.d $ft0, TMP, 23*8 ++ fst.d $ft1, TMP, 24*8 ++ fst.d $ft2, TMP, 25*8 ++ fst.d $ft3, TMP, 26*8 ++ fst.d $ft4, TMP, 27*8 ++ fst.d $ft5, TMP, 28*8 ++ fst.d $ft6, TMP, 29*8 ++ fst.d $ft7, TMP, 30*8 ++ fst.d $fa0, TMP, 31*8 ++ fst.d $fa1, TMP, 32*8 ++ fst.d $fa2, TMP, 33*8 ++ fst.d $fa3, TMP, 34*8 ++ fst.d $fa4, TMP, 35*8 ++ fst.d $fa5, TMP, 36*8 ++ fst.d $fa6, TMP, 37*8 ++ fst.d $fa7, TMP, 38*8 ++ fst.d $ft8, TMP, 39*8 ++ fst.d $ft9, TMP, 40*8 ++ fst.d $ft10, TMP, 41*8 ++ fst.d $ft11, TMP, 42*8 ++ fst.d $ft12, TMP, 43*8 ++ fst.d $ft13, TMP, 44*8 ++ fst.d $ft14, TMP, 45*8 ++ fst.d $ft15, TMP, 46*8 ++ addi.d TMP, TMP, 16 ++ st.d TMP, Caml_state(gc_regs) ++.endm ++ ++/* Undo SAVE_ALL_REGS by loading the registers saved in Caml_state(gc_regs) ++ and refreshing ALLOC_PTR & TRAP_PTR from Caml_state */ ++.macro RESTORE_ALL_REGS ++ /* Restore $a0, $a1, freeing up the next ptr slot */ ++ ld.d TMP, Caml_state(gc_regs) ++ addi.d TMP, TMP, -16 ++ /* Restore registers */ ++ ld.d $a0, TMP, 2*8 ++ ld.d $a1, TMP, 3*8 ++ ld.d $a2, TMP, 4*8 ++ ld.d $a3, TMP, 5*8 ++ ld.d $a4, TMP, 6*8 ++ ld.d $a5, TMP, 7*8 ++ ld.d $a6, TMP, 8*8 ++ ld.d $a7, TMP, 9*8 ++ ld.d $s2, TMP, 10*8 ++ ld.d $s3, TMP, 11*8 ++ ld.d $s4, TMP, 12*8 ++ ld.d $s5, TMP, 13*8 ++ ld.d $s6, TMP, 14*8 ++ ld.d $t2, TMP, 15*8 ++ ld.d $t3, TMP, 16*8 ++ ld.d $t4, TMP, 17*8 ++ ld.d $t5, TMP, 18*8 ++ ld.d $t6, TMP, 19*8 ++ ld.d $t7, TMP, 20*8 ++ ld.d $t8, TMP, 21*8 ++ ld.d $s0, TMP, 22*8 ++ fld.d $ft0, TMP, 23*8 ++ fld.d $ft1, TMP, 24*8 ++ fld.d $ft2, TMP, 25*8 ++ fld.d $ft3, TMP, 26*8 ++ fld.d $ft4, TMP, 27*8 ++ fld.d $ft5, TMP, 28*8 ++ fld.d $ft6, TMP, 29*8 ++ fld.d $ft7, TMP, 30*8 ++ fld.d $fa0, TMP, 31*8 ++ fld.d $fa1, TMP, 32*8 ++ fld.d $fa2, TMP, 33*8 ++ fld.d $fa3, TMP, 34*8 ++ fld.d $fa4, TMP, 35*8 ++ fld.d $fa5, TMP, 36*8 ++ fld.d $fa6, TMP, 37*8 ++ fld.d $fa7, TMP, 38*8 ++ fld.d $ft8, TMP, 39*8 ++ fld.d $ft9, TMP, 40*8 ++ fld.d $ft10, TMP, 41*8 ++ fld.d $ft11, TMP, 42*8 ++ fld.d $ft12, TMP, 43*8 ++ fld.d $ft13, TMP, 44*8 ++ fld.d $ft14, TMP, 45*8 ++ fld.d $ft15, TMP, 46*8 ++ /* Put gc_regs struct back in bucket linked list */ ++ ld.d TMP2, Caml_state(gc_regs_buckets) ++ st.d TMP2, TMP, 0 /* next ptr */ ++ st.d TMP, Caml_state(gc_regs_buckets) ++ /* Reload new allocation 
pointer & exn handler */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ ld.d TRAP_PTR, Caml_state(exn_handler) ++.endm ++ ++ .section .text ++/* Invoke the garbage collector. */ ++ ++ .globl caml_system__code_begin ++caml_system__code_begin: ++ ++FUNCTION(caml_call_realloc_stack) ++ /* Save return address */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ //CFI_ADJUST(16) ++ /* Save all registers (including ALLOC_PTR & TRAP_PTR) */ ++ SAVE_ALL_REGS ++ ld.d C_ARG_1, $sp, 16 /* argument */ ++ SWITCH_OCAML_TO_C ++ bl PLT(caml_try_realloc_stack) ++ SWITCH_C_TO_OCAML ++ beqz $a0, 1f ++ RESTORE_ALL_REGS ++ /* Free stack $space and return to caller */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++1: RESTORE_ALL_REGS ++ /* Raise the Stack_overflow exception */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ addi.d $sp, $sp, 16 /* pop argument */ ++ la.global $a0, caml_exn_Stack_overflow ++ b caml_raise_exn ++END_FUNCTION(caml_call_realloc_stack) ++ ++FUNCTION(caml_call_gc) ++L(caml_call_gc): ++ /* Save return address */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Store all registers (including ALLOC_PTR & TRAP_PTR) */ ++ SAVE_ALL_REGS ++ SWITCH_OCAML_TO_C ++ /* Call the garbage collector */ ++ bl PLT(caml_garbage_collection) ++ SWITCH_C_TO_OCAML ++ RESTORE_ALL_REGS ++ /* Free stack $space and return to caller */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++END_FUNCTION(caml_call_gc) ++ ++FUNCTION(caml_alloc1) ++ ld.d TMP, Caml_state(young_limit) ++ addi.d ALLOC_PTR, ALLOC_PTR, -16 ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_alloc1) ++ ++FUNCTION(caml_alloc2) ++ ld.d TMP, Caml_state(young_limit) ++ addi.d ALLOC_PTR, ALLOC_PTR, -24 ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_alloc2) ++ ++FUNCTION(caml_alloc3) ++ ld.d TMP, Caml_state(young_limit) ++ addi.d ALLOC_PTR, ALLOC_PTR, -32 ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_alloc3) ++ ++FUNCTION(caml_allocN) ++ ld.d TMP, Caml_state(young_limit) ++ sub.d ALLOC_PTR, ALLOC_PTR, ADDITIONAL_ARG ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_allocN) ++ ++/* Call a C function from OCaml */ ++/* Function to call is in ADDITIONAL_ARG */ ++ ++FUNCTION(caml_c_call) ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Switch form OCaml to C */ ++ SWITCH_OCAML_TO_C ++ /* Make the exception handler alloc ptr available to the C code */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Call the function */ ++ jirl $ra, ADDITIONAL_ARG, 0 ++ /* Reload alloc ptr */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Load ocaml stack */ ++ SWITCH_C_TO_OCAML ++ /* Return */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++END_FUNCTION(caml_c_call) ++ ++FUNCTION(caml_c_call_stack_args) ++ /* Arguments: ++ C arguments : $a0 to a7, fa0 to fa7 ++ C function : ADDITIONAL_ARG ++ C stack args : begin=STACK_ARG_BEGIN ++ end=STACK_ARG_END */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Switch from OCaml to C */ ++ SWITCH_OCAML_TO_C ++ /* Make the exception handler alloc ptr available to the C code */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Store $sp to restore after call */ ++ move $s2, $sp ++ /* Copy arguments from OCaml to C stack ++ NB: STACK_ARG_{BEGIN,END} are 16-byte aligned */ ++1: addi.d STACK_ARG_END, STACK_ARG_END, -16 
++ bltu STACK_ARG_END, STACK_ARG_BEGIN, 2f ++ ld.d TMP, STACK_ARG_END, 0 ++ ld.d TMP2, STACK_ARG_END, 8 ++ addi.d $sp, $sp, -16 ++ st.d TMP, $sp, 0 ++ st.d TMP2, $sp, 8 ++ b 1b ++2: /* Call the function */ ++ jirl $ra, ADDITIONAL_ARG, 0 ++ /* Restore stack */ ++ move $sp, $s2 ++ /* Reload alloc ptr */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Switch from C to OCaml */ ++ SWITCH_C_TO_OCAML ++ /* Return */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++END_FUNCTION(caml_c_call_stack_args) ++ ++/* Start the OCaml program */ ++ ++FUNCTION(caml_start_program) ++ /* domain state is passed as arg from C */ ++ move TMP, C_ARG_1 ++ la.global TMP2, caml_program ++ ++/* Code shared with caml_callback* */ ++/* Address of domain state is in TMP */ ++/* Address of OCaml code to call is in TMP2 */ ++/* Arguments to the OCaml code are in $a0...a7 */ ++ ++L(jump_to_caml): ++ /* Set up stack frame and save callee-save registers */ ++ CFI_OFFSET($ra, -200) ++ addi.d $sp, $sp, -208 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(208) ++ st.d $s0, $sp, 2*8 ++ st.d $s1, $sp, 3*8 ++ st.d $s2, $sp, 4*8 ++ st.d $s3, $sp, 5*8 ++ st.d $s4, $sp, 6*8 ++ st.d $s5, $sp, 7*8 ++ st.d $s6, $sp, 8*8 ++ st.d $s7, $sp, 9*8 ++ st.d $s8, $sp, 10*8 ++ st.d $fp, $sp, 11*8 ++ fst.d $fs0, $sp, 14*8 ++ fst.d $fs1, $sp, 15*8 ++ fst.d $fs2, $sp, 16*8 ++ fst.d $fs3, $sp, 17*8 ++ fst.d $fs4, $sp, 18*8 ++ fst.d $fs5, $sp, 19*8 ++ fst.d $fs6, $sp, 20*8 ++ fst.d $fs7, $sp, 21*8 ++ /* Load domain state pointer from argument */ ++ move DOMAIN_STATE_PTR, TMP ++ /* Reload allocation pointer */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Build (16-byte aligned) struct c_stack_link on the C stack */ ++ ld.d $t2, Caml_state(c_stack) ++ addi.d $sp, $sp, -32 ++ st.d $t2, Cstack_prev($sp) ++ st.d $zero, Cstack_stack($sp) ++ st.d $zero, Cstack_sp($sp) ++ CFI_ADJUST(32) ++ st.d $sp, Caml_state(c_stack) ++ /* Load the OCaml stack */ ++ ld.d $t2, Caml_state(current_stack) ++ ld.d $t2, Stack_sp($t2) ++ /* Store the gc_regs for callbacks during a GC */ ++ ld.d $t3, Caml_state(gc_regs) ++ addi.d $t2, $t2, -8 ++ st.d $t3, $t2, 0 ++ /* Store the stack pointer to allow DWARF unwind */ ++ addi.d $t2, $t2, -8 ++ st.d $sp, $t2, 0 /* C_stack_sp */ ++ /* Setup a trap frame to catch exceptions escaping the OCaml code */ ++ ld.d $t3, Caml_state(exn_handler) ++ la.local $t4, L(trap_handler) ++ addi.d $t2, $t2, -16 ++ st.d $t3, $t2, 0 ++ st.d $t4, $t2, 8 ++ move TRAP_PTR, $t2 ++ /* Switch stacks and call the OCaml code */ ++ move $sp, $t2 ++ CFI_REMEMBER_STATE ++ /* Call the OCaml code */ ++ jirl $ra, TMP2, 0 ++L(caml_retaddr): ++ /* Pop the trap frame, restoring Caml_state->exn_handler */ ++ ld.d $t2, $sp, 0 ++ addi.d $sp, $sp, 16 ++ CFI_ADJUST(-16) ++ st.d $t2, Caml_state(exn_handler) ++L(return_result): ++ /* Restore GC regs */ ++ ld.d $t2, $sp, 0 ++ ld.d $t3, $sp, 8 ++ addi.d $sp, $sp, 16 ++ CFI_ADJUST(-16) ++ st.d $t3, Caml_state(gc_regs) ++ /* Update allocation pointer */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Return to C stack */ ++ ld.d $t2, Caml_state(current_stack) ++ st.d $sp, Stack_sp($t2) ++ ld.d $t3, Caml_state(c_stack) ++ move $sp, $t3 ++ CFI_RESTORE_STATE ++ /* Pop the struct c_stack_link */ ++ ld.d $t2, Cstack_prev($sp) ++ addi.d $sp, $sp, 32 ++ CFI_ADJUST(-32) ++ st.d $t2, Caml_state(c_stack) ++ /* Reload callee-save register and return address */ ++ ld.d $s0, $sp, 2*8 ++ ld.d $s1, $sp, 3*8 ++ ld.d $s2, $sp, 4*8 ++ ld.d $s3, $sp, 5*8 ++ ld.d $s4, $sp, 6*8 ++ ld.d $s5, $sp, 7*8 ++ ld.d $s6, $sp, 8*8 ++ ld.d $s7, $sp, 9*8 ++ ld.d $s8, $sp, 
10*8 ++ ld.d $fp, $sp, 11*8 ++ fld.d $fs0, $sp, 14*8 ++ fld.d $fs1, $sp, 15*8 ++ fld.d $fs2, $sp, 16*8 ++ fld.d $fs3, $sp, 17*8 ++ fld.d $fs4, $sp, 18*8 ++ fld.d $fs5, $sp, 19*8 ++ fld.d $fs6, $sp, 20*8 ++ fld.d $fs7, $sp, 21*8 ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 208 ++ CFI_ADJUST(-208) ++ /* Return to C caller */ ++ jr $ra ++END_FUNCTION(caml_start_program) ++ ++/* The trap handler */ ++ ++ .align 2 ++L(trap_handler): ++ CFI_STARTPROC ++ /* Save exception pointer */ ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Encode exception pointer */ ++ ori $a0, $a0, 2 ++ /* Return it */ ++ b L(return_result) ++ CFI_ENDPROC ++ ++/* Exceptions */ ++ ++.macro JUMP_TO_TRAP_PTR ++ /* Cut stack at current trap handler */ ++ move $sp, TRAP_PTR ++ /* Pop previous handler and jump to it */ ++ ld.d TMP, $sp, 8 ++ ld.d TRAP_PTR, $sp, 0 ++ addi.d $sp, $sp, 16 ++ jr TMP ++.endm ++ ++/* Raise an exception from OCaml */ ++FUNCTION(caml_raise_exn) ++ /* Test if backtrace is active */ ++ ld.d TMP, Caml_state(backtrace_active) ++ bnez TMP, 2f ++1: ++ JUMP_TO_TRAP_PTR ++2: /* Zero backtrace_pos */ ++ st.d $zero, Caml_state(backtrace_pos) ++L(caml_reraise_exn_stash): ++ /* Preserve exception bucket in callee-save register $s2 */ ++ move $s2, $a0 ++ /* Stash the backtrace */ ++ /* arg1: exn bucket, already in $a0 */ ++ move $a1, $ra /* arg2: pc of $raise */ ++ move $a2, $sp /* arg3: $sp of $raise */ ++ move $a3, TRAP_PTR /* arg4: $sp of handler */ ++ /* Switch to C stack */ ++ ld.d TMP, Caml_state(c_stack) ++ move $sp, TMP ++ bl PLT(caml_stash_backtrace) ++ /* Restore exception bucket and $raise */ ++ move $a0, $s2 ++ b 1b ++END_FUNCTION(caml_raise_exn) ++ ++FUNCTION(caml_reraise_exn) ++ ld.d TMP, Caml_state(backtrace_active) ++ bnez TMP, L(caml_reraise_exn_stash) ++ JUMP_TO_TRAP_PTR ++END_FUNCTION(caml_reraise_exn) ++ ++/* Raise an exception from C */ ++ ++FUNCTION(caml_raise_exception) ++ /* Load the domain state ptr */ ++ move DOMAIN_STATE_PTR, C_ARG_1 ++ /* Load the exception bucket */ ++ move $a0, C_ARG_2 ++ /* Reload trap ptr and alloc ptr */ ++ ld.d TRAP_PTR, Caml_state(exn_handler) ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Discard the C stack pointer and reset to ocaml stack */ ++ ld.d TMP, Caml_state(current_stack) ++ ld.d TMP, Stack_sp(TMP) ++ move $sp, TMP ++ /* Restore frame and link on return to OCaml */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ b caml_raise_exn ++END_FUNCTION(caml_raise_exception) ++ ++/* Callback from C to OCaml */ ++ ++FUNCTION(caml_callback_asm) ++ /* Initial shuffling of arguments */ ++ /* ($a0 = Caml_state, $a1 = closure, 0(a2) = first arg) */ ++ move TMP, $a0 ++ ld.d $a0, $a2, 0 /* $a0 = first arg */ ++ /* $a1 = closure environment */ ++ ld.d TMP2, $a1, 0 /* code pointer */ ++ b L(jump_to_caml) ++END_FUNCTION(caml_callback_asm) ++ ++FUNCTION(caml_callback2_asm) ++ /* Initial shuffling of arguments */ ++ /* ($a0 = Caml_state, $a1 = closure, 0(a2) = arg1, 8(a2) = arg2) */ ++ move TMP, $a0 ++ move TMP2, $a1 ++ ld.d $a0, $a2, 0 /* $a0 = first arg */ ++ ld.d $a1, $a2, 8 /* $a1 = second arg */ ++ move $a2, TMP2 /* a2 = closure environment */ ++ la.global TMP2, caml_apply2 ++ b L(jump_to_caml) ++END_FUNCTION(caml_callback2_asm) ++ ++FUNCTION(caml_callback3_asm) ++ /* Initial shuffling of arguments */ ++ /* ($a0 = Caml_state, $a1 = closure, 0(a2) = arg1, 8(a2) = arg2, ++ 16(a2) = arg3) */ ++ move TMP, $a0 ++ move $a3, $a1 /* a3 = closure environment */ ++ ld.d $a0, $a2, 0 /* $a0 = first arg */ ++ ld.d $a1, $a2, 8 /* $a1 = second arg */ ++ ld.d $a2, $a2, 16 /* a2 = third arg */ 
++ la.global TMP2, caml_apply3 ++ b L(jump_to_caml) ++END_FUNCTION(caml_callback3_asm) ++ ++/* Fibers */ ++ ++/* Switch between OCaml stacks. Clobbers TMP and switches TRAP_PTR ++ Preserves old_stack and new_stack registers */ ++.macro SWITCH_OCAML_STACKS old_stack, new_stack ++ /* Save frame pointer and return address for old_stack */ ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Save OCaml SP and exn_handler in the stack info */ ++ st.d $sp, Stack_sp(\old_stack) ++ st.d TRAP_PTR, Stack_exception(\old_stack) ++ /* switch stacks */ ++ st.d \new_stack, Caml_state(current_stack) ++ ld.d TMP, Stack_sp(\new_stack) ++ move $sp, TMP ++ /* restore exn_handler for new stack */ ++ ld.d TRAP_PTR, Stack_exception(\new_stack) ++ /* Restore frame pointer and return address for new_stack */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++.endm ++ ++/* ++ * A continuation is a one word object that points to a fiber. A fiber [f] will ++ * point to its parent at Handler_parent(Stack_handler(f)). In the following, ++ * the [last_fiber] refers to the last fiber in the linked-list formed by the ++ * parent pointer. ++ */ ++ ++FUNCTION(caml_perform) ++ /* $a0: effect to perform ++ $a1: freshly allocated continuation */ ++ ld.d $a2, Caml_state(current_stack) /* a2 := old stack */ ++ addi.d $a3, $a2, 1 /* a3 := Val_ptr(old stack) */ ++ st.d $a3, $a1, 0 /* Iniitalize continuation */ ++L(do_perform): ++ /* $a0: effect to perform ++ $a1: continuation ++ a2: old_stack ++ a3: last_fiber */ ++ ++ ld.d $t3, Stack_handler($a2) /* $t3 := old stack -> handler */ ++ ld.d $t4, Handler_parent($t3) /* t4 := parent stack */ ++ beqz $t4, 1f ++ SWITCH_OCAML_STACKS $a2, $t4 ++ /* we have to null the Handler_parent after the switch because ++ the Handler_parent is needed to unwind the stack for backtraces */ ++ st.d $zero, Handler_parent($t3) /* Set parent of performer to NULL */ ++ ld.d TMP, Handler_effect($t3) ++ move $a2, $a3 /* a2 := last_fiber */ ++ move $a3, TMP /* a3 := effect handler */ ++ b PLT(caml_apply3) ++1: ++ /* switch back to original performer before $raising Effect.Unhandled ++ (no-op unless this is a reperform) */ ++ ld.d $t4, $a1, 0 /* load performer stack from continuation */ ++ addi.d $t4, $t4, -1 /* t4 := Ptr_val(t4) */ ++ ld.d $t3, Caml_state(current_stack) ++ SWITCH_OCAML_STACKS $t3, $t4 ++ /* No parent stack. Raise Effect.Unhandled. 
*/ ++ la.global ADDITIONAL_ARG, caml_raise_unhandled_effect ++ b caml_c_call ++END_FUNCTION(caml_perform) ++ ++FUNCTION(caml_reperform) ++ /* $a0: effect to perform ++ $a1: continuation ++ a2: last_fiber */ ++ ld.d TMP, Stack_handler_from_cont($a2) ++ ld.d $a2, Caml_state(current_stack) /* a2 := old stack */ ++ st.d $a2, Handler_parent(TMP) /* Append to last_fiber */ ++ addi.d $a3, $a2, 1 /* a3 (last_fiber) := Val_ptr(old stack) */ ++ b L(do_perform) ++END_FUNCTION(caml_reperform) ++ ++FUNCTION(caml_resume) ++ /* $a0: new fiber ++ $a1: fun ++ a2: arg */ ++ addi.d $a0, $a0, -1 /* $a0 = Ptr_val($a0) */ ++ ld.d $a3, $a1, 0 /* code pointer */ ++ /* Check if stack null, then already used */ ++ beqz $a0, 2f ++ /* Find end of list of stacks (put in $t2) */ ++ move TMP, $a0 ++1: ld.d $t2, Stack_handler(TMP) ++ ld.d TMP, Handler_parent($t2) ++ bnez TMP, 1b ++ /* Add current stack to the end */ ++ ld.d $t3, Caml_state(current_stack) ++ st.d $t3, Handler_parent($t2) ++ SWITCH_OCAML_STACKS $t3, $a0 ++ move $a0, $a2 ++ jr $a3 ++2: la.global ADDITIONAL_ARG, caml_raise_continuation_already_resumed ++ b caml_c_call ++END_FUNCTION(caml_resume) ++ ++/* Run a function on a new stack, then either ++ return the value or invoke exception handler */ ++FUNCTION(caml_runstack) ++ /* $a0: fiber ++ $a1: fun ++ a2: arg */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ addi.d $a0, $a0, -1 /* $a0 := Ptr_val($a0) */ ++ ld.d $a3, $a1, 0 /* code pointer */ ++ /* save old stack pointer and exception handler */ ++ ld.d $t2, Caml_state(current_stack) /* $t2 := old stack */ ++ st.d $sp, Stack_sp($t2) ++ st.d TRAP_PTR, Stack_exception($t2) ++ /* Load new stack pointer and set parent */ ++ ld.d TMP, Stack_handler($a0) ++ st.d $t2, Handler_parent(TMP) ++ st.d $a0, Caml_state(current_stack) ++ ld.d $t3, Stack_sp($a0) /* $t3 := $sp of new stack */ ++ /* Create an exception handler on the target stack ++ after 16byte DWARF & gc_regs block (which is unused here) */ ++ addi.d $t3, $t3, -32 ++ la.local TMP, L(fiber_exn_handler) ++ st.d TMP, $t3, 8 ++ /* link the previous exn_handler so that copying stacks works */ ++ ld.d TMP, Stack_exception($a0) ++ st.d TMP, $t3, 0 ++ move TRAP_PTR, $t3 ++ /* Switch to the new stack */ ++ move $sp, $t3 ++ CFI_REMEMBER_STATE ++ /* Call the function on the new stack */ ++ move $a0, $a2 ++ jirl $ra, $a3, 0 ++L(frame_runstack): ++ addi.d $t2, $sp, 32 /* $t2 := stack_handler */ ++ ld.d $s2, Handler_value($t2) /* saved across C call */ ++1: ++ move $s3, $a0 /* save return across C call */ ++ ld.d $a0, Caml_state(current_stack) /* arg to caml_free_stack */ ++ /* restore parent stack and exn_handler into Caml_state */ ++ ld.d TMP, Handler_parent($t2) ++ st.d TMP, Caml_state(current_stack) ++ ld.d TRAP_PTR, Stack_exception(TMP) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* free old stack by switching directly to c_stack; ++ is a no-alloc call */ ++ ld.d $s4, Stack_sp(TMP) /* saved across C call */ ++ CFI_RESTORE_STATE ++ CFI_REMEMBER_STATE ++ ld.d TMP, Caml_state(c_stack) ++ move $sp, TMP ++ bl PLT(caml_free_stack) ++ /* switch directly to parent stack with correct return */ ++ move $a0, $s3 ++ move $a1, $s2 ++ move $sp, $s4 ++ CFI_RESTORE_STATE ++ ld.d TMP, $s2, 0 /* code pointer */ ++ /* Invoke handle_value (or handle_exn) */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ CFI_ADJUST(-16) ++ jr TMP ++L(fiber_exn_handler): ++ addi.d $t2, $sp, 16 /* $t2 := stack_handler */ ++ ld.d $s2, Handler_exception($t2) ++ b 1b ++END_FUNCTION(caml_runstack) ++ 
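As an illustrative aside (not part of the patch): the caml_perform / caml_reperform / caml_resume / caml_runstack entry points above implement the fiber switching behind OCaml 5 effect handlers. A minimal sketch that should exercise them on this port, assuming a stock OCaml >= 5.0 Effect API (the effect name Ask and the reply value 21 are invented for the demo):

    (* Sketch only: built natively with ocamlopt 5.x, match_with runs comp on a
       fresh fiber (caml_runstack), each [perform Ask] enters caml_perform, and
       each [continue k 21] resumes through caml_resume. *)
    open Effect
    open Effect.Deep

    type _ Effect.t += Ask : int Effect.t  (* hypothetical effect for the demo *)

    let comp () = perform Ask + perform Ask

    let () =
      let n =
        match_with comp ()
          { retc = (fun v -> v);
            exnc = raise;
            effc = (fun (type a) (eff : a Effect.t) ->
              match eff with
              | Ask -> Some (fun (k : (a, _) continuation) -> continue k 21)
              | _ -> None) }
      in
      Printf.printf "%d\n" n  (* expected to print 42 *)
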
++FUNCTION(caml_ml_array_bound_error) ++ /* Load address of [caml_array_bound_error_asm] in ADDITIONAL_ARG */ ++ la.global ADDITIONAL_ARG, caml_array_bound_error_asm ++ /* Call that function */ ++ b caml_c_call ++END_FUNCTION(caml_ml_array_bound_error) ++ ++ .globl caml_system__code_end ++caml_system__code_end: ++ ++/* GC roots for callback */ ++ ++ ++ .section .data ++ .align 3 ++ .globl caml_system__frametable ++ .type caml_system__frametable, @object ++caml_system__frametable: ++ .quad 1 /* one descriptor */ ++ .quad .Lcaml_retaddr /* return address into callback */ ++ .short -1 /* negative frame size => use callback link */ ++ .short 0 /* no roots */ ++ .align 3 ++ .size caml_system__frametable, .-caml_system__frametable +diff --git a/testsuite/tools/asmgen_loongarch64.S b/testsuite/tools/asmgen_loongarch64.S +new file mode 100644 +index 000000000..97fbeae04 +--- /dev/null ++++ b/testsuite/tools/asmgen_loongarch64.S +@@ -0,0 +1,75 @@ ++/**************************************************************************/ ++/* */ ++/* OCaml */ ++/* */ ++/* Nicolas Ojeda Bar */ ++/* */ ++/* Copyright 2019 Institut National de Recherche en Informatique et */ ++/* en Automatique. */ ++/* */ ++/* All rights reserved. This file is distributed under the terms of */ ++/* the GNU Lesser General Public License version 2.1, with the */ ++/* special exception on linking described in the file LICENSE. */ ++/* */ ++/**************************************************************************/ ++ ++#define STORE st.d ++#define LOAD ld.d ++ ++ .globl call_gen_code ++ .align 2 ++call_gen_code: ++ /* Set up stack frame and save callee-save registers */ ++ addi.d $sp, $sp, -208 ++ STORE $ra, $sp, 192 ++ STORE $s0, $sp, 0 ++ STORE $s1, $sp, 8 ++ STORE $s2, $sp, 16 ++ STORE $s3, $sp, 24 ++ STORE $s4, $sp, 32 ++ STORE $s5, $sp, 40 ++ STORE $s6, $sp, 48 ++ STORE $s7, $sp, 56 ++ STORE $s8, $sp, 64 ++ fst.d $fs0, $sp, 96 ++ fst.d $fs1, $sp, 104 ++ fst.d $fs2, $sp, 112 ++ fst.d $fs3, $sp, 120 ++ fst.d $fs4, $sp, 128 ++ fst.d $fs5, $sp, 136 ++ fst.d $fs6, $sp, 144 ++ fst.d $fs7, $sp, 152 ++ /* Shuffle arguments */ ++ move $t0, $a0 ++ move $a0, $a1 ++ move $a1, $a2 ++ move $a2, $a3 ++ move $a3, $a4 ++ /* Call generated asm */ ++ jirl $ra, $t0, 0 ++ /* Reload callee-save registers and return address */ ++ LOAD $ra, $sp, 192 ++ LOAD $s0, $sp, 0 ++ LOAD $s1, $sp, 8 ++ LOAD $s2, $sp ,16 ++ LOAD $s3, $sp ,24 ++ LOAD $s4, $sp ,32 ++ LOAD $s5, $sp ,40 ++ LOAD $s6, $sp ,48 ++ LOAD $s7, $sp ,56 ++ LOAD $s8, $sp ,64 ++ fld.d $fs0, $sp, 96 ++ fld.d $fs1, $sp, 104 ++ fld.d $fs2, $sp, 112 ++ fld.d $fs3, $sp, 120 ++ fld.d $fs4, $sp, 128 ++ fld.d $fs5, $sp, 136 ++ fld.d $fs6, $sp, 144 ++ fld.d $fs7, $sp, 152 ++ addi.d $sp, $sp, 208 ++ jr $ra ++ ++ .globl caml_c_call ++ .align 2 ++caml_c_call: ++ jr $t2 diff --git a/onefetch/PKGBUILD b/onefetch/PKGBUILD index 36e54952ee..e386ae13ba 100644 --- a/onefetch/PKGBUILD +++ b/onefetch/PKGBUILD @@ -19,7 +19,7 @@ sha512sums=('SKIP') prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/open-iscsi/PKGBUILD b/open-iscsi/PKGBUILD index eb58ad86a8..89a227e7b5 100644 --- a/open-iscsi/PKGBUILD +++ b/open-iscsi/PKGBUILD @@ -24,7 +24,7 @@ build() { ) arch-meson "${pkgname}-${pkgver}" build "${_meson_options[@]}" - +# arch-meson -D rulesdir="/usr/lib/udev/rules.d build" -Discsi_sbindir=/usr/bin build meson compile -C build } @@ -33,6 +33,7 @@ package() { # no initiatorname in 
package, initialized in install script echo -n > "${pkgdir}"/etc/iscsi/initiatorname.iscsi +# meson install -C build --destdir "$pkgdir" # install docs install -D -m0644 -t "${pkgdir}"/usr/share/doc/${pkgname}/ "${pkgname}-${pkgver}"/{Changelog,README} diff --git a/open-iscsi/open-iscsi-fix-build.patch b/open-iscsi/open-iscsi-fix-build.patch new file mode 100644 index 0000000000..519116010b --- /dev/null +++ b/open-iscsi/open-iscsi-fix-build.patch @@ -0,0 +1,25 @@ +Index: open-iscsi-2.1.6/utils/fwparam_ibft/fwparam.h +=================================================================== +--- open-iscsi-2.1.6.orig/utils/fwparam_ibft/fwparam.h ++++ open-iscsi-2.1.6/utils/fwparam_ibft/fwparam.h +@@ -17,7 +17,6 @@ + #ifndef FWPARAM_H_ + #define FWPARAM_H_ + +-#include + #include "fw_context.h" + + #define FILENAMESZ (1024) +Index: open-iscsi-2.1.6/include/iscsi_proto.h +=================================================================== +--- open-iscsi-2.1.6.orig/include/iscsi_proto.h ++++ open-iscsi-2.1.6/include/iscsi_proto.h +@@ -63,7 +63,7 @@ + #endif + + /* initiator tags; opaque for target */ +-typedef uint32_t __bitwise__ itt_t; ++typedef uint32_t itt_t; + /* below makes sense only for initiator that created this tag */ + #define build_itt(itt, age) ((__force itt_t)\ + ((itt) | ((age) << ISCSI_AGE_SHIFT))) diff --git a/open-isns/PKGBUILD b/open-isns/PKGBUILD index a29f6f1b4d..02faf71159 100644 --- a/open-isns/PKGBUILD +++ b/open-isns/PKGBUILD @@ -3,7 +3,7 @@ pkgname=open-isns pkgver=0.102 -pkgrel=3 +pkgrel=4 pkgdesc='iSNS server and client for Linux' arch=('loong64' 'x86_64') depends=('openssl') diff --git a/openal/PKGBUILD b/openal/PKGBUILD index 3870624beb..21a173276e 100644 --- a/openal/PKGBUILD +++ b/openal/PKGBUILD @@ -78,7 +78,7 @@ package_openal() { install -Dt "$pkgdir/usr/share/doc/openal" -m644 openal-soft/docs/* cd "$pkgdir" - _pick examples usr/bin/al{ffplay,hrtf,latency,loopback,multireverb} + _pick examples usr/bin/al{hrtf,latency,loopback,multireverb} _pick examples usr/bin/al{play,record,reverb,stream,tonegen} } diff --git a/openblas/PKGBUILD b/openblas/PKGBUILD index b5ca23eda3..ca66c556e8 100644 --- a/openblas/PKGBUILD +++ b/openblas/PKGBUILD @@ -13,8 +13,15 @@ url="https://www.openblas.net/" license=('BSD') depends=('gcc-libs') makedepends=('cmake' 'perl' 'gcc-fortran') -source=(${_pkgname}-v${pkgver}.tar.gz::https://github.com/xianyi/OpenBLAS/archive/v${pkgver}.tar.gz) -sha512sums=('01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373') +source=(${_pkgname}-v${pkgver}.tar.gz::https://github.com/xianyi/OpenBLAS/archive/v${pkgver}.tar.gz +fix-loong.patch) +sha512sums=('01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373' + '195dc3c3daa56c55912831161bd9e73532c1a06b38c894a6eceb8d49befddda1b94e71dcd36e1d3403e2e5f70ded83febdee493059b16adc85ea52fb32e58f81') + +prepare() { + cd "$_pkgname-$pkgver" + patch -p1 -i "$srcdir/fix-loong.patch" +} build() { # Setting FC manually to avoid picking up f95 and breaking the cmake build diff --git a/openblas/fix-loong.patch b/openblas/fix-loong.patch new file mode 100644 index 0000000000..fc0489b826 --- /dev/null +++ b/openblas/fix-loong.patch @@ -0,0 +1,47 @@ +diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml +index 4a9bf98b6..b310d6938 100644 +--- a/.github/workflows/loongarch64.yml ++++ b/.github/workflows/loongarch64.yml +@@ -40,8 +40,8 @@ jobs: + + - 
name: Download and install loongarch64-toolchain + run: | +- wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz +- tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt ++ wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz ++ tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt + + - name: Set env + run: | +diff --git a/cmake/cc.cmake b/cmake/cc.cmake +index 00952e810..242b03b5f 100644 +--- a/cmake/cc.cmake ++++ b/cmake/cc.cmake +@@ -36,9 +36,9 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LS + + if (LOONGARCH64) + if (BINARY64) +- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64") ++ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d") + else () +- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp32") ++ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=ilp32d") + endif () + set(BINARY_DEFINED 1) + endif () +diff --git a/cmake/fc.cmake b/cmake/fc.cmake +index c496f6368..b356dfda3 100644 +--- a/cmake/fc.cmake ++++ b/cmake/fc.cmake +@@ -61,9 +61,9 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F + endif () + if (LOONGARCH64) + if (BINARY64) +- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64") ++ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d") + else () +- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32") ++ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d") + endif () + endif () + if (RISCV64) diff --git a/opencv/PKGBUILD b/opencv/PKGBUILD index 8eacdaf59c..655969a4d7 100644 --- a/opencv/PKGBUILD +++ b/opencv/PKGBUILD @@ -90,8 +90,7 @@ build() { -DINSTALL_C_EXAMPLES=ON \ -DINSTALL_PYTHON_EXAMPLES=ON \ -DCMAKE_INSTALL_PREFIX=/usr \ - -DCPU_BASELINE_DISABLE=SSE3 \ - -DCPU_BASELINE_REQUIRE=SSE2 \ + -DCPU_BASELINE_DISABLE=LASX,SSE2,SSE3 \ -DOPENCV_EXTRA_MODULES_PATH=$srcdir/opencv_contrib-$pkgver/modules \ -DOPENCV_SKIP_PYTHON_LOADER=ON \ -DLAPACK_LIBRARIES=/usr/lib/liblapack.so;/usr/lib/libblas.so;/usr/lib/libcblas.so \ @@ -114,8 +113,8 @@ build() { -DBUILD_WITH_DEBUG_INFO=OFF \ -DWITH_CUDA=ON \ -DWITH_CUDNN=ON \ - -DCMAKE_C_COMPILER=gcc-12 \ - -DCMAKE_CXX_COMPILER=g++-12 \ + -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ -DCUDA_ARCH_BIN='52-real;53-real;60-real;61-real;62-real;70-real;72-real;75-real;80-real;86-real;87-real;89-real;90-real;90-virtual' \ -DCUDA_ARCH_PTX='90-virtual' cmake --build build-cuda diff --git a/openh264/PKGBUILD b/openh264/PKGBUILD index 043973fe84..5f2847376d 100644 --- a/openh264/PKGBUILD +++ b/openh264/PKGBUILD @@ -16,6 +16,7 @@ source=("https://github.com/cisco/openh264/archive/v${pkgver}/${pkgname}-${pkgve "https://github.com/google/googletest/archive/release-${_gtestver}/googletest-${_gtestver}.tar.gz") sha256sums=('a44d1ccc348a790f9a272bba2d1c2eb9a9bbd0302e4e9b655d709e1c32f92691' '9bf1fe5182a604b4135edc1a425ae356c9ad15e9b23f9f12a02e80184c3a249c') +options=(!lto) prepare() { ln -sf "../googletest-release-${_gtestver}" "${pkgname}-${pkgver}/gtest" diff --git a/openimagedenoise/PKGBUILD b/openimagedenoise/PKGBUILD index 1cf4a05640..7ea16673b0 100644 --- a/openimagedenoise/PKGBUILD +++ b/openimagedenoise/PKGBUILD @@ -9,7 +9,7 @@ arch=('loong64' 'x86_64') url='https://openimagedenoise.github.io' license=('Apache') depends=('intel-tbb') -makedepends=('cmake' 'ninja' 'ispc' 'python') +makedepends=('cmake' 'ninja' 'python') source=("https://github.com/OpenImageDenoise/oidn/releases/download/v${pkgver}/oidn-${pkgver}.src.tar.gz") 
sha512sums=('508cb100f1a0a825774c2c01e0fc983e697341745fa0bfa48a99bfa70fc431f66fcdf17c3f170e40baefd7c3796a25d147f49cb17efbf1a1886556367c5c4566') diff --git a/openjade/PKGBUILD b/openjade/PKGBUILD index 775eeb57d5..7a19dcfa17 100644 --- a/openjade/PKGBUILD +++ b/openjade/PKGBUILD @@ -14,19 +14,25 @@ install=${pkgname}.install conflicts=('jade') provides=('jade') source=(https://downloads.sourceforge.net/project/${pkgname}/${pkgname}/${pkgver}/${pkgname}-${pkgver}.tar.gz - https://deb.debian.org/debian/pool/main/o/${pkgname}1.3/${pkgname}1.3_${pkgver}-${_debpatch}.diff.gz) + https://deb.debian.org/debian/pool/main/o/${pkgname}1.3/${pkgname}1.3_${pkgver}-${_debpatch}.diff.gz + openjade-nola.patch) sha256sums=('1d2d7996cc94f9b87d0c51cf0e028070ac177c4123ecbfd7ac1cb8d0b7d322d1' - '11d90e242eae60ce06bf27fd234adbd8efd7d4a9a4a2da058faa4e8336dc423a') + '11d90e242eae60ce06bf27fd234adbd8efd7d4a9a4a2da058faa4e8336dc423a' + 'd177f5e18970561ff500c42759f82e91ef245dda5559459091b875b7560b5688') +SKIPCONFIG=1 prepare() { cd ${pkgname}-$pkgver - patch -Np1 -i "$srcdir"/${pkgname}1.3_${pkgver}-${_debpatch}.diff + patch -Np1 -i "$srcdir"/openjade-nola.patch # https://bugs.archlinux.org/task/55331 / https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69534#c9 export CXXFLAGS+=' -fno-lifetime-dse' + cp /usr/share/automake-1.16/config.* config/. } build() { cd ${pkgname}-$pkgver + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr \ --mandir=/usr/share/man \ --enable-default-catalog=/etc/sgml/catalog \ diff --git a/openjade/openjade-nola.patch b/openjade/openjade-nola.patch new file mode 100644 index 0000000000..1e23ca52a4 --- /dev/null +++ b/openjade/openjade-nola.patch @@ -0,0 +1,12 @@ +diff -urNp openjade-1.3.2-orig/jade/Makefile.sub openjade-1.3.2/jade/Makefile.sub +--- openjade-1.3.2-orig/jade/Makefile.sub 2002-11-15 23:46:50.000000000 +0100 ++++ openjade-1.3.2/jade/Makefile.sub 2009-02-13 11:52:30.000000000 +0100 +@@ -4,7 +4,7 @@ OBJS=jade.o SgmlFOTBuilder.o RtfFOTBuild + INCLUDE=-I$(srcdir)/../grove -I$(srcdir)/../spgrove -I$(srcdir)/../style + # XLIBS=../style/libostyle.a ../spgrove/libospgrove.a ../grove/libogrove.a \ + # ../lib/libosp.a +-XLIBS=../style/libostyle.a ../spgrove/libospgrove.a ../grove/libogrove.a $(splibdir)/libosp.a ++XLIBS=../style/libostyle.a ../spgrove/libospgrove.a ../grove/libogrove.a $(splibdir)/libosp.so + GENSRCS=JadeMessages.h HtmlMessages.h RtfMessages.h TeXMessages.h \ + HtmlFOTBuilder_inst.cxx RtfFOTBuilder_inst.cxx TeXFOTBuilder_inst.cxx \ + TransformFOTBuilder_inst.cxx MifMessages.h MifFOTBuilder_inst.cxx diff --git a/openmp/PKGBUILD b/openmp/PKGBUILD index 5a747fd2a4..79d4818875 100644 --- a/openmp/PKGBUILD +++ b/openmp/PKGBUILD @@ -47,7 +47,7 @@ package() { DESTDIR="$pkgdir" ninja install install -Dm644 ../LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE" - rm "$pkgdir/usr/lib/libarcher_static.a" + #rm "$pkgdir/usr/lib/libarcher_static.a" } # vim:set ts=2 sw=2 et: diff --git a/openmp/openmp-loong64.patch b/openmp/openmp-loong64.patch new file mode 100644 index 0000000000..8ff7095ab6 --- /dev/null +++ b/openmp/openmp-loong64.patch @@ -0,0 +1,514 @@ +diff --git a/README.rst b/README.rst +index ffa49e4..a12c628 100644 +--- a/README.rst ++++ b/README.rst +@@ -137,7 +137,7 @@ Options for all Libraries + Options for ``libomp`` + ---------------------- + +-**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` ++**LIBOMP_ARCH** = 
``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|loongarch64`` + The default value for this option is chosen based on probing the compiler for + architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). + +@@ -194,7 +194,7 @@ Optional Features + **LIBOMP_OMPT_SUPPORT** = ``ON|OFF`` + Include support for the OpenMP Tools Interface (OMPT). + This option is supported and ``ON`` by default for x86, x86_64, AArch64, +- PPC64 and RISCV64 on Linux* and macOS*. ++ PPC64, RISCV64 and loongarch64 on Linux* and macOS*. + This option is ``OFF`` if this feature is not supported for the platform. + + **LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` +diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt +index c9dbcb2..3199536 100644 +--- a/runtime/CMakeLists.txt ++++ b/runtime/CMakeLists.txt +@@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) + # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake + libomp_get_architecture(LIBOMP_DETECTED_ARCH) + set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING +- "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64).") ++ "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).") + # Should assertions be enabled? They are on by default. + set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL + "enable assertions?") +@@ -61,6 +61,8 @@ else() # Part of LLVM build + set(LIBOMP_ARCH arm) + elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv64") + set(LIBOMP_ARCH riscv64) ++ elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") ++ set(LIBOMP_ARCH loongarch64) + else() + # last ditch effort + libomp_get_architecture(LIBOMP_ARCH) +@@ -81,7 +83,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") + endif() + endif() + +-libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64) ++libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64) + + set(LIBOMP_LIB_TYPE normal CACHE STRING + "Performance,Profiling,Stubs library (normal/profile/stubs)") +@@ -159,6 +161,7 @@ set(MIC FALSE) + set(MIPS64 FALSE) + set(MIPS FALSE) + set(RISCV64 FALSE) ++set(LoongArch64 FALSE) + if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture + set(IA32 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture +@@ -183,6 +186,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture + set(MIPS64 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture + set(RISCV64 TRUE) ++elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture ++ set(LoongArch64 TRUE) + endif() + + # Set some flags based on build_type +diff --git a/runtime/README.txt b/runtime/README.txt +index 874a563..ddd8b0e 100644 +--- a/runtime/README.txt ++++ b/runtime/README.txt +@@ -54,6 +54,7 @@ Architectures Supported + * IBM(R) Power architecture (little endian) + * MIPS and MIPS64 architecture + * RISCV64 architecture ++* LoongArch64 architecture + + Supported RTL Build Configurations + ================================== +diff --git a/runtime/cmake/LibompGetArchitecture.cmake b/runtime/cmake/LibompGetArchitecture.cmake +index dd60a2d..72cbf64 100644 +--- a/runtime/cmake/LibompGetArchitecture.cmake ++++ b/runtime/cmake/LibompGetArchitecture.cmake +@@ -47,6 +47,8 @@ function(libomp_get_architecture return_arch) + #error ARCHITECTURE=mips + #elif defined(__riscv) && 
__riscv_xlen == 64 + #error ARCHITECTURE=riscv64 ++ #elif defined(__loongarch__) && defined(__loongarch64) ++ #error ARCHITECTURE=loongarch64 + #else + #error ARCHITECTURE=UnknownArchitecture + #endif +diff --git a/runtime/cmake/LibompMicroTests.cmake b/runtime/cmake/LibompMicroTests.cmake +index 1ca3412..d344056 100644 +--- a/runtime/cmake/LibompMicroTests.cmake ++++ b/runtime/cmake/LibompMicroTests.cmake +@@ -214,6 +214,9 @@ else() + elseif(${RISCV64}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) ++ elseif(${LoongArch64}) ++ libomp_append(libomp_expected_library_deps libc.so.6) ++ libomp_append(libomp_expected_library_deps ld.so.1) + endif() + libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY) + libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) +diff --git a/runtime/cmake/LibompUtils.cmake b/runtime/cmake/LibompUtils.cmake +index b1de242..8d6b6ef 100644 +--- a/runtime/cmake/LibompUtils.cmake ++++ b/runtime/cmake/LibompUtils.cmake +@@ -109,6 +109,8 @@ function(libomp_get_legal_arch return_arch_string) + set(${return_arch_string} "MIPS64" PARENT_SCOPE) + elseif(${RISCV64}) + set(${return_arch_string} "RISCV64" PARENT_SCOPE) ++ elseif(${LoongArch64}) ++ set(${return_arch_string} "LoongArch64" PARENT_SCOPE) + else() + set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) + libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") +diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h +index ce00362..06fd323 100644 +--- a/runtime/src/kmp_affinity.h ++++ b/runtime/src/kmp_affinity.h +@@ -254,6 +254,18 @@ public: + #elif __NR_sched_getaffinity != 5196 + #error Wrong code for getaffinity system call. + #endif /* __NR_sched_getaffinity */ ++#elif KMP_ARCH_LOONGARCH64 ++#ifndef __NR_sched_setaffinity ++#define __NR_sched_setaffinity 122 ++#elif __NR_sched_setaffinity != 122 ++#error Wrong code for setaffinity system call. ++#endif /* __NR_sched_setaffinity */ ++#ifndef __NR_sched_getaffinity ++#define __NR_sched_getaffinity 123 ++#elif __NR_sched_getaffinity != 123 ++#error Wrong code for getaffinity system call. ++#endif /* __NR_sched_getaffinity */ ++#else + #error Unknown or unsupported architecture + #endif /* KMP_ARCH_* */ + #elif KMP_OS_FREEBSD +diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp +index e263558..ca42148 100644 +--- a/runtime/src/kmp_csupport.cpp ++++ b/runtime/src/kmp_csupport.cpp +@@ -695,7 +695,7 @@ void __kmpc_flush(ident_t *loc) { + } + #endif // KMP_MIC + #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ +- KMP_ARCH_RISCV64) ++ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) + // Nothing to see here move along + #elif KMP_ARCH_PPC64 + // Nothing needed here (we have a real MB above). +diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h +index d71e9ae..33eb269 100644 +--- a/runtime/src/kmp_os.h ++++ b/runtime/src/kmp_os.h +@@ -167,7 +167,7 @@ typedef unsigned long long kmp_uint64; + #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS + #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC + #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC + #else + #error "Can't determine size_t printf format specifier." 
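The libomp hunks above wire LoongArch into two detection layers: LibompGetArchitecture.cmake compiles a throwaway source file whose only job is to trip one of the #error ARCHITECTURE=... branches so CMake can scrape the architecture name out of the compiler diagnostics, while kmp_platform.h and kmp_affinity.h then key runtime behaviour (including the generic sched_setaffinity/sched_getaffinity syscall numbers 122/123) off KMP_ARCH_LOONGARCH64. Below is a minimal standalone sketch of the same predefine probe; it is not taken from the patch and assumes only the usual GCC/Clang predefines __loongarch__ and __loongarch64.

/* probe-arch.c -- hedged sketch, not part of the libomp patch.
 * Mirrors the compile-time test used by LibompGetArchitecture.cmake,
 * but prints the detected name instead of forcing an #error. */
#include <stdio.h>

int main(void)
{
#if defined(__x86_64__)
    puts("x86_64");
#elif defined(__riscv) && __riscv_xlen == 64
    puts("riscv64");
#elif defined(__loongarch__) && defined(__loongarch64)
    puts("loongarch64");          /* predefined by GCC/Clang when targeting LoongArch64 */
#else
    puts("UnknownArchitecture");
#endif
    return 0;
}

Built with a loongarch64 cross compiler (or natively), this should print loongarch64; the CMake helper extracts exactly the same token from the forced error message.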
+@@ -1012,7 +1012,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); + #endif /* KMP_OS_WINDOWS */ + + #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + #if KMP_OS_WINDOWS + #undef KMP_MB + #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) +diff --git a/runtime/src/kmp_platform.h b/runtime/src/kmp_platform.h +index bbbd72d..6392d17 100644 +--- a/runtime/src/kmp_platform.h ++++ b/runtime/src/kmp_platform.h +@@ -92,6 +92,7 @@ + #define KMP_ARCH_MIPS 0 + #define KMP_ARCH_MIPS64 0 + #define KMP_ARCH_RISCV64 0 ++#define KMP_ARCH_LOONGARCH64 0 + + #if KMP_OS_WINDOWS + #if defined(_M_AMD64) || defined(__x86_64) +@@ -135,6 +136,9 @@ + #elif defined __riscv && __riscv_xlen == 64 + #undef KMP_ARCH_RISCV64 + #define KMP_ARCH_RISCV64 1 ++#elif defined __loongarch__ && defined __loongarch64 ++#undef KMP_ARCH_LOONGARCH64 ++#define KMP_ARCH_LOONGARCH64 1 + #endif + #endif + +@@ -199,7 +203,7 @@ + // TODO: Fixme - This is clever, but really fugly + #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ +- KMP_ARCH_RISCV64) ++ KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64) + #error Unknown or unsupported architecture + #endif + +diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp +index e1af2f4..9dbc196 100644 +--- a/runtime/src/kmp_runtime.cpp ++++ b/runtime/src/kmp_runtime.cpp +@@ -8723,7 +8723,7 @@ __kmp_determine_reduction_method( + int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + + #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + + #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD +diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/runtime/src/thirdparty/ittnotify/ittnotify_config.h +index a452b76..d6d2cb0 100644 +--- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h ++++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h +@@ -162,6 +162,10 @@ + #define ITT_ARCH_ARM64 6 + #endif /* ITT_ARCH_ARM64 */ + ++#ifndef ITT_ARCH_LOONGARCH64 ++#define ITT_ARCH_LOONGARCH64 7 ++#endif /* ITT_ARCH_LOONGARCH64 */ ++ + #ifndef ITT_ARCH + #if defined _M_IX86 || defined __i386__ + #define ITT_ARCH ITT_ARCH_IA32 +@@ -175,6 +179,8 @@ + #define ITT_ARCH ITT_ARCH_ARM64 + #elif defined __powerpc64__ + #define ITT_ARCH ITT_ARCH_PPC64 ++#elif defined __loongarch__ && defined __loongarch64 ++#define ITT_ARCH ITT_ARCH_LOONGARCH64 + #endif + #endif + +diff --git a/runtime/src/z_Linux_asm.S b/runtime/src/z_Linux_asm.S +index b4a45c1..4f80a81 100644 +--- a/runtime/src/z_Linux_asm.S ++++ b/runtime/src/z_Linux_asm.S +@@ -1725,6 +1725,157 @@ __kmp_invoke_microtask: + + #endif /* KMP_ARCH_RISCV64 */ + ++#if KMP_ARCH_LOONGARCH64 ++ ++//------------------------------------------------------------------------ ++// ++// typedef void (*microtask_t)( int *gtid, int *tid, ... ); ++// ++// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, ++// void *p_argv[] ++// #if OMPT_SUPPORT ++// , ++// void **exit_frame_ptr ++// #endif ++// ) { ++// #if OMPT_SUPPORT ++// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); ++// #endif ++// (*pkfn)( & gtid, & tid, argv[0], ... 
); ++ ++ return 1; ++ } ++ ++// parameters: ++// a0: pkfn ++// a1: gtid ++// a2: tid ++// a3: argc ++// a4: p_argv ++// a5: exit_frame_ptr ++// ++// Temp. registers: ++// ++// t0: used to calculate the dynamic stack size / used to hold pkfn address ++// t1: used as temporary for stack placement calculation ++// t2: used as temporary for stack arguments ++// t3: used as temporary for number of remaining pkfn parms ++// t4: used to traverse p_argv array ++// ++// return: a0 (always 1/TRUE) ++// ++ ++// -- Begin __kmp_invoke_microtask ++// mark_begin; ++ .text ++ .globl __kmp_invoke_microtask ++ .p2align 3 ++ .type __kmp_invoke_microtask,@function ++__kmp_invoke_microtask: ++ ++ // First, save ra and fp ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ st.d $fp, $sp, 0 ++ addi.d $fp, $sp, 16 ++ ++ // Compute the dynamic stack size: ++ // ++ // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by ++ // reference ++ // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' ++ // function by register. Given that we have 8 of such registers (a[0-7]) ++ // and two + 'argc' arguments (consider &gtid and &tid), we need to ++ // reserve max(0, argc - 6)*8 extra bytes ++ // ++ // The total number of bytes is then max(0, argc - 6)*8 + 8 ++ ++ // Compute max(0, argc - 6) using the following bithack: ++ // max(0, x) = x - (x & (x >> 63)), where x := argc - 6 ++ // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax ++ addi.d $t0, $a3, -6 ++ srai.d $t1, $t0, 63 ++ and $t1, $t0, $t1 ++ sub.d $t0, $t0, $t1 ++ ++ addi.d $t0, $t0, 1 ++ ++ slli.d $t0, $t0, 3 // t0: total number of bytes for stack storing arguments. ++ sub.d $sp, $sp, $t0 ++ ++ move $t0, $a0 ++ move $t3, $a3 ++ move $t4, $a4 ++ ++#if OMPT_SUPPORT ++ // Save frame pointer into exit_frame ++ st.d $fp, $a5, 0 ++#endif ++ ++ // Prepare arguments for the pkfn function (first 8 using $a0-$a7 registers) ++ ++ st.w $a1, $fp, -20 // gtid ++ st.w $a2, $fp, -24 // tid ++ ++ addi.d $a0, $fp, -20 // &gtid ++ addi.d $a1, $fp, -24 // &tid ++ ++ beqz $t3, .L_kmp_3 ++ ld.d $a2, $t4, 0 // argv[0] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a3, $t4, 8 // argv[1] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a4, $t4, 16 // argv[2] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a5, $t4, 24 // argv[3] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a6, $t4, 32 // argv[4] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a7, $t4, 40 // argv[5] ++ ++ // Prepare any additional argument passed through the stack ++ addi.d $t4, $t4, 48 ++ move $t1, $sp ++ b .L_kmp_2 ++.L_kmp_1: ++ ld.d $t2, $t4, 0 ++ st.d $t2, $t1, 0 ++ addi.d $t4, $t4, 8 ++ addi.d $t1, $t1, 8 ++.L_kmp_2: ++ addi.d $t3, $t3, -1 ++ bnez $t3, .L_kmp_1 ++ ++.L_kmp_3: ++ // Call pkfn function ++ jirl $ra, $t0, 0 ++ ++ // Restore stack and return ++ ++ addi.d $a0, $zero, 1 ++ ++ addi.d $sp, $fp, -16 ++ ld.d $fp, $sp, 0 ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++.Lfunc_end0: ++ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask ++ ++// -- End __kmp_invoke_microtask ++ ++#endif /* KMP_ARCH_LOONGARCH64 */ ++ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS + .data + .comm .gomp_critical_user_,32,8 +@@ -1736,7 +1887,8 @@ __kmp_unnamed_critical_addr: + .size __kmp_unnamed_critical_addr,4 + #endif /* KMP_ARCH_ARM */ + +-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ ++ KMP_ARCH_RISCV64 || 
KMP_ARCH_LOONGARCH64 + #ifndef KMP_PREFIX_UNDERSCORE + # define KMP_PREFIX_UNDERSCORE(x) x + #endif +@@ -1751,7 +1903,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): + .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 + #endif + #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || +- KMP_ARCH_RISCV64 */ ++ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */ + + #if KMP_OS_LINUX + # if KMP_ARCH_ARM +diff --git a/runtime/src/z_Linux_util.cpp b/runtime/src/z_Linux_util.cpp +index 5cd6ad6..bdadbd6 100644 +--- a/runtime/src/z_Linux_util.cpp ++++ b/runtime/src/z_Linux_util.cpp +@@ -2441,7 +2441,7 @@ finish: // Clean up and exit. + + #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ + ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ +- KMP_ARCH_PPC64 || KMP_ARCH_RISCV64) ++ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) + + // we really only need the case with 1 argument, because CLANG always build + // a struct of pointers to shared variables referenced in the outlined function +diff --git a/runtime/test/atomic/kmp_atomic_float10_max_min.c b/runtime/test/atomic/kmp_atomic_float10_max_min.c +index 4a833e7..cc54e1a 100644 +--- a/runtime/test/atomic/kmp_atomic_float10_max_min.c ++++ b/runtime/test/atomic/kmp_atomic_float10_max_min.c +@@ -1,6 +1,6 @@ + // RUN: %libomp-compile -mlong-double-80 && %libomp-run + // UNSUPPORTED: gcc +-// UNSUPPORTED: powerpc ++// REQUIRES: x86-registered-target + + #include + #include +diff --git a/runtime/tools/lib/Platform.pm b/runtime/tools/lib/Platform.pm +index 38593a1..2d262ae 100644 +--- a/runtime/tools/lib/Platform.pm ++++ b/runtime/tools/lib/Platform.pm +@@ -63,6 +63,8 @@ sub canon_arch($) { + $arch = "mips"; + } elsif ( $arch =~ m{\Ariscv64} ) { + $arch = "riscv64"; ++ } elsif ( $arch =~ m{\Aloongarch64} ) { ++ $arch = "loongarch64"; + } else { + $arch = undef; + }; # if +@@ -97,6 +99,7 @@ sub canon_mic_arch($) { + "mips" => "MIPS", + "mips64" => "MIPS64", + "riscv64" => "RISC-V (64-bit)", ++ "loongarch64" => "LoongArch64", + ); + + sub legal_arch($) { +@@ -119,6 +122,7 @@ sub canon_mic_arch($) { + "mic" => "intel64", + "mips" => "mips", + "mips64" => "MIPS64", ++ "loongarch64" => "loongarch64", + ); + + sub arch_opt($) { +@@ -225,6 +229,8 @@ sub target_options() { + $_host_arch = "mips"; + } elsif ( $hardware_platform eq "riscv64" ) { + $_host_arch = "riscv64"; ++ } elsif ( $hardware_platform eq "loongarch64" ) { ++ $_host_arch = "loongarch64"; + } else { + die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; + }; # if +@@ -414,7 +420,7 @@ the script assumes host architecture is target one. + + Input string is an architecture name to canonize. The function recognizes many variants, for example: + C<32e>, C, C, etc. Returned string is a canonized architecture name, +-one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C or C is input string is not recognized. ++one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C or C is input string is not recognized. 
+ + =item B + +diff --git a/runtime/tools/lib/Uname.pm b/runtime/tools/lib/Uname.pm +index 99fe1cd..8a976ad 100644 +--- a/runtime/tools/lib/Uname.pm ++++ b/runtime/tools/lib/Uname.pm +@@ -158,6 +158,8 @@ if ( 0 ) { + $values{ hardware_platform } = "mips"; + } elsif ( $values{ machine } =~ m{\Ariscv64\z} ) { + $values{ hardware_platform } = "riscv64"; ++ } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { ++ $values{ hardware_platform } = "loongarch64"; + } else { + die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; + }; # if diff --git a/openpgp-ca/PKGBUILD b/openpgp-ca/PKGBUILD index 05318f8643..3b902d4b85 100644 --- a/openpgp-ca/PKGBUILD +++ b/openpgp-ca/PKGBUILD @@ -47,7 +47,7 @@ b2sums=('86cdf27447c003415348705745990899c507b326bc2bc191302cd32f2d6df23b28bfe25 prepare() { cd $pkgbase-$pkgbase-v$pkgver # NOTE: add --locked as soon as upstream has split out the components - cargo fetch --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/openpgp-card-tools/PKGBUILD b/openpgp-card-tools/PKGBUILD index c6ec64600c..f6aa23d357 100644 --- a/openpgp-card-tools/PKGBUILD +++ b/openpgp-card-tools/PKGBUILD @@ -26,7 +26,7 @@ b2sums=('1fc6e02038d7cd9ba561c00f0c9b11cd27f282931b1fa70cb2436917224a1fd63c0e130 prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/opus/PKGBUILD b/opus/PKGBUILD index e547ef53c1..b6da5ca882 100644 --- a/opus/PKGBUILD +++ b/opus/PKGBUILD @@ -39,6 +39,8 @@ build() { local meson_options=( -D asm=disabled -D custom-modes=true + -D rtcd=disabled + -D intrinsics=disabled ) arch-meson opus build "${meson_options[@]}" diff --git a/ouch/PKGBUILD b/ouch/PKGBUILD index 4b1c639a1f..40c2a176a5 100644 --- a/ouch/PKGBUILD +++ b/ouch/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/paccat/PKGBUILD b/paccat/PKGBUILD index 9bac22d73a..045363b5c3 100644 --- a/paccat/PKGBUILD +++ b/paccat/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('f4478240063a00500a9fb45a1571b24519901fae86e5dddffa9f59191fcbada5') prepare() { cd "$pkgname-$pkgver" export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build () { diff --git a/pacman-bintrans/PKGBUILD b/pacman-bintrans/PKGBUILD index 75a5578e78..32e539c294 100644 --- a/pacman-bintrans/PKGBUILD +++ b/pacman-bintrans/PKGBUILD @@ -22,7 +22,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgbase}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/pacman-contrib/PKGBUILD b/pacman-contrib/PKGBUILD index a856c9afde..19b4cf8694 100644 --- a/pacman-contrib/PKGBUILD +++ b/pacman-contrib/PKGBUILD @@ -18,12 +18,15 @@ optdepends=( 'vim: default merge program for pacdiff' ) source=("git+$url.git#tag=v$pkgver") +source+=(pkgbuild-vim-la64.patch) b2sums=('SKIP') +b2sums+=('2cc52b392ef20be4b32a273c6ba8c7d1616e846e2a00dc846b1238bb5db973d25b8873f1e75ef4e2289a50860ec1b5a86208c35d6699e6cf547116d224cef52e') validpgpkeys=('04DC3FB1445FECA813C27EFAEA4F7B321A906AD9') # Daniel M. 
Capella # '5134EF9EAF65F95B6BB1608E50FB9B273A9D0BB5') # Johannes Löthberg prepare() { cd $pkgname + patch -p1 -i $srcdir/pkgbuild-vim-la64.patch ./autogen.sh } diff --git a/pacman-contrib/pkgbuild-vim-la64.patch b/pacman-contrib/pkgbuild-vim-la64.patch new file mode 100644 index 0000000000..6e66399ca5 --- /dev/null +++ b/pacman-contrib/pkgbuild-vim-la64.patch @@ -0,0 +1,13 @@ +diff --git a/src/vim/syntax/PKGBUILD.vim b/src/vim/syntax/PKGBUILD.vim +index e5c6c49..a1cbd33 100644 +--- a/src/vim/syntax/PKGBUILD.vim ++++ b/src/vim/syntax/PKGBUILD.vim +@@ -81,7 +81,7 @@ syn region pbBackupGroup start=/^backup=(/ end=/)/ contains=pb_k_backup,pbValidB + + " arch + syn keyword pb_k_arch arch contained +-syn keyword pbArch i686 x86_64 ppc pentium4 armv7h aarch64 any contained ++syn keyword pbArch i686 x86_64 loong64 ppc pentium4 armv7h aarch64 any contained + syn match pbIllegalArch /[^='"() ]/ contained contains=pbArch + syn region pbArchGroup start=/^arch=(/ end=/)/ contains=pb_k_arch,pbArch,pbIllegalArch,pbComment + diff --git a/pari/PKGBUILD b/pari/PKGBUILD index 4028c96c5c..31f57a55a5 100644 --- a/pari/PKGBUILD +++ b/pari/PKGBUILD @@ -35,7 +35,7 @@ build() { --mt=pthread \ --with-gmp make all - make -C Olinux-x86_64 gp-sta +# make -C Olinux-x86_64 gp-sta } check() { @@ -46,7 +46,7 @@ check() { package() { cd $pkgname-$pkgver make DESTDIR="$pkgdir" install - make DESTDIR="$pkgdir" -C Olinux-x86_64 install-bin-sta +# make DESTDIR="$pkgdir" -C Olinux-x86_64 install-bin-sta ln -sf gp.1.gz "$pkgdir"/usr/share/man/man1/pari.1 chrpath -d "$pkgdir"/usr/bin/gp-* } diff --git a/pastel/PKGBUILD b/pastel/PKGBUILD index e30b45508e..13a07b127e 100644 --- a/pastel/PKGBUILD +++ b/pastel/PKGBUILD @@ -30,7 +30,7 @@ prepare() { cd "$pkgbase" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/perl-image-sane/PKGBUILD b/perl-image-sane/PKGBUILD index 5937fb5760..4d868585a1 100644 --- a/perl-image-sane/PKGBUILD +++ b/perl-image-sane/PKGBUILD @@ -19,7 +19,7 @@ conflicts=('perl-sane') source=("https://www.cpan.org/modules/by-module/${_perl_namespace}/${_perl_namespace}-${_perl_module}-${pkgver}.tar.gz" https://rt.cpan.org/Public/Ticket/Attachment/2538823/1107284/Image-Sane-5-Replace-deprecated-given-and-when-operators.patch) sha256sums=('229aa0e9f049efa760f3c2f6e61d9d539af43d8f764b50a6e03064b4729a35ff' - 'b81caa036aabe4bcb67ca2729854c8e1dabb62d17b17214e41c930937edbf488') + 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855') options=('!emptydirs') prepare() { diff --git a/phonon/PKGBUILD b/phonon/PKGBUILD index 1d63c5971d..eb82973303 100644 --- a/phonon/PKGBUILD +++ b/phonon/PKGBUILD @@ -6,7 +6,7 @@ pkgname=(phonon-qt5 phonon-qt6) pkgdesc='The multimedia framework by KDE' pkgver=4.12.0 -pkgrel=3 +pkgrel=4 arch=(loong64 x86_64) url='https://community.kde.org/Phonon' license=(LGPL) diff --git a/pipe-rename/PKGBUILD b/pipe-rename/PKGBUILD index 2e155b5c1d..9d7ffe78f3 100644 --- a/pipe-rename/PKGBUILD +++ b/pipe-rename/PKGBUILD @@ -29,7 +29,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/pixman/83.patch b/pixman/83.patch new file mode 100644 index 0000000000..34c100bbe7 --- /dev/null +++ b/pixman/83.patch @@ -0,0 +1,11078 @@ +From 4e95946d115af4aa0a358af963982f9e0c32d39c Mon Sep 17 00:00:00 2001 +From: Song Ding +Date: Fri, 25 Aug 2023 16:13:27 +0800 +Subject: 
[PATCH] LoongArch: Add LoongArch SIMD support. Add LSX and + LASX optimizations. + +Benchmark results, before is upstream/master 47d3fbe38fc88085e644b737f3eff92865ebd65a, + +LSX build: ./autogen.sh --disable-lasx && make -j4 +LASX build: ./autogen.sh && make -j4 + +For example, the highest improvement is add_n_888. + +./tests/lowlevel-blt-bench add_n_888 + +before: add_n_8 = L1: 186.07 L2: 200.18 M:198.43 ( 1.41%) HT:161.37 VT:156.22 R:156.65 RT:103.67 ( 654Kops/s) +LSX: add_n_8 = L1:13782.81 L2:21067.23 M:14209.75 ( 98.95%) HT:1712.74 VT:3345.05 R:1661.89 RT:469.35 (2054Kops/s) +LASX: add_n_8 = L1:13034.63 L2:19725.46 M:16530.90 (117.71%) HT:1104.39 VT:2264.26 R:1077.33 RT:442.79 (2020Kops/s) + +./test/lowlevel-blt-bench all, 10 iterations: + +2.5 GHz LoongArch 3A5000, Linux, 64-bit, MEAN: + LSX LASX +L1 +336.97% +488.91% +L2 +340.57% +484.78% +M +307.29% +420.48% +HT +214.05% +225.17% +VT +201.28% +208.94% +R +202.48% +213.19% +RT +146.14% +140.95% +--- + meson.build | 54 + + meson_options.txt | 10 + + pixman/loongson_intrinsics.h | 2085 ++++++++++++++ + pixman/meson.build | 3 + + pixman/pixman-implementation.c | 1 + + pixman/pixman-lasx.c | 4887 ++++++++++++++++++++++++++++++++ + pixman/pixman-loongarch.c | 94 + + pixman/pixman-lsx.c | 3783 ++++++++++++++++++++++++ + pixman/pixman-private.h | 19 + + 9 files changed, 10936 insertions(+) + create mode 100644 pixman/loongson_intrinsics.h + create mode 100644 pixman/pixman-lasx.c + create mode 100644 pixman/pixman-loongarch.c + create mode 100644 pixman/pixman-lsx.c + +diff --git a/meson.build b/meson.build +index f822fb5..6f9eac4 100644 +--- a/meson.build ++++ b/meson.build +@@ -89,6 +89,60 @@ elif use_loongson_mmi.enabled() + error('Loongson MMI Support unavailable, but required') + endif + ++use_lsx = get_option('lsx') ++have_lsx = false ++lsx_flags = ['-mlsx'] ++if not use_lsx.disabled() ++ if host_machine.cpu_family() == 'loongarch64' and cc.compiles(''' ++ #ifndef __loongarch__ ++ #error "LSX is only available on LoongArch" ++ #endif ++ #include ++ int main () { ++ __m128i tmp0, tmp1; ++ tmp0 = __lsx_vadd_w(tmp0, tmp1); ++ return 0; ++ }''', ++ args : lsx_flags, ++ include_directories : include_directories('.'), ++ name : 'LoongArch LSX Intrinsic Support') ++ have_lsx = true ++ endif ++endif ++ ++if have_lsx ++ config.set10('USE_LOONGARCH_LSX', true) ++elif use_lsx.enabled() ++ error('LoongArch LSX Support unavailable, but required') ++endif ++ ++use_lasx = get_option('lasx') ++have_lasx = false ++lasx_flags = ['-mlasx'] ++if not use_lasx.disabled() ++ if host_machine.cpu_family() == 'loongarch64' and cc.compiles(''' ++ #ifndef __loongarch__ ++ #error "LASX is only available on LoongArch" ++ #endif ++ #include ++ int main () { ++ __m256i tmp0, tmp1; ++ tmp0 = __lasx_xvadd_w(tmp0, tmp1); ++ return 0; ++ }''', ++ args : lasx_flags, ++ include_directories : include_directories('.'), ++ name : 'LoongArch LASX Intrinsic Support') ++ have_lasx = true ++ endif ++endif ++ ++if have_lasx ++ config.set10('USE_LOONGARCH_LASX', true) ++elif use_lasx.enabled() ++ error('LoongArch LASX Support unavailable, but required') ++endif ++ + use_mmx = get_option('mmx') + have_mmx = false + mmx_flags = [] +diff --git a/meson_options.txt b/meson_options.txt +index df10889..05962be 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -23,6 +23,16 @@ option( + type : 'feature', + description : 'Use Loongson MMI intrinsic optimized paths', + ) ++option( ++ 'lsx', ++ type : 'feature', ++ description : 'Use LoongArch LSX intrinsic optimized paths', ++) 
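The two meson feature checks added above decide at configure time whether -mlsx / -mlasx code paths can be built at all: each compiles a tiny translation unit that touches one vector intrinsic and sets USE_LOONGARCH_LSX or USE_LOONGARCH_LASX only on success. Below is a minimal runnable sketch of the LSX variant, under the assumption that the intrinsics come from GCC/Clang's lsxintrin.h header (the #include target is not visible in the excerpt) and that the file is compiled with -mlsx, matching lsx_flags.

/* lsx-probe.c -- hedged sketch, not part of the pixman patch.
 * Roughly what the new 'lsx' meson option compiles; build with: cc -mlsx lsx-probe.c */
#include <lsxintrin.h>

int main(void)
{
    __m128i a = __lsx_vreplgr2vr_w(1);   /* broadcast 1 into four 32-bit lanes */
    __m128i b = __lsx_vreplgr2vr_w(2);   /* broadcast 2 */
    __m128i c = __lsx_vadd_w(a, b);      /* lane-wise 32-bit add, the same call the meson check uses */
    return __lsx_vpickve2gr_w(c, 0) == 3 ? 0 : 1;   /* extract lane 0; expect 3 */
}

The LASX check works the same way with __m256i and __lasx_xvadd_w; if a probe fails while the corresponding option was forced on, configuration aborts with the "Support unavailable, but required" error shown above.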
++option( ++ 'lasx', ++ type : 'feature', ++ description : 'Use LoongArch LASX intrinsic optimized paths', ++) + option( + 'mmx', + type : 'feature', +diff --git a/pixman/loongson_intrinsics.h b/pixman/loongson_intrinsics.h +new file mode 100644 +index 0000000..b692308 +--- /dev/null ++++ b/pixman/loongson_intrinsics.h +@@ -0,0 +1,2085 @@ ++/* ++ * Copyright (c) 2021 Loongson Technology Corporation Limited ++ * Contributed by Shiyou Yin ++ * Xiwei Gu ++ * Lu Wang ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ * ++ */ ++ ++#ifndef LOONGSON_INTRINSICS_H ++#define LOONGSON_INTRINSICS_H ++ ++/* ++ * This file is a header file of LoongArch builtin extension and ++ * available under ISC license. It provides a large number of macros ++ * to simplify writing LSX and LASX builtin optimizations. ++ * ++ * Any one can modify it or add new features for his/her own purposes. ++ * Contributing a patch will be appreciated as it might be useful for ++ * others as well. Send patches to loongson contributor mentioned above. ++ * ++ * MAJOR version: Usage changes, incompatible with previous version. ++ * MINOR version: Add new macros/functions, or bug fixes. ++ * MICRO version: Comment changes or implementation changes. 
++ */ ++#define LML_VERSION_MAJOR 1 ++#define LML_VERSION_MINOR 2 ++#define LML_VERSION_MICRO 2 ++ ++#define DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1) \ ++ { \ ++ _OUT0 = _INS(_IN0); \ ++ _OUT1 = _INS(_IN1); \ ++ } ++ ++#define DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1) \ ++ { \ ++ _OUT0 = _INS(_IN0, _IN1); \ ++ _OUT1 = _INS(_IN2, _IN3); \ ++ } ++ ++#define DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1) \ ++ { \ ++ _OUT0 = _INS(_IN0, _IN1, _IN2); \ ++ _OUT1 = _INS(_IN3, _IN4, _IN5); \ ++ } ++ ++#define DUP4_ARG1(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1, _OUT2, _OUT3) \ ++ { \ ++ DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1); \ ++ DUP2_ARG1(_INS, _IN2, _IN3, _OUT2, _OUT3); \ ++ } ++ ++#define DUP4_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _OUT0, \ ++ _OUT1, _OUT2, _OUT3) \ ++ { \ ++ DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1); \ ++ DUP2_ARG2(_INS, _IN4, _IN5, _IN6, _IN7, _OUT2, _OUT3); \ ++ } ++ ++#define DUP4_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _IN8, \ ++ _IN9, _IN10, _IN11, _OUT0, _OUT1, _OUT2, _OUT3) \ ++ { \ ++ DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1); \ ++ DUP2_ARG3(_INS, _IN6, _IN7, _IN8, _IN9, _IN10, _IN11, _OUT2, _OUT3); \ ++ } ++ ++#ifdef __loongarch_sx ++#include ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. Then ++ * the results are added to signed half-word elements from in_c. ++ * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 23,40,41,26, 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_h_b(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_h_b(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * unsigned byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * The results are added to signed half-word elements from in_c. 
++ * Example : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 23,40,41,26, 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_h_bu(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * The results are added to signed half-word elements from in_c. ++ * Example : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l) ++ * in_c : 1,1,1,1, 1,1,1,1 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8 ++ * out : -4,-24,-60,-112, 6,26,62,114 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of half-word vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - __m128i ++ * Details : Signed half-word elements from in_h are multiplied by ++ * signed half-word elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Then the results are added to signed word elements from in_c. ++ * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) ++ * in_c : 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1 ++ * out : 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_w_h(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_w_h(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. 
++ * Example : out = __lsx_vdp2_h_b(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22, 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_h_b(in_h, in_l); ++ out = __lsx_vmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * unsigned byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Example : out = __lsx_vdp2_h_bu(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22, 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_h_bu(in_h, in_l); ++ out = __lsx_vmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Example : out = __lsx_vdp2_h_bu_b(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,-1 ++ * out : 22,38,38,22, 22,38,38,6 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_h_bu_b(in_h, in_l); ++ out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Example : out = __lsx_vdp2_w_h(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_w_h(in_h, in_l); ++ out = __lsx_vmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - double ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. 
++ * Example : out = __lsx_vdp2_d_w(in_h, in_l) ++ * in_h : 1,2,3,4 ++ * in_l : 8,7,6,5 ++ * out : 22,38 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_d_w(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_d_w(in_h, in_l); ++ out = __lsx_vmaddwod_d_w(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all halfword elements of input vector between min & max ++ * out = ((_in) < (min)) ? (min) : (((_in) > (max)) ? (max) : ++ * (_in)) ++ * Arguments : Inputs - _in (input vector) ++ * - min (min threshold) ++ * - max (max threshold) ++ * Outputs - out (output vector with clipped elements) ++ * Return Type - signed halfword ++ * Example : out = __lsx_vclip_h(_in) ++ * _in : -8,2,280,249, -8,255,280,249 ++ * min : 1,1,1,1, 1,1,1,1 ++ * max : 9,9,9,9, 9,9,9,9 ++ * out : 1,2,9,9, 1,9,9,9 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) { ++ __m128i out; ++ ++ out = __lsx_vmax_h(min, _in); ++ out = __lsx_vmin_h(max, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Set each element of vector between 0 and 255 ++ * Arguments : Inputs - _in ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from _in are clamped between 0 and 255. ++ * Example : out = __lsx_vclip255_h(_in) ++ * _in : -8,255,280,249, -8,255,280,249 ++ * out : 0,255,255,249, 0,255,255,249 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vclip255_h(__m128i _in) { ++ __m128i out; ++ ++ out = __lsx_vmaxi_h(_in, 0); ++ out = __lsx_vsat_hu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Set each element of vector between 0 and 255 ++ * Arguments : Inputs - _in ++ * Outputs - out ++ * Return Type - word ++ * Details : Signed byte elements from _in are clamped between 0 and 255. 
++ * Example : out = __lsx_vclip255_w(_in) ++ * _in : -8,255,280,249 ++ * out : 0,255,255,249 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vclip255_w(__m128i _in) { ++ __m128i out; ++ ++ out = __lsx_vmaxi_w(_in, 0); ++ out = __lsx_vsat_wu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Swap two variables ++ * Arguments : Inputs - _in0, _in1 ++ * Outputs - _in0, _in1 (in-place) ++ * Details : Swapping of two input variables using xor ++ * Example : LSX_SWAP(_in0, _in1) ++ * _in0 : 1,2,3,4 ++ * _in1 : 5,6,7,8 ++ * _in0(out) : 5,6,7,8 ++ * _in1(out) : 1,2,3,4 ++ * ============================================================================= ++ */ ++#define LSX_SWAP(_in0, _in1) \ ++ { \ ++ _in0 = __lsx_vxor_v(_in0, _in1); \ ++ _in1 = __lsx_vxor_v(_in0, _in1); \ ++ _in0 = __lsx_vxor_v(_in0, _in1); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 4x4 block with word elements in vectors ++ * Arguments : Inputs - in0, in1, in2, in3 ++ * Outputs - out0, out1, out2, out3 ++ * Details : ++ * Example : ++ * 1, 2, 3, 4 1, 5, 9,13 ++ * 5, 6, 7, 8 to 2, 6,10,14 ++ * 9,10,11,12 =====> 3, 7,11,15 ++ * 13,14,15,16 4, 8,12,16 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE4x4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ __m128i _t0, _t1, _t2, _t3; \ ++ \ ++ _t0 = __lsx_vilvl_w(_in1, _in0); \ ++ _t1 = __lsx_vilvh_w(_in1, _in0); \ ++ _t2 = __lsx_vilvl_w(_in3, _in2); \ ++ _t3 = __lsx_vilvh_w(_in3, _in2); \ ++ _out0 = __lsx_vilvl_d(_t2, _t0); \ ++ _out1 = __lsx_vilvh_d(_t2, _t0); \ ++ _out2 = __lsx_vilvl_d(_t3, _t1); \ ++ _out3 = __lsx_vilvh_d(_t3, _t1); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with byte elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 ++ * Details : The rows of the matrix become columns, and the columns ++ * become rows. 
++ * Example : LSX_TRANSPOSE8x8_B ++ * _in0 : 00,01,02,03,04,05,06,07, 00,00,00,00,00,00,00,00 ++ * _in1 : 10,11,12,13,14,15,16,17, 00,00,00,00,00,00,00,00 ++ * _in2 : 20,21,22,23,24,25,26,27, 00,00,00,00,00,00,00,00 ++ * _in3 : 30,31,32,33,34,35,36,37, 00,00,00,00,00,00,00,00 ++ * _in4 : 40,41,42,43,44,45,46,47, 00,00,00,00,00,00,00,00 ++ * _in5 : 50,51,52,53,54,55,56,57, 00,00,00,00,00,00,00,00 ++ * _in6 : 60,61,62,63,64,65,66,67, 00,00,00,00,00,00,00,00 ++ * _in7 : 70,71,72,73,74,75,76,77, 00,00,00,00,00,00,00,00 ++ * ++ * _ out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00 ++ * _ out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00 ++ * _ out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00 ++ * _ out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00 ++ * _ out4 : 04,14,24,34,44,54,64,74, 00,00,00,00,00,00,00,00 ++ * _ out5 : 05,15,25,35,45,55,65,75, 00,00,00,00,00,00,00,00 ++ * _ out6 : 06,16,26,36,46,56,66,76, 00,00,00,00,00,00,00,00 ++ * _ out7 : 07,17,27,37,47,57,67,77, 00,00,00,00,00,00,00,00 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m128i zero = { 0 }; \ ++ __m128i shuf8 = { 0x0F0E0D0C0B0A0908, 0x1716151413121110 }; \ ++ __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ \ ++ _t0 = __lsx_vilvl_b(_in2, _in0); \ ++ _t1 = __lsx_vilvl_b(_in3, _in1); \ ++ _t2 = __lsx_vilvl_b(_in6, _in4); \ ++ _t3 = __lsx_vilvl_b(_in7, _in5); \ ++ _t4 = __lsx_vilvl_b(_t1, _t0); \ ++ _t5 = __lsx_vilvh_b(_t1, _t0); \ ++ _t6 = __lsx_vilvl_b(_t3, _t2); \ ++ _t7 = __lsx_vilvh_b(_t3, _t2); \ ++ _out0 = __lsx_vilvl_w(_t6, _t4); \ ++ _out2 = __lsx_vilvh_w(_t6, _t4); \ ++ _out4 = __lsx_vilvl_w(_t7, _t5); \ ++ _out6 = __lsx_vilvh_w(_t7, _t5); \ ++ _out1 = __lsx_vshuf_b(zero, _out0, shuf8); \ ++ _out3 = __lsx_vshuf_b(zero, _out2, shuf8); \ ++ _out5 = __lsx_vshuf_b(zero, _out4, shuf8); \ ++ _out7 = __lsx_vshuf_b(zero, _out6, shuf8); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with half-word elements in vectors ++ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 ++ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 ++ * Details : ++ * Example : ++ * 00,01,02,03,04,05,06,07 00,10,20,30,40,50,60,70 ++ * 10,11,12,13,14,15,16,17 01,11,21,31,41,51,61,71 ++ * 20,21,22,23,24,25,26,27 02,12,22,32,42,52,62,72 ++ * 30,31,32,33,34,35,36,37 to 03,13,23,33,43,53,63,73 ++ * 40,41,42,43,44,45,46,47 ======> 04,14,24,34,44,54,64,74 ++ * 50,51,52,53,54,55,56,57 05,15,25,35,45,55,65,75 ++ * 60,61,62,63,64,65,66,67 06,16,26,36,46,56,66,76 ++ * 70,71,72,73,74,75,76,77 07,17,27,37,47,57,67,77 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m128i _s0, _s1, _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ \ ++ _s0 = __lsx_vilvl_h(_in6, _in4); \ ++ _s1 = __lsx_vilvl_h(_in7, _in5); \ ++ _t0 = __lsx_vilvl_h(_s1, _s0); \ ++ _t1 = __lsx_vilvh_h(_s1, _s0); \ ++ _s0 = __lsx_vilvh_h(_in6, _in4); \ ++ _s1 = __lsx_vilvh_h(_in7, _in5); \ ++ _t2 = __lsx_vilvl_h(_s1, _s0); \ ++ _t3 = __lsx_vilvh_h(_s1, _s0); \ ++ _s0 = __lsx_vilvl_h(_in2, _in0); \ ++ _s1 = __lsx_vilvl_h(_in3, _in1); \ ++ _t4 = __lsx_vilvl_h(_s1, _s0); \ ++ _t5 = 
__lsx_vilvh_h(_s1, _s0); \ ++ _s0 = __lsx_vilvh_h(_in2, _in0); \ ++ _s1 = __lsx_vilvh_h(_in3, _in1); \ ++ _t6 = __lsx_vilvl_h(_s1, _s0); \ ++ _t7 = __lsx_vilvh_h(_s1, _s0); \ ++ \ ++ _out0 = __lsx_vpickev_d(_t0, _t4); \ ++ _out2 = __lsx_vpickev_d(_t1, _t5); \ ++ _out4 = __lsx_vpickev_d(_t2, _t6); \ ++ _out6 = __lsx_vpickev_d(_t3, _t7); \ ++ _out1 = __lsx_vpickod_d(_t0, _t4); \ ++ _out3 = __lsx_vpickod_d(_t1, _t5); \ ++ _out5 = __lsx_vpickod_d(_t2, _t6); \ ++ _out7 = __lsx_vpickod_d(_t3, _t7); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 8x4 byte block into 4x8 ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 (input 8x4 byte block) ++ * Outputs - _out0, _out1, _out2, _out3 (output 4x8 byte block) ++ * Return Type - as per RTYPE ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. ++ * Example : LSX_TRANSPOSE8x4_B ++ * _in0 : 00,01,02,03,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in1 : 10,11,12,13,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in2 : 20,21,22,23,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in3 : 30,31,32,33,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in4 : 40,41,42,43,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in5 : 50,51,52,53,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in6 : 60,61,62,63,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in7 : 70,71,72,73,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * ++ * _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00 ++ * _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00 ++ * _out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00 ++ * _out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE8x4_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3) \ ++ { \ ++ __m128i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ \ ++ _tmp0_m = __lsx_vpackev_w(_in4, _in0); \ ++ _tmp1_m = __lsx_vpackev_w(_in5, _in1); \ ++ _tmp2_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \ ++ _tmp0_m = __lsx_vpackev_w(_in6, _in2); \ ++ _tmp1_m = __lsx_vpackev_w(_in7, _in3); \ ++ \ ++ _tmp3_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \ ++ _tmp0_m = __lsx_vilvl_h(_tmp3_m, _tmp2_m); \ ++ _tmp1_m = __lsx_vilvh_h(_tmp3_m, _tmp2_m); \ ++ \ ++ _out0 = __lsx_vilvl_w(_tmp1_m, _tmp0_m); \ ++ _out2 = __lsx_vilvh_w(_tmp1_m, _tmp0_m); \ ++ _out1 = __lsx_vilvh_d(_out2, _out0); \ ++ _out3 = __lsx_vilvh_d(_out0, _out2); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 16x8 block with byte elements in vectors ++ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, in8 ++ * in9, in10, in11, in12, in13, in14, in15 ++ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 ++ * Details : ++ * Example : ++ * 000,001,002,003,004,005,006,007 ++ * 008,009,010,011,012,013,014,015 ++ * 016,017,018,019,020,021,022,023 ++ * 024,025,026,027,028,029,030,031 ++ * 032,033,034,035,036,037,038,039 ++ * 040,041,042,043,044,045,046,047 000,008,...,112,120 ++ * 048,049,050,051,052,053,054,055 001,009,...,113,121 ++ * 056,057,058,059,060,061,062,063 to 002,010,...,114,122 ++ * 064,068,066,067,068,069,070,071 =====> 003,011,...,115,123 ++ * 072,073,074,075,076,077,078,079 004,012,...,116,124 ++ * 080,081,082,083,084,085,086,087 005,013,...,117,125 ++ * 088,089,090,091,092,093,094,095 006,014,...,118,126 ++ * 096,097,098,099,100,101,102,103 007,015,...,119,127 ++ * 
104,105,106,107,108,109,110,111 ++ * 112,113,114,115,116,117,118,119 ++ * 120,121,122,123,124,125,126,127 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7) \ ++ { \ ++ __m128i _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5, _tmp6, _tmp7; \ ++ __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ DUP4_ARG2(__lsx_vilvl_b, _in2, _in0, _in3, _in1, _in6, _in4, _in7, _in5, \ ++ _tmp0, _tmp1, _tmp2, _tmp3); \ ++ DUP4_ARG2(__lsx_vilvl_b, _in10, _in8, _in11, _in9, _in14, _in12, _in15, \ ++ _in13, _tmp4, _tmp5, _tmp6, _tmp7); \ ++ DUP2_ARG2(__lsx_vilvl_b, _tmp1, _tmp0, _tmp3, _tmp2, _t0, _t2); \ ++ DUP2_ARG2(__lsx_vilvh_b, _tmp1, _tmp0, _tmp3, _tmp2, _t1, _t3); \ ++ DUP2_ARG2(__lsx_vilvl_b, _tmp5, _tmp4, _tmp7, _tmp6, _t4, _t6); \ ++ DUP2_ARG2(__lsx_vilvh_b, _tmp5, _tmp4, _tmp7, _tmp6, _t5, _t7); \ ++ DUP2_ARG2(__lsx_vilvl_w, _t2, _t0, _t3, _t1, _tmp0, _tmp4); \ ++ DUP2_ARG2(__lsx_vilvh_w, _t2, _t0, _t3, _t1, _tmp2, _tmp6); \ ++ DUP2_ARG2(__lsx_vilvl_w, _t6, _t4, _t7, _t5, _tmp1, _tmp5); \ ++ DUP2_ARG2(__lsx_vilvh_w, _t6, _t4, _t7, _t5, _tmp3, _tmp7); \ ++ DUP2_ARG2(__lsx_vilvl_d, _tmp1, _tmp0, _tmp3, _tmp2, _out0, _out2); \ ++ DUP2_ARG2(__lsx_vilvh_d, _tmp1, _tmp0, _tmp3, _tmp2, _out1, _out3); \ ++ DUP2_ARG2(__lsx_vilvl_d, _tmp5, _tmp4, _tmp7, _tmp6, _out4, _out6); \ ++ DUP2_ARG2(__lsx_vilvh_d, _tmp5, _tmp4, _tmp7, _tmp6, _out5, _out7); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 4 input vectors ++ * Arguments : Inputs - in0, in1, in2, in3 ++ * Outputs - out0, out1, out2, out3 ++ * Details : Butterfly operation ++ * Example : ++ * out0 = in0 + in3; ++ * out1 = in1 + in2; ++ * out2 = in1 - in2; ++ * out3 = in0 - in3; ++ * ============================================================================= ++ */ ++#define LSX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_b(_in0, _in3); \ ++ _out1 = __lsx_vadd_b(_in1, _in2); \ ++ _out2 = __lsx_vsub_b(_in1, _in2); \ ++ _out3 = __lsx_vsub_b(_in0, _in3); \ ++ } ++#define LSX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_h(_in0, _in3); \ ++ _out1 = __lsx_vadd_h(_in1, _in2); \ ++ _out2 = __lsx_vsub_h(_in1, _in2); \ ++ _out3 = __lsx_vsub_h(_in0, _in3); \ ++ } ++#define LSX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_w(_in0, _in3); \ ++ _out1 = __lsx_vadd_w(_in1, _in2); \ ++ _out2 = __lsx_vsub_w(_in1, _in2); \ ++ _out3 = __lsx_vsub_w(_in0, _in3); \ ++ } ++#define LSX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_d(_in0, _in3); \ ++ _out1 = __lsx_vadd_d(_in1, _in2); \ ++ _out2 = __lsx_vsub_d(_in1, _in2); \ ++ _out3 = __lsx_vsub_d(_in0, _in3); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 8 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ ++ * Outputs - _out0, _out1, _out2, _out3, ~ ++ * Details : Butterfly operation ++ * Example : ++ * _out0 = _in0 + _in7; ++ * _out1 = _in1 + _in6; ++ * _out2 = _in2 + _in5; ++ * _out3 = _in3 + _in4; ++ * _out4 = _in3 - _in4; ++ * _out5 = _in2 - _in5; ++ * _out6 = _in1 - _in6; ++ * _out7 = _in0 - _in7; ++ * 
============================================================================= ++ */ ++#define LSX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_b(_in0, _in7); \ ++ _out1 = __lsx_vadd_b(_in1, _in6); \ ++ _out2 = __lsx_vadd_b(_in2, _in5); \ ++ _out3 = __lsx_vadd_b(_in3, _in4); \ ++ _out4 = __lsx_vsub_b(_in3, _in4); \ ++ _out5 = __lsx_vsub_b(_in2, _in5); \ ++ _out6 = __lsx_vsub_b(_in1, _in6); \ ++ _out7 = __lsx_vsub_b(_in0, _in7); \ ++ } ++ ++#define LSX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_h(_in0, _in7); \ ++ _out1 = __lsx_vadd_h(_in1, _in6); \ ++ _out2 = __lsx_vadd_h(_in2, _in5); \ ++ _out3 = __lsx_vadd_h(_in3, _in4); \ ++ _out4 = __lsx_vsub_h(_in3, _in4); \ ++ _out5 = __lsx_vsub_h(_in2, _in5); \ ++ _out6 = __lsx_vsub_h(_in1, _in6); \ ++ _out7 = __lsx_vsub_h(_in0, _in7); \ ++ } ++ ++#define LSX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_w(_in0, _in7); \ ++ _out1 = __lsx_vadd_w(_in1, _in6); \ ++ _out2 = __lsx_vadd_w(_in2, _in5); \ ++ _out3 = __lsx_vadd_w(_in3, _in4); \ ++ _out4 = __lsx_vsub_w(_in3, _in4); \ ++ _out5 = __lsx_vsub_w(_in2, _in5); \ ++ _out6 = __lsx_vsub_w(_in1, _in6); \ ++ _out7 = __lsx_vsub_w(_in0, _in7); \ ++ } ++ ++#define LSX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_d(_in0, _in7); \ ++ _out1 = __lsx_vadd_d(_in1, _in6); \ ++ _out2 = __lsx_vadd_d(_in2, _in5); \ ++ _out3 = __lsx_vadd_d(_in3, _in4); \ ++ _out4 = __lsx_vsub_d(_in3, _in4); \ ++ _out5 = __lsx_vsub_d(_in2, _in5); \ ++ _out6 = __lsx_vsub_d(_in1, _in6); \ ++ _out7 = __lsx_vsub_d(_in0, _in7); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 16 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ ++ * Outputs - _out0, _out1, _out2, _out3, ~ ++ * Details : Butterfly operation ++ * Example : ++ * _out0 = _in0 + _in15; ++ * _out1 = _in1 + _in14; ++ * _out2 = _in2 + _in13; ++ * _out3 = _in3 + _in12; ++ * _out4 = _in4 + _in11; ++ * _out5 = _in5 + _in10; ++ * _out6 = _in6 + _in9; ++ * _out7 = _in7 + _in8; ++ * _out8 = _in7 - _in8; ++ * _out9 = _in6 - _in9; ++ * _out10 = _in5 - _in10; ++ * _out11 = _in4 - _in11; ++ * _out12 = _in3 - _in12; ++ * _out13 = _in2 - _in13; ++ * _out14 = _in1 - _in14; ++ * _out15 = _in0 - _in15; ++ * ============================================================================= ++ */ ++ ++#define LSX_BUTTERFLY_16_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_b(_in0, _in15); \ ++ _out1 = __lsx_vadd_b(_in1, _in14); \ ++ _out2 = __lsx_vadd_b(_in2, _in13); \ ++ _out3 = __lsx_vadd_b(_in3, _in12); \ ++ _out4 = __lsx_vadd_b(_in4, _in11); \ ++ _out5 = __lsx_vadd_b(_in5, _in10); \ ++ _out6 = __lsx_vadd_b(_in6, _in9); \ ++ _out7 = __lsx_vadd_b(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_b(_in7, _in8); \ ++ _out9 = __lsx_vsub_b(_in6, _in9); \ ++ _out10 = __lsx_vsub_b(_in5, _in10); \ ++ _out11 = __lsx_vsub_b(_in4, _in11); \ ++ _out12 = 
__lsx_vsub_b(_in3, _in12); \ ++ _out13 = __lsx_vsub_b(_in2, _in13); \ ++ _out14 = __lsx_vsub_b(_in1, _in14); \ ++ _out15 = __lsx_vsub_b(_in0, _in15); \ ++ } ++ ++#define LSX_BUTTERFLY_16_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_h(_in0, _in15); \ ++ _out1 = __lsx_vadd_h(_in1, _in14); \ ++ _out2 = __lsx_vadd_h(_in2, _in13); \ ++ _out3 = __lsx_vadd_h(_in3, _in12); \ ++ _out4 = __lsx_vadd_h(_in4, _in11); \ ++ _out5 = __lsx_vadd_h(_in5, _in10); \ ++ _out6 = __lsx_vadd_h(_in6, _in9); \ ++ _out7 = __lsx_vadd_h(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_h(_in7, _in8); \ ++ _out9 = __lsx_vsub_h(_in6, _in9); \ ++ _out10 = __lsx_vsub_h(_in5, _in10); \ ++ _out11 = __lsx_vsub_h(_in4, _in11); \ ++ _out12 = __lsx_vsub_h(_in3, _in12); \ ++ _out13 = __lsx_vsub_h(_in2, _in13); \ ++ _out14 = __lsx_vsub_h(_in1, _in14); \ ++ _out15 = __lsx_vsub_h(_in0, _in15); \ ++ } ++ ++#define LSX_BUTTERFLY_16_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_w(_in0, _in15); \ ++ _out1 = __lsx_vadd_w(_in1, _in14); \ ++ _out2 = __lsx_vadd_w(_in2, _in13); \ ++ _out3 = __lsx_vadd_w(_in3, _in12); \ ++ _out4 = __lsx_vadd_w(_in4, _in11); \ ++ _out5 = __lsx_vadd_w(_in5, _in10); \ ++ _out6 = __lsx_vadd_w(_in6, _in9); \ ++ _out7 = __lsx_vadd_w(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_w(_in7, _in8); \ ++ _out9 = __lsx_vsub_w(_in6, _in9); \ ++ _out10 = __lsx_vsub_w(_in5, _in10); \ ++ _out11 = __lsx_vsub_w(_in4, _in11); \ ++ _out12 = __lsx_vsub_w(_in3, _in12); \ ++ _out13 = __lsx_vsub_w(_in2, _in13); \ ++ _out14 = __lsx_vsub_w(_in1, _in14); \ ++ _out15 = __lsx_vsub_w(_in0, _in15); \ ++ } ++ ++#define LSX_BUTTERFLY_16_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_d(_in0, _in15); \ ++ _out1 = __lsx_vadd_d(_in1, _in14); \ ++ _out2 = __lsx_vadd_d(_in2, _in13); \ ++ _out3 = __lsx_vadd_d(_in3, _in12); \ ++ _out4 = __lsx_vadd_d(_in4, _in11); \ ++ _out5 = __lsx_vadd_d(_in5, _in10); \ ++ _out6 = __lsx_vadd_d(_in6, _in9); \ ++ _out7 = __lsx_vadd_d(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_d(_in7, _in8); \ ++ _out9 = __lsx_vsub_d(_in6, _in9); \ ++ _out10 = __lsx_vsub_d(_in5, _in10); \ ++ _out11 = __lsx_vsub_d(_in4, _in11); \ ++ _out12 = __lsx_vsub_d(_in3, _in12); \ ++ _out13 = __lsx_vsub_d(_in2, _in13); \ ++ _out14 = __lsx_vsub_d(_in1, _in14); \ ++ _out15 = __lsx_vsub_d(_in0, _in15); \ ++ } ++ ++#endif // LSX ++ ++#ifdef __loongarch_asx ++#include ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * unsigned byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. 
++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the out vector ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_h_bu(in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed halfword ++ * Details : Signed byte elements from in_h are multiplied with ++ * signed byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplication results of adjacent odd-even elements ++ * are added to the out vector ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_h_b(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_h_b(in_h, in_l); ++ out = __lasx_xvmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Signed halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the out vector. ++ * Example : out = __lasx_xvdp2_w_h(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22, 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of word vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed double ++ * Details : Signed word elements from in_h are multiplied with ++ * signed word elements from in_l producing a result ++ * twice the size of input i.e. signed double-word. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the out vector. ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_d_w(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_d_w(in_h, in_l); ++ out = __lasx_xvmaddwod_d_w(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Unsigned halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. unsigned word. 
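The __lasx_xvdp2_* helpers above all follow the same even/odd pairing: result element i of the widened output is in_h[2i]*in_l[2i] + in_h[2i+1]*in_l[2i+1]. As a plain-C cross-check of the __lasx_xvdp2_w_h worked example (an editor's aside, not part of the patch; the name dp2_w_h_ref and the standalone program are illustrative only and assume nothing beyond standard C):

/* Scalar model of the even/odd widening dot product used by the
 * __lasx_xvdp2_* helpers: 32-bit output i is
 *   in_h[2i] * in_l[2i] + in_h[2i+1] * in_l[2i+1]
 * computed on 16-bit inputs.  dp2_w_h_ref is an illustrative name. */
#include <stdint.h>
#include <stdio.h>

static void
dp2_w_h_ref (const int16_t in_h[16], const int16_t in_l[16], int32_t out[8])
{
    for (int i = 0; i < 8; i++)
        out[i] = (int32_t) in_h[2 * i] * in_l[2 * i] +
                 (int32_t) in_h[2 * i + 1] * in_l[2 * i + 1];
}

int
main (void)
{
    /* Values taken from the __lasx_xvdp2_w_h example in the header above. */
    int16_t h[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
    int16_t l[16] = { 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 };
    int32_t o[8];

    dp2_w_h_ref (h, l, o);
    for (int i = 0; i < 8; i++)
        printf ("%d%c", o[i], i == 7 ? '\n' : ',');  /* 22,38,38,22,22,38,38,22 */
    return 0;
}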
++ * Multiplication result of adjacent odd-even elements ++ * are added to the out vector ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_hu_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied with ++ * signed byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_h_b(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * unsigned byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * signed byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - per RTYPE ++ * Details : Signed halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. 
++ * Multiplication result of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8, ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1, ++ * out : 23,40,41,26, 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_w_h(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Unsigned halfword elements from in_h are multiplied with ++ * unsigned halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Multiplication result of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_w_hu(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_w_hu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Unsigned halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Multiplication result of adjacent odd-even elements ++ * are added to the in_c vector ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_w_hu_h(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Vector Unsigned Dot Product and Subtract ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * unsigned byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Multiplication result of adjacent odd-even elements ++ * are added together and subtracted from double width elements ++ * in_c vector. 
++ * Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_h_bu(in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); ++ out = __lasx_xvsub_h(in_c, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Vector Signed Dot Product and Subtract ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Signed halfword elements from in_h are multiplied with ++ * Signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Multiplication result of adjacent odd-even elements ++ * are added together and subtracted from double width elements ++ * in_c vector. ++ * Example : out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) ++ * in_c : 0,0,0,0, 0,0,0,0 ++ * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1 ++ * in_l : 2,1,1,0, 1,0,0,0, 0,0,1,0, 1,0,0,1 ++ * out : -7,-3,0,0, 0,-1,0,-1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ out = __lasx_xvsub_w(in_c, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Signed halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * four times the size of input i.e. signed doubleword. ++ * Then these multiplication results of four adjacent elements ++ * are added together and stored to the out vector. ++ * Example : out = __lasx_xvdp4_d_h(in_h, in_l) ++ * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1 ++ * in_l : -2,1,1,0, 1,0,0,0, 0,0,1, 0, 1,0,0,1 ++ * out : -2,0,1,1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp4_d_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ out = __lasx_xvhaddw_d_w(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The high half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * higher half of the two-fold sign extension (signed byte ++ * to signed halfword) and stored to the out vector. ++ * Example : See out = __lasx_xvaddwh_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwh_h_b(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvh_b(in_h, in_l); ++ out = __lasx_xvhaddw_h_b(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The high half of the vector elements are expanded and ++ * added after being doubled. 
++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * higher half of the two-fold sign extension (signed halfword ++ * to signed word) and stored to the out vector. ++ * Example : out = __lasx_xvaddwh_w_h(in_h, in_l) ++ * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 ++ * out : 1,0,0,-1, 1,0,0, 2 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwh_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvh_h(in_h, in_l); ++ out = __lasx_xvhaddw_w_h(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * lower half of the two-fold sign extension (signed byte ++ * to signed halfword) and stored to the out vector. ++ * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwl_h_b(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvl_b(in_h, in_l); ++ out = __lasx_xvhaddw_h_b(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * lower half of the two-fold sign extension (signed halfword ++ * to signed word) and stored to the out vector. ++ * Example : out = __lasx_xvaddwl_w_h(in_h, in_l) ++ * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 ++ * out : 5,-1,4,2, 1,0,2,-1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwl_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvl_h(in_h, in_l); ++ out = __lasx_xvhaddw_w_h(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The out vector and the out vector are added after the ++ * lower half of the two-fold zero extension (unsigned byte ++ * to unsigned halfword) and stored to the out vector. ++ * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwl_h_bu(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvl_b(in_h, in_l); ++ out = __lasx_xvhaddw_hu_bu(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_l vector after double zero extension (unsigned byte to ++ * signed halfword),added to the in_h vector. 
++ * Example : See out = __lasx_xvaddw_w_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddw_h_h_bu(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvsllwil_hu_bu(in_l, 0); ++ out = __lasx_xvadd_h(in_h, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_l vector after double sign extension (signed halfword to ++ * signed word), added to the in_h vector. ++ * Example : out = __lasx_xvaddw_w_w_h(in_h, in_l) ++ * in_h : 0, 1,0,0, -1,0,0,1, ++ * in_l : 2,-1,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1, ++ * out : 2, 0,1,2, -1,0,1,1, ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvsllwil_w_h(in_l, 0); ++ out = __lasx_xvadd_w(in_h, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication and addition calculation after expansion ++ * of the lower half of the vector. ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the lower half of the two-fold sign extension (signed halfword ++ * to signed word), and the result is added to the vector in_c, ++ * then stored to the out vector. ++ * Example : out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 5,6,7,8 ++ * in_h : 1,2,3,4, 1,2,3,4, 5,6,7,8, 5,6,7,8 ++ * in_l : 200, 300, 400, 500, 2000, 3000, 4000, 5000, ++ * -200,-300,-400,-500, -2000,-3000,-4000,-5000 ++ * out : 201, 602,1203,2004, -995, -1794,-2793,-3992 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvsllwil_w_h(in_h, 0); ++ tmp1 = __lasx_xvsllwil_w_h(in_l, 0); ++ tmp0 = __lasx_xvmul_w(tmp0, tmp1); ++ out = __lasx_xvadd_w(tmp0, in_c); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication and addition calculation after expansion ++ * of the higher half of the vector. ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the higher half of the two-fold sign extension (signed ++ * halfword to signed word), and the result is added to ++ * the vector in_c, then stored to the out vector. ++ * Example : See out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmaddwh_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvilvh_h(in_h, in_h); ++ tmp1 = __lasx_xvilvh_h(in_l, in_l); ++ tmp0 = __lasx_xvmulwev_w_h(tmp0, tmp1); ++ out = __lasx_xvadd_w(tmp0, in_c); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication calculation after expansion of the lower ++ * half of the vector. 
++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the lower half of the two-fold sign extension (signed ++ * halfword to signed word), then stored to the out vector. ++ * Example : out = __lasx_xvmulwl_w_h(in_h, in_l) ++ * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 ++ * out : 6,1,3,0, 0,0,1,0 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmulwl_w_h(__m256i in_h, __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvsllwil_w_h(in_h, 0); ++ tmp1 = __lasx_xvsllwil_w_h(in_l, 0); ++ out = __lasx_xvmul_w(tmp0, tmp1); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication calculation after expansion of the lower ++ * half of the vector. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the lower half of the two-fold sign extension (signed ++ * halfword to signed word), then stored to the out vector. ++ * Example : out = __lasx_xvmulwh_w_h(in_h, in_l) ++ * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 ++ * out : 0,0,0,0, 0,0,0,1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmulwh_w_h(__m256i in_h, __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvilvh_h(in_h, in_h); ++ tmp1 = __lasx_xvilvh_h(in_l, in_l); ++ out = __lasx_xvmulwev_w_h(tmp0, tmp1); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are added to the high half ++ * after being doubled, then saturated. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector adds the in_l vector after the lower half of ++ * the two-fold zero extension (unsigned byte to unsigned ++ * halfword) and then saturated. The results are stored to the out ++ * vector. ++ * Example : out = __lasx_xvsaddw_hu_hu_bu(in_h, in_l) ++ * in_h : 2,65532,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1 ++ * in_l : 3,6,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1, 3,18,3,0, 0,0,0,1, 0,0,1,1, ++ * 0,0,0,1 ++ * out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2, ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvsaddw_hu_hu_bu(__m256i in_h, __m256i in_l) { ++ __m256i tmp1, out; ++ __m256i zero = { 0 }; ++ ++ tmp1 = __lasx_xvilvl_b(zero, in_l); ++ out = __lasx_xvsadd_hu(in_h, tmp1); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all halfword elements of input vector between min & max ++ * out = ((in) < (min)) ? (min) : (((in) > (max)) ? 
(max) : (in)) ++ * Arguments : Inputs - in (input vector) ++ * - min (min threshold) ++ * - max (max threshold) ++ * Outputs - in (output vector with clipped elements) ++ * Return Type - signed halfword ++ * Example : out = __lasx_xvclip_h(in, min, max) ++ * in : -8,2,280,249, -8,255,280,249, 4,4,4,4, 5,5,5,5 ++ * min : 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 ++ * max : 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9 ++ * out : 1,2,9,9, 1,9,9,9, 4,4,4,4, 5,5,5,5 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvclip_h(__m256i in, __m256i min, __m256i max) { ++ __m256i out; ++ ++ out = __lasx_xvmax_h(min, in); ++ out = __lasx_xvmin_h(max, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all signed halfword elements of input vector ++ * between 0 & 255 ++ * Arguments : Inputs - in (input vector) ++ * Outputs - out (output vector with clipped elements) ++ * Return Type - signed halfword ++ * Example : See out = __lasx_xvclip255_w(in) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvclip255_h(__m256i in) { ++ __m256i out; ++ ++ out = __lasx_xvmaxi_h(in, 0); ++ out = __lasx_xvsat_hu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all signed word elements of input vector ++ * between 0 & 255 ++ * Arguments : Inputs - in (input vector) ++ * Output - out (output vector with clipped elements) ++ * Return Type - signed word ++ * Example : out = __lasx_xvclip255_w(in) ++ * in : -8,255,280,249, -8,255,280,249 ++ * out : 0,255,255,249, 0,255,255,249 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvclip255_w(__m256i in) { ++ __m256i out; ++ ++ out = __lasx_xvmaxi_w(in, 0); ++ out = __lasx_xvsat_wu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Indexed halfword element values are replicated to all ++ * elements in output vector. If 'idx < 8' use xvsplati_l_*, ++ * if 'idx >= 8' use xvsplati_h_*. ++ * Arguments : Inputs - in, idx ++ * Output - out ++ * Details : Idx element value from in vector is replicated to all ++ * elements in out vector. ++ * Valid index range for halfword operation is 0-7 ++ * Example : out = __lasx_xvsplati_l_h(in, idx) ++ * in : 20,10,11,12, 13,14,15,16, 0,0,2,0, 0,0,0,0 ++ * idx : 0x02 ++ * out : 11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,11,11 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvsplati_l_h(__m256i in, int idx) { ++ __m256i out; ++ ++ out = __lasx_xvpermi_q(in, in, 0x02); ++ out = __lasx_xvreplve_h(out, idx); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Indexed halfword element values are replicated to all ++ * elements in output vector. If 'idx < 8' use xvsplati_l_*, ++ * if 'idx >= 8' use xvsplati_h_*. ++ * Arguments : Inputs - in, idx ++ * Output - out ++ * Details : Idx element value from in vector is replicated to all ++ * elements in out vector. 
++ * Valid index range for halfword operation is 0-7 ++ * Example : out = __lasx_xvsplati_h_h(in, idx) ++ * in : 20,10,11,12, 13,14,15,16, 0,2,0,0, 0,0,0,0 ++ * idx : 0x09 ++ * out : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) { ++ __m256i out; ++ ++ out = __lasx_xvpermi_q(in, in, 0x13); ++ out = __lasx_xvreplve_h(out, idx); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 4x4 block with double-word elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 ++ * Outputs - _out0, _out1, _out2, _out3 ++ * Example : LASX_TRANSPOSE4x4_D ++ * _in0 : 1,2,3,4 ++ * _in1 : 1,2,3,4 ++ * _in2 : 1,2,3,4 ++ * _in3 : 1,2,3,4 ++ * ++ * _out0 : 1,1,1,1 ++ * _out1 : 2,2,2,2 ++ * _out2 : 3,3,3,3 ++ * _out3 : 4,4,4,4 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE4x4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \ ++ _out3) \ ++ { \ ++ __m256i _tmp0, _tmp1, _tmp2, _tmp3; \ ++ _tmp0 = __lasx_xvilvl_d(_in1, _in0); \ ++ _tmp1 = __lasx_xvilvh_d(_in1, _in0); \ ++ _tmp2 = __lasx_xvilvl_d(_in3, _in2); \ ++ _tmp3 = __lasx_xvilvh_d(_in3, _in2); \ ++ _out0 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x20); \ ++ _out2 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x31); \ ++ _out1 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x20); \ ++ _out3 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x31); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with word elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 ++ * Example : LASX_TRANSPOSE8x8_W ++ * _in0 : 1,2,3,4,5,6,7,8 ++ * _in1 : 2,2,3,4,5,6,7,8 ++ * _in2 : 3,2,3,4,5,6,7,8 ++ * _in3 : 4,2,3,4,5,6,7,8 ++ * _in4 : 5,2,3,4,5,6,7,8 ++ * _in5 : 6,2,3,4,5,6,7,8 ++ * _in6 : 7,2,3,4,5,6,7,8 ++ * _in7 : 8,2,3,4,5,6,7,8 ++ * ++ * _out0 : 1,2,3,4,5,6,7,8 ++ * _out1 : 2,2,2,2,2,2,2,2 ++ * _out2 : 3,3,3,3,3,3,3,3 ++ * _out3 : 4,4,4,4,4,4,4,4 ++ * _out4 : 5,5,5,5,5,5,5,5 ++ * _out5 : 6,6,6,6,6,6,6,6 ++ * _out6 : 7,7,7,7,7,7,7,7 ++ * _out7 : 8,8,8,8,8,8,8,8 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE8x8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m256i _s0_m, _s1_m; \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ \ ++ _s0_m = __lasx_xvilvl_w(_in2, _in0); \ ++ _s1_m = __lasx_xvilvl_w(_in3, _in1); \ ++ _tmp0_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp1_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_w(_in2, _in0); \ ++ _s1_m = __lasx_xvilvh_w(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp3_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvl_w(_in6, _in4); \ ++ _s1_m = __lasx_xvilvl_w(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp5_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_w(_in6, _in4); \ ++ _s1_m = __lasx_xvilvh_w(_in7, _in5); \ ++ _tmp6_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp7_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _out0 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x20); \ ++ _out1 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x20); \ ++ _out2 
= __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x20); \ ++ _out3 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x20); \ ++ _out4 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x31); \ ++ _out5 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x31); \ ++ _out6 = __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x31); \ ++ _out7 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x31); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 16x8 byte block ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, ++ * _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15 ++ * (input 16x8 byte block) ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 (output 8x16 byte block) ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. ++ * Example : See LASX_TRANSPOSE16x8_H ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7) \ ++ { \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ \ ++ _tmp0_m = __lasx_xvilvl_b(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvl_b(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_b(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvl_b(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_b(_in10, _in8); \ ++ _tmp5_m = __lasx_xvilvl_b(_in11, _in9); \ ++ _tmp6_m = __lasx_xvilvl_b(_in14, _in12); \ ++ _tmp7_m = __lasx_xvilvl_b(_in15, _in13); \ ++ _out0 = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \ ++ _out1 = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \ ++ _out2 = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \ ++ _out3 = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \ ++ _out4 = __lasx_xvilvl_b(_tmp5_m, _tmp4_m); \ ++ _out5 = __lasx_xvilvh_b(_tmp5_m, _tmp4_m); \ ++ _out6 = __lasx_xvilvl_b(_tmp7_m, _tmp6_m); \ ++ _out7 = __lasx_xvilvh_b(_tmp7_m, _tmp6_m); \ ++ _tmp0_m = __lasx_xvilvl_w(_out2, _out0); \ ++ _tmp2_m = __lasx_xvilvh_w(_out2, _out0); \ ++ _tmp4_m = __lasx_xvilvl_w(_out3, _out1); \ ++ _tmp6_m = __lasx_xvilvh_w(_out3, _out1); \ ++ _tmp1_m = __lasx_xvilvl_w(_out6, _out4); \ ++ _tmp3_m = __lasx_xvilvh_w(_out6, _out4); \ ++ _tmp5_m = __lasx_xvilvl_w(_out7, _out5); \ ++ _tmp7_m = __lasx_xvilvh_w(_out7, _out5); \ ++ _out0 = __lasx_xvilvl_d(_tmp1_m, _tmp0_m); \ ++ _out1 = __lasx_xvilvh_d(_tmp1_m, _tmp0_m); \ ++ _out2 = __lasx_xvilvl_d(_tmp3_m, _tmp2_m); \ ++ _out3 = __lasx_xvilvh_d(_tmp3_m, _tmp2_m); \ ++ _out4 = __lasx_xvilvl_d(_tmp5_m, _tmp4_m); \ ++ _out5 = __lasx_xvilvh_d(_tmp5_m, _tmp4_m); \ ++ _out6 = __lasx_xvilvl_d(_tmp7_m, _tmp6_m); \ ++ _out7 = __lasx_xvilvh_d(_tmp7_m, _tmp6_m); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 16x8 byte block ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, ++ * _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15 ++ * (input 16x8 byte block) ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 (output 8x16 byte block) ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. 
++ * Example : LASX_TRANSPOSE16x8_H ++ * _in0 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in1 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in2 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in3 : 4,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in4 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in5 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in6 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in7 : 8,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in8 : 9,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in9 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in10 : 0,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in11 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in12 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in13 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in14 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in15 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * ++ * _out0 : 1,2,3,4,5,6,7,8,9,1,0,2,3,7,5,6 ++ * _out1 : 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2 ++ * _out2 : 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 ++ * _out3 : 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 ++ * _out4 : 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 ++ * _out5 : 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 ++ * _out6 : 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 ++ * _out7 : 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE16x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7) \ ++ { \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ \ ++ _tmp0_m = __lasx_xvilvl_h(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvl_h(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_h(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvl_h(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_h(_in10, _in8); \ ++ _tmp5_m = __lasx_xvilvl_h(_in11, _in9); \ ++ _tmp6_m = __lasx_xvilvl_h(_in14, _in12); \ ++ _tmp7_m = __lasx_xvilvl_h(_in15, _in13); \ ++ _t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \ ++ _t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \ ++ _t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \ ++ _t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \ ++ _t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \ ++ _t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \ ++ _t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \ ++ _t7 = __lasx_xvilvh_h(_tmp7_m, _tmp6_m); \ ++ _tmp0_m = __lasx_xvilvl_d(_t2, _t0); \ ++ _tmp2_m = __lasx_xvilvh_d(_t2, _t0); \ ++ _tmp4_m = __lasx_xvilvl_d(_t3, _t1); \ ++ _tmp6_m = __lasx_xvilvh_d(_t3, _t1); \ ++ _tmp1_m = __lasx_xvilvl_d(_t6, _t4); \ ++ _tmp3_m = __lasx_xvilvh_d(_t6, _t4); \ ++ _tmp5_m = __lasx_xvilvl_d(_t7, _t5); \ ++ _tmp7_m = __lasx_xvilvh_d(_t7, _t5); \ ++ _out0 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \ ++ _out1 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \ ++ _out2 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \ ++ _out3 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \ ++ \ ++ _tmp0_m = __lasx_xvilvh_h(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvh_h(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvh_h(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvh_h(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvh_h(_in10, _in8); \ ++ _tmp5_m = __lasx_xvilvh_h(_in11, _in9); \ ++ _tmp6_m = __lasx_xvilvh_h(_in14, _in12); \ ++ _tmp7_m = __lasx_xvilvh_h(_in15, _in13); \ ++ _t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \ ++ _t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \ ++ _t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \ ++ _t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \ ++ _t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \ ++ _t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \ ++ _t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \ ++ _t7 = 
__lasx_xvilvh_h(_tmp7_m, _tmp6_m); \ ++ _tmp0_m = __lasx_xvilvl_d(_t2, _t0); \ ++ _tmp2_m = __lasx_xvilvh_d(_t2, _t0); \ ++ _tmp4_m = __lasx_xvilvl_d(_t3, _t1); \ ++ _tmp6_m = __lasx_xvilvh_d(_t3, _t1); \ ++ _tmp1_m = __lasx_xvilvl_d(_t6, _t4); \ ++ _tmp3_m = __lasx_xvilvh_d(_t6, _t4); \ ++ _tmp5_m = __lasx_xvilvl_d(_t7, _t5); \ ++ _tmp7_m = __lasx_xvilvh_d(_t7, _t5); \ ++ _out4 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \ ++ _out5 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \ ++ _out6 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \ ++ _out7 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 4x4 block with halfword elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 ++ * Outputs - _out0, _out1, _out2, _out3 ++ * Return Type - signed halfword ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. ++ * Example : See LASX_TRANSPOSE8x8_H ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE4x4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \ ++ _out3) \ ++ { \ ++ __m256i _s0_m, _s1_m; \ ++ \ ++ _s0_m = __lasx_xvilvl_h(_in1, _in0); \ ++ _s1_m = __lasx_xvilvl_h(_in3, _in2); \ ++ _out0 = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _out2 = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _out1 = __lasx_xvilvh_d(_out0, _out0); \ ++ _out3 = __lasx_xvilvh_d(_out2, _out2); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 8x8 byte block ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 ++ * (input 8x8 byte block) ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 (output 8x8 byte block) ++ * Example : See LASX_TRANSPOSE8x8_H ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ _tmp0_m = __lasx_xvilvl_b(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvl_b(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_b(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvl_b(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \ ++ _tmp5_m = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \ ++ _tmp6_m = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \ ++ _tmp7_m = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \ ++ _out0 = __lasx_xvilvl_w(_tmp6_m, _tmp4_m); \ ++ _out2 = __lasx_xvilvh_w(_tmp6_m, _tmp4_m); \ ++ _out4 = __lasx_xvilvl_w(_tmp7_m, _tmp5_m); \ ++ _out6 = __lasx_xvilvh_w(_tmp7_m, _tmp5_m); \ ++ _out1 = __lasx_xvbsrl_v(_out0, 8); \ ++ _out3 = __lasx_xvbsrl_v(_out2, 8); \ ++ _out5 = __lasx_xvbsrl_v(_out4, 8); \ ++ _out7 = __lasx_xvbsrl_v(_out6, 8); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with halfword elements in vectors. ++ * Arguments : Inputs - _in0, _in1, ~ ++ * Outputs - _out0, _out1, ~ ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. 
++ * Example : LASX_TRANSPOSE8x8_H ++ * _in0 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in1 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 ++ * _in2 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 ++ * _in3 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in4 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8 ++ * _in5 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in6 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in7 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8 ++ * ++ * _out0 : 1,8,8,1, 9,1,1,9, 1,8,8,1, 9,1,1,9 ++ * _out1 : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 ++ * _out2 : 3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3 ++ * _out3 : 4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4 ++ * _out4 : 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5 ++ * _out5 : 6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6 ++ * _out6 : 7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7 ++ * _out7 : 8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m256i _s0_m, _s1_m; \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ \ ++ _s0_m = __lasx_xvilvl_h(_in6, _in4); \ ++ _s1_m = __lasx_xvilvl_h(_in7, _in5); \ ++ _tmp0_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp1_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_h(_in6, _in4); \ ++ _s1_m = __lasx_xvilvh_h(_in7, _in5); \ ++ _tmp2_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp3_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ \ ++ _s0_m = __lasx_xvilvl_h(_in2, _in0); \ ++ _s1_m = __lasx_xvilvl_h(_in3, _in1); \ ++ _tmp4_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp5_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_h(_in2, _in0); \ ++ _s1_m = __lasx_xvilvh_h(_in3, _in1); \ ++ _tmp6_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp7_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ \ ++ _out0 = __lasx_xvpickev_d(_tmp0_m, _tmp4_m); \ ++ _out2 = __lasx_xvpickev_d(_tmp1_m, _tmp5_m); \ ++ _out4 = __lasx_xvpickev_d(_tmp2_m, _tmp6_m); \ ++ _out6 = __lasx_xvpickev_d(_tmp3_m, _tmp7_m); \ ++ _out1 = __lasx_xvpickod_d(_tmp0_m, _tmp4_m); \ ++ _out3 = __lasx_xvpickod_d(_tmp1_m, _tmp5_m); \ ++ _out5 = __lasx_xvpickod_d(_tmp2_m, _tmp6_m); \ ++ _out7 = __lasx_xvpickod_d(_tmp3_m, _tmp7_m); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 4 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 ++ * Outputs - _out0, _out1, _out2, _out3 ++ * Details : Butterfly operation ++ * Example : LASX_BUTTERFLY_4 ++ * _out0 = _in0 + _in3; ++ * _out1 = _in1 + _in2; ++ * _out2 = _in1 - _in2; ++ * _out3 = _in0 - _in3; ++ * ============================================================================= ++ */ ++#define LASX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_b(_in0, _in3); \ ++ _out1 = __lasx_xvadd_b(_in1, _in2); \ ++ _out2 = __lasx_xvsub_b(_in1, _in2); \ ++ _out3 = __lasx_xvsub_b(_in0, _in3); \ ++ } ++#define LASX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_h(_in0, _in3); \ ++ _out1 = __lasx_xvadd_h(_in1, _in2); \ ++ _out2 = __lasx_xvsub_h(_in1, _in2); \ ++ _out3 = __lasx_xvsub_h(_in0, _in3); \ ++ } ++#define LASX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_w(_in0, _in3); \ ++ _out1 = __lasx_xvadd_w(_in1, _in2); \ ++ _out2 = __lasx_xvsub_w(_in1, _in2); \ ++ _out3 = __lasx_xvsub_w(_in0, _in3); \ 
++ } ++#define LASX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_d(_in0, _in3); \ ++ _out1 = __lasx_xvadd_d(_in1, _in2); \ ++ _out2 = __lasx_xvsub_d(_in1, _in2); \ ++ _out3 = __lasx_xvsub_d(_in0, _in3); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 8 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ ++ * Outputs - _out0, _out1, _out2, _out3, ~ ++ * Details : Butterfly operation ++ * Example : LASX_BUTTERFLY_8 ++ * _out0 = _in0 + _in7; ++ * _out1 = _in1 + _in6; ++ * _out2 = _in2 + _in5; ++ * _out3 = _in3 + _in4; ++ * _out4 = _in3 - _in4; ++ * _out5 = _in2 - _in5; ++ * _out6 = _in1 - _in6; ++ * _out7 = _in0 - _in7; ++ * ============================================================================= ++ */ ++#define LASX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_b(_in0, _in7); \ ++ _out1 = __lasx_xvadd_b(_in1, _in6); \ ++ _out2 = __lasx_xvadd_b(_in2, _in5); \ ++ _out3 = __lasx_xvadd_b(_in3, _in4); \ ++ _out4 = __lasx_xvsub_b(_in3, _in4); \ ++ _out5 = __lasx_xvsub_b(_in2, _in5); \ ++ _out6 = __lasx_xvsub_b(_in1, _in6); \ ++ _out7 = __lasx_xvsub_b(_in0, _in7); \ ++ } ++ ++#define LASX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_h(_in0, _in7); \ ++ _out1 = __lasx_xvadd_h(_in1, _in6); \ ++ _out2 = __lasx_xvadd_h(_in2, _in5); \ ++ _out3 = __lasx_xvadd_h(_in3, _in4); \ ++ _out4 = __lasx_xvsub_h(_in3, _in4); \ ++ _out5 = __lasx_xvsub_h(_in2, _in5); \ ++ _out6 = __lasx_xvsub_h(_in1, _in6); \ ++ _out7 = __lasx_xvsub_h(_in0, _in7); \ ++ } ++ ++#define LASX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_w(_in0, _in7); \ ++ _out1 = __lasx_xvadd_w(_in1, _in6); \ ++ _out2 = __lasx_xvadd_w(_in2, _in5); \ ++ _out3 = __lasx_xvadd_w(_in3, _in4); \ ++ _out4 = __lasx_xvsub_w(_in3, _in4); \ ++ _out5 = __lasx_xvsub_w(_in2, _in5); \ ++ _out6 = __lasx_xvsub_w(_in1, _in6); \ ++ _out7 = __lasx_xvsub_w(_in0, _in7); \ ++ } ++ ++#define LASX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_d(_in0, _in7); \ ++ _out1 = __lasx_xvadd_d(_in1, _in6); \ ++ _out2 = __lasx_xvadd_d(_in2, _in5); \ ++ _out3 = __lasx_xvadd_d(_in3, _in4); \ ++ _out4 = __lasx_xvsub_d(_in3, _in4); \ ++ _out5 = __lasx_xvsub_d(_in2, _in5); \ ++ _out6 = __lasx_xvsub_d(_in1, _in6); \ ++ _out7 = __lasx_xvsub_d(_in0, _in7); \ ++ } ++ ++#endif // LASX ++ ++/* ++ * ============================================================================= ++ * Description : Print out elements in vector. ++ * Arguments : Inputs - RTYPE, _element_num, _in0, _enter ++ * Outputs - ++ * Details : Print out '_element_num' elements in 'RTYPE' vector '_in0', if ++ * '_enter' is TRUE, prefix "\nVP:" will be added first. 
++ * Example : VECT_PRINT(v4i32,4,in0,1); // in0: 1,2,3,4 ++ * VP:1,2,3,4, ++ * ============================================================================= ++ */ ++#define VECT_PRINT(RTYPE, element_num, in0, enter) \ ++ { \ ++ RTYPE _tmp0 = (RTYPE)in0; \ ++ int _i = 0; \ ++ if (enter) printf("\nVP:"); \ ++ for (_i = 0; _i < element_num; _i++) printf("%d,", _tmp0[_i]); \ ++ } ++ ++#endif /* LOONGSON_INTRINSICS_H */ +diff --git a/pixman/meson.build b/pixman/meson.build +index 62ec66b..2f515d3 100644 +--- a/pixman/meson.build ++++ b/pixman/meson.build +@@ -59,6 +59,8 @@ simds = [ + ['pixman-arma64-neon-asm.S', 'pixman-arma64-neon-asm-bilinear.S']], + ['mips-dspr2', have_mips_dspr2, mips_dspr2_flags, + ['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']], ++ ['lsx', have_lsx, lsx_flags, []], ++ ['lasx', have_lasx, lasx_flags, []], + ] + + foreach simd : simds +@@ -85,6 +87,7 @@ pixman_files = files( + 'pixman-mips.c', + 'pixman-arm.c', + 'pixman-ppc.c', ++ 'pixman-loongarch.c', + 'pixman-edge.c', + 'pixman-edge-accessors.c', + 'pixman-fast-path.c', +diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c +index 69fa70b..c769311 100644 +--- a/pixman/pixman-implementation.c ++++ b/pixman/pixman-implementation.c +@@ -399,6 +399,7 @@ _pixman_choose_implementation (void) + imp = _pixman_arm_get_implementations (imp); + imp = _pixman_ppc_get_implementations (imp); + imp = _pixman_mips_get_implementations (imp); ++ imp = _pixman_loongarch_get_implementations (imp); + + imp = _pixman_implementation_create_noop (imp); + +diff --git a/pixman/pixman-lasx.c b/pixman/pixman-lasx.c +new file mode 100644 +index 0000000..d6d0169 +--- /dev/null ++++ b/pixman/pixman-lasx.c +@@ -0,0 +1,4887 @@ ++/* ++ * Copyright © 2023 Loongson Technology Corporation Limited ++ * Contributed by Shiyou Yin(yinshiyou-hf@loongson.cn) ++ * Lu Wang(wanglu@loongson.cn) ++ * Song Ding(songding@loongson.cn) ++ * ++ * Permission to use, copy, modify, distribute, and sell this software and its ++ * documentation for any purpose is hereby granted without fee, provided that ++ * the above copyright notice appear in all copies and that both that ++ * copyright notice and this permission notice appear in supporting ++ * documentation, and that the name of Red Hat not be used in advertising or ++ * publicity pertaining to distribution of the software without specific, ++ * written prior permission. Red Hat makes no representations about the ++ * suitability of this software for any purpose. It is provided "as is" ++ * without implied warranty. ++ * ++ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS ++ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND ++ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ++ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING ++ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS ++ * SOFTWARE. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include <config.h> ++#endif ++ ++#include "pixman-private.h" ++#include "pixman-combine32.h" ++#include "loongson_intrinsics.h" ++ ++static __m256i mask_0080; ++static __m256i mask_00ff; ++static __m256i mask_0101; ++static __m256i mask_ffff; ++static __m256i mask_ff000000; ++static __m256i mask_alpha; ++ ++static __m256i mask_565_r; ++static __m256i mask_565_g1, mask_565_g2; ++static __m256i mask_565_b; ++static __m256i mask_red; ++static __m256i mask_green; ++static __m256i mask_blue; ++ ++static __m256i mask_565_fix_rb; ++static __m256i mask_565_fix_g; ++ ++static __m256i mask_565_rb; ++static __m256i mask_565_pack_multiplier; ++ ++static force_inline __m256i ++create_mask_16_256 (uint16_t mask) ++{ ++ return __lasx_xvrepli_h (mask); ++} ++ ++static force_inline __m256i ++create_mask_1x32_256 (uint32_t mask) ++{ ++ return __lasx_xvreplgr2vr_w (mask); ++} ++ ++static force_inline __m256i ++create_mask_1x64_256 (int64_t mask) ++{ ++ return __lasx_xvreplgr2vr_d (mask); ++} ++ ++static force_inline uint32_t ++over (uint32_t src, uint32_t dest) ++{ ++ uint32_t a = ~src >> 24; ++ ++ UN8x4_MUL_UN8_ADD_UN8x4(dest, a, src); ++ ++ return dest; ++} ++ ++static force_inline uint32_t ++in (uint32_t x, uint8_t y) ++{ ++ uint16_t a = y; ++ ++ UN8x4_MUL_UN8(x, a); ++ ++ return x; ++} ++ ++static force_inline uint32_t ++combine_mask(const uint32_t *src, const uint32_t *mask, int i) ++{ ++ uint32_t s, m; ++ ++ if (mask) { ++ m = *(mask + i) >> A_SHIFT; ++ if (!m) ++ return 0; ++ } ++ s = *(src + i); ++ if (mask) ++ UN8x4_MUL_UN8(s, m); ++ return s; ++} ++ ++static void ++combine_mask_ca(uint32_t *src, uint32_t *mask) ++{ ++ uint32_t a = *mask; ++ uint32_t x; ++ uint16_t xa; ++ ++ if (!a) { ++ *(src) = 0; ++ return; ++ } ++ ++ x = *(src); ++ if (a == ~0) { ++ x = x >> A_SHIFT; ++ x |= x << G_SHIFT; ++ x |= x << R_SHIFT; ++ *(mask) = x; ++ return; ++ } ++ xa = x >> A_SHIFT; ++ UN8x4_MUL_UN8x4(x, a); ++ *(src) = x; ++ ++ UN8x4_MUL_UN8(a, xa); ++ *(mask) = a; ++} ++ ++static void ++combine_mask_value_ca(uint32_t *src, const uint32_t *mask) ++{ ++ uint32_t a = *mask; ++ uint32_t x; ++ ++ if (!a) { ++ *(src) = 0; ++ return; ++ } ++ ++ if (a == ~0) ++ return; ++ ++ x = *(src); ++ UN8x4_MUL_UN8x4(x, a); ++ *(src) = x; ++} ++ ++static void ++combine_mask_alpha_ca(const uint32_t *src, uint32_t *mask) ++{ ++ uint32_t a = *(mask); ++ uint32_t x; ++ ++ if (!a) ++ return; ++ x = *(src) >> A_SHIFT; ++ ++ if (x == MASK) ++ return; ++ ++ if (a == -1) { ++ x |= x << G_SHIFT; ++ x |= x << R_SHIFT; ++ *(mask) = x; ++ return; ++ } ++ UN8x4_MUL_UN8(a, x); ++ *(mask) = a; ++} ++ ++/* Compute the product of two unsigned fixed-point 8-bit values from 0 to 1 ++ * and map its result to the same range. ++ * ++ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: ++ * Notation, Notation, Notation", the first of which is ++ * ++ * prod(a, b) = (a * b + 128) / 255. ++ * ++ * By approximating the division by 255 as 257/65536, it can be replaced by a ++ * multiply and a right shift. This is the implementation that we use in ++ * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended ++ * 3DNow!, and unavailable at the time of the book's publication) to perform ++ * the multiplication and right shift in a single operation. ++ * ++ * prod(a, b) = ((a * b + 128) * 257) >> 16. ++ * ++ * A third way (how pix_multiply() was implemented prior to 14208344) exists ++ * also that performs the multiplication by 257 with adds and shifts.
++ * ++ * Where temp = a * b + 128 ++ * ++ * prod(a, b) = (temp + (temp >> 8)) >> 8. ++ * ++ * The lasx_pix_multiply(src, mask) implemented with the third way, and caculates ++ * two sets of data each time. ++ */ ++ ++static force_inline __m256i ++lasx_pix_multiply (__m256i data, __m256i alpha) ++{ ++ return __lasx_xvmuh_hu (__lasx_xvmadd_h(mask_0080, data, alpha), ++ mask_0101); ++} ++ ++static force_inline __m256i ++lasx_over_u(__m256i src, __m256i dest) ++{ ++ __m256i r1, r2, r3, t; ++ __m256i rb_mask = __lasx_xvreplgr2vr_w(0x00ff00ff); ++ __m256i rb_one_half = __lasx_xvreplgr2vr_w(0x00800080); ++ __m256i rb_mask_plus_one = __lasx_xvreplgr2vr_w(0x10000100); ++ __m256i a = __lasx_xvsrli_w(__lasx_xvnor_v(src, src), 24); ++ ++ r1 = __lasx_xvand_v(dest, rb_mask); ++ r1 = __lasx_xvmadd_w(rb_one_half, r1, a); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r1, 8)); ++ r1 = __lasx_xvadd_w(r1, t); ++ r1 = __lasx_xvsrli_w(r1, 8); ++ r1 = __lasx_xvand_v(r1, rb_mask); ++ r2 = __lasx_xvand_v(src, rb_mask); ++ ++ r1 = __lasx_xvadd_w(r1, r2); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r1, 8)); ++ r1 = __lasx_xvor_v(r1, __lasx_xvsub_w(rb_mask_plus_one, t)); ++ r1 = __lasx_xvand_v(r1, rb_mask); ++ ++ r2 = __lasx_xvsrli_w(dest, 8); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ r2 = __lasx_xvmadd_w(rb_one_half, r2, a); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r2, 8)); ++ r2 = __lasx_xvadd_w(r2, t); ++ r2 = __lasx_xvsrli_w(r2, 8); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ r3 = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(src, 8)); ++ ++ r2 = __lasx_xvadd_w(r2, r3); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r2, 8)); ++ r2 = __lasx_xvor_v(r2, __lasx_xvsub_w(rb_mask_plus_one, t)); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ ++ t = __lasx_xvor_v(r1, __lasx_xvslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static force_inline __m256i ++lasx_in_u(__m256i x, __m256i a) ++{ ++ __m256i r1, r2, t; ++ __m256i rb_mask = __lasx_xvreplgr2vr_w(0xff00ff); ++ __m256i rb_one_half = __lasx_xvreplgr2vr_w(0x800080); ++ ++ r1 = __lasx_xvand_v(x, rb_mask); ++ r1 = __lasx_xvmadd_w(rb_one_half, r1, a); ++ t = __lasx_xvand_v(__lasx_xvsrli_w(r1, 8), rb_mask); ++ r1 = __lasx_xvadd_w(r1, t); ++ r1 = __lasx_xvsrli_w(r1, 8); ++ r1 = __lasx_xvand_v(r1, rb_mask); ++ r2 = __lasx_xvsrli_w(x, 8); ++ ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ r2 = __lasx_xvmadd_w(rb_one_half, r2, a); ++ t = __lasx_xvand_v(__lasx_xvsrli_w(r2, 8), rb_mask); ++ r2 = __lasx_xvadd_w(r2, t); ++ r2 = __lasx_xvsrli_w(r2, 8); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ ++ t = __lasx_xvor_v(r1, __lasx_xvslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static void ++lasx_combine_src_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, tmp0, tmp1; ++ ++ if(mask) { ++ while (width >= 8) { ++ src0 = __lasx_xvld(src, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = 
__lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ *dest++ = s; ++ } ++ } else { ++ while (width >= 8) { ++ src0 = __lasx_xvld(src, 0); ++ __lasx_xvst(src0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ if (width) { ++ memcpy (dest, src, width * sizeof (uint32_t)); ++ } ++ } ++} ++ ++static void ++lasx_combine_over_u_mask (uint32_t *dest, ++ const uint32_t *src, ++ const uint32_t *mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0, dest1; ++ __m256i tmp0, tmp1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = lasx_pix_multiply(out4, out1); ++ out5 = lasx_pix_multiply(out5, out3); ++ ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out5, out4); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ mask += 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t m = ALPHA_8 (*(mask + i)); ++ if (m == 0xFF) { ++ uint32_t s = *(src + i); ++ uint32_t a = ALPHA_8 (s); ++ if (a == 0xFF) { ++ *(dest + i) = s; ++ } else if (s) { ++ uint32_t d = *(dest + i); ++ uint32_t ia = a ^ 0xFF; ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ia, s); ++ *(dest + i) = d; ++ } ++ } else if (m) { ++ uint32_t s = *(src + i); ++ if (s) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8(s, m); ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ALPHA_8 (~s), s); ++ *(dest + i) = d; ++ } ++ } ++ } ++} ++ ++static force_inline __m256i ++over_1x256 (__m256i src, __m256i alpha, __m256i dst) ++{ ++ alpha = __lasx_xvxor_v(alpha, mask_00ff); ++ alpha = lasx_pix_multiply(dst, alpha); ++ return __lasx_xvsadd_bu (src, alpha); ++} ++ ++static force_inline uint32_t ++core_combine_over_u32 (uint32_t src, uint32_t dst) ++{ ++ uint8_t a = src >> 24; ++ ++ if (a == 0xff) { ++ return src; ++ } ++ else if (src) { ++ __m256i zero = __lasx_xvldi(0); ++ __m256i xr_src = __lasx_xvinsgr2vr_w (zero, src, 0); ++ __m256i xr_dst = __lasx_xvinsgr2vr_w (zero, dst, 0); ++ __m256i xr_alpha; ++ __m256i tmp; ++ ++ xr_src = __lasx_xvilvl_b (zero, xr_src); ++ xr_dst = __lasx_xvilvl_b (zero, xr_dst); ++ xr_alpha = __lasx_xvshuf4i_h (xr_src, 0xff); ++ ++ tmp = __lasx_xvpickev_b (zero, over_1x256 (xr_src, xr_alpha, xr_dst)); ++ ++ return __lasx_xvpickve2gr_wu (tmp, 0); ++ } ++ ++ return dst; ++} ++ ++static void 
++lasx_combine_over_u_no_mask (uint32_t *dst, const uint32_t *src, int width) ++{ ++ __m256i zero = __lasx_xvldi(0); ++ ++ while (width >= 8) { ++ __m256i xv_src, xv_dst; ++ __m256i xv_src_ev, xv_src_od; ++ __m256i alpha; ++ __m256i xv_dst_ev, xv_dst_od; ++ ++ xv_src = __lasx_xvld(src, 0); ++ xv_dst = __lasx_xvld(dst, 0); ++ ++ /* unpack src: 1x256 to 2x256 */ ++ xv_src_ev = __lasx_xvpackev_b(zero, xv_src); ++ xv_src_od = __lasx_xvpackod_b(zero, xv_src); ++ ++ /* expand alpha */ ++ alpha = __lasx_xvshuf4i_h(xv_src_od, 0xf5); ++ ++ /* unpack dst: 1x256 to 2x256 */ ++ xv_dst_ev = __lasx_xvpackev_b(zero, xv_dst); ++ xv_dst_od = __lasx_xvpackod_b(zero, xv_dst); ++ ++ xv_dst_ev = over_1x256(xv_src_ev, alpha, xv_dst_ev); ++ xv_dst_od = over_1x256(xv_src_od, alpha, xv_dst_od); ++ ++ xv_dst = __lasx_xvpackev_b(xv_dst_od, xv_dst_ev); ++ ++ __lasx_xvst(xv_dst, dst, 0); ++ width -= 8; ++ src += 8; ++ dst += 8; ++ } ++ ++ while (width--) { ++ uint32_t s = *src; ++ uint32_t d = *dst; ++ ++ *dst = core_combine_over_u32 (s, d); ++ ++ ++src; ++ ++dst; ++ } ++} ++ ++static void ++lasx_combine_over_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ if (mask) { ++ lasx_combine_over_u_mask (dest, src, mask, width); ++ } ++ else { ++ lasx_combine_over_u_no_mask (dest, src, width); ++ } ++} ++ ++ ++static void ++lasx_combine_over_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5; ++ __m256i tmp0, tmp1; ++ ++ if (mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = __lasx_xvshuf4i_h(out4, 0xff); ++ out5 = __lasx_xvshuf4i_h(out5, 0xff); ++ out0 = lasx_pix_multiply(out0, out4); ++ out2 = lasx_pix_multiply(out2, out5); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = 
__lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = __lasx_xvshuf4i_h(out4, 0xff); ++ out5 = __lasx_xvshuf4i_h(out5, 0xff); ++ out0 = lasx_pix_multiply(out0, out4); ++ out2 = lasx_pix_multiply(out2, out5); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ia = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8_ADD_UN8x4(s, ia, d); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_out_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ if(mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ ++ dest0 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ dest0 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t a = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8(s, a); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_out_reverse_u 
(pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ if(mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t a = ALPHA_8 (~s); ++ UN8x4_MUL_UN8 (d, a); ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lasx_combine_add_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ if (mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, 
out3); ++ ++ dest1 = __lasx_xvpickev_b(out2, out0); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ dest1 = __lasx_xvpickev_b(out2, out0); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ UN8x4_ADD_UN8x4(d, s); ++ *(dest + i) = d; ++ } ++} ++ ++/* ++ * Multiply ++ * ++ * ad * as * B(d / ad, s / as) ++ * = ad * as * d/ad * s/as ++ * = d * s ++ * ++ */ ++static void ++lasx_combine_multiply_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5, out6, out7; ++ __m256i tmp0, tmp1; ++ ++ if (mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ dest1 = __lasx_xvshuf4i_b(dest1, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out6 = lasx_pix_multiply(out0, out6); ++ out7 = lasx_pix_multiply(out2, out7); ++ out1 = lasx_pix_multiply(out4, out1); ++ out3 = lasx_pix_multiply(out5, out3); ++ dest0 = __lasx_xvpickev_b(out7, out6); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ ++ out4 = lasx_pix_multiply(out4, out0); ++ out5 = lasx_pix_multiply(out5, out2); ++ dest1 = __lasx_xvpickev_b(out5, out4); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, 
tmp1, 0x13); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ dest1 = __lasx_xvshuf4i_b(dest1, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out6 = lasx_pix_multiply(out0, out6); ++ out7 = lasx_pix_multiply(out2, out7); ++ out1 = lasx_pix_multiply(out4, out1); ++ out3 = lasx_pix_multiply(out5, out3); ++ dest0 = __lasx_xvpickev_b(out7, out6); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ ++ out4 = lasx_pix_multiply(out4, out0); ++ out5 = lasx_pix_multiply(out5, out2); ++ dest1 = __lasx_xvpickev_b(out5, out4); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ss = s; ++ uint32_t src_ia = ALPHA_8(~s); ++ uint32_t dest_ia = ALPHA_8(~d); ++ ++ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(ss, dest_ia, d, src_ia); ++ UN8x4_MUL_UN8x4(d, s); ++ UN8x4_ADD_UN8x4(d, ss); ++ ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lasx_combine_src_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ combine_mask_value_ca(&s, &m); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_over_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5; ++ __m256i tmp0, tmp1; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = 
__lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = lasx_pix_multiply(out0, out1); ++ out5 = lasx_pix_multiply(out2, out3); ++ out0 = __lasx_xvshuf4i_h(out0, 0xff); ++ out2 = __lasx_xvshuf4i_h(out2, 0xff); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ ++ out1 = __lasx_xvxor_v(out1, bit_set); ++ out3 = __lasx_xvxor_v(out3, bit_set); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ ++ dest0 = __lasx_xvpickev_b(out5, out4); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_ca (&s, &m); ++ a = ~m; ++ if (a) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8x4_ADD_UN8x4(d, a, s); ++ s = d; ++ } ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_out_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = __lasx_xvshuf4i_h(out0, 0xff); ++ out2 = __lasx_xvshuf4i_h(out2, 0xff); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ ++ out1 = __lasx_xvxor_v(out1, bit_set); ++ out3 = __lasx_xvxor_v(out3, bit_set); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ dest0 = __lasx_xvpickev_b(out3, out1); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_alpha_ca(&s, &m); ++ a = ~m; ++ ++ if (a != ~0) { ++ uint32_t d = 0; ++ ++ if (a) { ++ d = *(dest + i); ++ UN8x4_MUL_UN8x4(d, a); ++ } ++ *(dest + i) = d; ++ } ++ } ++} ++ ++/* ++ * w : length in bytes ++ */ ++static void force_inline ++lasx_blt_one_line_u8 (uint8_t *pDst, uint8_t *pSrc, int w) ++{ ++ while (((uintptr_t)pDst & 31) && w) { ++ *pDst = *pSrc; ++ pSrc += 1; ++ pDst += 1; ++ w -= 1; ++ } ++ ++ while (w >= 64) { ++ __m256i src0, src1; ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ ++ w -= 64; ++ pSrc += 64; ++ pDst += 64; ++ } ++ ++ if (w >= 32) { ++ 
__lasx_xvst(__lasx_xvld(pSrc, 0), pDst, 0); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ while (w >= 8) { ++ *(uint64_t *)pDst = *(uint64_t *)pSrc; ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy one bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in half word ++ */ ++static void ++lasx_blt_one_line_u16 (uint16_t *pDst, uint16_t *pSrc, int w) ++{ ++ /* align the dst to 32 byte */ ++ while (((uintptr_t)pDst & 31) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 64) { ++ /* copy 128 bytes */ ++ __m256i src0, src1, src2, src3; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ src2 = __lasx_xvld(pSrc, 64); ++ src3 = __lasx_xvld(pSrc, 96); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ __lasx_xvst(src2, pDst, 64); ++ __lasx_xvst(src3, pDst, 96); ++ ++ w -= 64; ++ pSrc += 64; ++ pDst += 64; ++ } ++ ++ if (w >= 32) { ++ /* copy 64 bytes */ ++ __m256i src0, src1; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ /* copy 32 bytes */ ++ __lasx_xvst(__lasx_xvld(pSrc, 0), pDst, 0); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ while (w--) { ++ /* copy 2 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in word ++ */ ++static force_inline void ++lasx_blt_one_line_u32 (uint32_t *pDst, uint32_t *pSrc, int w) ++{ ++ /* align the dst to 32 byte */ ++ while (((uintptr_t)pDst & 31) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 64) { ++ __m256i src0, src1, src2, src3; ++ __m256i src4, src5, src6, src7; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ src2 = __lasx_xvld(pSrc, 64); ++ src3 = __lasx_xvld(pSrc, 96); ++ src4 = __lasx_xvld(pSrc, 128); ++ src5 = __lasx_xvld(pSrc, 160); ++ src6 = __lasx_xvld(pSrc, 192); ++ src7 = __lasx_xvld(pSrc, 224); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ __lasx_xvst(src2, pDst, 64); ++ __lasx_xvst(src3, pDst, 96); ++ __lasx_xvst(src4, pDst, 128); ++ __lasx_xvst(src5, pDst, 160); ++ __lasx_xvst(src6, pDst, 192); ++ __lasx_xvst(src7, pDst, 224); ++ ++ w -= 64; ++ pSrc += 64; ++ pDst += 64; ++ } ++ ++ if (w >= 32) { ++ /* copy 32 bytes once a time */ ++ __m256i src0, src1, src2, src3; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ src2 = __lasx_xvld(pSrc, 64); ++ src3 = __lasx_xvld(pSrc, 96); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ __lasx_xvst(src2, pDst, 64); ++ __lasx_xvst(src3, pDst, 96); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ /* copy 64 bytes once a time */ ++ __m256i src0, src1; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ __m256i src; ++ /* copy 32 bytes once a time */ ++ src = __lasx_xvld(pSrc, 0); ++ __lasx_xvst(src, pDst, 0); ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy 4 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++static pixman_bool_t ++lasx_blt (pixman_implementation_t *imp, ++ uint32_t * src_bits, ++ uint32_t * dst_bits, ++ int src_stride, ++ int dst_stride, ++ int src_bpp, ++ int dst_bpp, ++ int src_x, ++ int src_y, ++ int dest_x, ++ int dest_y, ++ int width, ++ int height) ++{ ++ if (src_bpp != 
dst_bpp) ++ return FALSE; ++ ++ if (src_bpp == 8) { ++ uint8_t *src_b = (uint8_t *)src_bits; ++ uint8_t *dst_b = (uint8_t *)dst_bits; ++ ++ src_stride = src_stride * 4; ++ dst_stride = dst_stride * 4; ++ ++ src_b += src_stride * src_y + src_x; ++ dst_b += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lasx_blt_one_line_u8 (dst_b, src_b, width); ++ dst_b += dst_stride; ++ src_b += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 16) { ++ uint16_t *src_h = (uint16_t *)src_bits; ++ uint16_t *dst_h = (uint16_t *)dst_bits; ++ ++ src_stride = src_stride * 2; ++ dst_stride = dst_stride * 2; ++ ++ src_h += src_stride * src_y + src_x; ++ dst_h += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lasx_blt_one_line_u16 (dst_h, src_h, width); ++ dst_h += dst_stride; ++ src_h += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 32) { ++ src_bits += src_stride * src_y + src_x; ++ dst_bits += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lasx_blt_one_line_u32 (dst_bits, src_bits, width); ++ dst_bits += dst_stride; ++ src_bits += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static void ++lasx_fill_u8 (uint8_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint8_t filler) ++{ ++ __m256i xvfill = __lasx_xvreplgr2vr_b(filler); ++ int byte_stride = stride * 4; ++ dst += y * byte_stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint8_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 31)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 64) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ w -= 64; ++ d += 64; ++ } ++ ++ if (w >= 32) { ++ __lasx_xvst(xvfill, d, 0); ++ w -= 32; ++ d += 32; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += byte_stride; ++ } ++} ++ ++static void ++lasx_fill_u16 (uint16_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint16_t filler) ++{ ++ __m256i xvfill = __lasx_xvreplgr2vr_h(filler); ++ int short_stride = stride * 2; ++ dst += y * short_stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint16_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 31)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lasx_xvst(xvfill, d, 0); ++ w -= 16; ++ d += 16; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += short_stride; ++ } ++} ++ ++static void ++lasx_fill_u32 (uint32_t *bits, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ __m256i xvfill = __lasx_xvreplgr2vr_w(filler); ++ bits += y * stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint32_t *d = bits; ++ ++ while (w && ((uintptr_t)d & 31)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ __lasx_xvst(xvfill, d, 64); ++ __lasx_xvst(xvfill, d, 96); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ w -= 16; ++ d += 16; ++ } ++ ++ if (w >= 8) { ++ __lasx_xvst(xvfill, d, 0); ++ w -= 8; ++ d += 8; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ bits += stride; ++ } ++} ++ ++static pixman_bool_t ++lasx_fill (pixman_implementation_t *imp, ++ uint32_t * bits, ++ int stride, ++ int bpp, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ switch (bpp) { ++ case 8: ++ lasx_fill_u8 
((uint8_t *)bits, stride, x, y, width, height, (uint8_t)filler); ++ return TRUE; ++ ++ case 16: ++ lasx_fill_u16 ((uint16_t *)bits, stride, x, y, width, height, (uint16_t)filler); ++ return TRUE; ++ ++ case 32: ++ lasx_fill_u32 (bits, stride, x, y, width, height, filler); ++ return TRUE; ++ ++ default: ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++ ++static void ++lasx_composite_over_n_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca; ++ uint32_t *dst_line, *dst, d; ++ uint8_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int32_t w; ++ v8u32 vsrca, vsrc; ++ __m256i vff; ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ vsrc = (v8u32)__lasx_xvreplgr2vr_w(src); ++ srca = src >> 24; ++ vsrca = (v8u32)__lasx_xvreplgr2vr_w(srca); ++ vff = __lasx_xvreplgr2vr_w(0xff); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 8) { ++ v8u32 ma = {mask[0], mask[1], mask[2], mask[3], ++ mask[4], mask[5], mask[6], mask[7]}; ++ ++ if (__lasx_xbnz_w(__lasx_xvseqi_w((__m256i)ma, 0xff))){ ++ if (__lasx_xbnz_w(__lasx_xvseqi_w(vsrca, 0xff))) ++ *(__m256i*) dst = (__m256i)vsrc; ++ else if (__lasx_xbnz_w(__lasx_xvsub_w((__m256i)ma, vff))) ++ *(__m256i*) dst = lasx_over_u((__m256i)vsrc, *(__m256i*)dst); ++ } else if (__lasx_xbnz_w((__m256i)ma)) { ++ __m256i d0 = lasx_in_u((__m256i)vsrc, (__m256i)ma); ++ *(__m256i*) dst = lasx_over_u(d0, *(__m256i*)dst); ++ } else { ++ for(int i = 0; i < 8; i++) { ++ if (mask[i] == 0xff) { ++ if (vsrca[i] == 0xff) ++ *(dst + i) = vsrc[i]; ++ else ++ *(dst + i) = over(vsrc[i], *(dst + i)); ++ } else if (mask[i]) { ++ m = mask[i]; ++ d = in(vsrc[i], m); ++ *(dst + i) = over(d, *(dst + i)); ++ } ++ } ++ } ++ dst += 8; ++ w -= 8; ++ mask += 8; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ if (m == 0xff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (m) { ++ d = in(src, m); ++ *dst = over(d, *dst); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int dst_stride, src_stride; ++ int32_t w; ++ uint16_t t; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ lasx_combine_add_u(imp, op, (uint32_t *)dst, (uint32_t *)src, NULL, w >> 2); ++ dst += w & 0xfffc; ++ src += w & 0xfffc; ++ w &= 3; ++ ++ while (w--) { ++ t = (*dst) + (*src++); ++ *dst++ = t | (0 - (t >> 8)); ++ } ++ } ++} ++ ++static void ++lasx_composite_add_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ int dst_stride, src_stride; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ 
++ while (height--) { ++ lasx_combine_add_u(imp, op, dst_line, src_line, NULL, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lasx_composite_over_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ int dst_stride, src_stride; ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ lasx_combine_over_u_no_mask (dst_line, src_line, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lasx_composite_copy_area (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ bits_image_t src_bits, dst_bits; ++ src_bits = info->src_image->bits; ++ dst_bits = info->dest_image->bits; ++ lasx_blt (imp, src_bits.bits, ++ dst_bits.bits, ++ src_bits.rowstride, ++ dst_bits.rowstride, ++ PIXMAN_FORMAT_BPP (src_bits.format), ++ PIXMAN_FORMAT_BPP (dst_bits.format), ++ info->src_x, info->src_y, info->dest_x, ++ info->dest_y, info->width, info->height); ++} ++ ++static void ++lasx_composite_src_x888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m256i src0, src1; ++ __m256i rb0, rb1, t0, t1, g0, g1; ++ __m256i tmp; ++ __m256i mask_565_rb = __lasx_xvreplgr2vr_w(0x00f800f8); ++ __m256i mask_multiplier = __lasx_xvreplgr2vr_w(0x20000004); ++ __m256i mask_green_4x32 = __lasx_xvreplgr2vr_w(0x0000fc00); ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && (uintptr_t)src & 31) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 16) { ++ src0 = __lasx_xvld(src, 0); ++ src1 = __lasx_xvld(src, 32); ++ src += 16; ++ w -= 16; ++ ++ rb0 = src0 & mask_565_rb; ++ rb1 = src1 & mask_565_rb; ++ t0 = __lasx_xvdp2_w_h(rb0, mask_multiplier); ++ t1 = __lasx_xvdp2_w_h(rb1, mask_multiplier); ++ g0 = src0 & mask_green_4x32; ++ g1 = src1 & mask_green_4x32; ++ t0 |= g0; ++ t1 |= g1; ++ t0 = __lasx_xvslli_w(t0, 11); ++ t1 = __lasx_xvslli_w(t1, 11); ++ t0 = __lasx_xvsrai_w(t0, 16); ++ t1 = __lasx_xvsrai_w(t1, 16); ++ t0 = __lasx_xvsat_h(t0, 15); ++ t1 = __lasx_xvsat_h(t1, 15); ++ tmp = __lasx_xvpickev_h(t1, t0); ++ tmp = __lasx_xvpermi_d(tmp, 0xd8); ++ __lasx_xvst(tmp, dst, 0); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ src0 = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ ++ rb0 = src0 & mask_565_rb; ++ t0 = __lasx_xvdp2_w_h(rb0, mask_multiplier); ++ g0 = src0 & mask_green_4x32; ++ t0 |= g0; ++ t0 = __lasx_xvslli_w(t0, 11); ++ t0 = __lasx_xvsrai_w(t0, 16); ++ t0 = __lasx_xvsat_h(t0, 15); ++ tmp = __lasx_xvpickev_h(t0, t0); ++ __lasx_xvstelm_d(tmp, dst, 0, 0); ++ __lasx_xvstelm_d(tmp, dst, 8, 2); ++ dst += 8; ++ } ++ ++ while (w--) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_in_n_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS(info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int 
dst_stride, mask_stride; ++ uint32_t m, src, srca; ++ int32_t w; ++ uint16_t t; ++ ++ __m256i alpha, tmp; ++ __m256i vmask, vmask_lo, vmask_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i mask_zero = __lasx_xvldi(0); ++ ++ PIXMAN_IMAGE_GET_LINE(dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE(mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ alpha = __lasx_xvreplgr2vr_w(src); ++ alpha = __lasx_xvilvl_b(mask_zero, alpha); ++ alpha = __lasx_xvshuf4i_w(alpha, 0x44); ++ alpha = __lasx_xvshuf4i_h(alpha, 0xff); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 32) { ++ vmask = __lasx_xvld(mask, 0); ++ vdst = __lasx_xvld(dst, 0); ++ mask += 32; ++ w -= 32; ++ ++ vmask_lo = __lasx_vext2xv_hu_bu(vmask); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vmask_hi = __lasx_xvpermi_q(vmask, vmask, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); ++ vmask_hi = __lasx_vext2xv_hu_bu(vmask_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vmask_lo = lasx_pix_multiply(alpha, vmask_lo); ++ vmask_hi = lasx_pix_multiply(alpha, vmask_hi); ++ vdst_lo = lasx_pix_multiply(vmask_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vmask_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ tmp = __lasx_xvpermi_d(tmp, 0xd8); ++ __lasx_xvst(tmp, dst, 0); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vmask = __lasx_xvld(mask, 0); ++ vdst = __lasx_xvld(dst, 0); ++ mask += 16; ++ w -= 16; ++ ++ vmask_lo = __lasx_vext2xv_hu_bu(vmask); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vmask_hi = __lasx_xvpermi_q(vmask, vmask, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); ++ vmask_hi = __lasx_vext2xv_hu_bu(vmask_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vmask_lo = lasx_pix_multiply(alpha, vmask_lo); ++ vmask_hi = lasx_pix_multiply(alpha, vmask_hi); ++ vdst_lo = lasx_pix_multiply(vmask_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vmask_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ __lasx_xvstelm_d(tmp, dst, 0, 0); ++ __lasx_xvstelm_d(tmp, dst, 8, 2); ++ dst += 16; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ m = MUL_UN8(m, srca, t); ++ if (m == 0) ++ *dst = 0; ++ else if (m != 0xff) ++ *dst = MUL_UN8(m, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_in_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int src_stride, dst_stride; ++ int32_t w, s; ++ uint16_t t; ++ ++ __m256i tmp; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 32) { ++ vsrc = __lasx_xvld(src, 0); ++ vdst = __lasx_xvld(dst, 0); ++ src += 32; ++ w -= 32; ++ ++ vsrc_lo = __lasx_vext2xv_hu_bu(vsrc); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vsrc_hi = __lasx_xvpermi_q(vsrc, vsrc, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); 
++ vsrc_hi = __lasx_vext2xv_hu_bu(vsrc_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vdst_lo = lasx_pix_multiply(vsrc_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vsrc_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ tmp = __lasx_xvpermi_d(tmp, 0xd8); ++ __lasx_xvst(tmp, dst, 0); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vsrc = __lasx_xvld(src, 0); ++ vdst = __lasx_xvld(dst, 0); ++ src += 16; ++ w -= 16; ++ ++ vsrc_lo = __lasx_vext2xv_hu_bu(vsrc); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vsrc_hi = __lasx_xvpermi_q(vsrc, vsrc, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); ++ vsrc_hi = __lasx_vext2xv_hu_bu(vsrc_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vdst_lo = lasx_pix_multiply(vsrc_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vsrc_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ __lasx_xvstelm_d(tmp, dst, 0, 0); ++ __lasx_xvstelm_d(tmp, dst, 8, 2); ++ dst += 16; ++ } ++ ++ while (w--) { ++ s = *src++; ++ if (s == 0) ++ *dst = 0; ++ else if (s != 0xff) ++ *dst = MUL_UN8(s, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca, ns; ++ uint32_t *dst_line, *dst, nd; ++ uint32_t *mask_line, *mask, ma; ++ int dst_stride, mask_stride; ++ int32_t w; ++ ++ __m256i d, m, t; ++ __m256i tmp0, tmp1; ++ __m256i s, sa, d0, d1, m0, m1, t0, t1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ s = __lasx_xvreplgr2vr_w(src); ++ tmp0 = __lasx_xvilvl_b(zero, s); ++ tmp1 = __lasx_xvilvh_b(zero, s); ++ s = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ sa = __lasx_xvshuf4i_h(s, 0xff); ++ ++ while(height --) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ w--; ++ } ++ ++ while(w >= 8) { ++ m = __lasx_xvld(mask, 0); ++ mask += 8; ++ w -= 8; ++ ++ if (__lasx_xbnz_v(m)) { ++ d = __lasx_xvld(dst, 0); ++ d0 = __lasx_vext2xv_hu_bu(d); ++ m0 = __lasx_vext2xv_hu_bu(m); ++ d1 = __lasx_xvpermi_q(d, d, 0x03); ++ m1 = __lasx_xvpermi_q(m, m, 0x03); ++ d1 = __lasx_vext2xv_hu_bu(d1); ++ m1 = __lasx_vext2xv_hu_bu(m1); ++ ++ t0 = lasx_pix_multiply(s, m0); ++ t1 = lasx_pix_multiply(s, m1); ++ ++ m0 = lasx_pix_multiply(m0, sa); ++ m1 = lasx_pix_multiply(m1, sa); ++ m0 = __lasx_xvxor_v(m0, bit_set); ++ m1 = __lasx_xvxor_v(m1, bit_set); ++ d0 = lasx_pix_multiply(d0, m0); ++ d1 = lasx_pix_multiply(d1, m1); ++ ++ d = __lasx_xvpickev_b(d1, d0); ++ t = __lasx_xvpickev_b(t1, t0); ++ d = __lasx_xvpermi_d(d, 0xd8); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvst(d, dst, 0); ++ } ++ dst += 
8; ++ } ++ ++ while(w--) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint32_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ ++ __m256i d, t; ++ __m256i s, d0, d1; ++ __m256i tmp0, tmp1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ s = __lasx_xvreplgr2vr_w(src); ++ tmp0 = __lasx_xvilvl_b(zero, s); ++ tmp1 = __lasx_xvilvh_b(zero, s); ++ s = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ d = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_vext2xv_hu_bu(d); ++ d0 = __lasx_xvshuf4i_h(d0, 0xff); ++ d0 = __lasx_xvxor_v(d0, bit_set); ++ d0 = lasx_pix_multiply(d0, s); ++ t = __lasx_xvpickev_b(zero, d0); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvstelm_w(d, dst, 0, 0); ++ dst += 1; ++ w--; ++ } ++ ++ while (w >= 8) { ++ d = __lasx_xvld(dst, 0); ++ w -= 8; ++ ++ d0 = __lasx_vext2xv_hu_bu(d); ++ d1 = __lasx_xvpermi_q(d, d, 0x03); ++ d1 = __lasx_vext2xv_hu_bu(d1); ++ d0 = __lasx_xvshuf4i_h(d0, 0xff); ++ d1 = __lasx_xvshuf4i_h(d1, 0xff); ++ d0 = __lasx_xvxor_v(d0, bit_set); ++ d1 = __lasx_xvxor_v(d1, bit_set); ++ d0 = lasx_pix_multiply(d0, s); ++ d1 = lasx_pix_multiply(d1, s); ++ t = __lasx_xvpickev_b(d1, d0); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvst(d, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ d = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_vext2xv_hu_bu(d); ++ d0 = __lasx_xvshuf4i_h(d0, 0xff); ++ d0 = __lasx_xvxor_v(d0, bit_set); ++ d0 = lasx_pix_multiply(d0, s); ++ t = __lasx_xvpickev_b(zero, d0); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvstelm_w(d, dst, 0, 0); ++ dst += 1; ++ } ++ } ++} ++ ++static void ++lasx_composite_src_x888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ int32_t w; ++ int dst_stride, src_stride; ++ __m256i mask = mask_ff000000; ++ __m256i vsrc0, vsrc1, vsrc2, vsrc3, vsrc4, vsrc5, vsrc6, vsrc7; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ *dst++ = *src++ | 0xff000000; ++ w--; ++ } ++ ++ while (w >= 64) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc1 = __lasx_xvld(src, 32); ++ vsrc2 = __lasx_xvld(src, 64); ++ vsrc3 = __lasx_xvld(src, 96); ++ vsrc4 = __lasx_xvld(src, 128); ++ vsrc5 = __lasx_xvld(src, 160); ++ vsrc6 = __lasx_xvld(src, 192); ++ vsrc7 = __lasx_xvld(src, 224); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ vsrc1 = __lasx_xvor_v(vsrc1, 
mask); ++ vsrc2 = __lasx_xvor_v(vsrc2, mask); ++ vsrc3 = __lasx_xvor_v(vsrc3, mask); ++ vsrc4 = __lasx_xvor_v(vsrc4, mask); ++ vsrc5 = __lasx_xvor_v(vsrc5, mask); ++ vsrc6 = __lasx_xvor_v(vsrc6, mask); ++ vsrc7 = __lasx_xvor_v(vsrc7, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ __lasx_xvst(vsrc1, dst, 32); ++ __lasx_xvst(vsrc2, dst, 64); ++ __lasx_xvst(vsrc3, dst, 96); ++ __lasx_xvst(vsrc4, dst, 128); ++ __lasx_xvst(vsrc5, dst, 160); ++ __lasx_xvst(vsrc6, dst, 192); ++ __lasx_xvst(vsrc7, dst, 224); ++ ++ src += 64; ++ w -= 64; ++ dst += 64; ++ } ++ ++ if (w >= 32) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc1 = __lasx_xvld(src, 32); ++ vsrc2 = __lasx_xvld(src, 64); ++ vsrc3 = __lasx_xvld(src, 96); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ vsrc1 = __lasx_xvor_v(vsrc1, mask); ++ vsrc2 = __lasx_xvor_v(vsrc2, mask); ++ vsrc3 = __lasx_xvor_v(vsrc3, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ __lasx_xvst(vsrc1, dst, 32); ++ __lasx_xvst(vsrc2, dst, 64); ++ __lasx_xvst(vsrc3, dst, 96); ++ ++ src += 32; ++ w -= 32; ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc1 = __lasx_xvld(src, 32); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ vsrc1 = __lasx_xvor_v(vsrc1, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ __lasx_xvst(vsrc1, dst, 32); ++ ++ src += 16; ++ w -= 16; ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ ++ src += 8; ++ w -= 8; ++ dst += 8; ++ } ++ ++ while (w--) { ++ *dst++ = *src++ | 0xff000000; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_n_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t src; ++ uint16_t sa; ++ ++ __m256i d0; ++ __m256i vsrc, t0, t1; ++ __m256i a0, a0_l, a0_h; ++ __m256i b0, b0_l, b0_h; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i one_half = __lasx_xvreplgr2vr_h(0x80); ++ __m256i g_shift = __lasx_xvreplgr2vr_h(8); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ sa = (src >> 24); ++ vsrc = __lasx_xvreplgr2vr_h(sa); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 32) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 32; ++ mask += 32; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1 ,7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ d0 = __lasx_xvpermi_d(d0, 0xd8); ++ 
__lasx_xvst(d0, dst, 0); ++ dst += 32; ++ } ++ ++ while (w >= 16) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 16; ++ mask += 16; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1, 7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ __lasx_xvstelm_d(d0, dst, 8, 2); ++ dst += 16; ++ } ++ ++ while (w >= 8) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 8; ++ mask += 8; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1, 7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ dst += 8; ++ } ++ ++ while (w >= 4) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 4; ++ mask += 4; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1, 7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint16_t tmp; ++ uint16_t a; ++ uint32_t m, d; ++ uint32_t r; ++ ++ a = *mask++; ++ d = *dst; ++ ++ m = MUL_UN8 (sa, a, tmp); ++ r = ADD_UN8 (m, d, tmp); ++ ++ *dst++ = r; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_n_8 (pixman_implementation_t *imp, ++ 
pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ uint32_t src; ++ ++ __m256i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ src >>= 24; ++ ++ if (src == 0x00) ++ return; ++ ++ if (src == 0xff) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, ++ 8, dest_x, dest_y, width, height, 0xff); ++ return; ++ } ++ ++ vsrc = __lasx_xvreplgr2vr_b(src); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ d0 = __lasx_xvldrepl_b(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_b(d0, dst, 0, 0); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 64) { ++ d0 = __lasx_xvld(dst, 0); ++ d1 = __lasx_xvld(dst, 32); ++ w -= 64; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ d1 = __lasx_xvsadd_bu(vsrc, d1); ++ __lasx_xvst(d0, dst, 0); ++ __lasx_xvst(d1, dst, 32); ++ dst += 64; ++ } ++ ++ if (w >= 32) { ++ d0 = __lasx_xvld(dst, 0); ++ w -= 32; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvst(d0, dst, 0); ++ dst += 32; ++ } ++ ++ if (w >= 8) { ++ d0 = __lasx_xvldrepl_d(dst, 0); ++ w -= 8; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ d0 = __lasx_xvldrepl_w(dst, 0); ++ w -= 4; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lasx_xvldrepl_b(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_b(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst, src; ++ int dst_stride, w; ++ ++ __m256i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ if (src == ~0) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, ++ dest_x, dest_y, width, height, ~0); ++ return; ++ } ++ ++ vsrc = __lasx_xvreplgr2vr_w(src); ++ ++ while (height--) { ++ w = width; ++ ++ dst = dst_line; ++ dst_line += dst_stride; ++ ++ while (w && (uintptr_t)dst & 31) { ++ d0 = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 16) { ++ d0 = __lasx_xvld(dst, 0); ++ d1 = __lasx_xvld(dst, 32); ++ w -= 16; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ d1 = __lasx_xvsadd_bu(vsrc, d1); ++ __lasx_xvst(d0, dst, 0); ++ __lasx_xvst(d1, dst, 32); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ d0 = __lasx_xvld(dst, 0); ++ w -= 8; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvst(d0, dst, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ d0 = __lasx_xvld(dst, 0); ++ w -= 4; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ __lasx_xvstelm_d(d0, dst, 8, 1); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static force_inline __m256i ++unpack_32_1x256(uint32_t data) ++{ ++ __m256i zero = __lasx_xvldi(0); ++ __m256i tmp = __lasx_xvinsgr2vr_w(zero, data, 0); ++ return __lasx_xvilvl_b(zero, tmp); ++} ++ ++static force_inline __m256i 
++unpack_32_2x256(uint32_t data) ++{ ++ __m256i tmp0, out0; ++ __m256i zero = __lasx_xvldi(0); ++ tmp0 = __lasx_xvinsgr2vr_w(tmp0, data, 0); ++ tmp0 = __lasx_xvpermi_q(tmp0, tmp0, 0x20); ++ out0 = __lasx_xvilvl_b(zero, tmp0); ++ ++ return out0; ++} ++ ++static force_inline __m256i ++expand_pixel_32_1x256(uint32_t data) ++{ ++ return __lasx_xvshuf4i_w(unpack_32_1x256(data), 0x44); ++} ++ ++static force_inline __m256i ++expand_pixel_32_2x256(uint32_t data) ++{ ++ return __lasx_xvshuf4i_w(unpack_32_2x256(data), 0x44); ++} ++ ++static force_inline __m256i ++expand_alpha_1x256(__m256i data) ++{ ++ return __lasx_xvshuf4i_h(data, 0xff); ++} ++ ++static force_inline __m256i ++expand_alphaa_2x256(__m256i data) ++{ ++ __m256i tmp0; ++ tmp0 = __lasx_xvshuf4i_h(data, 0xff); ++ tmp0 = __lasx_xvpermi_q(tmp0, tmp0, 0x20); ++ ++ return tmp0; ++} ++ ++static force_inline __m256i ++unpack_565_to_8888(__m256i lo) ++{ ++ __m256i r, g, b, rb, t; ++ __m256i mask_green_4x32 = __lasx_xvreplgr2vr_w(0x0000fc00); ++ __m256i mask_red_4x32 = __lasx_xvreplgr2vr_w(0x00f80000); ++ __m256i mask_blue_4x32 = __lasx_xvreplgr2vr_w(0x000000f8); ++ __m256i mask_565_fix_rb = __lasx_xvreplgr2vr_w(0x00e000e0); ++ __m256i mask_565_fix_g = __lasx_xvreplgr2vr_w(0x0000c000); ++ ++ r = __lasx_xvslli_w(lo, 8); ++ r = __lasx_xvand_v(r, mask_red_4x32); ++ g = __lasx_xvslli_w(lo, 5); ++ g = __lasx_xvand_v(g, mask_green_4x32); ++ b = __lasx_xvslli_w(lo, 3); ++ b = __lasx_xvand_v(b, mask_blue_4x32); ++ ++ rb = __lasx_xvor_v(r, b); ++ t = __lasx_xvand_v(rb, mask_565_fix_rb); ++ t = __lasx_xvsrli_w(t, 5); ++ rb = __lasx_xvor_v(rb, t); ++ ++ t = __lasx_xvand_v(g, mask_565_fix_g); ++ t = __lasx_xvsrli_w(t, 6); ++ g = __lasx_xvor_v(g, t); ++ ++ return (__lasx_xvor_v(rb, g)); ++} ++ ++static force_inline void ++unpack_256_2x256(__m256i data, __m256i *data_lo, __m256i *data_hi) ++{ ++ __m256i mask_zero = __lasx_xvldi(0); ++ *data_lo = __lasx_xvilvl_b(mask_zero, data); ++ *data_hi = __lasx_xvilvh_b(mask_zero, data); ++} ++ ++static force_inline void ++unpack_565_256_4x256(__m256i data, __m256i *data0, ++ __m256i *data1, __m256i *data2, __m256i *data3) ++{ ++ __m256i lo, hi; ++ __m256i zero = __lasx_xvldi(0); ++ lo = __lasx_xvilvl_h(zero, data); ++ hi = __lasx_xvilvh_h(zero, data); ++ lo = unpack_565_to_8888(lo); ++ hi = unpack_565_to_8888(hi); ++ ++ unpack_256_2x256((__m256i)lo, (__m256i*)data0, (__m256i*)data1); ++ unpack_256_2x256((__m256i)hi, (__m256i*)data2, (__m256i*)data3); ++} ++ ++static force_inline void ++negate_2x256(__m256i data_lo, __m256i data_hi, __m256i *neg_lo, __m256i *neg_hi) ++{ ++ *neg_lo = __lasx_xvxor_v(data_lo, mask_00ff); ++ *neg_hi = __lasx_xvxor_v(data_hi, mask_00ff); ++} ++ ++static force_inline void ++over_2x256(__m256i *src_lo, __m256i *src_hi, __m256i *alpha_lo, ++ __m256i *alpha_hi, __m256i *dst_lo, __m256i *dst_hi) ++{ ++ __m256i t1, t2; ++ negate_2x256(*alpha_lo, *alpha_hi, &t1, &t2); ++ *dst_lo = lasx_pix_multiply(*dst_lo, t1); ++ *dst_hi = lasx_pix_multiply(*dst_hi, t2); ++ *dst_lo = __lasx_xvsadd_bu(*src_lo, *dst_lo); ++ *dst_hi = __lasx_xvsadd_bu(*src_hi, *dst_hi); ++} ++ ++static force_inline __m256i ++pack_2x256_256(__m256i lo, __m256i hi) ++{ ++ __m256i tmp0 = __lasx_xvsat_bu(lo, 7); ++ __m256i tmp1 = __lasx_xvsat_bu(hi, 7); ++ __m256i tmp2 = __lasx_xvpickev_b(tmp1, tmp0); ++ ++ return tmp2; ++} ++ ++static force_inline __m256i ++pack_565_2x256_256(__m256i lo, __m256i hi) ++{ ++ __m256i data; ++ __m256i r, g1, g2, b; ++ __m256i mask_565_r = __lasx_xvreplgr2vr_w(0x00f80000); ++ __m256i mask_565_g1 = 
__lasx_xvreplgr2vr_w(0x00070000); ++ __m256i mask_565_g2 = __lasx_xvreplgr2vr_w(0x000000e0); ++ __m256i mask_565_b = __lasx_xvreplgr2vr_w(0x0000001f); ++ ++ data = pack_2x256_256 (lo, hi); ++ r = __lasx_xvand_v(data, mask_565_r); ++ g1 = __lasx_xvslli_w(data, 3) & mask_565_g1; ++ g2 = __lasx_xvsrli_w(data, 5) & mask_565_g2; ++ b = __lasx_xvsrli_w(data, 3) & mask_565_b; ++ ++ return (((r|g1)|g2)|b); ++} ++ ++static force_inline __m256i ++expand565_16_1x256(uint16_t pixel) ++{ ++ __m256i m; ++ __m256i zero = __lasx_xvldi(0); ++ ++ m = __lasx_xvinsgr2vr_w(m, pixel, 0); ++ m = unpack_565_to_8888(m); ++ m = __lasx_xvilvl_b(zero, m); ++ ++ return m; ++} ++ ++static force_inline uint32_t ++pack_1x256_32(__m256i data) ++{ ++ __m256i tmp0, tmp1; ++ __m256i zero = __lasx_xvldi(0); ++ ++ tmp0 = __lasx_xvsat_bu(data, 7); ++ tmp1 = __lasx_xvpickev_b(zero, tmp0); ++ ++ return (__lasx_xvpickve2gr_wu(tmp1, 0)); ++} ++ ++static force_inline uint16_t ++pack_565_32_16(uint32_t pixel) ++{ ++ return (uint16_t)(((pixel >> 8) & 0xf800) | ++ ((pixel >> 5) & 0x07e0) | ++ ((pixel >> 3) & 0x001f)); ++} ++ ++static force_inline __m256i ++pack_565_4x256_256(__m256i *v0, __m256i *v1, __m256i *v2, __m256i *v3) ++{ ++ return pack_2x256_256(pack_565_2x256_256(*v0, *v1), ++ pack_565_2x256_256(*v2, *v3)); ++} ++ ++static force_inline void ++expand_alpha_2x256(__m256i data_lo, __m256i data_hi, __m256i *alpha_lo, __m256i *alpha_hi) ++{ ++ *alpha_lo = __lasx_xvshuf4i_h(data_lo, 0xff); ++ *alpha_hi = __lasx_xvshuf4i_h(data_hi, 0xff); ++} ++ ++static force_inline void ++expand_alpha_rev_2x256(__m256i data_lo, __m256i data_hi, __m256i *alpha_lo, __m256i *alpha_hi) ++{ ++ *alpha_lo = __lasx_xvshuf4i_h(data_lo, 0x00); ++ *alpha_hi = __lasx_xvshuf4i_h(data_hi, 0x00); ++} ++ ++static force_inline uint16_t ++composite_over_8888_0565pixel(uint32_t src, uint16_t dst) ++{ ++ __m256i ms; ++ ms = unpack_32_1x256(src); ++ ++ return pack_565_32_16(pack_1x256_32((__m256i)over_1x256((__m256i)ms, ++ (__m256i)expand_alpha_1x256((__m256i)ms), expand565_16_1x256(dst)))); ++} ++ ++static force_inline void ++in_over_2x256(__m256i *src_lo, __m256i *src_hi, __m256i *alpha_lo, __m256i *alpha_hi, ++ __m256i *mask_lo, __m256i *mask_hi, __m256i *dst_lo, __m256i *dst_hi) ++{ ++ __m256i s_lo, s_hi; ++ __m256i a_lo, a_hi; ++ s_lo = lasx_pix_multiply(*src_lo, *mask_lo); ++ s_hi = lasx_pix_multiply(*src_hi, *mask_hi); ++ a_lo = lasx_pix_multiply(*alpha_lo, *mask_lo); ++ a_hi = lasx_pix_multiply(*alpha_hi, *mask_hi); ++ over_2x256(&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); ++} ++ ++static force_inline __m256i ++in_over_1x256(__m256i *src, __m256i *alpha, __m256i *mask, __m256i *dst) ++{ ++ return over_1x256(lasx_pix_multiply(*src, *mask), ++ lasx_pix_multiply(*alpha, *mask), *dst); ++} ++ ++static force_inline __m256i ++expand_alpha_rev_1x256(__m256i data) ++{ ++ __m256i v0 = {0x00000000, 0x00000000, 0xffffffff, 0xffffffff}; ++ __m256i v_hi = __lasx_xvand_v(data, v0); ++ data = __lasx_xvshuf4i_h(data, 0x00); ++ v0 = __lasx_xvnor_v(v0, v0); ++ data = __lasx_xvand_v(data, v0); ++ data = __lasx_xvor_v(data, v_hi); ++ ++ return data; ++} ++ ++static void ++lasx_composite_over_n_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ int32_t w; ++ int dst_stride; ++ __m256i vsrc, valpha; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ vsrc = expand_pixel_32_1x256(src); ++ valpha = expand_alpha_1x256(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ vsrc = __lasx_xvpermi_q(vsrc, vsrc, 0x20); ++ valpha = __lasx_xvpermi_q(valpha, valpha, 0x20); ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst0, &vdst1); ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst2, &vdst3); ++ ++ vdst = pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3); ++ __lasx_xvst(vdst, dst, 0); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ vdst = __lasx_xvld(dst, 0); ++ w -= 8; ++ vsrc = __lasx_xvpermi_q(vsrc, vsrc, 0x20); ++ valpha = __lasx_xvpermi_q(valpha, valpha, 0x20); ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst0, &vdst1); ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst2, &vdst3); ++ ++ vdst = pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3); ++ __lasx_xvstelm_d(vdst, dst, 0, 0); ++ __lasx_xvstelm_d(vdst, dst, 8, 1); ++ dst += 8; ++ } ++ ++ while (w--) { ++ d = *dst; ++ *dst++ = pack_565_32_16(pack_1x256_32( ++ (over_1x256(vsrc,valpha, expand565_16_1x256(d))))); ++ } ++ } ++} ++ ++static void ++lasx_composite_over_8888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst, d; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m256i valpha_lo, valpha_hi; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ __m256i dst0, dst1, dst2, dst3; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vsrc = __lasx_xvld(src, 0); ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ dst0 = __lasx_xvpermi_q(vdst2, vdst0, 0x20); ++ dst1 = __lasx_xvpermi_q(vdst3, vdst1, 0x20); ++ dst2 = __lasx_xvpermi_q(vdst2, vdst0, 0x31); ++ dst3 = __lasx_xvpermi_q(vdst3, vdst1, 0x31); ++ ++ unpack_256_2x256((__m256i)vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &dst0, &dst1); ++ ++ vsrc = __lasx_xvld(src, 32); ++ unpack_256_2x256((__m256i)vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &dst2, &dst3); ++ ++ vdst0 = __lasx_xvpermi_q(dst2, dst0, 0x20); ++ vdst1 = __lasx_xvpermi_q(dst3, dst1, 0x20); ++ vdst2 = __lasx_xvpermi_q(dst2, dst0, 0x31); ++ vdst3 = __lasx_xvpermi_q(dst3, dst1, 0x31); ++ ++ __lasx_xvst(pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ dst += 16; ++ src += 16; ++ } ++ ++ while (w--) { ++ s = *src++; ++ d = *dst; ++ *dst++ = composite_over_8888_0565pixel(s, d); ++ } ++ } ++} ++ ++static void ++lasx_composite_over_n_8_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ 
uint8_t *mask_line, *p; ++ uint32_t *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t m; ++ ++ __m256i mask_zero = __lasx_xvldi(0); ++ __m256i lasx_src, lasx_alpha, lasx_mask, lasx_dest; ++ __m256i vsrc, valpha; ++ __m256i vmask, vmaska, vmask_lo, vmask_hi; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ __m256i dst0, dst1, dst2, dst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ lasx_src = expand_pixel_32_1x256(src); ++ lasx_alpha = expand_alpha_1x256(lasx_src); ++ ++ vsrc = expand_pixel_32_2x256(src); ++ valpha = expand_alphaa_2x256(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ dst0 = __lasx_xvpermi_q(vdst2, vdst0, 0x20); ++ dst1 = __lasx_xvpermi_q(vdst3, vdst1, 0x20); ++ dst2 = __lasx_xvpermi_q(vdst2, vdst0, 0x31); ++ dst3 = __lasx_xvpermi_q(vdst3, vdst1, 0x31); ++ ++ m = *mask; ++ vmaska = unpack_32_1x256(m); ++ mask += 1; ++ m = *mask; ++ vmask = unpack_32_1x256(m); ++ vmask = __lasx_xvpermi_q(vmask, vmaska, 0x20); ++ mask += 1; ++ vmask = __lasx_xvilvl_b(mask_zero, vmask); ++ ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ expand_alpha_rev_2x256(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst0, &dst1); ++ ++ m = *mask; ++ vmaska = unpack_32_1x256(m); ++ mask += 1; ++ m = *mask; ++ vmask = unpack_32_1x256(m); ++ vmask = __lasx_xvpermi_q(vmask, vmaska, 0x20); ++ mask += 1; ++ vmask = __lasx_xvilvl_b(mask_zero, vmask); ++ ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ expand_alpha_rev_2x256(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst2, &dst3); ++ ++ vdst0 = __lasx_xvpermi_q(dst2, dst0, 0x20); ++ vdst1 = __lasx_xvpermi_q(dst3, dst1, 0x20); ++ vdst2 = __lasx_xvpermi_q(dst2, dst0, 0x31); ++ vdst3 = __lasx_xvpermi_q(dst3, dst1, 0x31); ++ ++ __lasx_xvst(pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ dst += 16; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ d = *dst; ++ lasx_mask = expand_alpha_rev_1x256(unpack_32_1x256 (m)); ++ lasx_dest = expand565_16_1x256(d); ++ ++ *dst = pack_565_32_16(pack_1x256_32(in_over_1x256 (&lasx_src, ++ &lasx_alpha, &lasx_mask, &lasx_dest))); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_x888_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *src, *src_line, s; ++ uint32_t *dst, *dst_line, d; ++ uint8_t *mask_line, *p; ++ uint32_t *mask; ++ uint32_t m, w; ++ int src_stride, mask_stride, dst_stride; ++ ++ __m256i mask_zero = __lasx_xvldi(0); ++ __m256i mask_4x32 = mask_ff000000; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i vmask, vmask_lo, vmask_hi, vmaska; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, 
uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ src = src_line; ++ src_line += src_stride; ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 8) { ++ m = *mask; ++ vsrc = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ vsrc |= mask_4x32; ++ ++ if (m == 0xffffffff) { ++ __lasx_xvst(vsrc, dst, 0); ++ } else { ++ vdst = __lasx_xvld(dst, 0); ++ vmask = __lasx_xvilvl_b(mask_zero, unpack_32_1x256(m)); ++ m = *(mask + 1); ++ vmaska = __lasx_xvilvl_b(mask_zero, unpack_32_1x256(m)); ++ vmask = __lasx_xvpermi_q(vmaska, vmask, 0x20); ++ ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ expand_alpha_rev_2x256(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &mask_00ff, &mask_00ff, ++ &vmask_lo, &vmask_hi, &vdst_lo, &vdst_hi); ++ ++ __lasx_xvst(pack_2x256_256(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 8; ++ mask += 2; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ s = 0xff000000 | *src; ++ ++ if (m == 0xff) { ++ *dst = s; ++ } ++ else { ++ __m256i ma, md, ms; ++ d = *dst; ++ ma = expand_alpha_rev_1x256(unpack_32_1x256(m)); ++ md = unpack_32_1x256(d); ++ ms = unpack_32_1x256(s); ++ *dst = pack_1x256_32(in_over_1x256(&ms, &mask_00ff, &ma, &md)); ++ } ++ } ++ src++; ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_8888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int32_t w; ++ int dst_stride, src_stride; ++ ++ __m256i vmask; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i valpha_lo, valpha_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ vmask = __lasx_xvreplgr2vr_h(maska); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ ++ if (__lasx_xbnz_v(vsrc)) { ++ vdst = __lasx_xvld(dst, 0); ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ __lasx_xvst(pack_2x256_256(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 4; ++ w -= 4; ++ ++ vsrc = __lasx_xvpermi_q(vsrc, vsrc, 0x20); ++ if (__lasx_xbnz_v(vsrc)) { ++ vdst = __lasx_xvld(dst, 0); ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ vdst = pack_2x256_256(vdst_lo, vdst_hi); ++ __lasx_xvstelm_d(vdst, dst, 0, 0); ++ __lasx_xvstelm_d(vdst, dst, 8, 1); ++ } ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = 
*src++; ++ ++ if (s) { ++ uint32_t d = *dst; ++ __m256i ms = unpack_32_1x256(s); ++ __m256i alpha = expand_alpha_1x256(ms); ++ __m256i mask = vmask; ++ __m256i dest = unpack_32_1x256(d); ++ *dst = pack_1x256_32(in_over_1x256(&ms, &alpha, &mask, &dest)); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_x888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m256i vmask, valpha, mask_4x32; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i zero = __lasx_xvldi(0); ++ ++ mask_4x32 = __lasx_xvreplgr2vr_w(0xff000000); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ ++ vmask = __lasx_xvreplgr2vr_h(maska); ++ valpha = mask_00ff; ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ vsrc = __lasx_xvor_v(vsrc, mask_4x32); ++ vdst = __lasx_xvld(dst, 0); ++ ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha, &valpha, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ __lasx_xvst(pack_2x256_256(vdst_lo, vdst_hi), dst, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 4; ++ w -= 4; ++ vsrc = __lasx_xvor_v(vsrc, mask_4x32); ++ vdst = __lasx_xvld(dst, 0); ++ ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha, &valpha, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ vdst = pack_2x256_256(vdst_lo, vdst_hi); ++ __lasx_xvstelm_d(vdst, dst, 0, 0); ++ __lasx_xvstelm_d(vdst, dst, 8, 1); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = (*src++) | 0xff000000; ++ uint32_t d = *dst; ++ ++ __m256i alpha, tmask; ++ __m256i src = unpack_32_1x256 (s); ++ __m256i dest = unpack_32_1x256 (d); ++ ++ alpha = __lasx_xvpermi_q(zero, valpha, 0x20); ++ tmask = __lasx_xvpermi_q(zero, vmask, 0x20); ++ ++ *dst = pack_1x256_32(in_over_1x256(&src, &alpha, &tmask, &dest)); ++ ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ uint32_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int w, flag; ++ ++ __m256i vsrc, valpha; ++ __m256i lasx_src, lasx_alpha, lasx_mask, lasx_dest; ++ __m256i vmask, vmask_lo, vmask_hi; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ __m256i dst0, dst1, dst2, dst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ ++ lasx_src = expand_pixel_32_1x256(src); ++ lasx_alpha = expand_alpha_1x256(lasx_src); ++ ++ vsrc = expand_pixel_32_2x256(src); ++ valpha = 
expand_alphaa_2x256(vsrc); ++ ++ while (height--) { ++ mask = mask_line; ++ dst = dst_line; ++ mask_line += mask_stride; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vmask = __lasx_xvld(mask, 0); ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ dst0 = __lasx_xvpermi_q(vdst2, vdst0, 0x20); ++ dst1 = __lasx_xvpermi_q(vdst3, vdst1, 0x20); ++ dst2 = __lasx_xvpermi_q(vdst2, vdst0, 0x31); ++ dst3 = __lasx_xvpermi_q(vdst3, vdst1, 0x31); ++ ++ flag = __lasx_xbnz_v(vmask); ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ vmask = __lasx_xvld(mask, 32); ++ if (flag) { ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst0, &dst1); ++ } ++ ++ flag = __lasx_xbnz_v(vmask); ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ if (flag) { ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst2, &dst3); ++ } ++ ++ vdst0 = __lasx_xvpermi_q(dst2, dst0, 0x20); ++ vdst1 = __lasx_xvpermi_q(dst3, dst1, 0x20); ++ vdst2 = __lasx_xvpermi_q(dst2, dst0, 0x31); ++ vdst3 = __lasx_xvpermi_q(dst3, dst1, 0x31); ++ ++ __lasx_xvst(pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ dst += 16; ++ mask += 16; ++ } ++ ++ while (w--) { ++ m = *(uint32_t *) mask; ++ ++ if (m) { ++ d = *dst; ++ lasx_mask = unpack_32_1x256(m); ++ lasx_dest = expand565_16_1x256(d); ++ *dst = pack_565_32_16(pack_1x256_32(in_over_1x256(&lasx_src, &lasx_alpha, ++ &lasx_mask, &lasx_dest))); ++ } ++ dst++; ++ mask++; ++ } ++ } ++} ++ ++static uint32_t * ++lasx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m256i mask_4x32 = mask_ff000000; ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint32_t *src = (uint32_t *)iter->bits; ++ iter->bits += iter->stride; ++ ++ while (w >= 8) { ++ __lasx_xvst(__lasx_xvor_v(__lasx_xvld(src, 0), mask_4x32), dst, 0); ++ dst += 8; ++ src += 8; ++ w -= 8; ++ } ++ ++ while (w--) { ++ *dst++ = (*src++) | 0xff000000; ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lasx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m256i a, sa, s0, s1, s2, s3, s4; ++ __m256i mask_red, mask_green, mask_blue; ++ __m256i tmp0, tmp1; ++ ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint16_t *src = (uint16_t *)iter->bits; ++ iter->bits += iter->stride; ++ ++ mask_red = __lasx_xvreplgr2vr_h(248); ++ mask_green = __lasx_xvreplgr2vr_h(252); ++ mask_blue = mask_red; ++ a = __lasx_xvreplgr2vr_h(255) << 8; ++ ++ while (w >= 16) { ++ s0 = __lasx_xvld(src, 0); ++ src += 16; ++ w -= 16; ++ ++ //r ++ s1 = __lasx_xvsrli_h(s0, 8); ++ s1 &= mask_red; ++ s2 = __lasx_xvsrli_h(s1, 5); ++ s1 |= s2; ++ ++ //g ++ s2 = __lasx_xvsrli_h(s0, 3); ++ s2 &= mask_green; ++ s3 = __lasx_xvsrli_h(s2, 6); ++ s2 |= s3; ++ ++ //b ++ s3 = s0 << 3; ++ s3 &= mask_blue; ++ s4 = __lasx_xvsrli_h(s3, 5); ++ s3 |= s4; ++ ++ //ar ++ sa = a | s1; ++ ++ //gb ++ s2 <<= 8; ++ s2 |= s3; ++ ++ tmp0 = __lasx_xvilvl_h(sa, s2); ++ tmp1 = __lasx_xvilvh_h(sa, s2); ++ s1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ s3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ __lasx_xvst(s1, dst, 0); ++ __lasx_xvst(s3, dst, 32); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ s0 = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ //r ++ s1 = __lasx_xvsrli_h(s0, 8); ++ s1 &= mask_red; ++ s2 = __lasx_xvsrli_h(s1, 5); ++ s1 |= s2; ++ ++ //g ++ s2 = __lasx_xvsrli_h(s0, 3); ++ s2 &= mask_green; ++ s3 = __lasx_xvsrli_h(s2, 6); ++ s2 |= s3; ++ ++ //b ++ s3 = s0 << 3; ++ s3 
&= mask_blue; ++ s4 = __lasx_xvsrli_h(s3, 5); ++ s3 |= s4; ++ ++ //ar ++ sa = a | s1; ++ ++ //gb ++ s2 <<= 8; ++ s2 |= s3; ++ ++ tmp0 = __lasx_xvilvl_h(sa, s2); ++ tmp1 = __lasx_xvilvh_h(sa, s2); ++ s1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ __lasx_xvst(s1, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ uint16_t s = *src++; ++ *dst++ = convert_0565_to_8888(s); ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lasx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m256i srcv; ++ __m256i t0, t1, t2, t3; ++ __m256i dst0, dst1, dst2, dst3; ++ __m256i zero = __lasx_xvldi(0); ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint8_t *src = iter->bits; ++ ++ iter->bits += iter->stride; ++ ++ while (w >= 32) { ++ srcv = __lasx_xvld(src, 0); ++ src += 32; ++ w -= 32; ++ dst0 = __lasx_xvilvl_b(srcv, zero); ++ dst1 = __lasx_xvilvh_b(srcv, zero); ++ t0 = __lasx_xvilvl_h(dst0, zero); ++ t1 = __lasx_xvilvh_h(dst0, zero); ++ t2 = __lasx_xvilvl_h(dst1, zero); ++ t3 = __lasx_xvilvh_h(dst1, zero); ++ dst0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ dst1 = __lasx_xvpermi_q(t3, t2, 0x20); ++ dst2 = __lasx_xvpermi_q(t1, t0, 0x31); ++ dst3 = __lasx_xvpermi_q(t3, t2, 0x31); ++ __lasx_xvst(dst0, dst, 0); ++ __lasx_xvst(dst1, dst, 32); ++ __lasx_xvst(dst2, dst, 64); ++ __lasx_xvst(dst3, dst, 96); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ srcv = __lasx_xvld(src, 0); ++ src += 16; ++ w -= 16; ++ dst0 = __lasx_xvilvl_b(srcv, zero); ++ dst1 = __lasx_xvilvh_b(srcv, zero); ++ dst0 = __lasx_xvpermi_q(dst1, dst0, 0x20); ++ t0 = __lasx_xvilvl_h(dst0, zero); ++ t1 = __lasx_xvilvh_h(dst0, zero); ++ dst0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ dst1 = __lasx_xvpermi_q(t1, t0, 0x31); ++ __lasx_xvst(dst0, dst, 0); ++ __lasx_xvst(dst1, dst, 32); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ srcv = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ dst0 = __lasx_xvilvl_b(srcv, zero); ++ dst1 = __lasx_xvilvh_b(srcv, zero); ++ dst0 = __lasx_xvpermi_q(dst1, dst0, 0x20); ++ t0 = __lasx_xvilvl_h(dst0, zero); ++ t1 = __lasx_xvilvh_h(dst0, zero); ++ dst0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ __lasx_xvst(dst0, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ *dst++ = *(src++) << 24; ++ } ++ ++ return iter->buffer; ++} ++ ++// fetch/store 8 bits ++static void lasx_fetch_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ uint32_t *buffer, const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ __m256i src; ++ __m256i t0, t1; ++ __m256i temp0, temp1, temp2, temp3; ++ __m256i dst0, dst1, dst2, dst3; ++ __m256i zero = __lasx_xvldi(0); ++ bits += x; ++ ++ while (width >= 32) { ++ src = __lasx_xvld(bits, 0); ++ t0 = __lasx_xvilvl_b(src, zero); ++ t1 = __lasx_xvilvh_b(src, zero); ++ temp0 = __lasx_xvilvl_h(t0, zero); ++ temp1 = __lasx_xvilvh_h(t0, zero); ++ temp2 = __lasx_xvilvl_h(t1, zero); ++ temp3 = __lasx_xvilvh_h(t1, zero); ++ dst0 = __lasx_xvpermi_q(temp1, temp0, 0x20); ++ dst1 = __lasx_xvpermi_q(temp3, temp2, 0x20); ++ dst2 = __lasx_xvpermi_q(temp1, temp0, 0x31); ++ dst3 = __lasx_xvpermi_q(temp3, temp2, 0x31); ++ __lasx_xvst(dst0, buffer, 0); ++ __lasx_xvst(dst1, buffer, 32); ++ __lasx_xvst(dst2, buffer, 64); ++ __lasx_xvst(dst3, buffer, 96); ++ bits += 32, width -= 32, buffer += 32; ++ } ++ if (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ src = __lasx_xvpermi_d(src, 0xd8); ++ t0 = __lasx_xvilvl_b(src, zero); ++ temp0 = __lasx_xvilvl_h(t0, zero); ++ temp1 = __lasx_xvilvh_h(t0, zero); ++ dst0 = __lasx_xvpermi_q(temp1, temp0, 0x20); ++ dst1 = __lasx_xvpermi_q(temp1, 
temp0, 0x31); ++ __lasx_xvst(dst0, buffer, 0); ++ __lasx_xvst(dst1, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ if (width >= 8) { ++ src = __lasx_xvldrepl_d(bits, 0); ++ t0 = __lasx_xvilvl_b(src, zero); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ dst0 = __lasx_xvilvl_h(t0, zero); ++ __lasx_xvst(dst0, buffer, 0); ++ bits += 8; width -= 8; buffer += 8; ++ } ++ while(width--) { ++ *buffer++ = ((*bits++) << 24); ++ } ++} ++ ++static void lasx_store_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m256i src0, src1, src2, src3; ++ __m256i cont = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; ++ dest += x; ++ while (width >= 32) { ++ src0 = __lasx_xvld(values, 0); ++ src1 = __lasx_xvld(values, 32); ++ src2 = __lasx_xvld(values, 64); ++ src3 = __lasx_xvld(values, 96); ++ src0 = __lasx_xvsrli_w(src0, 24); ++ src1 = __lasx_xvsrli_w(src1, 24); ++ src2 = __lasx_xvsrli_w(src2, 24); ++ src3 = __lasx_xvsrli_w(src3, 24); ++ src0 = __lasx_xvpickev_h(src1, src0); ++ src1 = __lasx_xvpickev_h(src3, src2); ++ src0 = __lasx_xvpickev_b(src1, src0); ++ src0 = __lasx_xvperm_w(src0, cont); ++ __lasx_xvst(src0, dest, 0); ++ values += 32, width -= 32, dest += 32; ++ } ++ if (width >= 16) { ++ src0 = __lasx_xvld(values, 0); ++ src1 = __lasx_xvld(values, 32); ++ src0 = __lasx_xvsrli_w(src0, 24); ++ src1 = __lasx_xvsrli_w(src1, 24); ++ src0 = __lasx_xvpickev_h(src1, src0); ++ src0 = __lasx_xvpickev_b(src0, src0); ++ src0 = __lasx_xvperm_w(src0, cont); ++ __lasx_xvstelm_d(src0, dest, 0, 0); ++ __lasx_xvstelm_d(src0, dest, 8, 1); ++ values += 16; width -= 16; dest += 16; ++ } ++ if (width >= 8) { ++ src0 = __lasx_xvld(values, 0); ++ src0 = __lasx_xvsrli_w(src0, 24); ++ src1 = __lasx_xvpermi_q(src0, src0, 0x01); ++ src0 = __lasx_xvpickev_h(src1, src0); ++ src0 = __lasx_xvpickev_b(src0, src0); ++ __lasx_xvstelm_d(src0, dest, 0, 0); ++ values += 8; width -= 8; dest += 8; ++ } ++ while (width--) { ++ *dest++ = ((*values++) >> 24); ++ } ++} ++ ++static void lasx_fetch_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ ++ __m256i src; ++ __m256i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m256i temp0, temp1, temp2, temp3; ++ __m256i mask0 = __lasx_xvreplgr2vr_b(0xc0); ++ __m256i mask1 = __lasx_xvreplgr2vr_b(0x30); ++ __m256i mask2 = __lasx_xvreplgr2vr_b(0x0c); ++ __m256i mask3 = __lasx_xvreplgr2vr_b(0x03); ++ bits += x; ++ ++ while (width >= 32) { ++ src = __lasx_xvld(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lasx_xvsrli_b(t0, 2), t0 |= __lasx_xvsrli_b(t0, 4); ++ t1 |= __lasx_xvslli_b(t1, 2), t1 |= __lasx_xvsrli_b(t1, 4); ++ t2 |= __lasx_xvsrli_b(t2, 2), t2 |= __lasx_xvslli_b(t2, 4); ++ t3 |= __lasx_xvslli_b(t3, 2), t3 |= __lasx_xvslli_b(t3, 4); ++ t4 = __lasx_xvilvl_b(t0, t1); ++ t5 = __lasx_xvilvh_b(t0, t1); ++ t6 = __lasx_xvilvl_b(t2, t3); ++ t7 = __lasx_xvilvh_b(t2, t3); ++ t0 = __lasx_xvilvl_h(t4, t6); ++ t1 = __lasx_xvilvh_h(t4, t6); ++ t2 = __lasx_xvilvl_h(t5, t7); ++ t3 = __lasx_xvilvh_h(t5, t7); ++ temp0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ temp1 = __lasx_xvpermi_q(t3, t2, 0x20); ++ temp2 = __lasx_xvpermi_q(t1, t0, 0x31); ++ temp3 = __lasx_xvpermi_q(t3, t2, 0x31); ++ __lasx_xvst(temp0, buffer, 0); ++ __lasx_xvst(temp1, 
buffer, 32); ++ __lasx_xvst(temp2, buffer, 64); ++ __lasx_xvst(temp3, buffer, 96); ++ bits += 32, width -= 32, buffer += 32; ++ } ++ if (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ src = __lasx_xvpermi_d(src, 0xd8); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lasx_xvsrli_b(t0, 2), t0 |= __lasx_xvsrli_b(t0, 4); ++ t1 |= __lasx_xvslli_b(t1, 2), t1 |= __lasx_xvsrli_b(t1, 4); ++ t2 |= __lasx_xvsrli_b(t2, 2), t2 |= __lasx_xvslli_b(t2, 4); ++ t3 |= __lasx_xvslli_b(t3, 2), t3 |= __lasx_xvslli_b(t3, 4); ++ t4 = __lasx_xvilvl_b(t0, t1); ++ t5 = __lasx_xvilvl_b(t2, t3); ++ t2 = __lasx_xvilvl_h(t4, t5); ++ t3 = __lasx_xvilvh_h(t4, t5); ++ t0 = __lasx_xvpermi_q(t3, t2, 0x20); ++ t1 = __lasx_xvpermi_q(t3, t2, 0x31); ++ __lasx_xvst(t0, buffer, 0); ++ __lasx_xvst(t1, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ if (width >= 8) { ++ src = __lasx_xvldrepl_d(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lasx_xvsrli_b(t0, 2), t0 |= __lasx_xvsrli_b(t0, 4); ++ t1 |= __lasx_xvslli_b(t1, 2), t1 |= __lasx_xvsrli_b(t1, 4); ++ t2 |= __lasx_xvsrli_b(t2, 2), t2 |= __lasx_xvslli_b(t2, 4); ++ t3 |= __lasx_xvslli_b(t3, 2), t3 |= __lasx_xvslli_b(t3, 4); ++ t4 = __lasx_xvilvl_b(t0, t1); ++ t5 = __lasx_xvilvl_b(t2, t3); ++ t4 = __lasx_xvpermi_d(t4, 0xd8); ++ t5 = __lasx_xvpermi_d(t5, 0xd8); ++ t0 = __lasx_xvilvl_h(t4, t5); ++ __lasx_xvst(t0, buffer, 0); ++ bits += 8; width -= 8; buffer += 8; ++ } ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel & 192; ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel & 48; ++ pixel1 |= (pixel1 << 2); ++ pixel1 |= (pixel1 >> 4); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel & 12; ++ pixel2 |= (pixel2 >> 2); ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel3 = pixel & 3; ++ pixel3 |= (pixel3 << 2); ++ pixel3 |= (pixel3 << 4); ++ *buffer++ = (pixel3 | pixel2 | pixel1 | pixel0); ++ } ++} ++ ++static void lasx_store_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m256i in0, in1, in2, in3, in4, in5, in6, in7; ++ __m256i in8, in9, in10, in11, in12, in13, in14, in15; ++ __m256i tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7; ++ __m256i tt8, tt9, tt10, tt11, tt12, tt13, tt14, tt15; ++ __m256i d0, d1; ++ __m256i mask = __lasx_xvreplgr2vr_b(0xc0); ++ ++ dest += x; ++ ++ while (width >= 128) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ in2 = __lasx_xvld(values, 64); ++ in3 = __lasx_xvld(values, 96); ++ in4 = __lasx_xvld(values, 128); ++ in5 = __lasx_xvld(values, 160); ++ in6 = __lasx_xvld(values, 192); ++ in7 = __lasx_xvld(values, 224); ++ values += 64; ++ in8 = __lasx_xvld(values, 0); ++ in9 = __lasx_xvld(values, 32); ++ in10 = __lasx_xvld(values, 64); ++ in11 = __lasx_xvld(values, 96); ++ in12 = __lasx_xvld(values, 128); ++ in13 = __lasx_xvld(values, 160); ++ in14 = __lasx_xvld(values, 192); ++ in15 = __lasx_xvld(values, 224); ++ ++ tt0 = __lasx_xvpermi_q(in8, in0, 0x20); ++ tt2 = __lasx_xvpermi_q(in9, in1, 0x20); ++ tt4 = __lasx_xvpermi_q(in10, in2, 0x20); ++ tt6 = __lasx_xvpermi_q(in11, in3, 0x20); ++ tt8 = __lasx_xvpermi_q(in12, in4, 0x20); ++ tt10 = __lasx_xvpermi_q(in13, in5, 0x20); ++ tt12 = __lasx_xvpermi_q(in14, in6, 0x20); ++ tt14 = __lasx_xvpermi_q(in15, in7, 0x20); ++ ++ tt1 = __lasx_xvpermi_q(in8, in0, 0x31); ++ tt3 = __lasx_xvpermi_q(in9, 
in1, 0x31); ++ tt5 = __lasx_xvpermi_q(in10, in2, 0x31); ++ tt7 = __lasx_xvpermi_q(in11, in3, 0x31); ++ tt9 = __lasx_xvpermi_q(in12, in4, 0x31); ++ tt11 = __lasx_xvpermi_q(in13, in5, 0x31); ++ tt13 = __lasx_xvpermi_q(in14, in6, 0x31); ++ tt15 = __lasx_xvpermi_q(in15, in7, 0x31); ++ ++ LASX_TRANSPOSE16x8_H(tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7, ++ tt8, tt9, tt10, tt11, tt12, tt13, tt14, tt15, ++ in0, in1, in2, in3, in4, in5, in6, in7); ++ in8 = __lasx_xvpickev_b(in4, in0); ++ in8 = __lasx_xvpermi_d(in8, 0xd8); ++ in9 = __lasx_xvpickod_b(in4, in0); ++ in9 = __lasx_xvpermi_d(in9, 0xd8); ++ in10 = __lasx_xvpickev_b(in5, in1); ++ in10 = __lasx_xvpermi_d(in10, 0xd8); ++ in11 = __lasx_xvpickod_b(in5, in1); ++ in11 = __lasx_xvpermi_d(in11, 0xd8); ++ in12 = __lasx_xvpickev_b(in6, in2); ++ in12 = __lasx_xvpermi_d(in12, 0xd8); ++ in13 = __lasx_xvpickod_b(in6, in2); ++ in13 = __lasx_xvpermi_d(in13, 0xd8); ++ in14 = __lasx_xvpickev_b(in7, in3); ++ in14 = __lasx_xvpermi_d(in14, 0xd8); ++ in15 = __lasx_xvpickod_b(in7, in3); ++ in15 = __lasx_xvpermi_d(in15, 0xd8); ++ ++ in8 &= mask, in9 &= mask, in10 &= mask, in11 &= mask; ++ in12 &= mask, in13 &= mask, in14 &= mask, in15 &= mask; ++ in8 = __lasx_xvsrli_b(in8, 6), in12 = __lasx_xvsrli_b(in12, 6); ++ in9 = __lasx_xvsrli_b(in9, 4), in13 = __lasx_xvsrli_b(in13, 4); ++ in10 = __lasx_xvsrli_b(in10, 2), in14 = __lasx_xvsrli_b(in14, 2); ++ d0 = in8, d0 |= in9, d0 |= in10, d0 |= in11; ++ d1 = in12, d1 |= in13, d1 |= in14, d1 |= in15; ++ ++ tt0 = __lasx_xvpermi_q(tt0, tt0, 0x31); ++ tt1 = __lasx_xvpermi_q(tt1, tt1, 0x31); ++ tt2 = __lasx_xvpermi_q(tt2, tt2, 0x31); ++ tt3 = __lasx_xvpermi_q(tt3, tt3, 0x31); ++ tt4 = __lasx_xvpermi_q(tt4, tt4, 0x31); ++ tt5 = __lasx_xvpermi_q(tt5, tt5, 0x31); ++ tt6 = __lasx_xvpermi_q(tt6, tt6, 0x31); ++ tt7 = __lasx_xvpermi_q(tt7, tt7, 0x31); ++ tt8 = __lasx_xvpermi_q(tt8, tt8, 0x31); ++ tt9 = __lasx_xvpermi_q(tt9, tt9, 0x31); ++ tt10 = __lasx_xvpermi_q(tt10, tt10, 0x31); ++ tt11 = __lasx_xvpermi_q(tt11, tt11, 0x31); ++ tt12 = __lasx_xvpermi_q(tt12, tt12, 0x31); ++ tt13 = __lasx_xvpermi_q(tt13, tt13, 0x31); ++ tt14 = __lasx_xvpermi_q(tt14, tt14, 0x31); ++ tt15 = __lasx_xvpermi_q(tt15, tt15, 0x31); ++ ++ LASX_TRANSPOSE16x8_H(tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7, ++ tt8, tt9, tt10, tt11, tt12, tt13, tt14, tt15, ++ in0, in1, in2, in3, in4, in5, in6, in7); ++ in8 = __lasx_xvpickev_b(in4, in0); ++ in8 = __lasx_xvpermi_d(in8, 0xd8); ++ in9 = __lasx_xvpickod_b(in4, in0); ++ in9 = __lasx_xvpermi_d(in9, 0xd8); ++ in10 = __lasx_xvpickev_b(in5, in1); ++ in10 = __lasx_xvpermi_d(in10, 0xd8); ++ in11 = __lasx_xvpickod_b(in5, in1); ++ in11 = __lasx_xvpermi_d(in11, 0xd8); ++ in12 = __lasx_xvpickev_b(in6, in2); ++ in12 = __lasx_xvpermi_d(in12, 0xd8); ++ in13 = __lasx_xvpickod_b(in6, in2); ++ in13 = __lasx_xvpermi_d(in13, 0xd8); ++ in14 = __lasx_xvpickev_b(in7, in3); ++ in14 = __lasx_xvpermi_d(in14, 0xd8); ++ in15 = __lasx_xvpickod_b(in7, in3); ++ in15 = __lasx_xvpermi_d(in15, 0xd8); ++ ++ in8 &= mask, in9 &= mask, in10 &= mask, in11 &= mask; ++ in12 &= mask, in13 &= mask, in14 &= mask, in15 &= mask; ++ in8 = __lasx_xvsrli_b(in8, 6), in12 = __lasx_xvsrli_b(in12, 6); ++ in9 = __lasx_xvsrli_b(in9, 4), in13 = __lasx_xvsrli_b(in13, 4); ++ in10 = __lasx_xvsrli_b(in10, 2), in14 = __lasx_xvsrli_b(in14, 2); ++ tt0 = in8, tt0 |= in9, tt0 |= in10, tt0 |= in11; ++ tt1 = in12, tt1 |= in13, tt1 |= in14, tt1 |= in15; ++ ++ in0 = __lasx_xvpermi_q(tt0, d0, 0x20); ++ in2 = __lasx_xvpermi_q(tt0, d0, 0x31); ++ in1 = __lasx_xvpermi_q(tt1, d1, 0x20); ++ in3 = 
__lasx_xvpermi_q(tt1, d1, 0x31); ++ ++ in8 = __lasx_xvilvl_b(in1, in0); ++ in9 = __lasx_xvilvh_b(in1, in0); ++ in10 = __lasx_xvilvl_b(in3, in2); ++ in11 = __lasx_xvilvh_b(in3, in2); ++ ++ in0 = __lasx_xvilvl_h(in10, in8); ++ in1 = __lasx_xvilvh_h(in10, in8); ++ in2 = __lasx_xvilvl_h(in11, in9); ++ in3 = __lasx_xvilvh_h(in11, in9); ++ ++ d0 = __lasx_xvpermi_q(in1, in0, 0x20); ++ tt0 = __lasx_xvpermi_q(in1, in0, 0x31); ++ d1 = __lasx_xvpermi_q(in3, in2, 0x20); ++ tt1 = __lasx_xvpermi_q(in3, in2, 0x31); ++ ++ __lasx_xvst(d0, dest, 0); ++ __lasx_xvst(d1, dest, 32); ++ __lasx_xvst(tt0, dest, 64); ++ __lasx_xvst(tt1, dest, 96); ++ width -= 128, values += 64, dest += 128; ++ } ++ ++ while (width >= 32) { ++ in0 = __lasx_xvld(values, 0); ++ in2 = __lasx_xvld(values, 32); ++ in4 = __lasx_xvld(values, 64); ++ in6 = __lasx_xvld(values, 96); ++ ++ in1 = __lasx_xvpackod_d(in0, in0); ++ in3 = __lasx_xvpackod_d(in2, in2); ++ in5 = __lasx_xvpackod_d(in4, in4); ++ in7 = __lasx_xvpackod_d(in6, in6); ++ tt0 = __lasx_xvpermi_q(in4, in0, 0x20); ++ tt2 = __lasx_xvpermi_q(in4, in0, 0x31); ++ tt1 = __lasx_xvpermi_q(in5, in1, 0x20); ++ tt3 = __lasx_xvpermi_q(in5, in1, 0x31); ++ tt4 = __lasx_xvpermi_q(in6, in2, 0x20); ++ tt6 = __lasx_xvpermi_q(in6, in2, 0x31); ++ tt5 = __lasx_xvpermi_q(in7, in3, 0x20); ++ tt7 = __lasx_xvpermi_q(in7, in3, 0x31); ++ ++ LASX_TRANSPOSE8x8_H(tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7, ++ tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7); ++ tt8 = __lasx_xvpickev_b(tt1, tt0); ++ tt8 = __lasx_xvpermi_d(tt8, 0xd8); ++ tt9 = __lasx_xvpickod_b(tt1, tt0); ++ tt9 = __lasx_xvpermi_d(tt9, 0xd8); ++ tt10 = __lasx_xvpickev_b(tt3, tt2); ++ tt10 = __lasx_xvpermi_d(tt10, 0xd8); ++ tt11 = __lasx_xvpickod_b(tt3, tt2); ++ tt11 = __lasx_xvpermi_d(tt11, 0xd8); ++ tt12 = __lasx_xvpickev_b(tt5, tt4); ++ tt12 = __lasx_xvpermi_d(tt12, 0xd8); ++ tt13 = __lasx_xvpickod_b(tt5, tt4); ++ tt13 = __lasx_xvpermi_d(tt13, 0xd8); ++ tt14 = __lasx_xvpickev_b(tt7, tt6); ++ tt14 = __lasx_xvpermi_d(tt14, 0xd8); ++ tt15 = __lasx_xvpickod_b(tt7, tt6); ++ tt15 = __lasx_xvpermi_d(tt15, 0xd8); ++ ++ tt0 = __lasx_xvpermi_q(tt12, tt8, 0x20); ++ tt2 = __lasx_xvpermi_q(tt12, tt8, 0x31); ++ tt1 = __lasx_xvpermi_q(tt13, tt9, 0x20); ++ tt3 = __lasx_xvpermi_q(tt13, tt9, 0x31); ++ tt4 = __lasx_xvpermi_q(tt14, tt10, 0x20); ++ tt6 = __lasx_xvpermi_q(tt14, tt10, 0x31); ++ tt5 = __lasx_xvpermi_q(tt15, tt11, 0x20); ++ tt7 = __lasx_xvpermi_q(tt15, tt11, 0x31); ++ ++ tt0 &= mask, tt1 &= mask, tt2 &= mask, tt3 &= mask; ++ tt4 &= mask, tt5 &= mask, tt6 &= mask, tt7 &= mask; ++ tt0 = __lasx_xvsrli_b(tt0, 6), tt4 = __lasx_xvsrli_b(tt4, 6); ++ tt1 = __lasx_xvsrli_b(tt1, 4), tt5 = __lasx_xvsrli_b(tt5, 4); ++ tt2 = __lasx_xvsrli_b(tt2, 2), tt6 = __lasx_xvsrli_b(tt6, 2); ++ d0 = tt0, d0 |= tt1, d0 |= tt2, d0 |= tt3; ++ d1 = tt4, d1 |= tt5, d1 |= tt6, d1 |= tt7; ++ ++ tt0 = __lasx_xvilvl_b(d1, d0); ++ tt1 = __lasx_xvilvh_b(d1, d0); ++ d0 = __lasx_xvpermi_q(tt0, tt1, 0x02); ++ __lasx_xvst(d0, dest, 0); ++ width -= 32, values += 32, dest += 32; ++ } ++ ++ while (width--) { ++ uint32_t pixel = *values++; ++ pixel &= 0xc0c0c0c0; ++ pixel |= (pixel << 6); ++ pixel |= (pixel << 12); ++ pixel >>= 24; ++ *dest++ = pixel; ++ } ++} ++ ++// fetch/store 16 bits ++static void lasx_fetch_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m256i src, tmp0, tmp1; ++ __m256i t, t0, t1, t2, t3; ++ 
__m256i mask0 = __lasx_xvreplgr2vr_h(0x001f); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ t0 = (src & mask0); ++ t0 = __lasx_xvslli_h(t0, 3); ++ t = __lasx_xvsrli_h(t0, 5); ++ t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 5); ++ t1 &= mask0; ++ t1 = __lasx_xvslli_h(t1, 3); ++ t = __lasx_xvsrli_h(t1, 5); ++ t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 10); ++ t2 &= mask0; ++ t2 = __lasx_xvslli_h(t2, 3); ++ t = __lasx_xvsrli_h(t2, 5); ++ t2 |= t; ++ t3 = __lasx_xvsrli_h(src, 15); ++ t = __lasx_xvslli_h(t3, 1); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 2); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 4); ++ t3 |= t; ++ t1 <<= 8; ++ t0 |= t1; ++ t3 <<= 8; ++ t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t2, t0); ++ tmp1 = __lasx_xvilvh_h(t2, t0); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ __lasx_xvst(t1, buffer, 0); ++ __lasx_xvst(t3, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ ++ if (width >= 8) { ++ src = __lasx_xvld(bits, 0); ++ t0 = (src & mask0); ++ t0 = __lasx_xvslli_h(t0, 3); ++ t = __lasx_xvsrli_h(t0, 5); ++ t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 5); ++ t1 &= mask0; ++ t1 = __lasx_xvslli_h(t1, 3); ++ t = __lasx_xvsrli_h(t1, 5); ++ t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 10); ++ t2 &= mask0; ++ t2 = __lasx_xvslli_h(t2, 3); ++ t = __lasx_xvsrli_h(t2, 5); ++ t2 |= t; ++ t3 = __lasx_xvsrli_h(src, 15); ++ t = __lasx_xvslli_h(t3, 1); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 2); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 4); ++ t3 |= t; ++ t1 <<= 8; ++ t0 |= t1; ++ t3 <<= 8; ++ t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t2, t0); ++ tmp1 = __lasx_xvilvh_h(t2, t0); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ __lasx_xvst(t1, buffer, 0); ++ bits += 8, width -= 8, buffer += 8; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 15; ++ pixel0 <<= 7; ++ pixel0 |= (pixel0 >> 1); ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 10; ++ pixel1 &= 31; ++ pixel1 <<= 3; ++ pixel1 |= (pixel1 >> 5); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 5; ++ pixel2 &= 31; ++ pixel2 <<= 3; ++ pixel2 |= (pixel2 >> 5); ++ pixel2 <<= 8; ++ // b ++ pixel &= 31; ++ pixel <<= 3; ++ pixel |= (pixel >> 5); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lasx_store_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ __m256i in0, in1, in2, in3; ++ __m256i tmp0, tmp1; ++ __m256i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m256i t8, t9, t10, t11, t12, t13, t14, t15; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i mask = { 0x80f8f8f880f8f8f8, 0x80f8f8f880f8f8f8, ++ 0x80f8f8f880f8f8f8, 0x80f8f8f880f8f8f8 }; ++ dest += x; ++ ++ while(width >= 32) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ in2 = __lasx_xvld(values, 64); ++ in3 = __lasx_xvld(values, 96); ++ ++ in0 = __lasx_xvand_v(in0, mask); ++ in1 = __lasx_xvand_v(in1, mask); ++ in2 = __lasx_xvand_v(in2, mask); ++ in3 = __lasx_xvand_v(in3, mask); ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in2, zero); ++ tmp1 = __lasx_xvilvh_b(in2, 
zero); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in3, zero); ++ tmp1 = __lasx_xvilvh_b(in3, zero); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ tmp0 = __lasx_xvilvl_h(zero, t7); ++ tmp1 = __lasx_xvilvh_h(zero, t7); ++ t14 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t15 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t6); ++ tmp1 = __lasx_xvilvh_h(zero, t6); ++ t12 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t13 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t5); ++ tmp1 = __lasx_xvilvh_h(zero, t5); ++ t10 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t11 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t4); ++ tmp1 = __lasx_xvilvh_h(zero, t4); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ LASX_TRANSPOSE8x8_W(t8, t9, t10, t11, t12, t13, t14, t15, ++ t8, t9, t10, t11, t12, t13, t14, t15); ++ ++ t0 = __lasx_xvsrli_h(t0, 11); ++ t1 = __lasx_xvsrli_h(t1, 6); ++ t2 = __lasx_xvsrli_h(t2, 1); ++ t4 = __lasx_xvsrli_h(t4, 11); ++ t5 = __lasx_xvsrli_h(t5, 6); ++ t6 = __lasx_xvsrli_h(t6, 1); ++ ++ t8 = __lasx_xvsrli_h(t8, 11); ++ t9 = __lasx_xvsrli_h(t9, 6); ++ t10 = __lasx_xvsrli_h(t10, 1); ++ t12 = __lasx_xvsrli_h(t12, 11); ++ t13 = __lasx_xvsrli_h(t13, 6); ++ t14 = __lasx_xvsrli_h(t14, 1); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ t11 = __lasx_xvor_v(t11, t10); ++ t11 = __lasx_xvor_v(t11, t9); ++ t11 = __lasx_xvor_v(t11, t8); ++ t15 = __lasx_xvor_v(t15, t14); ++ t15 = __lasx_xvor_v(t15, t13); ++ t15 = __lasx_xvor_v(t15, t12); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ ++ tmp0 = __lasx_xvilvl_w(t15, t11); ++ tmp1 = __lasx_xvilvh_w(t15, t11); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t1 = __lasx_xvpickev_h(t9, t8); ++ t1 = __lasx_xvpermi_d(t1, 0xd8); ++ ++ __lasx_xvst(t0, dest, 0); ++ __lasx_xvst(t1, dest, 32); ++ values += 32, width -= 32, dest += 32; ++ } ++ ++ if (width >= 16) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ ++ in0 = __lasx_xvand_v(in0, mask); ++ in1 = __lasx_xvand_v(in1, mask); ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = 
__lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ ++ t0 = __lasx_xvsrli_h(t0, 11); ++ t1 = __lasx_xvsrli_h(t1, 6); ++ t2 = __lasx_xvsrli_h(t2, 1); ++ t4 = __lasx_xvsrli_h(t4, 11); ++ t5 = __lasx_xvsrli_h(t5, 6); ++ t6 = __lasx_xvsrli_h(t6, 1); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ ++ __lasx_xvst(t0, dest, 0); ++ values += 16, width -= 16, dest += 16; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel0 = pixel >> 16; ++ pixel1 = pixel >> 9; ++ pixel2 = pixel >> 6; ++ pixel3 = pixel >> 3; ++ pixel0 &= 0x8000; ++ pixel1 &= 0x7c00; ++ pixel2 &= 0x03e0; ++ pixel3 &= 0x001f; ++ *dest++ = (pixel0 | pixel1 | pixel2 | pixel3); ++ } ++} ++ ++static void lasx_fetch_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m256i src, tmp0, tmp1; ++ __m256i t, t0, t1, t2, t3; ++ ++ __m256i mask0 = __lasx_xvreplgr2vr_h(0x000f); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ t0 = __lasx_xvsrli_h(src, 12); ++ t = (t0 << 4), t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 8); ++ t1 &= mask0, t = (t1 << 4), t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 4); ++ t2 &= mask0, t = (t2 << 4), t2 |= t; ++ t3 = (src & mask0), t = (t3 << 4), t3 |= t; ++ t0 <<= 8, t2 <<= 8, t0 |= t1, t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t0, t2); ++ tmp1 = __lasx_xvilvh_h(t0, t2); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ __lasx_xvst(t1, buffer, 0); ++ __lasx_xvst(t3, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ ++ if (width >= 8) { ++ src = __lasx_xvld(bits, 0); ++ t0 = __lasx_xvsrli_h(src, 12); ++ t = (t0 << 4), t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 8); ++ t1 &= mask0, t = (t1 << 4), t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 4); ++ t2 &= mask0, t = (t2 << 4), t2 |= t; ++ t3 = (src & mask0), t = (t3 << 4), t3 |= t; ++ t0 <<= 8, t2 <<= 8, t0 |= t1, t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t0, t2); ++ tmp1 = __lasx_xvilvh_h(t0, t2); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ __lasx_xvst(t1, buffer, 0); ++ bits += 8, width -= 8, buffer += 8; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 12; ++ pixel0 |= (pixel0 << 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 8; ++ pixel1 &= 15; ++ pixel1 |= (pixel1 << 4); ++ 
pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 4; ++ pixel2 &= 15; ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel &= 15; ++ pixel |= (pixel << 4); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lasx_store_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1; ++ __m256i in0, in1, in2, in3; ++ __m256i tmp0, tmp1; ++ __m256i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m256i t8, t9, t10, t11, t12, t13, t14, t15; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i mask = __lasx_xvreplgr2vr_h(0xf0f0); ++ dest += x; ++ ++ while(width >= 32) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ in2 = __lasx_xvld(values, 64); ++ in3 = __lasx_xvld(values, 96); ++ ++ in0 = __lasx_xvand_v(in0, mask); ++ in1 = __lasx_xvand_v(in1, mask); ++ in2 = __lasx_xvand_v(in2, mask); ++ in3 = __lasx_xvand_v(in3, mask); ++ ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in2, zero); ++ tmp1 = __lasx_xvilvh_b(in2, zero); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in3, zero); ++ tmp1 = __lasx_xvilvh_b(in3, zero); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ tmp0 = __lasx_xvilvl_h(zero, t7); ++ tmp1 = __lasx_xvilvh_h(zero, t7); ++ t14 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t15 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t6); ++ tmp1 = __lasx_xvilvh_h(zero, t6); ++ t12 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t13 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t5); ++ tmp1 = __lasx_xvilvh_h(zero, t5); ++ t10 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t11 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t4); ++ tmp1 = __lasx_xvilvh_h(zero, t4); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ LASX_TRANSPOSE8x8_W(t8, t9, t10, t11, t12, t13, t14, t15, ++ t8, t9, t10, t11, t12, t13, t14, t15); ++ ++ t0 = __lasx_xvsrli_h(t0, 12); ++ t1 = __lasx_xvsrli_h(t1, 8); ++ t2 = __lasx_xvsrli_h(t2, 4); ++ t4 = __lasx_xvsrli_h(t4, 12); ++ t5 = __lasx_xvsrli_h(t5, 8); ++ t6 = __lasx_xvsrli_h(t6, 4); ++ ++ t8 = __lasx_xvsrli_h(t8, 12); ++ t9 = __lasx_xvsrli_h(t9, 8); ++ t10 = __lasx_xvsrli_h(t10, 4); ++ t12 = __lasx_xvsrli_h(t12, 12); ++ t13 = __lasx_xvsrli_h(t13, 8); ++ t14 = 
__lasx_xvsrli_h(t14, 4); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ t11 = __lasx_xvor_v(t11, t10); ++ t11 = __lasx_xvor_v(t11, t9); ++ t11 = __lasx_xvor_v(t11, t8); ++ t15 = __lasx_xvor_v(t15, t14); ++ t15 = __lasx_xvor_v(t15, t13); ++ t15 = __lasx_xvor_v(t15, t12); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ ++ tmp0 = __lasx_xvilvl_w(t15, t11); ++ tmp1 = __lasx_xvilvh_w(t15, t11); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t1 = __lasx_xvpickev_h(t9, t8); ++ t1 = __lasx_xvpermi_d(t1, 0xd8); ++ ++ __lasx_xvst(t0, dest, 0); ++ __lasx_xvst(t1, dest, 32); ++ values += 32, width -= 32, dest += 32; ++ } ++ ++ if (width >= 16) { ++ ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ ++ in1 = __lasx_xvand_v(in1, mask); ++ in0 = __lasx_xvand_v(in0, mask); ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ ++ t0 = __lasx_xvsrli_h(t0, 12); ++ t1 = __lasx_xvsrli_h(t1, 8); ++ t2 = __lasx_xvsrli_h(t2, 4); ++ t4 = __lasx_xvsrli_h(t4, 12); ++ t5 = __lasx_xvsrli_h(t5, 8); ++ t6 = __lasx_xvsrli_h(t6, 4); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ __lasx_xvst(t0, dest, 0); ++ values += 16, width -= 16, dest += 16; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel &= 0xf0f0f0f0; ++ pixel0 = (pixel >> 4); ++ pixel1 = (pixel >> 8); ++ pixel0 |= pixel1; ++ pixel0 &= 0x00ff00ff; ++ pixel0 |= (pixel0 >> 8); ++ pixel0 &= 0xffff; ++ *dest++ = pixel0; ++ } ++} ++ ++static const pixman_fast_path_t lasx_fast_paths[] = ++{ ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, 
lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, lasx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, lasx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, lasx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, lasx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, lasx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, lasx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, lasx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, lasx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, lasx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, lasx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, lasx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, lasx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, lasx_composite_add_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, lasx_composite_add_n_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, lasx_composite_add_n_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, 
lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, lasx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, lasx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, lasx_composite_in_n_8_8), ++ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, lasx_composite_in_8_8), ++ ++ { PIXMAN_OP_NONE }, ++}; ++ ++#define IMAGE_FLAGS \ ++ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ ++ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) ++static const pixman_iter_info_t lasx_iters[] = ++{ ++ { ++ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lasx_fetch_x8r8g8b8, NULL ++ }, ++ { ++ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lasx_fetch_r5g6b5, NULL ++ }, ++ { ++ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lasx_fetch_a8, NULL ++ }, ++ { PIXMAN_null }, ++}; ++ ++pixman_implementation_t * ++_pixman_implementation_create_lasx (pixman_implementation_t *fallback) ++{ ++ pixman_implementation_t *imp = ++ _pixman_implementation_create (fallback, lasx_fast_paths); ++ ++ /* LoongArch LASX constants */ ++ mask_565_r = create_mask_1x32_256 (0x00f80000); ++ mask_565_g1 = create_mask_1x32_256 (0x00070000); ++ mask_565_g2 = create_mask_1x32_256 (0x000000e0); ++ mask_565_b = create_mask_1x32_256 (0x0000001f); ++ mask_red = create_mask_1x32_256 (0x00f80000); ++ mask_green = create_mask_1x32_256 (0x0000fc00); ++ mask_blue = create_mask_1x32_256 (0x000000f8); ++ mask_565_fix_rb = create_mask_1x32_256 (0x00e000e0); ++ mask_565_fix_g = create_mask_1x32_256 (0x0000c000); ++ mask_0080 = create_mask_16_256 (0x0080); ++ mask_00ff = create_mask_16_256 (0x00ff); ++ mask_0101 = create_mask_16_256 (0x0101); ++ mask_ffff = create_mask_16_256 (0xffff); ++ mask_ff000000 = create_mask_1x32_256 (0xff000000); ++ mask_alpha = create_mask_1x64_256 (0x00ff000000000000); ++ mask_565_rb = create_mask_1x32_256 (0x00f800f8); ++ mask_565_pack_multiplier = create_mask_1x32_256 (0x20000004); ++ ++ /* Set up function pointers */ ++ imp->combine_32[PIXMAN_OP_SRC] = lasx_combine_src_u; ++ 
imp->combine_32[PIXMAN_OP_OVER] = lasx_combine_over_u;
++    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = lasx_combine_over_reverse_u;
++    imp->combine_32[PIXMAN_OP_OUT] = lasx_combine_out_u;
++    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = lasx_combine_out_reverse_u;
++    imp->combine_32[PIXMAN_OP_ADD] = lasx_combine_add_u;
++    imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = lasx_combine_src_u;
++    imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = lasx_combine_src_u;
++    imp->combine_32[PIXMAN_OP_MULTIPLY] = lasx_combine_multiply_u;
++    imp->combine_32_ca[PIXMAN_OP_SRC] = lasx_combine_src_ca;
++    imp->combine_32_ca[PIXMAN_OP_OVER] = lasx_combine_over_ca;
++    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = lasx_combine_out_reverse_ca;
++
++    imp->blt = lasx_blt;
++    imp->fill = lasx_fill;
++    imp->iter_info = lasx_iters;
++
++    return imp;
++}
++
++void setup_accessors_lasx (bits_image_t *image)
++{
++    if (image->format == PIXMAN_a8) { // 8 bits
++        image->fetch_scanline_32 = lasx_fetch_scanline_a8;
++        image->store_scanline_32 = lasx_store_scanline_a8;
++    } else if (image->format == PIXMAN_a2r2g2b2) {
++        image->fetch_scanline_32 = lasx_fetch_scanline_a2r2g2b2;
++        image->store_scanline_32 = lasx_store_scanline_a2r2g2b2;
++    } else if (image->format == PIXMAN_a1r5g5b5) { // 16 bits
++        image->fetch_scanline_32 = lasx_fetch_scanline_a1r5g5b5;
++        image->store_scanline_32 = lasx_store_scanline_a1r5g5b5;
++    } else if (image->format == PIXMAN_a4r4g4b4) {
++        image->fetch_scanline_32 = lasx_fetch_scanline_a4r4g4b4;
++        image->store_scanline_32 = lasx_store_scanline_a4r4g4b4;
++    }
++}
+diff --git a/pixman/pixman-loongarch.c b/pixman/pixman-loongarch.c
+new file mode 100644
+index 0000000..a77211c
+--- /dev/null
++++ b/pixman/pixman-loongarch.c
+@@ -0,0 +1,94 @@
++/*
++ * Copyright (c) 2023 Loongson Technology Corporation Limited
++ * Contributed by Lu Wang
++ *                Song Ding
++ *
++ * Pixman is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 0.36.0 of the License, or (at your option) any later version.
++ *
++ * Pixman is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with Pixman; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifdef HAVE_CONFIG_H
++#include
++#endif
++
++#include "pixman-private.h"
++
++#if defined(USE_LOONGARCH_LSX) || defined(USE_LOONGARCH_LASX)
++#include
++#include
++#include
++
++#ifdef USE_LOONGARCH_LSX
++static int have_lsx = 0;
++#endif
++#ifdef USE_LOONGARCH_LASX
++static int have_lasx = 0;
++#endif
++
++static uint64_t detect_cpu_features(void)
++{
++    uint64_t hwcap = 0;
++    hwcap = getauxval(AT_HWCAP);
++
++    return hwcap;
++}
++
++static pixman_bool_t
++have_feature (uint64_t feature)
++{
++    static pixman_bool_t initialized;
++    static uint64_t features;
++
++    if (!initialized)
++    {
++        features = detect_cpu_features();
++        initialized = TRUE;
++    }
++
++    return (features & feature) == feature;
++}
++
++#endif
++
++pixman_implementation_t *
++_pixman_loongarch_get_implementations (pixman_implementation_t *imp)
++{
++#ifdef USE_LOONGARCH_LSX
++    if (!_pixman_disabled ("loongarch-lsx") && have_feature (HWCAP_LOONGARCH_LSX))
++    {
++        imp = _pixman_implementation_create_lsx (imp);
++        have_lsx = 1;
++    }
++#endif
++#ifdef USE_LOONGARCH_LASX
++    if (!_pixman_disabled ("loongarch-lasx") && have_feature (HWCAP_LOONGARCH_LASX))
++    {
++        imp = _pixman_implementation_create_lasx (imp);
++        have_lasx = 1;
++    }
++#endif
++    return imp;
++}
++
++void setup_loongarch_accessors (bits_image_t *image)
++{
++#ifdef USE_LOONGARCH_LSX
++    if (have_lsx)
++        setup_accessors_lsx(image);
++#endif
++#ifdef USE_LOONGARCH_LASX
++    if (have_lasx)
++        setup_accessors_lasx(image);
++#endif
++}
+diff --git a/pixman/pixman-lsx.c b/pixman/pixman-lsx.c
+new file mode 100644
+index 0000000..a4c261a
+--- /dev/null
++++ b/pixman/pixman-lsx.c
+@@ -0,0 +1,3783 @@
++/*
++ * Loongson LSX optimizations.
++ *
++ * Copyright © 2023 Loongson Technology Corporation Limited
++ * Contributed by Song Ding(songding@loongson.cn)
++ *
++ * Permission to use, copy, modify, distribute, and sell this software and its
++ * documentation for any purpose is hereby granted without fee, provided that
++ * the above copyright notice appear in all copies and that both that
++ * copyright notice and this permission notice appear in supporting
++ * documentation, and that the name of Red Hat not be used in advertising or
++ * publicity pertaining to distribution of the software without specific,
++ * written prior permission. Red Hat makes no representations about the
++ * suitability of this software for any purpose. It is provided "as is"
++ * without express or implied warranty.
++ *
++ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
++ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
++ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
++ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
++ * SOFTWARE.
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include
++#endif
++
++#include "pixman-private.h"
++#include "pixman-combine32.h"
++#include "loongson_intrinsics.h"
++
++static force_inline uint32_t
++over(uint32_t src, uint32_t dest)
++{
++    uint32_t a = ~src >> 24;
++
++    UN8x4_MUL_UN8_ADD_UN8x4(dest, a, src);
++
++    return dest;
++}
++
++static force_inline uint32_t
++in(uint32_t x, uint8_t y)
++{
++    uint16_t a = y;
++
++    UN8x4_MUL_UN8(x, a);
++
++    return x;
++}
++
++static force_inline uint32_t
++combine_mask(const uint32_t *src, const uint32_t *mask, int i)
++{
++    uint32_t s, m;
++
++    if (mask) {
++        m = *(mask + i) >> A_SHIFT;
++        if (!m)
++            return 0;
++    }
++    s = *(src + i);
++    if (mask)
++        UN8x4_MUL_UN8(s, m);
++    return s;
++}
++
++static void
++combine_mask_ca(uint32_t *src, uint32_t *mask)
++{
++    uint32_t a = *mask;
++    uint32_t x;
++    uint16_t xa;
++
++    if (!a) {
++        *(src) = 0;
++        return;
++    }
++
++    x = *(src);
++    if (a == ~0) {
++        x = x >> A_SHIFT;
++        x |= x << G_SHIFT;
++        x |= x << R_SHIFT;
++        *(mask) = x;
++        return;
++    }
++    xa = x >> A_SHIFT;
++    UN8x4_MUL_UN8x4(x, a);
++    *(src) = x;
++
++    UN8x4_MUL_UN8(a, xa);
++    *(mask) = a;
++}
++
++static void
++combine_mask_value_ca(uint32_t *src, const uint32_t *mask)
++{
++    uint32_t a = *mask;
++    uint32_t x;
++
++    if (!a) {
++        *(src) = 0;
++        return;
++    }
++
++    if (a == ~0)
++        return;
++
++    x = *(src);
++    UN8x4_MUL_UN8x4(x, a);
++    *(src) = x;
++}
++
++static void
++combine_mask_alpha_ca(const uint32_t *src, uint32_t *mask)
++{
++    uint32_t a = *(mask);
++    uint32_t x;
++
++    if (!a)
++        return;
++    x = *(src) >> A_SHIFT;
++
++    if (x == MASK)
++        return;
++
++    if (a == -1) {
++        x |= x << G_SHIFT;
++        x |= x << R_SHIFT;
++        *(mask) = x;
++        return;
++    }
++    UN8x4_MUL_UN8(a, x);
++    *(mask) = a;
++}
++
++/* Compute the product of two unsigned fixed-point 8-bit values from 0 to 1
++ * and map its result to the same range.
++ *
++ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
++ * Notation, Notation, Notation", the first of which is
++ *
++ *   prod(a, b) = (a * b + 128) / 255.
++ *
++ * By approximating the division by 255 as 257/65536, it can be replaced by a
++ * multiply and a right shift. This is the implementation that we use in
++ * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended
++ * 3DNow!, and unavailable at the time of the book's publication) to perform
++ * the multiplication and right shift in a single operation.
++ *
++ *   prod(a, b) = ((a * b + 128) * 257) >> 16.
++ *
++ * A third way (how pix_multiply() was implemented prior to 14208344) also
++ * exists that performs the multiplication by 257 with adds and shifts.
++ *
++ * Where temp = a * b + 128
++ *
++ *   prod(a, b) = (temp + (temp >> 8)) >> 8.
++ *
++ * The lsx_pix_multiply(src, mask) is implemented using the third way, and calculates
++ * two sets of data each time.
++ */ ++ ++static force_inline __m128i ++lsx_pix_multiply(__m128i src, __m128i mask) ++{ ++ __m128i tmp0, tmp1; ++ __m128i vec; ++ ++ vec = __lsx_vreplgr2vr_h(0x80); ++ tmp0 = __lsx_vmadd_h(vec, src, mask); ++ tmp1 = __lsx_vsrli_h(tmp0, 8); ++ tmp0 = __lsx_vadd_h(tmp0, tmp1); ++ tmp1 = __lsx_vsrli_h(tmp0, 8); ++ ++ return tmp1; ++} ++ ++static force_inline __m128i ++over_1x128(__m128i src, __m128i alpha, __m128i dst) ++{ ++ __m128i mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ ++ alpha = __lsx_vxor_v(alpha, mask_00ff); ++ alpha = lsx_pix_multiply(dst, alpha); ++ ++ return (__lsx_vsadd_bu(src, alpha)); ++} ++ ++static force_inline uint32_t ++core_combine_over_u32 (uint32_t src, uint32_t dst) ++{ ++ uint8_t a = src >> 24; ++ ++ if (a == 0xff) { ++ return src; ++ } ++ else if (src) { ++ __m128i zero = __lsx_vldi(0); ++ __m128i vr_src = __lsx_vinsgr2vr_w(zero, src, 0); ++ __m128i vr_dst = __lsx_vinsgr2vr_w(zero, dst, 0); ++ __m128i vr_alpha; ++ __m128i tmp; ++ ++ vr_src = __lsx_vilvl_b(zero, vr_src); ++ vr_dst = __lsx_vilvl_b(zero, vr_dst); ++ vr_alpha = __lsx_vshuf4i_h(vr_src, 0xff); ++ ++ tmp = __lsx_vpickev_b(zero, over_1x128(vr_src, vr_alpha, vr_dst)); ++ ++ return __lsx_vpickve2gr_wu(tmp, 0); ++ } ++ ++ return dst; ++} ++ ++static force_inline __m128i ++lsx_over_u(__m128i src, __m128i dest) ++{ ++ __m128i r1, r2, r3, t; ++ __m128i rb_mask = __lsx_vreplgr2vr_w(0xff00ff); ++ __m128i rb_one_half = __lsx_vreplgr2vr_w(0x800080); ++ __m128i rb_mask_plus_one = __lsx_vreplgr2vr_w(0x10000100); ++ __m128i a = __lsx_vsrli_w(__lsx_vnor_v(src, src), 24); ++ ++ r1 = __lsx_vand_v(dest, rb_mask); ++ r1 = __lsx_vmadd_w(rb_one_half, r1, a); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r1, 8)); ++ r1 = __lsx_vadd_w(r1, t); ++ r1 = __lsx_vsrli_w(r1, 8); ++ r1 = __lsx_vand_v(r1, rb_mask); ++ r2 = __lsx_vand_v(src, rb_mask); ++ ++ r1 = __lsx_vadd_w(r1, r2); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r1, 8)); ++ r1 = __lsx_vor_v(r1, __lsx_vsub_w(rb_mask_plus_one, t)); ++ r1 = __lsx_vand_v(r1, rb_mask); ++ ++ r2 = __lsx_vsrli_w(dest, 8); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ r2 = __lsx_vmadd_w(rb_one_half, r2, a); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r2, 8)); ++ r2 = __lsx_vadd_w(r2, t); ++ r2 = __lsx_vsrli_w(r2, 8); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ r3 = __lsx_vand_v(rb_mask, __lsx_vsrli_w(src, 8)); ++ ++ r2 = __lsx_vadd_w(r2, r3); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r2, 8)); ++ r2 = __lsx_vor_v(r2, __lsx_vsub_w(rb_mask_plus_one, t)); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ ++ t = __lsx_vor_v(r1, __lsx_vslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static force_inline __m128i ++lsx_in_u(__m128i x, __m128i a) ++{ ++ __m128i r1, r2, t; ++ __m128i rb_mask = __lsx_vreplgr2vr_w(0xff00ff); ++ __m128i rb_one_half = __lsx_vreplgr2vr_w(0x800080); ++ ++ r1 = __lsx_vand_v(x, rb_mask); ++ r1 = __lsx_vmadd_w(rb_one_half, r1, a); ++ t = __lsx_vand_v(__lsx_vsrli_w(r1, 8), rb_mask); ++ r1 = __lsx_vadd_w(r1, t); ++ r1 = __lsx_vsrli_w(r1, 8); ++ r1 = __lsx_vand_v(r1, rb_mask); ++ r2 = __lsx_vsrli_w(x, 8); ++ ++ r2 = __lsx_vand_v(r2, rb_mask); ++ r2 = __lsx_vmadd_w(rb_one_half, r2, a); ++ t = __lsx_vand_v(__lsx_vsrli_w(r2, 8), rb_mask); ++ r2 = __lsx_vadd_w(r2, t); ++ r2 = __lsx_vsrli_w(r2, 8); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ ++ t = __lsx_vor_v(r1, __lsx_vslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static void ++lsx_combine_src_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0; ++ __m128i zero = 
__lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if(mask) { ++ while (width >= 4) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ *dest++ = s; ++ } ++ } else { ++ while (width >= 4) { ++ src0 = __lsx_vld(src, 0); ++ __lsx_vst(src0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ if (width) { ++ memcpy (dest, src, width * sizeof (uint32_t)); ++ } ++ } ++} ++ ++static void ++lsx_combine_over_u_mask (uint32_t *dest, ++ const uint32_t *src, ++ const uint32_t *mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out4 = __lsx_vilvl_b(zero, dest0); ++ out5 = __lsx_vilvh_b(zero, dest0); ++ out4 = lsx_pix_multiply(out4, out1); ++ out5 = lsx_pix_multiply(out5, out3); ++ ++ dest0 = __lsx_vpickev_b(out2, out0); ++ dest1 = __lsx_vpickev_b(out5, out4); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ mask += 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t m = ALPHA_8 (*(mask + i)); ++ if (m == 0xFF) { ++ uint32_t s = *(src + i); ++ uint32_t a = ALPHA_8 (s); ++ if (a == 0xFF) { ++ *(dest + i) = s; ++ } else if (s) { ++ uint32_t d = *(dest + i); ++ uint32_t ia = a ^ 0xFF; ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ia, s); ++ *(dest + i) = d; ++ } ++ } else if (m) { ++ uint32_t s = *(src + i); ++ if (s) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8(s, m); ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ALPHA_8 (~s), s); ++ *(dest + i) = d; ++ } ++ } ++ } ++} ++ ++static void ++lsx_combine_over_u_no_mask (uint32_t *dst, const uint32_t *src, int width) ++{ ++ __m128i zero = __lsx_vldi(0); ++ ++ while (width >= 4) { ++ __m128i v_src, v_dst; ++ __m128i v_src_ev, v_src_od; ++ __m128i alpha; ++ __m128i v_dst_ev, v_dst_od; ++ ++ v_src = __lsx_vld(src, 0); ++ v_dst = __lsx_vld(dst, 0); ++ ++ /* unpack src: 1x128 to 2x128 */ ++ v_src_ev = __lsx_vpackev_b(zero, v_src); ++ v_src_od = __lsx_vpackod_b(zero, v_src); ++ ++ /* expand alpha */ ++ alpha = __lsx_vshuf4i_h(v_src_od, 0xf5); ++ ++ /* unpack dst: 1x128 to 2x128 */ ++ v_dst_ev = __lsx_vpackev_b(zero, v_dst); ++ v_dst_od = __lsx_vpackod_b(zero, v_dst); ++ ++ v_dst_ev = over_1x128(v_src_ev, alpha, v_dst_ev); ++ v_dst_od = over_1x128(v_src_od, alpha, v_dst_od); ++ ++ v_dst = __lsx_vpackev_b(v_dst_od, v_dst_ev); ++ ++ __lsx_vst(v_dst, dst, 0); ++ width -= 4; ++ src 
+= 4; ++ dst += 4; ++ } ++ ++ while (width--) { ++ uint32_t s = *src; ++ uint32_t d = *dst; ++ ++ *dst = core_combine_over_u32(s, d); ++ ++ ++src; ++ ++dst; ++ } ++} ++ ++static void ++lsx_combine_over_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ if (mask) { ++ lsx_combine_over_u_mask(dest, src, mask, width); ++ } ++ else { ++ lsx_combine_over_u_no_mask(dest, src, width); ++ } ++} ++ ++static void ++lsx_combine_over_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5; ++ ++ if (mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ dest0 = __lsx_vld(dest, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = __lsx_vilvh_b(zero, dest0); ++ out4 = __lsx_vilvl_b(zero, dest1); ++ out5 = __lsx_vilvh_b(zero, dest1); ++ out4 = __lsx_vshuf4i_h(out4, 0xff); ++ out5 = __lsx_vshuf4i_h(out5, 0xff); ++ out0 = lsx_pix_multiply(out0, out4); ++ out2 = lsx_pix_multiply(out2, out5); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = __lsx_vilvh_b(zero, dest0); ++ out4 = __lsx_vilvl_b(zero, dest1); ++ out5 = __lsx_vilvh_b(zero, dest1); ++ out4 = __lsx_vshuf4i_h(out4, 0xff); ++ out5 = __lsx_vshuf4i_h(out5, 0xff); ++ out0 = lsx_pix_multiply(out0, out4); ++ out2 = lsx_pix_multiply(out2, out5); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ia = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8_ADD_UN8x4(s, ia, d); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_out_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if(mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ dest0 = __lsx_vld(dest, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ dest0 = __lsx_vxori_b(dest0, 0xff); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = 
__lsx_vilvh_b(zero, dest0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ dest0 = __lsx_vxori_b(dest0, 0xff); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = __lsx_vilvh_b(zero, dest0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t a = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8(s, a); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_out_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if(mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t a = ALPHA_8 (~s); ++ UN8x4_MUL_UN8 (d, a); ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lsx_combine_add_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if (mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, 
src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ dest1 = __lsx_vpickev_b(out2, out0); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ dest1 = __lsx_vpickev_b(out2, out0); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ UN8x4_ADD_UN8x4(d, s); ++ *(dest + i) = d; ++ } ++} ++ ++/* ++ * Multiply ++ * ++ * ad * as * B(d / ad, s / as) ++ * = ad * as * d/ad * s/as ++ * = d * s ++ * ++ */ ++static void ++lsx_combine_multiply_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5, out6, out7; ++ ++ if (mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ dest1 = __lsx_vshuf4i_b(dest1, 0xff); ++ out4 = __lsx_vilvl_b(zero, dest0); ++ out5 = __lsx_vilvh_b(zero, dest0); ++ out6 = __lsx_vilvl_b(zero, dest1); ++ out7 = __lsx_vilvh_b(zero, dest1); ++ out6 = lsx_pix_multiply(out0, out6); ++ out7 = lsx_pix_multiply(out2, out7); ++ out1 = lsx_pix_multiply(out4, out1); ++ out3 = lsx_pix_multiply(out5, out3); ++ dest0 = __lsx_vpickev_b(out7, out6); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ ++ out4 = lsx_pix_multiply(out4, out0); ++ out5 = lsx_pix_multiply(out5, out2); ++ dest1 = __lsx_vpickev_b(out5, out4); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ dest1 = __lsx_vshuf4i_b(dest1, 0xff); ++ out4 = __lsx_vilvl_b(zero, dest0); ++ out5 = __lsx_vilvh_b(zero, dest0); ++ out6 = __lsx_vilvl_b(zero, dest1); ++ out7 = __lsx_vilvh_b(zero, dest1); ++ out6 = lsx_pix_multiply(out0, out6); ++ out7 = lsx_pix_multiply(out2, out7); ++ out1 = lsx_pix_multiply(out4, out1); ++ out3 = lsx_pix_multiply(out5, out3); ++ dest0 = __lsx_vpickev_b(out7, out6); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ 
dest0 = __lsx_vsadd_bu(dest0, dest1); ++ ++ out4 = lsx_pix_multiply(out4, out0); ++ out5 = lsx_pix_multiply(out5, out2); ++ dest1 = __lsx_vpickev_b(out5, out4); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ss = s; ++ uint32_t src_ia = ALPHA_8(~s); ++ uint32_t dest_ia = ALPHA_8(~d); ++ ++ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(ss, dest_ia, d, src_ia); ++ UN8x4_MUL_UN8x4(d, s); ++ UN8x4_ADD_UN8x4(d, ss); ++ ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lsx_combine_src_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ combine_mask_value_ca(&s, &m); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_over_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out4 = lsx_pix_multiply(out0, out1); ++ out5 = lsx_pix_multiply(out2, out3); ++ out0 = __lsx_vshuf4i_h(out0, 0xff); ++ out2 = __lsx_vshuf4i_h(out2, 0xff); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ ++ out1 = __lsx_vxor_v(out1, bit_set); ++ out3 = __lsx_vxor_v(out3, bit_set); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ ++ dest0 = __lsx_vpickev_b(out5, out4); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_ca (&s, &m); ++ a = ~m; ++ if (a) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8x4_ADD_UN8x4(d, a, s); ++ s = d; ++ } ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_out_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ 
out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out0 = __lsx_vshuf4i_h(out0, 0xff); ++ out2 = __lsx_vshuf4i_h(out2, 0xff); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ ++ out1 = __lsx_vxor_v(out1, bit_set); ++ out3 = __lsx_vxor_v(out3, bit_set); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ dest0 = __lsx_vpickev_b(out3, out1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_alpha_ca(&s, &m); ++ a = ~m; ++ ++ if (a != ~0) { ++ uint32_t d = 0; ++ ++ if (a) { ++ d = *(dest + i); ++ UN8x4_MUL_UN8x4(d, a); ++ } ++ *(dest + i) = d; ++ } ++ } ++} ++ ++/* ++ * w : length in bytes ++ */ ++static void force_inline ++lsx_blt_one_line_u8 (uint8_t *pDst, uint8_t *pSrc, int w) ++{ ++ /* align the dst to 16 byte */ ++ while (((uintptr_t)pDst & 15) && w) { ++ *pDst = *pSrc; ++ pSrc += 1; ++ pDst += 1; ++ w -= 1; ++ } ++ ++ while (w >= 32) { ++ __m128i src0, src1; ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ __lsx_vst(__lsx_vld(pSrc, 0), pDst, 0); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ *(uint64_t *)pDst = *(uint64_t *)pSrc; ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy one bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in half word ++ */ ++static void ++lsx_blt_one_line_u16 (uint16_t *pDst, uint16_t *pSrc, int w) ++{ ++ /* align the dst to 16 byte */ ++ while (((uintptr_t)pDst & 15) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 32) { ++ __m128i src0, src1, src2, src3; ++ /* copy 64 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ src2 = __lsx_vld(pSrc, 32); ++ src3 = __lsx_vld(pSrc, 48); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ __lsx_vst(src2, pDst, 32); ++ __lsx_vst(src3, pDst, 48); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ __m128i src0, src1; ++ /* copy 32 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ /* copy 16 bytes */ ++ __lsx_vst(__lsx_vld(pSrc, 0), pDst, 0); ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy 2 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in word ++ */ ++static force_inline void ++lsx_blt_one_line_u32 (uint32_t *pDst, uint32_t *pSrc, int w) ++{ ++ /* align the dst to 16 byte */ ++ while (((uintptr_t)pDst & 15) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 32) { ++ __m128i src0, src1, src2, src3; ++ __m128i src4, src5, src6, src7; ++ /* copy 128 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ src2 = __lsx_vld(pSrc, 32); ++ src3 = __lsx_vld(pSrc, 48); ++ src4 = __lsx_vld(pSrc, 64); ++ src5 = __lsx_vld(pSrc, 80); ++ src6 = __lsx_vld(pSrc, 96); ++ src7 = __lsx_vld(pSrc, 112); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ __lsx_vst(src2, pDst, 32); ++ __lsx_vst(src3, pDst, 
48); ++ __lsx_vst(src4, pDst, 64); ++ __lsx_vst(src5, pDst, 80); ++ __lsx_vst(src6, pDst, 96); ++ __lsx_vst(src7, pDst, 112); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ __m128i src0, src1, src2, src3; ++ /* copy 64 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ src2 = __lsx_vld(pSrc, 32); ++ src3 = __lsx_vld(pSrc, 48); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ __lsx_vst(src2, pDst, 32); ++ __lsx_vst(src3, pDst, 48); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ __m128i src0, src1; ++ /* copy 32 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ if (w >= 4) { ++ /* copy 16 bytes once a time */ ++ __lsx_vst(__lsx_vld(pSrc, 0), pDst, 0); ++ ++ w -= 4; ++ pSrc += 4; ++ pDst += 4; ++ } ++ ++ while (w--) { ++ /* copy 4 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++static pixman_bool_t ++lsx_blt (pixman_implementation_t *imp, ++ uint32_t * src_bits, ++ uint32_t * dst_bits, ++ int src_stride, ++ int dst_stride, ++ int src_bpp, ++ int dst_bpp, ++ int src_x, ++ int src_y, ++ int dest_x, ++ int dest_y, ++ int width, ++ int height) ++{ ++ if (src_bpp != dst_bpp) ++ return FALSE; ++ ++ if (src_bpp == 8) { ++ uint8_t *src_b = (uint8_t *)src_bits; ++ uint8_t *dst_b = (uint8_t *)dst_bits; ++ ++ src_stride = src_stride * 4; ++ dst_stride = dst_stride * 4; ++ ++ src_b += src_stride * src_y + src_x; ++ dst_b += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lsx_blt_one_line_u8 (dst_b, src_b, width); ++ dst_b += dst_stride; ++ src_b += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 16) { ++ uint16_t *src_h = (uint16_t *)src_bits; ++ uint16_t *dst_h = (uint16_t *)dst_bits; ++ ++ src_stride = src_stride * 2; ++ dst_stride = dst_stride * 2; ++ ++ src_h += src_stride * src_y + src_x; ++ dst_h += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lsx_blt_one_line_u16 (dst_h, src_h, width); ++ dst_h += dst_stride; ++ src_h += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 32) { ++ src_bits += src_stride * src_y + src_x; ++ dst_bits += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lsx_blt_one_line_u32 (dst_bits, src_bits, width); ++ dst_bits += dst_stride; ++ src_bits += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static void ++lsx_fill_u8 (uint8_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint8_t filler) ++{ ++ __m128i vfill = __lsx_vreplgr2vr_b(filler); ++ int byte_stride = stride * 4; ++ dst += y * byte_stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint8_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 15)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 64) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ w -= 64; ++ d += 64; ++ } ++ ++ if (w >= 32) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lsx_vst(vfill, d, 0); ++ w -= 16; ++ d += 16; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += byte_stride; ++ } ++} ++ ++static void ++lsx_fill_u16 (uint16_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint16_t filler) ++{ ++ __m128i vfill = __lsx_vreplgr2vr_h(filler); ++ int short_stride = stride * 2; ++ dst += y * short_stride + x; ++ ++ while (height--) { ++ int 
w = width; ++ uint16_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 15)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ w -= 16; ++ d += 16; ++ } ++ ++ if (w >= 8) { ++ __lsx_vst(vfill, d, 0); ++ w -= 8; ++ d += 8; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += short_stride; ++ } ++} ++ ++static void ++lsx_fill_u32 (uint32_t *bits, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ __m128i vfill = __lsx_vreplgr2vr_w(filler); ++ bits += y * stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint32_t *d = bits; ++ ++ while (w && ((uintptr_t)d & 15)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ __lsx_vst(vfill, d, 64); ++ __lsx_vst(vfill, d, 80); ++ __lsx_vst(vfill, d, 96); ++ __lsx_vst(vfill, d, 112); ++ w -= 32; ++ d += 32; ++ } ++ ++ while (w >= 16) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ w -= 16; ++ d += 16; ++ } ++ ++ if (w >= 8) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ w -= 8; ++ d += 8; ++ } ++ ++ if (w >= 4) { ++ __lsx_vst(vfill, d, 0); ++ w -= 4; ++ d += 4; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ bits += stride; ++ } ++} ++ ++static pixman_bool_t ++lsx_fill (pixman_implementation_t *imp, ++ uint32_t * bits, ++ int stride, ++ int bpp, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ switch (bpp) { ++ case 8: ++ lsx_fill_u8 ((uint8_t *)bits, stride, x, y, width, height, (uint8_t)filler); ++ return TRUE; ++ ++ case 16: ++ lsx_fill_u16 ((uint16_t *)bits, stride, x, y, width, height, (uint16_t)filler); ++ return TRUE; ++ ++ case 32: ++ lsx_fill_u32 (bits, stride, x, y, width, height, filler); ++ return TRUE; ++ ++ default: ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++static void ++lsx_composite_over_n_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca; ++ uint32_t *dst_line, *dst, d; ++ uint8_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int32_t w; ++ v4u32 vsrca, vsrc; ++ __m128i vff; ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ vsrc = (v4u32)__lsx_vreplgr2vr_w(src); ++ srca = src >> 24; ++ vsrca = (v4u32)__lsx_vreplgr2vr_w(srca); ++ vff = __lsx_vreplgr2vr_w(0xff); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 4) { ++ v4u32 ma = {mask[0], mask[1], mask[2], mask[3]}; ++ ++ if (__lsx_bnz_w(__lsx_vseqi_w((__m128i)ma, 0xff))){ ++ if (__lsx_bnz_w(__lsx_vseqi_w(vsrca, 0xff))) ++ *(__m128i*) dst = (__m128i)vsrc; ++ else if (__lsx_bnz_w(__lsx_vsub_w((__m128i)ma, vff))) ++ *(__m128i*) dst = lsx_over_u((__m128i)vsrc, *(__m128i*)dst); ++ } else if (__lsx_bnz_w((__m128i)ma)) { ++ __m128i d0 = lsx_in_u((__m128i)vsrc, (__m128i)ma); ++ *(__m128i*) dst = lsx_over_u(d0, 
*(__m128i*)dst); ++ } else { ++ for(int i = 0; i < 4; i++) { ++ if (mask[i] == 0xff) { ++ if (vsrca[i] == 0xff) ++ *(dst + i) = vsrc[i]; ++ else ++ *(dst + i) = over(vsrc[i], *(dst + i)); ++ } else if (mask[i]) { ++ m = mask[i]; ++ d = in(vsrc[i], m); ++ *(dst + i) = over(d, *(dst + i)); ++ } ++ } ++ } ++ dst += 4; ++ w -= 4; ++ mask += 4; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ if (m == 0xff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (m) { ++ d = in(src, m); ++ *dst = over(d, *dst); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int dst_stride, src_stride; ++ int32_t w; ++ uint16_t t; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ lsx_combine_add_u(imp, op, (uint32_t *)dst, (uint32_t *)src, NULL, w >> 2); ++ dst += w & 0xfffc; ++ src += w & 0xfffc; ++ w &= 3; ++ ++ while (w--) { ++ t = (*dst) + (*src++); ++ *dst++ = t | (0 - (t >> 8)); ++ } ++ } ++} ++ ++static void ++lsx_composite_add_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ int dst_stride, src_stride; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ lsx_combine_add_u(imp, op, dst_line, src_line, NULL, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lsx_composite_over_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ int dst_stride, src_stride; ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ lsx_combine_over_u_no_mask (dst_line, src_line, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lsx_composite_copy_area (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ bits_image_t src_bits, dst_bits; ++ src_bits = info->src_image->bits; ++ dst_bits = info->dest_image->bits; ++ lsx_blt (imp, src_bits.bits, ++ dst_bits.bits, ++ src_bits.rowstride, ++ dst_bits.rowstride, ++ PIXMAN_FORMAT_BPP (src_bits.format), ++ PIXMAN_FORMAT_BPP (dst_bits.format), ++ info->src_x, info->src_y, info->dest_x, ++ info->dest_y, info->width, info->height); ++} ++ ++static void ++lsx_composite_src_x888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m128i src0, tmp; ++ __m128i rb0, t0, g0; ++ __m128i mask_565_rb = __lsx_vreplgr2vr_w(0x001f001f); ++ __m128i mask_green_4x32 = __lsx_vreplgr2vr_w(0x0000fc00); ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, 
dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && (uintptr_t)src & 15) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 4) { ++ src0 = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ ++ rb0 = __lsx_vsrli_w(src0, 3) & mask_565_rb; ++ g0 = src0 & mask_green_4x32; ++ rb0 = rb0 | __lsx_vsrli_w(rb0, 5); ++ t0 = rb0 | __lsx_vsrli_w(g0, 5); ++ tmp = __lsx_vpickev_h(t0, t0); ++ __lsx_vstelm_d(tmp, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_in_n_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS(info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ uint32_t m, src, srca; ++ int32_t w; ++ uint16_t t; ++ ++ __m128i alpha, tmp; ++ __m128i vmask, vmask_lo, vmask_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i mask_zero = __lsx_vldi(0); ++ ++ PIXMAN_IMAGE_GET_LINE(dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE(mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ alpha = __lsx_vreplgr2vr_w(src); ++ alpha = __lsx_vilvl_b(mask_zero, alpha); ++ alpha = __lsx_vshuf4i_w(alpha, 0x44); ++ alpha = __lsx_vshuf4i_h(alpha, 0xff); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vmask = __lsx_vld(mask, 0); ++ vdst = __lsx_vld(dst, 0); ++ mask += 16; ++ w -= 16; ++ ++ vmask_lo = __lsx_vsllwil_hu_bu(vmask, 0); ++ vmask_hi = __lsx_vexth_hu_bu(vmask); ++ vdst_lo = __lsx_vsllwil_hu_bu(vdst, 0); ++ vdst_hi = __lsx_vexth_hu_bu(vdst); ++ vmask_lo = lsx_pix_multiply(alpha, vmask_lo); ++ vmask_hi = lsx_pix_multiply(alpha, vmask_hi); ++ vdst_lo = lsx_pix_multiply(vmask_lo, vdst_lo); ++ vdst_hi = lsx_pix_multiply(vmask_hi, vdst_hi); ++ vdst_lo = __lsx_vsat_bu(vdst_lo, 7); ++ vdst_hi = __lsx_vsat_bu(vdst_hi, 7); ++ tmp = __lsx_vpickev_b(vdst_hi, vdst_lo); ++ __lsx_vst(tmp, dst, 0); ++ dst += 16; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ m = MUL_UN8(m, srca, t); ++ if (m == 0) ++ *dst = 0; ++ else if (m != 0xff) ++ *dst = MUL_UN8(m, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_in_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int src_stride, dst_stride; ++ int32_t w, s; ++ uint16_t t; ++ ++ __m128i tmp; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vsrc = __lsx_vld(src, 0); ++ vdst = __lsx_vld(dst, 0); ++ src += 16; ++ w -= 16; ++ ++ vsrc_lo = __lsx_vsllwil_hu_bu(vsrc, 0); ++ vsrc_hi = __lsx_vexth_hu_bu(vsrc); ++ vdst_lo = __lsx_vsllwil_hu_bu(vdst, 0); ++ vdst_hi = __lsx_vexth_hu_bu(vdst); ++ vdst_lo = lsx_pix_multiply(vsrc_lo, vdst_lo); ++ vdst_hi = lsx_pix_multiply(vsrc_hi, vdst_hi); ++ vdst_lo = 
__lsx_vsat_bu(vdst_lo, 7); ++ vdst_hi = __lsx_vsat_bu(vdst_hi, 7); ++ tmp = __lsx_vpickev_b(vdst_hi, vdst_lo); ++ __lsx_vst(tmp, dst, 0); ++ dst += 16; ++ } ++ ++ while (w--) { ++ s = *src++; ++ if (s == 0) ++ *dst = 0; ++ else if (s != 0xff) ++ *dst = MUL_UN8(s, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca, ns; ++ uint32_t *dst_line, *dst, nd; ++ uint32_t *mask_line, *mask, ma; ++ int dst_stride, mask_stride; ++ int32_t w; ++ ++ __m128i d, m, t; ++ __m128i s, sa, d0, d1, m0, m1, t0, t1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ s = __lsx_vreplgr2vr_w(src); ++ s = __lsx_vilvl_b(zero, s); ++ sa = __lsx_vshuf4i_h(s, 0xff); ++ ++ while(height --) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ w--; ++ } ++ ++ while(w >= 4) { ++ m = __lsx_vld(mask, 0); ++ mask += 4; ++ w -= 4; ++ ++ if (__lsx_bnz_v(m)) { ++ d = __lsx_vld(dst, 0); ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d1 = __lsx_vexth_hu_bu(d); ++ m0 = __lsx_vsllwil_hu_bu(m, 0); ++ m1 = __lsx_vexth_hu_bu(m); ++ ++ t0 = lsx_pix_multiply(s, m0); ++ t1 = lsx_pix_multiply(s, m1); ++ ++ m0 = lsx_pix_multiply(m0, sa); ++ m1 = lsx_pix_multiply(m1, sa); ++ m0 = __lsx_vxor_v(m0, bit_set); ++ m1 = __lsx_vxor_v(m1, bit_set); ++ d0 = lsx_pix_multiply(d0, m0); ++ d1 = lsx_pix_multiply(d1, m1); ++ ++ d = __lsx_vpickev_b(d1, d0); ++ t = __lsx_vpickev_b(t1, t0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vst(d, dst, 0); ++ } ++ dst += 4; ++ } ++ ++ while(w--) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint32_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ ++ __m128i d, t; ++ __m128i s, d0, d1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ s = __lsx_vreplgr2vr_w(src); ++ s = __lsx_vilvl_b(zero, s); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ d = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d0 = __lsx_vshuf4i_h(d0, 0xff); ++ d0 = __lsx_vxor_v(d0, bit_set); ++ 
d0 = lsx_pix_multiply(d0, s); ++ t = __lsx_vpickev_b(zero, d0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vstelm_w(d, dst, 0, 0); ++ dst += 1; ++ w--; ++ } ++ ++ while (w >= 4) { ++ d = __lsx_vld(dst, 0); ++ w -= 4; ++ ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d1 = __lsx_vexth_hu_bu(d); ++ d0 = __lsx_vshuf4i_h(d0, 0xff); ++ d1 = __lsx_vshuf4i_h(d1, 0xff); ++ d0 = __lsx_vxor_v(d0, bit_set); ++ d1 = __lsx_vxor_v(d1, bit_set); ++ d0 = lsx_pix_multiply(d0, s); ++ d1 = lsx_pix_multiply(d1, s); ++ t = __lsx_vpickev_b(d1, d0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vst(d, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d0 = __lsx_vshuf4i_h(d0, 0xff); ++ d0 = __lsx_vxor_v(d0, bit_set); ++ d0 = lsx_pix_multiply(d0, s); ++ t = __lsx_vpickev_b(zero, d0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vstelm_w(d, dst, 0, 0); ++ dst += 1; ++ } ++ } ++} ++ ++static void ++lsx_composite_src_x888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ int32_t w; ++ int dst_stride, src_stride; ++ __m128i mask = __lsx_vreplgr2vr_w(0xff000000); ++ __m128i vsrc0, vsrc1, vsrc2, vsrc3, vsrc4, vsrc5, vsrc6, vsrc7; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ *dst++ = *src++ | 0xff000000; ++ w--; ++ } ++ ++ while (w >= 32) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc1 = __lsx_vld(src, 16); ++ vsrc2 = __lsx_vld(src, 32); ++ vsrc3 = __lsx_vld(src, 48); ++ vsrc4 = __lsx_vld(src, 64); ++ vsrc5 = __lsx_vld(src, 80); ++ vsrc6 = __lsx_vld(src, 96); ++ vsrc7 = __lsx_vld(src, 112); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ vsrc1 = __lsx_vor_v(vsrc1, mask); ++ vsrc2 = __lsx_vor_v(vsrc2, mask); ++ vsrc3 = __lsx_vor_v(vsrc3, mask); ++ vsrc4 = __lsx_vor_v(vsrc4, mask); ++ vsrc5 = __lsx_vor_v(vsrc5, mask); ++ vsrc6 = __lsx_vor_v(vsrc6, mask); ++ vsrc7 = __lsx_vor_v(vsrc7, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ __lsx_vst(vsrc1, dst, 16); ++ __lsx_vst(vsrc2, dst, 32); ++ __lsx_vst(vsrc3, dst, 48); ++ __lsx_vst(vsrc4, dst, 64); ++ __lsx_vst(vsrc5, dst, 80); ++ __lsx_vst(vsrc6, dst, 96); ++ __lsx_vst(vsrc7, dst, 112); ++ ++ src += 32; ++ w -= 32; ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc1 = __lsx_vld(src, 16); ++ vsrc2 = __lsx_vld(src, 32); ++ vsrc3 = __lsx_vld(src, 48); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ vsrc1 = __lsx_vor_v(vsrc1, mask); ++ vsrc2 = __lsx_vor_v(vsrc2, mask); ++ vsrc3 = __lsx_vor_v(vsrc3, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ __lsx_vst(vsrc1, dst, 16); ++ __lsx_vst(vsrc2, dst, 32); ++ __lsx_vst(vsrc3, dst, 48); ++ ++ src += 16; ++ w -= 16; ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc1 = __lsx_vld(src, 16); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ vsrc1 = __lsx_vor_v(vsrc1, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ __lsx_vst(vsrc1, dst, 16); ++ ++ src += 8; ++ w -= 8; ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ ++ src += 4; ++ w -= 4; ++ dst += 4; ++ } ++ ++ while (w--) { ++ *dst++ = *src++ | 0xff000000; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_n_8_8 (pixman_implementation_t *imp, ++ 
pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t src; ++ uint16_t sa; ++ ++ __m128i d0; ++ __m128i vsrc, t0, t1; ++ __m128i a0, a0_l, a0_h; ++ __m128i b0, b0_l, b0_h; ++ __m128i zero = __lsx_vldi(0); ++ __m128i one_half = __lsx_vreplgr2vr_h(0x80); ++ __m128i g_shift = __lsx_vreplgr2vr_h(8); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ sa = (src >> 24); ++ vsrc = __lsx_vreplgr2vr_h(sa); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 16) { ++ a0 = __lsx_vld(mask, 0); ++ w -= 16; ++ mask += 16; ++ ++ a0_l = __lsx_vsllwil_hu_bu(a0, 0); ++ a0_h = __lsx_vexth_hu_bu(a0); ++ ++ a0_l = __lsx_vmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lsx_vmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lsx_vsadd_hu(__lsx_vsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lsx_vsadd_hu(__lsx_vsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lsx_vsrl_h(a0_l, g_shift); ++ a0_h = __lsx_vsrl_h(a0_h, g_shift); ++ ++ b0 = __lsx_vld(dst, 0); ++ b0_l = __lsx_vsllwil_hu_bu(b0, 0); ++ b0_h = __lsx_vexth_hu_bu(b0); ++ ++ t0 = __lsx_vadd_h(a0_l, b0_l); ++ t1 = __lsx_vadd_h(a0_h, b0_h); ++ ++ t0 = __lsx_vor_v(t0, __lsx_vsub_h(zero, __lsx_vsrl_h(t0, g_shift))); ++ t1 = __lsx_vor_v(t1, __lsx_vsub_h(zero, __lsx_vsrl_h(t1, g_shift))); ++ ++ t0 = __lsx_vsat_hu(t0, 7); ++ t1 = __lsx_vsat_hu(t1 ,7); ++ ++ d0 = __lsx_vpickev_b(t1, t0); ++ __lsx_vst(d0, dst, 0); ++ dst += 16; ++ } ++ ++ while (w--) { ++ uint16_t tmp; ++ uint16_t a; ++ uint32_t m, d; ++ uint32_t r; ++ ++ a = *mask++; ++ d = *dst; ++ ++ m = MUL_UN8 (sa, a, tmp); ++ r = ADD_UN8 (m, d, tmp); ++ ++ *dst++ = r; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_n_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ uint32_t src; ++ ++ __m128i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ src >>= 24; ++ ++ if (src == 0x00) ++ return; ++ ++ if (src == 0xff) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, ++ 8, dest_x, dest_y, width, height, 0xff); ++ return; ++ } ++ ++ vsrc = __lsx_vreplgr2vr_b(src); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ d0 = __lsx_vldrepl_b(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_b(d0, dst, 0, 0); ++ w--; ++ dst++; ++ } ++ ++ while (w >= 32) { ++ d0 = __lsx_vld(dst, 0); ++ d1 = __lsx_vld(dst, 16); ++ w -= 32; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ d1 = __lsx_vsadd_bu(vsrc, d1); ++ __lsx_vst(d0, dst, 0); ++ __lsx_vst(d1, dst, 16); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ d0 = __lsx_vld(dst, 0); ++ w -= 16; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vst(d0, dst, 0); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ d0 = __lsx_vldrepl_d(dst, 0); ++ w -= 8; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_d(d0, dst, 0, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ d0 = __lsx_vldrepl_w(dst, 0); ++ w -= 4; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ 
__lsx_vstelm_w(d0, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lsx_vldrepl_b(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_b(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst, src; ++ int dst_stride, w; ++ ++ __m128i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ if (src == ~0) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, ++ dest_x, dest_y, width, height, ~0); ++ return; ++ } ++ ++ vsrc = __lsx_vreplgr2vr_w(src); ++ ++ while (height--) { ++ w = width; ++ ++ dst = dst_line; ++ dst_line += dst_stride; ++ ++ while (w && (uintptr_t)dst & 15) { ++ d0 = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_w(d0, dst, 0, 0); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 8) { ++ d0 = __lsx_vld(dst, 0); ++ d1 = __lsx_vld(dst, 16); ++ w -= 8; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ d1 = __lsx_vsadd_bu(vsrc, d1); ++ __lsx_vst(d0, dst, 0); ++ __lsx_vst(d1, dst, 16); ++ dst += 8; ++ } ++ ++ while (w >= 4) { ++ d0 = __lsx_vld(dst, 0); ++ w -= 4; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vst(d0, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_w(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static force_inline __m128i ++unpack_32_1x128(uint32_t data) ++{ ++ __m128i zero = __lsx_vldi(0); ++ __m128i tmp = __lsx_vinsgr2vr_w(zero, data, 0); ++ return __lsx_vilvl_b(zero, tmp); ++} ++ ++static force_inline __m128i ++unpack_32_2x128(uint32_t data) ++{ ++ __m128i tmp0, out0; ++ __m128i zero = __lsx_vldi(0); ++ tmp0 = __lsx_vinsgr2vr_w(tmp0, data, 0); ++ tmp0 = __lsx_vpickev_d(tmp0, tmp0); ++ out0 = __lsx_vilvl_b(zero, tmp0); ++ ++ return out0; ++} ++ ++static force_inline __m128i ++expand_pixel_32_1x128(uint32_t data) ++{ ++ return __lsx_vshuf4i_w(unpack_32_1x128(data), 0x44); ++} ++ ++static force_inline __m128i ++expand_pixel_32_2x128(uint32_t data) ++{ ++ return __lsx_vshuf4i_w(unpack_32_2x128(data), 0x44); ++} ++ ++static force_inline __m128i ++expand_alpha_1x128(__m128i data) ++{ ++ return __lsx_vshuf4i_h(data, 0xff); ++} ++ ++static force_inline __m128i ++expand_alphaa_2x128(__m128i data) ++{ ++ __m128i tmp0; ++ tmp0 = __lsx_vshuf4i_h(data, 0xff); ++ tmp0 = __lsx_vpickev_d(tmp0, tmp0); ++ ++ return tmp0; ++} ++ ++static force_inline __m128i ++unpack_565_to_8888(__m128i lo) ++{ ++ __m128i r, g, b, rb, t; ++ __m128i mask_green_4x32 = __lsx_vreplgr2vr_w(0x0000fc00); ++ __m128i mask_red_4x32 = __lsx_vreplgr2vr_w(0x00f80000); ++ __m128i mask_blue_4x32 = __lsx_vreplgr2vr_w(0x000000f8); ++ __m128i mask_565_fix_rb = __lsx_vreplgr2vr_w(0x00e000e0); ++ __m128i mask_565_fix_g = __lsx_vreplgr2vr_w(0x0000c000); ++ ++ r = __lsx_vslli_w(lo, 8); ++ r = __lsx_vand_v(r, mask_red_4x32); ++ g = __lsx_vslli_w(lo, 5); ++ g = __lsx_vand_v(g, mask_green_4x32); ++ b = __lsx_vslli_w(lo, 3); ++ b = __lsx_vand_v(b, mask_blue_4x32); ++ ++ rb = __lsx_vor_v(r, b); ++ t = __lsx_vand_v(rb, mask_565_fix_rb); ++ t = __lsx_vsrli_w(t, 5); ++ rb = __lsx_vor_v(rb, t); ++ ++ t = __lsx_vand_v(g, mask_565_fix_g); ++ t = __lsx_vsrli_w(t, 6); ++ g = __lsx_vor_v(g, t); ++ ++ return (__lsx_vor_v(rb, g)); ++} ++ ++static force_inline void ++unpack_128_2x128(__m128i data, 
__m128i *data_lo, __m128i *data_hi) ++{ ++ __m128i mask_zero = __lsx_vldi(0); ++ *data_lo = __lsx_vilvl_b(mask_zero, data); ++ *data_hi = __lsx_vilvh_b(mask_zero, data); ++} ++ ++static force_inline void ++unpack_565_128_4x128(__m128i data, __m128i *data0, ++ __m128i *data1, __m128i *data2, __m128i *data3) ++{ ++ __m128i lo, hi; ++ __m128i mask_zero = __lsx_vldi(0); ++ lo = __lsx_vilvl_h(mask_zero, data); ++ hi = __lsx_vilvh_h(mask_zero, data); ++ lo = unpack_565_to_8888(lo); ++ hi = unpack_565_to_8888(hi); ++ ++ unpack_128_2x128((__m128i)lo, (__m128i*)data0, (__m128i*)data1); ++ unpack_128_2x128((__m128i)hi, (__m128i*)data2, (__m128i*)data3); ++} ++ ++static force_inline void ++negate_2x128(__m128i data_lo, __m128i data_hi, __m128i *neg_lo, __m128i *neg_hi) ++{ ++ __m128i mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ *neg_lo = __lsx_vxor_v(data_lo, mask_00ff); ++ *neg_hi = __lsx_vxor_v(data_hi, mask_00ff); ++} ++ ++static force_inline void ++over_2x128(__m128i *src_lo, __m128i *src_hi, __m128i *alpha_lo, ++ __m128i *alpha_hi, __m128i *dst_lo, __m128i *dst_hi) ++{ ++ __m128i t1, t2; ++ negate_2x128(*alpha_lo, *alpha_hi, &t1, &t2); ++ *dst_lo = lsx_pix_multiply(*dst_lo, t1); ++ *dst_hi = lsx_pix_multiply(*dst_hi, t2); ++ *dst_lo = __lsx_vsadd_bu(*src_lo, *dst_lo); ++ *dst_hi = __lsx_vsadd_bu(*src_hi, *dst_hi); ++} ++ ++static force_inline __m128i ++pack_2x128_128(__m128i lo, __m128i hi) ++{ ++ __m128i tmp0 = __lsx_vsat_bu(lo, 7); ++ __m128i tmp1 = __lsx_vsat_bu(hi, 7); ++ __m128i tmp2 = __lsx_vpickev_b(tmp1, tmp0); ++ ++ return tmp2; ++} ++ ++static force_inline __m128i ++pack_565_2x128_128(__m128i lo, __m128i hi) ++{ ++ __m128i data; ++ __m128i r, g1, g2, b; ++ __m128i mask_565_r = __lsx_vreplgr2vr_w(0x00f80000); ++ __m128i mask_565_g1 = __lsx_vreplgr2vr_w(0x00070000); ++ __m128i mask_565_g2 = __lsx_vreplgr2vr_w(0x000000e0); ++ __m128i mask_565_b = __lsx_vreplgr2vr_w(0x0000001f); ++ ++ data = pack_2x128_128 (lo, hi); ++ r = __lsx_vand_v(data, mask_565_r); ++ g1 = __lsx_vslli_w(data, 3) & mask_565_g1; ++ g2 = __lsx_vsrli_w(data, 5) & mask_565_g2; ++ b = __lsx_vsrli_w(data, 3) & mask_565_b; ++ ++ return (((r|g1)|g2)|b); ++} ++ ++static force_inline __m128i ++expand565_16_1x128(uint16_t pixel) ++{ ++ __m128i m; ++ __m128i zero = __lsx_vldi(0); ++ ++ m = __lsx_vinsgr2vr_w(m, pixel, 0); ++ m = unpack_565_to_8888(m); ++ m = __lsx_vilvl_b(zero, m); ++ ++ return m; ++} ++ ++static force_inline uint32_t ++pack_1x128_32(__m128i data) ++{ ++ __m128i tmp0, tmp1; ++ __m128i zero = __lsx_vldi(0); ++ ++ tmp0 = __lsx_vsat_bu(data, 7); ++ tmp1 = __lsx_vpickev_b(zero, tmp0); ++ ++ return (__lsx_vpickve2gr_wu(tmp1, 0)); ++} ++ ++static force_inline uint16_t ++pack_565_32_16(uint32_t pixel) ++{ ++ return (uint16_t)(((pixel >> 8) & 0xf800) | ++ ((pixel >> 5) & 0x07e0) | ++ ((pixel >> 3) & 0x001f)); ++} ++ ++static force_inline __m128i ++pack_565_4x128_128(__m128i *v0, __m128i *v1, __m128i *v2, __m128i *v3) ++{ ++ return pack_2x128_128(pack_565_2x128_128(*v0, *v1), ++ pack_565_2x128_128(*v2, *v3)); ++} ++ ++static force_inline void ++expand_alpha_2x128(__m128i data_lo, __m128i data_hi, __m128i *alpha_lo, __m128i *alpha_hi) ++{ ++ *alpha_lo = __lsx_vshuf4i_h(data_lo, 0xff); ++ *alpha_hi = __lsx_vshuf4i_h(data_hi, 0xff); ++} ++ ++static force_inline void ++expand_alpha_rev_2x128(__m128i data_lo, __m128i data_hi, __m128i *alpha_lo, __m128i *alpha_hi) ++{ ++ *alpha_lo = __lsx_vshuf4i_h(data_lo, 0x00); ++ *alpha_hi = __lsx_vshuf4i_h(data_hi, 0x00); ++} ++ ++static force_inline uint16_t 
++composite_over_8888_0565pixel(uint32_t src, uint16_t dst) ++{ ++ __m128i ms; ++ ms = unpack_32_1x128(src); ++ ++ return pack_565_32_16(pack_1x128_32((__m128i)over_1x128((__m128i)ms, ++ (__m128i)expand_alpha_1x128((__m128i)ms), expand565_16_1x128(dst)))); ++} ++ ++static force_inline void ++in_over_2x128(__m128i *src_lo, __m128i *src_hi, __m128i *alpha_lo, __m128i *alpha_hi, ++ __m128i *mask_lo, __m128i *mask_hi, __m128i *dst_lo, __m128i *dst_hi) ++{ ++ __m128i s_lo, s_hi; ++ __m128i a_lo, a_hi; ++ s_lo = lsx_pix_multiply(*src_lo, *mask_lo); ++ s_hi = lsx_pix_multiply(*src_hi, *mask_hi); ++ a_lo = lsx_pix_multiply(*alpha_lo, *mask_lo); ++ a_hi = lsx_pix_multiply(*alpha_hi, *mask_hi); ++ over_2x128(&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); ++} ++ ++static force_inline __m128i ++in_over_1x128(__m128i *src, __m128i *alpha, __m128i *mask, __m128i *dst) ++{ ++ return over_1x128(lsx_pix_multiply(*src, *mask), ++ lsx_pix_multiply(*alpha, *mask), *dst); ++} ++ ++static force_inline __m128i ++expand_alpha_rev_1x128(__m128i data) ++{ ++ __m128i v0 = {0x00000000, 0xffffffff}; ++ __m128i v_hi = __lsx_vand_v(data, v0); ++ data = __lsx_vshuf4i_h(data, 0x00); ++ v0 = __lsx_vnor_v(v0, v0); ++ data = __lsx_vand_v(data, v0); ++ data = __lsx_vor_v(data, v_hi); ++ ++ return data; ++} ++ ++static void ++lsx_composite_over_n_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ int32_t w; ++ int dst_stride; ++ __m128i vsrc, valpha; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ vsrc = expand_pixel_32_1x128(src); ++ valpha = expand_alpha_1x128(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vdst = __lsx_vld(dst, 0); ++ w -= 8; ++ ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vdst0, &vdst1); ++ over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vdst2, &vdst3); ++ ++ vdst = pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3); ++ __lsx_vst(vdst, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ d = *dst; ++ *dst++ = pack_565_32_16(pack_1x128_32(over_1x128(vsrc,valpha, expand565_16_1x128(d)))); ++ } ++ } ++} ++ ++static void ++lsx_composite_over_8888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst, d; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m128i valpha_lo, valpha_hi; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vsrc = __lsx_vld(src, 0); ++ vdst = __lsx_vld(dst, 0); ++ ++ unpack_128_2x128(vsrc, &vsrc_lo, &vsrc_hi); ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ expand_alpha_2x128(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ ++ vsrc = __lsx_vld(src, 16); ++ over_2x128(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &vdst0, &vdst1); ++ ++ unpack_128_2x128(vsrc, 
&vsrc_lo, &vsrc_hi); ++ expand_alpha_2x128(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ over_2x128(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &vdst2, &vdst3); ++ ++ __lsx_vst(pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ w -= 8; ++ dst += 8; ++ src += 8; ++ } ++ ++ while (w--) { ++ s = *src++; ++ d = *dst; ++ *dst++ = composite_over_8888_0565pixel(s, d); ++ } ++ } ++} ++ ++static void ++lsx_composite_over_n_8_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ uint8_t *mask_line, *p; ++ uint32_t *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t m; ++ ++ __m128i mask_zero = __lsx_vldi(0); ++ __m128i lsx_src, lsx_alpha, lsx_mask, lsx_dest; ++ __m128i vsrc, valpha; ++ __m128i vmask, vmask_lo, vmask_hi; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ lsx_src = expand_pixel_32_1x128(src); ++ lsx_alpha = expand_alpha_1x128(lsx_src); ++ ++ vsrc = expand_pixel_32_2x128(src); ++ valpha = expand_alphaa_2x128(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vdst = __lsx_vld(dst, 0); ++ w -= 8; ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ m = *((uint32_t*)mask); ++ mask += 1; ++ ++ if (m) ++ { ++ vmask = unpack_32_1x128(m); ++ vmask = __lsx_vilvl_b(mask_zero, vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ expand_alpha_rev_2x128(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst0, &vdst1); ++ } ++ ++ m = *((uint32_t*)mask); ++ mask += 1; ++ ++ if (m) ++ { ++ vmask = unpack_32_1x128(m); ++ vmask = __lsx_vilvl_b(mask_zero, vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ expand_alpha_rev_2x128(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst2, &vdst3); ++ } ++ ++ __lsx_vst(pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ dst += 8; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ d = *dst; ++ lsx_mask = expand_alpha_rev_1x128(unpack_32_1x128 (m)); ++ lsx_dest = expand565_16_1x128(d); ++ ++ *dst = pack_565_32_16(pack_1x128_32(in_over_1x128 (&lsx_src, ++ &lsx_alpha, &lsx_mask, &lsx_dest))); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_x888_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *src, *src_line, s; ++ uint32_t *dst, *dst_line, d; ++ uint8_t *mask_line, *p; ++ uint32_t *mask; ++ uint32_t m, w; ++ int src_stride, mask_stride, dst_stride; ++ ++ __m128i mask_zero = __lsx_vldi(0); ++ __m128i mask_00ff; ++ __m128i mask_4x32; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i vmask, vmask_lo, vmask_hi; ++ ++ mask_4x32 = __lsx_vreplgr2vr_w(0xff000000); ++ mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, 
mask_stride, mask_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ src = src_line; ++ src_line += src_stride; ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 4) { ++ m = *mask; ++ vsrc = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ vsrc |= mask_4x32; ++ ++ if (m == 0xffffffff) { ++ __lsx_vst(vsrc, dst, 0); ++ } else { ++ vdst = __lsx_vld(dst, 0); ++ vmask = __lsx_vilvl_b(mask_zero, unpack_32_1x128(m)); ++ ++ unpack_128_2x128(vsrc, (__m128i*)&vsrc_lo, (__m128i*)&vsrc_hi); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ expand_alpha_rev_2x128(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ unpack_128_2x128(vdst, (__m128i*)&vdst_lo, (__m128i*)&vdst_hi); ++ ++ in_over_2x128(&vsrc_lo, &vsrc_hi, &mask_00ff, &mask_00ff, ++ &vmask_lo, &vmask_hi, &vdst_lo, &vdst_hi); ++ ++ __lsx_vst(pack_2x128_128(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 4; ++ mask += 1; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ s = 0xff000000 | *src; ++ ++ if (m == 0xff) { ++ *dst = s; ++ } ++ else { ++ __m128i ma, md, ms; ++ d = *dst; ++ ma = expand_alpha_rev_1x128(unpack_32_1x128(m)); ++ md = unpack_32_1x128(d); ++ ms = unpack_32_1x128(s); ++ *dst = pack_1x128_32(in_over_1x128(&ms, &mask_00ff, &ma, &md)); ++ } ++ } ++ src++; ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_8888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int32_t w; ++ int dst_stride, src_stride; ++ ++ __m128i vmask; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i valpha_lo, valpha_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ vmask = __lsx_vreplgr2vr_h(maska); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 4) { ++ vsrc = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ ++ if (__lsx_bnz_v(vsrc)) { ++ vdst = __lsx_vld(dst, 0); ++ unpack_128_2x128(vsrc, (__m128i*)&vsrc_lo, (__m128i*)&vsrc_hi); ++ unpack_128_2x128(vdst, (__m128i*)&vdst_lo, (__m128i*)&vdst_hi); ++ expand_alpha_2x128(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ ++ in_over_2x128(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ __lsx_vst(pack_2x128_128(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = *src++; ++ ++ if (s) { ++ uint32_t d = *dst; ++ __m128i ms = unpack_32_1x128(s); ++ __m128i alpha = expand_alpha_1x128(ms); ++ __m128i mask = vmask; ++ __m128i dest = unpack_32_1x128(d); ++ *dst = pack_1x128_32(in_over_1x128(&ms, &alpha, &mask, &dest)); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_x888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m128i vmask, valpha, mask_4x32, mask_00ff; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i 
zero = __lsx_vldi(0); ++ ++ mask_4x32 = __lsx_vreplgr2vr_w(0xff000000); ++ mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ ++ vmask = __lsx_vreplgr2vr_h(maska); ++ valpha = mask_00ff; ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 4) { ++ vsrc = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ vsrc = __lsx_vor_v(vsrc, mask_4x32); ++ vdst = __lsx_vld(dst, 0); ++ ++ unpack_128_2x128(vsrc, (__m128i*)&vsrc_lo, (__m128i*)&vsrc_hi); ++ unpack_128_2x128(vdst, (__m128i*)&vdst_lo, (__m128i*)&vdst_hi); ++ ++ in_over_2x128(&vsrc_lo, &vsrc_hi, &valpha, &valpha, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ __lsx_vst(pack_2x128_128(vdst_lo, vdst_hi), dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = (*src++) | 0xff000000; ++ uint32_t d = *dst; ++ ++ __m128i alpha, tmask; ++ __m128i src = unpack_32_1x128 (s); ++ __m128i dest = unpack_32_1x128 (d); ++ ++ alpha = __lsx_vpickev_d(zero, valpha); ++ tmask = __lsx_vpickev_d(zero, vmask); ++ ++ *dst = pack_1x128_32(in_over_1x128(&src, &alpha, &tmask, &dest)); ++ ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ uint32_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int w, flag; ++ ++ __m128i vsrc, valpha; ++ __m128i lsx_src, lsx_alpha, lsx_mask, lsx_dest; ++ __m128i vmask, vmask_lo, vmask_hi; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ ++ lsx_src = expand_pixel_32_1x128(src); ++ lsx_alpha = expand_alpha_1x128(lsx_src); ++ ++ vsrc = expand_pixel_32_2x128(src); ++ valpha = expand_alphaa_2x128(vsrc); ++ ++ while (height--) { ++ mask = mask_line; ++ dst = dst_line; ++ mask_line += mask_stride; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vmask = __lsx_vld(mask, 0); ++ vdst = __lsx_vld(dst, 0); ++ w -= 8; ++ ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ flag = __lsx_bnz_v(vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ vmask = __lsx_vld(mask, 16); ++ if (flag) { ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst0, &vdst1); ++ } ++ ++ flag = __lsx_bnz_v(vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ if (flag) { ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst2, &vdst3); ++ } ++ ++ __lsx_vst(pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ dst += 8; ++ mask += 8; ++ } ++ ++ while (w--) { ++ m = *(uint32_t *) mask; ++ ++ if (m) { ++ d = *dst; ++ lsx_mask = unpack_32_1x128(m); ++ lsx_dest = expand565_16_1x128(d); ++ *dst = pack_565_32_16(pack_1x128_32(in_over_1x128(&lsx_src, &lsx_alpha, ++ &lsx_mask, &lsx_dest))); ++ } ++ dst++; ++ mask++; ++ } ++ } ++} ++ ++static uint32_t * ++lsx_fetch_x8r8g8b8 (pixman_iter_t *iter, const 
uint32_t *mask) ++{ ++ __m128i out0, out1, mask_4x32; ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint32_t *src = (uint32_t *)iter->bits; ++ iter->bits += iter->stride; ++ mask_4x32 = __lsx_vreplgr2vr_w(0xff000000); ++ ++ while (w >= 8) { ++ out0 = __lsx_vld(src, 0); ++ out1 = __lsx_vld(src, 16); ++ out0 = __lsx_vor_v(out0, mask_4x32); ++ out1 = __lsx_vor_v(out1, mask_4x32); ++ __lsx_vst(out0, dst, 0); ++ __lsx_vst(out1, dst, 16); ++ src += 8; ++ dst += 8; ++ w -= 8; ++ } ++ ++ if (w >= 4) { ++ __lsx_vst(__lsx_vor_v(__lsx_vld(src, 0), mask_4x32), dst, 0); ++ src += 4; ++ dst += 4; ++ w -= 4; ++ } ++ ++ while (w--) { ++ *dst++ = (*src++) | 0xff000000; ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lsx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m128i a, sa, s0, s1, s2, s3, s4; ++ __m128i mask_red, mask_green, mask_blue; ++ ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint16_t *src = (uint16_t *)iter->bits; ++ iter->bits += iter->stride; ++ ++ mask_red = __lsx_vreplgr2vr_h(248); ++ mask_green = __lsx_vreplgr2vr_h(252); ++ mask_blue = mask_red; ++ a = __lsx_vreplgr2vr_h(255) << 8; ++ ++ while (w >= 4) { ++ s0 = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ //r ++ s1 = __lsx_vsrli_h(s0, 8); ++ s1 &= mask_red; ++ s2 = __lsx_vsrli_h(s1, 5); ++ s1 |= s2; ++ ++ //g ++ s2 = __lsx_vsrli_h(s0, 3); ++ s2 &= mask_green; ++ s3 = __lsx_vsrli_h(s2, 6); ++ s2 |= s3; ++ ++ //b ++ s3 = s0 << 3; ++ s3 &= mask_blue; ++ s4 = __lsx_vsrli_h(s3, 5); ++ s3 |= s4; ++ ++ //ar ++ sa = a | s1; ++ ++ //gb ++ s2 <<= 8; ++ s2 |= s3; ++ ++ s1 = __lsx_vilvl_h(sa, s2); ++ __lsx_vst(s1, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint16_t s = *src++; ++ *dst++ = convert_0565_to_8888(s); ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lsx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m128i srcv; ++ __m128i t0, t1, t2, t3; ++ __m128i dst0, dst1; ++ __m128i zero = __lsx_vldi(0); ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint8_t *src = iter->bits; ++ ++ iter->bits += iter->stride; ++ ++ while (w >= 16) { ++ srcv = __lsx_vld(src, 0); ++ src += 16; ++ w -= 16; ++ dst0 = __lsx_vilvl_b(srcv, zero); ++ dst1 = __lsx_vilvh_b(srcv, zero); ++ t0 = __lsx_vilvl_h(dst0, zero); ++ t1 = __lsx_vilvh_h(dst0, zero); ++ t2 = __lsx_vilvl_h(dst1, zero); ++ t3 = __lsx_vilvh_h(dst1, zero); ++ __lsx_vst(t0, dst, 0); ++ __lsx_vst(t1, dst, 16); ++ __lsx_vst(t2, dst, 32); ++ __lsx_vst(t3, dst, 48); ++ dst += 16; ++ } ++ ++ while (w >= 4) { ++ srcv = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ dst0 = __lsx_vilvl_b(srcv, zero); ++ dst0 = __lsx_vilvl_h(dst0, zero); ++ __lsx_vst(dst0, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ *dst++ = *(src++) << 24; ++ } ++ ++ return iter->buffer; ++} ++ ++// fetch/store 8 bits ++static void lsx_fetch_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ uint32_t *buffer, const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ __m128i src; ++ __m128i t0, t1; ++ __m128i temp0, temp1, temp2, temp3; ++ __m128i zero = __lsx_vldi(0); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vilvl_b(src, zero); ++ t1 = __lsx_vilvh_b(src, zero); ++ temp0 = __lsx_vilvl_h(t0, zero); ++ temp1 = __lsx_vilvh_h(t0, zero); ++ temp2 = __lsx_vilvl_h(t1, zero); ++ temp3 = __lsx_vilvh_h(t1, zero); ++ __lsx_vst(temp0, buffer, 0); ++ __lsx_vst(temp1, buffer, 16); ++ __lsx_vst(temp2, buffer, 32); ++ __lsx_vst(temp3, buffer, 48); ++ bits += 16, 
width -= 16, buffer += 16; ++ } ++ while (width >= 8) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vilvl_b(src, zero); ++ temp0 = __lsx_vilvl_h(t0, zero); ++ temp1 = __lsx_vilvh_h(t0, zero); ++ __lsx_vst(temp0, buffer, 0); ++ __lsx_vst(temp1, buffer, 16); ++ bits += 8, width -= 8, buffer += 8; ++ } ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vilvl_b(src, zero); ++ temp0 = __lsx_vilvl_h(t0, zero); ++ __lsx_vst(temp0, buffer, 0); ++ bits += 4; width -= 4; buffer += 4; ++ } ++ while(width--) { ++ *buffer++ = ((*bits++) << 24); ++ } ++} ++ ++static void lsx_store_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m128i src0, src1, src2, src3; ++ dest += x; ++ ++ while (width >= 16) { ++ src0 = __lsx_vld(values, 0); ++ src1 = __lsx_vld(values, 16); ++ src2 = __lsx_vld(values, 32); ++ src3 = __lsx_vld(values, 48); ++ src0 = __lsx_vsrli_w(src0, 24); ++ src1 = __lsx_vsrli_w(src1, 24); ++ src2 = __lsx_vsrli_w(src2, 24); ++ src3 = __lsx_vsrli_w(src3, 24); ++ src0 = __lsx_vpickev_h(src1, src0); ++ src1 = __lsx_vpickev_h(src3, src2); ++ src0 = __lsx_vpickev_b(src1, src0); ++ __lsx_vst(src0, dest, 0); ++ values += 16, width -= 16, dest += 16; ++ } ++ while (width >= 8) { ++ src0 = __lsx_vld(values, 0); ++ src1 = __lsx_vld(values, 16); ++ src0 = __lsx_vsrli_w(src0, 24); ++ src1 = __lsx_vsrli_w(src1, 24); ++ src0 = __lsx_vpickev_h(src1, src0); ++ src0 = __lsx_vpickev_b(src0, src0); ++ __lsx_vstelm_d(src0, dest, 0, 0); ++ values += 8; width -= 8; dest += 8; ++ } ++ while (width >= 4) { ++ src0 = __lsx_vld(values, 0); ++ src0 = __lsx_vsrli_w(src0, 24); ++ src0 = __lsx_vpickev_h(src0, src0); ++ src0 = __lsx_vpickev_b(src0, src0); ++ __lsx_vstelm_w(src0, dest, 0, 0); ++ values += 4; width -= 4; dest += 4; ++ } ++ while (width--) { ++ *dest++ = ((*values++) >> 24); ++ } ++} ++ ++static void lsx_fetch_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ ++ __m128i src; ++ __m128i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m128i mask0 = __lsx_vreplgr2vr_b(0xc0); ++ __m128i mask1 = __lsx_vreplgr2vr_b(0x30); ++ __m128i mask2 = __lsx_vreplgr2vr_b(0x0c); ++ __m128i mask3 = __lsx_vreplgr2vr_b(0x03); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lsx_vld(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lsx_vsrli_b(t0, 2), t0 |= __lsx_vsrli_b(t0, 4); ++ t1 |= __lsx_vslli_b(t1, 2), t1 |= __lsx_vsrli_b(t1, 4); ++ t2 |= __lsx_vsrli_b(t2, 2), t2 |= __lsx_vslli_b(t2, 4); ++ t3 |= __lsx_vslli_b(t3, 2), t3 |= __lsx_vslli_b(t3, 4); ++ t4 = __lsx_vilvl_b(t0, t1); ++ t5 = __lsx_vilvh_b(t0, t1); ++ t6 = __lsx_vilvl_b(t2, t3); ++ t7 = __lsx_vilvh_b(t2, t3); ++ t0 = __lsx_vilvl_h(t4, t6); ++ t1 = __lsx_vilvh_h(t4, t6); ++ t2 = __lsx_vilvl_h(t5, t7); ++ t3 = __lsx_vilvh_h(t5, t7); ++ __lsx_vst(t0, buffer, 0); ++ __lsx_vst(t1, buffer, 16); ++ __lsx_vst(t2, buffer, 32); ++ __lsx_vst(t3, buffer, 48); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lsx_vsrli_b(t0, 2), t0 |= __lsx_vsrli_b(t0, 4); ++ t1 |= __lsx_vslli_b(t1, 2), t1 |= __lsx_vsrli_b(t1, 4); ++ t2 |= __lsx_vsrli_b(t2, 2), t2 |= __lsx_vslli_b(t2, 4); ++ t3 |= 
__lsx_vslli_b(t3, 2), t3 |= __lsx_vslli_b(t3, 4); ++ t4 = __lsx_vilvl_b(t0, t1); ++ t5 = __lsx_vilvl_b(t2, t3); ++ t0 = __lsx_vilvl_h(t4, t5); ++ __lsx_vst(t0, buffer, 0); ++ bits += 4, width -= 4, buffer += 4; ++ } ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel & 192; ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel & 48; ++ pixel1 |= (pixel1 << 2); ++ pixel1 |= (pixel1 >> 4); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel & 12; ++ pixel2 |= (pixel2 >> 2); ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel3 = pixel & 3; ++ pixel3 |= (pixel3 << 2); ++ pixel3 |= (pixel3 << 4); ++ *buffer++ = (pixel3 | pixel2 | pixel1 | pixel0); ++ } ++} ++ ++static void lsx_store_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m128i src, d0; ++ __m128i mask = __lsx_vreplgr2vr_b(0xc0); ++ __m128i shuf = __lsx_vreplgr2vr_w(0x0F0B0703); ++ ++ dest += x; ++ ++ while (width >= 4) { ++ src = __lsx_vld(values, 0); ++ src = __lsx_vand_v(src, mask); ++ src = __lsx_vor_v(src, __lsx_vslli_w(src, 6)); ++ src = __lsx_vor_v(src, __lsx_vslli_w(src, 12)); ++ d0 = __lsx_vshuf_b(src, src, shuf); ++ __lsx_vstelm_w(d0, dest, 0, 0); ++ dest += 4; ++ values += 4; ++ width -= 4; ++ } ++ ++ while (width--) { ++ uint32_t pixel = *values++; ++ pixel &= 0xc0c0c0c0; ++ pixel |= (pixel << 6); ++ pixel |= (pixel << 12); ++ pixel >>= 24; ++ *dest++ = pixel; ++ } ++} ++ ++// fetch/store 16 bits ++static void lsx_fetch_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m128i src; ++ __m128i t, t0, t1, t2, t3; ++ __m128i mask0 = __lsx_vreplgr2vr_h(0x001f); ++ bits += x; ++ ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = (src & mask0); ++ t0 = __lsx_vslli_h(t0, 3); ++ t = __lsx_vsrli_h(t0, 5); ++ t0 |= t; ++ t1 = __lsx_vsrli_h(src, 5); ++ t1 &= mask0; ++ t1 = __lsx_vslli_h(t1, 3); ++ t = __lsx_vsrli_h(t1, 5); ++ t1 |= t; ++ t2 = __lsx_vsrli_h(src, 10); ++ t2 &= mask0; ++ t2 = __lsx_vslli_h(t2, 3); ++ t = __lsx_vsrli_h(t2, 5); ++ t2 |= t; ++ t3 = __lsx_vsrli_h(src, 15); ++ t = __lsx_vslli_h(t3, 1); ++ t3 |= t; ++ t = __lsx_vslli_h(t3, 2); ++ t3 |= t; ++ t = __lsx_vslli_h(t3, 4); ++ t3 |= t; ++ t1 <<= 8; ++ t0 |= t1; ++ t3 <<= 8; ++ t2 |= t3; ++ t1 = __lsx_vilvl_h(t2, t0); ++ __lsx_vst(t1, buffer, 0); ++ bits += 4, width -= 4, buffer += 4; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 15; ++ pixel0 <<= 7; ++ pixel0 |= (pixel0 >> 1); ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 10; ++ pixel1 &= 31; ++ pixel1 <<= 3; ++ pixel1 |= (pixel1 >> 5); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 5; ++ pixel2 &= 31; ++ pixel2 <<= 3; ++ pixel2 |= (pixel2 >> 5); ++ pixel2 <<= 8; ++ // b ++ pixel &= 31; ++ pixel <<= 3; ++ pixel |= (pixel >> 5); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lsx_store_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ __m128i in0, d0, t0, t1, t2, t3; ++ __m128i mask0 = { 0x0000800000008000, 0x0000800000008000}; ++ __m128i mask1 = { 
0x00007c0000007c00, 0x00007c0000007c00}; ++ __m128i mask2 = { 0x000003e0000003e0, 0x000003e0000003e0}; ++ __m128i mask3 = { 0x0000001f0000001f, 0x0000001f0000001f}; ++ __m128i mask4 = { 0x0006000400020000, 0x0006000400020000}; ++ ++ dest += x; ++ ++ while(width >= 4) { ++ in0 = __lsx_vld(values, 0); ++ t0 = __lsx_vsrli_w(in0, 16); ++ t1 = __lsx_vsrli_w(in0, 9); ++ t2 = __lsx_vsrli_w(in0, 6); ++ t3 = __lsx_vsrli_w(in0, 3); ++ t0 = __lsx_vand_v(t0, mask0); ++ t1 = __lsx_vand_v(t1, mask1); ++ t2 = __lsx_vand_v(t2, mask2); ++ t3 = __lsx_vand_v(t3, mask3); ++ d0 = __lsx_vor_v(__lsx_vor_v(t0, t1), __lsx_vor_v(t2, t3)); ++ d0 = __lsx_vshuf_h(mask4, d0, d0); ++ __lsx_vstelm_d(d0, dest, 0, 0); ++ values += 4, width -= 4, dest += 4; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel0 = pixel >> 16; ++ pixel1 = pixel >> 9; ++ pixel2 = pixel >> 6; ++ pixel3 = pixel >> 3; ++ pixel0 &= 0x8000; ++ pixel1 &= 0x7c00; ++ pixel2 &= 0x03e0; ++ pixel3 &= 0x001f; ++ *dest++ = (pixel0 | pixel1 | pixel2 | pixel3); ++ } ++} ++ ++static void lsx_fetch_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m128i src; ++ __m128i t, t0, t1, t2, t3; ++ ++ __m128i mask0 = __lsx_vreplgr2vr_h(0x000f); ++ bits += x; ++ ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vsrli_h(src, 12); ++ t = (t0 << 4), t0 |= t; ++ t1 = __lsx_vsrli_h(src, 8); ++ t1 &= mask0, t = (t1 << 4), t1 |= t; ++ t2 = __lsx_vsrli_h(src, 4); ++ t2 &= mask0, t = (t2 << 4), t2 |= t; ++ t3 = (src & mask0), t = (t3 << 4), t3 |= t; ++ t0 <<= 8, t2 <<= 8, t0 |= t1, t2 |= t3; ++ t1 = __lsx_vilvl_h(t0, t2); ++ __lsx_vst(t1, buffer, 0); ++ bits += 4, width -= 4, buffer += 4; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 12; ++ pixel0 |= (pixel0 << 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 8; ++ pixel1 &= 15; ++ pixel1 |= (pixel1 << 4); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 4; ++ pixel2 &= 15; ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel &= 15; ++ pixel |= (pixel << 4); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lsx_store_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1; ++ __m128i in0, d0, t0, t1; ++ __m128i mask0 = __lsx_vreplgr2vr_h(0xf0f0); ++ __m128i mask1 = __lsx_vreplgr2vr_h(0x00ff); ++ __m128i mask2 = { 0x0006000400020000, 0x0006000400020000 }; ++ dest += x; ++ ++ while(width >= 4) { ++ in0 = __lsx_vld(values, 0); ++ in0 = __lsx_vand_v(in0, mask0); ++ t0 = __lsx_vsrli_w(in0, 4); ++ t1 = __lsx_vsrli_w(in0, 8); ++ t0 = __lsx_vor_v(t0, t1); ++ t0 = __lsx_vand_v(t0, mask1); ++ t0 = __lsx_vor_v(t0, __lsx_vsrli_w(t0, 8)); ++ d0 = __lsx_vshuf_h(mask2, t0, t0); ++ __lsx_vstelm_d(d0, dest, 0, 0); ++ values += 4, width -= 4, dest += 4; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel &= 0xf0f0f0f0; ++ pixel0 = (pixel >> 4); ++ pixel1 = (pixel >> 8); ++ pixel0 |= pixel1; ++ pixel0 &= 0x00ff00ff; ++ pixel0 |= (pixel0 >> 8); ++ pixel0 &= 0xffff; ++ *dest++ = pixel0; ++ } ++} ++ ++static const pixman_fast_path_t lsx_fast_paths[] = ++{ ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, lsx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, lsx_composite_over_n_8_8888), ++ 
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, lsx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, lsx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, lsx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, lsx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, lsx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, lsx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, lsx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, lsx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, lsx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, lsx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, lsx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, lsx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, lsx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, lsx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, lsx_composite_add_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, lsx_composite_add_n_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, lsx_composite_add_n_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, 
lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, lsx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, lsx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, lsx_composite_in_n_8_8), ++ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, lsx_composite_in_8_8), ++ { PIXMAN_OP_NONE }, ++}; ++ ++#define IMAGE_FLAGS \ ++ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ ++ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) ++ ++static const pixman_iter_info_t lsx_iters[] = ++{ ++ { ++ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lsx_fetch_x8r8g8b8, NULL ++ }, ++ { ++ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lsx_fetch_r5g6b5, NULL ++ }, ++ { ++ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lsx_fetch_a8, NULL ++ }, ++ { PIXMAN_null }, ++}; ++ ++pixman_implementation_t * ++_pixman_implementation_create_lsx (pixman_implementation_t *fallback) ++{ ++ pixman_implementation_t *imp = ++ _pixman_implementation_create (fallback, lsx_fast_paths); ++ ++ /* Set up function pointers */ ++ imp->combine_32[PIXMAN_OP_SRC] = lsx_combine_src_u; ++ imp->combine_32[PIXMAN_OP_OVER] = lsx_combine_over_u; ++ imp->combine_32[PIXMAN_OP_OVER_REVERSE] = lsx_combine_over_reverse_u; ++ imp->combine_32[PIXMAN_OP_OUT] = lsx_combine_out_u; ++ imp->combine_32[PIXMAN_OP_OUT_REVERSE] = lsx_combine_out_reverse_u; ++ imp->combine_32[PIXMAN_OP_ADD] = lsx_combine_add_u; ++ imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = lsx_combine_src_u; ++ imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = lsx_combine_src_u; ++ imp->combine_32[PIXMAN_OP_MULTIPLY] = lsx_combine_multiply_u; ++ imp->combine_32_ca[PIXMAN_OP_SRC] = lsx_combine_src_ca; ++ imp->combine_32_ca[PIXMAN_OP_OVER] = lsx_combine_over_ca; ++ imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = lsx_combine_out_reverse_ca; ++ ++ imp->blt = lsx_blt; ++ imp->fill = lsx_fill; ++ imp->iter_info = lsx_iters; ++ ++ return imp; ++} ++ ++void setup_accessors_lsx (bits_image_t *image) ++{ ++ if 
(image->format == PIXMAN_a8) { // 8 bits ++ image->fetch_scanline_32 = lsx_fetch_scanline_a8; ++ image->store_scanline_32 = lsx_store_scanline_a8; ++ } else if (image->format == PIXMAN_a2r2g2b2) { ++ image->fetch_scanline_32 = lsx_fetch_scanline_a2r2g2b2; ++ image->store_scanline_32 = lsx_store_scanline_a2r2g2b2; ++ } else if (image->format == PIXMAN_a1r5g5b5) { // 16 bits ++ image->fetch_scanline_32 = lsx_fetch_scanline_a1r5g5b5; ++ image->store_scanline_32 = lsx_store_scanline_a1r5g5b5; ++ } else if (image->format == PIXMAN_a4r4g4b4) { ++ image->fetch_scanline_32 = lsx_fetch_scanline_a4r4g4b4; ++ image->store_scanline_32 = lsx_store_scanline_a4r4g4b4; ++ } ++} +diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h +index f43e87f..7b43d7e 100644 +--- a/pixman/pixman-private.h ++++ b/pixman/pixman-private.h +@@ -655,6 +655,20 @@ pixman_implementation_t * + _pixman_implementation_create_vmx (pixman_implementation_t *fallback); + #endif + ++#ifdef USE_LOONGARCH_LSX ++pixman_implementation_t * ++_pixman_implementation_create_lsx (pixman_implementation_t *fallback); ++ ++void setup_accessors_lsx (bits_image_t *image); ++#endif ++ ++#ifdef USE_LOONGARCH_LASX ++pixman_implementation_t * ++_pixman_implementation_create_lasx (pixman_implementation_t *fallback); ++ ++void setup_accessors_lasx (bits_image_t *image); ++#endif ++ + pixman_bool_t + _pixman_implementation_disabled (const char *name); + +@@ -670,6 +684,11 @@ _pixman_ppc_get_implementations (pixman_implementation_t *imp); + pixman_implementation_t * + _pixman_mips_get_implementations (pixman_implementation_t *imp); + ++pixman_implementation_t * ++_pixman_loongarch_get_implementations (pixman_implementation_t *imp); ++ ++void setup_loongarch_accessors (bits_image_t *image); ++ + pixman_implementation_t * + _pixman_choose_implementation (void); + +-- +GitLab + diff --git a/pixman/PKGBUILD b/pixman/PKGBUILD index 85c3f5b945..5358758bb8 100644 --- a/pixman/PKGBUILD +++ b/pixman/PKGBUILD @@ -12,10 +12,17 @@ license=('MIT') depends=('glibc') makedepends=('meson' 'libpng') provides=('libpixman-1.so') -source=(https://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.xz) -sha512sums=('1a1d21b86b3c6784c4c2606b7069723b4dbb747eac9fce95bca86516787840379ffd49abc42d11e7143e85c32c85496f33c2562c7a910ca6f963599affdc3224') +source=(https://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.xz + 83.patch) +sha512sums=('1a1d21b86b3c6784c4c2606b7069723b4dbb747eac9fce95bca86516787840379ffd49abc42d11e7143e85c32c85496f33c2562c7a910ca6f963599affdc3224' + '0b7970cd955e31211fa55882974bbb321b0e7421da5c06a2cff196ea3e0efcefce50b4ba5f19a23417aaea2145529c762800dbb5887dfdcfa240efe400d44f7f') #validpgpkeys=('') # Maarten Lankhorst +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/83.patch" +} + build() { arch-meson $pkgname-$pkgver build \ -D loongson-mmi=disabled \ @@ -25,7 +32,12 @@ build() { -D a64-neon=disabled \ -D iwmmxt=disabled \ -D mips-dspr2=disabled \ - -D gtk=disabled + -D gtk=disabled \ + -D lsx=enabled \ + -D b_lto=false \ + -D mmx=disabled \ + -D ssse3=disabled \ + -D sse2=disabled ninja -C build } diff --git a/pkgfile/PKGBUILD b/pkgfile/PKGBUILD index b64b343932..299785ffd1 100644 --- a/pkgfile/PKGBUILD +++ b/pkgfile/PKGBUILD @@ -8,11 +8,18 @@ arch=('loong64' 'x86_64') url="https://github.com/falconindy/pkgfile" license=('MIT') depends=('libarchive' 'curl' 'pcre' 'pacman') -makedepends=('git' 'meson') -source=("git+https://github.com/falconindy/pkgfile?signed#tag=v$pkgver") +makedepends=('git' 
'meson' 'clang' 'systemd') +source=("git+https://github.com/falconindy/pkgfile?signed#tag=v$pkgver" + pkgfile-use-loong64.patch) validpgpkeys=('487EACC08557AD082088DABA1EB2638FF56C0C53') # Dave Reisner install=pkgfile.install -md5sums=('SKIP') +md5sums=('SKIP' + 'b300e43eaa26d4b46ee37ae21a92d262') + +prepare() { + cd "$pkgname" + patch -p1 -i $srcdir/pkgfile-use-loong64.patch +} build() { cd "$pkgname" diff --git a/pkgfile/pkgfile-use-loong64.patch b/pkgfile/pkgfile-use-loong64.patch new file mode 100644 index 0000000000..53b8ce8ad3 --- /dev/null +++ b/pkgfile/pkgfile-use-loong64.patch @@ -0,0 +1,17 @@ +diff --git a/src/update.c b/src/update.c +index 7a225b5..b2d84b7 100644 +--- a/src/update.c ++++ b/src/update.c +@@ -628,7 +628,11 @@ int pkgfile_update(struct repovec_t *repos, struct config_t *config) { + if (repos->architecture == NULL) { + struct utsname un; + uname(&un); +- repos->architecture = strdup(un.machine); ++ if (strcmp(un.machine, "loongarch64") == 0) { ++ repos->architecture = strdup("loong64"); ++ } else { ++ repos->architecture = strdup(un.machine); ++ } + } + + /* ensure all our DBs are 0644 */ diff --git a/plasma-desktop/PKGBUILD b/plasma-desktop/PKGBUILD index fd8c3de896..bc5201562a 100644 --- a/plasma-desktop/PKGBUILD +++ b/plasma-desktop/PKGBUILD @@ -19,7 +19,8 @@ optdepends=('plasma-nm: Network manager applet' 'kaccounts-integration: OpenDesktop integration plugin' 'packagekit-qt5: to install new krunner plugins') makedepends=(extra-cmake-modules kdoctools5 xf86-input-evdev xf86-input-synaptics xf86-input-libinput xorg-server-devel - scim kdesignerplugin kaccounts-integration intltool packagekit-qt5 kinit wayland-protocols libibus) + scim kdesignerplugin kaccounts-integration intltool packagekit-qt5 kinit wayland-protocols libibus + libxkbfile) groups=(plasma) sha256sums=('de015fc921d34da23d85998a03afa7c81d935f5d9c55261ff7a2b413c9cfd09f' 'SKIP') diff --git a/pngquant/PKGBUILD b/pngquant/PKGBUILD index 5c634c98a8..e6bd06cf9f 100644 --- a/pngquant/PKGBUILD +++ b/pngquant/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('ddd8889a9c269ba454d0c5e4f7167948d55d77c4570b23f671809fd3a68b6822') prepare() { cd "$srcdir/$pkgname-$pkgver" sed -i 's|, path = "lib/imagequant-sys"||' Cargo.toml - cargo fetch --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/polkit-qt/PKGBUILD b/polkit-qt/PKGBUILD index 64fc806339..b4c2d7b602 100644 --- a/polkit-qt/PKGBUILD +++ b/polkit-qt/PKGBUILD @@ -6,7 +6,7 @@ pkgbase=polkit-qt pkgname=(polkit-qt5 polkit-qt6) pkgver=0.175.0 -pkgrel=1 +pkgrel=2 pkgdesc='A library that allows developers to access PolicyKit API with a nice Qt-style API' arch=(loong64 x86_64) url='https://www.kde.org/' diff --git a/polkit/PKGBUILD b/polkit/PKGBUILD index bfaae776b6..328e6e8967 100644 --- a/polkit/PKGBUILD +++ b/polkit/PKGBUILD @@ -47,6 +47,7 @@ build() { -D examples=true -D gtk_doc=true -D man=true + -D js_engine=duktape -D os_type=redhat -D polkitd_uid=102 -D polkitd_user=polkitd diff --git a/portmidi/PKGBUILD b/portmidi/PKGBUILD index 2747d797ed..ebfb53d597 100644 --- a/portmidi/PKGBUILD +++ b/portmidi/PKGBUILD @@ -32,6 +32,8 @@ build() { -W no-dev ) + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} cmake "${cmake_options[@]}" cmake --build build --verbose } diff --git a/postgresql/PKGBUILD b/postgresql/PKGBUILD index 7aee2aaf96..e265ffb0de 100644 --- a/postgresql/PKGBUILD +++ b/postgresql/PKGBUILD @@ -22,7 +22,8 @@ 
source=(https://ftp.postgresql.org/pub/source/v${pkgver}/postgresql-${pkgver}.ta postgresql.sysusers postgresql.tmpfiles libxml2-2.12.patch - openssl3.2.patch) + openssl3.2.patch + add-loongarch-support.patch) md5sums=('9cbfb9076ed06384471802b850698a6d' '6ce1dab3da98a10f9190e6b3037f93aa' '632e22e96d6ace85b76a380487cfbf8c' @@ -65,6 +66,7 @@ prepare() { patch -p1 < ../0002-Force-RPATH-to-be-used-for-the-PL-Perl-plugin.patch patch -p1 < ../libxml2-2.12.patch patch -p1 < ../openssl3.2.patch + patch -p1 < ../add-loongarch-support.patch } build() { diff --git a/postgresql/add-loongarch-support.patch b/postgresql/add-loongarch-support.patch new file mode 100644 index 0000000000..50d23f13cc --- /dev/null +++ b/postgresql/add-loongarch-support.patch @@ -0,0 +1,13 @@ +Index: postgresql-13.6/src/include/storage/s_lock.h +=================================================================== +--- postgresql-13.6.orig/src/include/storage/s_lock.h ++++ postgresql-13.6/src/include/storage/s_lock.h +@@ -321,7 +321,7 @@ + * We use the int-width variant of the builtin because it works on more chips + * than other widths. + */ +-#if defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(__aarch64) ++#if defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(__aarch64) || defined(__loongarch64) + #ifdef HAVE_GCC__SYNC_INT32_TAS + #define HAS_TEST_AND_SET + diff --git a/ppsspp/PKGBUILD b/ppsspp/PKGBUILD index 006c4e1d75..bc149f958c 100644 --- a/ppsspp/PKGBUILD +++ b/ppsspp/PKGBUILD @@ -50,6 +50,7 @@ source=( git+https://github.com/KhronosGroup/SPIRV-Cross.git ppsspp-sdl.desktop ppsspp-qt.desktop + ppsspp-la64.patch ) b2sums=('SKIP' 'SKIP' @@ -83,6 +84,8 @@ prepare() { git config submodule.${submodule}.url ../${submodule#*/} git -c protocol.file.allow=always submodule update ${submodule} done + patch -p1 -i $srcdir/ppsspp-la64.patch + cd ext/armips for submodule in ext/filesystem; do git submodule init ${submodule} @@ -94,6 +97,8 @@ prepare() { build() { export CC=clang export CXX=clang++ + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} cmake -S ppsspp -B build-sdl -G Ninja \ -DCMAKE_BUILD_TYPE=None \ -DCMAKE_SKIP_RPATH=ON \ @@ -101,6 +106,7 @@ build() { -DOpenGL_GL_PREFERENCE=GLVND \ -DUSE_SYSTEM_LIBZIP=ON \ -DUSE_SYSTEM_SNAPPY=ON \ + -DUSE_SYSTEM_FFMPEG=ON \ -DUSE_SYSTEM_ZSTD=ON \ -DUSING_QT_UI=OFF \ -Wno-dev @@ -153,3 +159,16 @@ package_ppsspp-assets() { } # vim: ts=2 sw=2 et: +b2sums=('SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'c6bcdfedee866dfdcc82a8c333c31ff73ed0beec65b63acec8bc8186383c0bc9f0912f21bb9715b665e8dc1793b1a85599761f9037856fa54ad8aa3bfdbfd468' + '328e2ba47b78d242b0ec6ba6bfa039c77a36d1ef7246e5c2c2432d8e976e9360baf505eb05f48408ede1a30545cbbb7f875bf5ebd0252cef35523d449b8254a0' + '7010f5fed4a64ffba2479d2c190b3dcc0b7518343cb01973347ca1c8e577af07fe01a24a5864403d77c632a0e7324c5cc0c5dd244328a3422cdab97eb665ba9d') diff --git a/primecount/PKGBUILD b/primecount/PKGBUILD index f20b02fd98..556584455c 100644 --- a/primecount/PKGBUILD +++ b/primecount/PKGBUILD @@ -21,7 +21,7 @@ build() { -DBUILD_STATIC_LIBS=OFF \ -DBUILD_SHARED_LIBS=ON \ -DWITH_POPCNT=OFF \ - -DWITH_FLOAT128=ON + -DWITH_FLOAT128=OFF cmake --build build } diff --git a/procs/PKGBUILD b/procs/PKGBUILD index b4d5fa5d03..a37feee078 100644 --- a/procs/PKGBUILD +++ b/procs/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('77c5f5d3bdfc9cef870732500ef58c203a1464f924b12f79c7d9e301b4dd5b16') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked 
--target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/progpick/PKGBUILD b/progpick/PKGBUILD index 3c3951f267..feb622d885 100644 --- a/progpick/PKGBUILD +++ b/progpick/PKGBUILD @@ -16,7 +16,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/prometheus-memcached-exporter/PKGBUILD b/prometheus-memcached-exporter/PKGBUILD index 6a5ad8b737..30f785d13d 100644 --- a/prometheus-memcached-exporter/PKGBUILD +++ b/prometheus-memcached-exporter/PKGBUILD @@ -31,6 +31,9 @@ check() { build() { cd memcached_exporter-$pkgver + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy go build \ -trimpath \ -buildmode=pie \ diff --git a/prometheus-mysqld-exporter/PKGBUILD b/prometheus-mysqld-exporter/PKGBUILD index ec02b1f2b5..46da148c9e 100644 --- a/prometheus-mysqld-exporter/PKGBUILD +++ b/prometheus-mysqld-exporter/PKGBUILD @@ -22,6 +22,9 @@ sha512sums=('015f116f7da8da2b74605b3b8f2ce693d1c99673834a63e8c003a7ddf7718b27cfc build() { cd "mysqld_exporter-${pkgver}" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy go build \ -trimpath \ -buildmode=pie \ diff --git a/prometheus-wireguard-exporter/PKGBUILD b/prometheus-wireguard-exporter/PKGBUILD index a113c091d4..90cf148bbd 100644 --- a/prometheus-wireguard-exporter/PKGBUILD +++ b/prometheus-wireguard-exporter/PKGBUILD @@ -23,7 +23,7 @@ sha512sums=('81b6fa1b0fa0bef17a97d841b2fe42c88a80d4c5d0bca8d82aaf24e24b2eeefdac6 prepare() { cd "prometheus_wireguard_exporter-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/protobuf-c/PKGBUILD b/protobuf-c/PKGBUILD index 3fe73c29f3..6a4a643967 100644 --- a/protobuf-c/PKGBUILD +++ b/protobuf-c/PKGBUILD @@ -17,6 +17,7 @@ sha256sums=('7b404c63361ed35b3667aec75cc37b54298d56dd2bcf369de3373212cc06fd98') build() { cd "$pkgname-$pkgver" + export LDFLAGS="-Wl,--no-as-needed" ./configure --prefix=/usr --disable-static make } diff --git a/psiconv/PKGBUILD b/psiconv/PKGBUILD index 3a908ce3d7..0e10c08b6a 100644 --- a/psiconv/PKGBUILD +++ b/psiconv/PKGBUILD @@ -12,7 +12,8 @@ license=('GPL') depends=('graphicsmagick') makedepends=('bc') backup=('etc/psiconv/psiconv.conf') -source=("http://www.frodo.looijaard.name/system/files/software/${pkgname}/${pkgname}-${pkgver}.tar.gz" psiconv.patch) +source=("http://www.frodo.looijaard.name/system/files/software/${pkgname}/${pkgname}-${pkgver}.tar.gz" psiconv.patch + psiconv-fix-build.patch) md5sums=('286e427b10f4d10aaeef1944210a2ea6' 'SKIP') sha512sums=('ec21d1e4734ad79dc19146223d7016bd53b3a3bb602f3a55489663ccbf3a4c6f5c6417728e798b4448acc2e49ff5145f58c812edfdd6c11ee9060128b6157d3a' @@ -21,6 +22,7 @@ sha512sums=('ec21d1e4734ad79dc19146223d7016bd53b3a3bb602f3a55489663ccbf3a4c6f5c6 prepare() { cd $pkgname-$pkgver sed -e 's|Magick-config|GraphicsMagick-config|g' -i configure.in + patch -p1 -i $srcdir/psiconv-fix-build.patch autoreconf -vi patch -p1 <"$srcdir"/psiconv.patch diff --git a/psiconv/psiconv-fix-build.patch 
b/psiconv/psiconv-fix-build.patch new file mode 100644 index 0000000000..c5a5e89f99 --- /dev/null +++ b/psiconv/psiconv-fix-build.patch @@ -0,0 +1,74 @@ +Index: psiconv-0.9.9/program/psiconv/gen_html4.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_html4.c ++++ psiconv-0.9.9/program/psiconv/gen_html4.c +@@ -32,6 +32,8 @@ + + #define TEMPSTR_LEN 100 + ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ ++ + static void text(const psiconv_config config,psiconv_list list, + psiconv_string_t data,const encoding enc); + static void header(const psiconv_config config, psiconv_list list, +Index: psiconv-0.9.9/program/psiconv/gen_image.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_image.c ++++ psiconv-0.9.9/program/psiconv/gen_image.c +@@ -33,6 +33,7 @@ + #endif + + #ifdef IMAGEMAGICK ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ + static Image *get_paint_data_section(psiconv_paint_data_section sec); + static void image_to_list(psiconv_list list,Image *image,const char *dest); + static void gen_image_list(const psiconv_config config,psiconv_list list, +Index: psiconv-0.9.9/program/psiconv/gen_txt.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_txt.c ++++ psiconv-0.9.9/program/psiconv/gen_txt.c +@@ -31,6 +31,7 @@ + #ifdef DMALLOC + #include "dmalloc.h" + #endif ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ + + static void output_para(const psiconv_config config,psiconv_list list, + const psiconv_paragraph para,encoding encoding_type); +Index: psiconv-0.9.9/program/psiconv/gen_xhtml.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_xhtml.c ++++ psiconv-0.9.9/program/psiconv/gen_xhtml.c +@@ -33,6 +33,8 @@ + + #define TEMPSTR_LEN 100 + ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ ++ + static void text(const psiconv_config config,psiconv_list list, + psiconv_string_t data,const encoding enc); + static void color(const psiconv_config config, psiconv_list list, +Index: psiconv-0.9.9/program/psiconv/psiconv.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/psiconv.c ++++ psiconv-0.9.9/program/psiconv/psiconv.c +@@ -47,6 +47,7 @@ + static void print_help(void); + static void print_version(void); + static void strtoupper(char *str); ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ + + void print_help(void) + { +Index: psiconv-0.9.9/program/psiconv/psiconv.h +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/psiconv.h ++++ psiconv-0.9.9/program/psiconv/psiconv.h +@@ -52,7 +52,5 @@ typedef struct fileformat_s { + output_function *output; + } *fileformat; + +-psiconv_list fileformat_list; /* of struct psiconv_fileformat */ +- + + #endif /* PSICONV_H */ diff --git a/pueue/PKGBUILD b/pueue/PKGBUILD index f4dba74703..aa2a47105c 100644 --- a/pueue/PKGBUILD +++ b/pueue/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('ad7b760d4bed5a946acbdb6e3985d94d03944e3c0eb2221aea65da0aa001c636') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p utils/completions/ } diff --git a/pyalpm/PKGBUILD b/pyalpm/PKGBUILD 
index 1c3367e402..b454290579 100644 --- a/pyalpm/PKGBUILD +++ b/pyalpm/PKGBUILD @@ -24,7 +24,7 @@ build() { check() { cd "${pkgname}" - PYTHONPATH="$PWD/build/lib.linux-$CARCH-cpython-311" pytest + PYTHONPATH="$PWD/build/lib.linux-`uname -m`-cpython-311" pytest } package() { diff --git a/pycups/PKGBUILD b/pycups/PKGBUILD index bbaae2f713..7ec3e96a00 100644 --- a/pycups/PKGBUILD +++ b/pycups/PKGBUILD @@ -11,7 +11,7 @@ license=('GPL') pkgdesc="Python bindings for libcups" depends=('libcups' 'python') provides=('pycups') -source=(https://github.com/OpenPrinting/pycups/archive/v${pkgver}.tar.gz) +source=(https://github.com/OpenPrinting/pycups/archive/refs/tags/v${pkgver}.tar.gz) sha256sums=('cf7e63b07e2dbc6811e77f55cc11d7191799298a6565b83fc028ee3c9da0ad78') build() { diff --git a/pyflow/PKGBUILD b/pyflow/PKGBUILD index 73f2fb1f23..d55a75ba8c 100644 --- a/pyflow/PKGBUILD +++ b/pyflow/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('218b27e1a375bf5f7e486b4ab5cb0b1b8a4fc0ce12a763e504abf12ebc7c4e28064960b prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/pygobject/PKGBUILD b/pygobject/PKGBUILD index 2f51f4d71b..eaa26e1b11 100644 --- a/pygobject/PKGBUILD +++ b/pygobject/PKGBUILD @@ -24,7 +24,7 @@ makedepends=( python-sphinx python-sphinx_rtd_theme ) -checkdepends=( +makedepends+=( gtk3 python-pytest xorg-server-xvfb diff --git a/pyqt5/PKGBUILD b/pyqt5/PKGBUILD index c8933d1076..e710e224e1 100644 --- a/pyqt5/PKGBUILD +++ b/pyqt5/PKGBUILD @@ -9,7 +9,7 @@ pkgbase=pyqt5 pkgname=('python-pyqt5') pkgdesc="A set of Python bindings for the Qt5 toolkit" pkgver=5.15.10 -pkgrel=1 +pkgrel=2 arch=('loong64' 'x86_64') url="https://riverbankcomputing.com/software/pyqt/intro" license=('GPL') diff --git a/pyqt6-3d/PKGBUILD b/pyqt6-3d/PKGBUILD index f059241d3d..4eba9af48b 100644 --- a/pyqt6-3d/PKGBUILD +++ b/pyqt6-3d/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-3d pkgname=(python-pyqt6-3d) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for Qt3D' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqt3d/intro' diff --git a/pyqt6-charts/PKGBUILD b/pyqt6-charts/PKGBUILD index a1e6952a96..0e0d7171e0 100644 --- a/pyqt6-charts/PKGBUILD +++ b/pyqt6-charts/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-charts pkgname=(python-pyqt6-charts) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for QtChart' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqtchart/intro' diff --git a/pyqt6-datavisualization/PKGBUILD b/pyqt6-datavisualization/PKGBUILD index b65754d95d..ce022b861a 100644 --- a/pyqt6-datavisualization/PKGBUILD +++ b/pyqt6-datavisualization/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-datavisualization pkgname=(python-pyqt6-datavisualization) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for QtDataVisualization' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqtdatavisualization/intro' diff --git a/pyqt6-networkauth/PKGBUILD b/pyqt6-networkauth/PKGBUILD index e6e82e7ef1..c7dd46f49f 100644 --- a/pyqt6-networkauth/PKGBUILD +++ b/pyqt6-networkauth/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-networkauth pkgname=(python-pyqt6-networkauth) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for QtNetworkAuth' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqtnetworkauth/intro' diff --git a/python-aiohttp/PKGBUILD b/python-aiohttp/PKGBUILD index 6bb4bbc1ef..1d3075be9a 100644 --- 
a/python-aiohttp/PKGBUILD +++ b/python-aiohttp/PKGBUILD @@ -87,7 +87,7 @@ check() { cd ${pkgname} local _python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') mv tests/autobahn/test_autobahn.py{,.bak} # Docker tests - PYTHONPATH="$PWD/build/lib.linux-$CARCH-cpython-${_python_version}" pytest + PYTHONPATH="$PWD/build/lib.linux-`uname -m`-cpython-${_python_version}" pytest mv tests/autobahn/test_autobahn.py{.bak,} } diff --git a/python-ansiwrap/PKGBUILD b/python-ansiwrap/PKGBUILD index 1be71c2e75..91ae373976 100644 --- a/python-ansiwrap/PKGBUILD +++ b/python-ansiwrap/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Felix Yan +export CHECKFUNC=1 pkgname=python-ansiwrap pkgver=0.8.4 diff --git a/python-appdirs/PKGBUILD b/python-appdirs/PKGBUILD index 89cc2b7c3b..bbe9244121 100644 --- a/python-appdirs/PKGBUILD +++ b/python-appdirs/PKGBUILD @@ -17,8 +17,8 @@ makedepends=( 'python-wheel' ) source=($_name-$pkgver.tar.gz::$url/archive/refs/tags/$pkgver.tar.gz) -sha512sums=('4c0e1e8dcd3f91b8b2d215b3f1e2ffaa85137fe054d07d3a2d442b1419e3b44e96fdea1620bd000bd3f4744f71b71f07280094f073df0ff008fac902af614656') -b2sums=('cb9466f4a7f7c1d6f5b6d7ca031820ec4d3450afcaa8ba571e35387c3109ede4e2afbf2c1141a9d01d13798f55524d5efd3fa12546a9378abbda405353938d79') +sha512sums=('8b0cdd9fd471d45b186aa47607691cf378dabd3edc7b7026a57bd6d6f57698e86f440818a5e23ba4288b35d6bb8cb6eb0106eae8aab09d8863ee15025d300883') +b2sums=('f5c91a7ba7b9da75259a25359b5d6d6ae2a563efcb1483a6febb7f1e3c1801a64c05474c8fd76ec6e73c9a5a145e8b460bef4e447c69eaaeb88ae542d153a40b') build() { cd $_name-$pkgver diff --git a/python-apsw/PKGBUILD b/python-apsw/PKGBUILD index 10a23df300..225f77a64d 100644 --- a/python-apsw/PKGBUILD +++ b/python-apsw/PKGBUILD @@ -28,7 +28,7 @@ check() { gcc ${CFLAGS} ${CPPFLAGS} ${LDFLAGS} -fPIC -shared -o testextension.sqlext src/testextension.c # do glob expansion in variable assignment local python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') - PYTHONPATH="$PWD/build/lib.linux-$CARCH-cpython-${python_version}" python -m unittest discover -vs . + PYTHONPATH="$PWD/build/lib.linux-`uname -m`-cpython-${python_version}" python -m unittest discover -vs . 
} package() { diff --git a/python-binaryornot/PKGBUILD b/python-binaryornot/PKGBUILD index f7e52bc962..4b6ee9f2e9 100644 --- a/python-binaryornot/PKGBUILD +++ b/python-binaryornot/PKGBUILD @@ -24,19 +24,19 @@ source=( $pkgname-0.4.4-hypothesis_tests.patch::https://github.com/audreyr/binaryornot/pull/52.patch $pkgname-0.4.4-set_version.patch::https://github.com/binaryornot/binaryornot/commit/cff1a0a4478c17d4f970d133c06abbf6945b6a5e.patch ) -sha512sums=('31dfb79bb5847e12487d94519a357dece4572f7ed064686d53a49c2de5a51d6441be64523c98cca6221ed89be5bf26e54866dd3b79ac8d89fd5019a5b4d75a45' +sha512sums=('379e71b41824b9389ea02c64223e26c57694d07b749c197e3a6b9f10558c8d502e9b93a13ae6c3a4bca49064ffc650ce822073dc312fb06e50e06b8a3f04f419' '8a0f1066a580f08778434ed7d30c2ebf2764dbfd746b561ffce2fb8dd8d77cafaf4a58b03504cf5b1e4d37e0a6ffe3038dcaa5611cdfd7d42ada86edd1e47f3f' 'a56266b54b5000e4cdaadcca2119f1822ab1de1b45adee1095ab8841dc0289cc853b4e3e2be1079786f18dde84424a78909f33130f3081d3fa5cf352026ce1c0') -b2sums=('cb2099313f602915bacd5b463642f16430fcb0ab62dcaae546cb854780996526fb777c0b730b4b89e664ec7f995ddd2d2f632cfbac2dadca45958cfd1dd7a410' +b2sums=('4a70e22eba51a266987308e8d1b4571ace5760f945920194a1a61f4336f33dcb655c4f0b1f760aa3502834d29a1a64ac62657e5d73eac8f1d7d02edf71db8af9' 'f07730709f11bf0732b60b81603c40bea1fa6da2d8df545b088072e0f710a7e123e438b03d2390122b6000c14343e932b38d4b6f8956ea87b72e29deae1a4715' 'd56fb5a064be3ce810f274bb2e49e9afd921b536b9194da07a87fedd7e403af0edf0e4e05e975efa55b3d7deefd2ad71a2b36a1f1da45c653344f64478b1a613') -prepare() { - # fix tests using python-hypothesis: https://github.com/audreyr/binaryornot/issues/46 - patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-hypothesis_tests.patch - # fix version as the wrong commit was tagged: https://github.com/binaryornot/binaryornot/issues/210 - patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-set_version.patch -} +#prepare() { +# # fix tests using python-hypothesis: https://github.com/audreyr/binaryornot/issues/46 +##patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-hypothesis_tests.patch +# # fix version as the wrong commit was tagged: https://github.com/binaryornot/binaryornot/issues/210 +##patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-set_version.patch +#} build() { cd $_name-$pkgver diff --git a/python-black/PKGBUILD b/python-black/PKGBUILD index cc0d6fa776..d2487076db 100644 --- a/python-black/PKGBUILD +++ b/python-black/PKGBUILD @@ -1,6 +1,7 @@ # Maintainer: Maxim Baz # Maintainer: Daniel M. 
Capella # Contributor: James Zhu +export CHECKFUNC=1 pkgname=python-black pkgver=23.12.1 diff --git a/python-cachy/PKGBUILD b/python-cachy/PKGBUILD index b482cc308f..03602725e2 100644 --- a/python-cachy/PKGBUILD +++ b/python-cachy/PKGBUILD @@ -1,5 +1,6 @@ # Maintainer: Caleb Maclennan # Contributor: Eli Schwartz +export CHECKFUNC=1 _pkgname=cachy pkgname=python-cachy diff --git a/python-cryptography/PKGBUILD b/python-cryptography/PKGBUILD index 92f502ce07..dc1abe312a 100644 --- a/python-cryptography/PKGBUILD +++ b/python-cryptography/PKGBUILD @@ -9,8 +9,9 @@ arch=('loong64' 'x86_64') license=('Apache') url="https://pypi.python.org/pypi/cryptography" depends=('python-cffi') +options=(!lto) makedepends=('git' 'python-setuptools-rust' 'llvm' 'clang' 'lld') -checkdepends=('python-pytest' 'python-pytest-subtests' 'python-iso8601' 'python-pretend' +makedepends+=('python-pytest' 'python-pytest-subtests' 'python-iso8601' 'python-pretend' 'python-hypothesis' 'python-pytz' 'python-pytest-benchmark') source=("git+https://github.com/pyca/cryptography.git#commit=$_commit") sha512sums=('SKIP') @@ -18,8 +19,7 @@ sha512sums=('SKIP') build() { cd cryptography echo $RUSTFLAGS - # https://github.com/pyca/cryptography/issues/9023 - CC=clang RUSTFLAGS+="-Clinker-plugin-lto -Clinker=clang -Clink-arg=-fuse-ld=lld" python setup.py build + python setup.py build } check() { diff --git a/python-debugpy/PKGBUILD b/python-debugpy/PKGBUILD index 5f26ad999b..47e3cb4823 100644 --- a/python-debugpy/PKGBUILD +++ b/python-debugpy/PKGBUILD @@ -25,7 +25,7 @@ build() { python setup.py build # Compile attach libraries cd build/lib*/debugpy/_vendored/pydevd/pydevd_attach_to_process - g++ ${CXXFLAGS} -m64 -shared -o attach_linux_amd64.so -fPIC -nostartfiles linux_and_mac/attach.cpp ${LDFLAGS} + g++ ${CXXFLAGS} -shared -o attach_linux_amd64.so -fPIC -nostartfiles linux_and_mac/attach.cpp ${LDFLAGS} } package() { diff --git a/python-et-xmlfile/PKGBUILD b/python-et-xmlfile/PKGBUILD index 73fc437f2c..ed9df5d7e4 100644 --- a/python-et-xmlfile/PKGBUILD +++ b/python-et-xmlfile/PKGBUILD @@ -12,7 +12,7 @@ depends=('python') makedepends=('python-setuptools') checkdepends=('python-pytest-runner' 'python-lxml') source=("https://foss.heptapod.net/openpyxl/et_xmlfile/-/archive/${pkgver}/et_xmlfile-${pkgver}.tar.gz") -sha256sums=('8d6705c2f97b2d6195c95e5f3781a1ed44a59d43cf1263e04034767e5db65131') +sha256sums=('707c2211ba4a041fd866ef6a60966f4ff82c89f4d4dfabf5aea59aaf97f9be4f') prepare() { cd "$srcdir"/et_xmlfile-${pkgver} diff --git a/python-greenlet/PKGBUILD b/python-greenlet/PKGBUILD index 505dbaf61c..f6882ca891 100644 --- a/python-greenlet/PKGBUILD +++ b/python-greenlet/PKGBUILD @@ -13,8 +13,15 @@ depends=('python') makedepends=('python-build' 'python-installer' 'python-setuptools' 'python-wheel') checkdepends=('python-objgraph' 'python-psutil') -source=("https://files.pythonhosted.org/packages/source/g/greenlet/greenlet-${pkgver}.tar.gz") -sha512sums=('67d74352802331642eba0917550a75e9bc2a7d223bc0ce4ee7993d05197b4d0650813439e7c495baf2309303740cd21e60a157e634aafff470332a685603ffec') +source=("https://files.pythonhosted.org/packages/source/g/greenlet/greenlet-${pkgver}.tar.gz" + python-greenlet-la64.patch) +sha512sums=('67d74352802331642eba0917550a75e9bc2a7d223bc0ce4ee7993d05197b4d0650813439e7c495baf2309303740cd21e60a157e634aafff470332a685603ffec' + 'a427f6121802409a0d5d83a44eeafa50e19e5b1a02041220b2ee98f90d262e4acc0affc8d7f9ae5bbec05a738293dbf5c6d37adca0291ab753c0f41f7a7d5c2a') + +prepare() { + cd greenlet-$pkgver + patch -p1 -i 
$srcdir/python-greenlet-la64.patch +} build() { cd greenlet-$pkgver diff --git a/python-greenlet/python-greenlet-la64.patch b/python-greenlet/python-greenlet-la64.patch new file mode 100644 index 0000000000..39c801106f --- /dev/null +++ b/python-greenlet/python-greenlet-la64.patch @@ -0,0 +1,61 @@ +From c2bd5118ec44752450c63fd8b1a47802f5c0cf0e Mon Sep 17 00:00:00 2001 +From: merore +Date: Mon, 23 Aug 2021 18:00:20 +0000 +Subject: [PATCH] Port to LoongArch64 + +--- + .../platform/switch_loongarch64_linux.h | 31 +++++++++++++++++++ + src/greenlet/slp_platformselect.h | 2 ++ + 2 files changed, 33 insertions(+) + create mode 100644 src/greenlet/platform/switch_loongarch64_linux.h + +diff --git a/src/greenlet/platform/switch_loongarch64_linux.h b/src/greenlet/platform/switch_loongarch64_linux.h +new file mode 100644 +index 00000000..03a5ce9c +--- /dev/null ++++ b/src/greenlet/platform/switch_loongarch64_linux.h +@@ -0,0 +1,31 @@ ++#define STACK_REFPLUS 1 ++ ++#ifdef SLP_EVAL ++#define STACK_MAGIC 0 ++ ++#define REGS_TO_SAVE "s0", "s1", "s2", "s3", "s4", "s5", \ ++ "s6", "s7", "s8", "fp", \ ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ ++static int ++slp_switch(void) ++{ ++ register int ret; ++ register long *stackref, stsizediff; ++ __asm__ volatile ("" : : : REGS_TO_SAVE); ++ __asm__ volatile ("move %0, $sp" : "=r" (stackref) : ); ++ { ++ SLP_SAVE_STATE(stackref, stsizediff); ++ __asm__ volatile ( ++ "add.d $sp, $sp, %0\n\t" ++ : /* no outputs */ ++ : "r" (stsizediff) ++ ); ++ SLP_RESTORE_STATE(); ++ } ++ __asm__ volatile ("" : : : REGS_TO_SAVE); ++ __asm__ volatile ("move %0, $zero" : "=r" (ret) : ); ++ return ret; ++} ++ ++#endif +diff --git a/src/greenlet/slp_platformselect.h b/src/greenlet/slp_platformselect.h +index b5e8eb6e..f3be7ad9 100644 +--- a/src/greenlet/slp_platformselect.h ++++ b/src/greenlet/slp_platformselect.h +@@ -34,6 +34,8 @@ + #include "platform/switch_s390_unix.h" /* Linux/S390 */ + #elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__) + #include "platform/switch_s390_unix.h" /* Linux/S390 zSeries (64-bit) */ ++#elif defined(__GNUC__) && defined(__loongarch64) && defined(__linux__) ++#include "platform/switch_loongarch64_linux.h" /* Linux/LoongArch64 */ + #elif defined(__GNUC__) && defined(__arm__) + #ifdef __APPLE__ + #include diff --git a/python-libcst/PKGBUILD b/python-libcst/PKGBUILD index 0fb052f37e..a9b57a177a 100644 --- a/python-libcst/PKGBUILD +++ b/python-libcst/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Filipe Laíns (FFY00) +export CHECKFUNC=1 _pkgname=libcst pkgname=python-libcst diff --git a/python-mss/PKGBUILD b/python-mss/PKGBUILD index 96b5b3c573..8cdd5780ff 100644 --- a/python-mss/PKGBUILD +++ b/python-mss/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Bruno Pagani +export CHECKFUNC=1 _pkg=mss pkgname=python-${_pkg} diff --git a/python-nodeenv/PKGBUILD b/python-nodeenv/PKGBUILD index b2b10577fe..baa7e390e8 100644 --- a/python-nodeenv/PKGBUILD +++ b/python-nodeenv/PKGBUILD @@ -9,9 +9,16 @@ license=('BSD') arch=('any') depends=('python-setuptools' 'make') optdepends=('nodejs: for --node=system') -checkdepends=('nodejs' 'python-pytest-runner' 'python-coverage') -source=("$pkgname-$pkgver.tar.gz::https://github.com/ekalinin/nodeenv/archive/$pkgver.tar.gz") -sha512sums=('96dce219e00d3837b2b0083af9fe6d94ed4e3cd029e3da564263ad8656dcb9c52440c2df6a6954095e5cacd03e44437f08695603dea82c28122713045183014f') +makedepends+=('nodejs' 'python-pytest-runner' 'python-coverage') 
+source=("$pkgname-$pkgver.tar.gz::https://github.com/ekalinin/nodeenv/archive/$pkgver.tar.gz" + nodeenv-loong64.patch) +sha512sums=('96dce219e00d3837b2b0083af9fe6d94ed4e3cd029e3da564263ad8656dcb9c52440c2df6a6954095e5cacd03e44437f08695603dea82c28122713045183014f' + 'be071c28ec37ed9063b3d34a0f54bc56635e6a3bbce51ce50c6584e4d2c50f2b5875af734304c7d76bb881d6e0b94cf1f9ab49b24be5f72125a2a6a3000ae14d') + +prepare() { + cd nodeenv-$pkgver + patch -p1 -i $srcdir/nodeenv-loong64.patch +} build() { cd nodeenv-$pkgver diff --git a/python-nodeenv/nodeenv-loong64.patch b/python-nodeenv/nodeenv-loong64.patch new file mode 100644 index 0000000000..2dfb274fe6 --- /dev/null +++ b/python-nodeenv/nodeenv-loong64.patch @@ -0,0 +1,12 @@ +Index: nodeenv-1.7.0/nodeenv.py +=================================================================== +--- nodeenv-1.7.0.orig/nodeenv.py ++++ nodeenv-1.7.0/nodeenv.py +@@ -545,6 +545,7 @@ def get_node_bin_url(version): + 'arm64/v8': 'arm64', + 'armv8': 'arm64', + 'armv8.4': 'arm64', ++ 'loongarch64': 'loong64', + 'ppc64le': 'ppc64le', # Power PC + 's390x': 's390x', # IBM S390x + } diff --git a/python-numpy/PKGBUILD b/python-numpy/PKGBUILD index adff121b42..ce1277ca7a 100755 --- a/python-numpy/PKGBUILD +++ b/python-numpy/PKGBUILD @@ -14,8 +14,15 @@ depends=('cblas' 'lapack' 'python') optdepends=('blas-openblas: faster linear algebra') makedepends=('python-build' 'python-installer' 'meson-python' 'cmake' 'gcc-fortran' 'cython') checkdepends=('python-pytest' 'python-hypothesis') -source=("https://github.com/numpy/numpy/releases/download/v$pkgver/numpy-$pkgver.tar.gz") -sha512sums=('25556b41e2db9cfc52c1dfa61b05e4fc1b7b6df3b169f365375575d1146857fdb5ff91ca1508b968c296b7a06e5c6d95e82c41cdc3561587a46d3aa178f6305d') +source=("https://github.com/numpy/numpy/releases/download/v$pkgver/numpy-$pkgver.tar.gz" + "add-loongarch-support.patch") +sha512sums=('25556b41e2db9cfc52c1dfa61b05e4fc1b7b6df3b169f365375575d1146857fdb5ff91ca1508b968c296b7a06e5c6d95e82c41cdc3561587a46d3aa178f6305d' + '06e4ec4f893e29d78156b4e8acaa5294b8340926b7d67f4b9f5d29113a404cf23b7c048f9d64de8a5907f7181306e50768546e64f1b53d038685d00a58e9c93a') + +prepare() { + cd numpy-$pkgver + patch -p1 -i "$srcdir/add-loongarch-support.patch" +} build() { cd numpy-$pkgver diff --git a/python-numpy/add-loongarch-support.patch b/python-numpy/add-loongarch-support.patch new file mode 100644 index 0000000000..3292dfd819 --- /dev/null +++ b/python-numpy/add-loongarch-support.patch @@ -0,0 +1,33 @@ +diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h +index 509e23a..34ecb20 100644 +--- a/numpy/core/include/numpy/npy_cpu.h ++++ b/numpy/core/include/numpy/npy_cpu.h +@@ -17,6 +17,7 @@ + * NPY_CPU_SH_BE + * NPY_CPU_ARCEL + * NPY_CPU_ARCEB ++ * NPY_CPU_LOONGARCH + * NPY_CPU_RISCV64 + * NPY_CPU_WASM + */ +@@ -95,6 +96,8 @@ + #define NPY_CPU_MIPSEB + #elif defined(__or1k__) + #define NPY_CPU_OR1K ++#elif defined(__loongarch__) ++ #define NPY_CPU_LOONGARCH + #elif defined(__mc68000__) + #define NPY_CPU_M68K + #elif defined(__arc__) && defined(__LITTLE_ENDIAN__) +diff --git a/numpy/core/include/numpy/npy_endian.h b/numpy/core/include/numpy/npy_endian.h +index aa367a0..d5905ae 100644 +--- a/numpy/core/include/numpy/npy_endian.h ++++ b/numpy/core/include/numpy/npy_endian.h +@@ -45,6 +45,7 @@ + || defined(NPY_CPU_ARMEL_AARCH32) \ + || defined(NPY_CPU_ARMEL_AARCH64) \ + || defined(NPY_CPU_SH_LE) \ ++ || defined(NPY_CPU_LOONGARCH) \ + || defined(NPY_CPU_MIPSEL) \ + || defined(NPY_CPU_PPC64LE) \ + || defined(NPY_CPU_ARCEL) \ 
diff --git a/python-parso/PKGBUILD b/python-parso/PKGBUILD index 9bac50b2e7..a9e7cf9049 100644 --- a/python-parso/PKGBUILD +++ b/python-parso/PKGBUILD @@ -37,7 +37,7 @@ package() { python setup.py install --root="$pkgdir" --optimize=1 --skip-build install -Dm 644 LICENSE.txt -t "$pkgdir/usr/share/licenses/$pkgname" install -Dm 644 CHANGELOG.rst README.rst docs/_build/text/*.txt -t "$pkgdir/usr/share/doc/$pkgname" - install -Dm 644 docs/_build/man/parso.1 "$pkgdir/usr/share/man/man1/$pkgname.1" + #install -Dm 644 docs/_build/man/parso.1 "$pkgdir/usr/share/man/man1/$pkgname.1" } # vim: ts=2 sw=2 et: diff --git a/python-poetry/PKGBUILD b/python-poetry/PKGBUILD index be16c635a1..5eb18a5335 100644 --- a/python-poetry/PKGBUILD +++ b/python-poetry/PKGBUILD @@ -41,7 +41,7 @@ _deps=(build virtualenv) depends=(python "${_deps[@]/#/python-}") -checkdepends=(python-deepdiff # not mentioned but required +makedepends+=(python-deepdiff # not mentioned but required python-psutil # for python-pytest-xdist python-httpretty python-pip # not mentioned but required diff --git a/python-pyelftools/PKGBUILD b/python-pyelftools/PKGBUILD index 6e30d8f311..8bcd18f51c 100644 --- a/python-pyelftools/PKGBUILD +++ b/python-pyelftools/PKGBUILD @@ -12,10 +12,17 @@ license=('custom:Public Domain') depends=('python') makedepends=('python-build' 'python-installer' 'python-wheel' 'python-setuptools') options=('!strip') -source=(https://github.com/eliben/${_pkgname}/archive/v${pkgver}/${_pkgname}-${pkgver}.tar.gz) -sha512sums=('5bd4c797f90307e351d541b8de8f76124c66e497b68b811f7012e1271c902beb6ab530a424b338777d12277d44f9b5f89f049e05d9fc2ec36a90b6fa16f1c1a4') -b2sums=('b7974bc1a51ff5ba6ced17aac44e3911ff8d892564a3bef07ae6a5fc261eb7d2eb02170678c4d0bafedaa9e393fa4ca6d2059c420436ea73aa4f6aa7b49a894e') +source=(https://github.com/eliben/${_pkgname}/archive/v${pkgver}/${_pkgname}-${pkgver}.tar.gz +pyelftools-0.29.patch) +sha512sums=('5bd4c797f90307e351d541b8de8f76124c66e497b68b811f7012e1271c902beb6ab530a424b338777d12277d44f9b5f89f049e05d9fc2ec36a90b6fa16f1c1a4' + 'a66285a59cefd86f168a277bfe1dcd648b6bd8ff054183ff1a4768818757e17c7a6412f0d1c262836e2f53ec49f2c8975b46c18ba240a2b5923f6ff88c2ee325') +b2sums=('b7974bc1a51ff5ba6ced17aac44e3911ff8d892564a3bef07ae6a5fc261eb7d2eb02170678c4d0bafedaa9e393fa4ca6d2059c420436ea73aa4f6aa7b49a894e' + '2785903c2265766f51a1b213132a95e27256248f662d98478c5220b5f623c2040123fe850fa6affe316a45cafac01606518f2594d93444acc9a1f4b77541b639') +prepare() { + cd ${_pkgname}-${pkgver} + patch -p1 -i $srcdir/pyelftools-0.29.patch +} build() { cd ${_pkgname}-${pkgver} diff --git a/python-pyelftools/pyelftools-0.29.patch b/python-pyelftools/pyelftools-0.29.patch new file mode 100644 index 0000000000..f8b14c49e2 --- /dev/null +++ b/python-pyelftools/pyelftools-0.29.patch @@ -0,0 +1,452 @@ +diff --git a/elftools/elf/constants.py b/elftools/elf/constants.py +index fc55aac..567f1e3 100644 +--- a/elftools/elf/constants.py ++++ b/elftools/elf/constants.py +@@ -51,6 +51,31 @@ class E_FLAGS(object): + EF_MIPS_ARCH_32R2=0x70000000 + EF_MIPS_ARCH_64R2=0x80000000 + ++ EF_RISCV_RVC=0x00000001 ++ EF_RISCV_FLOAT_ABI=0x00000006 ++ EF_RISCV_FLOAT_ABI_SOFT=0x00000000 ++ EF_RISCV_FLOAT_ABI_SINGLE=0x00000002 ++ EF_RISCV_FLOAT_ABI_DOUBLE=0x00000004 ++ EF_RISCV_FLOAT_ABI_QUAD=0x00000006 ++ EF_RISCV_RVE=0x00000008 ++ EF_RISCV_TSO=0x00000010 ++ ++ EF_LOONGARCH_OBJABI_MASK=0x000000C0 ++ EF_LOONGARCH_OBJABI_V0=0x00000000 ++ EF_LOONGARCH_OBJABI_V1=0x00000040 ++ EF_LOONGARCH_ABI_MODIFIER_MASK=0x00000007 ++ EF_LOONGARCH_ABI_SOFT_FLOAT=0x00000001 ++ 
EF_LOONGARCH_ABI_SINGLE_FLOAT=0x00000002 ++ EF_LOONGARCH_ABI_DOUBLE_FLOAT=0x00000003 ++ # The names in the glibc elf.h say "LARCH" instead of "LOONGARCH", ++ # provide these names for users' convenience. ++ EF_LARCH_OBJABI_MASK = EF_LOONGARCH_OBJABI_MASK ++ EF_LARCH_OBJABI_V0 = EF_LOONGARCH_OBJABI_V0 ++ EF_LARCH_OBJABI_V1 = EF_LOONGARCH_OBJABI_V1 ++ EF_LARCH_ABI_MODIFIER_MASK = EF_LOONGARCH_ABI_MODIFIER_MASK ++ EF_LARCH_ABI_SOFT_FLOAT = EF_LOONGARCH_ABI_SOFT_FLOAT ++ EF_LARCH_ABI_SINGLE_FLOAT = EF_LOONGARCH_ABI_SINGLE_FLOAT ++ EF_LARCH_ABI_DOUBLE_FLOAT = EF_LOONGARCH_ABI_DOUBLE_FLOAT + + class E_FLAGS_MASKS(object): + """Masks to be used for convenience when working with E_FLAGS +diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py +index 38c80b6..b6a615e 100644 +--- a/elftools/elf/descriptions.py ++++ b/elftools/elf/descriptions.py +@@ -10,7 +10,8 @@ from .enums import ( + ENUM_D_TAG, ENUM_E_VERSION, ENUM_P_TYPE_BASE, ENUM_SH_TYPE_BASE, + ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, +- ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_DT_FLAGS, ENUM_DT_FLAGS_1) ++ ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_RELOC_TYPE_LOONGARCH, ++ ENUM_DT_FLAGS, ENUM_DT_FLAGS_1) + from .constants import ( + P_FLAGS, RH_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS) + from ..common.py3compat import bytes2hex, iteritems +@@ -151,6 +152,8 @@ def describe_reloc_type(x, elffile): + return _DESCR_RELOC_TYPE_PPC64.get(x, _unknown) + elif arch == 'MIPS': + return _DESCR_RELOC_TYPE_MIPS.get(x, _unknown) ++ elif arch == 'LoongArch': ++ return _DESCR_RELOC_TYPE_LOONGARCH.get(x, _unknown) + else: + return 'unrecognized: %-7x' % (x & 0xFFFFFFFF) + +@@ -389,6 +392,7 @@ _DESCR_E_MACHINE = dict( + EM_BLACKFIN='Analog Devices Blackfin', + EM_PPC='PowerPC', + EM_PPC64='PowerPC64', ++ EM_LOONGARCH='LoongArch', + RESERVED='RESERVED', + ) + +@@ -672,6 +676,7 @@ _DESCR_RELOC_TYPE_ARM = _reverse_dict(ENUM_RELOC_TYPE_ARM) + _DESCR_RELOC_TYPE_AARCH64 = _reverse_dict(ENUM_RELOC_TYPE_AARCH64) + _DESCR_RELOC_TYPE_PPC64 = _reverse_dict(ENUM_RELOC_TYPE_PPC64) + _DESCR_RELOC_TYPE_MIPS = _reverse_dict(ENUM_RELOC_TYPE_MIPS) ++_DESCR_RELOC_TYPE_LOONGARCH = _reverse_dict(ENUM_RELOC_TYPE_LOONGARCH) + + _low_priority_D_TAG = ( + # these are 'meta-tags' marking semantics of numeric ranges of the enum +diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py +index d228db7..90418a0 100644 +--- a/elftools/elf/elffile.py ++++ b/elftools/elf/elffile.py +@@ -533,6 +533,7 @@ class ELFFile(object): + 'EM_RISCV' : 'RISC-V', + 'EM_BPF' : 'Linux BPF - in-kernel virtual machine', + 'EM_CSKY' : 'C-SKY', ++ 'EM_LOONGARCH' : 'LoongArch', + 'EM_FRV' : 'Fujitsu FR-V' + } + +diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py +index 745aefc..5116523 100644 +--- a/elftools/elf/enums.py ++++ b/elftools/elf/enums.py +@@ -257,6 +257,7 @@ ENUM_E_MACHINE = dict( + EM_RISCV = 243, # RISC-V + EM_BPF = 247, # Linux BPF - in-kernel virtual machine + EM_CSKY = 252, # C-SKY ++ EM_LOONGARCH = 258, # LoongArch + EM_FRV = 0x5441, # Fujitsu FR-V + # Reservations + # reserved 11-14 Reserved for future use +@@ -812,6 +813,119 @@ ENUM_RELOC_TYPE_x64 = dict( + _default_=Pass, + ) + ++ENUM_RELOC_TYPE_BPF = dict( ++ R_BPF_NONE=0, ++ R_BPF_64_64=1, ++ R_BPF_64_ABS64=2, ++ R_BPF_64_ABS32=3, ++ R_BPF_64_NODYLD32=4, ++ R_BPF_64_32=10, ++ _default_=Pass, ++) ++ ++# https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc ++ENUM_RELOC_TYPE_LOONGARCH = dict( ++ R_LARCH_NONE=0, ++ 
R_LARCH_32=1, ++ R_LARCH_64=2, ++ R_LARCH_RELATIVE=3, ++ R_LARCH_COPY=4, ++ R_LARCH_JUMP_SLOT=5, ++ R_LARCH_TLS_DTPMOD32=6, ++ R_LARCH_TLS_DTPMOD64=7, ++ R_LARCH_TLS_DTPREL32=8, ++ R_LARCH_TLS_DTPREL64=9, ++ R_LARCH_TLS_TPREL32=10, ++ R_LARCH_TLS_TPREL64=11, ++ R_LARCH_IRELATIVE=12, ++ R_LARCH_MARK_LA=20, ++ R_LARCH_MARK_PCREL=21, ++ R_LARCH_SOP_PUSH_PCREL=22, ++ R_LARCH_SOP_PUSH_ABSOLUTE=23, ++ R_LARCH_SOP_PUSH_DUP=24, ++ R_LARCH_SOP_PUSH_GPREL=25, ++ R_LARCH_SOP_PUSH_TLS_TPREL=26, ++ R_LARCH_SOP_PUSH_TLS_GOT=27, ++ R_LARCH_SOP_PUSH_TLS_GD=28, ++ R_LARCH_SOP_PUSH_PLT_PCREL=29, ++ R_LARCH_SOP_ASSERT=30, ++ R_LARCH_SOP_NOT=31, ++ R_LARCH_SOP_SUB=32, ++ R_LARCH_SOP_SL=33, ++ R_LARCH_SOP_SR=34, ++ R_LARCH_SOP_ADD=35, ++ R_LARCH_SOP_AND=36, ++ R_LARCH_SOP_IF_ELSE=37, ++ R_LARCH_SOP_POP_32_S_10_5=38, ++ R_LARCH_SOP_POP_32_U_10_12=39, ++ R_LARCH_SOP_POP_32_S_10_12=40, ++ R_LARCH_SOP_POP_32_S_10_16=41, ++ R_LARCH_SOP_POP_32_S_10_16_S2=42, ++ R_LARCH_SOP_POP_32_S_5_20=43, ++ R_LARCH_SOP_POP_32_S_0_5_10_16_S2=44, ++ R_LARCH_SOP_POP_32_S_0_10_10_16_S2=45, ++ R_LARCH_SOP_POP_32_U=46, ++ R_LARCH_ADD8=47, ++ R_LARCH_ADD16=48, ++ R_LARCH_ADD24=49, ++ R_LARCH_ADD32=50, ++ R_LARCH_ADD64=51, ++ R_LARCH_SUB8=52, ++ R_LARCH_SUB16=53, ++ R_LARCH_SUB24=54, ++ R_LARCH_SUB32=55, ++ R_LARCH_SUB64=56, ++ R_LARCH_GNU_VTINHERIT=57, ++ R_LARCH_GNU_VTENTRY=58, ++ R_LARCH_B16=64, ++ R_LARCH_B21=65, ++ R_LARCH_B26=66, ++ R_LARCH_ABS_HI20=67, ++ R_LARCH_ABS_LO12=68, ++ R_LARCH_ABS64_LO20=69, ++ R_LARCH_ABS64_HI12=70, ++ R_LARCH_PCALA_HI20=71, ++ R_LARCH_PCALA_LO12=72, ++ R_LARCH_PCALA64_LO20=73, ++ R_LARCH_PCALA64_HI12=74, ++ R_LARCH_GOT_PC_HI20=75, ++ R_LARCH_GOT_PC_LO12=76, ++ R_LARCH_GOT64_PC_LO20=77, ++ R_LARCH_GOT64_PC_HI12=78, ++ R_LARCH_GOT_HI20=79, ++ R_LARCH_GOT_LO12=80, ++ R_LARCH_GOT64_LO20=81, ++ R_LARCH_GOT64_HI12=82, ++ R_LARCH_TLS_LE_HI20=83, ++ R_LARCH_TLS_LE_LO12=84, ++ R_LARCH_TLS_LE64_LO20=85, ++ R_LARCH_TLS_LE64_HI12=86, ++ R_LARCH_TLS_IE_PC_HI20=87, ++ R_LARCH_TLS_IE_PC_LO12=88, ++ R_LARCH_TLS_IE64_PC_LO20=89, ++ R_LARCH_TLS_IE64_PC_HI12=90, ++ R_LARCH_TLS_IE_HI20=91, ++ R_LARCH_TLS_IE_LO12=92, ++ R_LARCH_TLS_IE64_LO20=93, ++ R_LARCH_TLS_IE64_HI12=94, ++ R_LARCH_TLS_LD_PC_HI20=95, ++ R_LARCH_TLS_LD_HI20=96, ++ R_LARCH_TLS_GD_PC_HI20=97, ++ R_LARCH_TLS_GD_HI20=98, ++ R_LARCH_32_PCREL=99, ++ R_LARCH_RELAX=100, ++ R_LARCH_DELETE=101, ++ R_LARCH_ALIGN=102, ++ R_LARCH_PCREL20_S2=103, ++ R_LARCH_CFA=104, ++ R_LARCH_ADD6=105, ++ R_LARCH_SUB6=106, ++ R_LARCH_ADD_ULEB128=107, ++ R_LARCH_SUB_ULEB128=108, ++ R_LARCH_64_PCREL=109, ++ _default_=Pass, ++) ++ + # Sunw Syminfo Bound To special values + ENUM_SUNW_SYMINFO_BOUNDTO = dict( + SYMINFO_BT_SELF=0xffff, +diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py +index 4008e28..028858e 100644 +--- a/elftools/elf/relocation.py ++++ b/elftools/elf/relocation.py +@@ -14,7 +14,7 @@ from .sections import Section + from .enums import ( + ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, +- ENUM_D_TAG) ++ ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH, ENUM_D_TAG) + from ..construct import Container + + +@@ -253,6 +253,13 @@ class RelocationHandler(object): + recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None) + elif self.elffile.get_machine_arch() == '64-bit PowerPC': + recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None) ++ elif self.elffile.get_machine_arch() == 'Linux BPF - in-kernel virtual machine': ++ recipe = 
self._RELOCATION_RECIPES_EBPF.get(reloc_type, None) ++ elif self.elffile.get_machine_arch() == 'LoongArch': ++ if not reloc.is_RELA(): ++ raise ELFRelocationError( ++ 'Unexpected REL relocation for LoongArch: %s' % reloc) ++ recipe = self._RELOCATION_RECIPES_LOONGARCH.get(reloc_type, None) + + if recipe is None: + raise ELFRelocationError( +@@ -267,6 +274,10 @@ class RelocationHandler(object): + value_struct = self.elffile.structs.Elf_word('') + elif recipe.bytesize == 8: + value_struct = self.elffile.structs.Elf_word64('') ++ elif recipe.bytesize == 1: ++ value_struct = self.elffile.structs.Elf_byte('') ++ elif recipe.bytesize == 2: ++ value_struct = self.elffile.structs.Elf_half('') + else: + raise ELFRelocationError('Invalid bytesize %s for relocation' % + recipe.bytesize) +@@ -316,6 +327,9 @@ class RelocationHandler(object): + def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): + return sym_value + addend - offset + ++ def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0): ++ return value - sym_value - addend ++ + def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): + return sym_value // 4 + value - offset // 4 + +@@ -381,4 +395,46 @@ class RelocationHandler(object): + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + } + ++ # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc ++ _RELOCATION_RECIPES_LOONGARCH = { ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD8']: _RELOCATION_RECIPE_TYPE( ++ bytesize=1, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB8']: _RELOCATION_RECIPE_TYPE( ++ bytesize=1, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD16']: _RELOCATION_RECIPE_TYPE( ++ bytesize=2, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB16']: _RELOCATION_RECIPE_TYPE( ++ bytesize=2, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD32']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB32']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD64']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB64']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32_PCREL']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend_pcrel), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64_PCREL']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend_pcrel), ++ } ++ + +diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py +index ca6bac3..552e530 100644 +--- a/scripts/dwarfdump.py ++++ b/scripts/dwarfdump.py +@@ -342,7 +342,7 @@ 
class ReadElf(object): + self.elffile = ELFFile(file) + self.output = output + self._dwarfinfo = self.elffile.get_dwarf_info() +- arches = {"EM_386": "i386", "EM_X86_64": "x86-64"} ++ arches = {"EM_386": "i386", "EM_X86_64": "x86-64", "EM_ARM": "littlearm", "EM_AARCH64": "littleaarch64", "EM_LOONGARCH": "loongarch", "EM_RISCV": "littleriscv", "EM_MIPS": "mips"} + arch = arches[self.elffile['e_machine']] + bits = self.elffile.elfclass + self._emitline("%s: file format elf%d-%s" % (filename, bits, arch)) +diff --git a/scripts/readelf.py b/scripts/readelf.py +index 2095c91..e84d89b 100755 +--- a/scripts/readelf.py ++++ b/scripts/readelf.py +@@ -9,6 +9,7 @@ + #------------------------------------------------------------------------------- + import argparse + import os, sys ++import re + import string + import traceback + import itertools +@@ -96,6 +97,13 @@ def _get_cu_base(cu): + else: + raise ValueError("Can't find the base IP (low_pc) for a CU") + ++# Matcher for all control characters, for transforming them into "^X" form when ++# formatting symbol names for display. ++_CONTROL_CHAR_RE = re.compile(r'[\x01-\x1f]') ++ ++def _format_symbol_name(s): ++ return _CONTROL_CHAR_RE.sub(lambda match: '^' + chr(0x40 + ord(match[0])), s) ++ + class ReadElf(object): + """ display_* methods are used to emit output into the output stream + """ +@@ -244,6 +252,18 @@ class ReadElf(object): + if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_64: + description += ", mips64" + ++ elif self.elffile['e_machine'] == "EM_LOONGARCH": ++ if (flags & E_FLAGS.EF_LOONGARCH_ABI_MODIFIER_MASK) == E_FLAGS.EF_LOONGARCH_ABI_SOFT_FLOAT: ++ description += ", SOFT-FLOAT" ++ if (flags & E_FLAGS.EF_LOONGARCH_ABI_MODIFIER_MASK) == E_FLAGS.EF_LOONGARCH_ABI_SINGLE_FLOAT: ++ description += ", SINGLE-FLOAT" ++ if (flags & E_FLAGS.EF_LOONGARCH_ABI_MODIFIER_MASK) == E_FLAGS.EF_LOONGARCH_ABI_DOUBLE_FLOAT: ++ description += ", DOUBLE-FLOAT" ++ if (flags & E_FLAGS.EF_LOONGARCH_OBJABI_MASK) == E_FLAGS.EF_LOONGARCH_OBJABI_V0: ++ description += ", OBJ-v0" ++ if (flags & E_FLAGS.EF_LOONGARCH_OBJABI_MASK) == E_FLAGS.EF_LOONGARCH_OBJABI_V1: ++ description += ", OBJ-v1" ++ + return description + + def display_program_headers(self, show_heading=True): +@@ -468,7 +488,7 @@ class ReadElf(object): + describe_symbol_shndx(self._get_symbol_shndx(symbol, + nsym, + section_index)), +- symbol_name, ++ _format_symbol_name(symbol_name), + version_info)) + + def display_dynamic_tags(self): +@@ -605,7 +625,7 @@ class ReadElf(object): + self._format_hex( + symbol['st_value'], + fullhex=True, lead0x=False), +- symbol_name)) ++ _format_symbol_name(symbol_name))) + if section.is_RELA(): + self._emit(' %s %x' % ( + '+' if rel['r_addend'] >= 0 else '-', +@@ -1403,24 +1423,20 @@ class ReadElf(object): + + # Look at the registers the decoded table describes. + # We build reg_order here to match readelf's order. In particular, +- # registers are sorted by their number, and the register matching +- # ra_regnum is always listed last with a special heading. ++ # registers are sorted by their number, so that the register ++ # matching ra_regnum is usually listed last with a special heading. ++ # (LoongArch is a notable exception in that its return register's ++ # DWARF register number is not greater than other GPRs.) 
+ decoded_table = entry.get_decoded() +- reg_order = sorted(ifilter( +- lambda r: r != ra_regnum, +- decoded_table.reg_order)) ++ reg_order = sorted(decoded_table.reg_order) + if len(decoded_table.reg_order): +- + # Headings for the registers + for regnum in reg_order: ++ if regnum == ra_regnum: ++ self._emit('ra ') ++ continue + self._emit('%-6s' % describe_reg_name(regnum)) +- self._emitline('ra ') +- +- # Now include ra_regnum in reg_order to print its values +- # similarly to the other registers. +- reg_order.append(ra_regnum) +- else: +- self._emitline() ++ self._emitline() + + for line in decoded_table.table: + self._emit(self._format_hex( +diff --git a/test/testfiles_for_readelf/loongarch-relocs.c b/test/testfiles_for_readelf/loongarch-relocs.c +new file mode 100644 +index 0000000..ad347d8 +--- /dev/null ++++ b/test/testfiles_for_readelf/loongarch-relocs.c +@@ -0,0 +1,18 @@ ++/* This source was compiled for LoongArch64. ++ loongarch64-unknown-linux-gnu-gcc -c -o loongarch64-relocs.o.elf loongarch-relocs.c -g ++ Upstream support for LoongArch32 is not yet mature, so it is not covered. ++*/ ++ ++extern struct { ++ int i, j; ++} data; ++ ++extern int bar (void); ++ ++int ++foo (int a) ++{ ++ data.i += a; ++ data.j -= bar(); ++ return 0; ++} +diff --git a/test/testfiles_for_readelf/loongarch64-relocs.o.elf b/test/testfiles_for_readelf/loongarch64-relocs.o.elf +new file mode 100644 +index 0000000..12fafa2 +Binary files /dev/null and b/test/testfiles_for_readelf/loongarch64-relocs.o.elf differ diff --git a/python-pylint/PKGBUILD b/python-pylint/PKGBUILD index a7cde4c713..1f56eacd16 100644 --- a/python-pylint/PKGBUILD +++ b/python-pylint/PKGBUILD @@ -4,6 +4,7 @@ # Contributor: Felix Yan # Contributor: Stéphane Gaudreault # Contributor: Alexander Fehr +export CHECKFUNC=1 _pyname=pylint pkgname=python-$_pyname diff --git a/python-pyopenssl/PKGBUILD b/python-pyopenssl/PKGBUILD index d2e718b7b4..2d2a5f24b4 100644 --- a/python-pyopenssl/PKGBUILD +++ b/python-pyopenssl/PKGBUILD @@ -18,7 +18,7 @@ makedepends=( 'python-setuptools' 'python-wheel' ) -checkdepends=('python-pytest' 'python-pretend' 'python-flaky') +makedepends+=('python-pytest' 'python-pretend' 'python-flaky') _commit='7f3e4f94701a5e19ec66e3601119dd6d62043cec' source=("$pkgname::git+https://github.com/pyca/pyopenssl#commit=$_commit") b2sums=('SKIP') diff --git a/python-pypandoc/PKGBUILD b/python-pypandoc/PKGBUILD index d59ec0de7a..d4963748f1 100644 --- a/python-pypandoc/PKGBUILD +++ b/python-pypandoc/PKGBUILD @@ -8,7 +8,7 @@ pkgdesc="Thin wrapper for pandoc" arch=('any') license=('MIT') url="https://github.com/JessicaTegner/pypandoc" -depends=('pandoc') +#depends=('pandoc') makedepends=('python-build' 'python-installer' 'python-poetry-core' 'python-wheel') checkdepends=('texlive-basic' 'texlive-latexextra' 'texlive-fontsrecommended' 'python-pandocfilters') source=("https://github.com/JessicaTegner/pypandoc/archive/v$pkgver/$pkgname-$pkgver.tar.gz") diff --git a/python-rpds-py/PKGBUILD b/python-rpds-py/PKGBUILD index ebcdf14655..eb53873db3 100644 --- a/python-rpds-py/PKGBUILD +++ b/python-rpds-py/PKGBUILD @@ -32,7 +32,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" +#cargo fetch --locked --target "$CARCH-unknown-linux-gnu" } build() { diff --git a/python-simple-term-menu/PKGBUILD b/python-simple-term-menu/PKGBUILD index 1b7a86f9f2..3c47e45fba 100644 --- a/python-simple-term-menu/PKGBUILD +++ b/python-simple-term-menu/PKGBUILD @@ -22,13 +22,21 @@ makedepends=( 
checkdepends=() optdepends=() source=("https://pypi.python.org/packages/source/s/simple-term-menu/simple-term-menu-${pkgver}.tar.gz") +source+=(cjk-preview.patch) sha512sums=('7787c557467c0e8a44fdf61cc094de1e1171c2f9fc82f0607128ca9ad14ce95037b2f4d1cd7e258480415f2704256af36421d389c5c0ca5280d01d5cc675f05a') b2sums=('6d4e498ee342fe4634944f0281b45ef7af7914eb8b3ceab7e7291b451e26757100c0002f9959150445eb752ca8b5e44ee00436d75ea326bafa2767894dbf7ead') +sha512sums+=('0d7d6366a01a4264f0d4061835998beb9d4fd1cd5bb0ca7c59230c3876def7192210984d208583df5b1ad28df22c27836066886c8926e6e28bb98f082127b4fb') +b2sums+=('027d66daec14cb96081cfa840e5da9d8c4394f0767f9e759fe6596d024aa0e61c234a8e467d6d8b27e66fa54a8c7d7237ea19adeb3f5ce7ca0f83816d78cd463') pkgver() { echo $pkgver } +prepare() { + cd "simple-term-menu-$pkgver" + patch -p1 -i $srcdir/cjk-preview.patch +} + build() { cd "simple-term-menu-$pkgver" diff --git a/python-simple-term-menu/cjk-preview.patch b/python-simple-term-menu/cjk-preview.patch new file mode 100644 index 0000000000..fc9f9ed446 --- /dev/null +++ b/python-simple-term-menu/cjk-preview.patch @@ -0,0 +1,11 @@ +--- a/simple_term_menu.py 2023-07-19 22:41:08.000000000 +0800 ++++ b/tmp/simple_term_menu.py 2023-08-15 14:44:22.410664526 +0800 +@@ -1270,7 +1270,7 @@ + BoxDrawingCharacters.upper_left + + (2 * BoxDrawingCharacters.horizontal + " " + self._preview_title)[: num_cols - 3] + + " " +- + (num_cols - len(self._preview_title) - 6) * BoxDrawingCharacters.horizontal ++ + (num_cols - wcswidth(self._preview_title) - 6) * BoxDrawingCharacters.horizontal + + BoxDrawingCharacters.upper_right + )[:num_cols] + + "\n" diff --git a/python-stone/PKGBUILD b/python-stone/PKGBUILD index cffd33923e..294edab362 100644 --- a/python-stone/PKGBUILD +++ b/python-stone/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: +export CHECKFUNC=1 pkgname=python-stone _name=${pkgname#python-} diff --git a/python-urllib3/PKGBUILD b/python-urllib3/PKGBUILD index dd227edefe..d8714ee314 100644 --- a/python-urllib3/PKGBUILD +++ b/python-urllib3/PKGBUILD @@ -20,7 +20,7 @@ makedepends=( 'python-sphinx-furo' 'python-wheel' ) -checkdepends=( +makedepends+=( 'python-brotli' 'python-certifi' 'python-cryptography' diff --git a/python-virtualenv/PKGBUILD b/python-virtualenv/PKGBUILD index c4980e25f0..b22484ca8e 100644 --- a/python-virtualenv/PKGBUILD +++ b/python-virtualenv/PKGBUILD @@ -47,11 +47,11 @@ _commit='1941c1d5abf81814992b68bbc86c0020dc75a3ad' source=("$pkgname::git+https://github.com/pypa/virtualenv#commit=$_commit") b2sums=('SKIP') -pkgver() { - cd "$pkgname" - - git describe --tags | sed 's/^v//' -} +#pkgver() { +# cd "$pkgname" +# +# git describe --tags | sed 's/^v//' +#} build() { cd "$pkgname" diff --git a/python-wstools/PKGBUILD b/python-wstools/PKGBUILD index f3dc7115ad..4c00e593ce 100644 --- a/python-wstools/PKGBUILD +++ b/python-wstools/PKGBUILD @@ -8,7 +8,7 @@ arch=('any') url="https://github.com/pycontribs/wstools" license=('custom') depends=('python-six') -makedepends=('python-pbr' 'python-setuptools') +makedepends=('python-pbr' 'python-setuptools' 'python-pip') checkdepends=('python-pytest-runner' 'autopep8' 'python-pytest-cov') source=("https://github.com/pycontribs/wstools/archive/$pkgver/$pkgname-$pkgver.tar.gz" python310.patch) diff --git a/qd/PKGBUILD b/qd/PKGBUILD index cecefbd850..e03c7ce5e7 100644 --- a/qd/PKGBUILD +++ b/qd/PKGBUILD @@ -13,7 +13,7 @@ depends=(gcc-libs glibc) makedepends=(gcc-fortran) source=(https://crd-legacy.lbl.gov/~dhbailey/mpdist/$pkgname-$pkgver.tar.gz) 
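For reference, the cjk-preview.patch above swaps len() for wcswidth() when sizing the preview-box rule: a double-width CJK title occupies two terminal cells per character, so counting code points draws the top border too long. A minimal sketch, assuming the third-party wcwidth package (the patched simple_term_menu.py already calls a wcswidth() helper):

    from wcwidth import wcswidth

    title = "预览"                               # 2 code points, 4 terminal cells
    num_cols = 40
    fill_old = num_cols - len(title) - 6        # 32: border overshoots by 2 cells
    fill_new = num_cols - wcswidth(title) - 6   # 30: border ends at the box corner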
-sha256sums=('ad6738e8330928308e10346ff7fd357ed17386408f8fb7a23704cd6f5d52a6c8') +sha256sums=('a47b6c73f86e6421e86a883568dd08e299b20e36c11a99bdfbe50e01bde60e38') build() { cd $pkgname-$pkgver diff --git a/qemu/PKGBUILD b/qemu/PKGBUILD index 60f53217db..60445c5e41 100644 --- a/qemu/PKGBUILD +++ b/qemu/PKGBUILD @@ -127,6 +127,8 @@ source=( $pkgbase-8.1.1-static_regression.patch $pkgbase-8.2.0-virtio-gpu_redrawing.patch::https://gitlab.com/qemu-project/qemu/-/commit/9d5b42beb6978dc6219d5dc029c9d453c6b8d503.patch $pkgbase-8.2.0-fix_tcg.patch + qemu-kvm-la64.patch + qemu-4k-pagesize.patch ) sha512sums=('92ec41196ff145cdbb98948f6b6e43214fa4b4419554a8a1927fb4527080c8212ccb703e184baf8ee0bdfa50ad7a84689e8f5a69eba1bd7bbbdfd69e3b91256c' 'SKIP' @@ -137,7 +139,9 @@ sha512sums=('92ec41196ff145cdbb98948f6b6e43214fa4b4419554a8a1927fb4527080c8212cc '93b905046fcea8a0a89513b9259c222494ab3b91319dde23baebcb40dc17376a56661b159b99785d6e816831974a0f3cbd7b2f7d89e5fc3c258f88f4492f3839' 'c7d086a951e9a378434ea95a843a4b01f0eb2ae430135a81365147cf6806a7ba1b49014a3aa66904970853ba84a4a28dbaded7bccb99a0bc3730572c80fb8b12' '3f052f87406c47849def7e21900cd9773ed061658e6f568b5918157650e0803c6b9c3bbfec69b577202cc2ce224d1d0339b615e419112f2ac351e44cd9f33539' - 'eb6a9c9ba7143b1ff937aeff06b07d6b4b718d92e6623743ecf7f1e50f01d259c2d8f7543f526948a14a865d4478b6dd3dfac56e17f5b27d78fda3380767615e') + 'eb6a9c9ba7143b1ff937aeff06b07d6b4b718d92e6623743ecf7f1e50f01d259c2d8f7543f526948a14a865d4478b6dd3dfac56e17f5b27d78fda3380767615e' + '45ec57f314f12ea1dd2e0989ff78b98452446ff040d3806a09df40ab21a4dcc0515a5e28b2294941e802b2bfd57b1c6408db6179a01d1b0331a54b1d7d459872' + 'e0b1b41c643e437548b2dadcd37697143e1424b0f5912347dd6cb3fbcb11a65c6f69a5206d9df600728ebf9bf3c12ec7aaa4280459a252f4488ada42c49336a7') b2sums=('a63667042e1e19c635568072d8dcc117320117e81e374a93cfb79e2363ebf505df3217fb098638e53c899eb6f83435221e8031f2aae003c27ec25af8654683b3' 'SKIP' 'b1eca364aa60f130ff5e649f5d004d3fcb75356d3421a4542efdfc410d39b40d9434d15e1dd7bbdbd315cb72b5290d3ea5f77f9c41961a5601cd28ef7bbe72e8' @@ -147,7 +151,9 @@ b2sums=('a63667042e1e19c635568072d8dcc117320117e81e374a93cfb79e2363ebf505df3217f 'a9a2bdfeeb44eb86cbe88ac7c65f72800bdb2fd5cecb02f3a258cf9470b52832180aab43c89d481f7fd4d067342a9a27dd6c8a94d625b95d6e2b912e47d274e7' '209ec05e161d157aaa08a9fcbea45cf87aa22fe9360f9b3c477a78a274e4ecee989c16121f9e6b7765bb479c9db718c98db047c27fd426c127c4c95e28877a16' '0d5ea661bfc2afe0bb68dce7504f872a2d30a2f46e2463bc7bfb4a0d63f01b3090c42780a221cca00a64b0e5c9a6970d8ba444c6d182d6f4867541da9a993512' - '5dcd51530db4253a3787d49be34988fbe2d240e50f8771123b853bf3413d26fd3a442580f89c009f15dfc48521279bd2302be1eaa329726330c0cf964e3ea6b2') + '5dcd51530db4253a3787d49be34988fbe2d240e50f8771123b853bf3413d26fd3a442580f89c009f15dfc48521279bd2302be1eaa329726330c0cf964e3ea6b2' + 'b109cfd804e316115c657ef0ca00da5d0e8f957ea0cbebd4c8ba11790bf941319ea0e601b81495ed2ebfd2ae9b5fdbb3b2795ca48db7392c7335c71c4a88f652' + 'c52f9320dcd59e6f21070a3d13255572ae8bf2e4bc13d989adf8e344413e4bea3d089f5d0306e7fa8050999eb81680638b3d4feef9e7aa29a0132830784e39ef') validpgpkeys=('CEACC9E15534EBABB82D3FA03353C9CEF108B584') # Michael Roth _qemu_system_deps=( @@ -271,6 +277,8 @@ _pick() { prepare() { # fix crash with static binaries: https://gitlab.com/qemu-project/qemu/-/issues/1913 patch -Np1 -d $pkgbase-$pkgver -i ../$pkgbase-8.1.1-static_regression.patch + patch -p1 -d $pkgbase-$pkgver -i ../qemu-kvm-la64.patch + patch -p1 -d $pkgbase-$pkgver -i ../qemu-4k-pagesize.patch # fix virtio-gpu redrawingi issues: 
https://gitlab.com/qemu-project/qemu/-/issues/2051 patch -Np1 -d $pkgbase-$pkgver -i ../$pkgbase-8.2.0-virtio-gpu_redrawing.patch @@ -305,6 +313,7 @@ build() { --enable-sdl --enable-slirp --enable-tpm + --enable-kvm --smbd=/usr/bin/smbd --with-coroutine=ucontext ) @@ -822,7 +831,7 @@ package_qemu-system-hppa-firmware() { package_qemu-system-loongarch64() { pkgdesc="QEMU system emulator for LoongArch64" - depends=("${_qemu_system_deps[@]}" systemd-libs) + depends=("${_qemu_system_deps[@]}" edk2-loongarch64 systemd-libs) mv -v $pkgname/* "$pkgdir" } diff --git a/qemu/qemu-4k-pagesize.patch b/qemu/qemu-4k-pagesize.patch new file mode 100644 index 0000000000..eb6d8f4bd4 --- /dev/null +++ b/qemu/qemu-4k-pagesize.patch @@ -0,0 +1,58 @@ +commit 281001f601e289d7c807de73ad0e70761938977b +Author: Song Gao +Date: Mon Oct 23 10:40:59 2023 +0800 + + target/loongarch: Support 4K page size + + The LoongArch kernel supports 4K page size. + Change TARGET_PAGE_BITS to 12. + + Signed-off-by: Song Gao + Message-Id: <20231023024059.3858349-1-gaosong@loongson.cn> + +diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h +index 1265dc7cb5..cfe195db4e 100644 +--- a/target/loongarch/cpu-param.h ++++ b/target/loongarch/cpu-param.h +@@ -12,6 +12,6 @@ + #define TARGET_PHYS_ADDR_SPACE_BITS 48 + #define TARGET_VIRT_ADDR_SPACE_BITS 48 + +-#define TARGET_PAGE_BITS 14 ++#define TARGET_PAGE_BITS 12 + + #endif +diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c +index 6e00190547..903bb76bda 100644 +--- a/target/loongarch/tlb_helper.c ++++ b/target/loongarch/tlb_helper.c +@@ -53,6 +53,9 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, + tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY, NR); + tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY, RPLV); + ++ /* Remove sw bit between bit12 -- bit PS*/ ++ tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); ++ + /* Check access rights */ + if (!tlb_v) { + return TLBRET_INVALID; +@@ -75,10 +78,6 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, + return TLBRET_DIRTY; + } + +- /* +- * tlb_entry contains ppn[47:12] while 16KiB ppn is [47:15] +- * need adjust. 
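A small worked example of the masking and address formation in the hunk above, assuming R_TLBENTRY_PPN_SHIFT is 12: once TARGET_PAGE_BITS drops to 12, a TLB entry that maps a larger page (e.g. 16 KiB, tlb_ps = 14) can carry software bits in the PPN bits below tlb_ps, and those bits must be cleared before the physical address is built.

    ppn_shift = 12                    # R_TLBENTRY_PPN_SHIFT (assumed)
    tlb_ps    = 14                    # this entry maps a 16 KiB page
    address   = 0x12345678
    tlb_ppn   = 0x40003               # low 2 bits are software bits, not address bits

    tlb_ppn &= ~((1 << (tlb_ps - 12)) - 1)        # -> 0x40000, as in the hunk above
    physical  = (tlb_ppn << ppn_shift) | (address & ((1 << tlb_ps) - 1))
    print(hex(physical))                          # 0x40001678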
+- */ + *physical = (tlb_ppn << R_TLBENTRY_PPN_SHIFT) | + (address & MAKE_64BIT_MASK(0, tlb_ps)); + *prot = PAGE_READ; +@@ -734,7 +733,7 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, + /* Move Global bit */ + tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> + LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | +- (tmp0 & (~(1 << R_TLBENTRY_G_SHIFT))); ++ (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); + ps = ptbase + ptwidth - 1; + if (odd) { + tmp0 += MAKE_64BIT_MASK(ps, 1); diff --git a/qemu/qemu-kvm-la64.patch b/qemu/qemu-kvm-la64.patch new file mode 100644 index 0000000000..6340a698b1 --- /dev/null +++ b/qemu/qemu-kvm-la64.patch @@ -0,0 +1,1680 @@ +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 35c131a107..6b9793842c 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -206,6 +206,11 @@ + * - add extension header + * - add FUSE_EXT_GROUPS + * - add FUSE_CREATE_SUPP_GROUP ++ * - add FUSE_HAS_EXPIRE_ONLY ++ * ++ * 7.39 ++ * - add FUSE_DIRECT_IO_RELAX ++ * - add FUSE_STATX and related structures + */ + + #ifndef _LINUX_FUSE_H +@@ -237,7 +242,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 38 ++#define FUSE_KERNEL_MINOR_VERSION 39 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -264,6 +269,40 @@ struct fuse_attr { + uint32_t flags; + }; + ++/* ++ * The following structures are bit-for-bit compatible with the statx(2) ABI in ++ * Linux. ++ */ ++struct fuse_sx_time { ++ int64_t tv_sec; ++ uint32_t tv_nsec; ++ int32_t __reserved; ++}; ++ ++struct fuse_statx { ++ uint32_t mask; ++ uint32_t blksize; ++ uint64_t attributes; ++ uint32_t nlink; ++ uint32_t uid; ++ uint32_t gid; ++ uint16_t mode; ++ uint16_t __spare0[1]; ++ uint64_t ino; ++ uint64_t size; ++ uint64_t blocks; ++ uint64_t attributes_mask; ++ struct fuse_sx_time atime; ++ struct fuse_sx_time btime; ++ struct fuse_sx_time ctime; ++ struct fuse_sx_time mtime; ++ uint32_t rdev_major; ++ uint32_t rdev_minor; ++ uint32_t dev_major; ++ uint32_t dev_minor; ++ uint64_t __spare2[14]; ++}; ++ + struct fuse_kstatfs { + uint64_t blocks; + uint64_t bfree; +@@ -365,6 +404,9 @@ struct fuse_file_lock { + * FUSE_HAS_INODE_DAX: use per inode DAX + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) ++ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation ++ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now ++ * allow shared mmap + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -402,6 +444,8 @@ struct fuse_file_lock { + #define FUSE_SECURITY_CTX (1ULL << 32) + #define FUSE_HAS_INODE_DAX (1ULL << 33) + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) ++#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) ++#define FUSE_DIRECT_IO_RELAX (1ULL << 36) + + /** + * CUSE INIT request/reply flags +@@ -568,6 +612,7 @@ enum fuse_opcode { + FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, + FUSE_TMPFILE = 51, ++ FUSE_STATX = 52, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +@@ -632,6 +677,22 @@ struct fuse_attr_out { + struct fuse_attr attr; + }; + ++struct fuse_statx_in { ++ uint32_t getattr_flags; ++ uint32_t reserved; ++ uint64_t fh; ++ uint32_t sx_flags; ++ uint32_t sx_mask; ++}; ++ ++struct fuse_statx_out { ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t attr_valid_nsec; 
++ uint32_t flags; ++ uint64_t spare[2]; ++ struct fuse_statx stat; ++}; ++ + #define FUSE_COMPAT_MKNOD_IN_SIZE 8 + + struct fuse_mknod_in { +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index 6691a3ce24..5ad07e134a 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -181,5 +181,9 @@ struct vhost_vdpa_iova_range { + #define VHOST_BACKEND_F_SUSPEND 0x4 + /* Device can be resumed */ + #define VHOST_BACKEND_F_RESUME 0x5 ++/* Device supports the driver enabling virtqueues both before and after ++ * DRIVER_OK ++ */ ++#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 + + #endif +diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h +index 2325485f2c..0f88417742 100644 +--- a/include/standard-headers/linux/virtio_net.h ++++ b/include/standard-headers/linux/virtio_net.h +@@ -56,6 +56,7 @@ + #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ + #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ ++#define VIRTIO_NET_F_VQ_NOTF_COAL 52 /* Device supports virtqueue notification coalescing */ + #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ + #define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */ + #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ +@@ -391,5 +392,18 @@ struct virtio_net_ctrl_coal_rx { + }; + + #define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 ++#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET 2 ++#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_GET 3 ++ ++struct virtio_net_ctrl_coal { ++ uint32_t max_packets; ++ uint32_t max_usecs; ++}; ++ ++struct virtio_net_ctrl_coal_vq { ++ uint16_t vqn; ++ uint16_t reserved; ++ struct virtio_net_ctrl_coal coal; ++}; + + #endif /* _LINUX_VIRTIO_NET_H */ +diff --git a/linux-headers/asm-arm64/bitsperlong.h b/linux-headers/asm-arm64/bitsperlong.h +index 6dc0bb0c13..485d60bee2 100644 +--- a/linux-headers/asm-arm64/bitsperlong.h ++++ b/linux-headers/asm-arm64/bitsperlong.h +@@ -1 +1,24 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2012 ARM Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++#ifndef __ASM_BITSPERLONG_H ++#define __ASM_BITSPERLONG_H ++ ++#define __BITS_PER_LONG 64 ++ + #include ++ ++#endif /* __ASM_BITSPERLONG_H */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index fd6c1cb585..abe087c53b 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) + #define __NR_cachestat 451 + __SYSCALL(__NR_cachestat, sys_cachestat) + ++#define __NR_fchmodat2 452 ++__SYSCALL(__NR_fchmodat2, sys_fchmodat2) ++ + #undef __NR_syscalls +-#define __NR_syscalls 452 ++#define __NR_syscalls 453 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +new file mode 100644 +index 0000000000..5e72b83372 +--- /dev/null ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -0,0 +1,100 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef __UAPI_ASM_LOONGARCH_KVM_H ++#define __UAPI_ASM_LOONGARCH_KVM_H ++ ++#include ++ ++/* ++ * KVM Loongarch specific structures and definitions. ++ */ ++ ++#define __KVM_HAVE_READONLY_MEM ++ ++#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 ++ ++/* ++ * for KVM_GET_REGS and KVM_SET_REGS ++ */ ++struct kvm_regs { ++ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ ++ __u64 gpr[32]; ++ __u64 pc; ++}; ++ ++/* ++ * for KVM_GET_FPU and KVM_SET_FPU ++ */ ++struct kvm_fpu { ++ __u32 fcsr; ++ __u32 none; ++ __u64 fcc; /* 8x8 */ ++ struct kvm_fpureg { ++ __u64 val64[4]; ++ } fpr[32]; ++}; ++ ++/* ++ * For LoongArch, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various ++ * registers. The id field is broken down as follows: ++ * ++ * bits[63..52] - As per linux/kvm.h ++ * bits[51..32] - Must be zero. ++ * bits[31..16] - Register set. ++ * ++ * Register set = 0: GP registers from kvm_regs (see definitions below). ++ * ++ * Register set = 1: CSR registers. ++ * ++ * Register set = 2: KVM specific registers (see definitions below). ++ * ++ * Register set = 3: FPU / SIMD registers (see definitions below). ++ * ++ * Other sets registers may be added in the future. Each set would ++ * have its own identifier in bits[31..16]. ++ */ ++ ++#define KVM_REG_LOONGARCH_GP (KVM_REG_LOONGARCH | 0x00000ULL) ++#define KVM_REG_LOONGARCH_CSR (KVM_REG_LOONGARCH | 0x10000ULL) ++#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) ++#define KVM_REG_LOONGARCH_FPU (KVM_REG_LOONGARCH | 0x30000ULL) ++#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) ++#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) ++#define KVM_CSR_IDX_MASK 0x7fff ++#define KVM_CPUCFG_IDX_MASK 0x7fff ++ ++/* ++ * KVM_REG_LOONGARCH_KVM - KVM specific control registers. 
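To make the id layout described above concrete (and to preview the LOONGARCH_REG_64/KVM_IOC_CSRID macros defined just below), a sketch that builds the KVM_GET_ONE_REG/KVM_SET_ONE_REG id for a 64-bit CSR; treating CSR number 0x1 as PRMD is an assumption for the example.

    KVM_REG_LOONGARCH     = 0x9 << 60            # arch field, added by this patch
    KVM_REG_SIZE_U64      = 0x3 << 52            # size field from linux/kvm.h
    KVM_REG_LOONGARCH_CSR = KVM_REG_LOONGARCH | 0x10000
    LOONGARCH_REG_SHIFT   = 3

    def kvm_ioc_csrid(csr):
        # mirrors LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, csr)
        return KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | (csr << LOONGARCH_REG_SHIFT)

    print(hex(kvm_ioc_csrid(0x1)))               # 0x9030000000010008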
++ */ ++ ++#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) ++#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 4) ++ ++#define LOONGARCH_REG_SHIFT 3 ++#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) ++#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) ++#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++ ++struct kvm_debug_exit_arch { ++}; ++ ++/* for KVM_SET_GUEST_DEBUG */ ++struct kvm_guest_debug_arch { ++}; ++ ++/* definition of registers in kvm_run */ ++struct kvm_sync_regs { ++}; ++ ++/* dummy definition */ ++struct kvm_sregs { ++}; ++ ++#define KVM_NR_IRQCHIPS 1 ++#define KVM_IRQCHIP_NUM_PINS 64 ++#define KVM_MAX_CORES 256 ++ ++#endif /* __UAPI_ASM_LOONGARCH_KVM_H */ +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 8233f061c4..46d8500654 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -380,5 +380,6 @@ + #define __NR_futex_waitv (__NR_Linux + 449) + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) ++#define __NR_fchmodat2 (__NR_Linux + 452) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index a174edc768..c2f7ac673b 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -356,5 +356,6 @@ + #define __NR_futex_waitv (__NR_Linux + 449) + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) ++#define __NR_fchmodat2 (__NR_Linux + 452) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index c1a5351d9b..757c68f2ad 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -426,5 +426,6 @@ + #define __NR_futex_waitv (__NR_Linux + 449) + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) ++#define __NR_fchmodat2 (__NR_Linux + 452) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 8206758691..8ef94bbac1 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -433,6 +433,7 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 7be98c15f0..0e7ee43e88 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -405,6 +405,7 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h +index 6dc0bb0c13..cc5c45a9ce 100644 +--- a/linux-headers/asm-riscv/bitsperlong.h ++++ b/linux-headers/asm-riscv/bitsperlong.h +@@ -1 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2012 ARM Ltd. 
++ * Copyright (C) 2015 Regents of the University of California ++ */ ++ ++#ifndef _ASM_RISCV_BITSPERLONG_H ++#define _ASM_RISCV_BITSPERLONG_H ++ ++#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) ++ + #include ++ ++#endif /* _ASM_RISCV_BITSPERLONG_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 930fdc4101..992c5e4071 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -55,6 +55,7 @@ struct kvm_riscv_config { + unsigned long marchid; + unsigned long mimpid; + unsigned long zicboz_block_size; ++ unsigned long satp_mode; + }; + + /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_SSAIA, + KVM_RISCV_ISA_EXT_V, + KVM_RISCV_ISA_EXT_SVNAPOT, ++ KVM_RISCV_ISA_EXT_ZBA, ++ KVM_RISCV_ISA_EXT_ZBS, ++ KVM_RISCV_ISA_EXT_ZICNTR, ++ KVM_RISCV_ISA_EXT_ZICSR, ++ KVM_RISCV_ISA_EXT_ZIFENCEI, ++ KVM_RISCV_ISA_EXT_ZIHPM, + KVM_RISCV_ISA_EXT_MAX, + }; + +@@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID { + + /* ISA Extension registers are mapped as type 7 */ + #define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id) \ ++ ((__ext_id) / __BITS_PER_LONG) ++#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \ ++ (1UL << ((__ext_id) % __BITS_PER_LONG)) ++#define KVM_REG_RISCV_ISA_MULTI_REG_LAST \ ++ KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1) + + /* SBI extension registers are mapped as type 8 */ + #define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT) +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index e2afd95420..023a2763a9 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc { + __u8 reserved[1728]; + }; + ++#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 ++#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7 ++ ++#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64 ++struct kvm_s390_vm_cpu_uv_feat { ++ union { ++ struct { ++ __u64 : 4; ++ __u64 ap : 1; /* bit 4 */ ++ __u64 ap_intr : 1; /* bit 5 */ ++ __u64 : 58; ++ }; ++ __u64 feat; ++ }; ++}; ++ + /* kvm attributes for crypto */ + #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 + #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index ef772cc5f8..716fa368ca 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -424,5 +424,6 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 32354a0459..b2a11b1d13 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -372,5 +372,6 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/mman.h b/linux-headers/asm-x86/mman.h +index 775dbd3aff..46cdc941f9 100644 +--- a/linux-headers/asm-x86/mman.h ++++ b/linux-headers/asm-x86/mman.h +@@ -3,14 +3,10 @@ + #define _ASM_X86_MMAN_H + + 
#define MAP_32BIT 0x40 /* only give out 32bit addresses */ ++#define MAP_ABOVE4G 0x80 /* only map above 4GB */ + +-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +-#define arch_calc_vm_prot_bits(prot, key) ( \ +- ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ +- ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ +- ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ +- ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) +-#endif ++/* Flags for map_shadow_stack(2) */ ++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ + + #include + +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 37b32d8139..d749ad1c24 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -442,6 +442,7 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index 5b55d6729a..cea67282eb 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -364,6 +364,8 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index e8a007543d..5b2e79bf4c 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -317,6 +317,7 @@ + #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) + #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) + #define __NR_cachestat (__X32_SYSCALL_BIT + 451) ++#define __NR_fchmodat2 (__X32_SYSCALL_BIT + 452) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 1f3f3333a4..0e378bbcbf 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -264,6 +264,7 @@ struct kvm_xen_exit { + #define KVM_EXIT_RISCV_SBI 35 + #define KVM_EXIT_RISCV_CSR 36 + #define KVM_EXIT_NOTIFY 37 ++#define KVM_EXIT_LOONGARCH_IOCSR 38 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. 
*/ +@@ -336,6 +337,13 @@ struct kvm_run { + __u32 len; + __u8 is_write; + } mmio; ++ /* KVM_EXIT_LOONGARCH_IOCSR */ ++ struct { ++ __u64 phys_addr; ++ __u8 data[8]; ++ __u32 len; ++ __u8 is_write; ++ } iocsr_io; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; +@@ -1358,6 +1366,7 @@ struct kvm_dirty_tlb { + #define KVM_REG_ARM64 0x6000000000000000ULL + #define KVM_REG_MIPS 0x7000000000000000ULL + #define KVM_REG_RISCV 0x8000000000000000ULL ++#define KVM_REG_LOONGARCH 0x9000000000000000ULL + + #define KVM_REG_SIZE_SHIFT 52 + #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +@@ -1414,9 +1423,16 @@ struct kvm_device_attr { + __u64 addr; /* userspace address of attr data */ + }; + +-#define KVM_DEV_VFIO_GROUP 1 +-#define KVM_DEV_VFIO_GROUP_ADD 1 +-#define KVM_DEV_VFIO_GROUP_DEL 2 ++#define KVM_DEV_VFIO_FILE 1 ++ ++#define KVM_DEV_VFIO_FILE_ADD 1 ++#define KVM_DEV_VFIO_FILE_DEL 2 ++ ++/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */ ++#define KVM_DEV_VFIO_GROUP KVM_DEV_VFIO_FILE ++ ++#define KVM_DEV_VFIO_GROUP_ADD KVM_DEV_VFIO_FILE_ADD ++#define KVM_DEV_VFIO_GROUP_DEL KVM_DEV_VFIO_FILE_DEL + #define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 + + enum kvm_device_type { +diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h +index bb6ea517ef..9bb07083ac 100644 +--- a/linux-headers/linux/stddef.h ++++ b/linux-headers/linux/stddef.h +@@ -45,3 +45,7 @@ + TYPE NAME[]; \ + } + #endif ++ ++#ifndef __counted_by ++#define __counted_by(m) ++#endif +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index 14e402263a..59978fbaae 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -39,7 +39,8 @@ + UFFD_FEATURE_MINOR_SHMEM | \ + UFFD_FEATURE_EXACT_ADDRESS | \ + UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ +- UFFD_FEATURE_WP_UNPOPULATED) ++ UFFD_FEATURE_WP_UNPOPULATED | \ ++ UFFD_FEATURE_POISON) + #define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ +@@ -49,12 +50,14 @@ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT | \ +- (__u64)1 << _UFFDIO_CONTINUE) ++ (__u64)1 << _UFFDIO_CONTINUE | \ ++ (__u64)1 << _UFFDIO_POISON) + #define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ ++ (__u64)1 << _UFFDIO_WRITEPROTECT | \ + (__u64)1 << _UFFDIO_CONTINUE | \ +- (__u64)1 << _UFFDIO_WRITEPROTECT) ++ (__u64)1 << _UFFDIO_POISON) + + /* + * Valid ioctl command number range with this API is from 0x00 to +@@ -71,6 +74,7 @@ + #define _UFFDIO_ZEROPAGE (0x04) + #define _UFFDIO_WRITEPROTECT (0x06) + #define _UFFDIO_CONTINUE (0x07) ++#define _UFFDIO_POISON (0x08) + #define _UFFDIO_API (0x3F) + + /* userfaultfd ioctl ids */ +@@ -91,6 +95,8 @@ + struct uffdio_writeprotect) + #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) ++#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \ ++ struct uffdio_poison) + + /* read() structure */ + struct uffd_msg { +@@ -225,6 +231,7 @@ struct uffdio_api { + #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) + #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) + #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) ++#define UFFD_FEATURE_POISON (1<<14) + __u64 features; + + __u64 ioctls; +@@ -321,6 +328,18 @@ struct uffdio_continue { + __s64 mapped; + }; + ++struct uffdio_poison { ++ struct uffdio_range range; ++#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0) ++ __u64 mode; ++ ++ /* ++ * Fields below here are written by the ioctl and must be at 
the end: ++ * the copy_from_user will not read past here. ++ */ ++ __s64 updated; ++}; ++ + /* + * Flags for the userfaultfd(2) system call itself. + */ +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 16db89071e..acf72b4999 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -217,6 +217,7 @@ struct vfio_device_info { + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ ++ __u32 pad; + }; + #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +@@ -677,11 +678,60 @@ enum { + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, + * struct vfio_pci_hot_reset_info) + * ++ * This command is used to query the affected devices in the hot reset for ++ * a given device. ++ * ++ * This command always reports the segment, bus, and devfn information for ++ * each affected device, and selectively reports the group_id or devid per ++ * the way how the calling device is opened. ++ * ++ * - If the calling device is opened via the traditional group/container ++ * API, group_id is reported. User should check if it has owned all ++ * the affected devices and provides a set of group fds to prove the ++ * ownership in VFIO_DEVICE_PCI_HOT_RESET ioctl. ++ * ++ * - If the calling device is opened as a cdev, devid is reported. ++ * Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set to indicate this ++ * data type. All the affected devices should be represented in ++ * the dev_set, ex. bound to a vfio driver, and also be owned by ++ * this interface which is determined by the following conditions: ++ * 1) Has a valid devid within the iommufd_ctx of the calling device. ++ * Ownership cannot be determined across separate iommufd_ctx and ++ * the cdev calling conventions do not support a proof-of-ownership ++ * model as provided in the legacy group interface. In this case ++ * valid devid with value greater than zero is provided in the return ++ * structure. ++ * 2) Does not have a valid devid within the iommufd_ctx of the calling ++ * device, but belongs to the same IOMMU group as the calling device ++ * or another opened device that has a valid devid within the ++ * iommufd_ctx of the calling device. This provides implicit ownership ++ * for devices within the same DMA isolation context. In this case ++ * the devid value of VFIO_PCI_DEVID_OWNED is provided in the return ++ * structure. ++ * ++ * A devid value of VFIO_PCI_DEVID_NOT_OWNED is provided in the return ++ * structure for affected devices where device is NOT represented in the ++ * dev_set or ownership is not available. Such devices prevent the use ++ * of VFIO_DEVICE_PCI_HOT_RESET ioctl outside of the proof-of-ownership ++ * calling conventions (ie. via legacy group accessed devices). Flag ++ * VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the ++ * affected devices are represented in the dev_set and also owned by ++ * the user. This flag is available only when ++ * flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved. ++ * When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero ++ * length fd array on the calling device as the ownership is validated ++ * by iommufd_ctx. ++ * + * Return: 0 on success, -errno on failure: + * -enospc = insufficient buffer, -enodev = unsupported for device. 
+ */ + struct vfio_pci_dependent_device { +- __u32 group_id; ++ union { ++ __u32 group_id; ++ __u32 devid; ++#define VFIO_PCI_DEVID_OWNED 0 ++#define VFIO_PCI_DEVID_NOT_OWNED -1 ++ }; + __u16 segment; + __u8 bus; + __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ +@@ -690,6 +740,8 @@ struct vfio_pci_dependent_device { + struct vfio_pci_hot_reset_info { + __u32 argsz; + __u32 flags; ++#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID (1 << 0) ++#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED (1 << 1) + __u32 count; + struct vfio_pci_dependent_device devices[]; + }; +@@ -700,6 +752,24 @@ struct vfio_pci_hot_reset_info { + * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, + * struct vfio_pci_hot_reset) + * ++ * A PCI hot reset results in either a bus or slot reset which may affect ++ * other devices sharing the bus/slot. The calling user must have ++ * ownership of the full set of affected devices as determined by the ++ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl. ++ * ++ * When called on a device file descriptor acquired through the vfio ++ * group interface, the user is required to provide proof of ownership ++ * of those affected devices via the group_fds array in struct ++ * vfio_pci_hot_reset. ++ * ++ * When called on a direct cdev opened vfio device, the flags field of ++ * struct vfio_pci_hot_reset_info reports the ownership status of the ++ * affected devices and this ioctl must be called with an empty group_fds ++ * array. See above INFO ioctl definition for ownership requirements. ++ * ++ * Mixed usage of legacy groups and cdevs across the set of affected ++ * devices is not supported. ++ * + * Return: 0 on success, -errno on failure. + */ + struct vfio_pci_hot_reset { +@@ -828,6 +898,83 @@ struct vfio_device_feature { + + #define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) + ++/* ++ * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, ++ * struct vfio_device_bind_iommufd) ++ * @argsz: User filled size of this data. ++ * @flags: Must be 0. ++ * @iommufd: iommufd to bind. ++ * @out_devid: The device id generated by this bind. devid is a handle for ++ * this device/iommufd bond and can be used in IOMMUFD commands. ++ * ++ * Bind a vfio_device to the specified iommufd. ++ * ++ * User is restricted from accessing the device before the binding operation ++ * is completed. Only allowed on cdev fds. ++ * ++ * Unbind is automatically conducted when device fd is closed. ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++struct vfio_device_bind_iommufd { ++ __u32 argsz; ++ __u32 flags; ++ __s32 iommufd; ++ __u32 out_devid; ++}; ++ ++#define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) ++ ++/* ++ * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, ++ * struct vfio_device_attach_iommufd_pt) ++ * @argsz: User filled size of this data. ++ * @flags: Must be 0. ++ * @pt_id: Input the target id which can represent an ioas or a hwpt ++ * allocated via iommufd subsystem. ++ * Output the input ioas id or the attached hwpt id which could ++ * be the specified hwpt itself or a hwpt automatically created ++ * for the specified ioas by kernel during the attachment. ++ * ++ * Associate the device with an address space within the bound iommufd. ++ * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only ++ * allowed on cdev fds. ++ * ++ * If a vfio device is currently attached to a valid hw_pagetable, without doing ++ * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl ++ * passing in another hw_pagetable (hwpt) id is allowed. 
This action, also known ++ * as a hw_pagetable replacement, will replace the device's currently attached ++ * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++struct vfio_device_attach_iommufd_pt { ++ __u32 argsz; ++ __u32 flags; ++ __u32 pt_id; ++}; ++ ++#define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) ++ ++/* ++ * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, ++ * struct vfio_device_detach_iommufd_pt) ++ * @argsz: User filled size of this data. ++ * @flags: Must be 0. ++ * ++ * Remove the association of the device and its current associated address ++ * space. After it, the device should be in a blocking DMA state. This is only ++ * allowed on cdev fds. ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++struct vfio_device_detach_iommufd_pt { ++ __u32 argsz; ++ __u32 flags; ++}; ++ ++#define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) ++ + /* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a +@@ -1304,6 +1451,7 @@ struct vfio_iommu_type1_info { + #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ + __u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ ++ __u32 pad; + }; + + /* +diff --git a/meson.build b/meson.build +index 98e68ef0b1..1e43c6e887 100644 +--- a/meson.build ++++ b/meson.build +@@ -114,6 +114,8 @@ elif cpu in ['riscv32'] + kvm_targets = ['riscv32-softmmu'] + elif cpu in ['riscv64'] + kvm_targets = ['riscv64-softmmu'] ++elif cpu in ['loongarch64'] ++ kvm_targets = ['loongarch64-softmmu'] + else + kvm_targets = [] + endif +@@ -3299,6 +3301,7 @@ if have_system or have_user + 'target/hppa', + 'target/i386', + 'target/i386/kvm', ++ 'target/loongarch', + 'target/mips/tcg', + 'target/nios2', + 'target/ppc', +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index ad93ecac92..138acb8100 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -18,6 +18,11 @@ + #include "cpu-csr.h" + #include "sysemu/reset.h" + #include "tcg/tcg.h" ++#include "sysemu/kvm.h" ++#include "kvm_loongarch.h" ++#ifdef CONFIG_KVM ++#include ++#endif + + const char * const regnames[32] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", +@@ -105,12 +110,15 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) + return; + } + +- env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); +- +- if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ if (kvm_enabled()) { ++ kvm_loongarch_set_interrupt(cpu, irq, level); + } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); ++ if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } + } + } + +@@ -489,10 +497,12 @@ static void loongarch_cpu_reset_hold(Object *obj) + + env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2)); + env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0); ++ env->CSR_CPUID = cs->cpu_index; + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); + env->CSR_LLBCTL = FIELD_DP64(env->CSR_LLBCTL, CSR_LLBCTL, KLO, 0); + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); + env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 
0); ++ env->CSR_TID = cs->cpu_index; + + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2); + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63); +@@ -509,6 +519,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + #ifndef CONFIG_USER_ONLY + env->pc = 0x1c000000; + memset(env->tlb, 0, sizeof(env->tlb)); ++ if (kvm_enabled()) { ++ kvm_arch_reset_vcpu(env); ++ } + #endif + + restore_fp_status(env); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index fa371ca8ba..6ae753def2 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -311,6 +311,7 @@ typedef struct CPUArchState { + uint64_t CSR_PWCH; + uint64_t CSR_STLBPS; + uint64_t CSR_RVACFG; ++ uint64_t CSR_CPUID; + uint64_t CSR_PRCFG1; + uint64_t CSR_PRCFG2; + uint64_t CSR_PRCFG3; +@@ -342,7 +343,6 @@ typedef struct CPUArchState { + uint64_t CSR_DBG; + uint64_t CSR_DERA; + uint64_t CSR_DSAVE; +- uint64_t CSR_CPUID; + + #ifndef CONFIG_USER_ONLY + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; +@@ -352,6 +352,7 @@ typedef struct CPUArchState { + MemoryRegion iocsr_mem; + bool load_elf; + uint64_t elf_address; ++ uint32_t mp_state; + /* Store ipistate to access from this struct */ + DeviceState *ipistate; + #endif +@@ -374,6 +375,8 @@ struct ArchCPU { + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; ++ /* used by KVM_REG_LOONGARCH_COUNTER ioctl to access guest time counters */ ++ uint64_t kvm_state_counter; + }; + + #define TYPE_LOONGARCH_CPU "loongarch-cpu" +@@ -439,6 +442,7 @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, + } + + void loongarch_cpu_list(void); ++void kvm_arch_reset_vcpu(CPULoongArchState *env); + + #define cpu_list loongarch_cpu_list + +diff --git a/target/loongarch/kvm-stub.c b/target/loongarch/kvm-stub.c +new file mode 100644 +index 0000000000..04534f55b0 +--- /dev/null ++++ b/target/loongarch/kvm-stub.c +@@ -0,0 +1,14 @@ ++/* ++ * QEMU KVM LoongArch specific function stubs ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++#include "qemu/osdep.h" ++#include "cpu.h" ++#include "kvm_loongarch.h" ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level) ++{ ++ g_assert_not_reached(); ++ return 0; ++} +diff --git a/target/loongarch/kvm.c b/target/loongarch/kvm.c +new file mode 100644 +index 0000000000..4dca207f6d +--- /dev/null ++++ b/target/loongarch/kvm.c +@@ -0,0 +1,588 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch KVM ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++ ++#include "qemu/timer.h" ++#include "qemu/error-report.h" ++#include "qemu/main-loop.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/kvm.h" ++#include "sysemu/kvm_int.h" ++#include "hw/pci/pci.h" ++#include "exec/memattrs.h" ++#include "exec/address-spaces.h" ++#include "hw/boards.h" ++#include "hw/irq.h" ++#include "qemu/log.h" ++#include "hw/loader.h" ++#include "migration/migration.h" ++#include "sysemu/runstate.h" ++#include "cpu-csr.h" ++#include "kvm_loongarch.h" ++#include "trace.h" ++ ++static bool cap_has_mp_state; ++const KVMCapabilityInfo kvm_arch_required_capabilities[] = { ++ KVM_CAP_LAST_INFO ++}; ++ ++static int kvm_loongarch_get_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ /* Get the current register set as KVM seems it */ ++ 
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); ++ if (ret < 0) { ++ trace_kvm_failed_get_regs_core(strerror(errno)); ++ return ret; ++ } ++ /* gpr[0] value is always 0 */ ++ env->gpr[0] = 0; ++ for (i = 1; i < 32; i++) { ++ env->gpr[i] = regs.gpr[i]; ++ } ++ ++ env->pc = regs.pc; ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ /* Set the registers based on QEMU's view of things */ ++ for (i = 0; i < 32; i++) { ++ regs.gpr[i] = env->gpr[i]; ++ } ++ ++ regs.pc = env->pc; ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); ++ if (ret < 0) { ++ trace_kvm_failed_put_regs_core(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++static int kvm_larch_getq(CPUState *cs, uint64_t reg_id, ++ uint64_t *addr) ++{ ++ struct kvm_one_reg csrreg = { ++ .id = reg_id, ++ .addr = (uintptr_t)addr ++ }; ++ ++ return kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &csrreg); ++} ++ ++static int kvm_larch_putq(CPUState *cs, uint64_t reg_id, ++ uint64_t *addr) ++{ ++ struct kvm_one_reg csrreg = { ++ .id = reg_id, ++ .addr = (uintptr_t)addr ++ }; ++ ++ return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &csrreg); ++} ++ ++#define KVM_GET_ONE_UREG64(cs, ret, regidx, addr) \ ++ ({ \ ++ err = kvm_larch_getq(cs, KVM_IOC_CSRID(regidx), addr); \ ++ if (err < 0) { \ ++ ret = err; \ ++ trace_kvm_failed_get_csr(regidx, strerror(errno)); \ ++ } \ ++ }) ++ ++#define KVM_PUT_ONE_UREG64(cs, ret, regidx, addr) \ ++ ({ \ ++ err = kvm_larch_putq(cs, KVM_IOC_CSRID(regidx), addr); \ ++ if (err < 0) { \ ++ ret = err; \ ++ trace_kvm_failed_put_csr(regidx, strerror(errno)); \ ++ } \ ++ }) ++ ++static int kvm_loongarch_get_csr(CPUState *cs) ++{ ++ int err, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_CRMD, &env->CSR_CRMD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRMD, &env->CSR_PRMD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_EUEN, &env->CSR_EUEN); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_MISC, &env->CSR_MISC); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ECFG, &env->CSR_ECFG); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ESTAT, &env->CSR_ESTAT); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ERA, &env->CSR_ERA); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADV, &env->CSR_BADV); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADI, &env->CSR_BADI); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_EENTRY, &env->CSR_EENTRY); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBIDX, &env->CSR_TLBIDX); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBEHI, &env->CSR_TLBEHI); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO0, &env->CSR_TLBELO0); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO1, &env->CSR_TLBELO1); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ASID, &env->CSR_ASID); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDL, &env->CSR_PGDL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDH, &env->CSR_PGDH); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGD, &env->CSR_PGD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCL, &env->CSR_PWCL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCH, &env->CSR_PWCH); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_STLBPS, &env->CSR_STLBPS); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_RVACFG, &env->CSR_RVACFG); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_CPUID, &env->CSR_CPUID); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG1, &env->CSR_PRCFG1); ++ 
KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG2, &env->CSR_PRCFG2); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG3, &env->CSR_PRCFG3); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(0), &env->CSR_SAVE[0]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(1), &env->CSR_SAVE[1]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(2), &env->CSR_SAVE[2]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(3), &env->CSR_SAVE[3]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(4), &env->CSR_SAVE[4]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(5), &env->CSR_SAVE[5]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(6), &env->CSR_SAVE[6]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(7), &env->CSR_SAVE[7]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TID, &env->CSR_TID); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_CNTC, &env->CSR_CNTC); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TICLR, &env->CSR_TICLR); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_LLBCTL, &env->CSR_LLBCTL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL1, &env->CSR_IMPCTL1); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL2, &env->CSR_IMPCTL2); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRENTRY, &env->CSR_TLBRENTRY); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRBADV, &env->CSR_TLBRBADV); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRERA, &env->CSR_TLBRERA); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRSAVE, &env->CSR_TLBRSAVE); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO0, &env->CSR_TLBRELO0); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO1, &env->CSR_TLBRELO1); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBREHI, &env->CSR_TLBREHI); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRPRMD, &env->CSR_TLBRPRMD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(0), &env->CSR_DMW[0]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(1), &env->CSR_DMW[1]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(2), &env->CSR_DMW[2]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(3), &env->CSR_DMW[3]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TVAL, &env->CSR_TVAL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TCFG, &env->CSR_TCFG); ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_csr(CPUState *cs) ++{ ++ int err, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_CRMD, &env->CSR_CRMD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRMD, &env->CSR_PRMD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_EUEN, &env->CSR_EUEN); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_MISC, &env->CSR_MISC); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ECFG, &env->CSR_ECFG); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ESTAT, &env->CSR_ESTAT); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ERA, &env->CSR_ERA); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADV, &env->CSR_BADV); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADI, &env->CSR_BADI); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_EENTRY, &env->CSR_EENTRY); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBIDX, &env->CSR_TLBIDX); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBEHI, &env->CSR_TLBEHI); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO0, &env->CSR_TLBELO0); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO1, &env->CSR_TLBELO1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ASID, &env->CSR_ASID); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDL, &env->CSR_PGDL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDH, &env->CSR_PGDH); ++ 
KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGD, &env->CSR_PGD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCL, &env->CSR_PWCL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCH, &env->CSR_PWCH); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_STLBPS, &env->CSR_STLBPS); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_RVACFG, &env->CSR_RVACFG); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_CPUID, &env->CSR_CPUID); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG1, &env->CSR_PRCFG1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG2, &env->CSR_PRCFG2); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG3, &env->CSR_PRCFG3); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(0), &env->CSR_SAVE[0]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(1), &env->CSR_SAVE[1]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(2), &env->CSR_SAVE[2]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(3), &env->CSR_SAVE[3]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(4), &env->CSR_SAVE[4]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(5), &env->CSR_SAVE[5]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(6), &env->CSR_SAVE[6]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(7), &env->CSR_SAVE[7]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TID, &env->CSR_TID); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_CNTC, &env->CSR_CNTC); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TICLR, &env->CSR_TICLR); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_LLBCTL, &env->CSR_LLBCTL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL1, &env->CSR_IMPCTL1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL2, &env->CSR_IMPCTL2); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRENTRY, &env->CSR_TLBRENTRY); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRBADV, &env->CSR_TLBRBADV); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRERA, &env->CSR_TLBRERA); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRSAVE, &env->CSR_TLBRSAVE); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO0, &env->CSR_TLBRELO0); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO1, &env->CSR_TLBRELO1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBREHI, &env->CSR_TLBREHI); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRPRMD, &env->CSR_TLBRPRMD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(0), &env->CSR_DMW[0]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(1), &env->CSR_DMW[1]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(2), &env->CSR_DMW[2]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(3), &env->CSR_DMW[3]); ++ /* ++ * timer cfg must be put at last since it is used to enable ++ * guest timer ++ */ ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TVAL, &env->CSR_TVAL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TCFG, &env->CSR_TCFG); ++ return ret; ++} ++ ++static int kvm_loongarch_get_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_get_fpu(strerror(errno)); ++ return ret; ++ } ++ ++ env->fcsr0 = fpu.fcsr; ++ for (i = 0; i < 32; i++) { ++ env->fpr[i].vreg.UD[0] = fpu.fpr[i].val64[0]; ++ } ++ for (i = 0; i < 8; i++) { ++ env->cf[i] = fpu.fcc & 0xFF; ++ fpu.fcc = fpu.fcc >> 8; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ fpu.fcsr = env->fcsr0; ++ fpu.fcc = 
0; ++ for (i = 0; i < 32; i++) { ++ fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; ++ } ++ ++ for (i = 0; i < 8; i++) { ++ fpu.fcc |= env->cf[i] << (8 * i); ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_put_fpu(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++void kvm_arch_reset_vcpu(CPULoongArchState *env) ++{ ++ env->mp_state = KVM_MP_STATE_RUNNABLE; ++} ++ ++static int kvm_loongarch_get_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ struct kvm_mp_state mp_state; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_get_mpstate(strerror(errno)); ++ return ret; ++ } ++ env->mp_state = mp_state.mp_state; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ struct kvm_mp_state mp_state = { ++ .mp_state = env->mp_state ++ }; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_put_mpstate(strerror(errno)); ++ } ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ uint64_t val; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ for (i = 0; i < 21; i++) { ++ ret = kvm_larch_getq(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_get_cpucfg(strerror(errno)); ++ } ++ env->cpucfg[i] = (uint32_t)val; ++ } ++ return ret; ++} ++ ++static int kvm_loongarch_put_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ uint64_t val; ++ ++ for (i = 0; i < 21; i++) { ++ val = env->cpucfg[i]; ++ ret = kvm_larch_putq(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_put_cpucfg(strerror(errno)); ++ } ++ } ++ return ret; ++} ++ ++int kvm_arch_get_registers(CPUState *cs) ++{ ++ int ret; ++ ++ ret = kvm_loongarch_get_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_csr(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_cpucfg(cs); ++ return ret; ++} ++ ++int kvm_arch_put_registers(CPUState *cs, int level) ++{ ++ int ret; ++ ++ ret = kvm_loongarch_put_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_csr(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_cpucfg(cs); ++ return ret; ++} ++ ++static void kvm_loongarch_vm_stage_change(void *opaque, bool running, ++ RunState state) ++{ ++ int ret; ++ CPUState *cs = opaque; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ ++ if (running) { ++ ret = kvm_larch_putq(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_put_counter(strerror(errno)); ++ } ++ } else { ++ ret = kvm_larch_getq(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_get_counter(strerror(errno)); ++ } ++ } ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); ++ 
return 0; ++} ++ ++int kvm_arch_destroy_vcpu(CPUState *cs) ++{ ++ return 0; ++} ++ ++unsigned long kvm_arch_vcpu_id(CPUState *cs) ++{ ++ return cs->cpu_index; ++} ++ ++int kvm_arch_release_virq_post(int virq) ++{ ++ return 0; ++} ++ ++int kvm_arch_msi_data_to_gsi(uint32_t data) ++{ ++ abort(); ++} ++ ++int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, ++ uint64_t address, uint32_t data, PCIDevice *dev) ++{ ++ return 0; ++} ++ ++int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, ++ int vector, PCIDevice *dev) ++{ ++ return 0; ++} ++ ++void kvm_arch_init_irq_routing(KVMState *s) ++{ ++} ++ ++int kvm_arch_get_default_type(MachineState *ms) ++{ ++ return 0; ++} ++ ++int kvm_arch_init(MachineState *ms, KVMState *s) ++{ ++ cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); ++ return 0; ++} ++ ++int kvm_arch_irqchip_create(KVMState *s) ++{ ++ return 0; ++} ++ ++void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) ++{ ++} ++ ++MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) ++{ ++ return MEMTXATTRS_UNSPECIFIED; ++} ++ ++int kvm_arch_process_async_events(CPUState *cs) ++{ ++ return cs->halted; ++} ++ ++bool kvm_arch_stop_on_emulation_error(CPUState *cs) ++{ ++ return true; ++} ++ ++bool kvm_arch_cpu_check_are_resettable(void) ++{ ++ return true; ++} ++ ++int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ++{ ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ MemTxAttrs attrs = {}; ++ ++ attrs.requester_id = env_cpu(env)->cpu_index; ++ ++ trace_kvm_arch_handle_exit(run->exit_reason); ++ switch (run->exit_reason) { ++ case KVM_EXIT_LOONGARCH_IOCSR: ++ address_space_rw(&env->address_space_iocsr, ++ run->iocsr_io.phys_addr, ++ attrs, ++ run->iocsr_io.data, ++ run->iocsr_io.len, ++ run->iocsr_io.is_write); ++ break; ++ default: ++ ret = -1; ++ warn_report("KVM: unknown exit reason %d", run->exit_reason); ++ break; ++ } ++ return ret; ++} ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level) ++{ ++ struct kvm_interrupt intr; ++ CPUState *cs = CPU(cpu); ++ ++ if (level) { ++ intr.irq = irq; ++ } else { ++ intr.irq = -irq; ++ } ++ ++ trace_kvm_set_intr(irq, level); ++ return kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &intr); ++} ++ ++void kvm_arch_accel_class_init(ObjectClass *oc) ++{ ++} +diff --git a/target/loongarch/kvm_loongarch.h b/target/loongarch/kvm_loongarch.h +new file mode 100644 +index 0000000000..cdef980eec +--- /dev/null ++++ b/target/loongarch/kvm_loongarch.h +@@ -0,0 +1,13 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch kvm interface ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef QEMU_KVM_LOONGARCH_H ++#define QEMU_KVM_LOONGARCH_H ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); ++ ++#endif +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index b7a27df5a9..b3cceccc95 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -27,6 +27,7 @@ loongarch_system_ss.add(files( + + common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen]) + ++loongarch_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c')) + loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) + + target_arch += {'loongarch': loongarch_ss} +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +new file mode 100644 +index 0000000000..3263406ebe +--- /dev/null ++++ 
b/target/loongarch/trace-events +@@ -0,0 +1,17 @@ ++# See docs/devel/tracing.rst for syntax documentation. ++ ++#kvm.c ++kvm_failed_get_regs_core(const char *msg) "Failed to get core regs from KVM: %s" ++kvm_failed_put_regs_core(const char *msg) "Failed to put core regs into KVM: %s" ++kvm_failed_get_csr(int csr, const char *msg) "Failed to get csr 0x%x from KVM: %s" ++kvm_failed_put_csr(int csr, const char *msg) "Failed to put csr 0x%x into KVM: %s" ++kvm_failed_get_fpu(const char *msg) "Failed to get fpu from KVM: %s" ++kvm_failed_put_fpu(const char *msg) "Failed to put fpu into KVM: %s" ++kvm_failed_get_mpstate(const char *msg) "Failed to get mp_state from KVM: %s" ++kvm_failed_put_mpstate(const char *msg) "Failed to put mp_state into KVM: %s" ++kvm_failed_get_counter(const char *msg) "Failed to get counter from KVM: %s" ++kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" ++kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" ++kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" ++kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d" ++kvm_set_intr(int irq, int level) "kvm set interrupt, irq num: %d, level: %d" +diff --git a/target/loongarch/trace.h b/target/loongarch/trace.h +new file mode 100644 +index 0000000000..c2ecb78f08 +--- /dev/null ++++ b/target/loongarch/trace.h +@@ -0,0 +1 @@ ++#include "trace/trace-target_loongarch.h" diff --git a/qt5-base/PKGBUILD b/qt5-base/PKGBUILD index 3a3f56d73a..ac235d4b46 100644 --- a/qt5-base/PKGBUILD +++ b/qt5-base/PKGBUILD @@ -36,6 +36,7 @@ sha256sums=('SKIP' '6a4ec2bfcf4e7cff73346762b252cc28be5ca0ed79fde5e69350efe229b43adc' '5411edbe215c24b30448fac69bd0ba7c882f545e8cf05027b2b6e2227abc5e78' '4abc22150fa3e06b2fdcec32146abc9be4e316692aa4d5bd5aa53b4b726783fa') +options=(debug) pkgver() { cd $_pkgfqn diff --git a/qt5-doc/PKGBUILD b/qt5-doc/PKGBUILD index 65efe0fffd..40ecc3edf0 100644 --- a/qt5-doc/PKGBUILD +++ b/qt5-doc/PKGBUILD @@ -16,10 +16,12 @@ groups=('qt5') _pkgfqn="qt-everywhere-opensource-src-${pkgver}" source=("https://download.qt.io/official_releases/qt/${pkgver%.*}/${pkgver}/single/${_pkgfqn}.tar.xz" qt5-webengine-python3.patch - no-qmake.patch) + no-qmake.patch + qt5-base-la64.patch) sha256sums=('93f2c0889ee2e9cdf30c170d353c3f829de5f29ba21c119167dee5995e48ccce' '398c996cb5b606695ac93645143df39e23fa67e768b09e0da6dbd37342a43f32' - 'db90fa31381fa0814c9c8c803c9e2f9b36bdd6f52da753399e500c0692352498') + 'db90fa31381fa0814c9c8c803c9e2f9b36bdd6f52da753399e500c0692352498' + '0ea0ced24a71d6abe7d323c4bfb80c33c77592873b8660fe413cb074978c653a') prepare() { cd ${_pkgfqn/opensource-/} @@ -29,6 +31,7 @@ prepare() { patch -d qtbase -p1 < "$srcdir"/no-qmake.patch # Use system qmake patch -d qtwebengine -p1 < "$srcdir"/qt5-webengine-python3.patch # Fix build with Python 3 + patch -d qtbase -p1 < "$srcdir"/qt5-base-la64.patch } build() { diff --git a/qt5-doc/qt5-base-la64.patch b/qt5-doc/qt5-base-la64.patch new file mode 100644 index 0000000000..eb183a26e1 --- /dev/null +++ b/qt5-doc/qt5-base-la64.patch @@ -0,0 +1,12 @@ +Index: qtbase/src/3rdparty/double-conversion/include/double-conversion/utils.h +=================================================================== +--- qtbase.orig/src/3rdparty/double-conversion/include/double-conversion/utils.h ++++ qtbase/src/3rdparty/double-conversion/include/double-conversion/utils.h +@@ -98,6 +98,7 @@ int main(int argc, char** argv) { + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || 
defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ ++ defined(__loongarch64) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ + defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ + defined(__riscv) || \ diff --git a/qt5-script/PKGBUILD b/qt5-script/PKGBUILD index 5ef201e6f3..77260f2ff1 100644 --- a/qt5-script/PKGBUILD +++ b/qt5-script/PKGBUILD @@ -13,12 +13,15 @@ depends=('qt5-base') makedepends=('git') groups=('qt5') _pkgfqn=${pkgname/5-/} -source=(git+https://code.qt.io/qt/$_pkgfqn.git#tag=v${pkgver}-lts) -sha256sums=('SKIP') +source=(git+https://code.qt.io/qt/$_pkgfqn.git#tag=v${pkgver}-lts + loongarch_ports.patch) +sha256sums=('SKIP' + 'e6b6bd257375ba0982f6be8cd9c704a8cf3101d275ee1c40cad8466c695d3800') options=(!lto) prepare() { mkdir -p build + patch -p1 -d qtscript/src -i "$srcdir"/loongarch_ports.patch } build() { diff --git a/qt5-script/loongarch_ports.patch b/qt5-script/loongarch_ports.patch new file mode 100644 index 0000000000..6b9cb77ab0 --- /dev/null +++ b/qt5-script/loongarch_ports.patch @@ -0,0 +1,60 @@ +diff -Naurp a/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h b/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h +--- a/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h 2022-06-26 20:47:06.744000000 +0800 ++++ b/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h 2022-06-26 22:29:55.096000000 +0800 +@@ -490,7 +490,11 @@ namespace JSC { + u.asBits.tag = CellTag; + else + u.asBits.tag = EmptyValueTag; ++#if defined(__loongarch64) || defined(__mips64) ++ u.asBits.payload = reinterpret_cast(ptr); ++#else + u.asBits.payload = reinterpret_cast(ptr); ++#endif + #if ENABLE(JSC_ZOMBIES) + ASSERT(!isZombie()); + #endif +@@ -502,7 +506,11 @@ namespace JSC { + u.asBits.tag = CellTag; + else + u.asBits.tag = EmptyValueTag; ++#if defined(__loongarch64) || defined(__mips64) ++ u.asBits.payload = reinterpret_cast(const_cast(ptr)); ++#else + u.asBits.payload = reinterpret_cast(const_cast(ptr)); ++#endif + #if ENABLE(JSC_ZOMBIES) + ASSERT(!isZombie()); + #endif +diff -Naurp a/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h b/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h +--- a/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h 2022-06-26 20:47:06.776000000 +0800 ++++ b/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h 2022-06-26 22:15:32.048000000 +0800 +@@ -226,6 +226,11 @@ + #define WTF_CPU_SPARC 1 + #endif + ++/* CPU(3A5000) */ ++#if defined(__loongarch64) ++#define WTF_CPU_LA64 1 ++#endif ++ + /* CPU(X86) - i386 / x86 32-bit */ + #if defined(__i386__) \ + || defined(i386) \ +@@ -958,7 +963,7 @@ + #endif + + #if !defined(WTF_USE_JSVALUE64) && !defined(WTF_USE_JSVALUE32) && !defined(WTF_USE_JSVALUE32_64) +-#if (CPU(X86_64) && !CPU(X32) && (OS(UNIX) || OS(WINDOWS) || OS(SOLARIS) || OS(HPUX))) || (CPU(IA64) && !CPU(IA64_32)) || CPU(ALPHA) || CPU(AIX64) || CPU(SPARC64) || CPU(MIPS64) || CPU(AARCH64) || CPU(RISCV64) ++#if (CPU(X86_64) && !CPU(X32) && (OS(UNIX) || OS(WINDOWS) || OS(SOLARIS) || OS(HPUX))) || (CPU(IA64) && !CPU(IA64_32)) || CPU(ALPHA) || CPU(AIX64) || CPU(SPARC64) || CPU(MIPS64) || CPU(AARCH64) || CPU(RISCV64) || defined(__loongarch64) + #define WTF_USE_JSVALUE64 1 + #elif CPU(ARM) || CPU(PPC64) || CPU(RISCV32) + #define WTF_USE_JSVALUE32 1 +@@ -976,7 +981,7 @@ on MinGW. 
See https://bugs.webkit.org/sh + #endif + + /* Disable JIT on x32 */ +-#if CPU(X32) ++#if CPU(X32) || defined(__loongarch64) + #define ENABLE_JIT 0 + #endif + diff --git a/qt5-script/qt5-base-la64.patch b/qt5-script/qt5-base-la64.patch new file mode 100644 index 0000000000..84c87b4c5d --- /dev/null +++ b/qt5-script/qt5-base-la64.patch @@ -0,0 +1,12 @@ +Index: qtscript/src/3rdparty/double-conversion/include/double-conversion/utils.h +=================================================================== +--- qtscript.orig/src/3rdparty/double-conversion/include/double-conversion/utils.h ++++ qtscript/src/3rdparty/double-conversion/include/double-conversion/utils.h +@@ -98,6 +98,7 @@ int main(int argc, char** argv) { + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ ++ defined(__loongarch64) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ + defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ + defined(__riscv) || \ diff --git a/qt5-webengine/PKGBUILD b/qt5-webengine/PKGBUILD index 9a35d4b276..1032b08fbd 100644 --- a/qt5-webengine/PKGBUILD +++ b/qt5-webengine/PKGBUILD @@ -17,6 +17,7 @@ groups=('qt5') _pkgfqn=${pkgname/5-/} source=(git+https://code.qt.io/qt/qtwebengine.git#tag=v${pkgver}-lts git+https://code.qt.io/qt/qtwebengine-chromium.git + http://loongarch.zhcn.cc/list/qtwebengine/qtwebengine-5.15.2-la64.patch/59168fd4d0e9a62535f8118511fb71fa/qtwebengine-5.15.2-la64.patch qt5-webengine-python3.patch qt5-webengine-chromium-python3.patch qt5-webengine-ffmpeg5.patch @@ -25,6 +26,7 @@ source=(git+https://code.qt.io/qt/qtwebengine.git#tag=v${pkgver}-lts qt5-webengine-icu-74.patch) sha256sums=('SKIP' 'SKIP' + 'cace5fc41204e2679d577b6dd1030656826bbf69a6a4db126036fad47c7db2d1' '0ad5d1660886f7bbf5108b071bf5d7bbbabf1cd1258ce9d4587a01dec4a1aa89' 'd8beb3d65a1aaf927285e6f055a9d1facd0f9c3fd851f91ba568389fb3137399' 'c50d3019626183e753c53a997dc8a55938847543aa3178d4c51f377be741c693' @@ -52,7 +54,7 @@ prepare() { build() { cd build - qmake ../${_pkgfqn} CONFIG+=force_debug_info -- \ + qmake ../${_pkgfqn} CONFIG+=force_debug_info QT_ARCH=loongarch64 -- \ -proprietary-codecs \ -system-ffmpeg \ -webp \ diff --git a/qt5-webengine/qtwebengine-5.15.2-la64.patch b/qt5-webengine/qtwebengine-5.15.2-la64.patch new file mode 100644 index 0000000000..1fdbbb3a84 --- /dev/null +++ b/qt5-webengine/qtwebengine-5.15.2-la64.patch @@ -0,0 +1,63993 @@ +diff --git a/configure.pri b/configure.pri +index d3ba9b147..ded9bdbf0 100644 +--- a/configure.pri ++++ b/configure.pri +@@ -142,7 +142,8 @@ defineTest(qtConfTest_detectPlatform) { + defineTest(qtConfTest_detectArch) { + contains(QT_ARCH, "i386")|contains(QT_ARCH, "x86_64"): return(true) + contains(QT_ARCH, "arm")|contains(QT_ARCH, "arm64"): return(true) +- contains(QT_ARCH, "mips"): return(true) ++ contains(QT_ARCH, "mips")|contains(QT_ARCH, "mips64"): return(true) ++ contains(QT_ARCH, "loongarch")|contains(QT_ARCH, "loongarch64"): return(true) + qtLog("Architecture not supported.") + return(false) + } +diff --git a/mkspecs/features/functions.prf b/mkspecs/features/functions.prf +index d3ceb4c5e..674b97fa4 100644 +--- a/mkspecs/features/functions.prf ++++ b/mkspecs/features/functions.prf +@@ -106,6 +106,7 @@ defineReplace(gnArch) { + contains(qtArch, "arm64"): return(arm64) + contains(qtArch, "mips"): return(mipsel) + contains(qtArch, "mips64"): return(mips64el) ++ contains(qtArch, "loongarch64"): return(la64) + 
return(unknown) + } + +Submodule src/3rdparty fb6ab5e48..8efdf7360: +diff --git a/src/3rdparty/chromium/DEPS b/src/3rdparty/chromium/DEPS +index 530e4a46661..09d10d143c2 100644 +--- a/src/3rdparty/chromium/DEPS ++++ b/src/3rdparty/chromium/DEPS +@@ -85,7 +85,7 @@ vars = { + + # Check out and download nacl by default. This can be disabled e.g. with + # custom_vars. +- 'checkout_nacl': True, ++ 'checkout_nacl': true, + + # By default, do not check out src-internal. This can be overridden e.g. with + # custom_vars. +diff --git a/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h b/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h +index 555700a7d0c..b4b2a0ab054 100644 +--- a/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h ++++ b/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h +@@ -12,7 +12,7 @@ + namespace base { + #if defined(OS_WIN) || defined(ARCH_CPU_PPC64) + static constexpr size_t kPageAllocationGranularityShift = 16; // 64KB +-#elif defined(_MIPS_ARCH_LOONGSON) ++#elif defined(_MIPS_ARCH_LOONGSON) || defined(ARCH_CPU_LA64) + static constexpr size_t kPageAllocationGranularityShift = 14; // 16KB + #else + static constexpr size_t kPageAllocationGranularityShift = 12; // 4KB +@@ -24,7 +24,7 @@ static constexpr size_t kPageAllocationGranularityOffsetMask = + static constexpr size_t kPageAllocationGranularityBaseMask = + ~kPageAllocationGranularityOffsetMask; + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(ARCH_CPU_LA64) + static constexpr size_t kSystemPageSize = 16384; + #elif defined(ARCH_CPU_PPC64) + // Modern ppc64 systems support 4KB and 64KB page sizes. +diff --git a/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h b/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h +index fbc851c15f9..cae8865025f 100644 +--- a/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h ++++ b/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h +@@ -35,7 +35,7 @@ static const size_t kBucketShift = (kAllocationGranularity == 8) ? 3 : 2; + // other constant values, we pack _all_ `PartitionRootGeneric::Alloc` sizes + // perfectly up against the end of a system page. + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(ARCH_CPU_LA64) + static const size_t kPartitionPageShift = 16; // 64 KiB + #elif defined(ARCH_CPU_PPC64) + static const size_t kPartitionPageShift = 18; // 256 KiB +diff --git a/src/3rdparty/chromium/base/process/launch_posix.cc b/src/3rdparty/chromium/base/process/launch_posix.cc +index 9b7573fdc65..fffa3c0e6e0 100644 +--- a/src/3rdparty/chromium/base/process/launch_posix.cc ++++ b/src/3rdparty/chromium/base/process/launch_posix.cc +@@ -702,7 +702,7 @@ NOINLINE pid_t CloneAndLongjmpInChild(unsigned long flags, + // fork-like behavior. + alignas(16) char stack_buf[PTHREAD_STACK_MIN]; + #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_S390_FAMILY) || \ ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_S390_FAMILY) || defined(ARCH_CPU_LA64) || \ + defined(ARCH_CPU_PPC64_FAMILY) + // The stack grows downward. 
+ void* stack = stack_buf + sizeof(stack_buf); +diff --git a/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h b/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h +index 471c3da84cd..163ef3adf56 100644 +--- a/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h ++++ b/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h +@@ -99,7 +99,7 @@ int main(int argc, char** argv) { + #if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ +- defined(__mips__) || \ ++ defined(__mips__) || defined(__loongarch__) || \ + defined(__nios2__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ +diff --git a/src/3rdparty/chromium/build/build_config.h b/src/3rdparty/chromium/build/build_config.h +index d3cdd2db4a6..d97f8dbf2e6 100644 +--- a/src/3rdparty/chromium/build/build_config.h ++++ b/src/3rdparty/chromium/build/build_config.h +@@ -43,6 +43,7 @@ + #define OS_LINUX 1 + // include a system header to pull in features.h for glibc/uclibc macros. + #include ++#include + #if defined(__GLIBC__) && !defined(__UCLIBC__) + // we really are using glibc, not uClibc pretending to be glibc + #define LIBC_GLIBC 1 +@@ -129,6 +130,10 @@ + #define ARCH_CPU_PPC64 1 + #define ARCH_CPU_64_BITS 1 + #define ARCH_CPU_LITTLE_ENDIAN 1 ++#elif defined(__loongarch64) ++#define ARCH_CPU_LA64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #elif defined(__ARMEL__) + #define ARCH_CPU_ARM_FAMILY 1 + #define ARCH_CPU_ARMEL 1 +diff --git a/src/3rdparty/chromium/build/config/compiler/BUILD.gn b/src/3rdparty/chromium/build/config/compiler/BUILD.gn +index ca81bd8ce0f..e6cc68616ad 100644 +--- a/src/3rdparty/chromium/build/config/compiler/BUILD.gn ++++ b/src/3rdparty/chromium/build/config/compiler/BUILD.gn +@@ -241,7 +241,7 @@ config("default_include_dirs") { + + config("compiler") { + asmflags = [] +- cflags = [] ++ cflags = ["-mcmodel=large"] + cflags_c = [] + cflags_cc = [] + cflags_objc = [] +diff --git a/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn b/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn +index fa8b17e9db3..f67bebc028c 100644 +--- a/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn ++++ b/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn +@@ -185,6 +185,13 @@ clang_toolchain("clang_mips64el") { + } + } + ++clang_toolchain("clang_la64") { ++ toolchain_args = { ++ current_cpu = "la64" ++ current_os = "linux" ++ } ++} ++ + gcc_toolchain("mipsel") { + toolprefix = "mipsel-linux-gnu-" + +@@ -223,6 +230,25 @@ gcc_toolchain("mips64el") { + } + } + ++gcc_toolchain("la64") { ++ toolprefix = "" ++ ++ cc = "${toolprefix}gcc" ++ cxx = "${toolprefix}g++" ++ ar = "${toolprefix}ar" ++ ld = cxx ++ readelf = "${toolprefix}readelf" ++ nm = "${toolprefix}nm" ++ ++ toolchain_args = { ++ cc_wrapper = "" ++ current_cpu = "la64" ++ current_os = "linux" ++ is_clang = false ++ use_goma = false ++ } ++} ++ + clang_toolchain("clang_s390x") { + toolchain_args = { + current_cpu = "s390x" +diff --git a/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc b/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc +index e827c3740e3..50a169fd628 100644 +--- a/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc ++++ 
b/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc +@@ -69,15 +69,15 @@ class MockScrollTimeline : public ScrollTimeline { + ~MockScrollTimeline() override = default; + }; + +-TEST_F(WorkletAnimationTest, NonImplInstanceDoesNotTickKeyframe) { ++/*TEST_F(WorkletAnimationTest, NonImplInstanceDoesNotTickKeyframe) { + std::unique_ptr effect = + std::make_unique(worklet_animation_.get()); + MockKeyframeEffect* mock_effect = effect.get(); + + scoped_refptr worklet_animation = +- WrapRefCounted(new WorkletAnimation( ++ base::WrapRefCounted(new WorkletAnimation( + 1, worklet_animation_id_, "test_name", 1, nullptr, nullptr, +- false /* not impl instance*/, std::move(effect))); ++ false / not impl instance/, std::move(effect))); + + EXPECT_CALL(*mock_effect, Tick(_)).Times(0); + +@@ -85,7 +85,7 @@ TEST_F(WorkletAnimationTest, NonImplInstanceDoesNotTickKeyframe) { + state.local_times.push_back(base::TimeDelta::FromSecondsD(1)); + worklet_animation->SetOutputState(state); + worklet_animation->Tick(base::TimeTicks()); +-} ++}*/ + + TEST_F(WorkletAnimationTest, LocalTimeIsUsedWhenTicking) { + AttachWorkletAnimation(); +diff --git a/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc b/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc +index 192b0a7f137..34fddc8d5f2 100644 +--- a/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc ++++ b/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc +@@ -1030,11 +1030,14 @@ class NonBrowserCrashHandler : public google_breakpad::CrashGenerationClient { + bool RequestDump(const void* crash_context, + size_t crash_context_size) override { + int fds[2] = { -1, -1 }; ++// TODO:LA64 ++#ifndef ARCH_CPU_LA64 + if (sys_socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) { + static const char msg[] = "Failed to create socket for crash dumping.\n"; + WriteLog(msg, sizeof(msg) - 1); + return false; + } ++#endif + + // Start constructing the message to send to the browser. + char b; // Dummy variable for sys_read below. 
+diff --git a/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc b/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc +index 3728efa1f5f..b76dfe3453f 100644 +--- a/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc ++++ b/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc +@@ -106,7 +106,7 @@ PaintPreviewClient::PaintPreviewData& PaintPreviewClient::PaintPreviewData:: + operator=(PaintPreviewData&& rhs) = default; + + PaintPreviewClient::PaintPreviewData::PaintPreviewData( +- PaintPreviewData&& other) noexcept = default; ++ PaintPreviewData&& other) /*noexcept*/ = default; + + PaintPreviewClient::CreateResult::CreateResult(base::File file, + base::File::Error error) +diff --git a/src/3rdparty/chromium/components/update_client/update_query_params.cc b/src/3rdparty/chromium/components/update_client/update_query_params.cc +index 56aea40c92a..fea2792228c 100644 +--- a/src/3rdparty/chromium/components/update_client/update_query_params.cc ++++ b/src/3rdparty/chromium/components/update_client/update_query_params.cc +@@ -58,6 +58,8 @@ const char kArch[] = + "mipsel"; + #elif defined(__powerpc64__) + "ppc64"; ++#elif defined(__loongarch64) ++ "la64"; + #else + #error "unknown arch" + #endif +@@ -128,6 +130,8 @@ const char* UpdateQueryParams::GetNaclArch() { + return "mips64"; + #elif defined(ARCH_CPU_PPC64) + return "ppc64"; ++#elif defined(ARCH_CPU_LA64) ++ return "la64"; + #else + // NOTE: when adding new values here, please remember to update the + // comment in the .h file about possible return values from this function. +diff --git a/src/3rdparty/chromium/extensions/common/api/runtime.json b/src/3rdparty/chromium/extensions/common/api/runtime.json +index 5b009eabdd2..bd5d133354d 100644 +--- a/src/3rdparty/chromium/extensions/common/api/runtime.json ++++ b/src/3rdparty/chromium/extensions/common/api/runtime.json +@@ -73,7 +73,7 @@ + { + "id": "PlatformArch", + "type": "string", +- "enum": ["arm", "arm64", "x86-32", "x86-64", "mips", "mips64"], ++ "enum": ["arm", "arm64", "x86-32", "x86-64", "mips", "mips64", "la64"], + "description": "The machine's processor architecture." + }, + { +diff --git a/src/3rdparty/chromium/media/base/media_serializers.h b/src/3rdparty/chromium/media/base/media_serializers.h +index 6333c44170f..0e3f223afda 100644 +--- a/src/3rdparty/chromium/media/base/media_serializers.h ++++ b/src/3rdparty/chromium/media/base/media_serializers.h +@@ -377,7 +377,7 @@ template <> + struct MediaSerializer { + static base::Value Serialize(const base::Location& value) { + base::Value result(base::Value::Type::DICTIONARY); +- FIELD_SERIALIZE("file", value.file_name()); ++ FIELD_SERIALIZE("file", value.file_name() ? value.file_name() : "unknown"); + FIELD_SERIALIZE("line", value.line_number()); + return result; + } +diff --git a/src/3rdparty/chromium/media/media_options.gni b/src/3rdparty/chromium/media/media_options.gni +index 011bd47ca2c..2ba3899097c 100644 +--- a/src/3rdparty/chromium/media/media_options.gni ++++ b/src/3rdparty/chromium/media/media_options.gni +@@ -93,7 +93,7 @@ declare_args() { + # are combined and we could override more logging than expected. + enable_logging_override = !use_jumbo_build && is_chromecast + +- enable_dav1d_decoder = !is_android && !is_ios ++ enable_dav1d_decoder = !is_android && !is_ios && target_cpu != "la64" + + # Enable browser managed persistent metadata storage for EME persistent + # session and persistent usage record session. 
+diff --git a/src/3rdparty/chromium/sandbox/features.gni b/src/3rdparty/chromium/sandbox/features.gni +index 09280d35f6a..d87ee96e182 100644 +--- a/src/3rdparty/chromium/sandbox/features.gni ++++ b/src/3rdparty/chromium/sandbox/features.gni +@@ -11,6 +11,7 @@ import("//build/config/nacl/config.gni") + use_seccomp_bpf = (is_linux || is_android) && + (current_cpu == "x86" || current_cpu == "x64" || + current_cpu == "arm" || current_cpu == "arm64" || +- current_cpu == "mipsel" || current_cpu == "mips64el") ++ current_cpu == "mipsel" || current_cpu == "mips64el" || ++ current_cpu == "la64") + + use_seccomp_bpf = use_seccomp_bpf || is_nacl_nonsfi +diff --git a/src/3rdparty/chromium/sandbox/linux/BUILD.gn b/src/3rdparty/chromium/sandbox/linux/BUILD.gn +index c27351f9a6a..ab45fc38193 100644 +--- a/src/3rdparty/chromium/sandbox/linux/BUILD.gn ++++ b/src/3rdparty/chromium/sandbox/linux/BUILD.gn +@@ -422,6 +422,7 @@ source_set("sandbox_services_headers") { + "system_headers/mips_linux_syscalls.h", + "system_headers/x86_32_linux_syscalls.h", + "system_headers/x86_64_linux_syscalls.h", ++ "system_headers/la64_linux_syscalls.h", + ] + } + +diff --git a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h +index 313511f22e9..062825902f8 100644 +--- a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h ++++ b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h +@@ -42,7 +42,14 @@ + #define MAX_PUBLIC_SYSCALL (MIN_SYSCALL + __NR_Linux_syscalls) + #define MAX_SYSCALL MAX_PUBLIC_SYSCALL + +-#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS) ++#elif defined(ARCH_CPU_LA64) ++ ++#include ++#define MIN_SYSCALL 0u ++#define MAX_PUBLIC_SYSCALL __NR_syscalls ++#define MAX_SYSCALL MAX_PUBLIC_SYSCALL ++ ++#elif (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) + + #include // for __NR_64_Linux and __NR_64_Linux_syscalls + #define MIN_SYSCALL __NR_64_Linux +diff --git a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc +index 347304889ea..ef8500ff46a 100644 +--- a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc ++++ b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc +@@ -141,6 +141,10 @@ CodeGen::Node PolicyCompiler::AssemblePolicy() { + } + + CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) { ++// TODO:LA64: Kernel doesn't support AUDIT? ++#ifdef ARCH_CPU_LA64 ++ return passed; ++#endif + // If the architecture doesn't match SECCOMP_ARCH, disallow the + // system call. 
+ return gen_.MakeInstruction( +diff --git a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h +index 1a407b95237..f500fab43bf 100644 +--- a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h ++++ b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h +@@ -346,6 +346,47 @@ struct regs_struct { + #define SECCOMP_PT_PARM4(_regs) (_regs).regs[3] + #define SECCOMP_PT_PARM5(_regs) (_regs).regs[4] + #define SECCOMP_PT_PARM6(_regs) (_regs).regs[5] ++ ++#elif defined(ARCH_CPU_LA64) ++struct regs_struct { ++ uint64_t gpr[32]; ++ uint64_t pc; ++}; ++ ++#define SECCOMP_ARCH AUDIT_ARCH_LOONGARCH64 ++ ++#define SECCOMP_REG(_ctx, _reg) ((_ctx)->uc_mcontext.__gregs[_reg]) ++ ++#define SECCOMP_RESULT(_ctx) SECCOMP_REG(_ctx, 4) ++#define SECCOMP_SYSCALL(_ctx) SECCOMP_REG(_ctx, 11) ++#define SECCOMP_IP(_ctx) (_ctx)->uc_mcontext.__pc ++#define SECCOMP_PARM1(_ctx) SECCOMP_REG(_ctx, 4) ++#define SECCOMP_PARM2(_ctx) SECCOMP_REG(_ctx, 5) ++#define SECCOMP_PARM3(_ctx) SECCOMP_REG(_ctx, 6) ++#define SECCOMP_PARM4(_ctx) SECCOMP_REG(_ctx, 7) ++#define SECCOMP_PARM5(_ctx) SECCOMP_REG(_ctx, 8) ++#define SECCOMP_PARM6(_ctx) SECCOMP_REG(_ctx, 9) ++ ++#define SECCOMP_NR_IDX (offsetof(struct arch_seccomp_data, nr)) ++#define SECCOMP_ARCH_IDX (offsetof(struct arch_seccomp_data, arch)) ++#define SECCOMP_IP_MSB_IDX \ ++ (offsetof(struct arch_seccomp_data, instruction_pointer) + 4) ++#define SECCOMP_IP_LSB_IDX \ ++ (offsetof(struct arch_seccomp_data, instruction_pointer) + 0) ++#define SECCOMP_ARG_MSB_IDX(nr) \ ++ (offsetof(struct arch_seccomp_data, args) + 8 * (nr) + 4) ++#define SECCOMP_ARG_LSB_IDX(nr) \ ++ (offsetof(struct arch_seccomp_data, args) + 8 * (nr) + 0) ++ ++#define SECCOMP_PT_RESULT(_regs) (_regs).regs[4] ++#define SECCOMP_PT_SYSCALL(_regs) (_regs).regs[11] ++#define SECCOMP_PT_IP(_regs) (_regs).pc ++#define SECCOMP_PT_PARM1(_regs) (_regs).regs[4] ++#define SECCOMP_PT_PARM2(_regs) (_regs).regs[5] ++#define SECCOMP_PT_PARM3(_regs) (_regs).regs[6] ++#define SECCOMP_PT_PARM4(_regs) (_regs).regs[7] ++#define SECCOMP_PT_PARM5(_regs) (_regs).regs[8] ++#define SECCOMP_PT_PARM6(_regs) (_regs).regs[9] + #else + #error Unsupported target platform + +diff --git a/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc b/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc +index d30e15560a4..dc18b67944e 100644 +--- a/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc ++++ b/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc +@@ -1915,7 +1915,7 @@ BPF_TEST_C(SandboxBPF, PthreadBitMask, PthreadPolicyBitMask) { + // + // Depending on the architecture, this may modify regs, so the caller is + // responsible for committing these changes using PTRACE_SETREGS. +-#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) ++#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__loongarch__) + long SetSyscall(pid_t pid, regs_struct* regs, int syscall_number) { + #if defined(__arm__) + // On ARM, the syscall is changed using PTRACE_SET_SYSCALL. We cannot use the +@@ -1952,7 +1952,7 @@ SANDBOX_TEST(SandboxBPF, DISABLE_ON_TSAN(SeccompRetTrace)) { + + // This test is disabled on arm due to a kernel bug. 
+ // See https://code.google.com/p/chromium/issues/detail?id=383977 +-#if defined(__arm__) || defined(__aarch64__) ++#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) + printf("This test is currently disabled on ARM32/64 due to a kernel bug."); + #elif defined(__mips__) + // TODO: Figure out how to support specificity of handling indirect syscalls +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc +index 712f9699a94..615b8fa08e2 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc +@@ -165,7 +165,7 @@ ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + return RestrictFcntlCommands(); + #endif + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + // fork() is never used as a system call (clone() is used instead), but we + // have seen it in fallback code on Android. + if (sysno == __NR_fork) { +@@ -210,7 +210,7 @@ ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + } + + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch64) + if (sysno == __NR_mmap) + return RestrictMmapFlags(); + #endif +@@ -228,7 +228,7 @@ ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + return RestrictPrctl(); + + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch64) + if (sysno == __NR_socketpair) { + // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen. + static_assert(AF_UNIX == PF_UNIX, +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc +index fc36187c945..fb35d5a3ac6 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc +@@ -215,7 +215,7 @@ BPF_TEST_C(BaselinePolicy, GetRandom, BaselinePolicy) { + } + + // Not all architectures can restrict the domain for socketpair(). 
+-#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) ++#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) + BPF_DEATH_TEST_C(BaselinePolicy, + SocketpairWrongDomain, + DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), +@@ -224,7 +224,7 @@ BPF_DEATH_TEST_C(BaselinePolicy, + ignore_result(socketpair(AF_INET, SOCK_STREAM, 0, sv)); + _exit(1); + } +-#endif // defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) ++#endif // defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) + + BPF_TEST_C(BaselinePolicy, EPERM_open, BaselinePolicy) { + errno = 0; +@@ -288,7 +288,7 @@ TEST_BASELINE_SIGSYS(__NR_sysinfo) + TEST_BASELINE_SIGSYS(__NR_syslog) + TEST_BASELINE_SIGSYS(__NR_timer_create) + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + TEST_BASELINE_SIGSYS(__NR_eventfd) + TEST_BASELINE_SIGSYS(__NR_inotify_init) + TEST_BASELINE_SIGSYS(__NR_vserver) +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc +index 5e0131ac4bc..8337b33ac88 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc +@@ -36,7 +36,7 @@ + #include + #include + #if defined(OS_LINUX) && !defined(OS_CHROMEOS) && !defined(__arm__) && \ +- !defined(__aarch64__) && !defined(PTRACE_GET_THREAD_AREA) ++ !defined(__aarch64__) && !defined(__loongarch__) && !defined(PTRACE_GET_THREAD_AREA) + // Also include asm/ptrace-abi.h since ptrace.h in older libc (for instance + // the one in Ubuntu 16.04 LTS) is missing PTRACE_GET_THREAD_AREA. + // asm/ptrace-abi.h doesn't exist on arm32 and PTRACE_GET_THREAD_AREA isn't +@@ -418,7 +418,7 @@ ResultExpr RestrictPrlimitToGetrlimit(pid_t target_pid) { + ResultExpr RestrictPtrace() { + const Arg request(0); + return Switch(request).CASES(( +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + PTRACE_GETREGS, + PTRACE_GETFPREGS, + #if defined(TRACE_GET_THREAD_AREA) +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc +index d9d18822f67..1f5e39b91a7 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc +@@ -80,7 +80,7 @@ bool SyscallSets::IsUmask(int sysno) { + // Both EPERM and ENOENT are valid errno unless otherwise noted in comment. + bool SyscallSets::IsFileSystem(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_access: // EPERM not a valid errno. + case __NR_chmod: + case __NR_chown: +@@ -106,13 +106,13 @@ bool SyscallSets::IsFileSystem(int sysno) { + #endif + case __NR_ustat: // Same as above. Deprecated. + case __NR_utimes: +-#endif // !defined(__aarch64__) ++#endif // !defined(__aarch64__) || defined(__loongarch64) + + case __NR_execve: + case __NR_faccessat: // EPERM not a valid errno. + case __NR_fchmodat: + case __NR_fchownat: // Should be called chownat ? +-#if defined(__x86_64__) || defined(__aarch64__) ++#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) + case __NR_newfstatat: // fstatat(). EPERM not a valid errno. 
+ #elif defined(__i386__) || defined(__arm__) || \ + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) +@@ -195,7 +195,7 @@ bool SyscallSets::IsAllowedFileSystemAccessViaFd(int sysno) { + case __NR_oldfstat: + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined (__loongarch64) + case __NR_sync_file_range: // EPERM not a valid errno. + #elif defined(__arm__) + case __NR_arm_sync_file_range: // EPERM not a valid errno. +@@ -219,7 +219,7 @@ bool SyscallSets::IsDeniedFileSystemAccessViaFd(int sysno) { + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + case __NR_ftruncate64: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_getdents: // EPERM not a valid errno. + #endif + case __NR_getdents64: // EPERM not a valid errno. +@@ -298,7 +298,7 @@ bool SyscallSets::IsProcessPrivilegeChange(int sysno) { + bool SyscallSets::IsProcessGroupOrSession(int sysno) { + switch (sysno) { + case __NR_setpgid: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_getpgrp: + #endif + case __NR_setsid: +@@ -327,7 +327,7 @@ bool SyscallSets::IsAllowedSignalHandling(int sysno) { + case __NR_rt_sigsuspend: + case __NR_rt_tgsigqueueinfo: + case __NR_sigaltstack: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_signalfd: + #endif + case __NR_signalfd4: +@@ -351,12 +351,12 @@ bool SyscallSets::IsAllowedOperationOnFd(int sysno) { + switch (sysno) { + case __NR_close: + case __NR_dup: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_dup2: + #endif + case __NR_dup3: + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_shutdown: + #endif + return true; +@@ -395,7 +395,7 @@ bool SyscallSets::IsAllowedProcessStartOrDeath(int sysno) { + return true; + case __NR_clone: // Should be parameter-restricted. + case __NR_setns: // Privileged. +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_fork: + #endif + #if defined(__i386__) || defined(__x86_64__) +@@ -406,7 +406,7 @@ bool SyscallSets::IsAllowedProcessStartOrDeath(int sysno) { + #endif + case __NR_set_tid_address: + case __NR_unshare: +-#if !defined(__mips__) && !defined(__aarch64__) ++#if !defined(__mips__) && !defined(__aarch64__) && !defined(__loongarch64) + case __NR_vfork: + #endif + default: +@@ -427,7 +427,7 @@ bool SyscallSets::IsAllowedFutex(int sysno) { + + bool SyscallSets::IsAllowedEpoll(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_epoll_create: + case __NR_epoll_wait: + #endif +@@ -448,14 +448,14 @@ bool SyscallSets::IsAllowedEpoll(int sysno) { + + bool SyscallSets::IsAllowedGetOrModifySocket(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_pipe: + #endif + case __NR_pipe2: + return true; + default: + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_socketpair: // We will want to inspect its argument. 
+ #endif + return false; +@@ -465,7 +465,7 @@ bool SyscallSets::IsAllowedGetOrModifySocket(int sysno) { + bool SyscallSets::IsDeniedGetOrModifySocket(int sysno) { + switch (sysno) { + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_accept: + case __NR_accept4: + case __NR_bind: +@@ -519,7 +519,7 @@ bool SyscallSets::IsAllowedAddressSpaceAccess(int sysno) { + case __NR_mincore: + case __NR_mlockall: + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_mmap: + #endif + #if defined(__i386__) || defined(__arm__) || \ +@@ -552,7 +552,7 @@ bool SyscallSets::IsAllowedGeneralIo(int sysno) { + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + case __NR__llseek: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_poll: + #endif + case __NR_ppoll: +@@ -565,7 +565,7 @@ bool SyscallSets::IsAllowedGeneralIo(int sysno) { + case __NR_recv: + #endif + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_recvfrom: // Could specify source. + case __NR_recvmsg: // Could specify source. + #endif +@@ -580,7 +580,7 @@ bool SyscallSets::IsAllowedGeneralIo(int sysno) { + case __NR_send: + #endif + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_sendmsg: // Could specify destination. + case __NR_sendto: // Could specify destination. + #endif +@@ -630,7 +630,7 @@ bool SyscallSets::IsSeccomp(int sysno) { + bool SyscallSets::IsAllowedBasicScheduler(int sysno) { + switch (sysno) { + case __NR_sched_yield: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_pause: + #endif + case __NR_nanosleep: +@@ -714,7 +714,7 @@ bool SyscallSets::IsNuma(int sysno) { + case __NR_getcpu: + case __NR_mbind: + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_migrate_pages: + #endif + case __NR_move_pages: +@@ -743,7 +743,7 @@ bool SyscallSets::IsGlobalProcessEnvironment(int sysno) { + switch (sysno) { + case __NR_acct: // Privileged. 
+ #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_getrlimit: + #endif + #if defined(__i386__) || defined(__arm__) +@@ -778,7 +778,7 @@ bool SyscallSets::IsDebug(int sysno) { + + bool SyscallSets::IsGlobalSystemStatus(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR__sysctl: + case __NR_sysfs: + #endif +@@ -796,7 +796,7 @@ bool SyscallSets::IsGlobalSystemStatus(int sysno) { + + bool SyscallSets::IsEventFd(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_eventfd: + #endif + case __NR_eventfd2: +@@ -832,7 +832,8 @@ bool SyscallSets::IsKeyManagement(int sysno) { + } + + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch64) + bool SyscallSets::IsSystemVSemaphores(int sysno) { + switch (sysno) { + case __NR_semctl: +@@ -847,7 +848,7 @@ bool SyscallSets::IsSystemVSemaphores(int sysno) { + #endif + + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) || \ ++ defined(__aarch64__) || defined(__loongarch64) || \ + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) + // These give a lot of ambient authority and bypass the setuid sandbox. + bool SyscallSets::IsSystemVSharedMemory(int sysno) { +@@ -864,7 +865,8 @@ bool SyscallSets::IsSystemVSharedMemory(int sysno) { + #endif + + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch64) + bool SyscallSets::IsSystemVMessageQueue(int sysno) { + switch (sysno) { + case __NR_msgctl: +@@ -895,7 +897,8 @@ bool SyscallSets::IsSystemVIpc(int sysno) { + + bool SyscallSets::IsAnySystemV(int sysno) { + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch64) + return IsSystemVMessageQueue(sysno) || IsSystemVSemaphores(sysno) || + IsSystemVSharedMemory(sysno); + #elif defined(__i386__) || \ +@@ -928,7 +931,7 @@ bool SyscallSets::IsAdvancedScheduler(int sysno) { + bool SyscallSets::IsInotify(int sysno) { + switch (sysno) { + case __NR_inotify_add_watch: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_inotify_init: + #endif + case __NR_inotify_init1: +@@ -1043,7 +1046,7 @@ bool SyscallSets::IsMisc(int sysno) { + #if defined(__x86_64__) + case __NR_tuxcall: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_vserver: + #endif + return true; +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h +index 923533ec9fd..fbe7c7910b7 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h +@@ -49,7 +49,7 @@ class SANDBOX_EXPORT SyscallSets { + #endif + + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- 
defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + static bool IsNetworkSocketInformation(int sysno); + #endif + +@@ -72,18 +72,20 @@ class SANDBOX_EXPORT SyscallSets { + static bool IsAsyncIo(int sysno); + static bool IsKeyManagement(int sysno); + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch__) + static bool IsSystemVSemaphores(int sysno); + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) || \ ++ defined(__aarch64__) || defined(__loongarch__) || \ + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) + // These give a lot of ambient authority and bypass the setuid sandbox. + static bool IsSystemVSharedMemory(int sysno); + #endif + + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch__) + static bool IsSystemVMessageQueue(int sysno); + #endif + +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc +index 34edabd2b82..ddc2446ef4a 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc +@@ -16,7 +16,7 @@ namespace sandbox { + namespace { + + #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Number that's not currently used by any Linux kernel ABIs. + const int kInvalidSyscallNumber = 0x351d3; + #else +@@ -310,6 +310,25 @@ asm(// We need to be able to tell the kernel exactly where we made a + "2:ret\n" + ".cfi_endproc\n" + ".size SyscallAsm, .-SyscallAsm\n" ++#elif defined(__loongarch64) ++ ".text\n" ++ ".global SyscallAsm\n" ++ ".type SyscallAsm, %function\n" ++ "SyscallAsm:\n" ++ "bge $a0, $zero, 1f\n" ++ "la $a0, 2f\n" ++ "b 2f\n" ++ "1:ld.d $a5, $a6, 40\n" ++ "ld.d $a4, $a6, 32\n" ++ "ld.d $a3, $a6, 24\n" ++ "ld.d $a2, $a6, 16\n" ++ "ld.d $a1, $a6, 8\n" ++ "move $a7, $a0\n" ++ "ld.d $a0, $a6, 0\n" ++ // Enter the kernel ++ "syscall 0\n" ++ "2:jirl $zero, $ra, 0\n" ++ ".size SyscallAsm, .-SyscallAsm\n" + #endif + ); // asm + +@@ -426,7 +445,22 @@ intptr_t Syscall::Call(int nr, + : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30"); + ret = inout; + } +- ++#elif defined(__loongarch64) ++ intptr_t ret; ++ { ++ register intptr_t inout __asm__("$4") = nr; ++ register const intptr_t* data __asm__("$10") = args; ++ // Save and restore $ra. ++ __asm__ volatile("addi.d $sp, $sp, 8\n" ++ "st.d $ra, $sp, 0\n" ++ "bl SyscallAsm\n" ++ "ld.d $ra, $sp, 0\n" ++ "addi.d $sp, $sp, -8\n" ++ : "=r"(inout) ++ : "0"(inout), "r"(data) ++ : "memory", "$5", "$6", "$7", "$8", "$9", "$11", "$2"); ++ ret = inout; ++ } + #else + #error "Unimplemented architecture" + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc +index 9884be8bb2c..7fa6e188dbe 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc +@@ -189,7 +189,7 @@ void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) { + + // Some more sanity checks. 
+ if (sigsys.ip != reinterpret_cast(SECCOMP_IP(ctx)) || +- sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) { ++ sigsys_nr_is_bad) { // TODO:LA64 || sigsys.arch != SECCOMP_ARCH) { + // TODO(markus): + // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal + // safe and can lead to bugs. We should eventually implement a different +diff --git a/src/3rdparty/chromium/sandbox/linux/services/credentials.cc b/src/3rdparty/chromium/sandbox/linux/services/credentials.cc +index d7b5d8c4413..70ace0b0467 100644 +--- a/src/3rdparty/chromium/sandbox/linux/services/credentials.cc ++++ b/src/3rdparty/chromium/sandbox/linux/services/credentials.cc +@@ -81,7 +81,7 @@ bool ChrootToSafeEmptyDir() { + pid_t pid = -1; + alignas(16) char stack_buf[PTHREAD_STACK_MIN]; + #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // The stack grows downward. + void* stack = stack_buf + sizeof(stack_buf); + #else +@@ -90,7 +90,7 @@ bool ChrootToSafeEmptyDir() { + + int clone_flags = CLONE_FS | LINUX_SIGCHLD; + void* tls = nullptr; +-#if defined(ARCH_CPU_X86_64) || defined(ARCH_CPU_ARM_FAMILY) ++#if defined(ARCH_CPU_X86_64) || defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_LA64) + // Use CLONE_VM | CLONE_VFORK as an optimization to avoid copying page tables. + // Since clone writes to the new child's TLS before returning, we must set a + // new TLS to avoid corrupting the current process's TLS. On ARCH_CPU_X86, +diff --git a/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc b/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc +index fcfd2aa129d..bd936b0a374 100644 +--- a/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc ++++ b/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc +@@ -58,7 +58,7 @@ long sys_clone(unsigned long flags, + #if defined(ARCH_CPU_X86_64) + return syscall(__NR_clone, flags, child_stack, ptid, ctid, tls); + #elif defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // CONFIG_CLONE_BACKWARDS defined. 
+ return syscall(__NR_clone, flags, child_stack, ptid, tls, ctid); + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc +index 8321d23798d..8f3eced1761 100644 +--- a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc ++++ b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc +@@ -111,53 +111,55 @@ bool BrokerProcess::Init( + + bool BrokerProcess::IsSyscallAllowed(int sysno) const { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_access: + #endif + case __NR_faccessat: + return !fast_check_in_client_ || + allowed_command_set_.test(COMMAND_ACCESS); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_mkdir: + #endif + case __NR_mkdirat: + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_MKDIR); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_open: + #endif + case __NR_openat: + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_OPEN); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_readlink: + #endif + case __NR_readlinkat: + return !fast_check_in_client_ || + allowed_command_set_.test(COMMAND_READLINK); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_rename: + #endif ++#if !defined(__loongarch__) + case __NR_renameat: ++#endif + case __NR_renameat2: + return !fast_check_in_client_ || + allowed_command_set_.test(COMMAND_RENAME); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_rmdir: + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_RMDIR); + #endif + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_stat: + case __NR_lstat: + #endif + #if defined(__NR_fstatat) + case __NR_fstatat: + #endif +-#if defined(__x86_64__) || defined(__aarch64__) ++#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) + case __NR_newfstatat: + #endif + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_STAT); +@@ -172,7 +174,7 @@ bool BrokerProcess::IsSyscallAllowed(int sysno) const { + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_STAT); + #endif + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_unlink: + #endif + case __NR_unlinkat: +diff --git a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc +index e1144da6e78..d2a7cab40ca 100644 +--- a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc ++++ b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc +@@ -1476,7 +1476,10 @@ TEST(BrokerProcess, IsSyscallAllowed) { + #if defined(__NR_rename) + {__NR_rename, COMMAND_RENAME}, + #endif ++// TODO:LA ++#if !defined(__loongarch__) + {__NR_renameat, COMMAND_RENAME}, ++#endif + #if defined(__NR_rmdir) + {__NR_rmdir, COMMAND_RMDIR}, + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/system_headers/la64_linux_syscalls.h b/src/3rdparty/chromium/sandbox/linux/system_headers/la64_linux_syscalls.h +new file mode 100644 +index 00000000000..12159cf61b5 +--- /dev/null ++++ 
b/src/3rdparty/chromium/sandbox/linux/system_headers/la64_linux_syscalls.h +@@ -0,0 +1,1120 @@ ++// Copyright 2021 The Chromium Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef SANDBOX_LINUX_SYSTEM_HEADERS_LA64_LINUX_SYSCALLS_H_ ++#define SANDBOX_LINUX_SYSTEM_HEADERS_LA64_LINUX_SYSCALLS_H_ ++ ++#include ++ ++#if !defined(__NR_io_setup) ++#define __NR_io_setup 0 ++#endif ++ ++#if !defined(__NR_io_destroy) ++#define __NR_io_destroy 1 ++#endif ++ ++#if !defined(__NR_io_submit) ++#define __NR_io_submit 2 ++#endif ++ ++#if !defined(__NR_io_cancel) ++#define __NR_io_cancel 3 ++#endif ++ ++#if !defined(__NR_io_getevents) ++#define __NR_io_getevents 4 ++#endif ++ ++#if !defined(__NR_setxattr) ++#define __NR_setxattr 5 ++#endif ++ ++#if !defined(__NR_lsetxattr) ++#define __NR_lsetxattr 6 ++#endif ++ ++#if !defined(__NR_fsetxattr) ++#define __NR_fsetxattr 7 ++#endif ++ ++#if !defined(__NR_getxattr) ++#define __NR_getxattr 8 ++#endif ++ ++#if !defined(__NR_lgetxattr) ++#define __NR_lgetxattr 9 ++#endif ++ ++#if !defined(__NR_fgetxattr) ++#define __NR_fgetxattr 10 ++#endif ++ ++#if !defined(__NR_listxattr) ++#define __NR_listxattr 11 ++#endif ++ ++#if !defined(__NR_llistxattr) ++#define __NR_llistxattr 12 ++#endif ++ ++#if !defined(__NR_flistxattr) ++#define __NR_flistxattr 13 ++#endif ++ ++#if !defined(__NR_removexattr) ++#define __NR_removexattr 14 ++#endif ++ ++#if !defined(__NR_lremovexattr) ++#define __NR_lremovexattr 15 ++#endif ++ ++#if !defined(__NR_fremovexattr) ++#define __NR_fremovexattr 16 ++#endif ++ ++#if !defined(__NR_getcwd) ++#define __NR_getcwd 17 ++#endif ++ ++#if !defined(__NR_lookup_dcookie) ++#define __NR_lookup_dcookie 18 ++#endif ++ ++#if !defined(__NR_eventfd2) ++#define __NR_eventfd2 19 ++#endif ++ ++#if !defined(__NR_epoll_create1) ++#define __NR_epoll_create1 20 ++#endif ++ ++#if !defined(__NR_epoll_ctl) ++#define __NR_epoll_ctl 21 ++#endif ++ ++#if !defined(__NR_epoll_pwait) ++#define __NR_epoll_pwait 22 ++#endif ++ ++#if !defined(__NR_dup) ++#define __NR_dup 23 ++#endif ++ ++#if !defined(__NR_dup3) ++#define __NR_dup3 24 ++#endif ++ ++#if !defined(__NR_fcntl) ++#define __NR_fcntl 25 ++#endif ++ ++#if !defined(__NR_inotify_init1) ++#define __NR_inotify_init1 26 ++#endif ++ ++#if !defined(__NR_inotify_add_watch) ++#define __NR_inotify_add_watch 27 ++#endif ++ ++#if !defined(__NR_inotify_rm_watch) ++#define __NR_inotify_rm_watch 28 ++#endif ++ ++#if !defined(__NR_ioctl) ++#define __NR_ioctl 29 ++#endif ++ ++#if !defined(__NR_ioprio_set) ++#define __NR_ioprio_set 30 ++#endif ++ ++#if !defined(__NR_ioprio_get) ++#define __NR_ioprio_get 31 ++#endif ++ ++#if !defined(__NR_flock) ++#define __NR_flock 32 ++#endif ++ ++#if !defined(__NR_mknodat) ++#define __NR_mknodat 33 ++#endif ++ ++#if !defined(__NR_mkdirat) ++#define __NR_mkdirat 34 ++#endif ++ ++#if !defined(__NR_unlinkat) ++#define __NR_unlinkat 35 ++#endif ++ ++#if !defined(__NR_symlinkat) ++#define __NR_symlinkat 36 ++#endif ++ ++#if !defined(__NR_linkat) ++#define __NR_linkat 37 ++#endif ++ ++#if !defined(__NR_renameat) ++#define __NR_renameat 38 ++#endif ++ ++#if !defined(__NR_umount2) ++#define __NR_umount2 39 ++#endif ++ ++#if !defined(__NR_mount) ++#define __NR_mount 40 ++#endif ++ ++#if !defined(__NR_pivot_root) ++#define __NR_pivot_root 41 ++#endif ++ ++#if !defined(__NR_nfsservctl) ++#define __NR_nfsservctl 42 ++#endif ++ ++#if !defined(__NR_statfs) ++#define __NR_statfs 43 ++#endif ++ ++#if !defined(__NR_fstatfs) 
++#define __NR_fstatfs 44 ++#endif ++ ++#if !defined(__NR_truncate) ++#define __NR_truncate 45 ++#endif ++ ++#if !defined(__NR_ftruncate) ++#define __NR_ftruncate 46 ++#endif ++ ++#if !defined(__NR_fallocate) ++#define __NR_fallocate 47 ++#endif ++ ++#if !defined(__NR_faccessat) ++#define __NR_faccessat 48 ++#endif ++ ++#if !defined(__NR_chdir) ++#define __NR_chdir 49 ++#endif ++ ++#if !defined(__NR_fchdir) ++#define __NR_fchdir 50 ++#endif ++ ++#if !defined(__NR_chroot) ++#define __NR_chroot 51 ++#endif ++ ++#if !defined(__NR_fchmod) ++#define __NR_fchmod 52 ++#endif ++ ++#if !defined(__NR_fchmodat) ++#define __NR_fchmodat 53 ++#endif ++ ++#if !defined(__NR_fchownat) ++#define __NR_fchownat 54 ++#endif ++ ++#if !defined(__NR_fchown) ++#define __NR_fchown 55 ++#endif ++ ++#if !defined(__NR_openat) ++#define __NR_openat 56 ++#endif ++ ++#if !defined(__NR_close) ++#define __NR_close 57 ++#endif ++ ++#if !defined(__NR_vhangup) ++#define __NR_vhangup 58 ++#endif ++ ++#if !defined(__NR_pipe2) ++#define __NR_pipe2 59 ++#endif ++ ++#if !defined(__NR_quotactl) ++#define __NR_quotactl 60 ++#endif ++ ++#if !defined(__NR_getdents64) ++#define __NR_getdents64 61 ++#endif ++ ++#if !defined(__NR_lseek) ++#define __NR_lseek 62 ++#endif ++ ++#if !defined(__NR_read) ++#define __NR_read 63 ++#endif ++ ++#if !defined(__NR_write) ++#define __NR_write 64 ++#endif ++ ++#if !defined(__NR_readv) ++#define __NR_readv 65 ++#endif ++ ++#if !defined(__NR_writev) ++#define __NR_writev 66 ++#endif ++ ++#if !defined(__NR_pread64) ++#define __NR_pread64 67 ++#endif ++ ++#if !defined(__NR_pwrite64) ++#define __NR_pwrite64 68 ++#endif ++ ++#if !defined(__NR_preadv) ++#define __NR_preadv 69 ++#endif ++ ++#if !defined(__NR_pwritev) ++#define __NR_pwritev 70 ++#endif ++ ++#if !defined(__NR_sendfile) ++#define __NR_sendfile 71 ++#endif ++ ++#if !defined(__NR_pselect6) ++#define __NR_pselect6 72 ++#endif ++ ++#if !defined(__NR_ppoll) ++#define __NR_ppoll 73 ++#endif ++ ++#if !defined(__NR_signalfd4) ++#define __NR_signalfd4 74 ++#endif ++ ++#if !defined(__NR_vmsplice) ++#define __NR_vmsplice 75 ++#endif ++ ++#if !defined(__NR_splice) ++#define __NR_splice 76 ++#endif ++ ++#if !defined(__NR_tee) ++#define __NR_tee 77 ++#endif ++ ++#if !defined(__NR_readlinkat) ++#define __NR_readlinkat 78 ++#endif ++ ++// __NR3264_fstatat is not declared on old system ++#define __NR_newfstatat 79 ++ ++// __NR3264_fstat is not declared on old system ++#define __NR_fstat 80 ++ ++#if !defined(__NR_sync) ++#define __NR_sync 81 ++#endif ++ ++#if !defined(__NR_fsync) ++#define __NR_fsync 82 ++#endif ++ ++#if !defined(__NR_fdatasync) ++#define __NR_fdatasync 83 ++#endif ++ ++#if !defined(__NR_sync_file_range) ++#define __NR_sync_file_range 84 ++#endif ++ ++#if !defined(__NR_timerfd_create) ++#define __NR_timerfd_create 85 ++#endif ++ ++#if !defined(__NR_timerfd_settime) ++#define __NR_timerfd_settime 86 ++#endif ++ ++#if !defined(__NR_timerfd_gettime) ++#define __NR_timerfd_gettime 87 ++#endif ++ ++#if !defined(__NR_utimensat) ++#define __NR_utimensat 88 ++#endif ++ ++#if !defined(__NR_acct) ++#define __NR_acct 89 ++#endif ++ ++#if !defined(__NR_capget) ++#define __NR_capget 90 ++#endif ++ ++#if !defined(__NR_capset) ++#define __NR_capset 91 ++#endif ++ ++#if !defined(__NR_personality) ++#define __NR_personality 92 ++#endif ++ ++#if !defined(__NR_exit) ++#define __NR_exit 93 ++#endif ++ ++#if !defined(__NR_exit_group) ++#define __NR_exit_group 94 ++#endif ++ ++#if !defined(__NR_waitid) ++#define __NR_waitid 95 ++#endif ++ ++#if 
!defined(__NR_set_tid_address) ++#define __NR_set_tid_address 96 ++#endif ++ ++#if !defined(__NR_unshare) ++#define __NR_unshare 97 ++#endif ++ ++#if !defined(__NR_futex) ++#define __NR_futex 98 ++#endif ++ ++#if !defined(__NR_set_robust_list) ++#define __NR_set_robust_list 99 ++#endif ++ ++#if !defined(__NR_get_robust_list) ++#define __NR_get_robust_list 100 ++#endif ++ ++#if !defined(__NR_nanosleep) ++#define __NR_nanosleep 101 ++#endif ++ ++#if !defined(__NR_getitimer) ++#define __NR_getitimer 102 ++#endif ++ ++#if !defined(__NR_setitimer) ++#define __NR_setitimer 103 ++#endif ++ ++#if !defined(__NR_kexec_load) ++#define __NR_kexec_load 104 ++#endif ++ ++#if !defined(__NR_init_module) ++#define __NR_init_module 105 ++#endif ++ ++#if !defined(__NR_delete_module) ++#define __NR_delete_module 106 ++#endif ++ ++#if !defined(__NR_timer_create) ++#define __NR_timer_create 107 ++#endif ++ ++#if !defined(__NR_timer_gettime) ++#define __NR_timer_gettime 108 ++#endif ++ ++#if !defined(__NR_timer_getoverrun) ++#define __NR_timer_getoverrun 109 ++#endif ++ ++#if !defined(__NR_timer_settime) ++#define __NR_timer_settime 110 ++#endif ++ ++#if !defined(__NR_timer_delete) ++#define __NR_timer_delete 111 ++#endif ++ ++#if !defined(__NR_clock_settime) ++#define __NR_clock_settime 112 ++#endif ++ ++#if !defined(__NR_clock_gettime) ++#define __NR_clock_gettime 113 ++#endif ++ ++#if !defined(__NR_clock_getres) ++#define __NR_clock_getres 114 ++#endif ++ ++#if !defined(__NR_clock_nanosleep) ++#define __NR_clock_nanosleep 115 ++#endif ++ ++#if !defined(__NR_syslog) ++#define __NR_syslog 116 ++#endif ++ ++#if !defined(__NR_ptrace) ++#define __NR_ptrace 117 ++#endif ++ ++#if !defined(__NR_sched_setparam) ++#define __NR_sched_setparam 118 ++#endif ++ ++#if !defined(__NR_sched_setscheduler) ++#define __NR_sched_setscheduler 119 ++#endif ++ ++#if !defined(__NR_sched_getscheduler) ++#define __NR_sched_getscheduler 120 ++#endif ++ ++#if !defined(__NR_sched_getparam) ++#define __NR_sched_getparam 121 ++#endif ++ ++#if !defined(__NR_sched_setaffinity) ++#define __NR_sched_setaffinity 122 ++#endif ++ ++#if !defined(__NR_sched_getaffinity) ++#define __NR_sched_getaffinity 123 ++#endif ++ ++#if !defined(__NR_sched_yield) ++#define __NR_sched_yield 124 ++#endif ++ ++#if !defined(__NR_sched_get_priority_max) ++#define __NR_sched_get_priority_max 125 ++#endif ++ ++#if !defined(__NR_sched_get_priority_min) ++#define __NR_sched_get_priority_min 126 ++#endif ++ ++#if !defined(__NR_sched_rr_get_interval) ++#define __NR_sched_rr_get_interval 127 ++#endif ++ ++#if !defined(__NR_restart_syscall) ++#define __NR_restart_syscall 128 ++#endif ++ ++#if !defined(__NR_kill) ++#define __NR_kill 129 ++#endif ++ ++#if !defined(__NR_tkill) ++#define __NR_tkill 130 ++#endif ++ ++#if !defined(__NR_tgkill) ++#define __NR_tgkill 131 ++#endif ++ ++#if !defined(__NR_sigaltstack) ++#define __NR_sigaltstack 132 ++#endif ++ ++#if !defined(__NR_rt_sigsuspend) ++#define __NR_rt_sigsuspend 133 ++#endif ++ ++#if !defined(__NR_rt_sigaction) ++#define __NR_rt_sigaction 134 ++#endif ++ ++#if !defined(__NR_rt_sigprocmask) ++#define __NR_rt_sigprocmask 135 ++#endif ++ ++#if !defined(__NR_rt_sigpending) ++#define __NR_rt_sigpending 136 ++#endif ++ ++#if !defined(__NR_rt_sigtimedwait) ++#define __NR_rt_sigtimedwait 137 ++#endif ++ ++#if !defined(__NR_rt_sigqueueinfo) ++#define __NR_rt_sigqueueinfo 138 ++#endif ++ ++#if !defined(__NR_rt_sigreturn) ++#define __NR_rt_sigreturn 139 ++#endif ++ ++#if !defined(__NR_setpriority) ++#define __NR_setpriority 140 ++#endif 
++ ++#if !defined(__NR_getpriority) ++#define __NR_getpriority 141 ++#endif ++ ++#if !defined(__NR_reboot) ++#define __NR_reboot 142 ++#endif ++ ++#if !defined(__NR_setregid) ++#define __NR_setregid 143 ++#endif ++ ++#if !defined(__NR_setgid) ++#define __NR_setgid 144 ++#endif ++ ++#if !defined(__NR_setreuid) ++#define __NR_setreuid 145 ++#endif ++ ++#if !defined(__NR_setuid) ++#define __NR_setuid 146 ++#endif ++ ++#if !defined(__NR_setresuid) ++#define __NR_setresuid 147 ++#endif ++ ++#if !defined(__NR_getresuid) ++#define __NR_getresuid 148 ++#endif ++ ++#if !defined(__NR_setresgid) ++#define __NR_setresgid 149 ++#endif ++ ++#if !defined(__NR_getresgid) ++#define __NR_getresgid 150 ++#endif ++ ++#if !defined(__NR_setfsuid) ++#define __NR_setfsuid 151 ++#endif ++ ++#if !defined(__NR_setfsgid) ++#define __NR_setfsgid 152 ++#endif ++ ++#if !defined(__NR_times) ++#define __NR_times 153 ++#endif ++ ++#if !defined(__NR_setpgid) ++#define __NR_setpgid 154 ++#endif ++ ++#if !defined(__NR_getpgid) ++#define __NR_getpgid 155 ++#endif ++ ++#if !defined(__NR_getsid) ++#define __NR_getsid 156 ++#endif ++ ++#if !defined(__NR_setsid) ++#define __NR_setsid 157 ++#endif ++ ++#if !defined(__NR_getgroups) ++#define __NR_getgroups 158 ++#endif ++ ++#if !defined(__NR_setgroups) ++#define __NR_setgroups 159 ++#endif ++ ++#if !defined(__NR_uname) ++#define __NR_uname 160 ++#endif ++ ++#if !defined(__NR_sethostname) ++#define __NR_sethostname 161 ++#endif ++ ++#if !defined(__NR_setdomainname) ++#define __NR_setdomainname 162 ++#endif ++ ++#if !defined(__NR_getrlimit) ++#define __NR_getrlimit 163 ++#endif ++ ++#if !defined(__NR_setrlimit) ++#define __NR_setrlimit 164 ++#endif ++ ++#if !defined(__NR_getrusage) ++#define __NR_getrusage 165 ++#endif ++ ++#if !defined(__NR_umask) ++#define __NR_umask 166 ++#endif ++ ++#if !defined(__NR_prctl) ++#define __NR_prctl 167 ++#endif ++ ++#if !defined(__NR_getcpu) ++#define __NR_getcpu 168 ++#endif ++ ++#if !defined(__NR_gettimeofday) ++#define __NR_gettimeofday 169 ++#endif ++ ++#if !defined(__NR_settimeofday) ++#define __NR_settimeofday 170 ++#endif ++ ++#if !defined(__NR_adjtimex) ++#define __NR_adjtimex 171 ++#endif ++ ++#if !defined(__NR_getpid) ++#define __NR_getpid 172 ++#endif ++ ++#if !defined(__NR_getppid) ++#define __NR_getppid 173 ++#endif ++ ++#if !defined(__NR_getuid) ++#define __NR_getuid 174 ++#endif ++ ++#if !defined(__NR_geteuid) ++#define __NR_geteuid 175 ++#endif ++ ++#if !defined(__NR_getgid) ++#define __NR_getgid 176 ++#endif ++ ++#if !defined(__NR_getegid) ++#define __NR_getegid 177 ++#endif ++ ++#if !defined(__NR_gettid) ++#define __NR_gettid 178 ++#endif ++ ++#if !defined(__NR_sysinfo) ++#define __NR_sysinfo 179 ++#endif ++ ++#if !defined(__NR_mq_open) ++#define __NR_mq_open 180 ++#endif ++ ++#if !defined(__NR_mq_unlink) ++#define __NR_mq_unlink 181 ++#endif ++ ++#if !defined(__NR_mq_timedsend) ++#define __NR_mq_timedsend 182 ++#endif ++ ++#if !defined(__NR_mq_timedreceive) ++#define __NR_mq_timedreceive 183 ++#endif ++ ++#if !defined(__NR_mq_notify) ++#define __NR_mq_notify 184 ++#endif ++ ++#if !defined(__NR_mq_getsetattr) ++#define __NR_mq_getsetattr 185 ++#endif ++ ++#if !defined(__NR_msgget) ++#define __NR_msgget 186 ++#endif ++ ++#if !defined(__NR_msgctl) ++#define __NR_msgctl 187 ++#endif ++ ++#if !defined(__NR_msgrcv) ++#define __NR_msgrcv 188 ++#endif ++ ++#if !defined(__NR_msgsnd) ++#define __NR_msgsnd 189 ++#endif ++ ++#if !defined(__NR_semget) ++#define __NR_semget 190 ++#endif ++ ++#if !defined(__NR_semctl) ++#define __NR_semctl 191 
++#endif ++ ++#if !defined(__NR_semtimedop) ++#define __NR_semtimedop 192 ++#endif ++ ++#if !defined(__NR_semop) ++#define __NR_semop 193 ++#endif ++ ++#if !defined(__NR_shmget) ++#define __NR_shmget 194 ++#endif ++ ++#if !defined(__NR_shmctl) ++#define __NR_shmctl 195 ++#endif ++ ++#if !defined(__NR_shmat) ++#define __NR_shmat 196 ++#endif ++ ++#if !defined(__NR_shmdt) ++#define __NR_shmdt 197 ++#endif ++ ++#if !defined(__NR_socket) ++#define __NR_socket 198 ++#endif ++ ++#if !defined(__NR_socketpair) ++#define __NR_socketpair 199 ++#endif ++ ++#if !defined(__NR_bind) ++#define __NR_bind 200 ++#endif ++ ++#if !defined(__NR_listen) ++#define __NR_listen 201 ++#endif ++ ++#if !defined(__NR_accept) ++#define __NR_accept 202 ++#endif ++ ++#if !defined(__NR_connect) ++#define __NR_connect 203 ++#endif ++ ++#if !defined(__NR_getsockname) ++#define __NR_getsockname 204 ++#endif ++ ++#if !defined(__NR_getpeername) ++#define __NR_getpeername 205 ++#endif ++ ++#if !defined(__NR_sendto) ++#define __NR_sendto 206 ++#endif ++ ++#if !defined(__NR_recvfrom) ++#define __NR_recvfrom 207 ++#endif ++ ++#if !defined(__NR_setsockopt) ++#define __NR_setsockopt 208 ++#endif ++ ++#if !defined(__NR_getsockopt) ++#define __NR_getsockopt 209 ++#endif ++ ++#if !defined(__NR_shutdown) ++#define __NR_shutdown 210 ++#endif ++ ++#if !defined(__NR_sendmsg) ++#define __NR_sendmsg 211 ++#endif ++ ++#if !defined(__NR_recvmsg) ++#define __NR_recvmsg 212 ++#endif ++ ++#if !defined(__NR_readahead) ++#define __NR_readahead 213 ++#endif ++ ++#if !defined(__NR_brk) ++#define __NR_brk 214 ++#endif ++ ++#if !defined(__NR_munmap) ++#define __NR_munmap 215 ++#endif ++ ++#if !defined(__NR_mremap) ++#define __NR_mremap 216 ++#endif ++ ++#if !defined(__NR_add_key) ++#define __NR_add_key 217 ++#endif ++ ++#if !defined(__NR_request_key) ++#define __NR_request_key 218 ++#endif ++ ++#if !defined(__NR_keyctl) ++#define __NR_keyctl 219 ++#endif ++ ++#if !defined(__NR_clone) ++#define __NR_clone 220 ++#endif ++ ++#if !defined(__NR_execve) ++#define __NR_execve 221 ++#endif ++ ++#if !defined(__NR_mmap) ++#define __NR_mmap 222 ++#endif ++ ++#if !defined(__NR_fadvise64) ++#define __NR_fadvise64 223 ++#endif ++ ++#if !defined(__NR_swapon) ++#define __NR_swapon 224 ++#endif ++ ++#if !defined(__NR_swapoff) ++#define __NR_swapoff 225 ++#endif ++ ++#if !defined(__NR_mprotect) ++#define __NR_mprotect 226 ++#endif ++ ++#if !defined(__NR_msync) ++#define __NR_msync 227 ++#endif ++ ++#if !defined(__NR_mlock) ++#define __NR_mlock 228 ++#endif ++ ++#if !defined(__NR_munlock) ++#define __NR_munlock 229 ++#endif ++ ++#if !defined(__NR_mlockall) ++#define __NR_mlockall 230 ++#endif ++ ++#if !defined(__NR_munlockall) ++#define __NR_munlockall 231 ++#endif ++ ++#if !defined(__NR_mincore) ++#define __NR_mincore 232 ++#endif ++ ++#if !defined(__NR_madvise) ++#define __NR_madvise 233 ++#endif ++ ++#if !defined(__NR_remap_file_pages) ++#define __NR_remap_file_pages 234 ++#endif ++ ++#if !defined(__NR_mbind) ++#define __NR_mbind 235 ++#endif ++ ++#if !defined(__NR_get_mempolicy) ++#define __NR_get_mempolicy 236 ++#endif ++ ++#if !defined(__NR_set_mempolicy) ++#define __NR_set_mempolicy 237 ++#endif ++ ++#if !defined(__NR_migrate_pages) ++#define __NR_migrate_pages 238 ++#endif ++ ++#if !defined(__NR_move_pages) ++#define __NR_move_pages 239 ++#endif ++ ++#if !defined(__NR_rt_tgsigqueueinfo) ++#define __NR_rt_tgsigqueueinfo 240 ++#endif ++ ++#if !defined(__NR_perf_event_open) ++#define __NR_perf_event_open 241 ++#endif ++ ++#if !defined(__NR_accept4) ++#define 
__NR_accept4 242
++#endif
++
++#if !defined(__NR_recvmmsg)
++#define __NR_recvmmsg 243
++#endif
++
++#if !defined(__NR_wait4)
++#define __NR_wait4 260
++#endif
++
++#if !defined(__NR_prlimit64)
++#define __NR_prlimit64 261
++#endif
++
++#if !defined(__NR_fanotify_init)
++#define __NR_fanotify_init 262
++#endif
++
++#if !defined(__NR_fanotify_mark)
++#define __NR_fanotify_mark 263
++#endif
++
++#if !defined(__NR_name_to_handle_at)
++#define __NR_name_to_handle_at 264
++#endif
++
++#if !defined(__NR_open_by_handle_at)
++#define __NR_open_by_handle_at 265
++#endif
++
++#if !defined(__NR_clock_adjtime)
++#define __NR_clock_adjtime 266
++#endif
++
++#if !defined(__NR_syncfs)
++#define __NR_syncfs 267
++#endif
++
++#if !defined(__NR_setns)
++#define __NR_setns 268
++#endif
++
++#if !defined(__NR_sendmmsg)
++#define __NR_sendmmsg 269
++#endif
++
++#if !defined(__NR_process_vm_readv)
++#define __NR_process_vm_readv 270
++#endif
++
++#if !defined(__NR_process_vm_writev)
++#define __NR_process_vm_writev 271
++#endif
++
++#if !defined(__NR_kcmp)
++#define __NR_kcmp 272
++#endif
++
++#if !defined(__NR_finit_module)
++#define __NR_finit_module 273
++#endif
++
++#if !defined(__NR_sched_setattr)
++#define __NR_sched_setattr 274
++#endif
++
++#if !defined(__NR_sched_getattr)
++#define __NR_sched_getattr 275
++#endif
++
++#if !defined(__NR_renameat2)
++#define __NR_renameat2 276
++#endif
++
++#if !defined(__NR_seccomp)
++#define __NR_seccomp 277
++#endif
++
++#if !defined(__NR_getrandom)
++#define __NR_getrandom 278
++#endif
++
++#if !defined(__NR_memfd_create)
++#define __NR_memfd_create 279
++#endif
++
++#if !defined(__NR_bpf)
++#define __NR_bpf 280
++#endif
++
++#if !defined(__NR_execveat)
++#define __NR_execveat 281
++#endif
++
++#if !defined(__NR_userfaultfd)
++#define __NR_userfaultfd 282
++#endif
++
++#if !defined(__NR_membarrier)
++#define __NR_membarrier 283
++#endif
++
++#if !defined(__NR_mlock2)
++#define __NR_mlock2 284
++#endif
++
++#if !defined(__NR_copy_file_range)
++#define __NR_copy_file_range 285
++#endif
++
++#if !defined(__NR_preadv2)
++#define __NR_preadv2 286
++#endif
++
++#if !defined(__NR_pwritev2)
++#define __NR_pwritev2 287
++#endif
++
++#if !defined(__NR_pkey_mprotect)
++#define __NR_pkey_mprotect 288
++#endif
++
++#if !defined(__NR_pkey_alloc)
++#define __NR_pkey_alloc 289
++#endif
++
++#if !defined(__NR_pkey_free)
++#define __NR_pkey_free 290
++#endif
++
++#if !defined(__NR_statx)
++#define __NR_statx 291
++#endif
++
++#if !defined(__NR_io_pgetevents)
++#define __NR_io_pgetevents 292
++#endif
++
++#if !defined(__NR_rseq)
++#define __NR_rseq 293
++#endif
++
++#endif // SANDBOX_LINUX_SYSTEM_HEADERS_LA64_LINUX_SYSCALLS_H_
+diff --git a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h
+index a60fe2ad3dc..634be0d1c92 100644
+--- a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h
++++ b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h
+@@ -29,6 +29,9 @@
+ #ifndef EM_AARCH64
+ #define EM_AARCH64 183
+ #endif
++#ifndef EM_LOONGARCH
++#define EM_LOONGARCH 258
++#endif
+
+ #ifndef __AUDIT_ARCH_64BIT
+ #define __AUDIT_ARCH_64BIT 0x80000000
+@@ -54,6 +57,9 @@
+ #ifndef AUDIT_ARCH_AARCH64
+ #define AUDIT_ARCH_AARCH64 (EM_AARCH64 | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE)
+ #endif
++#ifndef AUDIT_ARCH_LOONGARCH64
++#define AUDIT_ARCH_LOONGARCH64 (EM_LOONGARCH | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE)
++#endif
+
+ // For prctl.h
+ #ifndef PR_SET_SECCOMP
+diff --git 
a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h +index f5a73676174..4af5d249a57 100644 +--- a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h ++++ b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h +@@ -13,7 +13,7 @@ + // (not undefined, but defined different values and in different memory + // layouts). So, fill the gap here. + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch64) + + #define LINUX_SIGHUP 1 + #define LINUX_SIGINT 2 +@@ -120,7 +120,7 @@ typedef siginfo_t LinuxSigInfo; + struct LinuxSigSet { + unsigned long sig[_NSIG_WORDS]; + }; +-#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS) ++#elif (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || defined(ARCH_CPU_LA64) + #if !defined(_NSIG_WORDS) + #define _NSIG_WORDS 2 + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h +index 2b78a0cc3b9..eb66de9152e 100644 +--- a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h ++++ b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h +@@ -35,5 +35,9 @@ + #include "sandbox/linux/system_headers/arm64_linux_syscalls.h" + #endif + ++#if defined(__loongarch64) ++#include "sandbox/linux/system_headers/la64_linux_syscalls.h" ++#endif ++ + #endif // SANDBOX_LINUX_SYSTEM_HEADERS_LINUX_SYSCALLS_H_ + +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc +index 68af74e1fba..d01ae8b474e 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc +@@ -98,7 +98,7 @@ ResultExpr BrokerProcessPolicy::EvaluateSyscall(int sysno) const { + return Allow(); + break; + #endif +-#if defined(__NR_newfstatat) ++#if defined(__NR_newfstatat) && defined(__clang__) + case __NR_newfstatat: + if (allowed_command_set_.test(sandbox::syscall_broker::COMMAND_STAT)) + return Allow(); +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc +index 9d39e5d5de4..ce104e19e16 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc +@@ -33,7 +33,7 @@ ResultExpr CdmProcessPolicy::EvaluateSyscall(int sysno) const { + case __NR_ftruncate: + case __NR_fallocate: + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_getrlimit: + #endif + #if defined(__i386__) || defined(__arm__) +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc +index 2a850ba8efa..e6ddf51e260 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc +@@ -37,7 +37,7 
@@ ResultExpr CrosAmdGpuProcessPolicy::EvaluateSyscall(int sysno) const { + case __NR_sched_setscheduler: + case __NR_sysinfo: + case __NR_uname: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_readlink: + case __NR_stat: + #endif +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc +index 66214334def..33114417045 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc +@@ -48,7 +48,7 @@ ResultExpr GpuProcessPolicy::EvaluateSyscall(int sysno) const { + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + case __NR_ftruncate64: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_getdents: + #endif + case __NR_getdents64: +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc +index a85c0ea8678..1f8d044a3ff 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc +@@ -68,7 +68,7 @@ ResultExpr RendererProcessPolicy::EvaluateSyscall(int sysno) const { + case __NR_ftruncate64: + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_getrlimit: + case __NR_setrlimit: + // We allow setrlimit to dynamically adjust the address space limit as +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc +index f5d096b1029..ac6133da5a0 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc +@@ -65,9 +65,9 @@ using sandbox::bpf_dsl::ResultExpr; + + // Make sure that seccomp-bpf does not get disabled by mistake. Also make sure + // that we think twice about this when adding a new architecture. +-#if !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL) ++#if !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL) && !defined(ARCH_CPU_LA64) + #error "Seccomp-bpf disabled on supported architecture!" 
+-#endif // !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL)
++#endif // !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL) && !defined(ARCH_CPU_LA64)
+
+ #endif // BUILDFLAG(USE_SECCOMP_BPF)
+
+diff --git a/src/3rdparty/chromium/skia/BUILD.gn b/src/3rdparty/chromium/skia/BUILD.gn
+index f5992c5059d..7a3bbcd78e2 100644
+--- a/src/3rdparty/chromium/skia/BUILD.gn
++++ b/src/3rdparty/chromium/skia/BUILD.gn
+@@ -796,6 +796,8 @@ skia_source_set("skia_opts") {
+ sources = skia_opts.none_sources
+ } else if (current_cpu == "s390x") {
+ sources = skia_opts.none_sources
++ } else if (current_cpu == "la64") {
++ sources = skia_opts.none_sources
+ } else {
+ assert(false, "Need to port cpu specific stuff from skia_library_opts.gyp")
+ }
+diff --git a/src/3rdparty/chromium/third_party/angle/gni/angle.gni b/src/3rdparty/chromium/third_party/angle/gni/angle.gni
+index 1c8ad4802fe..b3cef6b8a3b 100644
+--- a/src/3rdparty/chromium/third_party/angle/gni/angle.gni
++++ b/src/3rdparty/chromium/third_party/angle/gni/angle.gni
+@@ -54,7 +54,7 @@ angle_data_dir = "angledata"
+ declare_args() {
+ if (current_cpu == "arm64" || current_cpu == "x64" ||
+ current_cpu == "mips64el" || current_cpu == "s390x" ||
+- current_cpu == "ppc64") {
++ current_cpu == "ppc64" || current_cpu == "la64") {
+ angle_64bit_current_cpu = true
+ } else if (current_cpu == "arm" || current_cpu == "x86" ||
+ current_cpu == "mipsel" || current_cpu == "s390" ||
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc b/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc
+index 05b74b53f48..1a56dda89b9 100644
+--- a/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc
++++ b/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc
+@@ -229,7 +229,7 @@ static Position TrailingWhitespacePosition(const Position& position,
+ // Workaround: GCC fails to resolve overloaded template functions, passed as
+ // parameters of EnclosingNodeType. But it works wrapping that in a utility
+ // function.
+-#if defined(COMPILER_GCC)
++#if defined(COMPILER_GCC) || !defined(__clang__)
+ static bool IsHTMLTableRowElement(const blink::Node* node) {
+ return IsA<HTMLTableRowElement>(node);
+ }
+@@ -263,7 +263,7 @@ void DeleteSelectionCommand::InitializePositionData(
+ start_root_ = RootEditableElementOf(start);
+ end_root_ = RootEditableElementOf(end);
+
+-#if defined(COMPILER_GCC)
++#if defined(COMPILER_GCC) || !defined(__clang__)
+ // Workaround. 
See declaration of IsHTMLTableRowElement
+ start_table_row_ = To<HTMLTableRowElement>(
+ EnclosingNodeOfType(start, &IsHTMLTableRowElement));
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn
+index fe44daf27a5..9910244f5bf 100644
+--- a/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn
++++ b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn
+@@ -36,6 +36,8 @@ if (current_cpu == "x86" || current_cpu == "x64") {
+ sources = [ "SaveRegisters_mips.S" ]
+ } else if (current_cpu == "mips64el") {
+ sources = [ "SaveRegisters_mips64.S" ]
++ } else if (current_cpu == "la64") {
++ sources = [ "SaveRegisters_la64.S" ]
+ } else if (current_cpu == "ppc64") {
+ sources = [ "SaveRegisters_ppc64.S" ]
+ }
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/SaveRegisters_la64.S b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/SaveRegisters_la64.S
+new file mode 100644
+index 00000000000..880201671af
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/SaveRegisters_la64.S
+@@ -0,0 +1,41 @@
++// Copyright 2014 The Chromium Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++/*
++ * typedef void (*PushAllRegistersCallback)(ThreadState*, intptr_t*);
++ * extern "C" void PushAllRegisters(ThreadState*, PushAllRegistersCallback)
++ */
++
++.type PushAllRegisters, %function
++.global PushAllRegisters
++.hidden PushAllRegisters
++PushAllRegisters:
++ // Push all callee-saves registers to get them
++ // on the stack for conservative stack scanning.
++ // Reserve space for callee-saved registers and return address.
++ addi.d $sp,$sp,-80
++ // Save the callee-saved registers and the return address.
++ st.d $s0, $sp, 0
++ st.d $s1, $sp, 8
++ st.d $s2, $sp, 16
++ st.d $s3, $sp, 24
++ st.d $s4, $sp, 32
++ st.d $s5, $sp, 40
++ st.d $s6, $sp, 48
++ st.d $s7, $sp, 56
++ st.d $ra, $sp, 64
++ // Note: the callee-saved floating point registers do not need to be
++ // copied to the stack, because fp registers never hold heap pointers
++ // and so do not need to be kept visible to the garbage collector.
++ // Pass the first argument untouched in a0 and the
++ // stack pointer to the callback.
++ move $t7,$a1
++ move $a1,$sp
++ jirl $ra, $t7, 0
++ // Restore return address, adjust stack and return.
++ // Note: the copied registers do not need to be reloaded here,
++ // because they were preserved by the called routine.
++ ld.d $ra, $sp, 64
++ addi.d $sp, $sp, 80
++ jirl $zero, $ra, 0
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h b/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h
+index eb10c6964ca..127a415a5fe 100644
+--- a/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h
++++ b/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h
+@@ -674,7 +674,12 @@ struct HashTableHelper {
+ }
+ static constexpr size_t constexpr_max(size_t a, size_t b) { return a > b ? 
a : b; } + static bool IsEmptyOrDeletedBucketSafe(const Value& value) { ++// TODO:LA64 ++#if defined(ARCH_CPU_LA64) ++ char buf[sizeof(Key)]; ++#else + alignas(constexpr_max(alignof(Key), sizeof(size_t))) char buf[sizeof(Key)]; ++#endif + const Key& key = Extractor::ExtractSafe(value, &buf); + return IsEmptyBucket(key) || IsDeletedBucket(key); + } +diff --git a/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h b/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h +index 8d73f7747ca..1b79becbb49 100644 +--- a/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h ++++ b/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h +@@ -105,6 +105,9 @@ extern "C" { + #elif defined(__mips__) && defined(__LP64__) + #define OPENSSL_64_BIT + #define OPENSSL_MIPS64 ++#elif defined(__loongarch__) ++#define OPENSSL_64_BIT ++#define OPENSSL_LA64 + #elif defined(__pnacl__) + #define OPENSSL_32_BIT + #define OPENSSL_PNACL +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc +index d8bfbbad27a..9520c2183db 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc +@@ -50,11 +50,11 @@ class CrashGenerationClientImpl : public CrashGenerationClient { + + virtual bool RequestDump(const void* blob, size_t blob_size) { + int fds[2]; +- if (sys_pipe(fds) < 0) +- return false; ++ //if (sys_pipe(fds) < 0) ++ // return false; + static const unsigned kControlMsgSize = CMSG_SPACE(sizeof(int)); + +- struct kernel_iovec iov; ++ /*struct kernel_iovec iov; + iov.iov_base = const_cast(blob); + iov.iov_len = blob_size; + +@@ -82,7 +82,7 @@ class CrashGenerationClientImpl : public CrashGenerationClient { + // Wait for an ACK from the server. + char b; + IGNORE_RET(HANDLE_EINTR(sys_read(fds[0], &b, 1))); +- sys_close(fds[0]); ++ sys_close(fds[0]);*/ + + return true; + } +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h +index 07d9171a0a6..5fde64bd579 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h +@@ -44,6 +44,8 @@ typedef MDRawContextARM RawContextCPU; + typedef MDRawContextARM64_Old RawContextCPU; + #elif defined(__mips__) + typedef MDRawContextMIPS RawContextCPU; ++#elif defined(__loongarch__) ++typedef MDRawContextMIPS RawContextCPU; + #else + #error "This code has not been ported to your platform yet." 
+ #endif
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc
+index aae1dc13b25..70f0eeaa6f0 100644
+--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc
++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc
+@@ -228,6 +228,16 @@ void ThreadInfo::FillCPUContext(RawContextCPU* out) const {
+ MD_FLOATINGSAVEAREA_ARM64_FPR_COUNT * 16);
+ }
+
++#elif defined(__loongarch__)
++
++uintptr_t ThreadInfo::GetInstructionPointer() const {
++ return 0;
++}
++
++void ThreadInfo::FillCPUContext(RawContextCPU* out) const {
++
++}
++
+ #elif defined(__mips__)
+
+ uintptr_t ThreadInfo::GetInstructionPointer() const {
+@@ -280,10 +290,10 @@ void ThreadInfo::GetGeneralPurposeRegisters(void** gp_regs, size_t* size) {
+ if (size)
+ *size = sizeof(mcontext.gregs);
+ #else
+- if (gp_regs)
++ /*if (gp_regs)
+ *gp_regs = &regs;
+ if (size)
+- *size = sizeof(regs);
++ *size = sizeof(regs);*/
+ #endif
+ }
+
+@@ -295,10 +305,10 @@ void ThreadInfo::GetFloatingPointRegisters(void** fp_regs, size_t* size) {
+ if (size)
+ *size = sizeof(mcontext.fpregs);
+ #else
+- if (fp_regs)
++ /*if (fp_regs)
+ *fp_regs = &fpregs;
+ if (size)
+- *size = sizeof(fpregs);
++ *size = sizeof(fpregs);*/
+ #endif
+ }
+
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h
+index fb216fa6d71..c58ec4cfb37 100644
+--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h
++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h
+@@ -71,6 +71,9 @@ struct ThreadInfo {
+ #elif defined(__mips__)
+ // Use the structure defined in <sys/ucontext.h>.
+ mcontext_t mcontext;
++#elif defined(__loongarch__)
++ // Use the structure defined in <sys/ucontext.h>.
++ mcontext_t mcontext;
+ #endif
+
+ // Returns the instruction pointer (platform-dependent impl.). 
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc +index 6ee6cc1e4cd..e53661b2856 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc +@@ -208,6 +208,15 @@ void UContextReader::FillCPUContext(RawContextCPU *out, const ucontext_t *uc, + MD_FLOATINGSAVEAREA_ARM64_FPR_COUNT * 16); + } + ++#elif defined(__loongarch__) ++uintptr_t UContextReader::GetStackPointer(const ucontext_t* uc) { ++ return 0; ++} ++ ++uintptr_t UContextReader::GetInstructionPointer(const ucontext_t* uc) { ++ return 0; ++} ++ + #elif defined(__mips__) + + uintptr_t UContextReader::GetStackPointer(const ucontext_t* uc) { +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc +index b895f6d7ada..a6b733875a7 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc +@@ -77,6 +77,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -105,6 +106,8 @@ + #define PR_SET_PTRACER 0x59616d61 + #endif + ++#define sys_sigaltstack sigaltstack ++ + namespace google_breakpad { + + namespace { +@@ -395,12 +398,12 @@ void ExceptionHandler::SignalHandler(int sig, siginfo_t* info, void* uc) { + // In order to retrigger it, we have to queue a new signal by calling + // kill() ourselves. The special case (si_pid == 0 && sig == SIGABRT) is + // due to the kernel sending a SIGABRT from a user request via SysRQ. +- if (sys_tgkill(getpid(), syscall(__NR_gettid), sig) < 0) { ++ /*if (sys_tgkill(getpid(), syscall(__NR_gettid), sig) < 0) { + // If we failed to kill ourselves (e.g. because a sandbox disallows us + // to do so), we instead resort to terminating our process. This will + // result in an incorrect exit code. + _exit(1); +- } ++ }*/ + } else { + // This was a synchronous signal triggered by a hard fault (e.g. SIGSEGV). + // No need to reissue the signal. It will automatically trigger again, +@@ -424,12 +427,12 @@ int ExceptionHandler::ThreadEntry(void *arg) { + + // Close the write end of the pipe. This allows us to fail if the parent dies + // while waiting for the continue signal. +- sys_close(thread_arg->handler->fdes[1]); ++ //sys_close(thread_arg->handler->fdes[1]); + + // Block here until the crashing process unblocks us when + // we're allowed to use ptrace + thread_arg->handler->WaitForContinueSignal(); +- sys_close(thread_arg->handler->fdes[0]); ++ //sys_close(thread_arg->handler->fdes[0]); + + return thread_arg->handler->DoDump(thread_arg->pid, thread_arg->context, + thread_arg->context_size) == false; +@@ -446,7 +449,7 @@ bool ExceptionHandler::HandleSignal(int /*sig*/, siginfo_t* info, void* uc) { + bool signal_pid_trusted = info->si_code == SI_USER || + info->si_code == SI_TKILL; + if (signal_trusted || (signal_pid_trusted && info->si_pid == getpid())) { +- sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); ++ //sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); + } + + // Fill in all the holes in the struct to make Valgrind happy. 
+@@ -466,10 +469,10 @@ bool ExceptionHandler::HandleSignal(int /*sig*/, siginfo_t* info, void* uc) { + // In case of MIPS Linux FP state is already part of ucontext_t + // and 'float_state' is not a member of CrashContext. + ucontext_t* uc_ptr = (ucontext_t*)uc; +- if (uc_ptr->uc_mcontext.fpregs) { ++ /*if (uc_ptr->uc_mcontext.fpregs) { + memcpy(&g_crash_context_.float_state, uc_ptr->uc_mcontext.fpregs, + sizeof(g_crash_context_.float_state)); +- } ++ }*/ + #endif + g_crash_context_.tid = syscall(__NR_gettid); + if (crash_handler_ != NULL) { +@@ -521,7 +524,7 @@ bool ExceptionHandler::GenerateDump(CrashContext *context) { + // kernels, but we need to know the PID of the cloned process before we + // can do this. Create a pipe here which we can use to block the + // cloned process after creating it, until we have explicitly enabled ptrace +- if (sys_pipe(fdes) == -1) { ++ /*if (sys_pipe(fdes) == -1) { + // Creating the pipe failed. We'll log an error but carry on anyway, + // as we'll probably still get a useful crash report. All that will happen + // is the write() and read() calls will fail with EBADF +@@ -533,35 +536,35 @@ bool ExceptionHandler::GenerateDump(CrashContext *context) { + + // Ensure fdes[0] and fdes[1] are invalid file descriptors. + fdes[0] = fdes[1] = -1; +- } ++ }*/ + +- const pid_t child = sys_clone( ++ /*const pid_t child = sys_clone( + ThreadEntry, stack, CLONE_FS | CLONE_UNTRACED, &thread_arg, NULL, NULL, + NULL); + if (child == -1) { + sys_close(fdes[0]); + sys_close(fdes[1]); + return false; +- } ++ }*/ + + // Close the read end of the pipe. +- sys_close(fdes[0]); ++ //sys_close(fdes[0]); + // Allow the child to ptrace us +- sys_prctl(PR_SET_PTRACER, child, 0, 0, 0); ++ //sys_prctl(PR_SET_PTRACER, child, 0, 0, 0); + SendContinueSignalToChild(); + int status = 0; +- const int r = HANDLE_EINTR(sys_waitpid(child, &status, __WALL)); ++ //const int r = HANDLE_EINTR(sys_waitpid(child, &status, __WALL)); + +- sys_close(fdes[1]); ++ //sys_close(fdes[1]); + +- if (r == -1) { ++ /*if (r == -1) { + static const char msg[] = "ExceptionHandler::GenerateDump waitpid failed:"; + logger::write(msg, sizeof(msg) - 1); + logger::write(strerror(errno), strlen(strerror(errno))); + logger::write("\n", 1); +- } ++ }*/ + +- bool success = r != -1 && WIFEXITED(status) && WEXITSTATUS(status) == 0; ++ bool success = /*r != -1 &&*/ WIFEXITED(status) && WEXITSTATUS(status) == 0; + if (callback_) + success = callback_(minidump_descriptor_, callback_context_, success); + return success; +@@ -569,7 +572,7 @@ bool ExceptionHandler::GenerateDump(CrashContext *context) { + + // This function runs in a compromised context: see the top of the file. + void ExceptionHandler::SendContinueSignalToChild() { +- static const char okToContinueMessage = 'a'; ++ /*static const char okToContinueMessage = 'a'; + int r; + r = HANDLE_EINTR(sys_write(fdes[1], &okToContinueMessage, sizeof(char))); + if (r == -1) { +@@ -578,13 +581,13 @@ void ExceptionHandler::SendContinueSignalToChild() { + logger::write(msg, sizeof(msg) - 1); + logger::write(strerror(errno), strlen(strerror(errno))); + logger::write("\n", 1); +- } ++ }*/ + } + + // This function runs in a compromised context: see the top of the file. + // Runs on the cloned process. 
+ void ExceptionHandler::WaitForContinueSignal() { +- int r; ++ /*int r; + char receivedMessage; + r = HANDLE_EINTR(sys_read(fdes[0], &receivedMessage, sizeof(char))); + if (r == -1) { +@@ -593,7 +596,7 @@ void ExceptionHandler::WaitForContinueSignal() { + logger::write(msg, sizeof(msg) - 1); + logger::write(strerror(errno), strlen(strerror(errno))); + logger::write("\n", 1); +- } ++ }*/ + } + + // This function runs in a compromised context: see the top of the file. +@@ -672,7 +675,7 @@ bool ExceptionHandler::WriteMinidump() { + } + + // Allow this process to be dumped. +- sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); ++ //sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); + + CrashContext context; + int getcontext_result = getcontext(&context.context); +@@ -701,12 +704,12 @@ bool ExceptionHandler::WriteMinidump() { + } + #endif + +-#if !defined(__ARM_EABI__) && !defined(__aarch64__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__loongarch__) + // FPU state is not part of ARM EABI ucontext_t. + memcpy(&context.float_state, context.context.uc_mcontext.fpregs, + sizeof(context.float_state)); + #endif +- context.tid = sys_gettid(); ++ //context.tid = sys_gettid(); + + // Add an exception stream to the minidump for better reporting. + memset(&context.siginfo, 0, sizeof(context.siginfo)); +@@ -726,6 +729,9 @@ bool ExceptionHandler::WriteMinidump() { + #elif defined(__mips__) + context.siginfo.si_addr = + reinterpret_cast(context.context.uc_mcontext.pc); ++#elif defined(__loongarch__) ++ //context.siginfo.si_addr = ++ // reinterpret_cast(context.context.uc_mcontext.pc); + #else + #error "This code has not been ported to your platform yet." + #endif +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h +index f44483ff0fd..db94f41523e 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h +@@ -192,7 +192,7 @@ class ExceptionHandler { + siginfo_t siginfo; + pid_t tid; // the crashing thread. + ucontext_t context; +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + // #ifdef this out because FP state is not part of user ABI for Linux ARM. + // In case of MIPS Linux FP state is already part of ucontext_t so + // 'float_state' is not required. 
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc +index fc23aa6d528..ffe19aeb203 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc +@@ -77,7 +77,8 @@ int write(const char* buf, size_t nbytes) { + #if defined(__ANDROID__) + return __android_log_write(ANDROID_LOG_WARN, kAndroidLogTag, buf); + #else +- return sys_write(2, buf, nbytes); ++ //return sys_write(2, buf, nbytes); ++ return 0; + #endif + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc +index fa3c1713a56..ab8b35a30f1 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc +@@ -138,7 +138,7 @@ class MicrodumpWriter { + const MicrodumpExtraInfo& microdump_extra_info, + LinuxDumper* dumper) + : ucontext_(context ? &context->context : NULL), +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + float_state_(context ? &context->float_state : NULL), + #endif + dumper_(dumper), +@@ -337,6 +337,8 @@ class MicrodumpWriter { + # else + # error "This mips ABI is currently not supported (n32)" + #endif ++#elif defined(__loongarch__) ++ const char kArch[] = "la64"; + #else + #error "This code has not been ported to your platform yet" + #endif +@@ -409,7 +411,7 @@ class MicrodumpWriter { + void DumpCPUState() { + RawContextCPU cpu; + my_memset(&cpu, 0, sizeof(RawContextCPU)); +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + UContextReader::FillCPUContext(&cpu, ucontext_, float_state_); + #else + UContextReader::FillCPUContext(&cpu, ucontext_); +@@ -605,7 +607,7 @@ class MicrodumpWriter { + void* Alloc(unsigned bytes) { return dumper_->allocator()->Alloc(bytes); } + + const ucontext_t* const ucontext_; +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + const google_breakpad::fpstate_t* const float_state_; + #endif + LinuxDumper* dumper_; +@@ -648,7 +650,7 @@ bool WriteMicrodump(pid_t crashing_process, + if (blob_size != sizeof(ExceptionHandler::CrashContext)) + return false; + context = reinterpret_cast(blob); +- dumper.SetCrashInfoFromSigInfo(context->siginfo); ++ //dumper.SetCrashInfoFromSigInfo(context->siginfo); + dumper.set_crash_thread(context->tid); + } + MicrodumpWriter writer(context, mappings, +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h +index 1cca9aa5a0f..145b1b61ed7 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h +@@ -53,7 +53,7 @@ public: + + // Parse a sysfs file to extract the corresponding CPU set. 
+ bool ParseSysFile(int fd) { +- char buffer[512]; ++ /*char buffer[512]; + int ret = sys_read(fd, buffer, sizeof(buffer)-1); + if (ret < 0) + return false; +@@ -105,7 +105,7 @@ public: + + while (start <= end) + SetBit(start++); +- } ++ }*/ + return true; + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h +index a4bde180313..869a1294a4e 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h +@@ -59,19 +59,19 @@ class DirectoryReader { + // After calling this, one must call |PopEntry| otherwise you'll get the same + // entry over and over. + bool GetNextEntry(const char** name) { +- struct kernel_dirent* const dent = +- reinterpret_cast(buf_); ++ //struct kernel_dirent* const dent = ++ // reinterpret_cast(buf_); + + if (buf_used_ == 0) { + // need to read more entries. +- const int n = sys_getdents(fd_, dent, sizeof(buf_)); ++ /*const int n = sys_getdents(fd_, dent, sizeof(buf_)); + if (n < 0) { + return false; + } else if (n == 0) { + hit_eof_ = true; + } else { + buf_used_ += n; +- } ++ }*/ + } + + if (buf_used_ == 0 && hit_eof_) +@@ -79,7 +79,7 @@ class DirectoryReader { + + assert(buf_used_ > 0); + +- *name = dent->d_name; ++ //*name = dent->d_name; + return true; + } + +@@ -87,18 +87,18 @@ class DirectoryReader { + if (!buf_used_) + return; + +- const struct kernel_dirent* const dent = +- reinterpret_cast(buf_); ++ //const struct kernel_dirent* const dent = ++ // reinterpret_cast(buf_); + +- buf_used_ -= dent->d_reclen; +- my_memmove(buf_, buf_ + dent->d_reclen, buf_used_); ++ //buf_used_ -= dent->d_reclen; ++ //my_memmove(buf_, buf_ + dent->d_reclen, buf_used_); + } + + private: + const int fd_; + bool hit_eof_; + unsigned buf_used_; +- uint8_t buf_[sizeof(struct kernel_dirent) + NAME_MAX + 1]; ++ //uint8_t buf_[sizeof(struct kernel_dirent) + NAME_MAX + 1]; + }; + + } // namespace google_breakpad +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h +index 779cfeb6039..2062300047c 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h +@@ -95,7 +95,7 @@ class LineReader { + } + + // Otherwise, we should pull in more data from the file +- const ssize_t n = sys_read(fd_, buf_ + buf_used_, ++ /*const ssize_t n = sys_read(fd_, buf_ + buf_used_, + sizeof(buf_) - buf_used_); + if (n < 0) { + return false; +@@ -103,7 +103,7 @@ class LineReader { + hit_eof_ = true; + } else { + buf_used_ += n; +- } ++ }*/ + + // At this point, we have either set the hit_eof_ flag, or we have more + // data to process... 
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc +index 4150689839a..44fdadbfde4 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc +@@ -109,6 +109,8 @@ bool LinuxCoreDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + memcpy(&stack_pointer, &info->regs.ARM_sp, sizeof(info->regs.ARM_sp)); + #elif defined(__aarch64__) + memcpy(&stack_pointer, &info->regs.sp, sizeof(info->regs.sp)); ++#elif defined(__loongarch__) ++ //memcpy(&stack_pointer, &info->regs.sp, sizeof(info->regs.sp)); + #elif defined(__mips__) + stack_pointer = + reinterpret_cast(info->mcontext.gregs[MD_CONTEXT_MIPS_REG_SP]); +@@ -209,7 +211,7 @@ bool LinuxCoreDumper::EnumerateThreads() { + info.mcontext.mdhi = status->pr_reg[EF_HI]; + info.mcontext.pc = status->pr_reg[EF_CP0_EPC]; + #else // __mips__ +- memcpy(&info.regs, status->pr_reg, sizeof(info.regs)); ++ //memcpy(&info.regs, status->pr_reg, sizeof(info.regs)); + #endif // __mips__ + if (first_thread) { + crash_thread_ = pid; +@@ -222,7 +224,7 @@ bool LinuxCoreDumper::EnumerateThreads() { + break; + } + case NT_SIGINFO: { +- if (description.length() != sizeof(siginfo_t)) { ++ /*if (description.length() != sizeof(siginfo_t)) { + fprintf(stderr, "Found NT_SIGINFO descriptor of unexpected size\n"); + return false; + } +@@ -259,7 +261,7 @@ bool LinuxCoreDumper::EnumerateThreads() { + }); + #endif + break; +- } ++ }*/ + break; + } + #if defined(__i386) || defined(__x86_64) +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc +index 1112035bc5a..f838abe02da 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc +@@ -325,14 +325,14 @@ LinuxDumper::ElfFileIdentifierForMapping(const MappingInfo& mapping, + // Special-case linux-gate because it's not a real file. 
+ if (my_strcmp(mapping.name, kLinuxGateLibraryName) == 0) { + void* linux_gate = NULL; +- if (pid_ == sys_getpid()) { ++ /*if (pid_ == sys_getpid()) { + linux_gate = reinterpret_cast(mapping.start_addr); + } else { + linux_gate = allocator_.Alloc(mapping.size); + CopyFromProcess(linux_gate, pid_, + reinterpret_cast(mapping.start_addr), + mapping.size); +- } ++ }*/ + return FileID::ElfFileIdentifierFromMappedFile(linux_gate, identifier); + } + +@@ -355,11 +355,11 @@ LinuxDumper::ElfFileIdentifierForMapping(const MappingInfo& mapping, + return success; + } + +-void LinuxDumper::SetCrashInfoFromSigInfo(const siginfo_t& siginfo) { ++/*void LinuxDumper::SetCrashInfoFromSigInfo(const siginfo_t& siginfo) { + set_crash_address(reinterpret_cast(siginfo.si_addr)); + set_crash_signal(siginfo.si_signo); + set_crash_signal_code(siginfo.si_code); +-} ++}*/ + + const char* LinuxDumper::GetCrashSignalString() const { + switch (static_cast(crash_signal_)) { +@@ -518,7 +518,7 @@ bool LinuxDumper::ReadAuxv() { + return false; + } + +- int fd = sys_open(auxv_path, O_RDONLY, 0); ++ /*int fd = sys_open(auxv_path, O_RDONLY, 0); + if (fd < 0) { + return false; + } +@@ -534,8 +534,8 @@ bool LinuxDumper::ReadAuxv() { + res = true; + } + } +- sys_close(fd); +- return res; ++ sys_close(fd);*/ ++ return false; + } + + bool LinuxDumper::EnumerateMappings() { +@@ -557,7 +557,7 @@ bool LinuxDumper::EnumerateMappings() { + // actual entry point to find the mapping. + const void* entry_point_loc = reinterpret_cast(auxv_[AT_ENTRY]); + +- const int fd = sys_open(maps_path, O_RDONLY, 0); ++ const int fd = -1;//sys_open(maps_path, O_RDONLY, 0); + if (fd < 0) + return false; + LineReader* const line_reader = new(allocator_) LineReader(fd); +@@ -641,7 +641,7 @@ bool LinuxDumper::EnumerateMappings() { + } + } + +- sys_close(fd); ++ //sys_close(fd); + + return !mappings_.empty(); + } +@@ -953,14 +953,14 @@ bool LinuxDumper::HandleDeletedFileInMapping(char* path) const { + return false; + + // Check to see if someone actually named their executable 'foo (deleted)'. +- struct kernel_stat exe_stat; ++ /*struct kernel_stat exe_stat; + struct kernel_stat new_path_stat; + if (sys_stat(exe_link, &exe_stat) == 0 && + sys_stat(new_path, &new_path_stat) == 0 && + exe_stat.st_dev == new_path_stat.st_dev && + exe_stat.st_ino == new_path_stat.st_ino) { + return false; +- } ++ }*/ + + my_memcpy(path, exe_link, NAME_MAX); + return true; +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h +index f4a75d90609..8e692559d6a 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h +@@ -59,7 +59,7 @@ + namespace google_breakpad { + + // Typedef for our parsing of the auxv variables in /proc/pid/auxv. 
+-#if defined(__i386) || defined(__ARM_EABI__) || \ ++#if defined(__i386) || defined(__ARM_EABI__) || defined(__loongarch__) || \ + (defined(__mips__) && _MIPS_SIM == _ABIO32) + typedef Elf32_auxv_t elf_aux_entry; + #elif defined(__x86_64) || defined(__aarch64__) || \ +@@ -173,7 +173,7 @@ class LinuxDumper { + unsigned int mapping_id, + wasteful_vector& identifier); + +- void SetCrashInfoFromSigInfo(const siginfo_t& siginfo); ++ //void SetCrashInfoFromSigInfo(const siginfo_t& siginfo); + + uintptr_t crash_address() const { return crash_address_; } + void set_crash_address(uintptr_t crash_address) { +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc +index 3ad48e50155..7b68905a3d4 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc +@@ -51,6 +51,8 @@ + #define TID_PTR_REGISTER "rcx" + #elif defined(__mips__) + #define TID_PTR_REGISTER "$1" ++#elif defined(__loongarch__) ++#define TID_PTR_REGISTER "$1" + #else + #error This test has not been ported to this platform. + #endif +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +index e3ddb81a659..88de7ae3062 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +@@ -38,7 +38,7 @@ + + #include "client/linux/minidump_writer/linux_ptrace_dumper.h" + +-#include ++//#include + #include + #include + #include +@@ -63,16 +63,16 @@ + static bool SuspendThread(pid_t pid) { + // This may fail if the thread has just died or debugged. + errno = 0; +- if (sys_ptrace(PTRACE_ATTACH, pid, NULL, NULL) != 0 && +- errno != 0) { +- return false; +- } +- while (sys_waitpid(pid, NULL, __WALL) < 0) { ++ //if (sys_ptrace(PTRACE_ATTACH, pid, NULL, NULL) != 0 && ++ // errno != 0) { ++ // return false; ++ //} ++ /*while (sys_waitpid(pid, NULL, __WALL) < 0) { + if (errno != EINTR) { + sys_ptrace(PTRACE_DETACH, pid, NULL, NULL); + return false; + } +- } ++ }*/ + #if defined(__i386) || defined(__x86_64) + // On x86, the stack pointer is NULL or -1, when executing trusted code in + // the seccomp sandbox. Not only does this cause difficulties down the line +@@ -98,7 +98,7 @@ static bool SuspendThread(pid_t pid) { + + // Resumes a thread by detaching from it. 
+ static bool ResumeThread(pid_t pid) { +- return sys_ptrace(PTRACE_DETACH, pid, NULL, NULL) >= 0; ++ return false;//sys_ptrace(PTRACE_DETACH, pid, NULL, NULL) >= 0; + } + + namespace google_breakpad { +@@ -132,7 +132,7 @@ bool LinuxPtraceDumper::BuildProcPath(char* path, pid_t pid, + + bool LinuxPtraceDumper::CopyFromProcess(void* dest, pid_t child, + const void* src, size_t length) { +- unsigned long tmp = 55; ++ /*unsigned long tmp = 55; + size_t done = 0; + static const size_t word_size = sizeof(tmp); + uint8_t* const local = (uint8_t*) dest; +@@ -145,14 +145,14 @@ bool LinuxPtraceDumper::CopyFromProcess(void* dest, pid_t child, + } + my_memcpy(local + done, &tmp, l); + done += l; +- } ++ }*/ + return true; + } + + bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) + { +-#ifdef PTRACE_GETREGSET +- struct iovec io; ++//#ifdef PTRACE_GETREGSET ++ /*struct iovec io; + info->GetGeneralPurposeRegisters(&io.iov_base, &io.iov_len); + if (sys_ptrace(PTRACE_GETREGSET, tid, (void*)NT_PRSTATUS, (void*)&io) == -1) { + return false; +@@ -161,36 +161,36 @@ bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) + info->GetFloatingPointRegisters(&io.iov_base, &io.iov_len); + if (sys_ptrace(PTRACE_GETREGSET, tid, (void*)NT_FPREGSET, (void*)&io) == -1) { + return false; +- } +- return true; +-#else ++ }*/ ++// return true; ++//#else + return false; +-#endif ++//#endif + } + + bool LinuxPtraceDumper::ReadRegisters(ThreadInfo* info, pid_t tid) { +-#ifdef PTRACE_GETREGS +- void* gp_addr; +- info->GetGeneralPurposeRegisters(&gp_addr, NULL); +- if (sys_ptrace(PTRACE_GETREGS, tid, NULL, gp_addr) == -1) { +- return false; +- } +- +-#if !(defined(__ANDROID__) && defined(__ARM_EABI__)) ++//#ifdef PTRACE_GETREGS ++// void* gp_addr; ++// info->GetGeneralPurposeRegisters(&gp_addr, NULL); ++// if (sys_ptrace(PTRACE_GETREGS, tid, NULL, gp_addr) == -1) { ++// return false; ++// } ++// ++//#if !(defined(__ANDROID__) && defined(__ARM_EABI__)) + // When running an arm build on an arm64 device, attempting to get the + // floating point registers fails. On Android, the floating point registers + // aren't written to the cpu context anyway, so just don't get them here. + // See http://crbug.com/508324 +- void* fp_addr; +- info->GetFloatingPointRegisters(&fp_addr, NULL); +- if (sys_ptrace(PTRACE_GETFPREGS, tid, NULL, fp_addr) == -1) { +- return false; +- } +-#endif // !(defined(__ANDROID__) && defined(__ARM_EABI__)) +- return true; +-#else // PTRACE_GETREGS ++// void* fp_addr; ++// info->GetFloatingPointRegisters(&fp_addr, NULL); ++// if (sys_ptrace(PTRACE_GETFPREGS, tid, NULL, fp_addr) == -1) { ++// return false; ++// } ++//#endif // !(defined(__ANDROID__) && defined(__ARM_EABI__)) ++// return true; ++//#else // PTRACE_GETREGS + return false; +-#endif ++//#endif + } + + // Read thread info from /proc/$pid/status. 
+@@ -208,7 +208,7 @@ bool LinuxPtraceDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + if (!BuildProcPath(status_path, tid, "status")) + return false; + +- const int fd = sys_open(status_path, O_RDONLY, 0); ++ /*const int fd = sys_open(status_path, O_RDONLY, 0); + if (fd < 0) + return false; + +@@ -227,7 +227,7 @@ bool LinuxPtraceDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + + line_reader->PopLine(line_len); + } +- sys_close(fd); ++ sys_close(fd);*/ + + if (info->ppid == -1 || info->tgid == -1) + return false; +@@ -295,6 +295,7 @@ bool LinuxPtraceDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + my_memcpy(&stack_pointer, &info->regs.ARM_sp, sizeof(info->regs.ARM_sp)); + #elif defined(__aarch64__) + my_memcpy(&stack_pointer, &info->regs.sp, sizeof(info->regs.sp)); ++#elif defined(__loongarch__) + #elif defined(__mips__) + stack_pointer = + reinterpret_cast(info->mcontext.gregs[MD_CONTEXT_MIPS_REG_SP]); +@@ -347,7 +348,7 @@ bool LinuxPtraceDumper::EnumerateThreads() { + if (!BuildProcPath(task_path, pid_, "task")) + return false; + +- const int fd = sys_open(task_path, O_RDONLY | O_DIRECTORY, 0); ++ /*const int fd = sys_open(task_path, O_RDONLY | O_DIRECTORY, 0); + if (fd < 0) + return false; + DirectoryReader* dir_reader = new(allocator_) DirectoryReader(fd); +@@ -369,7 +370,7 @@ bool LinuxPtraceDumper::EnumerateThreads() { + dir_reader->PopEntry(); + } + +- sys_close(fd); ++ sys_close(fd);*/ + return true; + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc +index f8cdf2a1c6a..6f5d4af9752 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc +@@ -136,7 +136,7 @@ class MinidumpWriter { + : fd_(minidump_fd), + path_(minidump_path), + ucontext_(context ? &context->context : NULL), +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + float_state_(context ? 
&context->float_state : NULL), + #endif + dumper_(dumper), +@@ -468,7 +468,7 @@ class MinidumpWriter { + if (!cpu.Allocate()) + return false; + my_memset(cpu.get(), 0, sizeof(RawContextCPU)); +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + UContextReader::FillCPUContext(cpu.get(), ucontext_, float_state_); + #else + UContextReader::FillCPUContext(cpu.get(), ucontext_); +@@ -1203,6 +1203,10 @@ class MinidumpWriter { + sys_close(fd); + } + ++ return true; ++ } ++#elif defined(__loongarch__) ++ bool WriteCPUInformation(MDRawSystemInfo* sys_info) { + return true; + } + #else +@@ -1210,7 +1214,7 @@ class MinidumpWriter { + #endif + + bool WriteFile(MDLocationDescriptor* result, const char* filename) { +- const int fd = sys_open(filename, O_RDONLY, 0); ++ const int fd = -1;//sys_open(filename, O_RDONLY, 0); + if (fd < 0) + return false; + +@@ -1227,7 +1231,7 @@ class MinidumpWriter { + buffers->len = 0; + + size_t total = 0; +- for (Buffers* bufptr = buffers;;) { ++ /*for (Buffers* bufptr = buffers;;) { + ssize_t r; + do { + r = sys_read(fd, &bufptr->data[bufptr->len], kBufSize - bufptr->len); +@@ -1245,7 +1249,7 @@ class MinidumpWriter { + bufptr->len = 0; + } + } +- sys_close(fd); ++ sys_close(fd);*/ + + if (!total) + return false; +@@ -1333,7 +1337,7 @@ class MinidumpWriter { + const char* path_; // Path to the file where the minidum should be written. + + const ucontext_t* const ucontext_; // also from the signal handler +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + const google_breakpad::fpstate_t* const float_state_; // ditto + #endif + LinuxDumper* dumper_; +@@ -1375,7 +1379,7 @@ bool WriteMinidumpImpl(const char* minidump_path, + if (blob_size != sizeof(ExceptionHandler::CrashContext)) + return false; + context = reinterpret_cast(blob); +- dumper.SetCrashInfoFromSigInfo(context->siginfo); ++ //dumper.SetCrashInfoFromSigInfo(context->siginfo); + dumper.set_crash_thread(context->tid); + } + MinidumpWriter writer(minidump_path, minidump_fd, context, mappings, +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc +index a1957f324a9..a267cc976ee 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc +@@ -104,7 +104,7 @@ MinidumpFileWriter::~MinidumpFileWriter() { + bool MinidumpFileWriter::Open(const char *path) { + assert(file_ == -1); + #if defined(__linux__) && __linux__ +- file_ = sys_open(path, O_WRONLY | O_CREAT | O_EXCL, 0600); ++ //file_ = sys_open(path, O_WRONLY | O_CREAT | O_EXCL, 0600); + #else + file_ = open(path, O_WRONLY | O_CREAT | O_EXCL, 0600); + #endif +@@ -135,7 +135,7 @@ bool MinidumpFileWriter::Close() { + } + #endif + #if defined(__linux__) && __linux__ +- result = (sys_close(file_) == 0); ++ //result = (sys_close(file_) == 0); + #else + result = (close(file_) == 0); + #endif +@@ -318,11 +318,11 @@ bool MinidumpFileWriter::Copy(MDRVA position, const void *src, ssize_t size) { + + // Seek and write the data + #if defined(__linux__) && __linux__ +- if (sys_lseek(file_, position, SEEK_SET) == static_cast(position)) { ++ /*if (sys_lseek(file_, position, SEEK_SET) == static_cast(position)) { + if (sys_write(file_, src, size) == size) { + 
return true; + } +- } ++ }*/ + #else + if (lseek(file_, position, SEEK_SET) == static_cast(position)) { + if (write(file_, src, size) == size) { +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc +index 4e938269f26..0a053d6af71 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc +@@ -59,12 +59,14 @@ MemoryMappedFile::~MemoryMappedFile() { + bool MemoryMappedFile::Map(const char* path, size_t offset) { + Unmap(); + ++ return false; ++ /* + int fd = sys_open(path, O_RDONLY, 0); + if (fd == -1) { + return false; + } + +-#if defined(__x86_64__) || defined(__aarch64__) || \ ++#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch__) || \ + (defined(__mips__) && _MIPS_SIM == _ABI64) + + struct kernel_stat st; +@@ -94,12 +96,12 @@ bool MemoryMappedFile::Map(const char* path, size_t offset) { + } + + content_.Set(data, file_len - offset); +- return true; ++ return true;*/ + } + + void MemoryMappedFile::Unmap() { + if (content_.data()) { +- sys_munmap(const_cast(content_.data()), content_.length()); ++ //sys_munmap(const_cast(content_.data()), content_.length()); + content_.Set(NULL, 0); + } + } +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc +index 870c28af3b5..612d9d6064e 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc +@@ -42,11 +42,11 @@ bool SafeReadLink(const char* path, char* buffer, size_t buffer_size) { + // one byte longer than the expected path length. Also, sys_readlink() + // returns the actual path length on success, which does not count the + // NULL byte, so |result_size| should be less than |buffer_size|. 
+- ssize_t result_size = sys_readlink(path, buffer, buffer_size); ++ /*ssize_t result_size = sys_readlink(path, buffer, buffer_size); + if (result_size >= 0 && static_cast(result_size) < buffer_size) { + buffer[result_size] = '\0'; + return true; +- } ++ }*/ + return false; + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h +index a3159ea46c8..949740e31d7 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h +@@ -42,7 +42,7 @@ + #include + #endif + +-#ifdef __APPLE__ ++#if defined(__APPLE__) + #define sys_mmap mmap + #define sys_munmap munmap + #define MAP_ANONYMOUS MAP_ANON +@@ -117,7 +117,7 @@ class PageAllocator { + private: + uint8_t *GetNPages(size_t num_pages) { + void *a = sys_mmap(NULL, page_size_ * num_pages, PROT_READ | PROT_WRITE, +- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ MAP_PRIVATE | 0x20 /*MAP_ANONYMOUS*/, -1, 0); + if (a == MAP_FAILED) + return NULL; + +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S b/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S +index b13d8642e7d..d6fbc7eb0fd 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S +@@ -42,7 +42,7 @@ name_end: + .balign NOTE_ALIGN + desc: + #if defined(__LP64__) +- .quad CRASHPAD_INFO_SYMBOL - desc ++ .quad CRASHPAD_INFO_SYMBOL + #else + .long CRASHPAD_INFO_SYMBOL - desc + #endif // __LP64__ +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc +index a13407605f7..143c65426e3 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc +@@ -126,6 +126,8 @@ std::string MinidumpMiscInfoDebugBuildString() { + static constexpr char kCPU[] = "mips"; + #elif defined(ARCH_CPU_MIPS64EL) + static constexpr char kCPU[] = "mips64"; ++#elif defined(ARCH_CPU_LA64) ++ static constexpr char kCPU[] = "la64"; + #else + #error define kCPU for this CPU + #endif +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc +index a51626ccdc0..38986b4a422 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc +@@ -107,7 +107,7 @@ void CaptureMemory::PointedToByContext(const CPUContext& context, + MaybeCaptureMemoryAround(delegate, context.arm->regs[i]); + } + } +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + for (size_t i = 0; i < base::size(context.mipsel->regs); ++i) { + MaybeCaptureMemoryAround(delegate, context.mipsel->regs[i]); + } +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h +index 811a7209587..b5284a72e45 100644 +--- 
a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h +@@ -43,7 +43,10 @@ enum CPUArchitecture { + kCPUArchitectureMIPSEL, + + //! \brief 64-bit MIPSEL. +- kCPUArchitectureMIPS64EL ++ kCPUArchitectureMIPS64EL, ++ ++ //! \brief 64-bit LoongArch. ++ kCPUArchitectureLA64 + }; + + } // namespace crashpad +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc +index 6fb8d7e719f..bda28212fb2 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc +@@ -196,6 +196,7 @@ bool CPUContext::Is64Bit() const { + case kCPUArchitectureX86_64: + case kCPUArchitectureARM64: + case kCPUArchitectureMIPS64EL: ++ case kCPUArchitectureLA64: + return true; + case kCPUArchitectureX86: + case kCPUArchitectureARM: +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S +index 16b5d499d7b..9ccf51733aa 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S +@@ -43,7 +43,7 @@ name_end: + .balign NOTE_ALIGN + desc: + #if defined(__LP64__) +- .quad TEST_CRASHPAD_INFO_SYMBOL - desc ++ .quad TEST_CRASHPAD_INFO_SYMBOL + #else + .long TEST_CRASHPAD_INFO_SYMBOL - desc + #endif // __LP64__ +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc +index cd40b3b12d6..af5e21b7874 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc +@@ -333,6 +333,8 @@ bool ExceptionSnapshotLinux::Initialize(ProcessReaderLinux* process_reader, + + thread_id_ = thread_id; + ++// TODO:LA ++#if !defined(ARCH_CPU_LA64) + if (process_reader->Is64Bit()) { + if (!ReadContext(process_reader, context_address) || + !ReadSiginfo(process_reader, siginfo_address)) { +@@ -344,6 +346,7 @@ bool ExceptionSnapshotLinux::Initialize(ProcessReaderLinux* process_reader, + return false; + } + } ++#endif + + INITIALIZATION_STATE_SET_VALID(initialized_); + return true; +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc +index b96abfe74fe..3d6591fad3f 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc +@@ -108,6 +108,8 @@ void ProcessReaderLinux::Thread::InitializeStack(ProcessReaderLinux* reader) { + #elif defined(ARCH_CPU_MIPS_FAMILY) + stack_pointer = reader->Is64Bit() ? thread_info.thread_context.t64.regs[29] + : thread_info.thread_context.t32.regs[29]; ++#elif defined(ARCH_CPU_LA64) ++// TODO:LA + #else + #error Port. 
+ #endif +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h +index 110024680bd..2fa76e9843e 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h +@@ -422,6 +422,7 @@ static_assert(offsetof(UContext, mcontext.fpregs) == + "context offset mismatch"); + #endif + ++#elif defined(ARCH_CPU_LA64) + #else + #error Port. + #endif // ARCH_CPU_X86_FAMILY +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc +index 8564d3d4557..820b0eae06a 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc +@@ -203,6 +203,7 @@ CPUArchitecture SystemSnapshotLinux::GetCPUArchitecture() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + return process_reader_->Is64Bit() ? kCPUArchitectureMIPS64EL + : kCPUArchitectureMIPSEL; ++#elif defined(ARCH_CPU_LA64) + #else + #error port to your architecture + #endif +@@ -218,6 +219,8 @@ uint32_t SystemSnapshotLinux::CPURevision() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + // Not implementable on MIPS + return 0; ++#elif defined(ARCH_CPU_LA64) ++ return 0; + #else + #error port to your architecture + #endif +@@ -238,6 +241,8 @@ std::string SystemSnapshotLinux::CPUVendor() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + // Not implementable on MIPS + return std::string(); ++#elif defined(ARCH_CPU_LA64) ++ return std::string(); + #else + #error port to your architecture + #endif +@@ -371,6 +376,8 @@ bool SystemSnapshotLinux::NXEnabled() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + // Not implementable on MIPS + return false; ++#elif defined(ARCH_CPU_LA64) ++ return false; + #else + #error Port. + #endif // ARCH_CPU_X86_FAMILY +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc +index e3e2bebddb9..c96d5b63383 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc +@@ -186,6 +186,7 @@ bool ThreadSnapshotLinux::Initialize(ProcessReaderLinux* process_reader, + thread.thread_info.float_context.f32, + context_.mipsel); + } ++#elif defined(ARCH_CPU_LA64) + #else + #error Port. 
+ #endif +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h +index 44cc6f6d973..8d6665b9bd3 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h +@@ -62,7 +62,7 @@ class ThreadSnapshotLinux final : public ThreadSnapshot { + #if defined(ARCH_CPU_X86_FAMILY) + CPUContextX86 x86; + CPUContextX86_64 x86_64; +-#elif defined(ARCH_CPU_ARM_FAMILY) ++#elif defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_LA64) + CPUContextARM arm; + CPUContextARM64 arm64; + #elif defined(ARCH_CPU_MIPS_FAMILY) +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc +index 557e0d36357..cbf2b2be0c6 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc +@@ -273,7 +273,7 @@ bool GetThreadArea64(pid_t tid, + } + return true; + } +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // PTRACE_GETREGSET, introduced in Linux 2.6.34 (2225a122ae26), requires kernel + // support enabled by HAVE_ARCH_TRACEHOOK. This has been set for x86 (including + // x86_64) since Linux 2.6.28 (99bbc4b1e677a), but for MIPS only since +@@ -296,7 +296,7 @@ bool GetGeneralPurposeRegistersLegacy(pid_t tid, + // ptrace unsupported on MIPS for kernels older than 3.13 + #if defined(ARCH_CPU_MIPSEL) + #define THREAD_CONTEXT_FIELD t32 +-#elif defined(ARCH_CPU_MIPS64EL) ++#elif defined(ARCH_CPU_MIPS64EL) || defined(ARCH_CPU_LA64) + #define THREAD_CONTEXT_FIELD t64 + #endif + for (size_t reg = 0; reg < 32; ++reg) { +@@ -385,6 +385,9 @@ bool GetThreadArea64(pid_t tid, + const ThreadContext& context, + LinuxVMAddress* address, + bool can_log) { ++// TODO:LA ++#if !defined(ARCH_CPU_LA64) ++ + void* result; + #if defined(ARCH_CPU_MIPSEL) + if (ptrace(PTRACE_GET_THREAD_AREA_3264, tid, nullptr, &result) != 0) { +@@ -395,6 +398,7 @@ bool GetThreadArea64(pid_t tid, + return false; + } + *address = FromPointerCast(result); ++#endif + return true; + } + +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h +index 5b55c24a76d..489e350c421 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h +@@ -67,7 +67,7 @@ union ThreadContext { + uint32_t pc; + uint32_t cpsr; + uint32_t orig_r0; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects output format of static int gpr32_get(), defined in + // arch/mips/kernel/ptrace.c in kernel source + uint32_t padding0_[6]; +@@ -122,7 +122,7 @@ union ThreadContext { + uint64_t sp; + uint64_t pc; + uint64_t pstate; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects output format of static int gpr64_get(), defined in + // arch/mips/kernel/ptrace.c in kernel source + uint64_t regs[32]; +@@ -141,13 +141,13 @@ union ThreadContext { + using NativeThreadContext = user_regs_struct; + #elif defined(ARCH_CPU_ARMEL) + using 
NativeThreadContext = user_regs; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // No appropriate NativeThreadsContext type available for MIPS + #else + #error Port. + #endif // ARCH_CPU_X86_FAMILY || ARCH_CPU_ARM64 + +-#if !defined(ARCH_CPU_MIPS_FAMILY) ++#if !defined(ARCH_CPU_MIPS_FAMILY) && !defined(ARCH_CPU_LA64) + #if defined(ARCH_CPU_32_BITS) + static_assert(sizeof(t32_t) == sizeof(NativeThreadContext), "Size mismatch"); + #else // ARCH_CPU_64_BITS +@@ -209,7 +209,7 @@ union FloatContext { + + bool have_fpregs; + bool have_vfp; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects data format filled by ptrace_getfpregs() in + // arch/mips/kernel/ptrace.c + struct { +@@ -246,7 +246,7 @@ union FloatContext { + uint32_t fpsr; + uint32_t fpcr; + uint8_t padding[8]; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects data format filled by ptrace_getfpregs() in + // arch/mips/kernel/ptrace.c + double fpregs[32]; +@@ -278,7 +278,7 @@ union FloatContext { + #endif + #elif defined(ARCH_CPU_ARM64) + static_assert(sizeof(f64) == sizeof(user_fpsimd_struct), "Size mismatch"); +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // No appropriate floating point context native type for available MIPS. + #else + #error Port. +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium b/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium +index 0043df2c504..8cbb66d9d1c 100644 +--- a/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium ++++ b/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium +@@ -129,517 +129,6 @@ incompatible with the GPLv2 and v3. To the best of our knowledge, they are + compatible with the LGPL. + + +-******************************************************************************** +- +-libavcodec/arm/jrevdct_arm.S +- +-C-like prototype : +- void j_rev_dct_arm(DCTBLOCK data) +- +- With DCTBLOCK being a pointer to an array of 64 'signed shorts' +- +- Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) +- +- Permission is hereby granted, free of charge, to any person obtaining a copy +- of this software and associated documentation files (the "Software"), to deal +- in the Software without restriction, including without limitation the rights +- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +- copies of the Software, and to permit persons to whom the Software is +- furnished to do so, subject to the following conditions: +- +- The above copyright notice and this permission notice shall be included in +- all copies or substantial portions of the Software. +- +- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-******************************************************************************** +- +-libavcodec/arm/vp8dsp_armv6.S +- +-VP8 ARMv6 optimisations +- +-Copyright (c) 2010 Google Inc. 
+-Copyright (c) 2010 Rob Clark +-Copyright (c) 2011 Mans Rullgard +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-This code was partially ported from libvpx, which uses this license: +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions are +-met: +- +-* Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +- +-* Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in +-the documentation and/or other materials provided with the +-distribution. +- +-* Neither the name of Google nor the names of its contributors may +-be used to endorse or promote products derived from this software +-without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- +-******************************************************************************** +- +-libavcodec/mips/acelp_filters_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-various filters for ACELP-based codecs optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/acelp_vectors_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-adaptive and fixed codebook vector operations for ACELP-based codecs +-optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. 
+- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/amrwbdec_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/celp_filters_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. 
Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-various filters for CELP-based codecs optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/celp_math_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-Math operations optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/fft_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Stanislav Ocovaj (socovaj@mips.com) +-Author: Zoran Lukic (zoranl@mips.com) +- +-Optimized MDCT/IMDCT and FFT transforms +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. 
+- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/mpegaudiodsp_mips_float.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Bojan Zivkovic (bojan@mips.com) +- +-MPEG Audio decoder optimized for MIPS floating-point architecture +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/x86/xvididct.asm +- +-XVID MPEG-4 VIDEO CODEC +- +- Conversion from gcc syntax to x264asm syntax with modifications +- by Christophe Gisquet +- +- =========== SSE2 inverse discrete cosine transform =========== +- +- Copyright(C) 2003 Pascal Massimino +- +- Conversion to gcc syntax with modifications +- by Alexander Strange +- +- Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid. 
+- +- Vertical pass is an implementation of the scheme: +- Loeffler C., Ligtenberg A., and Moschytz C.S.: +- Practical Fast 1D DCT Algorithm with Eleven Multiplications, +- Proc. ICASSP 1989, 988-991. +- +- Horizontal pass is a double 4x4 vector/matrix multiplication, +- (see also Intel's Application Note 922: +- http://developer.intel.com/vtune/cbts/strmsimd/922down.htm +- Copyright (C) 1999 Intel Corporation) +- +- More details at http://skal.planet-d.net/coding/dct.html +- +- ======= MMX and XMM forward discrete cosine transform ======= +- +- Copyright(C) 2001 Peter Ross +- +- Originally provided by Intel at AP-922 +- http://developer.intel.com/vtune/cbts/strmsimd/922down.htm +- (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm) +- but in a limited edition. +- New macro implements a column part for precise iDCT +- The routine precision now satisfies IEEE standard 1180-1990. +- +- Copyright(C) 2000-2001 Peter Gubanov +- Rounding trick Copyright(C) 2000 Michel Lespinasse +- +- http://www.elecard.com/peter/idct.html +- http://www.linuxvideo.org/mpeg2dec/ +- +- These examples contain code fragments for first stage iDCT 8x8 +- (for rows) and first stage DCT 8x8 (for columns) +- +- conversion to gcc syntax by Michael Niedermayer +- +- ====================================================================== +- +- This file is part of FFmpeg. +- +- FFmpeg is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- FFmpeg is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public License +- along with FFmpeg; if not, write to the Free Software Foundation, +- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- + ******************************************************************************** + + libavformat/oggparsetheora.c +@@ -784,19 +273,14 @@ libavcodec/fft_fixed_32.c + libavcodec/fft_init_table.c + libavcodec/fft_table.h + libavcodec/mdct_fixed_32.c +-libavcodec/mips/aacdec_mips.c + libavcodec/mips/aacdec_mips.h +-libavcodec/mips/aacpsdsp_mips.c +-libavcodec/mips/aacsbr_mips.c + libavcodec/mips/aacsbr_mips.h + libavcodec/mips/amrwbdec_mips.h + libavcodec/mips/compute_antialias_fixed.h + libavcodec/mips/compute_antialias_float.h + libavcodec/mips/lsp_mips.h +-libavcodec/mips/sbrdsp_mips.c + libavutil/fixed_dsp.c + libavutil/fixed_dsp.h +-libavutil/mips/float_dsp_mips.c + libavutil/mips/libm_mips.h + libavutil/softfloat_tables.h + +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/config.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/config.h +new file mode 100644 +index 00000000000..a4351739eb4 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/config.h +@@ -0,0 +1,2589 @@ ++/* Automatically generated by configure - do not modify! 
*/ ++#ifndef FFMPEG_CONFIG_H ++#define FFMPEG_CONFIG_H ++/* #define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --enable-libopus --disable-debug --disable-bzlib --disable-error-resilience --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,libopus,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio,vp9' --extra-cflags=-I/mnt/chromium/src/third_party/opus/src/include --disable-linux-perf --x86asmexe=nasm --optflags='\"-O2\"' --enable-decoder='theora,vp8' --enable-parser='vp3,vp8' --target-os=linux --enable-pic --cc=clang --cxx=clang++ --ld=clang --enable-decoder='aac,h264' --enable-demuxer=aac --enable-parser='aac,h264'" -- elide long configuration string from binary */ ++#define FFMPEG_LICENSE "LGPL version 2.1 or later" ++#define CONFIG_THIS_YEAR 2020 ++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg" ++#define AVCONV_DATADIR "/usr/local/share/ffmpeg" ++#define CC_IDENT "clang version 8.0.1" ++#define av_restrict restrict ++#define EXTERN_PREFIX "" ++#define EXTERN_ASM ++#define BUILDSUF "" ++#define SLIBSUF ".so" ++#define HAVE_MMX2 HAVE_MMXEXT ++#define SWS_MAX_FILTER_SIZE 256 ++#define ARCH_AARCH64 0 ++#define ARCH_ALPHA 0 ++#define ARCH_ARM 0 ++#define ARCH_AVR32 0 ++#define ARCH_AVR32_AP 0 ++#define ARCH_AVR32_UC 0 ++#define ARCH_BFIN 0 ++#define ARCH_IA64 0 ++#define ARCH_M68K 0 ++#define ARCH_MIPS 0 ++#define ARCH_MIPS64 0 ++#define ARCH_PARISC 0 ++#define ARCH_PPC 0 ++#define ARCH_PPC64 0 ++#define ARCH_S390 0 ++#define ARCH_SH4 0 ++#define ARCH_SPARC 0 ++#define ARCH_SPARC64 0 ++#define ARCH_TILEGX 0 ++#define ARCH_TILEPRO 0 ++#define ARCH_TOMI 0 ++#define ARCH_X86 0 ++#define ARCH_X86_32 0 ++#define ARCH_X86_64 0 ++#define HAVE_ARMV5TE 0 ++#define HAVE_ARMV6 0 ++#define HAVE_ARMV6T2 0 ++#define HAVE_ARMV8 0 ++#define HAVE_NEON 0 ++#define HAVE_VFP 0 ++#define HAVE_VFPV3 0 ++#define HAVE_SETEND 0 ++#define HAVE_ALTIVEC 0 ++#define HAVE_DCBZL 0 ++#define HAVE_LDBRX 0 ++#define HAVE_POWER8 0 ++#define HAVE_PPC4XX 0 ++#define HAVE_VSX 0 ++#define HAVE_AESNI 0 ++#define HAVE_AMD3DNOW 0 ++#define HAVE_AMD3DNOWEXT 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_FMA3 0 ++#define HAVE_FMA4 0 ++#define HAVE_MMX 0 ++#define HAVE_MMXEXT 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 ++#define HAVE_SSE3 0 ++#define HAVE_SSE4 0 ++#define HAVE_SSE42 0 ++#define HAVE_SSSE3 0 ++#define HAVE_XOP 0 ++#define HAVE_CPUNOP 0 ++#define HAVE_I686 0 ++#define HAVE_MIPSFPU 0 ++#define HAVE_MIPS32R2 0 ++#define HAVE_MIPS32R5 0 ++#define HAVE_MIPS64R2 0 ++#define HAVE_MIPS32R6 0 ++#define HAVE_MIPS64R6 0 ++#define HAVE_MIPSDSP 0 ++#define HAVE_MIPSDSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MSA2 0 ++#define HAVE_LOONGSON2 0 ++#define HAVE_LOONGSON3 0 ++#define HAVE_MMI 0 ++#define HAVE_ARMV5TE_EXTERNAL 0 ++#define HAVE_ARMV6_EXTERNAL 0 ++#define HAVE_ARMV6T2_EXTERNAL 0 ++#define HAVE_ARMV8_EXTERNAL 0 ++#define HAVE_NEON_EXTERNAL 0 ++#define HAVE_VFP_EXTERNAL 0 ++#define HAVE_VFPV3_EXTERNAL 0 ++#define 
HAVE_SETEND_EXTERNAL 0 ++#define HAVE_ALTIVEC_EXTERNAL 0 ++#define HAVE_DCBZL_EXTERNAL 0 ++#define HAVE_LDBRX_EXTERNAL 0 ++#define HAVE_POWER8_EXTERNAL 0 ++#define HAVE_PPC4XX_EXTERNAL 0 ++#define HAVE_VSX_EXTERNAL 0 ++#define HAVE_AESNI_EXTERNAL 0 ++#define HAVE_AMD3DNOW_EXTERNAL 0 ++#define HAVE_AMD3DNOWEXT_EXTERNAL 0 ++#define HAVE_AVX_EXTERNAL 0 ++#define HAVE_AVX2_EXTERNAL 0 ++#define HAVE_AVX512_EXTERNAL 0 ++#define HAVE_FMA3_EXTERNAL 0 ++#define HAVE_FMA4_EXTERNAL 0 ++#define HAVE_MMX_EXTERNAL 0 ++#define HAVE_MMXEXT_EXTERNAL 0 ++#define HAVE_SSE_EXTERNAL 0 ++#define HAVE_SSE2_EXTERNAL 0 ++#define HAVE_SSE3_EXTERNAL 0 ++#define HAVE_SSE4_EXTERNAL 0 ++#define HAVE_SSE42_EXTERNAL 0 ++#define HAVE_SSSE3_EXTERNAL 0 ++#define HAVE_XOP_EXTERNAL 0 ++#define HAVE_CPUNOP_EXTERNAL 0 ++#define HAVE_I686_EXTERNAL 0 ++#define HAVE_MIPSFPU_EXTERNAL 0 ++#define HAVE_MIPS32R2_EXTERNAL 0 ++#define HAVE_MIPS32R5_EXTERNAL 0 ++#define HAVE_MIPS64R2_EXTERNAL 0 ++#define HAVE_MIPS32R6_EXTERNAL 0 ++#define HAVE_MIPS64R6_EXTERNAL 0 ++#define HAVE_MIPSDSP_EXTERNAL 0 ++#define HAVE_MIPSDSPR2_EXTERNAL 0 ++#define HAVE_MSA_EXTERNAL 0 ++#define HAVE_MSA2_EXTERNAL 0 ++#define HAVE_LOONGSON2_EXTERNAL 0 ++#define HAVE_LOONGSON3_EXTERNAL 0 ++#define HAVE_MMI_EXTERNAL 0 ++#define HAVE_ARMV5TE_INLINE 0 ++#define HAVE_ARMV6_INLINE 0 ++#define HAVE_ARMV6T2_INLINE 0 ++#define HAVE_ARMV8_INLINE 0 ++#define HAVE_NEON_INLINE 0 ++#define HAVE_VFP_INLINE 0 ++#define HAVE_VFPV3_INLINE 0 ++#define HAVE_SETEND_INLINE 0 ++#define HAVE_ALTIVEC_INLINE 0 ++#define HAVE_DCBZL_INLINE 0 ++#define HAVE_LDBRX_INLINE 0 ++#define HAVE_POWER8_INLINE 0 ++#define HAVE_PPC4XX_INLINE 0 ++#define HAVE_VSX_INLINE 0 ++#define HAVE_AESNI_INLINE 0 ++#define HAVE_AMD3DNOW_INLINE 0 ++#define HAVE_AMD3DNOWEXT_INLINE 0 ++#define HAVE_AVX_INLINE 0 ++#define HAVE_AVX2_INLINE 0 ++#define HAVE_AVX512_INLINE 0 ++#define HAVE_FMA3_INLINE 0 ++#define HAVE_FMA4_INLINE 0 ++#define HAVE_MMX_INLINE 0 ++#define HAVE_MMXEXT_INLINE 0 ++#define HAVE_SSE_INLINE 0 ++#define HAVE_SSE2_INLINE 0 ++#define HAVE_SSE3_INLINE 0 ++#define HAVE_SSE4_INLINE 0 ++#define HAVE_SSE42_INLINE 0 ++#define HAVE_SSSE3_INLINE 0 ++#define HAVE_XOP_INLINE 0 ++#define HAVE_CPUNOP_INLINE 0 ++#define HAVE_I686_INLINE 0 ++#define HAVE_MIPSFPU_INLINE 0 ++#define HAVE_MIPS32R2_INLINE 0 ++#define HAVE_MIPS32R5_INLINE 0 ++#define HAVE_MIPS64R2_INLINE 0 ++#define HAVE_MIPS32R6_INLINE 0 ++#define HAVE_MIPS64R6_INLINE 0 ++#define HAVE_MIPSDSP_INLINE 0 ++#define HAVE_MIPSDSPR2_INLINE 0 ++#define HAVE_MSA_INLINE 0 ++#define HAVE_MSA2_INLINE 0 ++#define HAVE_LOONGSON2_INLINE 0 ++#define HAVE_LOONGSON3_INLINE 0 ++#define HAVE_MMI_INLINE 0 ++#define HAVE_ALIGNED_STACK 0 ++#define HAVE_FAST_64BIT 0 ++#define HAVE_FAST_CLZ 0 ++#define HAVE_FAST_CMOV 0 ++#define HAVE_LOCAL_ALIGNED 0 ++#define HAVE_SIMD_ALIGN_16 0 ++#define HAVE_SIMD_ALIGN_32 0 ++#define HAVE_SIMD_ALIGN_64 0 ++#define HAVE_ATOMIC_CAS_PTR 0 ++#define HAVE_MACHINE_RW_BARRIER 0 ++#define HAVE_MEMORYBARRIER 0 ++#define HAVE_MM_EMPTY 0 ++#define HAVE_RDTSC 0 ++#define HAVE_SEM_TIMEDWAIT 1 ++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 ++#define HAVE_CABS 0 ++#define HAVE_CEXP 0 ++#define HAVE_INLINE_ASM 1 ++#define HAVE_SYMVER 0 ++#define HAVE_X86ASM 0 ++#define HAVE_BIGENDIAN 0 ++#define HAVE_FAST_UNALIGNED 0 ++#define HAVE_ARPA_INET_H 0 ++#define HAVE_ASM_TYPES_H 1 ++#define HAVE_CDIO_PARANOIA_H 0 ++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0 ++#define HAVE_CUDA_H 0 ++#define HAVE_DISPATCH_DISPATCH_H 0 ++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0 ++#define 
HAVE_DEV_BKTR_IOCTL_METEOR_H 0 ++#define HAVE_DEV_IC_BT8XX_H 0 ++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0 ++#define HAVE_DIRECT_H 0 ++#define HAVE_DIRENT_H 1 ++#define HAVE_DXGIDEBUG_H 0 ++#define HAVE_DXVA_H 0 ++#define HAVE_ES2_GL_H 0 ++#define HAVE_GSM_H 0 ++#define HAVE_IO_H 0 ++#define HAVE_LINUX_PERF_EVENT_H 1 ++#define HAVE_MACHINE_IOCTL_BT848_H 0 ++#define HAVE_MACHINE_IOCTL_METEOR_H 0 ++#define HAVE_MALLOC_H 1 ++#define HAVE_OPENCV2_CORE_CORE_C_H 0 ++#define HAVE_OPENGL_GL3_H 0 ++#define HAVE_POLL_H 1 ++#define HAVE_SYS_PARAM_H 1 ++#define HAVE_SYS_RESOURCE_H 1 ++#define HAVE_SYS_SELECT_H 1 ++#define HAVE_SYS_SOUNDCARD_H 1 ++#define HAVE_SYS_TIME_H 1 ++#define HAVE_SYS_UN_H 1 ++#define HAVE_SYS_VIDEOIO_H 0 ++#define HAVE_TERMIOS_H 1 ++#define HAVE_UDPLITE_H 0 ++#define HAVE_UNISTD_H 1 ++#define HAVE_VALGRIND_VALGRIND_H 0 /* #define HAVE_VALGRIND_VALGRIND_H 0 -- forced to 0. See https://crbug.com/590440 */ ++#define HAVE_WINDOWS_H 0 ++#define HAVE_WINSOCK2_H 0 ++#define HAVE_INTRINSICS_NEON 0 ++#define HAVE_ATANF 1 ++#define HAVE_ATAN2F 1 ++#define HAVE_CBRT 1 ++#define HAVE_CBRTF 1 ++#define HAVE_COPYSIGN 1 ++#define HAVE_COSF 1 ++#define HAVE_ERF 1 ++#define HAVE_EXP2 1 ++#define HAVE_EXP2F 1 ++#define HAVE_EXPF 1 ++#define HAVE_HYPOT 1 ++#define HAVE_ISFINITE 1 ++#define HAVE_ISINF 1 ++#define HAVE_ISNAN 1 ++#define HAVE_LDEXPF 1 ++#define HAVE_LLRINT 1 ++#define HAVE_LLRINTF 1 ++#define HAVE_LOG2 1 ++#define HAVE_LOG2F 1 ++#define HAVE_LOG10F 1 ++#define HAVE_LRINT 1 ++#define HAVE_LRINTF 1 ++#define HAVE_POWF 1 ++#define HAVE_RINT 1 ++#define HAVE_ROUND 1 ++#define HAVE_ROUNDF 1 ++#define HAVE_SINF 1 ++#define HAVE_TRUNC 1 ++#define HAVE_TRUNCF 1 ++#define HAVE_DOS_PATHS 0 ++#define HAVE_LIBC_MSVCRT 0 ++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0 ++#define HAVE_SECTION_DATA_REL_RO 1 ++#define HAVE_THREADS 1 ++#define HAVE_UWP 0 ++#define HAVE_WINRT 0 ++#define HAVE_ACCESS 1 ++#define HAVE_ALIGNED_MALLOC 0 ++#define HAVE_ARC4RANDOM 0 ++#define HAVE_CLOCK_GETTIME 1 ++#define HAVE_CLOSESOCKET 0 ++#define HAVE_COMMANDLINETOARGVW 0 ++#define HAVE_FCNTL 1 ++#define HAVE_GETADDRINFO 0 ++#define HAVE_GETHRTIME 0 ++#define HAVE_GETOPT 1 ++#define HAVE_GETMODULEHANDLE 0 ++#define HAVE_GETPROCESSAFFINITYMASK 0 ++#define HAVE_GETPROCESSMEMORYINFO 0 ++#define HAVE_GETPROCESSTIMES 0 ++#define HAVE_GETRUSAGE 1 ++#define HAVE_GETSTDHANDLE 0 ++#define HAVE_GETSYSTEMTIMEASFILETIME 0 ++#define HAVE_GETTIMEOFDAY 1 ++#define HAVE_GLOB 1 ++#define HAVE_GLXGETPROCADDRESS 0 ++#define HAVE_GMTIME_R 1 ++#define HAVE_INET_ATON 0 ++#define HAVE_ISATTY 1 ++#define HAVE_KBHIT 0 ++#define HAVE_LOCALTIME_R 1 ++#define HAVE_LSTAT 1 ++#define HAVE_LZO1X_999_COMPRESS 0 ++#define HAVE_MACH_ABSOLUTE_TIME 0 ++#define HAVE_MAPVIEWOFFILE 0 ++#define HAVE_MEMALIGN 1 ++#define HAVE_MKSTEMP 1 ++#define HAVE_MMAP 1 ++#define HAVE_MPROTECT 1 ++#define HAVE_NANOSLEEP 1 ++#define HAVE_PEEKNAMEDPIPE 0 ++#define HAVE_POSIX_MEMALIGN 1 ++#define HAVE_PTHREAD_CANCEL 1 ++#define HAVE_SCHED_GETAFFINITY 1 ++#define HAVE_SECITEMIMPORT 0 ++#define HAVE_SETCONSOLETEXTATTRIBUTE 0 ++#define HAVE_SETCONSOLECTRLHANDLER 0 ++#define HAVE_SETDLLDIRECTORY 0 ++#define HAVE_SETMODE 0 ++#define HAVE_SETRLIMIT 1 ++#define HAVE_SLEEP 0 ++#define HAVE_STRERROR_R 1 ++#define HAVE_SYSCONF 1 ++#define HAVE_SYSCTL 0 /* #define HAVE_SYSCTL 1 -- forced to 0 for Fuchsia */ ++#define HAVE_USLEEP 1 ++#define HAVE_UTGETOSTYPEFROMSTRING 0 ++#define HAVE_VIRTUALALLOC 0 ++#define 
HAVE_WGLGETPROCADDRESS 0 ++#define HAVE_BCRYPT 0 ++#define HAVE_VAAPI_DRM 0 ++#define HAVE_VAAPI_X11 0 ++#define HAVE_VDPAU_X11 0 ++#define HAVE_PTHREADS 1 ++#define HAVE_OS2THREADS 0 ++#define HAVE_W32THREADS 0 ++#define HAVE_AS_ARCH_DIRECTIVE 0 ++#define HAVE_AS_DN_DIRECTIVE 0 ++#define HAVE_AS_FPU_DIRECTIVE 0 ++#define HAVE_AS_FUNC 0 ++#define HAVE_AS_OBJECT_ARCH 0 ++#define HAVE_ASM_MOD_Q 0 ++#define HAVE_BLOCKS_EXTENSION 0 ++#define HAVE_EBP_AVAILABLE 0 ++#define HAVE_EBX_AVAILABLE 0 ++#define HAVE_GNU_AS 0 ++#define HAVE_GNU_WINDRES 0 ++#define HAVE_IBM_ASM 0 ++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0 ++#define HAVE_INLINE_ASM_LABELS 1 ++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 1 ++#define HAVE_PRAGMA_DEPRECATED 1 ++#define HAVE_RSYNC_CONTIMEOUT 0 ++#define HAVE_SYMVER_ASM_LABEL 1 ++#define HAVE_SYMVER_GNU_ASM 1 ++#define HAVE_VFP_ARGS 0 ++#define HAVE_XFORM_ASM 0 ++#define HAVE_XMM_CLOBBERS 0 ++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0 ++#define HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR 0 ++#define HAVE_SOCKLEN_T 0 ++#define HAVE_STRUCT_ADDRINFO 0 ++#define HAVE_STRUCT_GROUP_SOURCE_REQ 0 ++#define HAVE_STRUCT_IP_MREQ_SOURCE 0 ++#define HAVE_STRUCT_IPV6_MREQ 0 ++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0 ++#define HAVE_STRUCT_POLLFD 0 ++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 1 ++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0 ++#define HAVE_STRUCT_SOCKADDR_IN6 0 ++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0 ++#define HAVE_STRUCT_SOCKADDR_STORAGE 0 ++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 ++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0 ++#define HAVE_MAKEINFO 1 ++#define HAVE_MAKEINFO_HTML 1 ++#define HAVE_OPENCL_D3D11 0 ++#define HAVE_OPENCL_DRM_ARM 0 ++#define HAVE_OPENCL_DRM_BEIGNET 0 ++#define HAVE_OPENCL_DXVA2 0 ++#define HAVE_OPENCL_VAAPI_BEIGNET 0 ++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0 ++#define HAVE_PERL 1 ++#define HAVE_POD2MAN 1 ++#define HAVE_TEXI2HTML 0 ++#define CONFIG_DOC 0 ++#define CONFIG_HTMLPAGES 0 ++#define CONFIG_MANPAGES 0 ++#define CONFIG_PODPAGES 0 ++#define CONFIG_TXTPAGES 0 ++#define CONFIG_AVIO_LIST_DIR_EXAMPLE 1 ++#define CONFIG_AVIO_READING_EXAMPLE 1 ++#define CONFIG_DECODE_AUDIO_EXAMPLE 1 ++#define CONFIG_DECODE_VIDEO_EXAMPLE 1 ++#define CONFIG_DEMUXING_DECODING_EXAMPLE 1 ++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1 ++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1 ++#define CONFIG_EXTRACT_MVS_EXAMPLE 1 ++#define CONFIG_FILTER_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0 ++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 1 ++#define CONFIG_HW_DECODE_EXAMPLE 1 ++#define CONFIG_METADATA_EXAMPLE 1 ++#define CONFIG_MUXING_EXAMPLE 0 ++#define CONFIG_QSVDEC_EXAMPLE 0 ++#define CONFIG_REMUXING_EXAMPLE 1 ++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0 ++#define CONFIG_SCALING_VIDEO_EXAMPLE 0 ++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0 ++#define CONFIG_TRANSCODING_EXAMPLE 0 ++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0 ++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0 ++#define CONFIG_AVISYNTH 0 ++#define CONFIG_FREI0R 0 ++#define CONFIG_LIBCDIO 0 ++#define CONFIG_LIBDAVS2 0 ++#define CONFIG_LIBRUBBERBAND 0 ++#define CONFIG_LIBVIDSTAB 0 ++#define CONFIG_LIBX264 0 ++#define CONFIG_LIBX265 0 ++#define CONFIG_LIBXAVS 0 ++#define CONFIG_LIBXAVS2 0 ++#define CONFIG_LIBXVID 0 ++#define CONFIG_DECKLINK 0 ++#define CONFIG_LIBFDK_AAC 0 ++#define CONFIG_OPENSSL 0 ++#define 
CONFIG_LIBTLS 0 ++#define CONFIG_GMP 0 ++#define CONFIG_LIBARIBB24 0 ++#define CONFIG_LIBLENSFUN 0 ++#define CONFIG_LIBOPENCORE_AMRNB 0 ++#define CONFIG_LIBOPENCORE_AMRWB 0 ++#define CONFIG_LIBVMAF 0 ++#define CONFIG_LIBVO_AMRWBENC 0 ++#define CONFIG_MBEDTLS 0 ++#define CONFIG_RKMPP 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_CHROMAPRINT 0 ++#define CONFIG_GCRYPT 0 ++#define CONFIG_GNUTLS 0 ++#define CONFIG_JNI 0 ++#define CONFIG_LADSPA 0 ++#define CONFIG_LIBAOM 0 ++#define CONFIG_LIBASS 0 ++#define CONFIG_LIBBLURAY 0 ++#define CONFIG_LIBBS2B 0 ++#define CONFIG_LIBCACA 0 ++#define CONFIG_LIBCELT 0 ++#define CONFIG_LIBCODEC2 0 ++#define CONFIG_LIBDAV1D 0 ++#define CONFIG_LIBDC1394 0 ++#define CONFIG_LIBDRM 0 ++#define CONFIG_LIBFLITE 0 ++#define CONFIG_LIBFONTCONFIG 0 ++#define CONFIG_LIBFREETYPE 0 ++#define CONFIG_LIBFRIBIDI 0 ++#define CONFIG_LIBGLSLANG 0 ++#define CONFIG_LIBGME 0 ++#define CONFIG_LIBGSM 0 ++#define CONFIG_LIBIEC61883 0 ++#define CONFIG_LIBILBC 0 ++#define CONFIG_LIBJACK 0 ++#define CONFIG_LIBKLVANC 0 ++#define CONFIG_LIBKVAZAAR 0 ++#define CONFIG_LIBMODPLUG 0 ++#define CONFIG_LIBMP3LAME 0 ++#define CONFIG_LIBMYSOFA 0 ++#define CONFIG_LIBOPENCV 0 ++#define CONFIG_LIBOPENH264 0 ++#define CONFIG_LIBOPENJPEG 0 ++#define CONFIG_LIBOPENMPT 0 ++#define CONFIG_LIBOPUS 1 ++#define CONFIG_LIBPULSE 0 ++#define CONFIG_LIBRABBITMQ 0 ++#define CONFIG_LIBRAV1E 0 ++#define CONFIG_LIBRSVG 0 ++#define CONFIG_LIBRTMP 0 ++#define CONFIG_LIBSHINE 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_LIBSNAPPY 0 ++#define CONFIG_LIBSOXR 0 ++#define CONFIG_LIBSPEEX 0 ++#define CONFIG_LIBSRT 0 ++#define CONFIG_LIBSSH 0 ++#define CONFIG_LIBTENSORFLOW 0 ++#define CONFIG_LIBTESSERACT 0 ++#define CONFIG_LIBTHEORA 0 ++#define CONFIG_LIBTWOLAME 0 ++#define CONFIG_LIBV4L2 0 ++#define CONFIG_LIBVORBIS 0 ++#define CONFIG_LIBVPX 0 ++#define CONFIG_LIBWAVPACK 0 ++#define CONFIG_LIBWEBP 0 ++#define CONFIG_LIBXML2 0 ++#define CONFIG_LIBZIMG 0 ++#define CONFIG_LIBZMQ 0 ++#define CONFIG_LIBZVBI 0 ++#define CONFIG_LV2 0 ++#define CONFIG_MEDIACODEC 0 ++#define CONFIG_OPENAL 0 ++#define CONFIG_OPENGL 0 ++#define CONFIG_POCKETSPHINX 0 ++#define CONFIG_VAPOURSYNTH 0 ++#define CONFIG_ALSA 0 ++#define CONFIG_APPKIT 0 ++#define CONFIG_AVFOUNDATION 0 ++#define CONFIG_BZLIB 0 ++#define CONFIG_COREIMAGE 0 ++#define CONFIG_ICONV 0 ++#define CONFIG_LIBXCB 0 ++#define CONFIG_LIBXCB_SHM 0 ++#define CONFIG_LIBXCB_SHAPE 0 ++#define CONFIG_LIBXCB_XFIXES 0 ++#define CONFIG_LZMA 0 ++#define CONFIG_SCHANNEL 0 ++#define CONFIG_SDL2 0 ++#define CONFIG_SECURETRANSPORT 0 ++#define CONFIG_SNDIO 0 ++#define CONFIG_XLIB 0 ++#define CONFIG_ZLIB 0 ++#define CONFIG_CUDA_NVCC 0 ++#define CONFIG_CUDA_SDK 0 ++#define CONFIG_LIBNPP 0 ++#define CONFIG_LIBMFX 0 ++#define CONFIG_MMAL 0 ++#define CONFIG_OMX 0 ++#define CONFIG_OPENCL 0 ++#define CONFIG_VULKAN 0 ++#define CONFIG_AMF 0 ++#define CONFIG_AUDIOTOOLBOX 0 ++#define CONFIG_CRYSTALHD 0 ++#define CONFIG_CUDA 0 ++#define CONFIG_CUDA_LLVM 0 ++#define CONFIG_CUVID 0 ++#define CONFIG_D3D11VA 0 ++#define CONFIG_DXVA2 0 ++#define CONFIG_FFNVCODEC 0 ++#define CONFIG_NVDEC 0 ++#define CONFIG_NVENC 0 ++#define CONFIG_VAAPI 0 ++#define CONFIG_VDPAU 0 ++#define CONFIG_VIDEOTOOLBOX 0 ++#define CONFIG_V4L2_M2M 0 ++#define CONFIG_XVMC 0 ++#define CONFIG_FTRAPV 0 ++#define CONFIG_GRAY 0 ++#define CONFIG_HARDCODED_TABLES 0 ++#define CONFIG_OMX_RPI 0 ++#define CONFIG_RUNTIME_CPUDETECT 1 ++#define CONFIG_SAFE_BITSTREAM_READER 1 ++#define CONFIG_SHARED 0 ++#define CONFIG_SMALL 0 ++#define CONFIG_STATIC 1 
++#define CONFIG_SWSCALE_ALPHA 1 ++#define CONFIG_GPL 0 ++#define CONFIG_NONFREE 0 ++#define CONFIG_VERSION3 0 ++#define CONFIG_AVDEVICE 0 ++#define CONFIG_AVFILTER 0 ++#define CONFIG_SWSCALE 0 ++#define CONFIG_POSTPROC 0 ++#define CONFIG_AVFORMAT 1 ++#define CONFIG_AVCODEC 1 ++#define CONFIG_SWRESAMPLE 0 ++#define CONFIG_AVRESAMPLE 0 ++#define CONFIG_AVUTIL 1 ++#define CONFIG_FFPLAY 0 ++#define CONFIG_FFPROBE 0 ++#define CONFIG_FFMPEG 0 ++#define CONFIG_DCT 1 ++#define CONFIG_DWT 0 ++#define CONFIG_ERROR_RESILIENCE 0 ++#define CONFIG_FAAN 0 ++#define CONFIG_FAST_UNALIGNED 0 ++#define CONFIG_FFT 1 ++#define CONFIG_LSP 0 ++#define CONFIG_LZO 0 ++#define CONFIG_MDCT 1 ++#define CONFIG_PIXELUTILS 0 ++#define CONFIG_NETWORK 0 ++#define CONFIG_RDFT 1 ++#define CONFIG_AUTODETECT 0 ++#define CONFIG_FONTCONFIG 0 ++#define CONFIG_LARGE_TESTS 1 ++#define CONFIG_LINUX_PERF 0 ++#define CONFIG_MEMORY_POISONING 0 ++#define CONFIG_NEON_CLOBBER_TEST 0 ++#define CONFIG_OSSFUZZ 0 ++#define CONFIG_PIC 1 ++#define CONFIG_THUMB 0 ++#define CONFIG_VALGRIND_BACKTRACE 0 ++#define CONFIG_XMM_CLOBBER_TEST 0 ++#define CONFIG_BSFS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_ENCODERS 0 ++#define CONFIG_HWACCELS 0 ++#define CONFIG_PARSERS 1 ++#define CONFIG_INDEVS 0 ++#define CONFIG_OUTDEVS 0 ++#define CONFIG_FILTERS 0 ++#define CONFIG_DEMUXERS 1 ++#define CONFIG_MUXERS 0 ++#define CONFIG_PROTOCOLS 0 ++#define CONFIG_AANDCTTABLES 0 ++#define CONFIG_AC3DSP 0 ++#define CONFIG_ADTS_HEADER 1 ++#define CONFIG_AUDIO_FRAME_QUEUE 0 ++#define CONFIG_AUDIODSP 0 ++#define CONFIG_BLOCKDSP 0 ++#define CONFIG_BSWAPDSP 0 ++#define CONFIG_CABAC 1 ++#define CONFIG_CBS 0 ++#define CONFIG_CBS_AV1 0 ++#define CONFIG_CBS_H264 0 ++#define CONFIG_CBS_H265 0 ++#define CONFIG_CBS_JPEG 0 ++#define CONFIG_CBS_MPEG2 0 ++#define CONFIG_CBS_VP9 0 ++#define CONFIG_DIRAC_PARSE 1 ++#define CONFIG_DNN 0 ++#define CONFIG_DVPROFILE 0 ++#define CONFIG_EXIF 0 ++#define CONFIG_FAANDCT 0 ++#define CONFIG_FAANIDCT 0 ++#define CONFIG_FDCTDSP 0 ++#define CONFIG_FLACDSP 1 ++#define CONFIG_FMTCONVERT 0 ++#define CONFIG_FRAME_THREAD_ENCODER 0 ++#define CONFIG_G722DSP 0 ++#define CONFIG_GOLOMB 1 ++#define CONFIG_GPLV3 0 ++#define CONFIG_H263DSP 0 ++#define CONFIG_H264CHROMA 1 ++#define CONFIG_H264DSP 1 ++#define CONFIG_H264PARSE 1 ++#define CONFIG_H264PRED 1 ++#define CONFIG_H264QPEL 1 ++#define CONFIG_HEVCPARSE 0 ++#define CONFIG_HPELDSP 1 ++#define CONFIG_HUFFMAN 0 ++#define CONFIG_HUFFYUVDSP 0 ++#define CONFIG_HUFFYUVENCDSP 0 ++#define CONFIG_IDCTDSP 0 ++#define CONFIG_IIRFILTER 0 ++#define CONFIG_MDCT15 1 ++#define CONFIG_INTRAX8 0 ++#define CONFIG_ISO_MEDIA 1 ++#define CONFIG_IVIDSP 0 ++#define CONFIG_JPEGTABLES 0 ++#define CONFIG_LGPLV3 0 ++#define CONFIG_LIBX262 0 ++#define CONFIG_LLAUDDSP 0 ++#define CONFIG_LLVIDDSP 0 ++#define CONFIG_LLVIDENCDSP 0 ++#define CONFIG_LPC 0 ++#define CONFIG_LZF 0 ++#define CONFIG_ME_CMP 0 ++#define CONFIG_MPEG_ER 0 ++#define CONFIG_MPEGAUDIO 1 ++#define CONFIG_MPEGAUDIODSP 1 ++#define CONFIG_MPEGAUDIOHEADER 1 ++#define CONFIG_MPEGVIDEO 0 ++#define CONFIG_MPEGVIDEOENC 0 ++#define CONFIG_MSS34DSP 0 ++#define CONFIG_PIXBLOCKDSP 0 ++#define CONFIG_QPELDSP 0 ++#define CONFIG_QSV 0 ++#define CONFIG_QSVDEC 0 ++#define CONFIG_QSVENC 0 ++#define CONFIG_QSVVPP 0 ++#define CONFIG_RANGECODER 0 ++#define CONFIG_RIFFDEC 1 ++#define CONFIG_RIFFENC 0 ++#define CONFIG_RTPDEC 0 ++#define CONFIG_RTPENC_CHAIN 0 ++#define CONFIG_RV34DSP 0 ++#define CONFIG_SCENE_SAD 0 ++#define CONFIG_SINEWIN 1 ++#define CONFIG_SNAPPY 0 ++#define CONFIG_SRTP 0 
++#define CONFIG_STARTCODE 1 ++#define CONFIG_TEXTUREDSP 0 ++#define CONFIG_TEXTUREDSPENC 0 ++#define CONFIG_TPELDSP 0 ++#define CONFIG_VAAPI_1 0 ++#define CONFIG_VAAPI_ENCODE 0 ++#define CONFIG_VC1DSP 0 ++#define CONFIG_VIDEODSP 1 ++#define CONFIG_VP3DSP 1 ++#define CONFIG_VP56DSP 0 ++#define CONFIG_VP8DSP 1 ++#define CONFIG_WMA_FREQS 0 ++#define CONFIG_WMV2DSP 0 ++#define CONFIG_AAC_ADTSTOASC_BSF 0 ++#define CONFIG_AV1_FRAME_MERGE_BSF 0 ++#define CONFIG_AV1_FRAME_SPLIT_BSF 0 ++#define CONFIG_AV1_METADATA_BSF 0 ++#define CONFIG_CHOMP_BSF 0 ++#define CONFIG_DUMP_EXTRADATA_BSF 0 ++#define CONFIG_DCA_CORE_BSF 0 ++#define CONFIG_EAC3_CORE_BSF 0 ++#define CONFIG_EXTRACT_EXTRADATA_BSF 0 ++#define CONFIG_FILTER_UNITS_BSF 0 ++#define CONFIG_H264_METADATA_BSF 0 ++#define CONFIG_H264_MP4TOANNEXB_BSF 0 ++#define CONFIG_H264_REDUNDANT_PPS_BSF 0 ++#define CONFIG_HAPQA_EXTRACT_BSF 0 ++#define CONFIG_HEVC_METADATA_BSF 0 ++#define CONFIG_HEVC_MP4TOANNEXB_BSF 0 ++#define CONFIG_IMX_DUMP_HEADER_BSF 0 ++#define CONFIG_MJPEG2JPEG_BSF 0 ++#define CONFIG_MJPEGA_DUMP_HEADER_BSF 0 ++#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0 ++#define CONFIG_MPEG2_METADATA_BSF 0 ++#define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0 ++#define CONFIG_MOV2TEXTSUB_BSF 0 ++#define CONFIG_NOISE_BSF 0 ++#define CONFIG_NULL_BSF 1 ++#define CONFIG_PRORES_METADATA_BSF 0 ++#define CONFIG_REMOVE_EXTRADATA_BSF 0 ++#define CONFIG_TEXT2MOVSUB_BSF 0 ++#define CONFIG_TRACE_HEADERS_BSF 0 ++#define CONFIG_TRUEHD_CORE_BSF 0 ++#define CONFIG_VP9_METADATA_BSF 0 ++#define CONFIG_VP9_RAW_REORDER_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0 ++#define CONFIG_AASC_DECODER 0 ++#define CONFIG_AIC_DECODER 0 ++#define CONFIG_ALIAS_PIX_DECODER 0 ++#define CONFIG_AGM_DECODER 0 ++#define CONFIG_AMV_DECODER 0 ++#define CONFIG_ANM_DECODER 0 ++#define CONFIG_ANSI_DECODER 0 ++#define CONFIG_APNG_DECODER 0 ++#define CONFIG_ARBC_DECODER 0 ++#define CONFIG_ASV1_DECODER 0 ++#define CONFIG_ASV2_DECODER 0 ++#define CONFIG_AURA_DECODER 0 ++#define CONFIG_AURA2_DECODER 0 ++#define CONFIG_AVRP_DECODER 0 ++#define CONFIG_AVRN_DECODER 0 ++#define CONFIG_AVS_DECODER 0 ++#define CONFIG_AVUI_DECODER 0 ++#define CONFIG_AYUV_DECODER 0 ++#define CONFIG_BETHSOFTVID_DECODER 0 ++#define CONFIG_BFI_DECODER 0 ++#define CONFIG_BINK_DECODER 0 ++#define CONFIG_BITPACKED_DECODER 0 ++#define CONFIG_BMP_DECODER 0 ++#define CONFIG_BMV_VIDEO_DECODER 0 ++#define CONFIG_BRENDER_PIX_DECODER 0 ++#define CONFIG_C93_DECODER 0 ++#define CONFIG_CAVS_DECODER 0 ++#define CONFIG_CDGRAPHICS_DECODER 0 ++#define CONFIG_CDTOONS_DECODER 0 ++#define CONFIG_CDXL_DECODER 0 ++#define CONFIG_CFHD_DECODER 0 ++#define CONFIG_CINEPAK_DECODER 0 ++#define CONFIG_CLEARVIDEO_DECODER 0 ++#define CONFIG_CLJR_DECODER 0 ++#define CONFIG_CLLC_DECODER 0 ++#define CONFIG_COMFORTNOISE_DECODER 0 ++#define CONFIG_CPIA_DECODER 0 ++#define CONFIG_CSCD_DECODER 0 ++#define CONFIG_CYUV_DECODER 0 ++#define CONFIG_DDS_DECODER 0 ++#define CONFIG_DFA_DECODER 0 ++#define CONFIG_DIRAC_DECODER 0 ++#define CONFIG_DNXHD_DECODER 0 ++#define CONFIG_DPX_DECODER 0 ++#define CONFIG_DSICINVIDEO_DECODER 0 ++#define CONFIG_DVAUDIO_DECODER 0 ++#define CONFIG_DVVIDEO_DECODER 0 ++#define CONFIG_DXA_DECODER 0 ++#define CONFIG_DXTORY_DECODER 0 ++#define CONFIG_DXV_DECODER 0 ++#define CONFIG_EACMV_DECODER 0 ++#define CONFIG_EAMAD_DECODER 0 ++#define CONFIG_EATGQ_DECODER 0 ++#define CONFIG_EATGV_DECODER 0 ++#define CONFIG_EATQI_DECODER 0 ++#define CONFIG_EIGHTBPS_DECODER 0 ++#define CONFIG_EIGHTSVX_EXP_DECODER 0 ++#define 
CONFIG_EIGHTSVX_FIB_DECODER 0 ++#define CONFIG_ESCAPE124_DECODER 0 ++#define CONFIG_ESCAPE130_DECODER 0 ++#define CONFIG_EXR_DECODER 0 ++#define CONFIG_FFV1_DECODER 0 ++#define CONFIG_FFVHUFF_DECODER 0 ++#define CONFIG_FIC_DECODER 0 ++#define CONFIG_FITS_DECODER 0 ++#define CONFIG_FLASHSV_DECODER 0 ++#define CONFIG_FLASHSV2_DECODER 0 ++#define CONFIG_FLIC_DECODER 0 ++#define CONFIG_FLV_DECODER 0 ++#define CONFIG_FMVC_DECODER 0 ++#define CONFIG_FOURXM_DECODER 0 ++#define CONFIG_FRAPS_DECODER 0 ++#define CONFIG_FRWU_DECODER 0 ++#define CONFIG_G2M_DECODER 0 ++#define CONFIG_GDV_DECODER 0 ++#define CONFIG_GIF_DECODER 0 ++#define CONFIG_H261_DECODER 0 ++#define CONFIG_H263_DECODER 0 ++#define CONFIG_H263I_DECODER 0 ++#define CONFIG_H263P_DECODER 0 ++#define CONFIG_H263_V4L2M2M_DECODER 0 ++#define CONFIG_H264_DECODER 1 ++#define CONFIG_H264_CRYSTALHD_DECODER 0 ++#define CONFIG_H264_V4L2M2M_DECODER 0 ++#define CONFIG_H264_MEDIACODEC_DECODER 0 ++#define CONFIG_H264_MMAL_DECODER 0 ++#define CONFIG_H264_QSV_DECODER 0 ++#define CONFIG_H264_RKMPP_DECODER 0 ++#define CONFIG_HAP_DECODER 0 ++#define CONFIG_HEVC_DECODER 0 ++#define CONFIG_HEVC_QSV_DECODER 0 ++#define CONFIG_HEVC_RKMPP_DECODER 0 ++#define CONFIG_HEVC_V4L2M2M_DECODER 0 ++#define CONFIG_HNM4_VIDEO_DECODER 0 ++#define CONFIG_HQ_HQA_DECODER 0 ++#define CONFIG_HQX_DECODER 0 ++#define CONFIG_HUFFYUV_DECODER 0 ++#define CONFIG_HYMT_DECODER 0 ++#define CONFIG_IDCIN_DECODER 0 ++#define CONFIG_IFF_ILBM_DECODER 0 ++#define CONFIG_IMM4_DECODER 0 ++#define CONFIG_IMM5_DECODER 0 ++#define CONFIG_INDEO2_DECODER 0 ++#define CONFIG_INDEO3_DECODER 0 ++#define CONFIG_INDEO4_DECODER 0 ++#define CONFIG_INDEO5_DECODER 0 ++#define CONFIG_INTERPLAY_VIDEO_DECODER 0 ++#define CONFIG_JPEG2000_DECODER 0 ++#define CONFIG_JPEGLS_DECODER 0 ++#define CONFIG_JV_DECODER 0 ++#define CONFIG_KGV1_DECODER 0 ++#define CONFIG_KMVC_DECODER 0 ++#define CONFIG_LAGARITH_DECODER 0 ++#define CONFIG_LOCO_DECODER 0 ++#define CONFIG_LSCR_DECODER 0 ++#define CONFIG_M101_DECODER 0 ++#define CONFIG_MAGICYUV_DECODER 0 ++#define CONFIG_MDEC_DECODER 0 ++#define CONFIG_MIMIC_DECODER 0 ++#define CONFIG_MJPEG_DECODER 0 ++#define CONFIG_MJPEGB_DECODER 0 ++#define CONFIG_MMVIDEO_DECODER 0 ++#define CONFIG_MOTIONPIXELS_DECODER 0 ++#define CONFIG_MPEG1VIDEO_DECODER 0 ++#define CONFIG_MPEG2VIDEO_DECODER 0 ++#define CONFIG_MPEG4_DECODER 0 ++#define CONFIG_MPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG4_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG4_MMAL_DECODER 0 ++#define CONFIG_MPEGVIDEO_DECODER 0 ++#define CONFIG_MPEG1_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_MMAL_DECODER 0 ++#define CONFIG_MPEG2_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG2_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_QSV_DECODER 0 ++#define CONFIG_MPEG2_MEDIACODEC_DECODER 0 ++#define CONFIG_MSA1_DECODER 0 ++#define CONFIG_MSCC_DECODER 0 ++#define CONFIG_MSMPEG4V1_DECODER 0 ++#define CONFIG_MSMPEG4V2_DECODER 0 ++#define CONFIG_MSMPEG4V3_DECODER 0 ++#define CONFIG_MSMPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MSRLE_DECODER 0 ++#define CONFIG_MSS1_DECODER 0 ++#define CONFIG_MSS2_DECODER 0 ++#define CONFIG_MSVIDEO1_DECODER 0 ++#define CONFIG_MSZH_DECODER 0 ++#define CONFIG_MTS2_DECODER 0 ++#define CONFIG_MV30_DECODER 0 ++#define CONFIG_MVC1_DECODER 0 ++#define CONFIG_MVC2_DECODER 0 ++#define CONFIG_MVDV_DECODER 0 ++#define CONFIG_MVHA_DECODER 0 ++#define CONFIG_MWSC_DECODER 0 ++#define CONFIG_MXPEG_DECODER 0 ++#define CONFIG_NUV_DECODER 0 ++#define CONFIG_PAF_VIDEO_DECODER 0 ++#define CONFIG_PAM_DECODER 0 ++#define CONFIG_PBM_DECODER 0 ++#define 
CONFIG_PCX_DECODER 0 ++#define CONFIG_PGM_DECODER 0 ++#define CONFIG_PGMYUV_DECODER 0 ++#define CONFIG_PICTOR_DECODER 0 ++#define CONFIG_PIXLET_DECODER 0 ++#define CONFIG_PNG_DECODER 0 ++#define CONFIG_PPM_DECODER 0 ++#define CONFIG_PRORES_DECODER 0 ++#define CONFIG_PROSUMER_DECODER 0 ++#define CONFIG_PSD_DECODER 0 ++#define CONFIG_PTX_DECODER 0 ++#define CONFIG_QDRAW_DECODER 0 ++#define CONFIG_QPEG_DECODER 0 ++#define CONFIG_QTRLE_DECODER 0 ++#define CONFIG_R10K_DECODER 0 ++#define CONFIG_R210_DECODER 0 ++#define CONFIG_RASC_DECODER 0 ++#define CONFIG_RAWVIDEO_DECODER 0 ++#define CONFIG_RL2_DECODER 0 ++#define CONFIG_ROQ_DECODER 0 ++#define CONFIG_RPZA_DECODER 0 ++#define CONFIG_RSCC_DECODER 0 ++#define CONFIG_RV10_DECODER 0 ++#define CONFIG_RV20_DECODER 0 ++#define CONFIG_RV30_DECODER 0 ++#define CONFIG_RV40_DECODER 0 ++#define CONFIG_S302M_DECODER 0 ++#define CONFIG_SANM_DECODER 0 ++#define CONFIG_SCPR_DECODER 0 ++#define CONFIG_SCREENPRESSO_DECODER 0 ++#define CONFIG_SGI_DECODER 0 ++#define CONFIG_SGIRLE_DECODER 0 ++#define CONFIG_SHEERVIDEO_DECODER 0 ++#define CONFIG_SMACKER_DECODER 0 ++#define CONFIG_SMC_DECODER 0 ++#define CONFIG_SMVJPEG_DECODER 0 ++#define CONFIG_SNOW_DECODER 0 ++#define CONFIG_SP5X_DECODER 0 ++#define CONFIG_SPEEDHQ_DECODER 0 ++#define CONFIG_SRGC_DECODER 0 ++#define CONFIG_SUNRAST_DECODER 0 ++#define CONFIG_SVQ1_DECODER 0 ++#define CONFIG_SVQ3_DECODER 0 ++#define CONFIG_TARGA_DECODER 0 ++#define CONFIG_TARGA_Y216_DECODER 0 ++#define CONFIG_TDSC_DECODER 0 ++#define CONFIG_THEORA_DECODER 1 ++#define CONFIG_THP_DECODER 0 ++#define CONFIG_TIERTEXSEQVIDEO_DECODER 0 ++#define CONFIG_TIFF_DECODER 0 ++#define CONFIG_TMV_DECODER 0 ++#define CONFIG_TRUEMOTION1_DECODER 0 ++#define CONFIG_TRUEMOTION2_DECODER 0 ++#define CONFIG_TRUEMOTION2RT_DECODER 0 ++#define CONFIG_TSCC_DECODER 0 ++#define CONFIG_TSCC2_DECODER 0 ++#define CONFIG_TXD_DECODER 0 ++#define CONFIG_ULTI_DECODER 0 ++#define CONFIG_UTVIDEO_DECODER 0 ++#define CONFIG_V210_DECODER 0 ++#define CONFIG_V210X_DECODER 0 ++#define CONFIG_V308_DECODER 0 ++#define CONFIG_V408_DECODER 0 ++#define CONFIG_V410_DECODER 0 ++#define CONFIG_VB_DECODER 0 ++#define CONFIG_VBLE_DECODER 0 ++#define CONFIG_VC1_DECODER 0 ++#define CONFIG_VC1_CRYSTALHD_DECODER 0 ++#define CONFIG_VC1IMAGE_DECODER 0 ++#define CONFIG_VC1_MMAL_DECODER 0 ++#define CONFIG_VC1_QSV_DECODER 0 ++#define CONFIG_VC1_V4L2M2M_DECODER 0 ++#define CONFIG_VCR1_DECODER 0 ++#define CONFIG_VMDVIDEO_DECODER 0 ++#define CONFIG_VMNC_DECODER 0 ++#define CONFIG_VP3_DECODER 1 ++#define CONFIG_VP4_DECODER 0 ++#define CONFIG_VP5_DECODER 0 ++#define CONFIG_VP6_DECODER 0 ++#define CONFIG_VP6A_DECODER 0 ++#define CONFIG_VP6F_DECODER 0 ++#define CONFIG_VP7_DECODER 0 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP8_RKMPP_DECODER 0 ++#define CONFIG_VP8_V4L2M2M_DECODER 0 ++#define CONFIG_VP9_DECODER 0 ++#define CONFIG_VP9_RKMPP_DECODER 0 ++#define CONFIG_VP9_V4L2M2M_DECODER 0 ++#define CONFIG_VQA_DECODER 0 ++#define CONFIG_WEBP_DECODER 0 ++#define CONFIG_WCMV_DECODER 0 ++#define CONFIG_WRAPPED_AVFRAME_DECODER 0 ++#define CONFIG_WMV1_DECODER 0 ++#define CONFIG_WMV2_DECODER 0 ++#define CONFIG_WMV3_DECODER 0 ++#define CONFIG_WMV3_CRYSTALHD_DECODER 0 ++#define CONFIG_WMV3IMAGE_DECODER 0 ++#define CONFIG_WNV1_DECODER 0 ++#define CONFIG_XAN_WC3_DECODER 0 ++#define CONFIG_XAN_WC4_DECODER 0 ++#define CONFIG_XBM_DECODER 0 ++#define CONFIG_XFACE_DECODER 0 ++#define CONFIG_XL_DECODER 0 ++#define CONFIG_XPM_DECODER 0 ++#define CONFIG_XWD_DECODER 0 ++#define CONFIG_Y41P_DECODER 0 ++#define 
CONFIG_YLC_DECODER 0 ++#define CONFIG_YOP_DECODER 0 ++#define CONFIG_YUV4_DECODER 0 ++#define CONFIG_ZERO12V_DECODER 0 ++#define CONFIG_ZEROCODEC_DECODER 0 ++#define CONFIG_ZLIB_DECODER 0 ++#define CONFIG_ZMBV_DECODER 0 ++#define CONFIG_AAC_DECODER 1 ++#define CONFIG_AAC_FIXED_DECODER 0 ++#define CONFIG_AAC_LATM_DECODER 0 ++#define CONFIG_AC3_DECODER 0 ++#define CONFIG_AC3_FIXED_DECODER 0 ++#define CONFIG_ACELP_KELVIN_DECODER 0 ++#define CONFIG_ALAC_DECODER 0 ++#define CONFIG_ALS_DECODER 0 ++#define CONFIG_AMRNB_DECODER 0 ++#define CONFIG_AMRWB_DECODER 0 ++#define CONFIG_APE_DECODER 0 ++#define CONFIG_APTX_DECODER 0 ++#define CONFIG_APTX_HD_DECODER 0 ++#define CONFIG_ATRAC1_DECODER 0 ++#define CONFIG_ATRAC3_DECODER 0 ++#define CONFIG_ATRAC3AL_DECODER 0 ++#define CONFIG_ATRAC3P_DECODER 0 ++#define CONFIG_ATRAC3PAL_DECODER 0 ++#define CONFIG_ATRAC9_DECODER 0 ++#define CONFIG_BINKAUDIO_DCT_DECODER 0 ++#define CONFIG_BINKAUDIO_RDFT_DECODER 0 ++#define CONFIG_BMV_AUDIO_DECODER 0 ++#define CONFIG_COOK_DECODER 0 ++#define CONFIG_DCA_DECODER 0 ++#define CONFIG_DOLBY_E_DECODER 0 ++#define CONFIG_DSD_LSBF_DECODER 0 ++#define CONFIG_DSD_MSBF_DECODER 0 ++#define CONFIG_DSD_LSBF_PLANAR_DECODER 0 ++#define CONFIG_DSD_MSBF_PLANAR_DECODER 0 ++#define CONFIG_DSICINAUDIO_DECODER 0 ++#define CONFIG_DSS_SP_DECODER 0 ++#define CONFIG_DST_DECODER 0 ++#define CONFIG_EAC3_DECODER 0 ++#define CONFIG_EVRC_DECODER 0 ++#define CONFIG_FFWAVESYNTH_DECODER 0 ++#define CONFIG_FLAC_DECODER 1 ++#define CONFIG_G723_1_DECODER 0 ++#define CONFIG_G729_DECODER 0 ++#define CONFIG_GSM_DECODER 0 ++#define CONFIG_GSM_MS_DECODER 0 ++#define CONFIG_HCA_DECODER 0 ++#define CONFIG_HCOM_DECODER 0 ++#define CONFIG_IAC_DECODER 0 ++#define CONFIG_ILBC_DECODER 0 ++#define CONFIG_IMC_DECODER 0 ++#define CONFIG_INTERPLAY_ACM_DECODER 0 ++#define CONFIG_MACE3_DECODER 0 ++#define CONFIG_MACE6_DECODER 0 ++#define CONFIG_METASOUND_DECODER 0 ++#define CONFIG_MLP_DECODER 0 ++#define CONFIG_MP1_DECODER 0 ++#define CONFIG_MP1FLOAT_DECODER 0 ++#define CONFIG_MP2_DECODER 0 ++#define CONFIG_MP2FLOAT_DECODER 0 ++#define CONFIG_MP3FLOAT_DECODER 0 ++#define CONFIG_MP3_DECODER 1 ++#define CONFIG_MP3ADUFLOAT_DECODER 0 ++#define CONFIG_MP3ADU_DECODER 0 ++#define CONFIG_MP3ON4FLOAT_DECODER 0 ++#define CONFIG_MP3ON4_DECODER 0 ++#define CONFIG_MPC7_DECODER 0 ++#define CONFIG_MPC8_DECODER 0 ++#define CONFIG_NELLYMOSER_DECODER 0 ++#define CONFIG_ON2AVC_DECODER 0 ++#define CONFIG_OPUS_DECODER 0 ++#define CONFIG_PAF_AUDIO_DECODER 0 ++#define CONFIG_QCELP_DECODER 0 ++#define CONFIG_QDM2_DECODER 0 ++#define CONFIG_QDMC_DECODER 0 ++#define CONFIG_RA_144_DECODER 0 ++#define CONFIG_RA_288_DECODER 0 ++#define CONFIG_RALF_DECODER 0 ++#define CONFIG_SBC_DECODER 0 ++#define CONFIG_SHORTEN_DECODER 0 ++#define CONFIG_SIPR_DECODER 0 ++#define CONFIG_SIREN_DECODER 0 ++#define CONFIG_SMACKAUD_DECODER 0 ++#define CONFIG_SONIC_DECODER 0 ++#define CONFIG_TAK_DECODER 0 ++#define CONFIG_TRUEHD_DECODER 0 ++#define CONFIG_TRUESPEECH_DECODER 0 ++#define CONFIG_TTA_DECODER 0 ++#define CONFIG_TWINVQ_DECODER 0 ++#define CONFIG_VMDAUDIO_DECODER 0 ++#define CONFIG_VORBIS_DECODER 1 ++#define CONFIG_WAVPACK_DECODER 0 ++#define CONFIG_WMALOSSLESS_DECODER 0 ++#define CONFIG_WMAPRO_DECODER 0 ++#define CONFIG_WMAV1_DECODER 0 ++#define CONFIG_WMAV2_DECODER 0 ++#define CONFIG_WMAVOICE_DECODER 0 ++#define CONFIG_WS_SND1_DECODER 0 ++#define CONFIG_XMA1_DECODER 0 ++#define CONFIG_XMA2_DECODER 0 ++#define CONFIG_PCM_ALAW_DECODER 1 ++#define CONFIG_PCM_BLURAY_DECODER 0 ++#define CONFIG_PCM_DVD_DECODER 0 
++#define CONFIG_PCM_F16LE_DECODER 0 ++#define CONFIG_PCM_F24LE_DECODER 0 ++#define CONFIG_PCM_F32BE_DECODER 0 ++#define CONFIG_PCM_F32LE_DECODER 1 ++#define CONFIG_PCM_F64BE_DECODER 0 ++#define CONFIG_PCM_F64LE_DECODER 0 ++#define CONFIG_PCM_LXF_DECODER 0 ++#define CONFIG_PCM_MULAW_DECODER 1 ++#define CONFIG_PCM_S8_DECODER 0 ++#define CONFIG_PCM_S8_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16BE_DECODER 1 ++#define CONFIG_PCM_S16BE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16LE_DECODER 1 ++#define CONFIG_PCM_S16LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S24BE_DECODER 1 ++#define CONFIG_PCM_S24DAUD_DECODER 0 ++#define CONFIG_PCM_S24LE_DECODER 1 ++#define CONFIG_PCM_S24LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S32BE_DECODER 0 ++#define CONFIG_PCM_S32LE_DECODER 1 ++#define CONFIG_PCM_S32LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S64BE_DECODER 0 ++#define CONFIG_PCM_S64LE_DECODER 0 ++#define CONFIG_PCM_U8_DECODER 1 ++#define CONFIG_PCM_U16BE_DECODER 0 ++#define CONFIG_PCM_U16LE_DECODER 0 ++#define CONFIG_PCM_U24BE_DECODER 0 ++#define CONFIG_PCM_U24LE_DECODER 0 ++#define CONFIG_PCM_U32BE_DECODER 0 ++#define CONFIG_PCM_U32LE_DECODER 0 ++#define CONFIG_PCM_VIDC_DECODER 0 ++#define CONFIG_DERF_DPCM_DECODER 0 ++#define CONFIG_GREMLIN_DPCM_DECODER 0 ++#define CONFIG_INTERPLAY_DPCM_DECODER 0 ++#define CONFIG_ROQ_DPCM_DECODER 0 ++#define CONFIG_SDX2_DPCM_DECODER 0 ++#define CONFIG_SOL_DPCM_DECODER 0 ++#define CONFIG_XAN_DPCM_DECODER 0 ++#define CONFIG_ADPCM_4XM_DECODER 0 ++#define CONFIG_ADPCM_ADX_DECODER 0 ++#define CONFIG_ADPCM_AFC_DECODER 0 ++#define CONFIG_ADPCM_AGM_DECODER 0 ++#define CONFIG_ADPCM_AICA_DECODER 0 ++#define CONFIG_ADPCM_ARGO_DECODER 0 ++#define CONFIG_ADPCM_CT_DECODER 0 ++#define CONFIG_ADPCM_DTK_DECODER 0 ++#define CONFIG_ADPCM_EA_DECODER 0 ++#define CONFIG_ADPCM_EA_MAXIS_XA_DECODER 0 ++#define CONFIG_ADPCM_EA_R1_DECODER 0 ++#define CONFIG_ADPCM_EA_R2_DECODER 0 ++#define CONFIG_ADPCM_EA_R3_DECODER 0 ++#define CONFIG_ADPCM_EA_XAS_DECODER 0 ++#define CONFIG_ADPCM_G722_DECODER 0 ++#define CONFIG_ADPCM_G726_DECODER 0 ++#define CONFIG_ADPCM_G726LE_DECODER 0 ++#define CONFIG_ADPCM_IMA_AMV_DECODER 0 ++#define CONFIG_ADPCM_IMA_ALP_DECODER 0 ++#define CONFIG_ADPCM_IMA_APC_DECODER 0 ++#define CONFIG_ADPCM_IMA_APM_DECODER 0 ++#define CONFIG_ADPCM_IMA_CUNNING_DECODER 0 ++#define CONFIG_ADPCM_IMA_DAT4_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK3_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK4_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_EACS_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_SEAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_ISS_DECODER 0 ++#define CONFIG_ADPCM_IMA_MTF_DECODER 0 ++#define CONFIG_ADPCM_IMA_OKI_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_DECODER 0 ++#define CONFIG_ADPCM_IMA_RAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_SSI_DECODER 0 ++#define CONFIG_ADPCM_IMA_SMJPEG_DECODER 0 ++#define CONFIG_ADPCM_IMA_WAV_DECODER 0 ++#define CONFIG_ADPCM_IMA_WS_DECODER 0 ++#define CONFIG_ADPCM_MS_DECODER 0 ++#define CONFIG_ADPCM_MTAF_DECODER 0 ++#define CONFIG_ADPCM_PSX_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_2_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_3_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_4_DECODER 0 ++#define CONFIG_ADPCM_SWF_DECODER 0 ++#define CONFIG_ADPCM_THP_DECODER 0 ++#define CONFIG_ADPCM_THP_LE_DECODER 0 ++#define CONFIG_ADPCM_VIMA_DECODER 0 ++#define CONFIG_ADPCM_XA_DECODER 0 ++#define CONFIG_ADPCM_YAMAHA_DECODER 0 ++#define CONFIG_ADPCM_ZORK_DECODER 0 ++#define CONFIG_SSA_DECODER 0 ++#define CONFIG_ASS_DECODER 0 ++#define CONFIG_CCAPTION_DECODER 0 ++#define CONFIG_DVBSUB_DECODER 0 ++#define CONFIG_DVDSUB_DECODER 0 
++#define CONFIG_JACOSUB_DECODER 0 ++#define CONFIG_MICRODVD_DECODER 0 ++#define CONFIG_MOVTEXT_DECODER 0 ++#define CONFIG_MPL2_DECODER 0 ++#define CONFIG_PGSSUB_DECODER 0 ++#define CONFIG_PJS_DECODER 0 ++#define CONFIG_REALTEXT_DECODER 0 ++#define CONFIG_SAMI_DECODER 0 ++#define CONFIG_SRT_DECODER 0 ++#define CONFIG_STL_DECODER 0 ++#define CONFIG_SUBRIP_DECODER 0 ++#define CONFIG_SUBVIEWER_DECODER 0 ++#define CONFIG_SUBVIEWER1_DECODER 0 ++#define CONFIG_TEXT_DECODER 0 ++#define CONFIG_VPLAYER_DECODER 0 ++#define CONFIG_WEBVTT_DECODER 0 ++#define CONFIG_XSUB_DECODER 0 ++#define CONFIG_AAC_AT_DECODER 0 ++#define CONFIG_AC3_AT_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_AT_DECODER 0 ++#define CONFIG_ALAC_AT_DECODER 0 ++#define CONFIG_AMR_NB_AT_DECODER 0 ++#define CONFIG_EAC3_AT_DECODER 0 ++#define CONFIG_GSM_MS_AT_DECODER 0 ++#define CONFIG_ILBC_AT_DECODER 0 ++#define CONFIG_MP1_AT_DECODER 0 ++#define CONFIG_MP2_AT_DECODER 0 ++#define CONFIG_MP3_AT_DECODER 0 ++#define CONFIG_PCM_ALAW_AT_DECODER 0 ++#define CONFIG_PCM_MULAW_AT_DECODER 0 ++#define CONFIG_QDMC_AT_DECODER 0 ++#define CONFIG_QDM2_AT_DECODER 0 ++#define CONFIG_LIBARIBB24_DECODER 0 ++#define CONFIG_LIBCELT_DECODER 0 ++#define CONFIG_LIBCODEC2_DECODER 0 ++#define CONFIG_LIBDAV1D_DECODER 0 ++#define CONFIG_LIBDAVS2_DECODER 0 ++#define CONFIG_LIBFDK_AAC_DECODER 0 ++#define CONFIG_LIBGSM_DECODER 0 ++#define CONFIG_LIBGSM_MS_DECODER 0 ++#define CONFIG_LIBILBC_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRWB_DECODER 0 ++#define CONFIG_LIBOPENJPEG_DECODER 0 ++#define CONFIG_LIBOPUS_DECODER 1 ++#define CONFIG_LIBRSVG_DECODER 0 ++#define CONFIG_LIBSPEEX_DECODER 0 ++#define CONFIG_LIBVORBIS_DECODER 0 ++#define CONFIG_LIBVPX_VP8_DECODER 0 ++#define CONFIG_LIBVPX_VP9_DECODER 0 ++#define CONFIG_LIBZVBI_TELETEXT_DECODER 0 ++#define CONFIG_BINTEXT_DECODER 0 ++#define CONFIG_XBIN_DECODER 0 ++#define CONFIG_IDF_DECODER 0 ++#define CONFIG_LIBAOM_AV1_DECODER 0 ++#define CONFIG_LIBOPENH264_DECODER 0 ++#define CONFIG_H264_CUVID_DECODER 0 ++#define CONFIG_HEVC_CUVID_DECODER 0 ++#define CONFIG_HEVC_MEDIACODEC_DECODER 0 ++#define CONFIG_MJPEG_CUVID_DECODER 0 ++#define CONFIG_MJPEG_QSV_DECODER 0 ++#define CONFIG_MPEG1_CUVID_DECODER 0 ++#define CONFIG_MPEG2_CUVID_DECODER 0 ++#define CONFIG_MPEG4_CUVID_DECODER 0 ++#define CONFIG_MPEG4_MEDIACODEC_DECODER 0 ++#define CONFIG_VC1_CUVID_DECODER 0 ++#define CONFIG_VP8_CUVID_DECODER 0 ++#define CONFIG_VP8_MEDIACODEC_DECODER 0 ++#define CONFIG_VP8_QSV_DECODER 0 ++#define CONFIG_VP9_CUVID_DECODER 0 ++#define CONFIG_VP9_MEDIACODEC_DECODER 0 ++#define CONFIG_VP9_QSV_DECODER 0 ++#define CONFIG_A64MULTI_ENCODER 0 ++#define CONFIG_A64MULTI5_ENCODER 0 ++#define CONFIG_ALIAS_PIX_ENCODER 0 ++#define CONFIG_AMV_ENCODER 0 ++#define CONFIG_APNG_ENCODER 0 ++#define CONFIG_ASV1_ENCODER 0 ++#define CONFIG_ASV2_ENCODER 0 ++#define CONFIG_AVRP_ENCODER 0 ++#define CONFIG_AVUI_ENCODER 0 ++#define CONFIG_AYUV_ENCODER 0 ++#define CONFIG_BMP_ENCODER 0 ++#define CONFIG_CINEPAK_ENCODER 0 ++#define CONFIG_CLJR_ENCODER 0 ++#define CONFIG_COMFORTNOISE_ENCODER 0 ++#define CONFIG_DNXHD_ENCODER 0 ++#define CONFIG_DPX_ENCODER 0 ++#define CONFIG_DVVIDEO_ENCODER 0 ++#define CONFIG_FFV1_ENCODER 0 ++#define CONFIG_FFVHUFF_ENCODER 0 ++#define CONFIG_FITS_ENCODER 0 ++#define CONFIG_FLASHSV_ENCODER 0 ++#define CONFIG_FLASHSV2_ENCODER 0 ++#define CONFIG_FLV_ENCODER 0 ++#define CONFIG_GIF_ENCODER 0 ++#define CONFIG_H261_ENCODER 0 ++#define CONFIG_H263_ENCODER 0 ++#define CONFIG_H263P_ENCODER 0 ++#define 
CONFIG_HAP_ENCODER 0 ++#define CONFIG_HUFFYUV_ENCODER 0 ++#define CONFIG_JPEG2000_ENCODER 0 ++#define CONFIG_JPEGLS_ENCODER 0 ++#define CONFIG_LJPEG_ENCODER 0 ++#define CONFIG_MAGICYUV_ENCODER 0 ++#define CONFIG_MJPEG_ENCODER 0 ++#define CONFIG_MPEG1VIDEO_ENCODER 0 ++#define CONFIG_MPEG2VIDEO_ENCODER 0 ++#define CONFIG_MPEG4_ENCODER 0 ++#define CONFIG_MSMPEG4V2_ENCODER 0 ++#define CONFIG_MSMPEG4V3_ENCODER 0 ++#define CONFIG_MSVIDEO1_ENCODER 0 ++#define CONFIG_PAM_ENCODER 0 ++#define CONFIG_PBM_ENCODER 0 ++#define CONFIG_PCX_ENCODER 0 ++#define CONFIG_PGM_ENCODER 0 ++#define CONFIG_PGMYUV_ENCODER 0 ++#define CONFIG_PNG_ENCODER 0 ++#define CONFIG_PPM_ENCODER 0 ++#define CONFIG_PRORES_ENCODER 0 ++#define CONFIG_PRORES_AW_ENCODER 0 ++#define CONFIG_PRORES_KS_ENCODER 0 ++#define CONFIG_QTRLE_ENCODER 0 ++#define CONFIG_R10K_ENCODER 0 ++#define CONFIG_R210_ENCODER 0 ++#define CONFIG_RAWVIDEO_ENCODER 0 ++#define CONFIG_ROQ_ENCODER 0 ++#define CONFIG_RV10_ENCODER 0 ++#define CONFIG_RV20_ENCODER 0 ++#define CONFIG_S302M_ENCODER 0 ++#define CONFIG_SGI_ENCODER 0 ++#define CONFIG_SNOW_ENCODER 0 ++#define CONFIG_SUNRAST_ENCODER 0 ++#define CONFIG_SVQ1_ENCODER 0 ++#define CONFIG_TARGA_ENCODER 0 ++#define CONFIG_TIFF_ENCODER 0 ++#define CONFIG_UTVIDEO_ENCODER 0 ++#define CONFIG_V210_ENCODER 0 ++#define CONFIG_V308_ENCODER 0 ++#define CONFIG_V408_ENCODER 0 ++#define CONFIG_V410_ENCODER 0 ++#define CONFIG_VC2_ENCODER 0 ++#define CONFIG_WRAPPED_AVFRAME_ENCODER 0 ++#define CONFIG_WMV1_ENCODER 0 ++#define CONFIG_WMV2_ENCODER 0 ++#define CONFIG_XBM_ENCODER 0 ++#define CONFIG_XFACE_ENCODER 0 ++#define CONFIG_XWD_ENCODER 0 ++#define CONFIG_Y41P_ENCODER 0 ++#define CONFIG_YUV4_ENCODER 0 ++#define CONFIG_ZLIB_ENCODER 0 ++#define CONFIG_ZMBV_ENCODER 0 ++#define CONFIG_AAC_ENCODER 0 ++#define CONFIG_AC3_ENCODER 0 ++#define CONFIG_AC3_FIXED_ENCODER 0 ++#define CONFIG_ALAC_ENCODER 0 ++#define CONFIG_APTX_ENCODER 0 ++#define CONFIG_APTX_HD_ENCODER 0 ++#define CONFIG_DCA_ENCODER 0 ++#define CONFIG_EAC3_ENCODER 0 ++#define CONFIG_FLAC_ENCODER 0 ++#define CONFIG_G723_1_ENCODER 0 ++#define CONFIG_MLP_ENCODER 0 ++#define CONFIG_MP2_ENCODER 0 ++#define CONFIG_MP2FIXED_ENCODER 0 ++#define CONFIG_NELLYMOSER_ENCODER 0 ++#define CONFIG_OPUS_ENCODER 0 ++#define CONFIG_RA_144_ENCODER 0 ++#define CONFIG_SBC_ENCODER 0 ++#define CONFIG_SONIC_ENCODER 0 ++#define CONFIG_SONIC_LS_ENCODER 0 ++#define CONFIG_TRUEHD_ENCODER 0 ++#define CONFIG_TTA_ENCODER 0 ++#define CONFIG_VORBIS_ENCODER 0 ++#define CONFIG_WAVPACK_ENCODER 0 ++#define CONFIG_WMAV1_ENCODER 0 ++#define CONFIG_WMAV2_ENCODER 0 ++#define CONFIG_PCM_ALAW_ENCODER 0 ++#define CONFIG_PCM_DVD_ENCODER 0 ++#define CONFIG_PCM_F32BE_ENCODER 0 ++#define CONFIG_PCM_F32LE_ENCODER 0 ++#define CONFIG_PCM_F64BE_ENCODER 0 ++#define CONFIG_PCM_F64LE_ENCODER 0 ++#define CONFIG_PCM_MULAW_ENCODER 0 ++#define CONFIG_PCM_S8_ENCODER 0 ++#define CONFIG_PCM_S8_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16BE_ENCODER 0 ++#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16LE_ENCODER 0 ++#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S24BE_ENCODER 0 ++#define CONFIG_PCM_S24DAUD_ENCODER 0 ++#define CONFIG_PCM_S24LE_ENCODER 0 ++#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S32BE_ENCODER 0 ++#define CONFIG_PCM_S32LE_ENCODER 0 ++#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S64BE_ENCODER 0 ++#define CONFIG_PCM_S64LE_ENCODER 0 ++#define CONFIG_PCM_U8_ENCODER 0 ++#define CONFIG_PCM_U16BE_ENCODER 0 ++#define CONFIG_PCM_U16LE_ENCODER 0 ++#define 
CONFIG_PCM_U24BE_ENCODER 0 ++#define CONFIG_PCM_U24LE_ENCODER 0 ++#define CONFIG_PCM_U32BE_ENCODER 0 ++#define CONFIG_PCM_U32LE_ENCODER 0 ++#define CONFIG_PCM_VIDC_ENCODER 0 ++#define CONFIG_ROQ_DPCM_ENCODER 0 ++#define CONFIG_ADPCM_ADX_ENCODER 0 ++#define CONFIG_ADPCM_G722_ENCODER 0 ++#define CONFIG_ADPCM_G726_ENCODER 0 ++#define CONFIG_ADPCM_G726LE_ENCODER 0 ++#define CONFIG_ADPCM_IMA_QT_ENCODER 0 ++#define CONFIG_ADPCM_IMA_WAV_ENCODER 0 ++#define CONFIG_ADPCM_MS_ENCODER 0 ++#define CONFIG_ADPCM_SWF_ENCODER 0 ++#define CONFIG_ADPCM_YAMAHA_ENCODER 0 ++#define CONFIG_SSA_ENCODER 0 ++#define CONFIG_ASS_ENCODER 0 ++#define CONFIG_DVBSUB_ENCODER 0 ++#define CONFIG_DVDSUB_ENCODER 0 ++#define CONFIG_MOVTEXT_ENCODER 0 ++#define CONFIG_SRT_ENCODER 0 ++#define CONFIG_SUBRIP_ENCODER 0 ++#define CONFIG_TEXT_ENCODER 0 ++#define CONFIG_WEBVTT_ENCODER 0 ++#define CONFIG_XSUB_ENCODER 0 ++#define CONFIG_AAC_AT_ENCODER 0 ++#define CONFIG_ALAC_AT_ENCODER 0 ++#define CONFIG_ILBC_AT_ENCODER 0 ++#define CONFIG_PCM_ALAW_AT_ENCODER 0 ++#define CONFIG_PCM_MULAW_AT_ENCODER 0 ++#define CONFIG_LIBAOM_AV1_ENCODER 0 ++#define CONFIG_LIBCODEC2_ENCODER 0 ++#define CONFIG_LIBFDK_AAC_ENCODER 0 ++#define CONFIG_LIBGSM_ENCODER 0 ++#define CONFIG_LIBGSM_MS_ENCODER 0 ++#define CONFIG_LIBILBC_ENCODER 0 ++#define CONFIG_LIBMP3LAME_ENCODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0 ++#define CONFIG_LIBOPENJPEG_ENCODER 0 ++#define CONFIG_LIBOPUS_ENCODER 0 ++#define CONFIG_LIBRAV1E_ENCODER 0 ++#define CONFIG_LIBSHINE_ENCODER 0 ++#define CONFIG_LIBSPEEX_ENCODER 0 ++#define CONFIG_LIBTHEORA_ENCODER 0 ++#define CONFIG_LIBTWOLAME_ENCODER 0 ++#define CONFIG_LIBVO_AMRWBENC_ENCODER 0 ++#define CONFIG_LIBVORBIS_ENCODER 0 ++#define CONFIG_LIBVPX_VP8_ENCODER 0 ++#define CONFIG_LIBVPX_VP9_ENCODER 0 ++#define CONFIG_LIBWAVPACK_ENCODER 0 ++#define CONFIG_LIBWEBP_ANIM_ENCODER 0 ++#define CONFIG_LIBWEBP_ENCODER 0 ++#define CONFIG_LIBX262_ENCODER 0 ++#define CONFIG_LIBX264_ENCODER 0 ++#define CONFIG_LIBX264RGB_ENCODER 0 ++#define CONFIG_LIBX265_ENCODER 0 ++#define CONFIG_LIBXAVS_ENCODER 0 ++#define CONFIG_LIBXAVS2_ENCODER 0 ++#define CONFIG_LIBXVID_ENCODER 0 ++#define CONFIG_H263_V4L2M2M_ENCODER 0 ++#define CONFIG_LIBOPENH264_ENCODER 0 ++#define CONFIG_H264_AMF_ENCODER 0 ++#define CONFIG_H264_NVENC_ENCODER 0 ++#define CONFIG_H264_OMX_ENCODER 0 ++#define CONFIG_H264_QSV_ENCODER 0 ++#define CONFIG_H264_V4L2M2M_ENCODER 0 ++#define CONFIG_H264_VAAPI_ENCODER 0 ++#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_NVENC_ENCODER 0 ++#define CONFIG_NVENC_H264_ENCODER 0 ++#define CONFIG_NVENC_HEVC_ENCODER 0 ++#define CONFIG_HEVC_AMF_ENCODER 0 ++#define CONFIG_HEVC_NVENC_ENCODER 0 ++#define CONFIG_HEVC_QSV_ENCODER 0 ++#define CONFIG_HEVC_V4L2M2M_ENCODER 0 ++#define CONFIG_HEVC_VAAPI_ENCODER 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_LIBKVAZAAR_ENCODER 0 ++#define CONFIG_MJPEG_QSV_ENCODER 0 ++#define CONFIG_MJPEG_VAAPI_ENCODER 0 ++#define CONFIG_MPEG2_QSV_ENCODER 0 ++#define CONFIG_MPEG2_VAAPI_ENCODER 0 ++#define CONFIG_MPEG4_OMX_ENCODER 0 ++#define CONFIG_MPEG4_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_VAAPI_ENCODER 0 ++#define CONFIG_VP9_VAAPI_ENCODER 0 ++#define CONFIG_VP9_QSV_ENCODER 0 ++#define CONFIG_H263_VAAPI_HWACCEL 0 ++#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_H264_D3D11VA_HWACCEL 0 ++#define CONFIG_H264_D3D11VA2_HWACCEL 0 ++#define CONFIG_H264_DXVA2_HWACCEL 0 ++#define CONFIG_H264_NVDEC_HWACCEL 0 ++#define CONFIG_H264_VAAPI_HWACCEL 0 ++#define 
CONFIG_H264_VDPAU_HWACCEL 0 ++#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA2_HWACCEL 0 ++#define CONFIG_HEVC_DXVA2_HWACCEL 0 ++#define CONFIG_HEVC_NVDEC_HWACCEL 0 ++#define CONFIG_HEVC_VAAPI_HWACCEL 0 ++#define CONFIG_HEVC_VDPAU_HWACCEL 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MJPEG_NVDEC_HWACCEL 0 ++#define CONFIG_MJPEG_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG1_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG1_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG1_XVMC_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0 ++#define CONFIG_MPEG2_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG2_DXVA2_HWACCEL 0 ++#define CONFIG_MPEG2_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG2_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG2_XVMC_HWACCEL 0 ++#define CONFIG_MPEG4_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG4_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG4_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA2_HWACCEL 0 ++#define CONFIG_VC1_DXVA2_HWACCEL 0 ++#define CONFIG_VC1_NVDEC_HWACCEL 0 ++#define CONFIG_VC1_VAAPI_HWACCEL 0 ++#define CONFIG_VC1_VDPAU_HWACCEL 0 ++#define CONFIG_VP8_NVDEC_HWACCEL 0 ++#define CONFIG_VP8_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA2_HWACCEL 0 ++#define CONFIG_VP9_DXVA2_HWACCEL 0 ++#define CONFIG_VP9_NVDEC_HWACCEL 0 ++#define CONFIG_VP9_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_VDPAU_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA2_HWACCEL 0 ++#define CONFIG_WMV3_DXVA2_HWACCEL 0 ++#define CONFIG_WMV3_NVDEC_HWACCEL 0 ++#define CONFIG_WMV3_VAAPI_HWACCEL 0 ++#define CONFIG_WMV3_VDPAU_HWACCEL 0 ++#define CONFIG_AAC_PARSER 1 ++#define CONFIG_AAC_LATM_PARSER 0 ++#define CONFIG_AC3_PARSER 0 ++#define CONFIG_ADX_PARSER 0 ++#define CONFIG_AV1_PARSER 0 ++#define CONFIG_AVS2_PARSER 0 ++#define CONFIG_BMP_PARSER 0 ++#define CONFIG_CAVSVIDEO_PARSER 0 ++#define CONFIG_COOK_PARSER 0 ++#define CONFIG_DCA_PARSER 0 ++#define CONFIG_DIRAC_PARSER 0 ++#define CONFIG_DNXHD_PARSER 0 ++#define CONFIG_DPX_PARSER 0 ++#define CONFIG_DVAUDIO_PARSER 0 ++#define CONFIG_DVBSUB_PARSER 0 ++#define CONFIG_DVDSUB_PARSER 0 ++#define CONFIG_DVD_NAV_PARSER 0 ++#define CONFIG_FLAC_PARSER 1 ++#define CONFIG_G723_1_PARSER 0 ++#define CONFIG_G729_PARSER 0 ++#define CONFIG_GIF_PARSER 0 ++#define CONFIG_GSM_PARSER 0 ++#define CONFIG_H261_PARSER 0 ++#define CONFIG_H263_PARSER 0 ++#define CONFIG_H264_PARSER 1 ++#define CONFIG_HEVC_PARSER 0 ++#define CONFIG_MJPEG_PARSER 0 ++#define CONFIG_MLP_PARSER 0 ++#define CONFIG_MPEG4VIDEO_PARSER 0 ++#define CONFIG_MPEGAUDIO_PARSER 1 ++#define CONFIG_MPEGVIDEO_PARSER 0 ++#define CONFIG_OPUS_PARSER 1 ++#define CONFIG_PNG_PARSER 0 ++#define CONFIG_PNM_PARSER 0 ++#define CONFIG_RV30_PARSER 0 ++#define CONFIG_RV40_PARSER 0 ++#define CONFIG_SBC_PARSER 0 ++#define CONFIG_SIPR_PARSER 0 ++#define CONFIG_TAK_PARSER 0 ++#define CONFIG_VC1_PARSER 0 ++#define CONFIG_VORBIS_PARSER 1 ++#define CONFIG_VP3_PARSER 1 ++#define CONFIG_VP8_PARSER 1 ++#define CONFIG_VP9_PARSER 1 ++#define CONFIG_WEBP_PARSER 0 ++#define CONFIG_XMA_PARSER 0 ++#define CONFIG_ALSA_INDEV 0 ++#define CONFIG_ANDROID_CAMERA_INDEV 0 ++#define CONFIG_AVFOUNDATION_INDEV 0 ++#define CONFIG_BKTR_INDEV 0 ++#define CONFIG_DECKLINK_INDEV 0 ++#define CONFIG_DSHOW_INDEV 0 ++#define CONFIG_FBDEV_INDEV 0 ++#define CONFIG_GDIGRAB_INDEV 0 
++#define CONFIG_IEC61883_INDEV 0 ++#define CONFIG_JACK_INDEV 0 ++#define CONFIG_KMSGRAB_INDEV 0 ++#define CONFIG_LAVFI_INDEV 0 ++#define CONFIG_OPENAL_INDEV 0 ++#define CONFIG_OSS_INDEV 0 ++#define CONFIG_PULSE_INDEV 0 ++#define CONFIG_SNDIO_INDEV 0 ++#define CONFIG_V4L2_INDEV 0 ++#define CONFIG_VFWCAP_INDEV 0 ++#define CONFIG_XCBGRAB_INDEV 0 ++#define CONFIG_LIBCDIO_INDEV 0 ++#define CONFIG_LIBDC1394_INDEV 0 ++#define CONFIG_ALSA_OUTDEV 0 ++#define CONFIG_CACA_OUTDEV 0 ++#define CONFIG_DECKLINK_OUTDEV 0 ++#define CONFIG_FBDEV_OUTDEV 0 ++#define CONFIG_OPENGL_OUTDEV 0 ++#define CONFIG_OSS_OUTDEV 0 ++#define CONFIG_PULSE_OUTDEV 0 ++#define CONFIG_SDL2_OUTDEV 0 ++#define CONFIG_SNDIO_OUTDEV 0 ++#define CONFIG_V4L2_OUTDEV 0 ++#define CONFIG_XV_OUTDEV 0 ++#define CONFIG_ABENCH_FILTER 0 ++#define CONFIG_ACOMPRESSOR_FILTER 0 ++#define CONFIG_ACONTRAST_FILTER 0 ++#define CONFIG_ACOPY_FILTER 0 ++#define CONFIG_ACUE_FILTER 0 ++#define CONFIG_ACROSSFADE_FILTER 0 ++#define CONFIG_ACROSSOVER_FILTER 0 ++#define CONFIG_ACRUSHER_FILTER 0 ++#define CONFIG_ADECLICK_FILTER 0 ++#define CONFIG_ADECLIP_FILTER 0 ++#define CONFIG_ADELAY_FILTER 0 ++#define CONFIG_ADERIVATIVE_FILTER 0 ++#define CONFIG_AECHO_FILTER 0 ++#define CONFIG_AEMPHASIS_FILTER 0 ++#define CONFIG_AEVAL_FILTER 0 ++#define CONFIG_AFADE_FILTER 0 ++#define CONFIG_AFFTDN_FILTER 0 ++#define CONFIG_AFFTFILT_FILTER 0 ++#define CONFIG_AFIR_FILTER 0 ++#define CONFIG_AFORMAT_FILTER 0 ++#define CONFIG_AGATE_FILTER 0 ++#define CONFIG_AIIR_FILTER 0 ++#define CONFIG_AINTEGRAL_FILTER 0 ++#define CONFIG_AINTERLEAVE_FILTER 0 ++#define CONFIG_ALIMITER_FILTER 0 ++#define CONFIG_ALLPASS_FILTER 0 ++#define CONFIG_ALOOP_FILTER 0 ++#define CONFIG_AMERGE_FILTER 0 ++#define CONFIG_AMETADATA_FILTER 0 ++#define CONFIG_AMIX_FILTER 0 ++#define CONFIG_AMULTIPLY_FILTER 0 ++#define CONFIG_ANEQUALIZER_FILTER 0 ++#define CONFIG_ANLMDN_FILTER 0 ++#define CONFIG_ANLMS_FILTER 0 ++#define CONFIG_ANULL_FILTER 0 ++#define CONFIG_APAD_FILTER 0 ++#define CONFIG_APERMS_FILTER 0 ++#define CONFIG_APHASER_FILTER 0 ++#define CONFIG_APULSATOR_FILTER 0 ++#define CONFIG_AREALTIME_FILTER 0 ++#define CONFIG_ARESAMPLE_FILTER 0 ++#define CONFIG_AREVERSE_FILTER 0 ++#define CONFIG_ARNNDN_FILTER 0 ++#define CONFIG_ASELECT_FILTER 0 ++#define CONFIG_ASENDCMD_FILTER 0 ++#define CONFIG_ASETNSAMPLES_FILTER 0 ++#define CONFIG_ASETPTS_FILTER 0 ++#define CONFIG_ASETRATE_FILTER 0 ++#define CONFIG_ASETTB_FILTER 0 ++#define CONFIG_ASHOWINFO_FILTER 0 ++#define CONFIG_ASIDEDATA_FILTER 0 ++#define CONFIG_ASOFTCLIP_FILTER 0 ++#define CONFIG_ASPLIT_FILTER 0 ++#define CONFIG_ASR_FILTER 0 ++#define CONFIG_ASTATS_FILTER 0 ++#define CONFIG_ASTREAMSELECT_FILTER 0 ++#define CONFIG_ATEMPO_FILTER 0 ++#define CONFIG_ATRIM_FILTER 0 ++#define CONFIG_AXCORRELATE_FILTER 0 ++#define CONFIG_AZMQ_FILTER 0 ++#define CONFIG_BANDPASS_FILTER 0 ++#define CONFIG_BANDREJECT_FILTER 0 ++#define CONFIG_BASS_FILTER 0 ++#define CONFIG_BIQUAD_FILTER 0 ++#define CONFIG_BS2B_FILTER 0 ++#define CONFIG_CHROMABER_VULKAN_FILTER 0 ++#define CONFIG_CHANNELMAP_FILTER 0 ++#define CONFIG_CHANNELSPLIT_FILTER 0 ++#define CONFIG_CHORUS_FILTER 0 ++#define CONFIG_COMPAND_FILTER 0 ++#define CONFIG_COMPENSATIONDELAY_FILTER 0 ++#define CONFIG_CROSSFEED_FILTER 0 ++#define CONFIG_CRYSTALIZER_FILTER 0 ++#define CONFIG_DCSHIFT_FILTER 0 ++#define CONFIG_DEESSER_FILTER 0 ++#define CONFIG_DRMETER_FILTER 0 ++#define CONFIG_DYNAUDNORM_FILTER 0 ++#define CONFIG_EARWAX_FILTER 0 ++#define CONFIG_EBUR128_FILTER 0 ++#define CONFIG_EQUALIZER_FILTER 0 ++#define 
CONFIG_EXTRASTEREO_FILTER 0 ++#define CONFIG_FIREQUALIZER_FILTER 0 ++#define CONFIG_FLANGER_FILTER 0 ++#define CONFIG_HAAS_FILTER 0 ++#define CONFIG_HDCD_FILTER 0 ++#define CONFIG_HEADPHONE_FILTER 0 ++#define CONFIG_HIGHPASS_FILTER 0 ++#define CONFIG_HIGHSHELF_FILTER 0 ++#define CONFIG_JOIN_FILTER 0 ++#define CONFIG_LADSPA_FILTER 0 ++#define CONFIG_LOUDNORM_FILTER 0 ++#define CONFIG_LOWPASS_FILTER 0 ++#define CONFIG_LOWSHELF_FILTER 0 ++#define CONFIG_LV2_FILTER 0 ++#define CONFIG_MCOMPAND_FILTER 0 ++#define CONFIG_PAN_FILTER 0 ++#define CONFIG_REPLAYGAIN_FILTER 0 ++#define CONFIG_RESAMPLE_FILTER 0 ++#define CONFIG_RUBBERBAND_FILTER 0 ++#define CONFIG_SIDECHAINCOMPRESS_FILTER 0 ++#define CONFIG_SIDECHAINGATE_FILTER 0 ++#define CONFIG_SILENCEDETECT_FILTER 0 ++#define CONFIG_SILENCEREMOVE_FILTER 0 ++#define CONFIG_SOFALIZER_FILTER 0 ++#define CONFIG_STEREOTOOLS_FILTER 0 ++#define CONFIG_STEREOWIDEN_FILTER 0 ++#define CONFIG_SUPEREQUALIZER_FILTER 0 ++#define CONFIG_SURROUND_FILTER 0 ++#define CONFIG_TREBLE_FILTER 0 ++#define CONFIG_TREMOLO_FILTER 0 ++#define CONFIG_VIBRATO_FILTER 0 ++#define CONFIG_VOLUME_FILTER 0 ++#define CONFIG_VOLUMEDETECT_FILTER 0 ++#define CONFIG_AEVALSRC_FILTER 0 ++#define CONFIG_AFIRSRC_FILTER 0 ++#define CONFIG_ANOISESRC_FILTER 0 ++#define CONFIG_ANULLSRC_FILTER 0 ++#define CONFIG_FLITE_FILTER 0 ++#define CONFIG_HILBERT_FILTER 0 ++#define CONFIG_SINC_FILTER 0 ++#define CONFIG_SINE_FILTER 0 ++#define CONFIG_ANULLSINK_FILTER 0 ++#define CONFIG_ADDROI_FILTER 0 ++#define CONFIG_ALPHAEXTRACT_FILTER 0 ++#define CONFIG_ALPHAMERGE_FILTER 0 ++#define CONFIG_AMPLIFY_FILTER 0 ++#define CONFIG_ASS_FILTER 0 ++#define CONFIG_ATADENOISE_FILTER 0 ++#define CONFIG_AVGBLUR_FILTER 0 ++#define CONFIG_AVGBLUR_OPENCL_FILTER 0 ++#define CONFIG_AVGBLUR_VULKAN_FILTER 0 ++#define CONFIG_BBOX_FILTER 0 ++#define CONFIG_BENCH_FILTER 0 ++#define CONFIG_BILATERAL_FILTER 0 ++#define CONFIG_BITPLANENOISE_FILTER 0 ++#define CONFIG_BLACKDETECT_FILTER 0 ++#define CONFIG_BLACKFRAME_FILTER 0 ++#define CONFIG_BLEND_FILTER 0 ++#define CONFIG_BM3D_FILTER 0 ++#define CONFIG_BOXBLUR_FILTER 0 ++#define CONFIG_BOXBLUR_OPENCL_FILTER 0 ++#define CONFIG_BWDIF_FILTER 0 ++#define CONFIG_CAS_FILTER 0 ++#define CONFIG_CHROMAHOLD_FILTER 0 ++#define CONFIG_CHROMAKEY_FILTER 0 ++#define CONFIG_CHROMASHIFT_FILTER 0 ++#define CONFIG_CIESCOPE_FILTER 0 ++#define CONFIG_CODECVIEW_FILTER 0 ++#define CONFIG_COLORBALANCE_FILTER 0 ++#define CONFIG_COLORCHANNELMIXER_FILTER 0 ++#define CONFIG_COLORKEY_FILTER 0 ++#define CONFIG_COLORKEY_OPENCL_FILTER 0 ++#define CONFIG_COLORHOLD_FILTER 0 ++#define CONFIG_COLORLEVELS_FILTER 0 ++#define CONFIG_COLORMATRIX_FILTER 0 ++#define CONFIG_COLORSPACE_FILTER 0 ++#define CONFIG_CONVOLUTION_FILTER 0 ++#define CONFIG_CONVOLUTION_OPENCL_FILTER 0 ++#define CONFIG_CONVOLVE_FILTER 0 ++#define CONFIG_COPY_FILTER 0 ++#define CONFIG_COREIMAGE_FILTER 0 ++#define CONFIG_COVER_RECT_FILTER 0 ++#define CONFIG_CROP_FILTER 0 ++#define CONFIG_CROPDETECT_FILTER 0 ++#define CONFIG_CUE_FILTER 0 ++#define CONFIG_CURVES_FILTER 0 ++#define CONFIG_DATASCOPE_FILTER 0 ++#define CONFIG_DCTDNOIZ_FILTER 0 ++#define CONFIG_DEBAND_FILTER 0 ++#define CONFIG_DEBLOCK_FILTER 0 ++#define CONFIG_DECIMATE_FILTER 0 ++#define CONFIG_DECONVOLVE_FILTER 0 ++#define CONFIG_DEDOT_FILTER 0 ++#define CONFIG_DEFLATE_FILTER 0 ++#define CONFIG_DEFLICKER_FILTER 0 ++#define CONFIG_DEINTERLACE_QSV_FILTER 0 ++#define CONFIG_DEINTERLACE_VAAPI_FILTER 0 ++#define CONFIG_DEJUDDER_FILTER 0 ++#define CONFIG_DELOGO_FILTER 0 ++#define 
CONFIG_DENOISE_VAAPI_FILTER 0 ++#define CONFIG_DERAIN_FILTER 0 ++#define CONFIG_DESHAKE_FILTER 0 ++#define CONFIG_DESHAKE_OPENCL_FILTER 0 ++#define CONFIG_DESPILL_FILTER 0 ++#define CONFIG_DETELECINE_FILTER 0 ++#define CONFIG_DILATION_FILTER 0 ++#define CONFIG_DILATION_OPENCL_FILTER 0 ++#define CONFIG_DISPLACE_FILTER 0 ++#define CONFIG_DNN_PROCESSING_FILTER 0 ++#define CONFIG_DOUBLEWEAVE_FILTER 0 ++#define CONFIG_DRAWBOX_FILTER 0 ++#define CONFIG_DRAWGRAPH_FILTER 0 ++#define CONFIG_DRAWGRID_FILTER 0 ++#define CONFIG_DRAWTEXT_FILTER 0 ++#define CONFIG_EDGEDETECT_FILTER 0 ++#define CONFIG_ELBG_FILTER 0 ++#define CONFIG_ENTROPY_FILTER 0 ++#define CONFIG_EQ_FILTER 0 ++#define CONFIG_EROSION_FILTER 0 ++#define CONFIG_EROSION_OPENCL_FILTER 0 ++#define CONFIG_EXTRACTPLANES_FILTER 0 ++#define CONFIG_FADE_FILTER 0 ++#define CONFIG_FFTDNOIZ_FILTER 0 ++#define CONFIG_FFTFILT_FILTER 0 ++#define CONFIG_FIELD_FILTER 0 ++#define CONFIG_FIELDHINT_FILTER 0 ++#define CONFIG_FIELDMATCH_FILTER 0 ++#define CONFIG_FIELDORDER_FILTER 0 ++#define CONFIG_FILLBORDERS_FILTER 0 ++#define CONFIG_FIND_RECT_FILTER 0 ++#define CONFIG_FLOODFILL_FILTER 0 ++#define CONFIG_FORMAT_FILTER 0 ++#define CONFIG_FPS_FILTER 0 ++#define CONFIG_FRAMEPACK_FILTER 0 ++#define CONFIG_FRAMERATE_FILTER 0 ++#define CONFIG_FRAMESTEP_FILTER 0 ++#define CONFIG_FREEZEDETECT_FILTER 0 ++#define CONFIG_FREEZEFRAMES_FILTER 0 ++#define CONFIG_FREI0R_FILTER 0 ++#define CONFIG_FSPP_FILTER 0 ++#define CONFIG_GBLUR_FILTER 0 ++#define CONFIG_GEQ_FILTER 0 ++#define CONFIG_GRADFUN_FILTER 0 ++#define CONFIG_GRAPHMONITOR_FILTER 0 ++#define CONFIG_GREYEDGE_FILTER 0 ++#define CONFIG_HALDCLUT_FILTER 0 ++#define CONFIG_HFLIP_FILTER 0 ++#define CONFIG_HISTEQ_FILTER 0 ++#define CONFIG_HISTOGRAM_FILTER 0 ++#define CONFIG_HQDN3D_FILTER 0 ++#define CONFIG_HQX_FILTER 0 ++#define CONFIG_HSTACK_FILTER 0 ++#define CONFIG_HUE_FILTER 0 ++#define CONFIG_HWDOWNLOAD_FILTER 0 ++#define CONFIG_HWMAP_FILTER 0 ++#define CONFIG_HWUPLOAD_FILTER 0 ++#define CONFIG_HWUPLOAD_CUDA_FILTER 0 ++#define CONFIG_HYSTERESIS_FILTER 0 ++#define CONFIG_IDET_FILTER 0 ++#define CONFIG_IL_FILTER 0 ++#define CONFIG_INFLATE_FILTER 0 ++#define CONFIG_INTERLACE_FILTER 0 ++#define CONFIG_INTERLEAVE_FILTER 0 ++#define CONFIG_KERNDEINT_FILTER 0 ++#define CONFIG_LAGFUN_FILTER 0 ++#define CONFIG_LENSCORRECTION_FILTER 0 ++#define CONFIG_LENSFUN_FILTER 0 ++#define CONFIG_LIBVMAF_FILTER 0 ++#define CONFIG_LIMITER_FILTER 0 ++#define CONFIG_LOOP_FILTER 0 ++#define CONFIG_LUMAKEY_FILTER 0 ++#define CONFIG_LUT_FILTER 0 ++#define CONFIG_LUT1D_FILTER 0 ++#define CONFIG_LUT2_FILTER 0 ++#define CONFIG_LUT3D_FILTER 0 ++#define CONFIG_LUTRGB_FILTER 0 ++#define CONFIG_LUTYUV_FILTER 0 ++#define CONFIG_MASKEDCLAMP_FILTER 0 ++#define CONFIG_MASKEDMAX_FILTER 0 ++#define CONFIG_MASKEDMERGE_FILTER 0 ++#define CONFIG_MASKEDMIN_FILTER 0 ++#define CONFIG_MASKEDTHRESHOLD_FILTER 0 ++#define CONFIG_MASKFUN_FILTER 0 ++#define CONFIG_MCDEINT_FILTER 0 ++#define CONFIG_MEDIAN_FILTER 0 ++#define CONFIG_MERGEPLANES_FILTER 0 ++#define CONFIG_MESTIMATE_FILTER 0 ++#define CONFIG_METADATA_FILTER 0 ++#define CONFIG_MIDEQUALIZER_FILTER 0 ++#define CONFIG_MINTERPOLATE_FILTER 0 ++#define CONFIG_MIX_FILTER 0 ++#define CONFIG_MPDECIMATE_FILTER 0 ++#define CONFIG_NEGATE_FILTER 0 ++#define CONFIG_NLMEANS_FILTER 0 ++#define CONFIG_NLMEANS_OPENCL_FILTER 0 ++#define CONFIG_NNEDI_FILTER 0 ++#define CONFIG_NOFORMAT_FILTER 0 ++#define CONFIG_NOISE_FILTER 0 ++#define CONFIG_NORMALIZE_FILTER 0 ++#define CONFIG_NULL_FILTER 0 ++#define CONFIG_OCR_FILTER 0 
++#define CONFIG_OCV_FILTER 0 ++#define CONFIG_OSCILLOSCOPE_FILTER 0 ++#define CONFIG_OVERLAY_FILTER 0 ++#define CONFIG_OVERLAY_OPENCL_FILTER 0 ++#define CONFIG_OVERLAY_QSV_FILTER 0 ++#define CONFIG_OVERLAY_VULKAN_FILTER 0 ++#define CONFIG_OVERLAY_CUDA_FILTER 0 ++#define CONFIG_OWDENOISE_FILTER 0 ++#define CONFIG_PAD_FILTER 0 ++#define CONFIG_PAD_OPENCL_FILTER 0 ++#define CONFIG_PALETTEGEN_FILTER 0 ++#define CONFIG_PALETTEUSE_FILTER 0 ++#define CONFIG_PERMS_FILTER 0 ++#define CONFIG_PERSPECTIVE_FILTER 0 ++#define CONFIG_PHASE_FILTER 0 ++#define CONFIG_PHOTOSENSITIVITY_FILTER 0 ++#define CONFIG_PIXDESCTEST_FILTER 0 ++#define CONFIG_PIXSCOPE_FILTER 0 ++#define CONFIG_PP_FILTER 0 ++#define CONFIG_PP7_FILTER 0 ++#define CONFIG_PREMULTIPLY_FILTER 0 ++#define CONFIG_PREWITT_FILTER 0 ++#define CONFIG_PREWITT_OPENCL_FILTER 0 ++#define CONFIG_PROCAMP_VAAPI_FILTER 0 ++#define CONFIG_PROGRAM_OPENCL_FILTER 0 ++#define CONFIG_PSEUDOCOLOR_FILTER 0 ++#define CONFIG_PSNR_FILTER 0 ++#define CONFIG_PULLUP_FILTER 0 ++#define CONFIG_QP_FILTER 0 ++#define CONFIG_RANDOM_FILTER 0 ++#define CONFIG_READEIA608_FILTER 0 ++#define CONFIG_READVITC_FILTER 0 ++#define CONFIG_REALTIME_FILTER 0 ++#define CONFIG_REMAP_FILTER 0 ++#define CONFIG_REMOVEGRAIN_FILTER 0 ++#define CONFIG_REMOVELOGO_FILTER 0 ++#define CONFIG_REPEATFIELDS_FILTER 0 ++#define CONFIG_REVERSE_FILTER 0 ++#define CONFIG_RGBASHIFT_FILTER 0 ++#define CONFIG_ROBERTS_FILTER 0 ++#define CONFIG_ROBERTS_OPENCL_FILTER 0 ++#define CONFIG_ROTATE_FILTER 0 ++#define CONFIG_SAB_FILTER 0 ++#define CONFIG_SCALE_FILTER 0 ++#define CONFIG_SCALE_CUDA_FILTER 0 ++#define CONFIG_SCALE_NPP_FILTER 0 ++#define CONFIG_SCALE_QSV_FILTER 0 ++#define CONFIG_SCALE_VAAPI_FILTER 0 ++#define CONFIG_SCALE_VULKAN_FILTER 0 ++#define CONFIG_SCALE2REF_FILTER 0 ++#define CONFIG_SCROLL_FILTER 0 ++#define CONFIG_SELECT_FILTER 0 ++#define CONFIG_SELECTIVECOLOR_FILTER 0 ++#define CONFIG_SENDCMD_FILTER 0 ++#define CONFIG_SEPARATEFIELDS_FILTER 0 ++#define CONFIG_SETDAR_FILTER 0 ++#define CONFIG_SETFIELD_FILTER 0 ++#define CONFIG_SETPARAMS_FILTER 0 ++#define CONFIG_SETPTS_FILTER 0 ++#define CONFIG_SETRANGE_FILTER 0 ++#define CONFIG_SETSAR_FILTER 0 ++#define CONFIG_SETTB_FILTER 0 ++#define CONFIG_SHARPNESS_VAAPI_FILTER 0 ++#define CONFIG_SHOWINFO_FILTER 0 ++#define CONFIG_SHOWPALETTE_FILTER 0 ++#define CONFIG_SHUFFLEFRAMES_FILTER 0 ++#define CONFIG_SHUFFLEPLANES_FILTER 0 ++#define CONFIG_SIDEDATA_FILTER 0 ++#define CONFIG_SIGNALSTATS_FILTER 0 ++#define CONFIG_SIGNATURE_FILTER 0 ++#define CONFIG_SMARTBLUR_FILTER 0 ++#define CONFIG_SOBEL_FILTER 0 ++#define CONFIG_SOBEL_OPENCL_FILTER 0 ++#define CONFIG_SPLIT_FILTER 0 ++#define CONFIG_SPP_FILTER 0 ++#define CONFIG_SR_FILTER 0 ++#define CONFIG_SSIM_FILTER 0 ++#define CONFIG_STEREO3D_FILTER 0 ++#define CONFIG_STREAMSELECT_FILTER 0 ++#define CONFIG_SUBTITLES_FILTER 0 ++#define CONFIG_SUPER2XSAI_FILTER 0 ++#define CONFIG_SWAPRECT_FILTER 0 ++#define CONFIG_SWAPUV_FILTER 0 ++#define CONFIG_TBLEND_FILTER 0 ++#define CONFIG_TELECINE_FILTER 0 ++#define CONFIG_THISTOGRAM_FILTER 0 ++#define CONFIG_THRESHOLD_FILTER 0 ++#define CONFIG_THUMBNAIL_FILTER 0 ++#define CONFIG_THUMBNAIL_CUDA_FILTER 0 ++#define CONFIG_TILE_FILTER 0 ++#define CONFIG_TINTERLACE_FILTER 0 ++#define CONFIG_TLUT2_FILTER 0 ++#define CONFIG_TMEDIAN_FILTER 0 ++#define CONFIG_TMIX_FILTER 0 ++#define CONFIG_TONEMAP_FILTER 0 ++#define CONFIG_TONEMAP_OPENCL_FILTER 0 ++#define CONFIG_TONEMAP_VAAPI_FILTER 0 ++#define CONFIG_TPAD_FILTER 0 ++#define CONFIG_TRANSPOSE_FILTER 0 ++#define 
CONFIG_TRANSPOSE_NPP_FILTER 0 ++#define CONFIG_TRANSPOSE_OPENCL_FILTER 0 ++#define CONFIG_TRANSPOSE_VAAPI_FILTER 0 ++#define CONFIG_TRIM_FILTER 0 ++#define CONFIG_UNPREMULTIPLY_FILTER 0 ++#define CONFIG_UNSHARP_FILTER 0 ++#define CONFIG_UNSHARP_OPENCL_FILTER 0 ++#define CONFIG_USPP_FILTER 0 ++#define CONFIG_V360_FILTER 0 ++#define CONFIG_VAGUEDENOISER_FILTER 0 ++#define CONFIG_VECTORSCOPE_FILTER 0 ++#define CONFIG_VFLIP_FILTER 0 ++#define CONFIG_VFRDET_FILTER 0 ++#define CONFIG_VIBRANCE_FILTER 0 ++#define CONFIG_VIDSTABDETECT_FILTER 0 ++#define CONFIG_VIDSTABTRANSFORM_FILTER 0 ++#define CONFIG_VIGNETTE_FILTER 0 ++#define CONFIG_VMAFMOTION_FILTER 0 ++#define CONFIG_VPP_QSV_FILTER 0 ++#define CONFIG_VSTACK_FILTER 0 ++#define CONFIG_W3FDIF_FILTER 0 ++#define CONFIG_WAVEFORM_FILTER 0 ++#define CONFIG_WEAVE_FILTER 0 ++#define CONFIG_XBR_FILTER 0 ++#define CONFIG_XFADE_FILTER 0 ++#define CONFIG_XFADE_OPENCL_FILTER 0 ++#define CONFIG_XMEDIAN_FILTER 0 ++#define CONFIG_XSTACK_FILTER 0 ++#define CONFIG_YADIF_FILTER 0 ++#define CONFIG_YADIF_CUDA_FILTER 0 ++#define CONFIG_YAEPBLUR_FILTER 0 ++#define CONFIG_ZMQ_FILTER 0 ++#define CONFIG_ZOOMPAN_FILTER 0 ++#define CONFIG_ZSCALE_FILTER 0 ++#define CONFIG_ALLRGB_FILTER 0 ++#define CONFIG_ALLYUV_FILTER 0 ++#define CONFIG_CELLAUTO_FILTER 0 ++#define CONFIG_COLOR_FILTER 0 ++#define CONFIG_COREIMAGESRC_FILTER 0 ++#define CONFIG_FREI0R_SRC_FILTER 0 ++#define CONFIG_HALDCLUTSRC_FILTER 0 ++#define CONFIG_LIFE_FILTER 0 ++#define CONFIG_MANDELBROT_FILTER 0 ++#define CONFIG_MPTESTSRC_FILTER 0 ++#define CONFIG_NULLSRC_FILTER 0 ++#define CONFIG_OPENCLSRC_FILTER 0 ++#define CONFIG_PAL75BARS_FILTER 0 ++#define CONFIG_PAL100BARS_FILTER 0 ++#define CONFIG_RGBTESTSRC_FILTER 0 ++#define CONFIG_SIERPINSKI_FILTER 0 ++#define CONFIG_SMPTEBARS_FILTER 0 ++#define CONFIG_SMPTEHDBARS_FILTER 0 ++#define CONFIG_TESTSRC_FILTER 0 ++#define CONFIG_TESTSRC2_FILTER 0 ++#define CONFIG_YUVTESTSRC_FILTER 0 ++#define CONFIG_NULLSINK_FILTER 0 ++#define CONFIG_ABITSCOPE_FILTER 0 ++#define CONFIG_ADRAWGRAPH_FILTER 0 ++#define CONFIG_AGRAPHMONITOR_FILTER 0 ++#define CONFIG_AHISTOGRAM_FILTER 0 ++#define CONFIG_APHASEMETER_FILTER 0 ++#define CONFIG_AVECTORSCOPE_FILTER 0 ++#define CONFIG_CONCAT_FILTER 0 ++#define CONFIG_SHOWCQT_FILTER 0 ++#define CONFIG_SHOWFREQS_FILTER 0 ++#define CONFIG_SHOWSPATIAL_FILTER 0 ++#define CONFIG_SHOWSPECTRUM_FILTER 0 ++#define CONFIG_SHOWSPECTRUMPIC_FILTER 0 ++#define CONFIG_SHOWVOLUME_FILTER 0 ++#define CONFIG_SHOWWAVES_FILTER 0 ++#define CONFIG_SHOWWAVESPIC_FILTER 0 ++#define CONFIG_SPECTRUMSYNTH_FILTER 0 ++#define CONFIG_AMOVIE_FILTER 0 ++#define CONFIG_MOVIE_FILTER 0 ++#define CONFIG_AFIFO_FILTER 0 ++#define CONFIG_FIFO_FILTER 0 ++#define CONFIG_AA_DEMUXER 0 ++#define CONFIG_AAC_DEMUXER 1 ++#define CONFIG_AC3_DEMUXER 0 ++#define CONFIG_ACM_DEMUXER 0 ++#define CONFIG_ACT_DEMUXER 0 ++#define CONFIG_ADF_DEMUXER 0 ++#define CONFIG_ADP_DEMUXER 0 ++#define CONFIG_ADS_DEMUXER 0 ++#define CONFIG_ADX_DEMUXER 0 ++#define CONFIG_AEA_DEMUXER 0 ++#define CONFIG_AFC_DEMUXER 0 ++#define CONFIG_AIFF_DEMUXER 0 ++#define CONFIG_AIX_DEMUXER 0 ++#define CONFIG_ALP_DEMUXER 0 ++#define CONFIG_AMR_DEMUXER 0 ++#define CONFIG_AMRNB_DEMUXER 0 ++#define CONFIG_AMRWB_DEMUXER 0 ++#define CONFIG_ANM_DEMUXER 0 ++#define CONFIG_APC_DEMUXER 0 ++#define CONFIG_APE_DEMUXER 0 ++#define CONFIG_APM_DEMUXER 0 ++#define CONFIG_APNG_DEMUXER 0 ++#define CONFIG_APTX_DEMUXER 0 ++#define CONFIG_APTX_HD_DEMUXER 0 ++#define CONFIG_AQTITLE_DEMUXER 0 ++#define CONFIG_ARGO_ASF_DEMUXER 0 ++#define 
CONFIG_ASF_DEMUXER 0 ++#define CONFIG_ASF_O_DEMUXER 0 ++#define CONFIG_ASS_DEMUXER 0 ++#define CONFIG_AST_DEMUXER 0 ++#define CONFIG_AU_DEMUXER 0 ++#define CONFIG_AV1_DEMUXER 0 ++#define CONFIG_AVI_DEMUXER 0 ++#define CONFIG_AVISYNTH_DEMUXER 0 ++#define CONFIG_AVR_DEMUXER 0 ++#define CONFIG_AVS_DEMUXER 0 ++#define CONFIG_AVS2_DEMUXER 0 ++#define CONFIG_BETHSOFTVID_DEMUXER 0 ++#define CONFIG_BFI_DEMUXER 0 ++#define CONFIG_BINTEXT_DEMUXER 0 ++#define CONFIG_BINK_DEMUXER 0 ++#define CONFIG_BIT_DEMUXER 0 ++#define CONFIG_BMV_DEMUXER 0 ++#define CONFIG_BFSTM_DEMUXER 0 ++#define CONFIG_BRSTM_DEMUXER 0 ++#define CONFIG_BOA_DEMUXER 0 ++#define CONFIG_C93_DEMUXER 0 ++#define CONFIG_CAF_DEMUXER 0 ++#define CONFIG_CAVSVIDEO_DEMUXER 0 ++#define CONFIG_CDG_DEMUXER 0 ++#define CONFIG_CDXL_DEMUXER 0 ++#define CONFIG_CINE_DEMUXER 0 ++#define CONFIG_CODEC2_DEMUXER 0 ++#define CONFIG_CODEC2RAW_DEMUXER 0 ++#define CONFIG_CONCAT_DEMUXER 0 ++#define CONFIG_DASH_DEMUXER 0 ++#define CONFIG_DATA_DEMUXER 0 ++#define CONFIG_DAUD_DEMUXER 0 ++#define CONFIG_DCSTR_DEMUXER 0 ++#define CONFIG_DERF_DEMUXER 0 ++#define CONFIG_DFA_DEMUXER 0 ++#define CONFIG_DHAV_DEMUXER 0 ++#define CONFIG_DIRAC_DEMUXER 0 ++#define CONFIG_DNXHD_DEMUXER 0 ++#define CONFIG_DSF_DEMUXER 0 ++#define CONFIG_DSICIN_DEMUXER 0 ++#define CONFIG_DSS_DEMUXER 0 ++#define CONFIG_DTS_DEMUXER 0 ++#define CONFIG_DTSHD_DEMUXER 0 ++#define CONFIG_DV_DEMUXER 0 ++#define CONFIG_DVBSUB_DEMUXER 0 ++#define CONFIG_DVBTXT_DEMUXER 0 ++#define CONFIG_DXA_DEMUXER 0 ++#define CONFIG_EA_DEMUXER 0 ++#define CONFIG_EA_CDATA_DEMUXER 0 ++#define CONFIG_EAC3_DEMUXER 0 ++#define CONFIG_EPAF_DEMUXER 0 ++#define CONFIG_FFMETADATA_DEMUXER 0 ++#define CONFIG_FILMSTRIP_DEMUXER 0 ++#define CONFIG_FITS_DEMUXER 0 ++#define CONFIG_FLAC_DEMUXER 1 ++#define CONFIG_FLIC_DEMUXER 0 ++#define CONFIG_FLV_DEMUXER 0 ++#define CONFIG_LIVE_FLV_DEMUXER 0 ++#define CONFIG_FOURXM_DEMUXER 0 ++#define CONFIG_FRM_DEMUXER 0 ++#define CONFIG_FSB_DEMUXER 0 ++#define CONFIG_FWSE_DEMUXER 0 ++#define CONFIG_G722_DEMUXER 0 ++#define CONFIG_G723_1_DEMUXER 0 ++#define CONFIG_G726_DEMUXER 0 ++#define CONFIG_G726LE_DEMUXER 0 ++#define CONFIG_G729_DEMUXER 0 ++#define CONFIG_GDV_DEMUXER 0 ++#define CONFIG_GENH_DEMUXER 0 ++#define CONFIG_GIF_DEMUXER 0 ++#define CONFIG_GSM_DEMUXER 0 ++#define CONFIG_GXF_DEMUXER 0 ++#define CONFIG_H261_DEMUXER 0 ++#define CONFIG_H263_DEMUXER 0 ++#define CONFIG_H264_DEMUXER 0 ++#define CONFIG_HCA_DEMUXER 0 ++#define CONFIG_HCOM_DEMUXER 0 ++#define CONFIG_HEVC_DEMUXER 0 ++#define CONFIG_HLS_DEMUXER 0 ++#define CONFIG_HNM_DEMUXER 0 ++#define CONFIG_ICO_DEMUXER 0 ++#define CONFIG_IDCIN_DEMUXER 0 ++#define CONFIG_IDF_DEMUXER 0 ++#define CONFIG_IFF_DEMUXER 0 ++#define CONFIG_IFV_DEMUXER 0 ++#define CONFIG_ILBC_DEMUXER 0 ++#define CONFIG_IMAGE2_DEMUXER 0 ++#define CONFIG_IMAGE2PIPE_DEMUXER 0 ++#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0 ++#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0 ++#define CONFIG_INGENIENT_DEMUXER 0 ++#define CONFIG_IPMOVIE_DEMUXER 0 ++#define CONFIG_IRCAM_DEMUXER 0 ++#define CONFIG_ISS_DEMUXER 0 ++#define CONFIG_IV8_DEMUXER 0 ++#define CONFIG_IVF_DEMUXER 0 ++#define CONFIG_IVR_DEMUXER 0 ++#define CONFIG_JACOSUB_DEMUXER 0 ++#define CONFIG_JV_DEMUXER 0 ++#define CONFIG_KUX_DEMUXER 0 ++#define CONFIG_KVAG_DEMUXER 0 ++#define CONFIG_LMLM4_DEMUXER 0 ++#define CONFIG_LOAS_DEMUXER 0 ++#define CONFIG_LRC_DEMUXER 0 ++#define CONFIG_LVF_DEMUXER 0 ++#define CONFIG_LXF_DEMUXER 0 ++#define CONFIG_M4V_DEMUXER 0 ++#define CONFIG_MATROSKA_DEMUXER 1 ++#define CONFIG_MGSTS_DEMUXER 0 
++#define CONFIG_MICRODVD_DEMUXER 0 ++#define CONFIG_MJPEG_DEMUXER 0 ++#define CONFIG_MJPEG_2000_DEMUXER 0 ++#define CONFIG_MLP_DEMUXER 0 ++#define CONFIG_MLV_DEMUXER 0 ++#define CONFIG_MM_DEMUXER 0 ++#define CONFIG_MMF_DEMUXER 0 ++#define CONFIG_MOV_DEMUXER 1 ++#define CONFIG_MP3_DEMUXER 1 ++#define CONFIG_MPC_DEMUXER 0 ++#define CONFIG_MPC8_DEMUXER 0 ++#define CONFIG_MPEGPS_DEMUXER 0 ++#define CONFIG_MPEGTS_DEMUXER 0 ++#define CONFIG_MPEGTSRAW_DEMUXER 0 ++#define CONFIG_MPEGVIDEO_DEMUXER 0 ++#define CONFIG_MPJPEG_DEMUXER 0 ++#define CONFIG_MPL2_DEMUXER 0 ++#define CONFIG_MPSUB_DEMUXER 0 ++#define CONFIG_MSF_DEMUXER 0 ++#define CONFIG_MSNWC_TCP_DEMUXER 0 ++#define CONFIG_MTAF_DEMUXER 0 ++#define CONFIG_MTV_DEMUXER 0 ++#define CONFIG_MUSX_DEMUXER 0 ++#define CONFIG_MV_DEMUXER 0 ++#define CONFIG_MVI_DEMUXER 0 ++#define CONFIG_MXF_DEMUXER 0 ++#define CONFIG_MXG_DEMUXER 0 ++#define CONFIG_NC_DEMUXER 0 ++#define CONFIG_NISTSPHERE_DEMUXER 0 ++#define CONFIG_NSP_DEMUXER 0 ++#define CONFIG_NSV_DEMUXER 0 ++#define CONFIG_NUT_DEMUXER 0 ++#define CONFIG_NUV_DEMUXER 0 ++#define CONFIG_OGG_DEMUXER 1 ++#define CONFIG_OMA_DEMUXER 0 ++#define CONFIG_PAF_DEMUXER 0 ++#define CONFIG_PCM_ALAW_DEMUXER 0 ++#define CONFIG_PCM_MULAW_DEMUXER 0 ++#define CONFIG_PCM_VIDC_DEMUXER 0 ++#define CONFIG_PCM_F64BE_DEMUXER 0 ++#define CONFIG_PCM_F64LE_DEMUXER 0 ++#define CONFIG_PCM_F32BE_DEMUXER 0 ++#define CONFIG_PCM_F32LE_DEMUXER 0 ++#define CONFIG_PCM_S32BE_DEMUXER 0 ++#define CONFIG_PCM_S32LE_DEMUXER 0 ++#define CONFIG_PCM_S24BE_DEMUXER 0 ++#define CONFIG_PCM_S24LE_DEMUXER 0 ++#define CONFIG_PCM_S16BE_DEMUXER 0 ++#define CONFIG_PCM_S16LE_DEMUXER 0 ++#define CONFIG_PCM_S8_DEMUXER 0 ++#define CONFIG_PCM_U32BE_DEMUXER 0 ++#define CONFIG_PCM_U32LE_DEMUXER 0 ++#define CONFIG_PCM_U24BE_DEMUXER 0 ++#define CONFIG_PCM_U24LE_DEMUXER 0 ++#define CONFIG_PCM_U16BE_DEMUXER 0 ++#define CONFIG_PCM_U16LE_DEMUXER 0 ++#define CONFIG_PCM_U8_DEMUXER 0 ++#define CONFIG_PJS_DEMUXER 0 ++#define CONFIG_PMP_DEMUXER 0 ++#define CONFIG_PVA_DEMUXER 0 ++#define CONFIG_PVF_DEMUXER 0 ++#define CONFIG_QCP_DEMUXER 0 ++#define CONFIG_R3D_DEMUXER 0 ++#define CONFIG_RAWVIDEO_DEMUXER 0 ++#define CONFIG_REALTEXT_DEMUXER 0 ++#define CONFIG_REDSPARK_DEMUXER 0 ++#define CONFIG_RL2_DEMUXER 0 ++#define CONFIG_RM_DEMUXER 0 ++#define CONFIG_ROQ_DEMUXER 0 ++#define CONFIG_RPL_DEMUXER 0 ++#define CONFIG_RSD_DEMUXER 0 ++#define CONFIG_RSO_DEMUXER 0 ++#define CONFIG_RTP_DEMUXER 0 ++#define CONFIG_RTSP_DEMUXER 0 ++#define CONFIG_S337M_DEMUXER 0 ++#define CONFIG_SAMI_DEMUXER 0 ++#define CONFIG_SAP_DEMUXER 0 ++#define CONFIG_SBC_DEMUXER 0 ++#define CONFIG_SBG_DEMUXER 0 ++#define CONFIG_SCC_DEMUXER 0 ++#define CONFIG_SDP_DEMUXER 0 ++#define CONFIG_SDR2_DEMUXER 0 ++#define CONFIG_SDS_DEMUXER 0 ++#define CONFIG_SDX_DEMUXER 0 ++#define CONFIG_SEGAFILM_DEMUXER 0 ++#define CONFIG_SER_DEMUXER 0 ++#define CONFIG_SHORTEN_DEMUXER 0 ++#define CONFIG_SIFF_DEMUXER 0 ++#define CONFIG_SLN_DEMUXER 0 ++#define CONFIG_SMACKER_DEMUXER 0 ++#define CONFIG_SMJPEG_DEMUXER 0 ++#define CONFIG_SMUSH_DEMUXER 0 ++#define CONFIG_SOL_DEMUXER 0 ++#define CONFIG_SOX_DEMUXER 0 ++#define CONFIG_SPDIF_DEMUXER 0 ++#define CONFIG_SRT_DEMUXER 0 ++#define CONFIG_STR_DEMUXER 0 ++#define CONFIG_STL_DEMUXER 0 ++#define CONFIG_SUBVIEWER1_DEMUXER 0 ++#define CONFIG_SUBVIEWER_DEMUXER 0 ++#define CONFIG_SUP_DEMUXER 0 ++#define CONFIG_SVAG_DEMUXER 0 ++#define CONFIG_SWF_DEMUXER 0 ++#define CONFIG_TAK_DEMUXER 0 ++#define CONFIG_TEDCAPTIONS_DEMUXER 0 ++#define CONFIG_THP_DEMUXER 0 ++#define CONFIG_THREEDOSTR_DEMUXER 0 
++#define CONFIG_TIERTEXSEQ_DEMUXER 0 ++#define CONFIG_TMV_DEMUXER 0 ++#define CONFIG_TRUEHD_DEMUXER 0 ++#define CONFIG_TTA_DEMUXER 0 ++#define CONFIG_TXD_DEMUXER 0 ++#define CONFIG_TTY_DEMUXER 0 ++#define CONFIG_TY_DEMUXER 0 ++#define CONFIG_V210_DEMUXER 0 ++#define CONFIG_V210X_DEMUXER 0 ++#define CONFIG_VAG_DEMUXER 0 ++#define CONFIG_VC1_DEMUXER 0 ++#define CONFIG_VC1T_DEMUXER 0 ++#define CONFIG_VIVIDAS_DEMUXER 0 ++#define CONFIG_VIVO_DEMUXER 0 ++#define CONFIG_VMD_DEMUXER 0 ++#define CONFIG_VOBSUB_DEMUXER 0 ++#define CONFIG_VOC_DEMUXER 0 ++#define CONFIG_VPK_DEMUXER 0 ++#define CONFIG_VPLAYER_DEMUXER 0 ++#define CONFIG_VQF_DEMUXER 0 ++#define CONFIG_W64_DEMUXER 0 ++#define CONFIG_WAV_DEMUXER 1 ++#define CONFIG_WC3_DEMUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0 ++#define CONFIG_WEBVTT_DEMUXER 0 ++#define CONFIG_WSAUD_DEMUXER 0 ++#define CONFIG_WSD_DEMUXER 0 ++#define CONFIG_WSVQA_DEMUXER 0 ++#define CONFIG_WTV_DEMUXER 0 ++#define CONFIG_WVE_DEMUXER 0 ++#define CONFIG_WV_DEMUXER 0 ++#define CONFIG_XA_DEMUXER 0 ++#define CONFIG_XBIN_DEMUXER 0 ++#define CONFIG_XMV_DEMUXER 0 ++#define CONFIG_XVAG_DEMUXER 0 ++#define CONFIG_XWMA_DEMUXER 0 ++#define CONFIG_YOP_DEMUXER 0 ++#define CONFIG_YUV4MPEGPIPE_DEMUXER 0 ++#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_GIF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0 ++#define CONFIG_LIBGME_DEMUXER 0 ++#define CONFIG_LIBMODPLUG_DEMUXER 0 ++#define CONFIG_LIBOPENMPT_DEMUXER 0 ++#define CONFIG_VAPOURSYNTH_DEMUXER 0 ++#define CONFIG_A64_MUXER 0 ++#define CONFIG_AC3_MUXER 0 ++#define CONFIG_ADTS_MUXER 0 ++#define CONFIG_ADX_MUXER 0 ++#define CONFIG_AIFF_MUXER 0 ++#define CONFIG_AMR_MUXER 0 ++#define CONFIG_APNG_MUXER 0 ++#define CONFIG_APTX_MUXER 0 ++#define CONFIG_APTX_HD_MUXER 0 ++#define CONFIG_ASF_MUXER 0 ++#define CONFIG_ASS_MUXER 0 ++#define CONFIG_AST_MUXER 0 ++#define CONFIG_ASF_STREAM_MUXER 0 ++#define CONFIG_AU_MUXER 0 ++#define CONFIG_AVI_MUXER 0 ++#define CONFIG_AVM2_MUXER 0 ++#define CONFIG_AVS2_MUXER 0 ++#define CONFIG_BIT_MUXER 0 ++#define CONFIG_CAF_MUXER 0 ++#define CONFIG_CAVSVIDEO_MUXER 0 ++#define CONFIG_CODEC2_MUXER 0 ++#define CONFIG_CODEC2RAW_MUXER 0 ++#define CONFIG_CRC_MUXER 0 ++#define CONFIG_DASH_MUXER 0 ++#define CONFIG_DATA_MUXER 0 ++#define CONFIG_DAUD_MUXER 0 ++#define CONFIG_DIRAC_MUXER 0 ++#define CONFIG_DNXHD_MUXER 0 ++#define CONFIG_DTS_MUXER 0 ++#define CONFIG_DV_MUXER 0 ++#define CONFIG_EAC3_MUXER 0 ++#define CONFIG_F4V_MUXER 0 ++#define CONFIG_FFMETADATA_MUXER 0 ++#define CONFIG_FIFO_MUXER 0 ++#define CONFIG_FIFO_TEST_MUXER 0 ++#define CONFIG_FILMSTRIP_MUXER 0 
++#define CONFIG_FITS_MUXER 0 ++#define CONFIG_FLAC_MUXER 0 ++#define CONFIG_FLV_MUXER 0 ++#define CONFIG_FRAMECRC_MUXER 0 ++#define CONFIG_FRAMEHASH_MUXER 0 ++#define CONFIG_FRAMEMD5_MUXER 0 ++#define CONFIG_G722_MUXER 0 ++#define CONFIG_G723_1_MUXER 0 ++#define CONFIG_G726_MUXER 0 ++#define CONFIG_G726LE_MUXER 0 ++#define CONFIG_GIF_MUXER 0 ++#define CONFIG_GSM_MUXER 0 ++#define CONFIG_GXF_MUXER 0 ++#define CONFIG_H261_MUXER 0 ++#define CONFIG_H263_MUXER 0 ++#define CONFIG_H264_MUXER 0 ++#define CONFIG_HASH_MUXER 0 ++#define CONFIG_HDS_MUXER 0 ++#define CONFIG_HEVC_MUXER 0 ++#define CONFIG_HLS_MUXER 0 ++#define CONFIG_ICO_MUXER 0 ++#define CONFIG_ILBC_MUXER 0 ++#define CONFIG_IMAGE2_MUXER 0 ++#define CONFIG_IMAGE2PIPE_MUXER 0 ++#define CONFIG_IPOD_MUXER 0 ++#define CONFIG_IRCAM_MUXER 0 ++#define CONFIG_ISMV_MUXER 0 ++#define CONFIG_IVF_MUXER 0 ++#define CONFIG_JACOSUB_MUXER 0 ++#define CONFIG_LATM_MUXER 0 ++#define CONFIG_LRC_MUXER 0 ++#define CONFIG_M4V_MUXER 0 ++#define CONFIG_MD5_MUXER 0 ++#define CONFIG_MATROSKA_MUXER 0 ++#define CONFIG_MATROSKA_AUDIO_MUXER 0 ++#define CONFIG_MICRODVD_MUXER 0 ++#define CONFIG_MJPEG_MUXER 0 ++#define CONFIG_MLP_MUXER 0 ++#define CONFIG_MMF_MUXER 0 ++#define CONFIG_MOV_MUXER 0 ++#define CONFIG_MP2_MUXER 0 ++#define CONFIG_MP3_MUXER 0 ++#define CONFIG_MP4_MUXER 0 ++#define CONFIG_MPEG1SYSTEM_MUXER 0 ++#define CONFIG_MPEG1VCD_MUXER 0 ++#define CONFIG_MPEG1VIDEO_MUXER 0 ++#define CONFIG_MPEG2DVD_MUXER 0 ++#define CONFIG_MPEG2SVCD_MUXER 0 ++#define CONFIG_MPEG2VIDEO_MUXER 0 ++#define CONFIG_MPEG2VOB_MUXER 0 ++#define CONFIG_MPEGTS_MUXER 0 ++#define CONFIG_MPJPEG_MUXER 0 ++#define CONFIG_MXF_MUXER 0 ++#define CONFIG_MXF_D10_MUXER 0 ++#define CONFIG_MXF_OPATOM_MUXER 0 ++#define CONFIG_NULL_MUXER 0 ++#define CONFIG_NUT_MUXER 0 ++#define CONFIG_OGA_MUXER 0 ++#define CONFIG_OGG_MUXER 0 ++#define CONFIG_OGV_MUXER 0 ++#define CONFIG_OMA_MUXER 0 ++#define CONFIG_OPUS_MUXER 0 ++#define CONFIG_PCM_ALAW_MUXER 0 ++#define CONFIG_PCM_MULAW_MUXER 0 ++#define CONFIG_PCM_VIDC_MUXER 0 ++#define CONFIG_PCM_F64BE_MUXER 0 ++#define CONFIG_PCM_F64LE_MUXER 0 ++#define CONFIG_PCM_F32BE_MUXER 0 ++#define CONFIG_PCM_F32LE_MUXER 0 ++#define CONFIG_PCM_S32BE_MUXER 0 ++#define CONFIG_PCM_S32LE_MUXER 0 ++#define CONFIG_PCM_S24BE_MUXER 0 ++#define CONFIG_PCM_S24LE_MUXER 0 ++#define CONFIG_PCM_S16BE_MUXER 0 ++#define CONFIG_PCM_S16LE_MUXER 0 ++#define CONFIG_PCM_S8_MUXER 0 ++#define CONFIG_PCM_U32BE_MUXER 0 ++#define CONFIG_PCM_U32LE_MUXER 0 ++#define CONFIG_PCM_U24BE_MUXER 0 ++#define CONFIG_PCM_U24LE_MUXER 0 ++#define CONFIG_PCM_U16BE_MUXER 0 ++#define CONFIG_PCM_U16LE_MUXER 0 ++#define CONFIG_PCM_U8_MUXER 0 ++#define CONFIG_PSP_MUXER 0 ++#define CONFIG_RAWVIDEO_MUXER 0 ++#define CONFIG_RM_MUXER 0 ++#define CONFIG_ROQ_MUXER 0 ++#define CONFIG_RSO_MUXER 0 ++#define CONFIG_RTP_MUXER 0 ++#define CONFIG_RTP_MPEGTS_MUXER 0 ++#define CONFIG_RTSP_MUXER 0 ++#define CONFIG_SAP_MUXER 0 ++#define CONFIG_SBC_MUXER 0 ++#define CONFIG_SCC_MUXER 0 ++#define CONFIG_SEGAFILM_MUXER 0 ++#define CONFIG_SEGMENT_MUXER 0 ++#define CONFIG_STREAM_SEGMENT_MUXER 0 ++#define CONFIG_SINGLEJPEG_MUXER 0 ++#define CONFIG_SMJPEG_MUXER 0 ++#define CONFIG_SMOOTHSTREAMING_MUXER 0 ++#define CONFIG_SOX_MUXER 0 ++#define CONFIG_SPX_MUXER 0 ++#define CONFIG_SPDIF_MUXER 0 ++#define CONFIG_SRT_MUXER 0 ++#define CONFIG_STREAMHASH_MUXER 0 ++#define CONFIG_SUP_MUXER 0 ++#define CONFIG_SWF_MUXER 0 ++#define CONFIG_TEE_MUXER 0 ++#define CONFIG_TG2_MUXER 0 ++#define CONFIG_TGP_MUXER 0 ++#define CONFIG_MKVTIMESTAMP_V2_MUXER 0 
++#define CONFIG_TRUEHD_MUXER 0 ++#define CONFIG_TTA_MUXER 0 ++#define CONFIG_UNCODEDFRAMECRC_MUXER 0 ++#define CONFIG_VC1_MUXER 0 ++#define CONFIG_VC1T_MUXER 0 ++#define CONFIG_VOC_MUXER 0 ++#define CONFIG_W64_MUXER 0 ++#define CONFIG_WAV_MUXER 0 ++#define CONFIG_WEBM_MUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_MUXER 0 ++#define CONFIG_WEBM_CHUNK_MUXER 0 ++#define CONFIG_WEBP_MUXER 0 ++#define CONFIG_WEBVTT_MUXER 0 ++#define CONFIG_WTV_MUXER 0 ++#define CONFIG_WV_MUXER 0 ++#define CONFIG_YUV4MPEGPIPE_MUXER 0 ++#define CONFIG_CHROMAPRINT_MUXER 0 ++#define CONFIG_ASYNC_PROTOCOL 0 ++#define CONFIG_BLURAY_PROTOCOL 0 ++#define CONFIG_CACHE_PROTOCOL 0 ++#define CONFIG_CONCAT_PROTOCOL 0 ++#define CONFIG_CRYPTO_PROTOCOL 0 ++#define CONFIG_DATA_PROTOCOL 0 ++#define CONFIG_FFRTMPCRYPT_PROTOCOL 0 ++#define CONFIG_FFRTMPHTTP_PROTOCOL 0 ++#define CONFIG_FILE_PROTOCOL 0 ++#define CONFIG_FTP_PROTOCOL 0 ++#define CONFIG_GOPHER_PROTOCOL 0 ++#define CONFIG_HLS_PROTOCOL 0 ++#define CONFIG_HTTP_PROTOCOL 0 ++#define CONFIG_HTTPPROXY_PROTOCOL 0 ++#define CONFIG_HTTPS_PROTOCOL 0 ++#define CONFIG_ICECAST_PROTOCOL 0 ++#define CONFIG_MMSH_PROTOCOL 0 ++#define CONFIG_MMST_PROTOCOL 0 ++#define CONFIG_MD5_PROTOCOL 0 ++#define CONFIG_PIPE_PROTOCOL 0 ++#define CONFIG_PROMPEG_PROTOCOL 0 ++#define CONFIG_RTMP_PROTOCOL 0 ++#define CONFIG_RTMPE_PROTOCOL 0 ++#define CONFIG_RTMPS_PROTOCOL 0 ++#define CONFIG_RTMPT_PROTOCOL 0 ++#define CONFIG_RTMPTE_PROTOCOL 0 ++#define CONFIG_RTMPTS_PROTOCOL 0 ++#define CONFIG_RTP_PROTOCOL 0 ++#define CONFIG_SCTP_PROTOCOL 0 ++#define CONFIG_SRTP_PROTOCOL 0 ++#define CONFIG_SUBFILE_PROTOCOL 0 ++#define CONFIG_TEE_PROTOCOL 0 ++#define CONFIG_TCP_PROTOCOL 0 ++#define CONFIG_TLS_PROTOCOL 0 ++#define CONFIG_UDP_PROTOCOL 0 ++#define CONFIG_UDPLITE_PROTOCOL 0 ++#define CONFIG_UNIX_PROTOCOL 0 ++#define CONFIG_LIBAMQP_PROTOCOL 0 ++#define CONFIG_LIBRTMP_PROTOCOL 0 ++#define CONFIG_LIBRTMPE_PROTOCOL 0 ++#define CONFIG_LIBRTMPS_PROTOCOL 0 ++#define CONFIG_LIBRTMPT_PROTOCOL 0 ++#define CONFIG_LIBRTMPTE_PROTOCOL 0 ++#define CONFIG_LIBSRT_PROTOCOL 0 ++#define CONFIG_LIBSSH_PROTOCOL 0 ++#define CONFIG_LIBSMBCLIENT_PROTOCOL 0 ++#define CONFIG_LIBZMQ_PROTOCOL 0 ++#endif /* FFMPEG_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/bsf_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/bsf_list.c +new file mode 100644 +index 00000000000..d31ece942a7 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/bsf_list.c +@@ -0,0 +1,3 @@ ++static const AVBitStreamFilter * const bitstream_filters[] = { ++ &ff_null_bsf, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/codec_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/codec_list.c +new file mode 100644 +index 00000000000..49f757b2d86 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/codec_list.c +@@ -0,0 +1,20 @@ ++static const AVCodec * const codec_list[] = { ++ &ff_h264_decoder, ++ &ff_theora_decoder, ++ &ff_vp3_decoder, ++ &ff_vp8_decoder, ++ &ff_aac_decoder, ++ &ff_flac_decoder, ++ &ff_mp3_decoder, ++ &ff_vorbis_decoder, ++ &ff_pcm_alaw_decoder, ++ &ff_pcm_f32le_decoder, ++ &ff_pcm_mulaw_decoder, ++ &ff_pcm_s16be_decoder, ++ &ff_pcm_s16le_decoder, ++ &ff_pcm_s24be_decoder, ++ &ff_pcm_s24le_decoder, ++ &ff_pcm_s32le_decoder, ++ 
&ff_pcm_u8_decoder,
++    &ff_libopus_decoder,
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/parser_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/parser_list.c
+new file mode 100644
+index 00000000000..50acddb28e5
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/parser_list.c
+@@ -0,0 +1,11 @@
++static const AVCodecParser * const parser_list[] = {
++    &ff_aac_parser,
++    &ff_flac_parser,
++    &ff_h264_parser,
++    &ff_mpegaudio_parser,
++    &ff_opus_parser,
++    &ff_vorbis_parser,
++    &ff_vp3_parser,
++    &ff_vp8_parser,
++    &ff_vp9_parser,
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/demuxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/demuxer_list.c
+new file mode 100644
+index 00000000000..920b22bfa7d
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/demuxer_list.c
+@@ -0,0 +1,9 @@
++static const AVInputFormat * const demuxer_list[] = {
++    &ff_aac_demuxer,
++    &ff_flac_demuxer,
++    &ff_matroska_demuxer,
++    &ff_mov_demuxer,
++    &ff_mp3_demuxer,
++    &ff_ogg_demuxer,
++    &ff_wav_demuxer,
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/muxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/muxer_list.c
+new file mode 100644
+index 00000000000..f36d9499c6f
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/muxer_list.c
+@@ -0,0 +1,2 @@
++static const AVOutputFormat * const muxer_list[] = {
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/protocol_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/protocol_list.c
+new file mode 100644
+index 00000000000..247e1e4c3a2
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/protocol_list.c
+@@ -0,0 +1,2 @@
++static const URLProtocol * const url_protocols[] = {
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/avconfig.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/avconfig.h
+new file mode 100644
+index 00000000000..8558b35027f
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/avconfig.h
+@@ -0,0 +1,6 @@
++/* Generated by ffmpeg configure */
++#ifndef AVUTIL_AVCONFIG_H
++#define AVUTIL_AVCONFIG_H
++#define AV_HAVE_BIGENDIAN 0
++#define AV_HAVE_FAST_UNALIGNED 0
++#endif /* AVUTIL_AVCONFIG_H */
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/ffversion.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/ffversion.h
+new file mode 100644
+index 00000000000..31e5b5036dc
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/ffversion.h
+@@ -0,0 +1,5 @@
++/* Automatically generated by version.sh, do not manually edit!
*/ ++#ifndef AVUTIL_FFVERSION_H ++#define AVUTIL_FFVERSION_H ++#define FFMPEG_VERSION "git-2020-06-16-23b2a15c25" ++#endif /* AVUTIL_FFVERSION_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/config.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/config.h +new file mode 100644 +index 00000000000..23fc2d09fd9 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/config.h +@@ -0,0 +1,2589 @@ ++/* Automatically generated by configure - do not modify! */ ++#ifndef FFMPEG_CONFIG_H ++#define FFMPEG_CONFIG_H ++/* #define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --enable-libopus --disable-debug --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,libopus,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio,vp9' --extra-cflags=-I/mnt/chromium/src/third_party/opus/src/include --disable-linux-perf --x86asmexe=nasm --optflags='\"-O2\"' --enable-decoder='theora,vp8' --enable-parser='vp3,vp8' --target-os=linux --enable-pic --cc=clang --cxx=clang++ --ld=clang --enable-decoder='aac,h264' --enable-demuxer=aac --enable-parser='aac,h264' --enable-decoder=mpeg4 --enable-parser='h263,mpeg4video' --enable-demuxer=avi --enable-demuxer=amr --enable-decoder='amrnb,amrwb' --enable-decoder=gsm_ms --enable-parser=gsm" -- elide long configuration string from binary */ ++#define FFMPEG_LICENSE "LGPL version 2.1 or later" ++#define CONFIG_THIS_YEAR 2020 ++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg" ++#define AVCONV_DATADIR "/usr/local/share/ffmpeg" ++#define CC_IDENT "clang version 8.0.1" ++#define av_restrict restrict ++#define EXTERN_PREFIX "" ++#define EXTERN_ASM ++#define BUILDSUF "" ++#define SLIBSUF ".so" ++#define HAVE_MMX2 HAVE_MMXEXT ++#define SWS_MAX_FILTER_SIZE 256 ++#define ARCH_AARCH64 0 ++#define ARCH_ALPHA 0 ++#define ARCH_ARM 0 ++#define ARCH_AVR32 0 ++#define ARCH_AVR32_AP 0 ++#define ARCH_AVR32_UC 0 ++#define ARCH_BFIN 0 ++#define ARCH_IA64 0 ++#define ARCH_M68K 0 ++#define ARCH_MIPS 0 ++#define ARCH_MIPS64 0 ++#define ARCH_PARISC 0 ++#define ARCH_PPC 0 ++#define ARCH_PPC64 0 ++#define ARCH_S390 0 ++#define ARCH_SH4 0 ++#define ARCH_SPARC 0 ++#define ARCH_SPARC64 0 ++#define ARCH_TILEGX 0 ++#define ARCH_TILEPRO 0 ++#define ARCH_TOMI 0 ++#define ARCH_X86 0 ++#define ARCH_X86_32 0 ++#define ARCH_X86_64 0 ++#define HAVE_ARMV5TE 0 ++#define HAVE_ARMV6 0 ++#define HAVE_ARMV6T2 0 ++#define HAVE_ARMV8 0 ++#define HAVE_NEON 0 ++#define HAVE_VFP 0 ++#define HAVE_VFPV3 0 ++#define HAVE_SETEND 0 ++#define HAVE_ALTIVEC 0 ++#define HAVE_DCBZL 0 ++#define HAVE_LDBRX 0 ++#define HAVE_POWER8 0 ++#define HAVE_PPC4XX 0 ++#define HAVE_VSX 0 ++#define HAVE_AESNI 0 ++#define HAVE_AMD3DNOW 0 ++#define HAVE_AMD3DNOWEXT 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_FMA3 0 ++#define HAVE_FMA4 0 ++#define HAVE_MMX 0 ++#define HAVE_MMXEXT 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 
++#define HAVE_SSE3 0 ++#define HAVE_SSE4 0 ++#define HAVE_SSE42 0 ++#define HAVE_SSSE3 0 ++#define HAVE_XOP 0 ++#define HAVE_CPUNOP 0 ++#define HAVE_I686 0 ++#define HAVE_MIPSFPU 0 ++#define HAVE_MIPS32R2 0 ++#define HAVE_MIPS32R5 0 ++#define HAVE_MIPS64R2 0 ++#define HAVE_MIPS32R6 0 ++#define HAVE_MIPS64R6 0 ++#define HAVE_MIPSDSP 0 ++#define HAVE_MIPSDSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MSA2 0 ++#define HAVE_LOONGSON2 0 ++#define HAVE_LOONGSON3 0 ++#define HAVE_MMI 0 ++#define HAVE_ARMV5TE_EXTERNAL 0 ++#define HAVE_ARMV6_EXTERNAL 0 ++#define HAVE_ARMV6T2_EXTERNAL 0 ++#define HAVE_ARMV8_EXTERNAL 0 ++#define HAVE_NEON_EXTERNAL 0 ++#define HAVE_VFP_EXTERNAL 0 ++#define HAVE_VFPV3_EXTERNAL 0 ++#define HAVE_SETEND_EXTERNAL 0 ++#define HAVE_ALTIVEC_EXTERNAL 0 ++#define HAVE_DCBZL_EXTERNAL 0 ++#define HAVE_LDBRX_EXTERNAL 0 ++#define HAVE_POWER8_EXTERNAL 0 ++#define HAVE_PPC4XX_EXTERNAL 0 ++#define HAVE_VSX_EXTERNAL 0 ++#define HAVE_AESNI_EXTERNAL 0 ++#define HAVE_AMD3DNOW_EXTERNAL 0 ++#define HAVE_AMD3DNOWEXT_EXTERNAL 0 ++#define HAVE_AVX_EXTERNAL 0 ++#define HAVE_AVX2_EXTERNAL 0 ++#define HAVE_AVX512_EXTERNAL 0 ++#define HAVE_FMA3_EXTERNAL 0 ++#define HAVE_FMA4_EXTERNAL 0 ++#define HAVE_MMX_EXTERNAL 0 ++#define HAVE_MMXEXT_EXTERNAL 0 ++#define HAVE_SSE_EXTERNAL 0 ++#define HAVE_SSE2_EXTERNAL 0 ++#define HAVE_SSE3_EXTERNAL 0 ++#define HAVE_SSE4_EXTERNAL 0 ++#define HAVE_SSE42_EXTERNAL 0 ++#define HAVE_SSSE3_EXTERNAL 0 ++#define HAVE_XOP_EXTERNAL 0 ++#define HAVE_CPUNOP_EXTERNAL 0 ++#define HAVE_I686_EXTERNAL 0 ++#define HAVE_MIPSFPU_EXTERNAL 0 ++#define HAVE_MIPS32R2_EXTERNAL 0 ++#define HAVE_MIPS32R5_EXTERNAL 0 ++#define HAVE_MIPS64R2_EXTERNAL 0 ++#define HAVE_MIPS32R6_EXTERNAL 0 ++#define HAVE_MIPS64R6_EXTERNAL 0 ++#define HAVE_MIPSDSP_EXTERNAL 0 ++#define HAVE_MIPSDSPR2_EXTERNAL 0 ++#define HAVE_MSA_EXTERNAL 0 ++#define HAVE_MSA2_EXTERNAL 0 ++#define HAVE_LOONGSON2_EXTERNAL 0 ++#define HAVE_LOONGSON3_EXTERNAL 0 ++#define HAVE_MMI_EXTERNAL 0 ++#define HAVE_ARMV5TE_INLINE 0 ++#define HAVE_ARMV6_INLINE 0 ++#define HAVE_ARMV6T2_INLINE 0 ++#define HAVE_ARMV8_INLINE 0 ++#define HAVE_NEON_INLINE 0 ++#define HAVE_VFP_INLINE 0 ++#define HAVE_VFPV3_INLINE 0 ++#define HAVE_SETEND_INLINE 0 ++#define HAVE_ALTIVEC_INLINE 0 ++#define HAVE_DCBZL_INLINE 0 ++#define HAVE_LDBRX_INLINE 0 ++#define HAVE_POWER8_INLINE 0 ++#define HAVE_PPC4XX_INLINE 0 ++#define HAVE_VSX_INLINE 0 ++#define HAVE_AESNI_INLINE 0 ++#define HAVE_AMD3DNOW_INLINE 0 ++#define HAVE_AMD3DNOWEXT_INLINE 0 ++#define HAVE_AVX_INLINE 0 ++#define HAVE_AVX2_INLINE 0 ++#define HAVE_AVX512_INLINE 0 ++#define HAVE_FMA3_INLINE 0 ++#define HAVE_FMA4_INLINE 0 ++#define HAVE_MMX_INLINE 0 ++#define HAVE_MMXEXT_INLINE 0 ++#define HAVE_SSE_INLINE 0 ++#define HAVE_SSE2_INLINE 0 ++#define HAVE_SSE3_INLINE 0 ++#define HAVE_SSE4_INLINE 0 ++#define HAVE_SSE42_INLINE 0 ++#define HAVE_SSSE3_INLINE 0 ++#define HAVE_XOP_INLINE 0 ++#define HAVE_CPUNOP_INLINE 0 ++#define HAVE_I686_INLINE 0 ++#define HAVE_MIPSFPU_INLINE 0 ++#define HAVE_MIPS32R2_INLINE 0 ++#define HAVE_MIPS32R5_INLINE 0 ++#define HAVE_MIPS64R2_INLINE 0 ++#define HAVE_MIPS32R6_INLINE 0 ++#define HAVE_MIPS64R6_INLINE 0 ++#define HAVE_MIPSDSP_INLINE 0 ++#define HAVE_MIPSDSPR2_INLINE 0 ++#define HAVE_MSA_INLINE 0 ++#define HAVE_MSA2_INLINE 0 ++#define HAVE_LOONGSON2_INLINE 0 ++#define HAVE_LOONGSON3_INLINE 0 ++#define HAVE_MMI_INLINE 0 ++#define HAVE_ALIGNED_STACK 0 ++#define HAVE_FAST_64BIT 0 ++#define HAVE_FAST_CLZ 0 ++#define HAVE_FAST_CMOV 0 ++#define HAVE_LOCAL_ALIGNED 0 ++#define 
HAVE_SIMD_ALIGN_16 0 ++#define HAVE_SIMD_ALIGN_32 0 ++#define HAVE_SIMD_ALIGN_64 0 ++#define HAVE_ATOMIC_CAS_PTR 0 ++#define HAVE_MACHINE_RW_BARRIER 0 ++#define HAVE_MEMORYBARRIER 0 ++#define HAVE_MM_EMPTY 0 ++#define HAVE_RDTSC 0 ++#define HAVE_SEM_TIMEDWAIT 1 ++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 ++#define HAVE_CABS 0 ++#define HAVE_CEXP 0 ++#define HAVE_INLINE_ASM 1 ++#define HAVE_SYMVER 0 ++#define HAVE_X86ASM 0 ++#define HAVE_BIGENDIAN 0 ++#define HAVE_FAST_UNALIGNED 0 ++#define HAVE_ARPA_INET_H 0 ++#define HAVE_ASM_TYPES_H 1 ++#define HAVE_CDIO_PARANOIA_H 0 ++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0 ++#define HAVE_CUDA_H 0 ++#define HAVE_DISPATCH_DISPATCH_H 0 ++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_BKTR_IOCTL_METEOR_H 0 ++#define HAVE_DEV_IC_BT8XX_H 0 ++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0 ++#define HAVE_DIRECT_H 0 ++#define HAVE_DIRENT_H 1 ++#define HAVE_DXGIDEBUG_H 0 ++#define HAVE_DXVA_H 0 ++#define HAVE_ES2_GL_H 0 ++#define HAVE_GSM_H 0 ++#define HAVE_IO_H 0 ++#define HAVE_LINUX_PERF_EVENT_H 1 ++#define HAVE_MACHINE_IOCTL_BT848_H 0 ++#define HAVE_MACHINE_IOCTL_METEOR_H 0 ++#define HAVE_MALLOC_H 1 ++#define HAVE_OPENCV2_CORE_CORE_C_H 0 ++#define HAVE_OPENGL_GL3_H 0 ++#define HAVE_POLL_H 1 ++#define HAVE_SYS_PARAM_H 1 ++#define HAVE_SYS_RESOURCE_H 1 ++#define HAVE_SYS_SELECT_H 1 ++#define HAVE_SYS_SOUNDCARD_H 1 ++#define HAVE_SYS_TIME_H 1 ++#define HAVE_SYS_UN_H 1 ++#define HAVE_SYS_VIDEOIO_H 0 ++#define HAVE_TERMIOS_H 1 ++#define HAVE_UDPLITE_H 0 ++#define HAVE_UNISTD_H 1 ++#define HAVE_VALGRIND_VALGRIND_H 0 /* #define HAVE_VALGRIND_VALGRIND_H 0 -- forced to 0. See https://crbug.com/590440 */ ++#define HAVE_WINDOWS_H 0 ++#define HAVE_WINSOCK2_H 0 ++#define HAVE_INTRINSICS_NEON 0 ++#define HAVE_ATANF 1 ++#define HAVE_ATAN2F 1 ++#define HAVE_CBRT 1 ++#define HAVE_CBRTF 1 ++#define HAVE_COPYSIGN 1 ++#define HAVE_COSF 1 ++#define HAVE_ERF 1 ++#define HAVE_EXP2 1 ++#define HAVE_EXP2F 1 ++#define HAVE_EXPF 1 ++#define HAVE_HYPOT 1 ++#define HAVE_ISFINITE 1 ++#define HAVE_ISINF 1 ++#define HAVE_ISNAN 1 ++#define HAVE_LDEXPF 1 ++#define HAVE_LLRINT 1 ++#define HAVE_LLRINTF 1 ++#define HAVE_LOG2 1 ++#define HAVE_LOG2F 1 ++#define HAVE_LOG10F 1 ++#define HAVE_LRINT 1 ++#define HAVE_LRINTF 1 ++#define HAVE_POWF 1 ++#define HAVE_RINT 1 ++#define HAVE_ROUND 1 ++#define HAVE_ROUNDF 1 ++#define HAVE_SINF 1 ++#define HAVE_TRUNC 1 ++#define HAVE_TRUNCF 1 ++#define HAVE_DOS_PATHS 0 ++#define HAVE_LIBC_MSVCRT 0 ++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0 ++#define HAVE_SECTION_DATA_REL_RO 1 ++#define HAVE_THREADS 1 ++#define HAVE_UWP 0 ++#define HAVE_WINRT 0 ++#define HAVE_ACCESS 1 ++#define HAVE_ALIGNED_MALLOC 0 ++#define HAVE_ARC4RANDOM 0 ++#define HAVE_CLOCK_GETTIME 1 ++#define HAVE_CLOSESOCKET 0 ++#define HAVE_COMMANDLINETOARGVW 0 ++#define HAVE_FCNTL 1 ++#define HAVE_GETADDRINFO 0 ++#define HAVE_GETHRTIME 0 ++#define HAVE_GETOPT 1 ++#define HAVE_GETMODULEHANDLE 0 ++#define HAVE_GETPROCESSAFFINITYMASK 0 ++#define HAVE_GETPROCESSMEMORYINFO 0 ++#define HAVE_GETPROCESSTIMES 0 ++#define HAVE_GETRUSAGE 1 ++#define HAVE_GETSTDHANDLE 0 ++#define HAVE_GETSYSTEMTIMEASFILETIME 0 ++#define HAVE_GETTIMEOFDAY 1 ++#define HAVE_GLOB 1 ++#define HAVE_GLXGETPROCADDRESS 0 ++#define HAVE_GMTIME_R 1 ++#define HAVE_INET_ATON 0 ++#define HAVE_ISATTY 1 ++#define HAVE_KBHIT 0 ++#define HAVE_LOCALTIME_R 1 ++#define HAVE_LSTAT 1 ++#define HAVE_LZO1X_999_COMPRESS 0 ++#define HAVE_MACH_ABSOLUTE_TIME 0 ++#define HAVE_MAPVIEWOFFILE 0 
++#define HAVE_MEMALIGN 1 ++#define HAVE_MKSTEMP 1 ++#define HAVE_MMAP 1 ++#define HAVE_MPROTECT 1 ++#define HAVE_NANOSLEEP 1 ++#define HAVE_PEEKNAMEDPIPE 0 ++#define HAVE_POSIX_MEMALIGN 1 ++#define HAVE_PTHREAD_CANCEL 1 ++#define HAVE_SCHED_GETAFFINITY 1 ++#define HAVE_SECITEMIMPORT 0 ++#define HAVE_SETCONSOLETEXTATTRIBUTE 0 ++#define HAVE_SETCONSOLECTRLHANDLER 0 ++#define HAVE_SETDLLDIRECTORY 0 ++#define HAVE_SETMODE 0 ++#define HAVE_SETRLIMIT 1 ++#define HAVE_SLEEP 0 ++#define HAVE_STRERROR_R 1 ++#define HAVE_SYSCONF 1 ++#define HAVE_SYSCTL 0 /* #define HAVE_SYSCTL 1 -- forced to 0 for Fuchsia */ ++#define HAVE_USLEEP 1 ++#define HAVE_UTGETOSTYPEFROMSTRING 0 ++#define HAVE_VIRTUALALLOC 0 ++#define HAVE_WGLGETPROCADDRESS 0 ++#define HAVE_BCRYPT 0 ++#define HAVE_VAAPI_DRM 0 ++#define HAVE_VAAPI_X11 0 ++#define HAVE_VDPAU_X11 0 ++#define HAVE_PTHREADS 1 ++#define HAVE_OS2THREADS 0 ++#define HAVE_W32THREADS 0 ++#define HAVE_AS_ARCH_DIRECTIVE 0 ++#define HAVE_AS_DN_DIRECTIVE 0 ++#define HAVE_AS_FPU_DIRECTIVE 0 ++#define HAVE_AS_FUNC 0 ++#define HAVE_AS_OBJECT_ARCH 0 ++#define HAVE_ASM_MOD_Q 0 ++#define HAVE_BLOCKS_EXTENSION 0 ++#define HAVE_EBP_AVAILABLE 0 ++#define HAVE_EBX_AVAILABLE 0 ++#define HAVE_GNU_AS 0 ++#define HAVE_GNU_WINDRES 0 ++#define HAVE_IBM_ASM 0 ++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0 ++#define HAVE_INLINE_ASM_LABELS 1 ++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 1 ++#define HAVE_PRAGMA_DEPRECATED 1 ++#define HAVE_RSYNC_CONTIMEOUT 0 ++#define HAVE_SYMVER_ASM_LABEL 1 ++#define HAVE_SYMVER_GNU_ASM 1 ++#define HAVE_VFP_ARGS 0 ++#define HAVE_XFORM_ASM 0 ++#define HAVE_XMM_CLOBBERS 0 ++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0 ++#define HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR 0 ++#define HAVE_SOCKLEN_T 0 ++#define HAVE_STRUCT_ADDRINFO 0 ++#define HAVE_STRUCT_GROUP_SOURCE_REQ 0 ++#define HAVE_STRUCT_IP_MREQ_SOURCE 0 ++#define HAVE_STRUCT_IPV6_MREQ 0 ++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0 ++#define HAVE_STRUCT_POLLFD 0 ++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 1 ++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0 ++#define HAVE_STRUCT_SOCKADDR_IN6 0 ++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0 ++#define HAVE_STRUCT_SOCKADDR_STORAGE 0 ++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 ++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0 ++#define HAVE_MAKEINFO 1 ++#define HAVE_MAKEINFO_HTML 1 ++#define HAVE_OPENCL_D3D11 0 ++#define HAVE_OPENCL_DRM_ARM 0 ++#define HAVE_OPENCL_DRM_BEIGNET 0 ++#define HAVE_OPENCL_DXVA2 0 ++#define HAVE_OPENCL_VAAPI_BEIGNET 0 ++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0 ++#define HAVE_PERL 1 ++#define HAVE_POD2MAN 1 ++#define HAVE_TEXI2HTML 0 ++#define CONFIG_DOC 0 ++#define CONFIG_HTMLPAGES 0 ++#define CONFIG_MANPAGES 0 ++#define CONFIG_PODPAGES 0 ++#define CONFIG_TXTPAGES 0 ++#define CONFIG_AVIO_LIST_DIR_EXAMPLE 1 ++#define CONFIG_AVIO_READING_EXAMPLE 1 ++#define CONFIG_DECODE_AUDIO_EXAMPLE 1 ++#define CONFIG_DECODE_VIDEO_EXAMPLE 1 ++#define CONFIG_DEMUXING_DECODING_EXAMPLE 1 ++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1 ++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1 ++#define CONFIG_EXTRACT_MVS_EXAMPLE 1 ++#define CONFIG_FILTER_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0 ++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 1 ++#define CONFIG_HW_DECODE_EXAMPLE 1 ++#define CONFIG_METADATA_EXAMPLE 1 ++#define CONFIG_MUXING_EXAMPLE 0 ++#define 
CONFIG_QSVDEC_EXAMPLE 0 ++#define CONFIG_REMUXING_EXAMPLE 1 ++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0 ++#define CONFIG_SCALING_VIDEO_EXAMPLE 0 ++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0 ++#define CONFIG_TRANSCODING_EXAMPLE 0 ++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0 ++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0 ++#define CONFIG_AVISYNTH 0 ++#define CONFIG_FREI0R 0 ++#define CONFIG_LIBCDIO 0 ++#define CONFIG_LIBDAVS2 0 ++#define CONFIG_LIBRUBBERBAND 0 ++#define CONFIG_LIBVIDSTAB 0 ++#define CONFIG_LIBX264 0 ++#define CONFIG_LIBX265 0 ++#define CONFIG_LIBXAVS 0 ++#define CONFIG_LIBXAVS2 0 ++#define CONFIG_LIBXVID 0 ++#define CONFIG_DECKLINK 0 ++#define CONFIG_LIBFDK_AAC 0 ++#define CONFIG_OPENSSL 0 ++#define CONFIG_LIBTLS 0 ++#define CONFIG_GMP 0 ++#define CONFIG_LIBARIBB24 0 ++#define CONFIG_LIBLENSFUN 0 ++#define CONFIG_LIBOPENCORE_AMRNB 0 ++#define CONFIG_LIBOPENCORE_AMRWB 0 ++#define CONFIG_LIBVMAF 0 ++#define CONFIG_LIBVO_AMRWBENC 0 ++#define CONFIG_MBEDTLS 0 ++#define CONFIG_RKMPP 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_CHROMAPRINT 0 ++#define CONFIG_GCRYPT 0 ++#define CONFIG_GNUTLS 0 ++#define CONFIG_JNI 0 ++#define CONFIG_LADSPA 0 ++#define CONFIG_LIBAOM 0 ++#define CONFIG_LIBASS 0 ++#define CONFIG_LIBBLURAY 0 ++#define CONFIG_LIBBS2B 0 ++#define CONFIG_LIBCACA 0 ++#define CONFIG_LIBCELT 0 ++#define CONFIG_LIBCODEC2 0 ++#define CONFIG_LIBDAV1D 0 ++#define CONFIG_LIBDC1394 0 ++#define CONFIG_LIBDRM 0 ++#define CONFIG_LIBFLITE 0 ++#define CONFIG_LIBFONTCONFIG 0 ++#define CONFIG_LIBFREETYPE 0 ++#define CONFIG_LIBFRIBIDI 0 ++#define CONFIG_LIBGLSLANG 0 ++#define CONFIG_LIBGME 0 ++#define CONFIG_LIBGSM 0 ++#define CONFIG_LIBIEC61883 0 ++#define CONFIG_LIBILBC 0 ++#define CONFIG_LIBJACK 0 ++#define CONFIG_LIBKLVANC 0 ++#define CONFIG_LIBKVAZAAR 0 ++#define CONFIG_LIBMODPLUG 0 ++#define CONFIG_LIBMP3LAME 0 ++#define CONFIG_LIBMYSOFA 0 ++#define CONFIG_LIBOPENCV 0 ++#define CONFIG_LIBOPENH264 0 ++#define CONFIG_LIBOPENJPEG 0 ++#define CONFIG_LIBOPENMPT 0 ++#define CONFIG_LIBOPUS 1 ++#define CONFIG_LIBPULSE 0 ++#define CONFIG_LIBRABBITMQ 0 ++#define CONFIG_LIBRAV1E 0 ++#define CONFIG_LIBRSVG 0 ++#define CONFIG_LIBRTMP 0 ++#define CONFIG_LIBSHINE 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_LIBSNAPPY 0 ++#define CONFIG_LIBSOXR 0 ++#define CONFIG_LIBSPEEX 0 ++#define CONFIG_LIBSRT 0 ++#define CONFIG_LIBSSH 0 ++#define CONFIG_LIBTENSORFLOW 0 ++#define CONFIG_LIBTESSERACT 0 ++#define CONFIG_LIBTHEORA 0 ++#define CONFIG_LIBTWOLAME 0 ++#define CONFIG_LIBV4L2 0 ++#define CONFIG_LIBVORBIS 0 ++#define CONFIG_LIBVPX 0 ++#define CONFIG_LIBWAVPACK 0 ++#define CONFIG_LIBWEBP 0 ++#define CONFIG_LIBXML2 0 ++#define CONFIG_LIBZIMG 0 ++#define CONFIG_LIBZMQ 0 ++#define CONFIG_LIBZVBI 0 ++#define CONFIG_LV2 0 ++#define CONFIG_MEDIACODEC 0 ++#define CONFIG_OPENAL 0 ++#define CONFIG_OPENGL 0 ++#define CONFIG_POCKETSPHINX 0 ++#define CONFIG_VAPOURSYNTH 0 ++#define CONFIG_ALSA 0 ++#define CONFIG_APPKIT 0 ++#define CONFIG_AVFOUNDATION 0 ++#define CONFIG_BZLIB 0 ++#define CONFIG_COREIMAGE 0 ++#define CONFIG_ICONV 0 ++#define CONFIG_LIBXCB 0 ++#define CONFIG_LIBXCB_SHM 0 ++#define CONFIG_LIBXCB_SHAPE 0 ++#define CONFIG_LIBXCB_XFIXES 0 ++#define CONFIG_LZMA 0 ++#define CONFIG_SCHANNEL 0 ++#define CONFIG_SDL2 0 ++#define CONFIG_SECURETRANSPORT 0 ++#define CONFIG_SNDIO 0 ++#define CONFIG_XLIB 0 ++#define CONFIG_ZLIB 0 ++#define CONFIG_CUDA_NVCC 0 ++#define CONFIG_CUDA_SDK 0 ++#define CONFIG_LIBNPP 0 ++#define CONFIG_LIBMFX 0 ++#define CONFIG_MMAL 0 ++#define CONFIG_OMX 0 ++#define CONFIG_OPENCL 0 ++#define 
CONFIG_VULKAN 0 ++#define CONFIG_AMF 0 ++#define CONFIG_AUDIOTOOLBOX 0 ++#define CONFIG_CRYSTALHD 0 ++#define CONFIG_CUDA 0 ++#define CONFIG_CUDA_LLVM 0 ++#define CONFIG_CUVID 0 ++#define CONFIG_D3D11VA 0 ++#define CONFIG_DXVA2 0 ++#define CONFIG_FFNVCODEC 0 ++#define CONFIG_NVDEC 0 ++#define CONFIG_NVENC 0 ++#define CONFIG_VAAPI 0 ++#define CONFIG_VDPAU 0 ++#define CONFIG_VIDEOTOOLBOX 0 ++#define CONFIG_V4L2_M2M 0 ++#define CONFIG_XVMC 0 ++#define CONFIG_FTRAPV 0 ++#define CONFIG_GRAY 0 ++#define CONFIG_HARDCODED_TABLES 0 ++#define CONFIG_OMX_RPI 0 ++#define CONFIG_RUNTIME_CPUDETECT 1 ++#define CONFIG_SAFE_BITSTREAM_READER 1 ++#define CONFIG_SHARED 0 ++#define CONFIG_SMALL 0 ++#define CONFIG_STATIC 1 ++#define CONFIG_SWSCALE_ALPHA 1 ++#define CONFIG_GPL 0 ++#define CONFIG_NONFREE 0 ++#define CONFIG_VERSION3 0 ++#define CONFIG_AVDEVICE 0 ++#define CONFIG_AVFILTER 0 ++#define CONFIG_SWSCALE 0 ++#define CONFIG_POSTPROC 0 ++#define CONFIG_AVFORMAT 1 ++#define CONFIG_AVCODEC 1 ++#define CONFIG_SWRESAMPLE 0 ++#define CONFIG_AVRESAMPLE 0 ++#define CONFIG_AVUTIL 1 ++#define CONFIG_FFPLAY 0 ++#define CONFIG_FFPROBE 0 ++#define CONFIG_FFMPEG 0 ++#define CONFIG_DCT 1 ++#define CONFIG_DWT 0 ++#define CONFIG_ERROR_RESILIENCE 1 ++#define CONFIG_FAAN 0 ++#define CONFIG_FAST_UNALIGNED 0 ++#define CONFIG_FFT 1 ++#define CONFIG_LSP 1 ++#define CONFIG_LZO 0 ++#define CONFIG_MDCT 1 ++#define CONFIG_PIXELUTILS 0 ++#define CONFIG_NETWORK 0 ++#define CONFIG_RDFT 1 ++#define CONFIG_AUTODETECT 0 ++#define CONFIG_FONTCONFIG 0 ++#define CONFIG_LARGE_TESTS 1 ++#define CONFIG_LINUX_PERF 0 ++#define CONFIG_MEMORY_POISONING 0 ++#define CONFIG_NEON_CLOBBER_TEST 0 ++#define CONFIG_OSSFUZZ 0 ++#define CONFIG_PIC 1 ++#define CONFIG_THUMB 0 ++#define CONFIG_VALGRIND_BACKTRACE 0 ++#define CONFIG_XMM_CLOBBER_TEST 0 ++#define CONFIG_BSFS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_ENCODERS 0 ++#define CONFIG_HWACCELS 0 ++#define CONFIG_PARSERS 1 ++#define CONFIG_INDEVS 0 ++#define CONFIG_OUTDEVS 0 ++#define CONFIG_FILTERS 0 ++#define CONFIG_DEMUXERS 1 ++#define CONFIG_MUXERS 0 ++#define CONFIG_PROTOCOLS 0 ++#define CONFIG_AANDCTTABLES 0 ++#define CONFIG_AC3DSP 0 ++#define CONFIG_ADTS_HEADER 1 ++#define CONFIG_AUDIO_FRAME_QUEUE 0 ++#define CONFIG_AUDIODSP 0 ++#define CONFIG_BLOCKDSP 1 ++#define CONFIG_BSWAPDSP 0 ++#define CONFIG_CABAC 1 ++#define CONFIG_CBS 0 ++#define CONFIG_CBS_AV1 0 ++#define CONFIG_CBS_H264 0 ++#define CONFIG_CBS_H265 0 ++#define CONFIG_CBS_JPEG 0 ++#define CONFIG_CBS_MPEG2 0 ++#define CONFIG_CBS_VP9 0 ++#define CONFIG_DIRAC_PARSE 1 ++#define CONFIG_DNN 0 ++#define CONFIG_DVPROFILE 0 ++#define CONFIG_EXIF 1 ++#define CONFIG_FAANDCT 0 ++#define CONFIG_FAANIDCT 0 ++#define CONFIG_FDCTDSP 1 ++#define CONFIG_FLACDSP 1 ++#define CONFIG_FMTCONVERT 0 ++#define CONFIG_FRAME_THREAD_ENCODER 0 ++#define CONFIG_G722DSP 0 ++#define CONFIG_GOLOMB 1 ++#define CONFIG_GPLV3 0 ++#define CONFIG_H263DSP 1 ++#define CONFIG_H264CHROMA 1 ++#define CONFIG_H264DSP 1 ++#define CONFIG_H264PARSE 1 ++#define CONFIG_H264PRED 1 ++#define CONFIG_H264QPEL 1 ++#define CONFIG_HEVCPARSE 0 ++#define CONFIG_HPELDSP 1 ++#define CONFIG_HUFFMAN 0 ++#define CONFIG_HUFFYUVDSP 0 ++#define CONFIG_HUFFYUVENCDSP 0 ++#define CONFIG_IDCTDSP 1 ++#define CONFIG_IIRFILTER 0 ++#define CONFIG_MDCT15 1 ++#define CONFIG_INTRAX8 0 ++#define CONFIG_ISO_MEDIA 1 ++#define CONFIG_IVIDSP 0 ++#define CONFIG_JPEGTABLES 0 ++#define CONFIG_LGPLV3 0 ++#define CONFIG_LIBX262 0 ++#define CONFIG_LLAUDDSP 0 ++#define CONFIG_LLVIDDSP 0 ++#define CONFIG_LLVIDENCDSP 0 
++#define CONFIG_LPC 0 ++#define CONFIG_LZF 0 ++#define CONFIG_ME_CMP 1 ++#define CONFIG_MPEG_ER 1 ++#define CONFIG_MPEGAUDIO 1 ++#define CONFIG_MPEGAUDIODSP 1 ++#define CONFIG_MPEGAUDIOHEADER 1 ++#define CONFIG_MPEGVIDEO 1 ++#define CONFIG_MPEGVIDEOENC 0 ++#define CONFIG_MSS34DSP 0 ++#define CONFIG_PIXBLOCKDSP 1 ++#define CONFIG_QPELDSP 1 ++#define CONFIG_QSV 0 ++#define CONFIG_QSVDEC 0 ++#define CONFIG_QSVENC 0 ++#define CONFIG_QSVVPP 0 ++#define CONFIG_RANGECODER 0 ++#define CONFIG_RIFFDEC 1 ++#define CONFIG_RIFFENC 0 ++#define CONFIG_RTPDEC 0 ++#define CONFIG_RTPENC_CHAIN 0 ++#define CONFIG_RV34DSP 0 ++#define CONFIG_SCENE_SAD 0 ++#define CONFIG_SINEWIN 1 ++#define CONFIG_SNAPPY 0 ++#define CONFIG_SRTP 0 ++#define CONFIG_STARTCODE 1 ++#define CONFIG_TEXTUREDSP 0 ++#define CONFIG_TEXTUREDSPENC 0 ++#define CONFIG_TPELDSP 0 ++#define CONFIG_VAAPI_1 0 ++#define CONFIG_VAAPI_ENCODE 0 ++#define CONFIG_VC1DSP 0 ++#define CONFIG_VIDEODSP 1 ++#define CONFIG_VP3DSP 1 ++#define CONFIG_VP56DSP 0 ++#define CONFIG_VP8DSP 1 ++#define CONFIG_WMA_FREQS 0 ++#define CONFIG_WMV2DSP 0 ++#define CONFIG_AAC_ADTSTOASC_BSF 0 ++#define CONFIG_AV1_FRAME_MERGE_BSF 0 ++#define CONFIG_AV1_FRAME_SPLIT_BSF 0 ++#define CONFIG_AV1_METADATA_BSF 0 ++#define CONFIG_CHOMP_BSF 0 ++#define CONFIG_DUMP_EXTRADATA_BSF 0 ++#define CONFIG_DCA_CORE_BSF 0 ++#define CONFIG_EAC3_CORE_BSF 0 ++#define CONFIG_EXTRACT_EXTRADATA_BSF 0 ++#define CONFIG_FILTER_UNITS_BSF 0 ++#define CONFIG_H264_METADATA_BSF 0 ++#define CONFIG_H264_MP4TOANNEXB_BSF 0 ++#define CONFIG_H264_REDUNDANT_PPS_BSF 0 ++#define CONFIG_HAPQA_EXTRACT_BSF 0 ++#define CONFIG_HEVC_METADATA_BSF 0 ++#define CONFIG_HEVC_MP4TOANNEXB_BSF 0 ++#define CONFIG_IMX_DUMP_HEADER_BSF 0 ++#define CONFIG_MJPEG2JPEG_BSF 0 ++#define CONFIG_MJPEGA_DUMP_HEADER_BSF 0 ++#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0 ++#define CONFIG_MPEG2_METADATA_BSF 0 ++#define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0 ++#define CONFIG_MOV2TEXTSUB_BSF 0 ++#define CONFIG_NOISE_BSF 0 ++#define CONFIG_NULL_BSF 1 ++#define CONFIG_PRORES_METADATA_BSF 0 ++#define CONFIG_REMOVE_EXTRADATA_BSF 0 ++#define CONFIG_TEXT2MOVSUB_BSF 0 ++#define CONFIG_TRACE_HEADERS_BSF 0 ++#define CONFIG_TRUEHD_CORE_BSF 0 ++#define CONFIG_VP9_METADATA_BSF 0 ++#define CONFIG_VP9_RAW_REORDER_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0 ++#define CONFIG_AASC_DECODER 0 ++#define CONFIG_AIC_DECODER 0 ++#define CONFIG_ALIAS_PIX_DECODER 0 ++#define CONFIG_AGM_DECODER 0 ++#define CONFIG_AMV_DECODER 0 ++#define CONFIG_ANM_DECODER 0 ++#define CONFIG_ANSI_DECODER 0 ++#define CONFIG_APNG_DECODER 0 ++#define CONFIG_ARBC_DECODER 0 ++#define CONFIG_ASV1_DECODER 0 ++#define CONFIG_ASV2_DECODER 0 ++#define CONFIG_AURA_DECODER 0 ++#define CONFIG_AURA2_DECODER 0 ++#define CONFIG_AVRP_DECODER 0 ++#define CONFIG_AVRN_DECODER 0 ++#define CONFIG_AVS_DECODER 0 ++#define CONFIG_AVUI_DECODER 0 ++#define CONFIG_AYUV_DECODER 0 ++#define CONFIG_BETHSOFTVID_DECODER 0 ++#define CONFIG_BFI_DECODER 0 ++#define CONFIG_BINK_DECODER 0 ++#define CONFIG_BITPACKED_DECODER 0 ++#define CONFIG_BMP_DECODER 0 ++#define CONFIG_BMV_VIDEO_DECODER 0 ++#define CONFIG_BRENDER_PIX_DECODER 0 ++#define CONFIG_C93_DECODER 0 ++#define CONFIG_CAVS_DECODER 0 ++#define CONFIG_CDGRAPHICS_DECODER 0 ++#define CONFIG_CDTOONS_DECODER 0 ++#define CONFIG_CDXL_DECODER 0 ++#define CONFIG_CFHD_DECODER 0 ++#define CONFIG_CINEPAK_DECODER 0 ++#define CONFIG_CLEARVIDEO_DECODER 0 ++#define CONFIG_CLJR_DECODER 0 ++#define CONFIG_CLLC_DECODER 0 ++#define CONFIG_COMFORTNOISE_DECODER 0 
++#define CONFIG_CPIA_DECODER 0 ++#define CONFIG_CSCD_DECODER 0 ++#define CONFIG_CYUV_DECODER 0 ++#define CONFIG_DDS_DECODER 0 ++#define CONFIG_DFA_DECODER 0 ++#define CONFIG_DIRAC_DECODER 0 ++#define CONFIG_DNXHD_DECODER 0 ++#define CONFIG_DPX_DECODER 0 ++#define CONFIG_DSICINVIDEO_DECODER 0 ++#define CONFIG_DVAUDIO_DECODER 0 ++#define CONFIG_DVVIDEO_DECODER 0 ++#define CONFIG_DXA_DECODER 0 ++#define CONFIG_DXTORY_DECODER 0 ++#define CONFIG_DXV_DECODER 0 ++#define CONFIG_EACMV_DECODER 0 ++#define CONFIG_EAMAD_DECODER 0 ++#define CONFIG_EATGQ_DECODER 0 ++#define CONFIG_EATGV_DECODER 0 ++#define CONFIG_EATQI_DECODER 0 ++#define CONFIG_EIGHTBPS_DECODER 0 ++#define CONFIG_EIGHTSVX_EXP_DECODER 0 ++#define CONFIG_EIGHTSVX_FIB_DECODER 0 ++#define CONFIG_ESCAPE124_DECODER 0 ++#define CONFIG_ESCAPE130_DECODER 0 ++#define CONFIG_EXR_DECODER 0 ++#define CONFIG_FFV1_DECODER 0 ++#define CONFIG_FFVHUFF_DECODER 0 ++#define CONFIG_FIC_DECODER 0 ++#define CONFIG_FITS_DECODER 0 ++#define CONFIG_FLASHSV_DECODER 0 ++#define CONFIG_FLASHSV2_DECODER 0 ++#define CONFIG_FLIC_DECODER 0 ++#define CONFIG_FLV_DECODER 0 ++#define CONFIG_FMVC_DECODER 0 ++#define CONFIG_FOURXM_DECODER 0 ++#define CONFIG_FRAPS_DECODER 0 ++#define CONFIG_FRWU_DECODER 0 ++#define CONFIG_G2M_DECODER 0 ++#define CONFIG_GDV_DECODER 0 ++#define CONFIG_GIF_DECODER 0 ++#define CONFIG_H261_DECODER 0 ++#define CONFIG_H263_DECODER 1 ++#define CONFIG_H263I_DECODER 0 ++#define CONFIG_H263P_DECODER 0 ++#define CONFIG_H263_V4L2M2M_DECODER 0 ++#define CONFIG_H264_DECODER 1 ++#define CONFIG_H264_CRYSTALHD_DECODER 0 ++#define CONFIG_H264_V4L2M2M_DECODER 0 ++#define CONFIG_H264_MEDIACODEC_DECODER 0 ++#define CONFIG_H264_MMAL_DECODER 0 ++#define CONFIG_H264_QSV_DECODER 0 ++#define CONFIG_H264_RKMPP_DECODER 0 ++#define CONFIG_HAP_DECODER 0 ++#define CONFIG_HEVC_DECODER 0 ++#define CONFIG_HEVC_QSV_DECODER 0 ++#define CONFIG_HEVC_RKMPP_DECODER 0 ++#define CONFIG_HEVC_V4L2M2M_DECODER 0 ++#define CONFIG_HNM4_VIDEO_DECODER 0 ++#define CONFIG_HQ_HQA_DECODER 0 ++#define CONFIG_HQX_DECODER 0 ++#define CONFIG_HUFFYUV_DECODER 0 ++#define CONFIG_HYMT_DECODER 0 ++#define CONFIG_IDCIN_DECODER 0 ++#define CONFIG_IFF_ILBM_DECODER 0 ++#define CONFIG_IMM4_DECODER 0 ++#define CONFIG_IMM5_DECODER 0 ++#define CONFIG_INDEO2_DECODER 0 ++#define CONFIG_INDEO3_DECODER 0 ++#define CONFIG_INDEO4_DECODER 0 ++#define CONFIG_INDEO5_DECODER 0 ++#define CONFIG_INTERPLAY_VIDEO_DECODER 0 ++#define CONFIG_JPEG2000_DECODER 0 ++#define CONFIG_JPEGLS_DECODER 0 ++#define CONFIG_JV_DECODER 0 ++#define CONFIG_KGV1_DECODER 0 ++#define CONFIG_KMVC_DECODER 0 ++#define CONFIG_LAGARITH_DECODER 0 ++#define CONFIG_LOCO_DECODER 0 ++#define CONFIG_LSCR_DECODER 0 ++#define CONFIG_M101_DECODER 0 ++#define CONFIG_MAGICYUV_DECODER 0 ++#define CONFIG_MDEC_DECODER 0 ++#define CONFIG_MIMIC_DECODER 0 ++#define CONFIG_MJPEG_DECODER 0 ++#define CONFIG_MJPEGB_DECODER 0 ++#define CONFIG_MMVIDEO_DECODER 0 ++#define CONFIG_MOTIONPIXELS_DECODER 0 ++#define CONFIG_MPEG1VIDEO_DECODER 0 ++#define CONFIG_MPEG2VIDEO_DECODER 0 ++#define CONFIG_MPEG4_DECODER 1 ++#define CONFIG_MPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG4_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG4_MMAL_DECODER 0 ++#define CONFIG_MPEGVIDEO_DECODER 0 ++#define CONFIG_MPEG1_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_MMAL_DECODER 0 ++#define CONFIG_MPEG2_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG2_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_QSV_DECODER 0 ++#define CONFIG_MPEG2_MEDIACODEC_DECODER 0 ++#define CONFIG_MSA1_DECODER 0 ++#define CONFIG_MSCC_DECODER 0 
++#define CONFIG_MSMPEG4V1_DECODER 0 ++#define CONFIG_MSMPEG4V2_DECODER 0 ++#define CONFIG_MSMPEG4V3_DECODER 0 ++#define CONFIG_MSMPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MSRLE_DECODER 0 ++#define CONFIG_MSS1_DECODER 0 ++#define CONFIG_MSS2_DECODER 0 ++#define CONFIG_MSVIDEO1_DECODER 0 ++#define CONFIG_MSZH_DECODER 0 ++#define CONFIG_MTS2_DECODER 0 ++#define CONFIG_MV30_DECODER 0 ++#define CONFIG_MVC1_DECODER 0 ++#define CONFIG_MVC2_DECODER 0 ++#define CONFIG_MVDV_DECODER 0 ++#define CONFIG_MVHA_DECODER 0 ++#define CONFIG_MWSC_DECODER 0 ++#define CONFIG_MXPEG_DECODER 0 ++#define CONFIG_NUV_DECODER 0 ++#define CONFIG_PAF_VIDEO_DECODER 0 ++#define CONFIG_PAM_DECODER 0 ++#define CONFIG_PBM_DECODER 0 ++#define CONFIG_PCX_DECODER 0 ++#define CONFIG_PGM_DECODER 0 ++#define CONFIG_PGMYUV_DECODER 0 ++#define CONFIG_PICTOR_DECODER 0 ++#define CONFIG_PIXLET_DECODER 0 ++#define CONFIG_PNG_DECODER 0 ++#define CONFIG_PPM_DECODER 0 ++#define CONFIG_PRORES_DECODER 0 ++#define CONFIG_PROSUMER_DECODER 0 ++#define CONFIG_PSD_DECODER 0 ++#define CONFIG_PTX_DECODER 0 ++#define CONFIG_QDRAW_DECODER 0 ++#define CONFIG_QPEG_DECODER 0 ++#define CONFIG_QTRLE_DECODER 0 ++#define CONFIG_R10K_DECODER 0 ++#define CONFIG_R210_DECODER 0 ++#define CONFIG_RASC_DECODER 0 ++#define CONFIG_RAWVIDEO_DECODER 0 ++#define CONFIG_RL2_DECODER 0 ++#define CONFIG_ROQ_DECODER 0 ++#define CONFIG_RPZA_DECODER 0 ++#define CONFIG_RSCC_DECODER 0 ++#define CONFIG_RV10_DECODER 0 ++#define CONFIG_RV20_DECODER 0 ++#define CONFIG_RV30_DECODER 0 ++#define CONFIG_RV40_DECODER 0 ++#define CONFIG_S302M_DECODER 0 ++#define CONFIG_SANM_DECODER 0 ++#define CONFIG_SCPR_DECODER 0 ++#define CONFIG_SCREENPRESSO_DECODER 0 ++#define CONFIG_SGI_DECODER 0 ++#define CONFIG_SGIRLE_DECODER 0 ++#define CONFIG_SHEERVIDEO_DECODER 0 ++#define CONFIG_SMACKER_DECODER 0 ++#define CONFIG_SMC_DECODER 0 ++#define CONFIG_SMVJPEG_DECODER 0 ++#define CONFIG_SNOW_DECODER 0 ++#define CONFIG_SP5X_DECODER 0 ++#define CONFIG_SPEEDHQ_DECODER 0 ++#define CONFIG_SRGC_DECODER 0 ++#define CONFIG_SUNRAST_DECODER 0 ++#define CONFIG_SVQ1_DECODER 0 ++#define CONFIG_SVQ3_DECODER 0 ++#define CONFIG_TARGA_DECODER 0 ++#define CONFIG_TARGA_Y216_DECODER 0 ++#define CONFIG_TDSC_DECODER 0 ++#define CONFIG_THEORA_DECODER 1 ++#define CONFIG_THP_DECODER 0 ++#define CONFIG_TIERTEXSEQVIDEO_DECODER 0 ++#define CONFIG_TIFF_DECODER 0 ++#define CONFIG_TMV_DECODER 0 ++#define CONFIG_TRUEMOTION1_DECODER 0 ++#define CONFIG_TRUEMOTION2_DECODER 0 ++#define CONFIG_TRUEMOTION2RT_DECODER 0 ++#define CONFIG_TSCC_DECODER 0 ++#define CONFIG_TSCC2_DECODER 0 ++#define CONFIG_TXD_DECODER 0 ++#define CONFIG_ULTI_DECODER 0 ++#define CONFIG_UTVIDEO_DECODER 0 ++#define CONFIG_V210_DECODER 0 ++#define CONFIG_V210X_DECODER 0 ++#define CONFIG_V308_DECODER 0 ++#define CONFIG_V408_DECODER 0 ++#define CONFIG_V410_DECODER 0 ++#define CONFIG_VB_DECODER 0 ++#define CONFIG_VBLE_DECODER 0 ++#define CONFIG_VC1_DECODER 0 ++#define CONFIG_VC1_CRYSTALHD_DECODER 0 ++#define CONFIG_VC1IMAGE_DECODER 0 ++#define CONFIG_VC1_MMAL_DECODER 0 ++#define CONFIG_VC1_QSV_DECODER 0 ++#define CONFIG_VC1_V4L2M2M_DECODER 0 ++#define CONFIG_VCR1_DECODER 0 ++#define CONFIG_VMDVIDEO_DECODER 0 ++#define CONFIG_VMNC_DECODER 0 ++#define CONFIG_VP3_DECODER 1 ++#define CONFIG_VP4_DECODER 0 ++#define CONFIG_VP5_DECODER 0 ++#define CONFIG_VP6_DECODER 0 ++#define CONFIG_VP6A_DECODER 0 ++#define CONFIG_VP6F_DECODER 0 ++#define CONFIG_VP7_DECODER 0 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP8_RKMPP_DECODER 0 ++#define CONFIG_VP8_V4L2M2M_DECODER 0 ++#define 
CONFIG_VP9_DECODER 0 ++#define CONFIG_VP9_RKMPP_DECODER 0 ++#define CONFIG_VP9_V4L2M2M_DECODER 0 ++#define CONFIG_VQA_DECODER 0 ++#define CONFIG_WEBP_DECODER 0 ++#define CONFIG_WCMV_DECODER 0 ++#define CONFIG_WRAPPED_AVFRAME_DECODER 0 ++#define CONFIG_WMV1_DECODER 0 ++#define CONFIG_WMV2_DECODER 0 ++#define CONFIG_WMV3_DECODER 0 ++#define CONFIG_WMV3_CRYSTALHD_DECODER 0 ++#define CONFIG_WMV3IMAGE_DECODER 0 ++#define CONFIG_WNV1_DECODER 0 ++#define CONFIG_XAN_WC3_DECODER 0 ++#define CONFIG_XAN_WC4_DECODER 0 ++#define CONFIG_XBM_DECODER 0 ++#define CONFIG_XFACE_DECODER 0 ++#define CONFIG_XL_DECODER 0 ++#define CONFIG_XPM_DECODER 0 ++#define CONFIG_XWD_DECODER 0 ++#define CONFIG_Y41P_DECODER 0 ++#define CONFIG_YLC_DECODER 0 ++#define CONFIG_YOP_DECODER 0 ++#define CONFIG_YUV4_DECODER 0 ++#define CONFIG_ZERO12V_DECODER 0 ++#define CONFIG_ZEROCODEC_DECODER 0 ++#define CONFIG_ZLIB_DECODER 0 ++#define CONFIG_ZMBV_DECODER 0 ++#define CONFIG_AAC_DECODER 1 ++#define CONFIG_AAC_FIXED_DECODER 0 ++#define CONFIG_AAC_LATM_DECODER 0 ++#define CONFIG_AC3_DECODER 0 ++#define CONFIG_AC3_FIXED_DECODER 0 ++#define CONFIG_ACELP_KELVIN_DECODER 0 ++#define CONFIG_ALAC_DECODER 0 ++#define CONFIG_ALS_DECODER 0 ++#define CONFIG_AMRNB_DECODER 1 ++#define CONFIG_AMRWB_DECODER 1 ++#define CONFIG_APE_DECODER 0 ++#define CONFIG_APTX_DECODER 0 ++#define CONFIG_APTX_HD_DECODER 0 ++#define CONFIG_ATRAC1_DECODER 0 ++#define CONFIG_ATRAC3_DECODER 0 ++#define CONFIG_ATRAC3AL_DECODER 0 ++#define CONFIG_ATRAC3P_DECODER 0 ++#define CONFIG_ATRAC3PAL_DECODER 0 ++#define CONFIG_ATRAC9_DECODER 0 ++#define CONFIG_BINKAUDIO_DCT_DECODER 0 ++#define CONFIG_BINKAUDIO_RDFT_DECODER 0 ++#define CONFIG_BMV_AUDIO_DECODER 0 ++#define CONFIG_COOK_DECODER 0 ++#define CONFIG_DCA_DECODER 0 ++#define CONFIG_DOLBY_E_DECODER 0 ++#define CONFIG_DSD_LSBF_DECODER 0 ++#define CONFIG_DSD_MSBF_DECODER 0 ++#define CONFIG_DSD_LSBF_PLANAR_DECODER 0 ++#define CONFIG_DSD_MSBF_PLANAR_DECODER 0 ++#define CONFIG_DSICINAUDIO_DECODER 0 ++#define CONFIG_DSS_SP_DECODER 0 ++#define CONFIG_DST_DECODER 0 ++#define CONFIG_EAC3_DECODER 0 ++#define CONFIG_EVRC_DECODER 0 ++#define CONFIG_FFWAVESYNTH_DECODER 0 ++#define CONFIG_FLAC_DECODER 1 ++#define CONFIG_G723_1_DECODER 0 ++#define CONFIG_G729_DECODER 0 ++#define CONFIG_GSM_DECODER 0 ++#define CONFIG_GSM_MS_DECODER 1 ++#define CONFIG_HCA_DECODER 0 ++#define CONFIG_HCOM_DECODER 0 ++#define CONFIG_IAC_DECODER 0 ++#define CONFIG_ILBC_DECODER 0 ++#define CONFIG_IMC_DECODER 0 ++#define CONFIG_INTERPLAY_ACM_DECODER 0 ++#define CONFIG_MACE3_DECODER 0 ++#define CONFIG_MACE6_DECODER 0 ++#define CONFIG_METASOUND_DECODER 0 ++#define CONFIG_MLP_DECODER 0 ++#define CONFIG_MP1_DECODER 0 ++#define CONFIG_MP1FLOAT_DECODER 0 ++#define CONFIG_MP2_DECODER 0 ++#define CONFIG_MP2FLOAT_DECODER 0 ++#define CONFIG_MP3FLOAT_DECODER 0 ++#define CONFIG_MP3_DECODER 1 ++#define CONFIG_MP3ADUFLOAT_DECODER 0 ++#define CONFIG_MP3ADU_DECODER 0 ++#define CONFIG_MP3ON4FLOAT_DECODER 0 ++#define CONFIG_MP3ON4_DECODER 0 ++#define CONFIG_MPC7_DECODER 0 ++#define CONFIG_MPC8_DECODER 0 ++#define CONFIG_NELLYMOSER_DECODER 0 ++#define CONFIG_ON2AVC_DECODER 0 ++#define CONFIG_OPUS_DECODER 0 ++#define CONFIG_PAF_AUDIO_DECODER 0 ++#define CONFIG_QCELP_DECODER 0 ++#define CONFIG_QDM2_DECODER 0 ++#define CONFIG_QDMC_DECODER 0 ++#define CONFIG_RA_144_DECODER 0 ++#define CONFIG_RA_288_DECODER 0 ++#define CONFIG_RALF_DECODER 0 ++#define CONFIG_SBC_DECODER 0 ++#define CONFIG_SHORTEN_DECODER 0 ++#define CONFIG_SIPR_DECODER 0 ++#define CONFIG_SIREN_DECODER 0 ++#define 
CONFIG_SMACKAUD_DECODER 0 ++#define CONFIG_SONIC_DECODER 0 ++#define CONFIG_TAK_DECODER 0 ++#define CONFIG_TRUEHD_DECODER 0 ++#define CONFIG_TRUESPEECH_DECODER 0 ++#define CONFIG_TTA_DECODER 0 ++#define CONFIG_TWINVQ_DECODER 0 ++#define CONFIG_VMDAUDIO_DECODER 0 ++#define CONFIG_VORBIS_DECODER 1 ++#define CONFIG_WAVPACK_DECODER 0 ++#define CONFIG_WMALOSSLESS_DECODER 0 ++#define CONFIG_WMAPRO_DECODER 0 ++#define CONFIG_WMAV1_DECODER 0 ++#define CONFIG_WMAV2_DECODER 0 ++#define CONFIG_WMAVOICE_DECODER 0 ++#define CONFIG_WS_SND1_DECODER 0 ++#define CONFIG_XMA1_DECODER 0 ++#define CONFIG_XMA2_DECODER 0 ++#define CONFIG_PCM_ALAW_DECODER 1 ++#define CONFIG_PCM_BLURAY_DECODER 0 ++#define CONFIG_PCM_DVD_DECODER 0 ++#define CONFIG_PCM_F16LE_DECODER 0 ++#define CONFIG_PCM_F24LE_DECODER 0 ++#define CONFIG_PCM_F32BE_DECODER 0 ++#define CONFIG_PCM_F32LE_DECODER 1 ++#define CONFIG_PCM_F64BE_DECODER 0 ++#define CONFIG_PCM_F64LE_DECODER 0 ++#define CONFIG_PCM_LXF_DECODER 0 ++#define CONFIG_PCM_MULAW_DECODER 1 ++#define CONFIG_PCM_S8_DECODER 0 ++#define CONFIG_PCM_S8_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16BE_DECODER 1 ++#define CONFIG_PCM_S16BE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16LE_DECODER 1 ++#define CONFIG_PCM_S16LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S24BE_DECODER 1 ++#define CONFIG_PCM_S24DAUD_DECODER 0 ++#define CONFIG_PCM_S24LE_DECODER 1 ++#define CONFIG_PCM_S24LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S32BE_DECODER 0 ++#define CONFIG_PCM_S32LE_DECODER 1 ++#define CONFIG_PCM_S32LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S64BE_DECODER 0 ++#define CONFIG_PCM_S64LE_DECODER 0 ++#define CONFIG_PCM_U8_DECODER 1 ++#define CONFIG_PCM_U16BE_DECODER 0 ++#define CONFIG_PCM_U16LE_DECODER 0 ++#define CONFIG_PCM_U24BE_DECODER 0 ++#define CONFIG_PCM_U24LE_DECODER 0 ++#define CONFIG_PCM_U32BE_DECODER 0 ++#define CONFIG_PCM_U32LE_DECODER 0 ++#define CONFIG_PCM_VIDC_DECODER 0 ++#define CONFIG_DERF_DPCM_DECODER 0 ++#define CONFIG_GREMLIN_DPCM_DECODER 0 ++#define CONFIG_INTERPLAY_DPCM_DECODER 0 ++#define CONFIG_ROQ_DPCM_DECODER 0 ++#define CONFIG_SDX2_DPCM_DECODER 0 ++#define CONFIG_SOL_DPCM_DECODER 0 ++#define CONFIG_XAN_DPCM_DECODER 0 ++#define CONFIG_ADPCM_4XM_DECODER 0 ++#define CONFIG_ADPCM_ADX_DECODER 0 ++#define CONFIG_ADPCM_AFC_DECODER 0 ++#define CONFIG_ADPCM_AGM_DECODER 0 ++#define CONFIG_ADPCM_AICA_DECODER 0 ++#define CONFIG_ADPCM_ARGO_DECODER 0 ++#define CONFIG_ADPCM_CT_DECODER 0 ++#define CONFIG_ADPCM_DTK_DECODER 0 ++#define CONFIG_ADPCM_EA_DECODER 0 ++#define CONFIG_ADPCM_EA_MAXIS_XA_DECODER 0 ++#define CONFIG_ADPCM_EA_R1_DECODER 0 ++#define CONFIG_ADPCM_EA_R2_DECODER 0 ++#define CONFIG_ADPCM_EA_R3_DECODER 0 ++#define CONFIG_ADPCM_EA_XAS_DECODER 0 ++#define CONFIG_ADPCM_G722_DECODER 0 ++#define CONFIG_ADPCM_G726_DECODER 0 ++#define CONFIG_ADPCM_G726LE_DECODER 0 ++#define CONFIG_ADPCM_IMA_AMV_DECODER 0 ++#define CONFIG_ADPCM_IMA_ALP_DECODER 0 ++#define CONFIG_ADPCM_IMA_APC_DECODER 0 ++#define CONFIG_ADPCM_IMA_APM_DECODER 0 ++#define CONFIG_ADPCM_IMA_CUNNING_DECODER 0 ++#define CONFIG_ADPCM_IMA_DAT4_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK3_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK4_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_EACS_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_SEAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_ISS_DECODER 0 ++#define CONFIG_ADPCM_IMA_MTF_DECODER 0 ++#define CONFIG_ADPCM_IMA_OKI_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_DECODER 0 ++#define CONFIG_ADPCM_IMA_RAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_SSI_DECODER 0 ++#define CONFIG_ADPCM_IMA_SMJPEG_DECODER 0 ++#define CONFIG_ADPCM_IMA_WAV_DECODER 0 
++#define CONFIG_ADPCM_IMA_WS_DECODER 0 ++#define CONFIG_ADPCM_MS_DECODER 0 ++#define CONFIG_ADPCM_MTAF_DECODER 0 ++#define CONFIG_ADPCM_PSX_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_2_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_3_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_4_DECODER 0 ++#define CONFIG_ADPCM_SWF_DECODER 0 ++#define CONFIG_ADPCM_THP_DECODER 0 ++#define CONFIG_ADPCM_THP_LE_DECODER 0 ++#define CONFIG_ADPCM_VIMA_DECODER 0 ++#define CONFIG_ADPCM_XA_DECODER 0 ++#define CONFIG_ADPCM_YAMAHA_DECODER 0 ++#define CONFIG_ADPCM_ZORK_DECODER 0 ++#define CONFIG_SSA_DECODER 0 ++#define CONFIG_ASS_DECODER 0 ++#define CONFIG_CCAPTION_DECODER 0 ++#define CONFIG_DVBSUB_DECODER 0 ++#define CONFIG_DVDSUB_DECODER 0 ++#define CONFIG_JACOSUB_DECODER 0 ++#define CONFIG_MICRODVD_DECODER 0 ++#define CONFIG_MOVTEXT_DECODER 0 ++#define CONFIG_MPL2_DECODER 0 ++#define CONFIG_PGSSUB_DECODER 0 ++#define CONFIG_PJS_DECODER 0 ++#define CONFIG_REALTEXT_DECODER 0 ++#define CONFIG_SAMI_DECODER 0 ++#define CONFIG_SRT_DECODER 0 ++#define CONFIG_STL_DECODER 0 ++#define CONFIG_SUBRIP_DECODER 0 ++#define CONFIG_SUBVIEWER_DECODER 0 ++#define CONFIG_SUBVIEWER1_DECODER 0 ++#define CONFIG_TEXT_DECODER 0 ++#define CONFIG_VPLAYER_DECODER 0 ++#define CONFIG_WEBVTT_DECODER 0 ++#define CONFIG_XSUB_DECODER 0 ++#define CONFIG_AAC_AT_DECODER 0 ++#define CONFIG_AC3_AT_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_AT_DECODER 0 ++#define CONFIG_ALAC_AT_DECODER 0 ++#define CONFIG_AMR_NB_AT_DECODER 0 ++#define CONFIG_EAC3_AT_DECODER 0 ++#define CONFIG_GSM_MS_AT_DECODER 0 ++#define CONFIG_ILBC_AT_DECODER 0 ++#define CONFIG_MP1_AT_DECODER 0 ++#define CONFIG_MP2_AT_DECODER 0 ++#define CONFIG_MP3_AT_DECODER 0 ++#define CONFIG_PCM_ALAW_AT_DECODER 0 ++#define CONFIG_PCM_MULAW_AT_DECODER 0 ++#define CONFIG_QDMC_AT_DECODER 0 ++#define CONFIG_QDM2_AT_DECODER 0 ++#define CONFIG_LIBARIBB24_DECODER 0 ++#define CONFIG_LIBCELT_DECODER 0 ++#define CONFIG_LIBCODEC2_DECODER 0 ++#define CONFIG_LIBDAV1D_DECODER 0 ++#define CONFIG_LIBDAVS2_DECODER 0 ++#define CONFIG_LIBFDK_AAC_DECODER 0 ++#define CONFIG_LIBGSM_DECODER 0 ++#define CONFIG_LIBGSM_MS_DECODER 0 ++#define CONFIG_LIBILBC_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRWB_DECODER 0 ++#define CONFIG_LIBOPENJPEG_DECODER 0 ++#define CONFIG_LIBOPUS_DECODER 1 ++#define CONFIG_LIBRSVG_DECODER 0 ++#define CONFIG_LIBSPEEX_DECODER 0 ++#define CONFIG_LIBVORBIS_DECODER 0 ++#define CONFIG_LIBVPX_VP8_DECODER 0 ++#define CONFIG_LIBVPX_VP9_DECODER 0 ++#define CONFIG_LIBZVBI_TELETEXT_DECODER 0 ++#define CONFIG_BINTEXT_DECODER 0 ++#define CONFIG_XBIN_DECODER 0 ++#define CONFIG_IDF_DECODER 0 ++#define CONFIG_LIBAOM_AV1_DECODER 0 ++#define CONFIG_LIBOPENH264_DECODER 0 ++#define CONFIG_H264_CUVID_DECODER 0 ++#define CONFIG_HEVC_CUVID_DECODER 0 ++#define CONFIG_HEVC_MEDIACODEC_DECODER 0 ++#define CONFIG_MJPEG_CUVID_DECODER 0 ++#define CONFIG_MJPEG_QSV_DECODER 0 ++#define CONFIG_MPEG1_CUVID_DECODER 0 ++#define CONFIG_MPEG2_CUVID_DECODER 0 ++#define CONFIG_MPEG4_CUVID_DECODER 0 ++#define CONFIG_MPEG4_MEDIACODEC_DECODER 0 ++#define CONFIG_VC1_CUVID_DECODER 0 ++#define CONFIG_VP8_CUVID_DECODER 0 ++#define CONFIG_VP8_MEDIACODEC_DECODER 0 ++#define CONFIG_VP8_QSV_DECODER 0 ++#define CONFIG_VP9_CUVID_DECODER 0 ++#define CONFIG_VP9_MEDIACODEC_DECODER 0 ++#define CONFIG_VP9_QSV_DECODER 0 ++#define CONFIG_A64MULTI_ENCODER 0 ++#define CONFIG_A64MULTI5_ENCODER 0 ++#define CONFIG_ALIAS_PIX_ENCODER 0 ++#define CONFIG_AMV_ENCODER 0 ++#define CONFIG_APNG_ENCODER 0 ++#define CONFIG_ASV1_ENCODER 0 ++#define 
CONFIG_ASV2_ENCODER 0 ++#define CONFIG_AVRP_ENCODER 0 ++#define CONFIG_AVUI_ENCODER 0 ++#define CONFIG_AYUV_ENCODER 0 ++#define CONFIG_BMP_ENCODER 0 ++#define CONFIG_CINEPAK_ENCODER 0 ++#define CONFIG_CLJR_ENCODER 0 ++#define CONFIG_COMFORTNOISE_ENCODER 0 ++#define CONFIG_DNXHD_ENCODER 0 ++#define CONFIG_DPX_ENCODER 0 ++#define CONFIG_DVVIDEO_ENCODER 0 ++#define CONFIG_FFV1_ENCODER 0 ++#define CONFIG_FFVHUFF_ENCODER 0 ++#define CONFIG_FITS_ENCODER 0 ++#define CONFIG_FLASHSV_ENCODER 0 ++#define CONFIG_FLASHSV2_ENCODER 0 ++#define CONFIG_FLV_ENCODER 0 ++#define CONFIG_GIF_ENCODER 0 ++#define CONFIG_H261_ENCODER 0 ++#define CONFIG_H263_ENCODER 0 ++#define CONFIG_H263P_ENCODER 0 ++#define CONFIG_HAP_ENCODER 0 ++#define CONFIG_HUFFYUV_ENCODER 0 ++#define CONFIG_JPEG2000_ENCODER 0 ++#define CONFIG_JPEGLS_ENCODER 0 ++#define CONFIG_LJPEG_ENCODER 0 ++#define CONFIG_MAGICYUV_ENCODER 0 ++#define CONFIG_MJPEG_ENCODER 0 ++#define CONFIG_MPEG1VIDEO_ENCODER 0 ++#define CONFIG_MPEG2VIDEO_ENCODER 0 ++#define CONFIG_MPEG4_ENCODER 0 ++#define CONFIG_MSMPEG4V2_ENCODER 0 ++#define CONFIG_MSMPEG4V3_ENCODER 0 ++#define CONFIG_MSVIDEO1_ENCODER 0 ++#define CONFIG_PAM_ENCODER 0 ++#define CONFIG_PBM_ENCODER 0 ++#define CONFIG_PCX_ENCODER 0 ++#define CONFIG_PGM_ENCODER 0 ++#define CONFIG_PGMYUV_ENCODER 0 ++#define CONFIG_PNG_ENCODER 0 ++#define CONFIG_PPM_ENCODER 0 ++#define CONFIG_PRORES_ENCODER 0 ++#define CONFIG_PRORES_AW_ENCODER 0 ++#define CONFIG_PRORES_KS_ENCODER 0 ++#define CONFIG_QTRLE_ENCODER 0 ++#define CONFIG_R10K_ENCODER 0 ++#define CONFIG_R210_ENCODER 0 ++#define CONFIG_RAWVIDEO_ENCODER 0 ++#define CONFIG_ROQ_ENCODER 0 ++#define CONFIG_RV10_ENCODER 0 ++#define CONFIG_RV20_ENCODER 0 ++#define CONFIG_S302M_ENCODER 0 ++#define CONFIG_SGI_ENCODER 0 ++#define CONFIG_SNOW_ENCODER 0 ++#define CONFIG_SUNRAST_ENCODER 0 ++#define CONFIG_SVQ1_ENCODER 0 ++#define CONFIG_TARGA_ENCODER 0 ++#define CONFIG_TIFF_ENCODER 0 ++#define CONFIG_UTVIDEO_ENCODER 0 ++#define CONFIG_V210_ENCODER 0 ++#define CONFIG_V308_ENCODER 0 ++#define CONFIG_V408_ENCODER 0 ++#define CONFIG_V410_ENCODER 0 ++#define CONFIG_VC2_ENCODER 0 ++#define CONFIG_WRAPPED_AVFRAME_ENCODER 0 ++#define CONFIG_WMV1_ENCODER 0 ++#define CONFIG_WMV2_ENCODER 0 ++#define CONFIG_XBM_ENCODER 0 ++#define CONFIG_XFACE_ENCODER 0 ++#define CONFIG_XWD_ENCODER 0 ++#define CONFIG_Y41P_ENCODER 0 ++#define CONFIG_YUV4_ENCODER 0 ++#define CONFIG_ZLIB_ENCODER 0 ++#define CONFIG_ZMBV_ENCODER 0 ++#define CONFIG_AAC_ENCODER 0 ++#define CONFIG_AC3_ENCODER 0 ++#define CONFIG_AC3_FIXED_ENCODER 0 ++#define CONFIG_ALAC_ENCODER 0 ++#define CONFIG_APTX_ENCODER 0 ++#define CONFIG_APTX_HD_ENCODER 0 ++#define CONFIG_DCA_ENCODER 0 ++#define CONFIG_EAC3_ENCODER 0 ++#define CONFIG_FLAC_ENCODER 0 ++#define CONFIG_G723_1_ENCODER 0 ++#define CONFIG_MLP_ENCODER 0 ++#define CONFIG_MP2_ENCODER 0 ++#define CONFIG_MP2FIXED_ENCODER 0 ++#define CONFIG_NELLYMOSER_ENCODER 0 ++#define CONFIG_OPUS_ENCODER 0 ++#define CONFIG_RA_144_ENCODER 0 ++#define CONFIG_SBC_ENCODER 0 ++#define CONFIG_SONIC_ENCODER 0 ++#define CONFIG_SONIC_LS_ENCODER 0 ++#define CONFIG_TRUEHD_ENCODER 0 ++#define CONFIG_TTA_ENCODER 0 ++#define CONFIG_VORBIS_ENCODER 0 ++#define CONFIG_WAVPACK_ENCODER 0 ++#define CONFIG_WMAV1_ENCODER 0 ++#define CONFIG_WMAV2_ENCODER 0 ++#define CONFIG_PCM_ALAW_ENCODER 0 ++#define CONFIG_PCM_DVD_ENCODER 0 ++#define CONFIG_PCM_F32BE_ENCODER 0 ++#define CONFIG_PCM_F32LE_ENCODER 0 ++#define CONFIG_PCM_F64BE_ENCODER 0 ++#define CONFIG_PCM_F64LE_ENCODER 0 ++#define CONFIG_PCM_MULAW_ENCODER 0 ++#define 
CONFIG_PCM_S8_ENCODER 0 ++#define CONFIG_PCM_S8_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16BE_ENCODER 0 ++#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16LE_ENCODER 0 ++#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S24BE_ENCODER 0 ++#define CONFIG_PCM_S24DAUD_ENCODER 0 ++#define CONFIG_PCM_S24LE_ENCODER 0 ++#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S32BE_ENCODER 0 ++#define CONFIG_PCM_S32LE_ENCODER 0 ++#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S64BE_ENCODER 0 ++#define CONFIG_PCM_S64LE_ENCODER 0 ++#define CONFIG_PCM_U8_ENCODER 0 ++#define CONFIG_PCM_U16BE_ENCODER 0 ++#define CONFIG_PCM_U16LE_ENCODER 0 ++#define CONFIG_PCM_U24BE_ENCODER 0 ++#define CONFIG_PCM_U24LE_ENCODER 0 ++#define CONFIG_PCM_U32BE_ENCODER 0 ++#define CONFIG_PCM_U32LE_ENCODER 0 ++#define CONFIG_PCM_VIDC_ENCODER 0 ++#define CONFIG_ROQ_DPCM_ENCODER 0 ++#define CONFIG_ADPCM_ADX_ENCODER 0 ++#define CONFIG_ADPCM_G722_ENCODER 0 ++#define CONFIG_ADPCM_G726_ENCODER 0 ++#define CONFIG_ADPCM_G726LE_ENCODER 0 ++#define CONFIG_ADPCM_IMA_QT_ENCODER 0 ++#define CONFIG_ADPCM_IMA_WAV_ENCODER 0 ++#define CONFIG_ADPCM_MS_ENCODER 0 ++#define CONFIG_ADPCM_SWF_ENCODER 0 ++#define CONFIG_ADPCM_YAMAHA_ENCODER 0 ++#define CONFIG_SSA_ENCODER 0 ++#define CONFIG_ASS_ENCODER 0 ++#define CONFIG_DVBSUB_ENCODER 0 ++#define CONFIG_DVDSUB_ENCODER 0 ++#define CONFIG_MOVTEXT_ENCODER 0 ++#define CONFIG_SRT_ENCODER 0 ++#define CONFIG_SUBRIP_ENCODER 0 ++#define CONFIG_TEXT_ENCODER 0 ++#define CONFIG_WEBVTT_ENCODER 0 ++#define CONFIG_XSUB_ENCODER 0 ++#define CONFIG_AAC_AT_ENCODER 0 ++#define CONFIG_ALAC_AT_ENCODER 0 ++#define CONFIG_ILBC_AT_ENCODER 0 ++#define CONFIG_PCM_ALAW_AT_ENCODER 0 ++#define CONFIG_PCM_MULAW_AT_ENCODER 0 ++#define CONFIG_LIBAOM_AV1_ENCODER 0 ++#define CONFIG_LIBCODEC2_ENCODER 0 ++#define CONFIG_LIBFDK_AAC_ENCODER 0 ++#define CONFIG_LIBGSM_ENCODER 0 ++#define CONFIG_LIBGSM_MS_ENCODER 0 ++#define CONFIG_LIBILBC_ENCODER 0 ++#define CONFIG_LIBMP3LAME_ENCODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0 ++#define CONFIG_LIBOPENJPEG_ENCODER 0 ++#define CONFIG_LIBOPUS_ENCODER 0 ++#define CONFIG_LIBRAV1E_ENCODER 0 ++#define CONFIG_LIBSHINE_ENCODER 0 ++#define CONFIG_LIBSPEEX_ENCODER 0 ++#define CONFIG_LIBTHEORA_ENCODER 0 ++#define CONFIG_LIBTWOLAME_ENCODER 0 ++#define CONFIG_LIBVO_AMRWBENC_ENCODER 0 ++#define CONFIG_LIBVORBIS_ENCODER 0 ++#define CONFIG_LIBVPX_VP8_ENCODER 0 ++#define CONFIG_LIBVPX_VP9_ENCODER 0 ++#define CONFIG_LIBWAVPACK_ENCODER 0 ++#define CONFIG_LIBWEBP_ANIM_ENCODER 0 ++#define CONFIG_LIBWEBP_ENCODER 0 ++#define CONFIG_LIBX262_ENCODER 0 ++#define CONFIG_LIBX264_ENCODER 0 ++#define CONFIG_LIBX264RGB_ENCODER 0 ++#define CONFIG_LIBX265_ENCODER 0 ++#define CONFIG_LIBXAVS_ENCODER 0 ++#define CONFIG_LIBXAVS2_ENCODER 0 ++#define CONFIG_LIBXVID_ENCODER 0 ++#define CONFIG_H263_V4L2M2M_ENCODER 0 ++#define CONFIG_LIBOPENH264_ENCODER 0 ++#define CONFIG_H264_AMF_ENCODER 0 ++#define CONFIG_H264_NVENC_ENCODER 0 ++#define CONFIG_H264_OMX_ENCODER 0 ++#define CONFIG_H264_QSV_ENCODER 0 ++#define CONFIG_H264_V4L2M2M_ENCODER 0 ++#define CONFIG_H264_VAAPI_ENCODER 0 ++#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_NVENC_ENCODER 0 ++#define CONFIG_NVENC_H264_ENCODER 0 ++#define CONFIG_NVENC_HEVC_ENCODER 0 ++#define CONFIG_HEVC_AMF_ENCODER 0 ++#define CONFIG_HEVC_NVENC_ENCODER 0 ++#define CONFIG_HEVC_QSV_ENCODER 0 ++#define CONFIG_HEVC_V4L2M2M_ENCODER 0 ++#define CONFIG_HEVC_VAAPI_ENCODER 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0 ++#define 
CONFIG_LIBKVAZAAR_ENCODER 0 ++#define CONFIG_MJPEG_QSV_ENCODER 0 ++#define CONFIG_MJPEG_VAAPI_ENCODER 0 ++#define CONFIG_MPEG2_QSV_ENCODER 0 ++#define CONFIG_MPEG2_VAAPI_ENCODER 0 ++#define CONFIG_MPEG4_OMX_ENCODER 0 ++#define CONFIG_MPEG4_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_VAAPI_ENCODER 0 ++#define CONFIG_VP9_VAAPI_ENCODER 0 ++#define CONFIG_VP9_QSV_ENCODER 0 ++#define CONFIG_H263_VAAPI_HWACCEL 0 ++#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_H264_D3D11VA_HWACCEL 0 ++#define CONFIG_H264_D3D11VA2_HWACCEL 0 ++#define CONFIG_H264_DXVA2_HWACCEL 0 ++#define CONFIG_H264_NVDEC_HWACCEL 0 ++#define CONFIG_H264_VAAPI_HWACCEL 0 ++#define CONFIG_H264_VDPAU_HWACCEL 0 ++#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA2_HWACCEL 0 ++#define CONFIG_HEVC_DXVA2_HWACCEL 0 ++#define CONFIG_HEVC_NVDEC_HWACCEL 0 ++#define CONFIG_HEVC_VAAPI_HWACCEL 0 ++#define CONFIG_HEVC_VDPAU_HWACCEL 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MJPEG_NVDEC_HWACCEL 0 ++#define CONFIG_MJPEG_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG1_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG1_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG1_XVMC_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0 ++#define CONFIG_MPEG2_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG2_DXVA2_HWACCEL 0 ++#define CONFIG_MPEG2_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG2_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG2_XVMC_HWACCEL 0 ++#define CONFIG_MPEG4_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG4_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG4_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA2_HWACCEL 0 ++#define CONFIG_VC1_DXVA2_HWACCEL 0 ++#define CONFIG_VC1_NVDEC_HWACCEL 0 ++#define CONFIG_VC1_VAAPI_HWACCEL 0 ++#define CONFIG_VC1_VDPAU_HWACCEL 0 ++#define CONFIG_VP8_NVDEC_HWACCEL 0 ++#define CONFIG_VP8_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA2_HWACCEL 0 ++#define CONFIG_VP9_DXVA2_HWACCEL 0 ++#define CONFIG_VP9_NVDEC_HWACCEL 0 ++#define CONFIG_VP9_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_VDPAU_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA2_HWACCEL 0 ++#define CONFIG_WMV3_DXVA2_HWACCEL 0 ++#define CONFIG_WMV3_NVDEC_HWACCEL 0 ++#define CONFIG_WMV3_VAAPI_HWACCEL 0 ++#define CONFIG_WMV3_VDPAU_HWACCEL 0 ++#define CONFIG_AAC_PARSER 1 ++#define CONFIG_AAC_LATM_PARSER 0 ++#define CONFIG_AC3_PARSER 0 ++#define CONFIG_ADX_PARSER 0 ++#define CONFIG_AV1_PARSER 0 ++#define CONFIG_AVS2_PARSER 0 ++#define CONFIG_BMP_PARSER 0 ++#define CONFIG_CAVSVIDEO_PARSER 0 ++#define CONFIG_COOK_PARSER 0 ++#define CONFIG_DCA_PARSER 0 ++#define CONFIG_DIRAC_PARSER 0 ++#define CONFIG_DNXHD_PARSER 0 ++#define CONFIG_DPX_PARSER 0 ++#define CONFIG_DVAUDIO_PARSER 0 ++#define CONFIG_DVBSUB_PARSER 0 ++#define CONFIG_DVDSUB_PARSER 0 ++#define CONFIG_DVD_NAV_PARSER 0 ++#define CONFIG_FLAC_PARSER 1 ++#define CONFIG_G723_1_PARSER 0 ++#define CONFIG_G729_PARSER 0 ++#define CONFIG_GIF_PARSER 0 ++#define CONFIG_GSM_PARSER 1 ++#define CONFIG_H261_PARSER 0 ++#define CONFIG_H263_PARSER 1 ++#define CONFIG_H264_PARSER 1 ++#define CONFIG_HEVC_PARSER 0 ++#define CONFIG_MJPEG_PARSER 0 ++#define CONFIG_MLP_PARSER 0 ++#define CONFIG_MPEG4VIDEO_PARSER 1 ++#define CONFIG_MPEGAUDIO_PARSER 1 ++#define CONFIG_MPEGVIDEO_PARSER 0 ++#define CONFIG_OPUS_PARSER 1 
++#define CONFIG_PNG_PARSER 0 ++#define CONFIG_PNM_PARSER 0 ++#define CONFIG_RV30_PARSER 0 ++#define CONFIG_RV40_PARSER 0 ++#define CONFIG_SBC_PARSER 0 ++#define CONFIG_SIPR_PARSER 0 ++#define CONFIG_TAK_PARSER 0 ++#define CONFIG_VC1_PARSER 0 ++#define CONFIG_VORBIS_PARSER 1 ++#define CONFIG_VP3_PARSER 1 ++#define CONFIG_VP8_PARSER 1 ++#define CONFIG_VP9_PARSER 1 ++#define CONFIG_WEBP_PARSER 0 ++#define CONFIG_XMA_PARSER 0 ++#define CONFIG_ALSA_INDEV 0 ++#define CONFIG_ANDROID_CAMERA_INDEV 0 ++#define CONFIG_AVFOUNDATION_INDEV 0 ++#define CONFIG_BKTR_INDEV 0 ++#define CONFIG_DECKLINK_INDEV 0 ++#define CONFIG_DSHOW_INDEV 0 ++#define CONFIG_FBDEV_INDEV 0 ++#define CONFIG_GDIGRAB_INDEV 0 ++#define CONFIG_IEC61883_INDEV 0 ++#define CONFIG_JACK_INDEV 0 ++#define CONFIG_KMSGRAB_INDEV 0 ++#define CONFIG_LAVFI_INDEV 0 ++#define CONFIG_OPENAL_INDEV 0 ++#define CONFIG_OSS_INDEV 0 ++#define CONFIG_PULSE_INDEV 0 ++#define CONFIG_SNDIO_INDEV 0 ++#define CONFIG_V4L2_INDEV 0 ++#define CONFIG_VFWCAP_INDEV 0 ++#define CONFIG_XCBGRAB_INDEV 0 ++#define CONFIG_LIBCDIO_INDEV 0 ++#define CONFIG_LIBDC1394_INDEV 0 ++#define CONFIG_ALSA_OUTDEV 0 ++#define CONFIG_CACA_OUTDEV 0 ++#define CONFIG_DECKLINK_OUTDEV 0 ++#define CONFIG_FBDEV_OUTDEV 0 ++#define CONFIG_OPENGL_OUTDEV 0 ++#define CONFIG_OSS_OUTDEV 0 ++#define CONFIG_PULSE_OUTDEV 0 ++#define CONFIG_SDL2_OUTDEV 0 ++#define CONFIG_SNDIO_OUTDEV 0 ++#define CONFIG_V4L2_OUTDEV 0 ++#define CONFIG_XV_OUTDEV 0 ++#define CONFIG_ABENCH_FILTER 0 ++#define CONFIG_ACOMPRESSOR_FILTER 0 ++#define CONFIG_ACONTRAST_FILTER 0 ++#define CONFIG_ACOPY_FILTER 0 ++#define CONFIG_ACUE_FILTER 0 ++#define CONFIG_ACROSSFADE_FILTER 0 ++#define CONFIG_ACROSSOVER_FILTER 0 ++#define CONFIG_ACRUSHER_FILTER 0 ++#define CONFIG_ADECLICK_FILTER 0 ++#define CONFIG_ADECLIP_FILTER 0 ++#define CONFIG_ADELAY_FILTER 0 ++#define CONFIG_ADERIVATIVE_FILTER 0 ++#define CONFIG_AECHO_FILTER 0 ++#define CONFIG_AEMPHASIS_FILTER 0 ++#define CONFIG_AEVAL_FILTER 0 ++#define CONFIG_AFADE_FILTER 0 ++#define CONFIG_AFFTDN_FILTER 0 ++#define CONFIG_AFFTFILT_FILTER 0 ++#define CONFIG_AFIR_FILTER 0 ++#define CONFIG_AFORMAT_FILTER 0 ++#define CONFIG_AGATE_FILTER 0 ++#define CONFIG_AIIR_FILTER 0 ++#define CONFIG_AINTEGRAL_FILTER 0 ++#define CONFIG_AINTERLEAVE_FILTER 0 ++#define CONFIG_ALIMITER_FILTER 0 ++#define CONFIG_ALLPASS_FILTER 0 ++#define CONFIG_ALOOP_FILTER 0 ++#define CONFIG_AMERGE_FILTER 0 ++#define CONFIG_AMETADATA_FILTER 0 ++#define CONFIG_AMIX_FILTER 0 ++#define CONFIG_AMULTIPLY_FILTER 0 ++#define CONFIG_ANEQUALIZER_FILTER 0 ++#define CONFIG_ANLMDN_FILTER 0 ++#define CONFIG_ANLMS_FILTER 0 ++#define CONFIG_ANULL_FILTER 0 ++#define CONFIG_APAD_FILTER 0 ++#define CONFIG_APERMS_FILTER 0 ++#define CONFIG_APHASER_FILTER 0 ++#define CONFIG_APULSATOR_FILTER 0 ++#define CONFIG_AREALTIME_FILTER 0 ++#define CONFIG_ARESAMPLE_FILTER 0 ++#define CONFIG_AREVERSE_FILTER 0 ++#define CONFIG_ARNNDN_FILTER 0 ++#define CONFIG_ASELECT_FILTER 0 ++#define CONFIG_ASENDCMD_FILTER 0 ++#define CONFIG_ASETNSAMPLES_FILTER 0 ++#define CONFIG_ASETPTS_FILTER 0 ++#define CONFIG_ASETRATE_FILTER 0 ++#define CONFIG_ASETTB_FILTER 0 ++#define CONFIG_ASHOWINFO_FILTER 0 ++#define CONFIG_ASIDEDATA_FILTER 0 ++#define CONFIG_ASOFTCLIP_FILTER 0 ++#define CONFIG_ASPLIT_FILTER 0 ++#define CONFIG_ASR_FILTER 0 ++#define CONFIG_ASTATS_FILTER 0 ++#define CONFIG_ASTREAMSELECT_FILTER 0 ++#define CONFIG_ATEMPO_FILTER 0 ++#define CONFIG_ATRIM_FILTER 0 ++#define CONFIG_AXCORRELATE_FILTER 0 ++#define CONFIG_AZMQ_FILTER 0 ++#define CONFIG_BANDPASS_FILTER 0 ++#define 
CONFIG_BANDREJECT_FILTER 0 ++#define CONFIG_BASS_FILTER 0 ++#define CONFIG_BIQUAD_FILTER 0 ++#define CONFIG_BS2B_FILTER 0 ++#define CONFIG_CHROMABER_VULKAN_FILTER 0 ++#define CONFIG_CHANNELMAP_FILTER 0 ++#define CONFIG_CHANNELSPLIT_FILTER 0 ++#define CONFIG_CHORUS_FILTER 0 ++#define CONFIG_COMPAND_FILTER 0 ++#define CONFIG_COMPENSATIONDELAY_FILTER 0 ++#define CONFIG_CROSSFEED_FILTER 0 ++#define CONFIG_CRYSTALIZER_FILTER 0 ++#define CONFIG_DCSHIFT_FILTER 0 ++#define CONFIG_DEESSER_FILTER 0 ++#define CONFIG_DRMETER_FILTER 0 ++#define CONFIG_DYNAUDNORM_FILTER 0 ++#define CONFIG_EARWAX_FILTER 0 ++#define CONFIG_EBUR128_FILTER 0 ++#define CONFIG_EQUALIZER_FILTER 0 ++#define CONFIG_EXTRASTEREO_FILTER 0 ++#define CONFIG_FIREQUALIZER_FILTER 0 ++#define CONFIG_FLANGER_FILTER 0 ++#define CONFIG_HAAS_FILTER 0 ++#define CONFIG_HDCD_FILTER 0 ++#define CONFIG_HEADPHONE_FILTER 0 ++#define CONFIG_HIGHPASS_FILTER 0 ++#define CONFIG_HIGHSHELF_FILTER 0 ++#define CONFIG_JOIN_FILTER 0 ++#define CONFIG_LADSPA_FILTER 0 ++#define CONFIG_LOUDNORM_FILTER 0 ++#define CONFIG_LOWPASS_FILTER 0 ++#define CONFIG_LOWSHELF_FILTER 0 ++#define CONFIG_LV2_FILTER 0 ++#define CONFIG_MCOMPAND_FILTER 0 ++#define CONFIG_PAN_FILTER 0 ++#define CONFIG_REPLAYGAIN_FILTER 0 ++#define CONFIG_RESAMPLE_FILTER 0 ++#define CONFIG_RUBBERBAND_FILTER 0 ++#define CONFIG_SIDECHAINCOMPRESS_FILTER 0 ++#define CONFIG_SIDECHAINGATE_FILTER 0 ++#define CONFIG_SILENCEDETECT_FILTER 0 ++#define CONFIG_SILENCEREMOVE_FILTER 0 ++#define CONFIG_SOFALIZER_FILTER 0 ++#define CONFIG_STEREOTOOLS_FILTER 0 ++#define CONFIG_STEREOWIDEN_FILTER 0 ++#define CONFIG_SUPEREQUALIZER_FILTER 0 ++#define CONFIG_SURROUND_FILTER 0 ++#define CONFIG_TREBLE_FILTER 0 ++#define CONFIG_TREMOLO_FILTER 0 ++#define CONFIG_VIBRATO_FILTER 0 ++#define CONFIG_VOLUME_FILTER 0 ++#define CONFIG_VOLUMEDETECT_FILTER 0 ++#define CONFIG_AEVALSRC_FILTER 0 ++#define CONFIG_AFIRSRC_FILTER 0 ++#define CONFIG_ANOISESRC_FILTER 0 ++#define CONFIG_ANULLSRC_FILTER 0 ++#define CONFIG_FLITE_FILTER 0 ++#define CONFIG_HILBERT_FILTER 0 ++#define CONFIG_SINC_FILTER 0 ++#define CONFIG_SINE_FILTER 0 ++#define CONFIG_ANULLSINK_FILTER 0 ++#define CONFIG_ADDROI_FILTER 0 ++#define CONFIG_ALPHAEXTRACT_FILTER 0 ++#define CONFIG_ALPHAMERGE_FILTER 0 ++#define CONFIG_AMPLIFY_FILTER 0 ++#define CONFIG_ASS_FILTER 0 ++#define CONFIG_ATADENOISE_FILTER 0 ++#define CONFIG_AVGBLUR_FILTER 0 ++#define CONFIG_AVGBLUR_OPENCL_FILTER 0 ++#define CONFIG_AVGBLUR_VULKAN_FILTER 0 ++#define CONFIG_BBOX_FILTER 0 ++#define CONFIG_BENCH_FILTER 0 ++#define CONFIG_BILATERAL_FILTER 0 ++#define CONFIG_BITPLANENOISE_FILTER 0 ++#define CONFIG_BLACKDETECT_FILTER 0 ++#define CONFIG_BLACKFRAME_FILTER 0 ++#define CONFIG_BLEND_FILTER 0 ++#define CONFIG_BM3D_FILTER 0 ++#define CONFIG_BOXBLUR_FILTER 0 ++#define CONFIG_BOXBLUR_OPENCL_FILTER 0 ++#define CONFIG_BWDIF_FILTER 0 ++#define CONFIG_CAS_FILTER 0 ++#define CONFIG_CHROMAHOLD_FILTER 0 ++#define CONFIG_CHROMAKEY_FILTER 0 ++#define CONFIG_CHROMASHIFT_FILTER 0 ++#define CONFIG_CIESCOPE_FILTER 0 ++#define CONFIG_CODECVIEW_FILTER 0 ++#define CONFIG_COLORBALANCE_FILTER 0 ++#define CONFIG_COLORCHANNELMIXER_FILTER 0 ++#define CONFIG_COLORKEY_FILTER 0 ++#define CONFIG_COLORKEY_OPENCL_FILTER 0 ++#define CONFIG_COLORHOLD_FILTER 0 ++#define CONFIG_COLORLEVELS_FILTER 0 ++#define CONFIG_COLORMATRIX_FILTER 0 ++#define CONFIG_COLORSPACE_FILTER 0 ++#define CONFIG_CONVOLUTION_FILTER 0 ++#define CONFIG_CONVOLUTION_OPENCL_FILTER 0 ++#define CONFIG_CONVOLVE_FILTER 0 ++#define CONFIG_COPY_FILTER 0 ++#define 
CONFIG_COREIMAGE_FILTER 0 ++#define CONFIG_COVER_RECT_FILTER 0 ++#define CONFIG_CROP_FILTER 0 ++#define CONFIG_CROPDETECT_FILTER 0 ++#define CONFIG_CUE_FILTER 0 ++#define CONFIG_CURVES_FILTER 0 ++#define CONFIG_DATASCOPE_FILTER 0 ++#define CONFIG_DCTDNOIZ_FILTER 0 ++#define CONFIG_DEBAND_FILTER 0 ++#define CONFIG_DEBLOCK_FILTER 0 ++#define CONFIG_DECIMATE_FILTER 0 ++#define CONFIG_DECONVOLVE_FILTER 0 ++#define CONFIG_DEDOT_FILTER 0 ++#define CONFIG_DEFLATE_FILTER 0 ++#define CONFIG_DEFLICKER_FILTER 0 ++#define CONFIG_DEINTERLACE_QSV_FILTER 0 ++#define CONFIG_DEINTERLACE_VAAPI_FILTER 0 ++#define CONFIG_DEJUDDER_FILTER 0 ++#define CONFIG_DELOGO_FILTER 0 ++#define CONFIG_DENOISE_VAAPI_FILTER 0 ++#define CONFIG_DERAIN_FILTER 0 ++#define CONFIG_DESHAKE_FILTER 0 ++#define CONFIG_DESHAKE_OPENCL_FILTER 0 ++#define CONFIG_DESPILL_FILTER 0 ++#define CONFIG_DETELECINE_FILTER 0 ++#define CONFIG_DILATION_FILTER 0 ++#define CONFIG_DILATION_OPENCL_FILTER 0 ++#define CONFIG_DISPLACE_FILTER 0 ++#define CONFIG_DNN_PROCESSING_FILTER 0 ++#define CONFIG_DOUBLEWEAVE_FILTER 0 ++#define CONFIG_DRAWBOX_FILTER 0 ++#define CONFIG_DRAWGRAPH_FILTER 0 ++#define CONFIG_DRAWGRID_FILTER 0 ++#define CONFIG_DRAWTEXT_FILTER 0 ++#define CONFIG_EDGEDETECT_FILTER 0 ++#define CONFIG_ELBG_FILTER 0 ++#define CONFIG_ENTROPY_FILTER 0 ++#define CONFIG_EQ_FILTER 0 ++#define CONFIG_EROSION_FILTER 0 ++#define CONFIG_EROSION_OPENCL_FILTER 0 ++#define CONFIG_EXTRACTPLANES_FILTER 0 ++#define CONFIG_FADE_FILTER 0 ++#define CONFIG_FFTDNOIZ_FILTER 0 ++#define CONFIG_FFTFILT_FILTER 0 ++#define CONFIG_FIELD_FILTER 0 ++#define CONFIG_FIELDHINT_FILTER 0 ++#define CONFIG_FIELDMATCH_FILTER 0 ++#define CONFIG_FIELDORDER_FILTER 0 ++#define CONFIG_FILLBORDERS_FILTER 0 ++#define CONFIG_FIND_RECT_FILTER 0 ++#define CONFIG_FLOODFILL_FILTER 0 ++#define CONFIG_FORMAT_FILTER 0 ++#define CONFIG_FPS_FILTER 0 ++#define CONFIG_FRAMEPACK_FILTER 0 ++#define CONFIG_FRAMERATE_FILTER 0 ++#define CONFIG_FRAMESTEP_FILTER 0 ++#define CONFIG_FREEZEDETECT_FILTER 0 ++#define CONFIG_FREEZEFRAMES_FILTER 0 ++#define CONFIG_FREI0R_FILTER 0 ++#define CONFIG_FSPP_FILTER 0 ++#define CONFIG_GBLUR_FILTER 0 ++#define CONFIG_GEQ_FILTER 0 ++#define CONFIG_GRADFUN_FILTER 0 ++#define CONFIG_GRAPHMONITOR_FILTER 0 ++#define CONFIG_GREYEDGE_FILTER 0 ++#define CONFIG_HALDCLUT_FILTER 0 ++#define CONFIG_HFLIP_FILTER 0 ++#define CONFIG_HISTEQ_FILTER 0 ++#define CONFIG_HISTOGRAM_FILTER 0 ++#define CONFIG_HQDN3D_FILTER 0 ++#define CONFIG_HQX_FILTER 0 ++#define CONFIG_HSTACK_FILTER 0 ++#define CONFIG_HUE_FILTER 0 ++#define CONFIG_HWDOWNLOAD_FILTER 0 ++#define CONFIG_HWMAP_FILTER 0 ++#define CONFIG_HWUPLOAD_FILTER 0 ++#define CONFIG_HWUPLOAD_CUDA_FILTER 0 ++#define CONFIG_HYSTERESIS_FILTER 0 ++#define CONFIG_IDET_FILTER 0 ++#define CONFIG_IL_FILTER 0 ++#define CONFIG_INFLATE_FILTER 0 ++#define CONFIG_INTERLACE_FILTER 0 ++#define CONFIG_INTERLEAVE_FILTER 0 ++#define CONFIG_KERNDEINT_FILTER 0 ++#define CONFIG_LAGFUN_FILTER 0 ++#define CONFIG_LENSCORRECTION_FILTER 0 ++#define CONFIG_LENSFUN_FILTER 0 ++#define CONFIG_LIBVMAF_FILTER 0 ++#define CONFIG_LIMITER_FILTER 0 ++#define CONFIG_LOOP_FILTER 0 ++#define CONFIG_LUMAKEY_FILTER 0 ++#define CONFIG_LUT_FILTER 0 ++#define CONFIG_LUT1D_FILTER 0 ++#define CONFIG_LUT2_FILTER 0 ++#define CONFIG_LUT3D_FILTER 0 ++#define CONFIG_LUTRGB_FILTER 0 ++#define CONFIG_LUTYUV_FILTER 0 ++#define CONFIG_MASKEDCLAMP_FILTER 0 ++#define CONFIG_MASKEDMAX_FILTER 0 ++#define CONFIG_MASKEDMERGE_FILTER 0 ++#define CONFIG_MASKEDMIN_FILTER 0 ++#define 
CONFIG_MASKEDTHRESHOLD_FILTER 0 ++#define CONFIG_MASKFUN_FILTER 0 ++#define CONFIG_MCDEINT_FILTER 0 ++#define CONFIG_MEDIAN_FILTER 0 ++#define CONFIG_MERGEPLANES_FILTER 0 ++#define CONFIG_MESTIMATE_FILTER 0 ++#define CONFIG_METADATA_FILTER 0 ++#define CONFIG_MIDEQUALIZER_FILTER 0 ++#define CONFIG_MINTERPOLATE_FILTER 0 ++#define CONFIG_MIX_FILTER 0 ++#define CONFIG_MPDECIMATE_FILTER 0 ++#define CONFIG_NEGATE_FILTER 0 ++#define CONFIG_NLMEANS_FILTER 0 ++#define CONFIG_NLMEANS_OPENCL_FILTER 0 ++#define CONFIG_NNEDI_FILTER 0 ++#define CONFIG_NOFORMAT_FILTER 0 ++#define CONFIG_NOISE_FILTER 0 ++#define CONFIG_NORMALIZE_FILTER 0 ++#define CONFIG_NULL_FILTER 0 ++#define CONFIG_OCR_FILTER 0 ++#define CONFIG_OCV_FILTER 0 ++#define CONFIG_OSCILLOSCOPE_FILTER 0 ++#define CONFIG_OVERLAY_FILTER 0 ++#define CONFIG_OVERLAY_OPENCL_FILTER 0 ++#define CONFIG_OVERLAY_QSV_FILTER 0 ++#define CONFIG_OVERLAY_VULKAN_FILTER 0 ++#define CONFIG_OVERLAY_CUDA_FILTER 0 ++#define CONFIG_OWDENOISE_FILTER 0 ++#define CONFIG_PAD_FILTER 0 ++#define CONFIG_PAD_OPENCL_FILTER 0 ++#define CONFIG_PALETTEGEN_FILTER 0 ++#define CONFIG_PALETTEUSE_FILTER 0 ++#define CONFIG_PERMS_FILTER 0 ++#define CONFIG_PERSPECTIVE_FILTER 0 ++#define CONFIG_PHASE_FILTER 0 ++#define CONFIG_PHOTOSENSITIVITY_FILTER 0 ++#define CONFIG_PIXDESCTEST_FILTER 0 ++#define CONFIG_PIXSCOPE_FILTER 0 ++#define CONFIG_PP_FILTER 0 ++#define CONFIG_PP7_FILTER 0 ++#define CONFIG_PREMULTIPLY_FILTER 0 ++#define CONFIG_PREWITT_FILTER 0 ++#define CONFIG_PREWITT_OPENCL_FILTER 0 ++#define CONFIG_PROCAMP_VAAPI_FILTER 0 ++#define CONFIG_PROGRAM_OPENCL_FILTER 0 ++#define CONFIG_PSEUDOCOLOR_FILTER 0 ++#define CONFIG_PSNR_FILTER 0 ++#define CONFIG_PULLUP_FILTER 0 ++#define CONFIG_QP_FILTER 0 ++#define CONFIG_RANDOM_FILTER 0 ++#define CONFIG_READEIA608_FILTER 0 ++#define CONFIG_READVITC_FILTER 0 ++#define CONFIG_REALTIME_FILTER 0 ++#define CONFIG_REMAP_FILTER 0 ++#define CONFIG_REMOVEGRAIN_FILTER 0 ++#define CONFIG_REMOVELOGO_FILTER 0 ++#define CONFIG_REPEATFIELDS_FILTER 0 ++#define CONFIG_REVERSE_FILTER 0 ++#define CONFIG_RGBASHIFT_FILTER 0 ++#define CONFIG_ROBERTS_FILTER 0 ++#define CONFIG_ROBERTS_OPENCL_FILTER 0 ++#define CONFIG_ROTATE_FILTER 0 ++#define CONFIG_SAB_FILTER 0 ++#define CONFIG_SCALE_FILTER 0 ++#define CONFIG_SCALE_CUDA_FILTER 0 ++#define CONFIG_SCALE_NPP_FILTER 0 ++#define CONFIG_SCALE_QSV_FILTER 0 ++#define CONFIG_SCALE_VAAPI_FILTER 0 ++#define CONFIG_SCALE_VULKAN_FILTER 0 ++#define CONFIG_SCALE2REF_FILTER 0 ++#define CONFIG_SCROLL_FILTER 0 ++#define CONFIG_SELECT_FILTER 0 ++#define CONFIG_SELECTIVECOLOR_FILTER 0 ++#define CONFIG_SENDCMD_FILTER 0 ++#define CONFIG_SEPARATEFIELDS_FILTER 0 ++#define CONFIG_SETDAR_FILTER 0 ++#define CONFIG_SETFIELD_FILTER 0 ++#define CONFIG_SETPARAMS_FILTER 0 ++#define CONFIG_SETPTS_FILTER 0 ++#define CONFIG_SETRANGE_FILTER 0 ++#define CONFIG_SETSAR_FILTER 0 ++#define CONFIG_SETTB_FILTER 0 ++#define CONFIG_SHARPNESS_VAAPI_FILTER 0 ++#define CONFIG_SHOWINFO_FILTER 0 ++#define CONFIG_SHOWPALETTE_FILTER 0 ++#define CONFIG_SHUFFLEFRAMES_FILTER 0 ++#define CONFIG_SHUFFLEPLANES_FILTER 0 ++#define CONFIG_SIDEDATA_FILTER 0 ++#define CONFIG_SIGNALSTATS_FILTER 0 ++#define CONFIG_SIGNATURE_FILTER 0 ++#define CONFIG_SMARTBLUR_FILTER 0 ++#define CONFIG_SOBEL_FILTER 0 ++#define CONFIG_SOBEL_OPENCL_FILTER 0 ++#define CONFIG_SPLIT_FILTER 0 ++#define CONFIG_SPP_FILTER 0 ++#define CONFIG_SR_FILTER 0 ++#define CONFIG_SSIM_FILTER 0 ++#define CONFIG_STEREO3D_FILTER 0 ++#define CONFIG_STREAMSELECT_FILTER 0 ++#define CONFIG_SUBTITLES_FILTER 0 ++#define 
CONFIG_SUPER2XSAI_FILTER 0 ++#define CONFIG_SWAPRECT_FILTER 0 ++#define CONFIG_SWAPUV_FILTER 0 ++#define CONFIG_TBLEND_FILTER 0 ++#define CONFIG_TELECINE_FILTER 0 ++#define CONFIG_THISTOGRAM_FILTER 0 ++#define CONFIG_THRESHOLD_FILTER 0 ++#define CONFIG_THUMBNAIL_FILTER 0 ++#define CONFIG_THUMBNAIL_CUDA_FILTER 0 ++#define CONFIG_TILE_FILTER 0 ++#define CONFIG_TINTERLACE_FILTER 0 ++#define CONFIG_TLUT2_FILTER 0 ++#define CONFIG_TMEDIAN_FILTER 0 ++#define CONFIG_TMIX_FILTER 0 ++#define CONFIG_TONEMAP_FILTER 0 ++#define CONFIG_TONEMAP_OPENCL_FILTER 0 ++#define CONFIG_TONEMAP_VAAPI_FILTER 0 ++#define CONFIG_TPAD_FILTER 0 ++#define CONFIG_TRANSPOSE_FILTER 0 ++#define CONFIG_TRANSPOSE_NPP_FILTER 0 ++#define CONFIG_TRANSPOSE_OPENCL_FILTER 0 ++#define CONFIG_TRANSPOSE_VAAPI_FILTER 0 ++#define CONFIG_TRIM_FILTER 0 ++#define CONFIG_UNPREMULTIPLY_FILTER 0 ++#define CONFIG_UNSHARP_FILTER 0 ++#define CONFIG_UNSHARP_OPENCL_FILTER 0 ++#define CONFIG_USPP_FILTER 0 ++#define CONFIG_V360_FILTER 0 ++#define CONFIG_VAGUEDENOISER_FILTER 0 ++#define CONFIG_VECTORSCOPE_FILTER 0 ++#define CONFIG_VFLIP_FILTER 0 ++#define CONFIG_VFRDET_FILTER 0 ++#define CONFIG_VIBRANCE_FILTER 0 ++#define CONFIG_VIDSTABDETECT_FILTER 0 ++#define CONFIG_VIDSTABTRANSFORM_FILTER 0 ++#define CONFIG_VIGNETTE_FILTER 0 ++#define CONFIG_VMAFMOTION_FILTER 0 ++#define CONFIG_VPP_QSV_FILTER 0 ++#define CONFIG_VSTACK_FILTER 0 ++#define CONFIG_W3FDIF_FILTER 0 ++#define CONFIG_WAVEFORM_FILTER 0 ++#define CONFIG_WEAVE_FILTER 0 ++#define CONFIG_XBR_FILTER 0 ++#define CONFIG_XFADE_FILTER 0 ++#define CONFIG_XFADE_OPENCL_FILTER 0 ++#define CONFIG_XMEDIAN_FILTER 0 ++#define CONFIG_XSTACK_FILTER 0 ++#define CONFIG_YADIF_FILTER 0 ++#define CONFIG_YADIF_CUDA_FILTER 0 ++#define CONFIG_YAEPBLUR_FILTER 0 ++#define CONFIG_ZMQ_FILTER 0 ++#define CONFIG_ZOOMPAN_FILTER 0 ++#define CONFIG_ZSCALE_FILTER 0 ++#define CONFIG_ALLRGB_FILTER 0 ++#define CONFIG_ALLYUV_FILTER 0 ++#define CONFIG_CELLAUTO_FILTER 0 ++#define CONFIG_COLOR_FILTER 0 ++#define CONFIG_COREIMAGESRC_FILTER 0 ++#define CONFIG_FREI0R_SRC_FILTER 0 ++#define CONFIG_HALDCLUTSRC_FILTER 0 ++#define CONFIG_LIFE_FILTER 0 ++#define CONFIG_MANDELBROT_FILTER 0 ++#define CONFIG_MPTESTSRC_FILTER 0 ++#define CONFIG_NULLSRC_FILTER 0 ++#define CONFIG_OPENCLSRC_FILTER 0 ++#define CONFIG_PAL75BARS_FILTER 0 ++#define CONFIG_PAL100BARS_FILTER 0 ++#define CONFIG_RGBTESTSRC_FILTER 0 ++#define CONFIG_SIERPINSKI_FILTER 0 ++#define CONFIG_SMPTEBARS_FILTER 0 ++#define CONFIG_SMPTEHDBARS_FILTER 0 ++#define CONFIG_TESTSRC_FILTER 0 ++#define CONFIG_TESTSRC2_FILTER 0 ++#define CONFIG_YUVTESTSRC_FILTER 0 ++#define CONFIG_NULLSINK_FILTER 0 ++#define CONFIG_ABITSCOPE_FILTER 0 ++#define CONFIG_ADRAWGRAPH_FILTER 0 ++#define CONFIG_AGRAPHMONITOR_FILTER 0 ++#define CONFIG_AHISTOGRAM_FILTER 0 ++#define CONFIG_APHASEMETER_FILTER 0 ++#define CONFIG_AVECTORSCOPE_FILTER 0 ++#define CONFIG_CONCAT_FILTER 0 ++#define CONFIG_SHOWCQT_FILTER 0 ++#define CONFIG_SHOWFREQS_FILTER 0 ++#define CONFIG_SHOWSPATIAL_FILTER 0 ++#define CONFIG_SHOWSPECTRUM_FILTER 0 ++#define CONFIG_SHOWSPECTRUMPIC_FILTER 0 ++#define CONFIG_SHOWVOLUME_FILTER 0 ++#define CONFIG_SHOWWAVES_FILTER 0 ++#define CONFIG_SHOWWAVESPIC_FILTER 0 ++#define CONFIG_SPECTRUMSYNTH_FILTER 0 ++#define CONFIG_AMOVIE_FILTER 0 ++#define CONFIG_MOVIE_FILTER 0 ++#define CONFIG_AFIFO_FILTER 0 ++#define CONFIG_FIFO_FILTER 0 ++#define CONFIG_AA_DEMUXER 0 ++#define CONFIG_AAC_DEMUXER 1 ++#define CONFIG_AC3_DEMUXER 0 ++#define CONFIG_ACM_DEMUXER 0 ++#define CONFIG_ACT_DEMUXER 0 ++#define 
CONFIG_ADF_DEMUXER 0 ++#define CONFIG_ADP_DEMUXER 0 ++#define CONFIG_ADS_DEMUXER 0 ++#define CONFIG_ADX_DEMUXER 0 ++#define CONFIG_AEA_DEMUXER 0 ++#define CONFIG_AFC_DEMUXER 0 ++#define CONFIG_AIFF_DEMUXER 0 ++#define CONFIG_AIX_DEMUXER 0 ++#define CONFIG_ALP_DEMUXER 0 ++#define CONFIG_AMR_DEMUXER 1 ++#define CONFIG_AMRNB_DEMUXER 0 ++#define CONFIG_AMRWB_DEMUXER 0 ++#define CONFIG_ANM_DEMUXER 0 ++#define CONFIG_APC_DEMUXER 0 ++#define CONFIG_APE_DEMUXER 0 ++#define CONFIG_APM_DEMUXER 0 ++#define CONFIG_APNG_DEMUXER 0 ++#define CONFIG_APTX_DEMUXER 0 ++#define CONFIG_APTX_HD_DEMUXER 0 ++#define CONFIG_AQTITLE_DEMUXER 0 ++#define CONFIG_ARGO_ASF_DEMUXER 0 ++#define CONFIG_ASF_DEMUXER 0 ++#define CONFIG_ASF_O_DEMUXER 0 ++#define CONFIG_ASS_DEMUXER 0 ++#define CONFIG_AST_DEMUXER 0 ++#define CONFIG_AU_DEMUXER 0 ++#define CONFIG_AV1_DEMUXER 0 ++#define CONFIG_AVI_DEMUXER 1 ++#define CONFIG_AVISYNTH_DEMUXER 0 ++#define CONFIG_AVR_DEMUXER 0 ++#define CONFIG_AVS_DEMUXER 0 ++#define CONFIG_AVS2_DEMUXER 0 ++#define CONFIG_BETHSOFTVID_DEMUXER 0 ++#define CONFIG_BFI_DEMUXER 0 ++#define CONFIG_BINTEXT_DEMUXER 0 ++#define CONFIG_BINK_DEMUXER 0 ++#define CONFIG_BIT_DEMUXER 0 ++#define CONFIG_BMV_DEMUXER 0 ++#define CONFIG_BFSTM_DEMUXER 0 ++#define CONFIG_BRSTM_DEMUXER 0 ++#define CONFIG_BOA_DEMUXER 0 ++#define CONFIG_C93_DEMUXER 0 ++#define CONFIG_CAF_DEMUXER 0 ++#define CONFIG_CAVSVIDEO_DEMUXER 0 ++#define CONFIG_CDG_DEMUXER 0 ++#define CONFIG_CDXL_DEMUXER 0 ++#define CONFIG_CINE_DEMUXER 0 ++#define CONFIG_CODEC2_DEMUXER 0 ++#define CONFIG_CODEC2RAW_DEMUXER 0 ++#define CONFIG_CONCAT_DEMUXER 0 ++#define CONFIG_DASH_DEMUXER 0 ++#define CONFIG_DATA_DEMUXER 0 ++#define CONFIG_DAUD_DEMUXER 0 ++#define CONFIG_DCSTR_DEMUXER 0 ++#define CONFIG_DERF_DEMUXER 0 ++#define CONFIG_DFA_DEMUXER 0 ++#define CONFIG_DHAV_DEMUXER 0 ++#define CONFIG_DIRAC_DEMUXER 0 ++#define CONFIG_DNXHD_DEMUXER 0 ++#define CONFIG_DSF_DEMUXER 0 ++#define CONFIG_DSICIN_DEMUXER 0 ++#define CONFIG_DSS_DEMUXER 0 ++#define CONFIG_DTS_DEMUXER 0 ++#define CONFIG_DTSHD_DEMUXER 0 ++#define CONFIG_DV_DEMUXER 0 ++#define CONFIG_DVBSUB_DEMUXER 0 ++#define CONFIG_DVBTXT_DEMUXER 0 ++#define CONFIG_DXA_DEMUXER 0 ++#define CONFIG_EA_DEMUXER 0 ++#define CONFIG_EA_CDATA_DEMUXER 0 ++#define CONFIG_EAC3_DEMUXER 0 ++#define CONFIG_EPAF_DEMUXER 0 ++#define CONFIG_FFMETADATA_DEMUXER 0 ++#define CONFIG_FILMSTRIP_DEMUXER 0 ++#define CONFIG_FITS_DEMUXER 0 ++#define CONFIG_FLAC_DEMUXER 1 ++#define CONFIG_FLIC_DEMUXER 0 ++#define CONFIG_FLV_DEMUXER 0 ++#define CONFIG_LIVE_FLV_DEMUXER 0 ++#define CONFIG_FOURXM_DEMUXER 0 ++#define CONFIG_FRM_DEMUXER 0 ++#define CONFIG_FSB_DEMUXER 0 ++#define CONFIG_FWSE_DEMUXER 0 ++#define CONFIG_G722_DEMUXER 0 ++#define CONFIG_G723_1_DEMUXER 0 ++#define CONFIG_G726_DEMUXER 0 ++#define CONFIG_G726LE_DEMUXER 0 ++#define CONFIG_G729_DEMUXER 0 ++#define CONFIG_GDV_DEMUXER 0 ++#define CONFIG_GENH_DEMUXER 0 ++#define CONFIG_GIF_DEMUXER 0 ++#define CONFIG_GSM_DEMUXER 0 ++#define CONFIG_GXF_DEMUXER 0 ++#define CONFIG_H261_DEMUXER 0 ++#define CONFIG_H263_DEMUXER 0 ++#define CONFIG_H264_DEMUXER 0 ++#define CONFIG_HCA_DEMUXER 0 ++#define CONFIG_HCOM_DEMUXER 0 ++#define CONFIG_HEVC_DEMUXER 0 ++#define CONFIG_HLS_DEMUXER 0 ++#define CONFIG_HNM_DEMUXER 0 ++#define CONFIG_ICO_DEMUXER 0 ++#define CONFIG_IDCIN_DEMUXER 0 ++#define CONFIG_IDF_DEMUXER 0 ++#define CONFIG_IFF_DEMUXER 0 ++#define CONFIG_IFV_DEMUXER 0 ++#define CONFIG_ILBC_DEMUXER 0 ++#define CONFIG_IMAGE2_DEMUXER 0 ++#define CONFIG_IMAGE2PIPE_DEMUXER 0 ++#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 
0 ++#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0 ++#define CONFIG_INGENIENT_DEMUXER 0 ++#define CONFIG_IPMOVIE_DEMUXER 0 ++#define CONFIG_IRCAM_DEMUXER 0 ++#define CONFIG_ISS_DEMUXER 0 ++#define CONFIG_IV8_DEMUXER 0 ++#define CONFIG_IVF_DEMUXER 0 ++#define CONFIG_IVR_DEMUXER 0 ++#define CONFIG_JACOSUB_DEMUXER 0 ++#define CONFIG_JV_DEMUXER 0 ++#define CONFIG_KUX_DEMUXER 0 ++#define CONFIG_KVAG_DEMUXER 0 ++#define CONFIG_LMLM4_DEMUXER 0 ++#define CONFIG_LOAS_DEMUXER 0 ++#define CONFIG_LRC_DEMUXER 0 ++#define CONFIG_LVF_DEMUXER 0 ++#define CONFIG_LXF_DEMUXER 0 ++#define CONFIG_M4V_DEMUXER 0 ++#define CONFIG_MATROSKA_DEMUXER 1 ++#define CONFIG_MGSTS_DEMUXER 0 ++#define CONFIG_MICRODVD_DEMUXER 0 ++#define CONFIG_MJPEG_DEMUXER 0 ++#define CONFIG_MJPEG_2000_DEMUXER 0 ++#define CONFIG_MLP_DEMUXER 0 ++#define CONFIG_MLV_DEMUXER 0 ++#define CONFIG_MM_DEMUXER 0 ++#define CONFIG_MMF_DEMUXER 0 ++#define CONFIG_MOV_DEMUXER 1 ++#define CONFIG_MP3_DEMUXER 1 ++#define CONFIG_MPC_DEMUXER 0 ++#define CONFIG_MPC8_DEMUXER 0 ++#define CONFIG_MPEGPS_DEMUXER 0 ++#define CONFIG_MPEGTS_DEMUXER 0 ++#define CONFIG_MPEGTSRAW_DEMUXER 0 ++#define CONFIG_MPEGVIDEO_DEMUXER 0 ++#define CONFIG_MPJPEG_DEMUXER 0 ++#define CONFIG_MPL2_DEMUXER 0 ++#define CONFIG_MPSUB_DEMUXER 0 ++#define CONFIG_MSF_DEMUXER 0 ++#define CONFIG_MSNWC_TCP_DEMUXER 0 ++#define CONFIG_MTAF_DEMUXER 0 ++#define CONFIG_MTV_DEMUXER 0 ++#define CONFIG_MUSX_DEMUXER 0 ++#define CONFIG_MV_DEMUXER 0 ++#define CONFIG_MVI_DEMUXER 0 ++#define CONFIG_MXF_DEMUXER 0 ++#define CONFIG_MXG_DEMUXER 0 ++#define CONFIG_NC_DEMUXER 0 ++#define CONFIG_NISTSPHERE_DEMUXER 0 ++#define CONFIG_NSP_DEMUXER 0 ++#define CONFIG_NSV_DEMUXER 0 ++#define CONFIG_NUT_DEMUXER 0 ++#define CONFIG_NUV_DEMUXER 0 ++#define CONFIG_OGG_DEMUXER 1 ++#define CONFIG_OMA_DEMUXER 0 ++#define CONFIG_PAF_DEMUXER 0 ++#define CONFIG_PCM_ALAW_DEMUXER 0 ++#define CONFIG_PCM_MULAW_DEMUXER 0 ++#define CONFIG_PCM_VIDC_DEMUXER 0 ++#define CONFIG_PCM_F64BE_DEMUXER 0 ++#define CONFIG_PCM_F64LE_DEMUXER 0 ++#define CONFIG_PCM_F32BE_DEMUXER 0 ++#define CONFIG_PCM_F32LE_DEMUXER 0 ++#define CONFIG_PCM_S32BE_DEMUXER 0 ++#define CONFIG_PCM_S32LE_DEMUXER 0 ++#define CONFIG_PCM_S24BE_DEMUXER 0 ++#define CONFIG_PCM_S24LE_DEMUXER 0 ++#define CONFIG_PCM_S16BE_DEMUXER 0 ++#define CONFIG_PCM_S16LE_DEMUXER 0 ++#define CONFIG_PCM_S8_DEMUXER 0 ++#define CONFIG_PCM_U32BE_DEMUXER 0 ++#define CONFIG_PCM_U32LE_DEMUXER 0 ++#define CONFIG_PCM_U24BE_DEMUXER 0 ++#define CONFIG_PCM_U24LE_DEMUXER 0 ++#define CONFIG_PCM_U16BE_DEMUXER 0 ++#define CONFIG_PCM_U16LE_DEMUXER 0 ++#define CONFIG_PCM_U8_DEMUXER 0 ++#define CONFIG_PJS_DEMUXER 0 ++#define CONFIG_PMP_DEMUXER 0 ++#define CONFIG_PVA_DEMUXER 0 ++#define CONFIG_PVF_DEMUXER 0 ++#define CONFIG_QCP_DEMUXER 0 ++#define CONFIG_R3D_DEMUXER 0 ++#define CONFIG_RAWVIDEO_DEMUXER 0 ++#define CONFIG_REALTEXT_DEMUXER 0 ++#define CONFIG_REDSPARK_DEMUXER 0 ++#define CONFIG_RL2_DEMUXER 0 ++#define CONFIG_RM_DEMUXER 0 ++#define CONFIG_ROQ_DEMUXER 0 ++#define CONFIG_RPL_DEMUXER 0 ++#define CONFIG_RSD_DEMUXER 0 ++#define CONFIG_RSO_DEMUXER 0 ++#define CONFIG_RTP_DEMUXER 0 ++#define CONFIG_RTSP_DEMUXER 0 ++#define CONFIG_S337M_DEMUXER 0 ++#define CONFIG_SAMI_DEMUXER 0 ++#define CONFIG_SAP_DEMUXER 0 ++#define CONFIG_SBC_DEMUXER 0 ++#define CONFIG_SBG_DEMUXER 0 ++#define CONFIG_SCC_DEMUXER 0 ++#define CONFIG_SDP_DEMUXER 0 ++#define CONFIG_SDR2_DEMUXER 0 ++#define CONFIG_SDS_DEMUXER 0 ++#define CONFIG_SDX_DEMUXER 0 ++#define CONFIG_SEGAFILM_DEMUXER 0 ++#define CONFIG_SER_DEMUXER 0 ++#define CONFIG_SHORTEN_DEMUXER 
0 ++#define CONFIG_SIFF_DEMUXER 0 ++#define CONFIG_SLN_DEMUXER 0 ++#define CONFIG_SMACKER_DEMUXER 0 ++#define CONFIG_SMJPEG_DEMUXER 0 ++#define CONFIG_SMUSH_DEMUXER 0 ++#define CONFIG_SOL_DEMUXER 0 ++#define CONFIG_SOX_DEMUXER 0 ++#define CONFIG_SPDIF_DEMUXER 0 ++#define CONFIG_SRT_DEMUXER 0 ++#define CONFIG_STR_DEMUXER 0 ++#define CONFIG_STL_DEMUXER 0 ++#define CONFIG_SUBVIEWER1_DEMUXER 0 ++#define CONFIG_SUBVIEWER_DEMUXER 0 ++#define CONFIG_SUP_DEMUXER 0 ++#define CONFIG_SVAG_DEMUXER 0 ++#define CONFIG_SWF_DEMUXER 0 ++#define CONFIG_TAK_DEMUXER 0 ++#define CONFIG_TEDCAPTIONS_DEMUXER 0 ++#define CONFIG_THP_DEMUXER 0 ++#define CONFIG_THREEDOSTR_DEMUXER 0 ++#define CONFIG_TIERTEXSEQ_DEMUXER 0 ++#define CONFIG_TMV_DEMUXER 0 ++#define CONFIG_TRUEHD_DEMUXER 0 ++#define CONFIG_TTA_DEMUXER 0 ++#define CONFIG_TXD_DEMUXER 0 ++#define CONFIG_TTY_DEMUXER 0 ++#define CONFIG_TY_DEMUXER 0 ++#define CONFIG_V210_DEMUXER 0 ++#define CONFIG_V210X_DEMUXER 0 ++#define CONFIG_VAG_DEMUXER 0 ++#define CONFIG_VC1_DEMUXER 0 ++#define CONFIG_VC1T_DEMUXER 0 ++#define CONFIG_VIVIDAS_DEMUXER 0 ++#define CONFIG_VIVO_DEMUXER 0 ++#define CONFIG_VMD_DEMUXER 0 ++#define CONFIG_VOBSUB_DEMUXER 0 ++#define CONFIG_VOC_DEMUXER 0 ++#define CONFIG_VPK_DEMUXER 0 ++#define CONFIG_VPLAYER_DEMUXER 0 ++#define CONFIG_VQF_DEMUXER 0 ++#define CONFIG_W64_DEMUXER 0 ++#define CONFIG_WAV_DEMUXER 1 ++#define CONFIG_WC3_DEMUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0 ++#define CONFIG_WEBVTT_DEMUXER 0 ++#define CONFIG_WSAUD_DEMUXER 0 ++#define CONFIG_WSD_DEMUXER 0 ++#define CONFIG_WSVQA_DEMUXER 0 ++#define CONFIG_WTV_DEMUXER 0 ++#define CONFIG_WVE_DEMUXER 0 ++#define CONFIG_WV_DEMUXER 0 ++#define CONFIG_XA_DEMUXER 0 ++#define CONFIG_XBIN_DEMUXER 0 ++#define CONFIG_XMV_DEMUXER 0 ++#define CONFIG_XVAG_DEMUXER 0 ++#define CONFIG_XWMA_DEMUXER 0 ++#define CONFIG_YOP_DEMUXER 0 ++#define CONFIG_YUV4MPEGPIPE_DEMUXER 0 ++#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_GIF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0 ++#define CONFIG_LIBGME_DEMUXER 0 ++#define CONFIG_LIBMODPLUG_DEMUXER 0 ++#define CONFIG_LIBOPENMPT_DEMUXER 0 ++#define CONFIG_VAPOURSYNTH_DEMUXER 0 ++#define CONFIG_A64_MUXER 0 ++#define CONFIG_AC3_MUXER 0 ++#define CONFIG_ADTS_MUXER 0 ++#define CONFIG_ADX_MUXER 0 ++#define CONFIG_AIFF_MUXER 0 ++#define CONFIG_AMR_MUXER 0 ++#define CONFIG_APNG_MUXER 0 ++#define CONFIG_APTX_MUXER 0 ++#define CONFIG_APTX_HD_MUXER 0 ++#define CONFIG_ASF_MUXER 0 ++#define CONFIG_ASS_MUXER 0 ++#define CONFIG_AST_MUXER 0 ++#define CONFIG_ASF_STREAM_MUXER 0 ++#define CONFIG_AU_MUXER 0 ++#define 
CONFIG_AVI_MUXER 0 ++#define CONFIG_AVM2_MUXER 0 ++#define CONFIG_AVS2_MUXER 0 ++#define CONFIG_BIT_MUXER 0 ++#define CONFIG_CAF_MUXER 0 ++#define CONFIG_CAVSVIDEO_MUXER 0 ++#define CONFIG_CODEC2_MUXER 0 ++#define CONFIG_CODEC2RAW_MUXER 0 ++#define CONFIG_CRC_MUXER 0 ++#define CONFIG_DASH_MUXER 0 ++#define CONFIG_DATA_MUXER 0 ++#define CONFIG_DAUD_MUXER 0 ++#define CONFIG_DIRAC_MUXER 0 ++#define CONFIG_DNXHD_MUXER 0 ++#define CONFIG_DTS_MUXER 0 ++#define CONFIG_DV_MUXER 0 ++#define CONFIG_EAC3_MUXER 0 ++#define CONFIG_F4V_MUXER 0 ++#define CONFIG_FFMETADATA_MUXER 0 ++#define CONFIG_FIFO_MUXER 0 ++#define CONFIG_FIFO_TEST_MUXER 0 ++#define CONFIG_FILMSTRIP_MUXER 0 ++#define CONFIG_FITS_MUXER 0 ++#define CONFIG_FLAC_MUXER 0 ++#define CONFIG_FLV_MUXER 0 ++#define CONFIG_FRAMECRC_MUXER 0 ++#define CONFIG_FRAMEHASH_MUXER 0 ++#define CONFIG_FRAMEMD5_MUXER 0 ++#define CONFIG_G722_MUXER 0 ++#define CONFIG_G723_1_MUXER 0 ++#define CONFIG_G726_MUXER 0 ++#define CONFIG_G726LE_MUXER 0 ++#define CONFIG_GIF_MUXER 0 ++#define CONFIG_GSM_MUXER 0 ++#define CONFIG_GXF_MUXER 0 ++#define CONFIG_H261_MUXER 0 ++#define CONFIG_H263_MUXER 0 ++#define CONFIG_H264_MUXER 0 ++#define CONFIG_HASH_MUXER 0 ++#define CONFIG_HDS_MUXER 0 ++#define CONFIG_HEVC_MUXER 0 ++#define CONFIG_HLS_MUXER 0 ++#define CONFIG_ICO_MUXER 0 ++#define CONFIG_ILBC_MUXER 0 ++#define CONFIG_IMAGE2_MUXER 0 ++#define CONFIG_IMAGE2PIPE_MUXER 0 ++#define CONFIG_IPOD_MUXER 0 ++#define CONFIG_IRCAM_MUXER 0 ++#define CONFIG_ISMV_MUXER 0 ++#define CONFIG_IVF_MUXER 0 ++#define CONFIG_JACOSUB_MUXER 0 ++#define CONFIG_LATM_MUXER 0 ++#define CONFIG_LRC_MUXER 0 ++#define CONFIG_M4V_MUXER 0 ++#define CONFIG_MD5_MUXER 0 ++#define CONFIG_MATROSKA_MUXER 0 ++#define CONFIG_MATROSKA_AUDIO_MUXER 0 ++#define CONFIG_MICRODVD_MUXER 0 ++#define CONFIG_MJPEG_MUXER 0 ++#define CONFIG_MLP_MUXER 0 ++#define CONFIG_MMF_MUXER 0 ++#define CONFIG_MOV_MUXER 0 ++#define CONFIG_MP2_MUXER 0 ++#define CONFIG_MP3_MUXER 0 ++#define CONFIG_MP4_MUXER 0 ++#define CONFIG_MPEG1SYSTEM_MUXER 0 ++#define CONFIG_MPEG1VCD_MUXER 0 ++#define CONFIG_MPEG1VIDEO_MUXER 0 ++#define CONFIG_MPEG2DVD_MUXER 0 ++#define CONFIG_MPEG2SVCD_MUXER 0 ++#define CONFIG_MPEG2VIDEO_MUXER 0 ++#define CONFIG_MPEG2VOB_MUXER 0 ++#define CONFIG_MPEGTS_MUXER 0 ++#define CONFIG_MPJPEG_MUXER 0 ++#define CONFIG_MXF_MUXER 0 ++#define CONFIG_MXF_D10_MUXER 0 ++#define CONFIG_MXF_OPATOM_MUXER 0 ++#define CONFIG_NULL_MUXER 0 ++#define CONFIG_NUT_MUXER 0 ++#define CONFIG_OGA_MUXER 0 ++#define CONFIG_OGG_MUXER 0 ++#define CONFIG_OGV_MUXER 0 ++#define CONFIG_OMA_MUXER 0 ++#define CONFIG_OPUS_MUXER 0 ++#define CONFIG_PCM_ALAW_MUXER 0 ++#define CONFIG_PCM_MULAW_MUXER 0 ++#define CONFIG_PCM_VIDC_MUXER 0 ++#define CONFIG_PCM_F64BE_MUXER 0 ++#define CONFIG_PCM_F64LE_MUXER 0 ++#define CONFIG_PCM_F32BE_MUXER 0 ++#define CONFIG_PCM_F32LE_MUXER 0 ++#define CONFIG_PCM_S32BE_MUXER 0 ++#define CONFIG_PCM_S32LE_MUXER 0 ++#define CONFIG_PCM_S24BE_MUXER 0 ++#define CONFIG_PCM_S24LE_MUXER 0 ++#define CONFIG_PCM_S16BE_MUXER 0 ++#define CONFIG_PCM_S16LE_MUXER 0 ++#define CONFIG_PCM_S8_MUXER 0 ++#define CONFIG_PCM_U32BE_MUXER 0 ++#define CONFIG_PCM_U32LE_MUXER 0 ++#define CONFIG_PCM_U24BE_MUXER 0 ++#define CONFIG_PCM_U24LE_MUXER 0 ++#define CONFIG_PCM_U16BE_MUXER 0 ++#define CONFIG_PCM_U16LE_MUXER 0 ++#define CONFIG_PCM_U8_MUXER 0 ++#define CONFIG_PSP_MUXER 0 ++#define CONFIG_RAWVIDEO_MUXER 0 ++#define CONFIG_RM_MUXER 0 ++#define CONFIG_ROQ_MUXER 0 ++#define CONFIG_RSO_MUXER 0 ++#define CONFIG_RTP_MUXER 0 ++#define CONFIG_RTP_MPEGTS_MUXER 0 
++#define CONFIG_RTSP_MUXER 0 ++#define CONFIG_SAP_MUXER 0 ++#define CONFIG_SBC_MUXER 0 ++#define CONFIG_SCC_MUXER 0 ++#define CONFIG_SEGAFILM_MUXER 0 ++#define CONFIG_SEGMENT_MUXER 0 ++#define CONFIG_STREAM_SEGMENT_MUXER 0 ++#define CONFIG_SINGLEJPEG_MUXER 0 ++#define CONFIG_SMJPEG_MUXER 0 ++#define CONFIG_SMOOTHSTREAMING_MUXER 0 ++#define CONFIG_SOX_MUXER 0 ++#define CONFIG_SPX_MUXER 0 ++#define CONFIG_SPDIF_MUXER 0 ++#define CONFIG_SRT_MUXER 0 ++#define CONFIG_STREAMHASH_MUXER 0 ++#define CONFIG_SUP_MUXER 0 ++#define CONFIG_SWF_MUXER 0 ++#define CONFIG_TEE_MUXER 0 ++#define CONFIG_TG2_MUXER 0 ++#define CONFIG_TGP_MUXER 0 ++#define CONFIG_MKVTIMESTAMP_V2_MUXER 0 ++#define CONFIG_TRUEHD_MUXER 0 ++#define CONFIG_TTA_MUXER 0 ++#define CONFIG_UNCODEDFRAMECRC_MUXER 0 ++#define CONFIG_VC1_MUXER 0 ++#define CONFIG_VC1T_MUXER 0 ++#define CONFIG_VOC_MUXER 0 ++#define CONFIG_W64_MUXER 0 ++#define CONFIG_WAV_MUXER 0 ++#define CONFIG_WEBM_MUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_MUXER 0 ++#define CONFIG_WEBM_CHUNK_MUXER 0 ++#define CONFIG_WEBP_MUXER 0 ++#define CONFIG_WEBVTT_MUXER 0 ++#define CONFIG_WTV_MUXER 0 ++#define CONFIG_WV_MUXER 0 ++#define CONFIG_YUV4MPEGPIPE_MUXER 0 ++#define CONFIG_CHROMAPRINT_MUXER 0 ++#define CONFIG_ASYNC_PROTOCOL 0 ++#define CONFIG_BLURAY_PROTOCOL 0 ++#define CONFIG_CACHE_PROTOCOL 0 ++#define CONFIG_CONCAT_PROTOCOL 0 ++#define CONFIG_CRYPTO_PROTOCOL 0 ++#define CONFIG_DATA_PROTOCOL 0 ++#define CONFIG_FFRTMPCRYPT_PROTOCOL 0 ++#define CONFIG_FFRTMPHTTP_PROTOCOL 0 ++#define CONFIG_FILE_PROTOCOL 0 ++#define CONFIG_FTP_PROTOCOL 0 ++#define CONFIG_GOPHER_PROTOCOL 0 ++#define CONFIG_HLS_PROTOCOL 0 ++#define CONFIG_HTTP_PROTOCOL 0 ++#define CONFIG_HTTPPROXY_PROTOCOL 0 ++#define CONFIG_HTTPS_PROTOCOL 0 ++#define CONFIG_ICECAST_PROTOCOL 0 ++#define CONFIG_MMSH_PROTOCOL 0 ++#define CONFIG_MMST_PROTOCOL 0 ++#define CONFIG_MD5_PROTOCOL 0 ++#define CONFIG_PIPE_PROTOCOL 0 ++#define CONFIG_PROMPEG_PROTOCOL 0 ++#define CONFIG_RTMP_PROTOCOL 0 ++#define CONFIG_RTMPE_PROTOCOL 0 ++#define CONFIG_RTMPS_PROTOCOL 0 ++#define CONFIG_RTMPT_PROTOCOL 0 ++#define CONFIG_RTMPTE_PROTOCOL 0 ++#define CONFIG_RTMPTS_PROTOCOL 0 ++#define CONFIG_RTP_PROTOCOL 0 ++#define CONFIG_SCTP_PROTOCOL 0 ++#define CONFIG_SRTP_PROTOCOL 0 ++#define CONFIG_SUBFILE_PROTOCOL 0 ++#define CONFIG_TEE_PROTOCOL 0 ++#define CONFIG_TCP_PROTOCOL 0 ++#define CONFIG_TLS_PROTOCOL 0 ++#define CONFIG_UDP_PROTOCOL 0 ++#define CONFIG_UDPLITE_PROTOCOL 0 ++#define CONFIG_UNIX_PROTOCOL 0 ++#define CONFIG_LIBAMQP_PROTOCOL 0 ++#define CONFIG_LIBRTMP_PROTOCOL 0 ++#define CONFIG_LIBRTMPE_PROTOCOL 0 ++#define CONFIG_LIBRTMPS_PROTOCOL 0 ++#define CONFIG_LIBRTMPT_PROTOCOL 0 ++#define CONFIG_LIBRTMPTE_PROTOCOL 0 ++#define CONFIG_LIBSRT_PROTOCOL 0 ++#define CONFIG_LIBSSH_PROTOCOL 0 ++#define CONFIG_LIBSMBCLIENT_PROTOCOL 0 ++#define CONFIG_LIBZMQ_PROTOCOL 0 ++#endif /* FFMPEG_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/bsf_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/bsf_list.c +new file mode 100644 +index 00000000000..d31ece942a7 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/bsf_list.c +@@ -0,0 +1,3 @@ ++static const AVBitStreamFilter * const bitstream_filters[] = { ++ &ff_null_bsf, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/codec_list.c 
b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/codec_list.c +new file mode 100644 +index 00000000000..8f4b18388c3 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/codec_list.c +@@ -0,0 +1,25 @@ ++static const AVCodec * const codec_list[] = { ++ &ff_h263_decoder, ++ &ff_h264_decoder, ++ &ff_mpeg4_decoder, ++ &ff_theora_decoder, ++ &ff_vp3_decoder, ++ &ff_vp8_decoder, ++ &ff_aac_decoder, ++ &ff_amrnb_decoder, ++ &ff_amrwb_decoder, ++ &ff_flac_decoder, ++ &ff_gsm_ms_decoder, ++ &ff_mp3_decoder, ++ &ff_vorbis_decoder, ++ &ff_pcm_alaw_decoder, ++ &ff_pcm_f32le_decoder, ++ &ff_pcm_mulaw_decoder, ++ &ff_pcm_s16be_decoder, ++ &ff_pcm_s16le_decoder, ++ &ff_pcm_s24be_decoder, ++ &ff_pcm_s24le_decoder, ++ &ff_pcm_s32le_decoder, ++ &ff_pcm_u8_decoder, ++ &ff_libopus_decoder, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/parser_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/parser_list.c +new file mode 100644 +index 00000000000..48dcf4122e6 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/parser_list.c +@@ -0,0 +1,14 @@ ++static const AVCodecParser * const parser_list[] = { ++ &ff_aac_parser, ++ &ff_flac_parser, ++ &ff_gsm_parser, ++ &ff_h263_parser, ++ &ff_h264_parser, ++ &ff_mpeg4video_parser, ++ &ff_mpegaudio_parser, ++ &ff_opus_parser, ++ &ff_vorbis_parser, ++ &ff_vp3_parser, ++ &ff_vp8_parser, ++ &ff_vp9_parser, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/demuxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/demuxer_list.c +new file mode 100644 +index 00000000000..0c96cf1ff7e +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/demuxer_list.c +@@ -0,0 +1,11 @@ ++static const AVInputFormat * const demuxer_list[] = { ++ &ff_aac_demuxer, ++ &ff_amr_demuxer, ++ &ff_avi_demuxer, ++ &ff_flac_demuxer, ++ &ff_matroska_demuxer, ++ &ff_mov_demuxer, ++ &ff_mp3_demuxer, ++ &ff_ogg_demuxer, ++ &ff_wav_demuxer, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/muxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/muxer_list.c +new file mode 100644 +index 00000000000..f36d9499c6f +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/muxer_list.c +@@ -0,0 +1,2 @@ ++static const AVOutputFormat * const muxer_list[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/protocol_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/protocol_list.c +new file mode 100644 +index 00000000000..247e1e4c3a2 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/protocol_list.c +@@ -0,0 +1,2 @@ ++static const URLProtocol * const url_protocols[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/avconfig.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/avconfig.h +new file mode 100644 +index 00000000000..8558b35027f +--- /dev/null ++++ 
b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/avconfig.h +@@ -0,0 +1,6 @@ ++/* Generated by ffmpeg configure */ ++#ifndef AVUTIL_AVCONFIG_H ++#define AVUTIL_AVCONFIG_H ++#define AV_HAVE_BIGENDIAN 0 ++#define AV_HAVE_FAST_UNALIGNED 0 ++#endif /* AVUTIL_AVCONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/ffversion.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/ffversion.h +new file mode 100644 +index 00000000000..31e5b5036dc +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/ffversion.h +@@ -0,0 +1,5 @@ ++/* Automatically generated by version.sh, do not manually edit! */ ++#ifndef AVUTIL_FFVERSION_H ++#define AVUTIL_FFVERSION_H ++#define FFMPEG_VERSION "git-2020-06-16-23b2a15c25" ++#endif /* AVUTIL_FFVERSION_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/config.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/config.h +new file mode 100644 +index 00000000000..87f5fcb366d +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/config.h +@@ -0,0 +1,2589 @@ ++/* Automatically generated by configure - do not modify! */ ++#ifndef FFMPEG_CONFIG_H ++#define FFMPEG_CONFIG_H ++/* #define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --enable-libopus --disable-debug --disable-bzlib --disable-error-resilience --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,libopus,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio,vp9' --extra-cflags=-I/mnt/chromium/src/third_party/opus/src/include --disable-linux-perf --x86asmexe=nasm --optflags='\"-O2\"' --enable-decoder='theora,vp8' --enable-parser='vp3,vp8' --target-os=linux --enable-pic --cc=clang --cxx=clang++ --ld=clang" -- elide long configuration string from binary */ ++#define FFMPEG_LICENSE "LGPL version 2.1 or later" ++#define CONFIG_THIS_YEAR 2020 ++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg" ++#define AVCONV_DATADIR "/usr/local/share/ffmpeg" ++#define CC_IDENT "clang version 8.0.1" ++#define av_restrict restrict ++#define EXTERN_PREFIX "" ++#define EXTERN_ASM ++#define BUILDSUF "" ++#define SLIBSUF ".so" ++#define HAVE_MMX2 HAVE_MMXEXT ++#define SWS_MAX_FILTER_SIZE 256 ++#define ARCH_AARCH64 0 ++#define ARCH_ALPHA 0 ++#define ARCH_ARM 0 ++#define ARCH_AVR32 0 ++#define ARCH_AVR32_AP 0 ++#define ARCH_AVR32_UC 0 ++#define ARCH_BFIN 0 ++#define ARCH_IA64 0 ++#define ARCH_M68K 0 ++#define ARCH_MIPS 0 ++#define ARCH_MIPS64 0 ++#define ARCH_PARISC 0 ++#define ARCH_PPC 0 ++#define ARCH_PPC64 0 ++#define ARCH_S390 0 ++#define ARCH_SH4 0 ++#define ARCH_SPARC 0 ++#define ARCH_SPARC64 0 ++#define ARCH_TILEGX 0 ++#define ARCH_TILEPRO 0 ++#define ARCH_TOMI 0 ++#define ARCH_X86 0 ++#define ARCH_X86_32 0 ++#define ARCH_X86_64 0 ++#define HAVE_ARMV5TE 0 ++#define HAVE_ARMV6 0 
++#define HAVE_ARMV6T2 0 ++#define HAVE_ARMV8 0 ++#define HAVE_NEON 0 ++#define HAVE_VFP 0 ++#define HAVE_VFPV3 0 ++#define HAVE_SETEND 0 ++#define HAVE_ALTIVEC 0 ++#define HAVE_DCBZL 0 ++#define HAVE_LDBRX 0 ++#define HAVE_POWER8 0 ++#define HAVE_PPC4XX 0 ++#define HAVE_VSX 0 ++#define HAVE_AESNI 0 ++#define HAVE_AMD3DNOW 0 ++#define HAVE_AMD3DNOWEXT 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_FMA3 0 ++#define HAVE_FMA4 0 ++#define HAVE_MMX 0 ++#define HAVE_MMXEXT 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 ++#define HAVE_SSE3 0 ++#define HAVE_SSE4 0 ++#define HAVE_SSE42 0 ++#define HAVE_SSSE3 0 ++#define HAVE_XOP 0 ++#define HAVE_CPUNOP 0 ++#define HAVE_I686 0 ++#define HAVE_MIPSFPU 0 ++#define HAVE_MIPS32R2 0 ++#define HAVE_MIPS32R5 0 ++#define HAVE_MIPS64R2 0 ++#define HAVE_MIPS32R6 0 ++#define HAVE_MIPS64R6 0 ++#define HAVE_MIPSDSP 0 ++#define HAVE_MIPSDSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MSA2 0 ++#define HAVE_LOONGSON2 0 ++#define HAVE_LOONGSON3 0 ++#define HAVE_MMI 0 ++#define HAVE_ARMV5TE_EXTERNAL 0 ++#define HAVE_ARMV6_EXTERNAL 0 ++#define HAVE_ARMV6T2_EXTERNAL 0 ++#define HAVE_ARMV8_EXTERNAL 0 ++#define HAVE_NEON_EXTERNAL 0 ++#define HAVE_VFP_EXTERNAL 0 ++#define HAVE_VFPV3_EXTERNAL 0 ++#define HAVE_SETEND_EXTERNAL 0 ++#define HAVE_ALTIVEC_EXTERNAL 0 ++#define HAVE_DCBZL_EXTERNAL 0 ++#define HAVE_LDBRX_EXTERNAL 0 ++#define HAVE_POWER8_EXTERNAL 0 ++#define HAVE_PPC4XX_EXTERNAL 0 ++#define HAVE_VSX_EXTERNAL 0 ++#define HAVE_AESNI_EXTERNAL 0 ++#define HAVE_AMD3DNOW_EXTERNAL 0 ++#define HAVE_AMD3DNOWEXT_EXTERNAL 0 ++#define HAVE_AVX_EXTERNAL 0 ++#define HAVE_AVX2_EXTERNAL 0 ++#define HAVE_AVX512_EXTERNAL 0 ++#define HAVE_FMA3_EXTERNAL 0 ++#define HAVE_FMA4_EXTERNAL 0 ++#define HAVE_MMX_EXTERNAL 0 ++#define HAVE_MMXEXT_EXTERNAL 0 ++#define HAVE_SSE_EXTERNAL 0 ++#define HAVE_SSE2_EXTERNAL 0 ++#define HAVE_SSE3_EXTERNAL 0 ++#define HAVE_SSE4_EXTERNAL 0 ++#define HAVE_SSE42_EXTERNAL 0 ++#define HAVE_SSSE3_EXTERNAL 0 ++#define HAVE_XOP_EXTERNAL 0 ++#define HAVE_CPUNOP_EXTERNAL 0 ++#define HAVE_I686_EXTERNAL 0 ++#define HAVE_MIPSFPU_EXTERNAL 0 ++#define HAVE_MIPS32R2_EXTERNAL 0 ++#define HAVE_MIPS32R5_EXTERNAL 0 ++#define HAVE_MIPS64R2_EXTERNAL 0 ++#define HAVE_MIPS32R6_EXTERNAL 0 ++#define HAVE_MIPS64R6_EXTERNAL 0 ++#define HAVE_MIPSDSP_EXTERNAL 0 ++#define HAVE_MIPSDSPR2_EXTERNAL 0 ++#define HAVE_MSA_EXTERNAL 0 ++#define HAVE_MSA2_EXTERNAL 0 ++#define HAVE_LOONGSON2_EXTERNAL 0 ++#define HAVE_LOONGSON3_EXTERNAL 0 ++#define HAVE_MMI_EXTERNAL 0 ++#define HAVE_ARMV5TE_INLINE 0 ++#define HAVE_ARMV6_INLINE 0 ++#define HAVE_ARMV6T2_INLINE 0 ++#define HAVE_ARMV8_INLINE 0 ++#define HAVE_NEON_INLINE 0 ++#define HAVE_VFP_INLINE 0 ++#define HAVE_VFPV3_INLINE 0 ++#define HAVE_SETEND_INLINE 0 ++#define HAVE_ALTIVEC_INLINE 0 ++#define HAVE_DCBZL_INLINE 0 ++#define HAVE_LDBRX_INLINE 0 ++#define HAVE_POWER8_INLINE 0 ++#define HAVE_PPC4XX_INLINE 0 ++#define HAVE_VSX_INLINE 0 ++#define HAVE_AESNI_INLINE 0 ++#define HAVE_AMD3DNOW_INLINE 0 ++#define HAVE_AMD3DNOWEXT_INLINE 0 ++#define HAVE_AVX_INLINE 0 ++#define HAVE_AVX2_INLINE 0 ++#define HAVE_AVX512_INLINE 0 ++#define HAVE_FMA3_INLINE 0 ++#define HAVE_FMA4_INLINE 0 ++#define HAVE_MMX_INLINE 0 ++#define HAVE_MMXEXT_INLINE 0 ++#define HAVE_SSE_INLINE 0 ++#define HAVE_SSE2_INLINE 0 ++#define HAVE_SSE3_INLINE 0 ++#define HAVE_SSE4_INLINE 0 ++#define HAVE_SSE42_INLINE 0 ++#define HAVE_SSSE3_INLINE 0 ++#define HAVE_XOP_INLINE 0 ++#define HAVE_CPUNOP_INLINE 0 ++#define HAVE_I686_INLINE 0 ++#define 
HAVE_MIPSFPU_INLINE 0 ++#define HAVE_MIPS32R2_INLINE 0 ++#define HAVE_MIPS32R5_INLINE 0 ++#define HAVE_MIPS64R2_INLINE 0 ++#define HAVE_MIPS32R6_INLINE 0 ++#define HAVE_MIPS64R6_INLINE 0 ++#define HAVE_MIPSDSP_INLINE 0 ++#define HAVE_MIPSDSPR2_INLINE 0 ++#define HAVE_MSA_INLINE 0 ++#define HAVE_MSA2_INLINE 0 ++#define HAVE_LOONGSON2_INLINE 0 ++#define HAVE_LOONGSON3_INLINE 0 ++#define HAVE_MMI_INLINE 0 ++#define HAVE_ALIGNED_STACK 0 ++#define HAVE_FAST_64BIT 0 ++#define HAVE_FAST_CLZ 0 ++#define HAVE_FAST_CMOV 0 ++#define HAVE_LOCAL_ALIGNED 0 ++#define HAVE_SIMD_ALIGN_16 0 ++#define HAVE_SIMD_ALIGN_32 0 ++#define HAVE_SIMD_ALIGN_64 0 ++#define HAVE_ATOMIC_CAS_PTR 0 ++#define HAVE_MACHINE_RW_BARRIER 0 ++#define HAVE_MEMORYBARRIER 0 ++#define HAVE_MM_EMPTY 0 ++#define HAVE_RDTSC 0 ++#define HAVE_SEM_TIMEDWAIT 1 ++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 ++#define HAVE_CABS 0 ++#define HAVE_CEXP 0 ++#define HAVE_INLINE_ASM 1 ++#define HAVE_SYMVER 0 ++#define HAVE_X86ASM 0 ++#define HAVE_BIGENDIAN 0 ++#define HAVE_FAST_UNALIGNED 0 ++#define HAVE_ARPA_INET_H 0 ++#define HAVE_ASM_TYPES_H 1 ++#define HAVE_CDIO_PARANOIA_H 0 ++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0 ++#define HAVE_CUDA_H 0 ++#define HAVE_DISPATCH_DISPATCH_H 0 ++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_BKTR_IOCTL_METEOR_H 0 ++#define HAVE_DEV_IC_BT8XX_H 0 ++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0 ++#define HAVE_DIRECT_H 0 ++#define HAVE_DIRENT_H 1 ++#define HAVE_DXGIDEBUG_H 0 ++#define HAVE_DXVA_H 0 ++#define HAVE_ES2_GL_H 0 ++#define HAVE_GSM_H 0 ++#define HAVE_IO_H 0 ++#define HAVE_LINUX_PERF_EVENT_H 1 ++#define HAVE_MACHINE_IOCTL_BT848_H 0 ++#define HAVE_MACHINE_IOCTL_METEOR_H 0 ++#define HAVE_MALLOC_H 1 ++#define HAVE_OPENCV2_CORE_CORE_C_H 0 ++#define HAVE_OPENGL_GL3_H 0 ++#define HAVE_POLL_H 1 ++#define HAVE_SYS_PARAM_H 1 ++#define HAVE_SYS_RESOURCE_H 1 ++#define HAVE_SYS_SELECT_H 1 ++#define HAVE_SYS_SOUNDCARD_H 1 ++#define HAVE_SYS_TIME_H 1 ++#define HAVE_SYS_UN_H 1 ++#define HAVE_SYS_VIDEOIO_H 0 ++#define HAVE_TERMIOS_H 1 ++#define HAVE_UDPLITE_H 0 ++#define HAVE_UNISTD_H 1 ++#define HAVE_VALGRIND_VALGRIND_H 0 /* #define HAVE_VALGRIND_VALGRIND_H 0 -- forced to 0. 
See https://crbug.com/590440 */ ++#define HAVE_WINDOWS_H 0 ++#define HAVE_WINSOCK2_H 0 ++#define HAVE_INTRINSICS_NEON 0 ++#define HAVE_ATANF 1 ++#define HAVE_ATAN2F 1 ++#define HAVE_CBRT 1 ++#define HAVE_CBRTF 1 ++#define HAVE_COPYSIGN 1 ++#define HAVE_COSF 1 ++#define HAVE_ERF 1 ++#define HAVE_EXP2 1 ++#define HAVE_EXP2F 1 ++#define HAVE_EXPF 1 ++#define HAVE_HYPOT 1 ++#define HAVE_ISFINITE 1 ++#define HAVE_ISINF 1 ++#define HAVE_ISNAN 1 ++#define HAVE_LDEXPF 1 ++#define HAVE_LLRINT 1 ++#define HAVE_LLRINTF 1 ++#define HAVE_LOG2 1 ++#define HAVE_LOG2F 1 ++#define HAVE_LOG10F 1 ++#define HAVE_LRINT 1 ++#define HAVE_LRINTF 1 ++#define HAVE_POWF 1 ++#define HAVE_RINT 1 ++#define HAVE_ROUND 1 ++#define HAVE_ROUNDF 1 ++#define HAVE_SINF 1 ++#define HAVE_TRUNC 1 ++#define HAVE_TRUNCF 1 ++#define HAVE_DOS_PATHS 0 ++#define HAVE_LIBC_MSVCRT 0 ++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0 ++#define HAVE_SECTION_DATA_REL_RO 1 ++#define HAVE_THREADS 1 ++#define HAVE_UWP 0 ++#define HAVE_WINRT 0 ++#define HAVE_ACCESS 1 ++#define HAVE_ALIGNED_MALLOC 0 ++#define HAVE_ARC4RANDOM 0 ++#define HAVE_CLOCK_GETTIME 1 ++#define HAVE_CLOSESOCKET 0 ++#define HAVE_COMMANDLINETOARGVW 0 ++#define HAVE_FCNTL 1 ++#define HAVE_GETADDRINFO 0 ++#define HAVE_GETHRTIME 0 ++#define HAVE_GETOPT 1 ++#define HAVE_GETMODULEHANDLE 0 ++#define HAVE_GETPROCESSAFFINITYMASK 0 ++#define HAVE_GETPROCESSMEMORYINFO 0 ++#define HAVE_GETPROCESSTIMES 0 ++#define HAVE_GETRUSAGE 1 ++#define HAVE_GETSTDHANDLE 0 ++#define HAVE_GETSYSTEMTIMEASFILETIME 0 ++#define HAVE_GETTIMEOFDAY 1 ++#define HAVE_GLOB 1 ++#define HAVE_GLXGETPROCADDRESS 0 ++#define HAVE_GMTIME_R 1 ++#define HAVE_INET_ATON 0 ++#define HAVE_ISATTY 1 ++#define HAVE_KBHIT 0 ++#define HAVE_LOCALTIME_R 1 ++#define HAVE_LSTAT 1 ++#define HAVE_LZO1X_999_COMPRESS 0 ++#define HAVE_MACH_ABSOLUTE_TIME 0 ++#define HAVE_MAPVIEWOFFILE 0 ++#define HAVE_MEMALIGN 1 ++#define HAVE_MKSTEMP 1 ++#define HAVE_MMAP 1 ++#define HAVE_MPROTECT 1 ++#define HAVE_NANOSLEEP 1 ++#define HAVE_PEEKNAMEDPIPE 0 ++#define HAVE_POSIX_MEMALIGN 1 ++#define HAVE_PTHREAD_CANCEL 1 ++#define HAVE_SCHED_GETAFFINITY 1 ++#define HAVE_SECITEMIMPORT 0 ++#define HAVE_SETCONSOLETEXTATTRIBUTE 0 ++#define HAVE_SETCONSOLECTRLHANDLER 0 ++#define HAVE_SETDLLDIRECTORY 0 ++#define HAVE_SETMODE 0 ++#define HAVE_SETRLIMIT 1 ++#define HAVE_SLEEP 0 ++#define HAVE_STRERROR_R 1 ++#define HAVE_SYSCONF 1 ++#define HAVE_SYSCTL 0 /* #define HAVE_SYSCTL 1 -- forced to 0 for Fuchsia */ ++#define HAVE_USLEEP 1 ++#define HAVE_UTGETOSTYPEFROMSTRING 0 ++#define HAVE_VIRTUALALLOC 0 ++#define HAVE_WGLGETPROCADDRESS 0 ++#define HAVE_BCRYPT 0 ++#define HAVE_VAAPI_DRM 0 ++#define HAVE_VAAPI_X11 0 ++#define HAVE_VDPAU_X11 0 ++#define HAVE_PTHREADS 1 ++#define HAVE_OS2THREADS 0 ++#define HAVE_W32THREADS 0 ++#define HAVE_AS_ARCH_DIRECTIVE 0 ++#define HAVE_AS_DN_DIRECTIVE 0 ++#define HAVE_AS_FPU_DIRECTIVE 0 ++#define HAVE_AS_FUNC 0 ++#define HAVE_AS_OBJECT_ARCH 0 ++#define HAVE_ASM_MOD_Q 0 ++#define HAVE_BLOCKS_EXTENSION 0 ++#define HAVE_EBP_AVAILABLE 0 ++#define HAVE_EBX_AVAILABLE 0 ++#define HAVE_GNU_AS 0 ++#define HAVE_GNU_WINDRES 0 ++#define HAVE_IBM_ASM 0 ++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0 ++#define HAVE_INLINE_ASM_LABELS 1 ++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 1 ++#define HAVE_PRAGMA_DEPRECATED 1 ++#define HAVE_RSYNC_CONTIMEOUT 0 ++#define HAVE_SYMVER_ASM_LABEL 1 ++#define HAVE_SYMVER_GNU_ASM 1 ++#define HAVE_VFP_ARGS 0 ++#define HAVE_XFORM_ASM 0 ++#define HAVE_XMM_CLOBBERS 0 ++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0 ++#define 
HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR 0 ++#define HAVE_SOCKLEN_T 0 ++#define HAVE_STRUCT_ADDRINFO 0 ++#define HAVE_STRUCT_GROUP_SOURCE_REQ 0 ++#define HAVE_STRUCT_IP_MREQ_SOURCE 0 ++#define HAVE_STRUCT_IPV6_MREQ 0 ++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0 ++#define HAVE_STRUCT_POLLFD 0 ++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 1 ++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0 ++#define HAVE_STRUCT_SOCKADDR_IN6 0 ++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0 ++#define HAVE_STRUCT_SOCKADDR_STORAGE 0 ++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 ++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0 ++#define HAVE_MAKEINFO 1 ++#define HAVE_MAKEINFO_HTML 1 ++#define HAVE_OPENCL_D3D11 0 ++#define HAVE_OPENCL_DRM_ARM 0 ++#define HAVE_OPENCL_DRM_BEIGNET 0 ++#define HAVE_OPENCL_DXVA2 0 ++#define HAVE_OPENCL_VAAPI_BEIGNET 0 ++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0 ++#define HAVE_PERL 1 ++#define HAVE_POD2MAN 1 ++#define HAVE_TEXI2HTML 0 ++#define CONFIG_DOC 0 ++#define CONFIG_HTMLPAGES 0 ++#define CONFIG_MANPAGES 0 ++#define CONFIG_PODPAGES 0 ++#define CONFIG_TXTPAGES 0 ++#define CONFIG_AVIO_LIST_DIR_EXAMPLE 1 ++#define CONFIG_AVIO_READING_EXAMPLE 1 ++#define CONFIG_DECODE_AUDIO_EXAMPLE 1 ++#define CONFIG_DECODE_VIDEO_EXAMPLE 1 ++#define CONFIG_DEMUXING_DECODING_EXAMPLE 1 ++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1 ++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1 ++#define CONFIG_EXTRACT_MVS_EXAMPLE 1 ++#define CONFIG_FILTER_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0 ++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 1 ++#define CONFIG_HW_DECODE_EXAMPLE 1 ++#define CONFIG_METADATA_EXAMPLE 1 ++#define CONFIG_MUXING_EXAMPLE 0 ++#define CONFIG_QSVDEC_EXAMPLE 0 ++#define CONFIG_REMUXING_EXAMPLE 1 ++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0 ++#define CONFIG_SCALING_VIDEO_EXAMPLE 0 ++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0 ++#define CONFIG_TRANSCODING_EXAMPLE 0 ++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0 ++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0 ++#define CONFIG_AVISYNTH 0 ++#define CONFIG_FREI0R 0 ++#define CONFIG_LIBCDIO 0 ++#define CONFIG_LIBDAVS2 0 ++#define CONFIG_LIBRUBBERBAND 0 ++#define CONFIG_LIBVIDSTAB 0 ++#define CONFIG_LIBX264 0 ++#define CONFIG_LIBX265 0 ++#define CONFIG_LIBXAVS 0 ++#define CONFIG_LIBXAVS2 0 ++#define CONFIG_LIBXVID 0 ++#define CONFIG_DECKLINK 0 ++#define CONFIG_LIBFDK_AAC 0 ++#define CONFIG_OPENSSL 0 ++#define CONFIG_LIBTLS 0 ++#define CONFIG_GMP 0 ++#define CONFIG_LIBARIBB24 0 ++#define CONFIG_LIBLENSFUN 0 ++#define CONFIG_LIBOPENCORE_AMRNB 0 ++#define CONFIG_LIBOPENCORE_AMRWB 0 ++#define CONFIG_LIBVMAF 0 ++#define CONFIG_LIBVO_AMRWBENC 0 ++#define CONFIG_MBEDTLS 0 ++#define CONFIG_RKMPP 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_CHROMAPRINT 0 ++#define CONFIG_GCRYPT 0 ++#define CONFIG_GNUTLS 0 ++#define CONFIG_JNI 0 ++#define CONFIG_LADSPA 0 ++#define CONFIG_LIBAOM 0 ++#define CONFIG_LIBASS 0 ++#define CONFIG_LIBBLURAY 0 ++#define CONFIG_LIBBS2B 0 ++#define CONFIG_LIBCACA 0 ++#define CONFIG_LIBCELT 0 ++#define CONFIG_LIBCODEC2 0 ++#define CONFIG_LIBDAV1D 0 ++#define CONFIG_LIBDC1394 0 ++#define CONFIG_LIBDRM 0 ++#define CONFIG_LIBFLITE 0 ++#define CONFIG_LIBFONTCONFIG 0 ++#define CONFIG_LIBFREETYPE 0 ++#define CONFIG_LIBFRIBIDI 0 ++#define CONFIG_LIBGLSLANG 0 ++#define CONFIG_LIBGME 0 ++#define CONFIG_LIBGSM 0 ++#define CONFIG_LIBIEC61883 0 ++#define 
CONFIG_LIBILBC 0 ++#define CONFIG_LIBJACK 0 ++#define CONFIG_LIBKLVANC 0 ++#define CONFIG_LIBKVAZAAR 0 ++#define CONFIG_LIBMODPLUG 0 ++#define CONFIG_LIBMP3LAME 0 ++#define CONFIG_LIBMYSOFA 0 ++#define CONFIG_LIBOPENCV 0 ++#define CONFIG_LIBOPENH264 0 ++#define CONFIG_LIBOPENJPEG 0 ++#define CONFIG_LIBOPENMPT 0 ++#define CONFIG_LIBOPUS 1 ++#define CONFIG_LIBPULSE 0 ++#define CONFIG_LIBRABBITMQ 0 ++#define CONFIG_LIBRAV1E 0 ++#define CONFIG_LIBRSVG 0 ++#define CONFIG_LIBRTMP 0 ++#define CONFIG_LIBSHINE 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_LIBSNAPPY 0 ++#define CONFIG_LIBSOXR 0 ++#define CONFIG_LIBSPEEX 0 ++#define CONFIG_LIBSRT 0 ++#define CONFIG_LIBSSH 0 ++#define CONFIG_LIBTENSORFLOW 0 ++#define CONFIG_LIBTESSERACT 0 ++#define CONFIG_LIBTHEORA 0 ++#define CONFIG_LIBTWOLAME 0 ++#define CONFIG_LIBV4L2 0 ++#define CONFIG_LIBVORBIS 0 ++#define CONFIG_LIBVPX 0 ++#define CONFIG_LIBWAVPACK 0 ++#define CONFIG_LIBWEBP 0 ++#define CONFIG_LIBXML2 0 ++#define CONFIG_LIBZIMG 0 ++#define CONFIG_LIBZMQ 0 ++#define CONFIG_LIBZVBI 0 ++#define CONFIG_LV2 0 ++#define CONFIG_MEDIACODEC 0 ++#define CONFIG_OPENAL 0 ++#define CONFIG_OPENGL 0 ++#define CONFIG_POCKETSPHINX 0 ++#define CONFIG_VAPOURSYNTH 0 ++#define CONFIG_ALSA 0 ++#define CONFIG_APPKIT 0 ++#define CONFIG_AVFOUNDATION 0 ++#define CONFIG_BZLIB 0 ++#define CONFIG_COREIMAGE 0 ++#define CONFIG_ICONV 0 ++#define CONFIG_LIBXCB 0 ++#define CONFIG_LIBXCB_SHM 0 ++#define CONFIG_LIBXCB_SHAPE 0 ++#define CONFIG_LIBXCB_XFIXES 0 ++#define CONFIG_LZMA 0 ++#define CONFIG_SCHANNEL 0 ++#define CONFIG_SDL2 0 ++#define CONFIG_SECURETRANSPORT 0 ++#define CONFIG_SNDIO 0 ++#define CONFIG_XLIB 0 ++#define CONFIG_ZLIB 0 ++#define CONFIG_CUDA_NVCC 0 ++#define CONFIG_CUDA_SDK 0 ++#define CONFIG_LIBNPP 0 ++#define CONFIG_LIBMFX 0 ++#define CONFIG_MMAL 0 ++#define CONFIG_OMX 0 ++#define CONFIG_OPENCL 0 ++#define CONFIG_VULKAN 0 ++#define CONFIG_AMF 0 ++#define CONFIG_AUDIOTOOLBOX 0 ++#define CONFIG_CRYSTALHD 0 ++#define CONFIG_CUDA 0 ++#define CONFIG_CUDA_LLVM 0 ++#define CONFIG_CUVID 0 ++#define CONFIG_D3D11VA 0 ++#define CONFIG_DXVA2 0 ++#define CONFIG_FFNVCODEC 0 ++#define CONFIG_NVDEC 0 ++#define CONFIG_NVENC 0 ++#define CONFIG_VAAPI 0 ++#define CONFIG_VDPAU 0 ++#define CONFIG_VIDEOTOOLBOX 0 ++#define CONFIG_V4L2_M2M 0 ++#define CONFIG_XVMC 0 ++#define CONFIG_FTRAPV 0 ++#define CONFIG_GRAY 0 ++#define CONFIG_HARDCODED_TABLES 0 ++#define CONFIG_OMX_RPI 0 ++#define CONFIG_RUNTIME_CPUDETECT 1 ++#define CONFIG_SAFE_BITSTREAM_READER 1 ++#define CONFIG_SHARED 0 ++#define CONFIG_SMALL 0 ++#define CONFIG_STATIC 1 ++#define CONFIG_SWSCALE_ALPHA 1 ++#define CONFIG_GPL 0 ++#define CONFIG_NONFREE 0 ++#define CONFIG_VERSION3 0 ++#define CONFIG_AVDEVICE 0 ++#define CONFIG_AVFILTER 0 ++#define CONFIG_SWSCALE 0 ++#define CONFIG_POSTPROC 0 ++#define CONFIG_AVFORMAT 1 ++#define CONFIG_AVCODEC 1 ++#define CONFIG_SWRESAMPLE 0 ++#define CONFIG_AVRESAMPLE 0 ++#define CONFIG_AVUTIL 1 ++#define CONFIG_FFPLAY 0 ++#define CONFIG_FFPROBE 0 ++#define CONFIG_FFMPEG 0 ++#define CONFIG_DCT 1 ++#define CONFIG_DWT 0 ++#define CONFIG_ERROR_RESILIENCE 0 ++#define CONFIG_FAAN 0 ++#define CONFIG_FAST_UNALIGNED 0 ++#define CONFIG_FFT 1 ++#define CONFIG_LSP 0 ++#define CONFIG_LZO 0 ++#define CONFIG_MDCT 1 ++#define CONFIG_PIXELUTILS 0 ++#define CONFIG_NETWORK 0 ++#define CONFIG_RDFT 1 ++#define CONFIG_AUTODETECT 0 ++#define CONFIG_FONTCONFIG 0 ++#define CONFIG_LARGE_TESTS 1 ++#define CONFIG_LINUX_PERF 0 ++#define CONFIG_MEMORY_POISONING 0 ++#define CONFIG_NEON_CLOBBER_TEST 0 ++#define 
CONFIG_OSSFUZZ 0 ++#define CONFIG_PIC 1 ++#define CONFIG_THUMB 0 ++#define CONFIG_VALGRIND_BACKTRACE 0 ++#define CONFIG_XMM_CLOBBER_TEST 0 ++#define CONFIG_BSFS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_ENCODERS 0 ++#define CONFIG_HWACCELS 0 ++#define CONFIG_PARSERS 1 ++#define CONFIG_INDEVS 0 ++#define CONFIG_OUTDEVS 0 ++#define CONFIG_FILTERS 0 ++#define CONFIG_DEMUXERS 1 ++#define CONFIG_MUXERS 0 ++#define CONFIG_PROTOCOLS 0 ++#define CONFIG_AANDCTTABLES 0 ++#define CONFIG_AC3DSP 0 ++#define CONFIG_ADTS_HEADER 0 ++#define CONFIG_AUDIO_FRAME_QUEUE 0 ++#define CONFIG_AUDIODSP 0 ++#define CONFIG_BLOCKDSP 0 ++#define CONFIG_BSWAPDSP 0 ++#define CONFIG_CABAC 0 ++#define CONFIG_CBS 0 ++#define CONFIG_CBS_AV1 0 ++#define CONFIG_CBS_H264 0 ++#define CONFIG_CBS_H265 0 ++#define CONFIG_CBS_JPEG 0 ++#define CONFIG_CBS_MPEG2 0 ++#define CONFIG_CBS_VP9 0 ++#define CONFIG_DIRAC_PARSE 1 ++#define CONFIG_DNN 0 ++#define CONFIG_DVPROFILE 0 ++#define CONFIG_EXIF 0 ++#define CONFIG_FAANDCT 0 ++#define CONFIG_FAANIDCT 0 ++#define CONFIG_FDCTDSP 0 ++#define CONFIG_FLACDSP 1 ++#define CONFIG_FMTCONVERT 0 ++#define CONFIG_FRAME_THREAD_ENCODER 0 ++#define CONFIG_G722DSP 0 ++#define CONFIG_GOLOMB 1 ++#define CONFIG_GPLV3 0 ++#define CONFIG_H263DSP 0 ++#define CONFIG_H264CHROMA 0 ++#define CONFIG_H264DSP 0 ++#define CONFIG_H264PARSE 0 ++#define CONFIG_H264PRED 1 ++#define CONFIG_H264QPEL 0 ++#define CONFIG_HEVCPARSE 0 ++#define CONFIG_HPELDSP 1 ++#define CONFIG_HUFFMAN 0 ++#define CONFIG_HUFFYUVDSP 0 ++#define CONFIG_HUFFYUVENCDSP 0 ++#define CONFIG_IDCTDSP 0 ++#define CONFIG_IIRFILTER 0 ++#define CONFIG_MDCT15 0 ++#define CONFIG_INTRAX8 0 ++#define CONFIG_ISO_MEDIA 1 ++#define CONFIG_IVIDSP 0 ++#define CONFIG_JPEGTABLES 0 ++#define CONFIG_LGPLV3 0 ++#define CONFIG_LIBX262 0 ++#define CONFIG_LLAUDDSP 0 ++#define CONFIG_LLVIDDSP 0 ++#define CONFIG_LLVIDENCDSP 0 ++#define CONFIG_LPC 0 ++#define CONFIG_LZF 0 ++#define CONFIG_ME_CMP 0 ++#define CONFIG_MPEG_ER 0 ++#define CONFIG_MPEGAUDIO 1 ++#define CONFIG_MPEGAUDIODSP 1 ++#define CONFIG_MPEGAUDIOHEADER 1 ++#define CONFIG_MPEGVIDEO 0 ++#define CONFIG_MPEGVIDEOENC 0 ++#define CONFIG_MSS34DSP 0 ++#define CONFIG_PIXBLOCKDSP 0 ++#define CONFIG_QPELDSP 0 ++#define CONFIG_QSV 0 ++#define CONFIG_QSVDEC 0 ++#define CONFIG_QSVENC 0 ++#define CONFIG_QSVVPP 0 ++#define CONFIG_RANGECODER 0 ++#define CONFIG_RIFFDEC 1 ++#define CONFIG_RIFFENC 0 ++#define CONFIG_RTPDEC 0 ++#define CONFIG_RTPENC_CHAIN 0 ++#define CONFIG_RV34DSP 0 ++#define CONFIG_SCENE_SAD 0 ++#define CONFIG_SINEWIN 0 ++#define CONFIG_SNAPPY 0 ++#define CONFIG_SRTP 0 ++#define CONFIG_STARTCODE 0 ++#define CONFIG_TEXTUREDSP 0 ++#define CONFIG_TEXTUREDSPENC 0 ++#define CONFIG_TPELDSP 0 ++#define CONFIG_VAAPI_1 0 ++#define CONFIG_VAAPI_ENCODE 0 ++#define CONFIG_VC1DSP 0 ++#define CONFIG_VIDEODSP 1 ++#define CONFIG_VP3DSP 1 ++#define CONFIG_VP56DSP 0 ++#define CONFIG_VP8DSP 1 ++#define CONFIG_WMA_FREQS 0 ++#define CONFIG_WMV2DSP 0 ++#define CONFIG_AAC_ADTSTOASC_BSF 0 ++#define CONFIG_AV1_FRAME_MERGE_BSF 0 ++#define CONFIG_AV1_FRAME_SPLIT_BSF 0 ++#define CONFIG_AV1_METADATA_BSF 0 ++#define CONFIG_CHOMP_BSF 0 ++#define CONFIG_DUMP_EXTRADATA_BSF 0 ++#define CONFIG_DCA_CORE_BSF 0 ++#define CONFIG_EAC3_CORE_BSF 0 ++#define CONFIG_EXTRACT_EXTRADATA_BSF 0 ++#define CONFIG_FILTER_UNITS_BSF 0 ++#define CONFIG_H264_METADATA_BSF 0 ++#define CONFIG_H264_MP4TOANNEXB_BSF 0 ++#define CONFIG_H264_REDUNDANT_PPS_BSF 0 ++#define CONFIG_HAPQA_EXTRACT_BSF 0 ++#define CONFIG_HEVC_METADATA_BSF 0 ++#define CONFIG_HEVC_MP4TOANNEXB_BSF 0 
++#define CONFIG_IMX_DUMP_HEADER_BSF 0 ++#define CONFIG_MJPEG2JPEG_BSF 0 ++#define CONFIG_MJPEGA_DUMP_HEADER_BSF 0 ++#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0 ++#define CONFIG_MPEG2_METADATA_BSF 0 ++#define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0 ++#define CONFIG_MOV2TEXTSUB_BSF 0 ++#define CONFIG_NOISE_BSF 0 ++#define CONFIG_NULL_BSF 1 ++#define CONFIG_PRORES_METADATA_BSF 0 ++#define CONFIG_REMOVE_EXTRADATA_BSF 0 ++#define CONFIG_TEXT2MOVSUB_BSF 0 ++#define CONFIG_TRACE_HEADERS_BSF 0 ++#define CONFIG_TRUEHD_CORE_BSF 0 ++#define CONFIG_VP9_METADATA_BSF 0 ++#define CONFIG_VP9_RAW_REORDER_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0 ++#define CONFIG_AASC_DECODER 0 ++#define CONFIG_AIC_DECODER 0 ++#define CONFIG_ALIAS_PIX_DECODER 0 ++#define CONFIG_AGM_DECODER 0 ++#define CONFIG_AMV_DECODER 0 ++#define CONFIG_ANM_DECODER 0 ++#define CONFIG_ANSI_DECODER 0 ++#define CONFIG_APNG_DECODER 0 ++#define CONFIG_ARBC_DECODER 0 ++#define CONFIG_ASV1_DECODER 0 ++#define CONFIG_ASV2_DECODER 0 ++#define CONFIG_AURA_DECODER 0 ++#define CONFIG_AURA2_DECODER 0 ++#define CONFIG_AVRP_DECODER 0 ++#define CONFIG_AVRN_DECODER 0 ++#define CONFIG_AVS_DECODER 0 ++#define CONFIG_AVUI_DECODER 0 ++#define CONFIG_AYUV_DECODER 0 ++#define CONFIG_BETHSOFTVID_DECODER 0 ++#define CONFIG_BFI_DECODER 0 ++#define CONFIG_BINK_DECODER 0 ++#define CONFIG_BITPACKED_DECODER 0 ++#define CONFIG_BMP_DECODER 0 ++#define CONFIG_BMV_VIDEO_DECODER 0 ++#define CONFIG_BRENDER_PIX_DECODER 0 ++#define CONFIG_C93_DECODER 0 ++#define CONFIG_CAVS_DECODER 0 ++#define CONFIG_CDGRAPHICS_DECODER 0 ++#define CONFIG_CDTOONS_DECODER 0 ++#define CONFIG_CDXL_DECODER 0 ++#define CONFIG_CFHD_DECODER 0 ++#define CONFIG_CINEPAK_DECODER 0 ++#define CONFIG_CLEARVIDEO_DECODER 0 ++#define CONFIG_CLJR_DECODER 0 ++#define CONFIG_CLLC_DECODER 0 ++#define CONFIG_COMFORTNOISE_DECODER 0 ++#define CONFIG_CPIA_DECODER 0 ++#define CONFIG_CSCD_DECODER 0 ++#define CONFIG_CYUV_DECODER 0 ++#define CONFIG_DDS_DECODER 0 ++#define CONFIG_DFA_DECODER 0 ++#define CONFIG_DIRAC_DECODER 0 ++#define CONFIG_DNXHD_DECODER 0 ++#define CONFIG_DPX_DECODER 0 ++#define CONFIG_DSICINVIDEO_DECODER 0 ++#define CONFIG_DVAUDIO_DECODER 0 ++#define CONFIG_DVVIDEO_DECODER 0 ++#define CONFIG_DXA_DECODER 0 ++#define CONFIG_DXTORY_DECODER 0 ++#define CONFIG_DXV_DECODER 0 ++#define CONFIG_EACMV_DECODER 0 ++#define CONFIG_EAMAD_DECODER 0 ++#define CONFIG_EATGQ_DECODER 0 ++#define CONFIG_EATGV_DECODER 0 ++#define CONFIG_EATQI_DECODER 0 ++#define CONFIG_EIGHTBPS_DECODER 0 ++#define CONFIG_EIGHTSVX_EXP_DECODER 0 ++#define CONFIG_EIGHTSVX_FIB_DECODER 0 ++#define CONFIG_ESCAPE124_DECODER 0 ++#define CONFIG_ESCAPE130_DECODER 0 ++#define CONFIG_EXR_DECODER 0 ++#define CONFIG_FFV1_DECODER 0 ++#define CONFIG_FFVHUFF_DECODER 0 ++#define CONFIG_FIC_DECODER 0 ++#define CONFIG_FITS_DECODER 0 ++#define CONFIG_FLASHSV_DECODER 0 ++#define CONFIG_FLASHSV2_DECODER 0 ++#define CONFIG_FLIC_DECODER 0 ++#define CONFIG_FLV_DECODER 0 ++#define CONFIG_FMVC_DECODER 0 ++#define CONFIG_FOURXM_DECODER 0 ++#define CONFIG_FRAPS_DECODER 0 ++#define CONFIG_FRWU_DECODER 0 ++#define CONFIG_G2M_DECODER 0 ++#define CONFIG_GDV_DECODER 0 ++#define CONFIG_GIF_DECODER 0 ++#define CONFIG_H261_DECODER 0 ++#define CONFIG_H263_DECODER 0 ++#define CONFIG_H263I_DECODER 0 ++#define CONFIG_H263P_DECODER 0 ++#define CONFIG_H263_V4L2M2M_DECODER 0 ++#define CONFIG_H264_DECODER 0 ++#define CONFIG_H264_CRYSTALHD_DECODER 0 ++#define CONFIG_H264_V4L2M2M_DECODER 0 ++#define CONFIG_H264_MEDIACODEC_DECODER 0 ++#define 
CONFIG_H264_MMAL_DECODER 0 ++#define CONFIG_H264_QSV_DECODER 0 ++#define CONFIG_H264_RKMPP_DECODER 0 ++#define CONFIG_HAP_DECODER 0 ++#define CONFIG_HEVC_DECODER 0 ++#define CONFIG_HEVC_QSV_DECODER 0 ++#define CONFIG_HEVC_RKMPP_DECODER 0 ++#define CONFIG_HEVC_V4L2M2M_DECODER 0 ++#define CONFIG_HNM4_VIDEO_DECODER 0 ++#define CONFIG_HQ_HQA_DECODER 0 ++#define CONFIG_HQX_DECODER 0 ++#define CONFIG_HUFFYUV_DECODER 0 ++#define CONFIG_HYMT_DECODER 0 ++#define CONFIG_IDCIN_DECODER 0 ++#define CONFIG_IFF_ILBM_DECODER 0 ++#define CONFIG_IMM4_DECODER 0 ++#define CONFIG_IMM5_DECODER 0 ++#define CONFIG_INDEO2_DECODER 0 ++#define CONFIG_INDEO3_DECODER 0 ++#define CONFIG_INDEO4_DECODER 0 ++#define CONFIG_INDEO5_DECODER 0 ++#define CONFIG_INTERPLAY_VIDEO_DECODER 0 ++#define CONFIG_JPEG2000_DECODER 0 ++#define CONFIG_JPEGLS_DECODER 0 ++#define CONFIG_JV_DECODER 0 ++#define CONFIG_KGV1_DECODER 0 ++#define CONFIG_KMVC_DECODER 0 ++#define CONFIG_LAGARITH_DECODER 0 ++#define CONFIG_LOCO_DECODER 0 ++#define CONFIG_LSCR_DECODER 0 ++#define CONFIG_M101_DECODER 0 ++#define CONFIG_MAGICYUV_DECODER 0 ++#define CONFIG_MDEC_DECODER 0 ++#define CONFIG_MIMIC_DECODER 0 ++#define CONFIG_MJPEG_DECODER 0 ++#define CONFIG_MJPEGB_DECODER 0 ++#define CONFIG_MMVIDEO_DECODER 0 ++#define CONFIG_MOTIONPIXELS_DECODER 0 ++#define CONFIG_MPEG1VIDEO_DECODER 0 ++#define CONFIG_MPEG2VIDEO_DECODER 0 ++#define CONFIG_MPEG4_DECODER 0 ++#define CONFIG_MPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG4_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG4_MMAL_DECODER 0 ++#define CONFIG_MPEGVIDEO_DECODER 0 ++#define CONFIG_MPEG1_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_MMAL_DECODER 0 ++#define CONFIG_MPEG2_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG2_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_QSV_DECODER 0 ++#define CONFIG_MPEG2_MEDIACODEC_DECODER 0 ++#define CONFIG_MSA1_DECODER 0 ++#define CONFIG_MSCC_DECODER 0 ++#define CONFIG_MSMPEG4V1_DECODER 0 ++#define CONFIG_MSMPEG4V2_DECODER 0 ++#define CONFIG_MSMPEG4V3_DECODER 0 ++#define CONFIG_MSMPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MSRLE_DECODER 0 ++#define CONFIG_MSS1_DECODER 0 ++#define CONFIG_MSS2_DECODER 0 ++#define CONFIG_MSVIDEO1_DECODER 0 ++#define CONFIG_MSZH_DECODER 0 ++#define CONFIG_MTS2_DECODER 0 ++#define CONFIG_MV30_DECODER 0 ++#define CONFIG_MVC1_DECODER 0 ++#define CONFIG_MVC2_DECODER 0 ++#define CONFIG_MVDV_DECODER 0 ++#define CONFIG_MVHA_DECODER 0 ++#define CONFIG_MWSC_DECODER 0 ++#define CONFIG_MXPEG_DECODER 0 ++#define CONFIG_NUV_DECODER 0 ++#define CONFIG_PAF_VIDEO_DECODER 0 ++#define CONFIG_PAM_DECODER 0 ++#define CONFIG_PBM_DECODER 0 ++#define CONFIG_PCX_DECODER 0 ++#define CONFIG_PGM_DECODER 0 ++#define CONFIG_PGMYUV_DECODER 0 ++#define CONFIG_PICTOR_DECODER 0 ++#define CONFIG_PIXLET_DECODER 0 ++#define CONFIG_PNG_DECODER 0 ++#define CONFIG_PPM_DECODER 0 ++#define CONFIG_PRORES_DECODER 0 ++#define CONFIG_PROSUMER_DECODER 0 ++#define CONFIG_PSD_DECODER 0 ++#define CONFIG_PTX_DECODER 0 ++#define CONFIG_QDRAW_DECODER 0 ++#define CONFIG_QPEG_DECODER 0 ++#define CONFIG_QTRLE_DECODER 0 ++#define CONFIG_R10K_DECODER 0 ++#define CONFIG_R210_DECODER 0 ++#define CONFIG_RASC_DECODER 0 ++#define CONFIG_RAWVIDEO_DECODER 0 ++#define CONFIG_RL2_DECODER 0 ++#define CONFIG_ROQ_DECODER 0 ++#define CONFIG_RPZA_DECODER 0 ++#define CONFIG_RSCC_DECODER 0 ++#define CONFIG_RV10_DECODER 0 ++#define CONFIG_RV20_DECODER 0 ++#define CONFIG_RV30_DECODER 0 ++#define CONFIG_RV40_DECODER 0 ++#define CONFIG_S302M_DECODER 0 ++#define CONFIG_SANM_DECODER 0 ++#define CONFIG_SCPR_DECODER 0 ++#define 
CONFIG_SCREENPRESSO_DECODER 0 ++#define CONFIG_SGI_DECODER 0 ++#define CONFIG_SGIRLE_DECODER 0 ++#define CONFIG_SHEERVIDEO_DECODER 0 ++#define CONFIG_SMACKER_DECODER 0 ++#define CONFIG_SMC_DECODER 0 ++#define CONFIG_SMVJPEG_DECODER 0 ++#define CONFIG_SNOW_DECODER 0 ++#define CONFIG_SP5X_DECODER 0 ++#define CONFIG_SPEEDHQ_DECODER 0 ++#define CONFIG_SRGC_DECODER 0 ++#define CONFIG_SUNRAST_DECODER 0 ++#define CONFIG_SVQ1_DECODER 0 ++#define CONFIG_SVQ3_DECODER 0 ++#define CONFIG_TARGA_DECODER 0 ++#define CONFIG_TARGA_Y216_DECODER 0 ++#define CONFIG_TDSC_DECODER 0 ++#define CONFIG_THEORA_DECODER 1 ++#define CONFIG_THP_DECODER 0 ++#define CONFIG_TIERTEXSEQVIDEO_DECODER 0 ++#define CONFIG_TIFF_DECODER 0 ++#define CONFIG_TMV_DECODER 0 ++#define CONFIG_TRUEMOTION1_DECODER 0 ++#define CONFIG_TRUEMOTION2_DECODER 0 ++#define CONFIG_TRUEMOTION2RT_DECODER 0 ++#define CONFIG_TSCC_DECODER 0 ++#define CONFIG_TSCC2_DECODER 0 ++#define CONFIG_TXD_DECODER 0 ++#define CONFIG_ULTI_DECODER 0 ++#define CONFIG_UTVIDEO_DECODER 0 ++#define CONFIG_V210_DECODER 0 ++#define CONFIG_V210X_DECODER 0 ++#define CONFIG_V308_DECODER 0 ++#define CONFIG_V408_DECODER 0 ++#define CONFIG_V410_DECODER 0 ++#define CONFIG_VB_DECODER 0 ++#define CONFIG_VBLE_DECODER 0 ++#define CONFIG_VC1_DECODER 0 ++#define CONFIG_VC1_CRYSTALHD_DECODER 0 ++#define CONFIG_VC1IMAGE_DECODER 0 ++#define CONFIG_VC1_MMAL_DECODER 0 ++#define CONFIG_VC1_QSV_DECODER 0 ++#define CONFIG_VC1_V4L2M2M_DECODER 0 ++#define CONFIG_VCR1_DECODER 0 ++#define CONFIG_VMDVIDEO_DECODER 0 ++#define CONFIG_VMNC_DECODER 0 ++#define CONFIG_VP3_DECODER 1 ++#define CONFIG_VP4_DECODER 0 ++#define CONFIG_VP5_DECODER 0 ++#define CONFIG_VP6_DECODER 0 ++#define CONFIG_VP6A_DECODER 0 ++#define CONFIG_VP6F_DECODER 0 ++#define CONFIG_VP7_DECODER 0 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP8_RKMPP_DECODER 0 ++#define CONFIG_VP8_V4L2M2M_DECODER 0 ++#define CONFIG_VP9_DECODER 0 ++#define CONFIG_VP9_RKMPP_DECODER 0 ++#define CONFIG_VP9_V4L2M2M_DECODER 0 ++#define CONFIG_VQA_DECODER 0 ++#define CONFIG_WEBP_DECODER 0 ++#define CONFIG_WCMV_DECODER 0 ++#define CONFIG_WRAPPED_AVFRAME_DECODER 0 ++#define CONFIG_WMV1_DECODER 0 ++#define CONFIG_WMV2_DECODER 0 ++#define CONFIG_WMV3_DECODER 0 ++#define CONFIG_WMV3_CRYSTALHD_DECODER 0 ++#define CONFIG_WMV3IMAGE_DECODER 0 ++#define CONFIG_WNV1_DECODER 0 ++#define CONFIG_XAN_WC3_DECODER 0 ++#define CONFIG_XAN_WC4_DECODER 0 ++#define CONFIG_XBM_DECODER 0 ++#define CONFIG_XFACE_DECODER 0 ++#define CONFIG_XL_DECODER 0 ++#define CONFIG_XPM_DECODER 0 ++#define CONFIG_XWD_DECODER 0 ++#define CONFIG_Y41P_DECODER 0 ++#define CONFIG_YLC_DECODER 0 ++#define CONFIG_YOP_DECODER 0 ++#define CONFIG_YUV4_DECODER 0 ++#define CONFIG_ZERO12V_DECODER 0 ++#define CONFIG_ZEROCODEC_DECODER 0 ++#define CONFIG_ZLIB_DECODER 0 ++#define CONFIG_ZMBV_DECODER 0 ++#define CONFIG_AAC_DECODER 0 ++#define CONFIG_AAC_FIXED_DECODER 0 ++#define CONFIG_AAC_LATM_DECODER 0 ++#define CONFIG_AC3_DECODER 0 ++#define CONFIG_AC3_FIXED_DECODER 0 ++#define CONFIG_ACELP_KELVIN_DECODER 0 ++#define CONFIG_ALAC_DECODER 0 ++#define CONFIG_ALS_DECODER 0 ++#define CONFIG_AMRNB_DECODER 0 ++#define CONFIG_AMRWB_DECODER 0 ++#define CONFIG_APE_DECODER 0 ++#define CONFIG_APTX_DECODER 0 ++#define CONFIG_APTX_HD_DECODER 0 ++#define CONFIG_ATRAC1_DECODER 0 ++#define CONFIG_ATRAC3_DECODER 0 ++#define CONFIG_ATRAC3AL_DECODER 0 ++#define CONFIG_ATRAC3P_DECODER 0 ++#define CONFIG_ATRAC3PAL_DECODER 0 ++#define CONFIG_ATRAC9_DECODER 0 ++#define CONFIG_BINKAUDIO_DCT_DECODER 0 ++#define 
CONFIG_BINKAUDIO_RDFT_DECODER 0 ++#define CONFIG_BMV_AUDIO_DECODER 0 ++#define CONFIG_COOK_DECODER 0 ++#define CONFIG_DCA_DECODER 0 ++#define CONFIG_DOLBY_E_DECODER 0 ++#define CONFIG_DSD_LSBF_DECODER 0 ++#define CONFIG_DSD_MSBF_DECODER 0 ++#define CONFIG_DSD_LSBF_PLANAR_DECODER 0 ++#define CONFIG_DSD_MSBF_PLANAR_DECODER 0 ++#define CONFIG_DSICINAUDIO_DECODER 0 ++#define CONFIG_DSS_SP_DECODER 0 ++#define CONFIG_DST_DECODER 0 ++#define CONFIG_EAC3_DECODER 0 ++#define CONFIG_EVRC_DECODER 0 ++#define CONFIG_FFWAVESYNTH_DECODER 0 ++#define CONFIG_FLAC_DECODER 1 ++#define CONFIG_G723_1_DECODER 0 ++#define CONFIG_G729_DECODER 0 ++#define CONFIG_GSM_DECODER 0 ++#define CONFIG_GSM_MS_DECODER 0 ++#define CONFIG_HCA_DECODER 0 ++#define CONFIG_HCOM_DECODER 0 ++#define CONFIG_IAC_DECODER 0 ++#define CONFIG_ILBC_DECODER 0 ++#define CONFIG_IMC_DECODER 0 ++#define CONFIG_INTERPLAY_ACM_DECODER 0 ++#define CONFIG_MACE3_DECODER 0 ++#define CONFIG_MACE6_DECODER 0 ++#define CONFIG_METASOUND_DECODER 0 ++#define CONFIG_MLP_DECODER 0 ++#define CONFIG_MP1_DECODER 0 ++#define CONFIG_MP1FLOAT_DECODER 0 ++#define CONFIG_MP2_DECODER 0 ++#define CONFIG_MP2FLOAT_DECODER 0 ++#define CONFIG_MP3FLOAT_DECODER 0 ++#define CONFIG_MP3_DECODER 1 ++#define CONFIG_MP3ADUFLOAT_DECODER 0 ++#define CONFIG_MP3ADU_DECODER 0 ++#define CONFIG_MP3ON4FLOAT_DECODER 0 ++#define CONFIG_MP3ON4_DECODER 0 ++#define CONFIG_MPC7_DECODER 0 ++#define CONFIG_MPC8_DECODER 0 ++#define CONFIG_NELLYMOSER_DECODER 0 ++#define CONFIG_ON2AVC_DECODER 0 ++#define CONFIG_OPUS_DECODER 0 ++#define CONFIG_PAF_AUDIO_DECODER 0 ++#define CONFIG_QCELP_DECODER 0 ++#define CONFIG_QDM2_DECODER 0 ++#define CONFIG_QDMC_DECODER 0 ++#define CONFIG_RA_144_DECODER 0 ++#define CONFIG_RA_288_DECODER 0 ++#define CONFIG_RALF_DECODER 0 ++#define CONFIG_SBC_DECODER 0 ++#define CONFIG_SHORTEN_DECODER 0 ++#define CONFIG_SIPR_DECODER 0 ++#define CONFIG_SIREN_DECODER 0 ++#define CONFIG_SMACKAUD_DECODER 0 ++#define CONFIG_SONIC_DECODER 0 ++#define CONFIG_TAK_DECODER 0 ++#define CONFIG_TRUEHD_DECODER 0 ++#define CONFIG_TRUESPEECH_DECODER 0 ++#define CONFIG_TTA_DECODER 0 ++#define CONFIG_TWINVQ_DECODER 0 ++#define CONFIG_VMDAUDIO_DECODER 0 ++#define CONFIG_VORBIS_DECODER 1 ++#define CONFIG_WAVPACK_DECODER 0 ++#define CONFIG_WMALOSSLESS_DECODER 0 ++#define CONFIG_WMAPRO_DECODER 0 ++#define CONFIG_WMAV1_DECODER 0 ++#define CONFIG_WMAV2_DECODER 0 ++#define CONFIG_WMAVOICE_DECODER 0 ++#define CONFIG_WS_SND1_DECODER 0 ++#define CONFIG_XMA1_DECODER 0 ++#define CONFIG_XMA2_DECODER 0 ++#define CONFIG_PCM_ALAW_DECODER 1 ++#define CONFIG_PCM_BLURAY_DECODER 0 ++#define CONFIG_PCM_DVD_DECODER 0 ++#define CONFIG_PCM_F16LE_DECODER 0 ++#define CONFIG_PCM_F24LE_DECODER 0 ++#define CONFIG_PCM_F32BE_DECODER 0 ++#define CONFIG_PCM_F32LE_DECODER 1 ++#define CONFIG_PCM_F64BE_DECODER 0 ++#define CONFIG_PCM_F64LE_DECODER 0 ++#define CONFIG_PCM_LXF_DECODER 0 ++#define CONFIG_PCM_MULAW_DECODER 1 ++#define CONFIG_PCM_S8_DECODER 0 ++#define CONFIG_PCM_S8_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16BE_DECODER 1 ++#define CONFIG_PCM_S16BE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16LE_DECODER 1 ++#define CONFIG_PCM_S16LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S24BE_DECODER 1 ++#define CONFIG_PCM_S24DAUD_DECODER 0 ++#define CONFIG_PCM_S24LE_DECODER 1 ++#define CONFIG_PCM_S24LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S32BE_DECODER 0 ++#define CONFIG_PCM_S32LE_DECODER 1 ++#define CONFIG_PCM_S32LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S64BE_DECODER 0 ++#define CONFIG_PCM_S64LE_DECODER 0 ++#define CONFIG_PCM_U8_DECODER 1 
++#define CONFIG_PCM_U16BE_DECODER 0 ++#define CONFIG_PCM_U16LE_DECODER 0 ++#define CONFIG_PCM_U24BE_DECODER 0 ++#define CONFIG_PCM_U24LE_DECODER 0 ++#define CONFIG_PCM_U32BE_DECODER 0 ++#define CONFIG_PCM_U32LE_DECODER 0 ++#define CONFIG_PCM_VIDC_DECODER 0 ++#define CONFIG_DERF_DPCM_DECODER 0 ++#define CONFIG_GREMLIN_DPCM_DECODER 0 ++#define CONFIG_INTERPLAY_DPCM_DECODER 0 ++#define CONFIG_ROQ_DPCM_DECODER 0 ++#define CONFIG_SDX2_DPCM_DECODER 0 ++#define CONFIG_SOL_DPCM_DECODER 0 ++#define CONFIG_XAN_DPCM_DECODER 0 ++#define CONFIG_ADPCM_4XM_DECODER 0 ++#define CONFIG_ADPCM_ADX_DECODER 0 ++#define CONFIG_ADPCM_AFC_DECODER 0 ++#define CONFIG_ADPCM_AGM_DECODER 0 ++#define CONFIG_ADPCM_AICA_DECODER 0 ++#define CONFIG_ADPCM_ARGO_DECODER 0 ++#define CONFIG_ADPCM_CT_DECODER 0 ++#define CONFIG_ADPCM_DTK_DECODER 0 ++#define CONFIG_ADPCM_EA_DECODER 0 ++#define CONFIG_ADPCM_EA_MAXIS_XA_DECODER 0 ++#define CONFIG_ADPCM_EA_R1_DECODER 0 ++#define CONFIG_ADPCM_EA_R2_DECODER 0 ++#define CONFIG_ADPCM_EA_R3_DECODER 0 ++#define CONFIG_ADPCM_EA_XAS_DECODER 0 ++#define CONFIG_ADPCM_G722_DECODER 0 ++#define CONFIG_ADPCM_G726_DECODER 0 ++#define CONFIG_ADPCM_G726LE_DECODER 0 ++#define CONFIG_ADPCM_IMA_AMV_DECODER 0 ++#define CONFIG_ADPCM_IMA_ALP_DECODER 0 ++#define CONFIG_ADPCM_IMA_APC_DECODER 0 ++#define CONFIG_ADPCM_IMA_APM_DECODER 0 ++#define CONFIG_ADPCM_IMA_CUNNING_DECODER 0 ++#define CONFIG_ADPCM_IMA_DAT4_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK3_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK4_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_EACS_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_SEAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_ISS_DECODER 0 ++#define CONFIG_ADPCM_IMA_MTF_DECODER 0 ++#define CONFIG_ADPCM_IMA_OKI_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_DECODER 0 ++#define CONFIG_ADPCM_IMA_RAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_SSI_DECODER 0 ++#define CONFIG_ADPCM_IMA_SMJPEG_DECODER 0 ++#define CONFIG_ADPCM_IMA_WAV_DECODER 0 ++#define CONFIG_ADPCM_IMA_WS_DECODER 0 ++#define CONFIG_ADPCM_MS_DECODER 0 ++#define CONFIG_ADPCM_MTAF_DECODER 0 ++#define CONFIG_ADPCM_PSX_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_2_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_3_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_4_DECODER 0 ++#define CONFIG_ADPCM_SWF_DECODER 0 ++#define CONFIG_ADPCM_THP_DECODER 0 ++#define CONFIG_ADPCM_THP_LE_DECODER 0 ++#define CONFIG_ADPCM_VIMA_DECODER 0 ++#define CONFIG_ADPCM_XA_DECODER 0 ++#define CONFIG_ADPCM_YAMAHA_DECODER 0 ++#define CONFIG_ADPCM_ZORK_DECODER 0 ++#define CONFIG_SSA_DECODER 0 ++#define CONFIG_ASS_DECODER 0 ++#define CONFIG_CCAPTION_DECODER 0 ++#define CONFIG_DVBSUB_DECODER 0 ++#define CONFIG_DVDSUB_DECODER 0 ++#define CONFIG_JACOSUB_DECODER 0 ++#define CONFIG_MICRODVD_DECODER 0 ++#define CONFIG_MOVTEXT_DECODER 0 ++#define CONFIG_MPL2_DECODER 0 ++#define CONFIG_PGSSUB_DECODER 0 ++#define CONFIG_PJS_DECODER 0 ++#define CONFIG_REALTEXT_DECODER 0 ++#define CONFIG_SAMI_DECODER 0 ++#define CONFIG_SRT_DECODER 0 ++#define CONFIG_STL_DECODER 0 ++#define CONFIG_SUBRIP_DECODER 0 ++#define CONFIG_SUBVIEWER_DECODER 0 ++#define CONFIG_SUBVIEWER1_DECODER 0 ++#define CONFIG_TEXT_DECODER 0 ++#define CONFIG_VPLAYER_DECODER 0 ++#define CONFIG_WEBVTT_DECODER 0 ++#define CONFIG_XSUB_DECODER 0 ++#define CONFIG_AAC_AT_DECODER 0 ++#define CONFIG_AC3_AT_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_AT_DECODER 0 ++#define CONFIG_ALAC_AT_DECODER 0 ++#define CONFIG_AMR_NB_AT_DECODER 0 ++#define CONFIG_EAC3_AT_DECODER 0 ++#define CONFIG_GSM_MS_AT_DECODER 0 ++#define CONFIG_ILBC_AT_DECODER 0 ++#define CONFIG_MP1_AT_DECODER 0 ++#define 
CONFIG_MP2_AT_DECODER 0 ++#define CONFIG_MP3_AT_DECODER 0 ++#define CONFIG_PCM_ALAW_AT_DECODER 0 ++#define CONFIG_PCM_MULAW_AT_DECODER 0 ++#define CONFIG_QDMC_AT_DECODER 0 ++#define CONFIG_QDM2_AT_DECODER 0 ++#define CONFIG_LIBARIBB24_DECODER 0 ++#define CONFIG_LIBCELT_DECODER 0 ++#define CONFIG_LIBCODEC2_DECODER 0 ++#define CONFIG_LIBDAV1D_DECODER 0 ++#define CONFIG_LIBDAVS2_DECODER 0 ++#define CONFIG_LIBFDK_AAC_DECODER 0 ++#define CONFIG_LIBGSM_DECODER 0 ++#define CONFIG_LIBGSM_MS_DECODER 0 ++#define CONFIG_LIBILBC_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRWB_DECODER 0 ++#define CONFIG_LIBOPENJPEG_DECODER 0 ++#define CONFIG_LIBOPUS_DECODER 1 ++#define CONFIG_LIBRSVG_DECODER 0 ++#define CONFIG_LIBSPEEX_DECODER 0 ++#define CONFIG_LIBVORBIS_DECODER 0 ++#define CONFIG_LIBVPX_VP8_DECODER 0 ++#define CONFIG_LIBVPX_VP9_DECODER 0 ++#define CONFIG_LIBZVBI_TELETEXT_DECODER 0 ++#define CONFIG_BINTEXT_DECODER 0 ++#define CONFIG_XBIN_DECODER 0 ++#define CONFIG_IDF_DECODER 0 ++#define CONFIG_LIBAOM_AV1_DECODER 0 ++#define CONFIG_LIBOPENH264_DECODER 0 ++#define CONFIG_H264_CUVID_DECODER 0 ++#define CONFIG_HEVC_CUVID_DECODER 0 ++#define CONFIG_HEVC_MEDIACODEC_DECODER 0 ++#define CONFIG_MJPEG_CUVID_DECODER 0 ++#define CONFIG_MJPEG_QSV_DECODER 0 ++#define CONFIG_MPEG1_CUVID_DECODER 0 ++#define CONFIG_MPEG2_CUVID_DECODER 0 ++#define CONFIG_MPEG4_CUVID_DECODER 0 ++#define CONFIG_MPEG4_MEDIACODEC_DECODER 0 ++#define CONFIG_VC1_CUVID_DECODER 0 ++#define CONFIG_VP8_CUVID_DECODER 0 ++#define CONFIG_VP8_MEDIACODEC_DECODER 0 ++#define CONFIG_VP8_QSV_DECODER 0 ++#define CONFIG_VP9_CUVID_DECODER 0 ++#define CONFIG_VP9_MEDIACODEC_DECODER 0 ++#define CONFIG_VP9_QSV_DECODER 0 ++#define CONFIG_A64MULTI_ENCODER 0 ++#define CONFIG_A64MULTI5_ENCODER 0 ++#define CONFIG_ALIAS_PIX_ENCODER 0 ++#define CONFIG_AMV_ENCODER 0 ++#define CONFIG_APNG_ENCODER 0 ++#define CONFIG_ASV1_ENCODER 0 ++#define CONFIG_ASV2_ENCODER 0 ++#define CONFIG_AVRP_ENCODER 0 ++#define CONFIG_AVUI_ENCODER 0 ++#define CONFIG_AYUV_ENCODER 0 ++#define CONFIG_BMP_ENCODER 0 ++#define CONFIG_CINEPAK_ENCODER 0 ++#define CONFIG_CLJR_ENCODER 0 ++#define CONFIG_COMFORTNOISE_ENCODER 0 ++#define CONFIG_DNXHD_ENCODER 0 ++#define CONFIG_DPX_ENCODER 0 ++#define CONFIG_DVVIDEO_ENCODER 0 ++#define CONFIG_FFV1_ENCODER 0 ++#define CONFIG_FFVHUFF_ENCODER 0 ++#define CONFIG_FITS_ENCODER 0 ++#define CONFIG_FLASHSV_ENCODER 0 ++#define CONFIG_FLASHSV2_ENCODER 0 ++#define CONFIG_FLV_ENCODER 0 ++#define CONFIG_GIF_ENCODER 0 ++#define CONFIG_H261_ENCODER 0 ++#define CONFIG_H263_ENCODER 0 ++#define CONFIG_H263P_ENCODER 0 ++#define CONFIG_HAP_ENCODER 0 ++#define CONFIG_HUFFYUV_ENCODER 0 ++#define CONFIG_JPEG2000_ENCODER 0 ++#define CONFIG_JPEGLS_ENCODER 0 ++#define CONFIG_LJPEG_ENCODER 0 ++#define CONFIG_MAGICYUV_ENCODER 0 ++#define CONFIG_MJPEG_ENCODER 0 ++#define CONFIG_MPEG1VIDEO_ENCODER 0 ++#define CONFIG_MPEG2VIDEO_ENCODER 0 ++#define CONFIG_MPEG4_ENCODER 0 ++#define CONFIG_MSMPEG4V2_ENCODER 0 ++#define CONFIG_MSMPEG4V3_ENCODER 0 ++#define CONFIG_MSVIDEO1_ENCODER 0 ++#define CONFIG_PAM_ENCODER 0 ++#define CONFIG_PBM_ENCODER 0 ++#define CONFIG_PCX_ENCODER 0 ++#define CONFIG_PGM_ENCODER 0 ++#define CONFIG_PGMYUV_ENCODER 0 ++#define CONFIG_PNG_ENCODER 0 ++#define CONFIG_PPM_ENCODER 0 ++#define CONFIG_PRORES_ENCODER 0 ++#define CONFIG_PRORES_AW_ENCODER 0 ++#define CONFIG_PRORES_KS_ENCODER 0 ++#define CONFIG_QTRLE_ENCODER 0 ++#define CONFIG_R10K_ENCODER 0 ++#define CONFIG_R210_ENCODER 0 ++#define CONFIG_RAWVIDEO_ENCODER 0 ++#define 
CONFIG_ROQ_ENCODER 0 ++#define CONFIG_RV10_ENCODER 0 ++#define CONFIG_RV20_ENCODER 0 ++#define CONFIG_S302M_ENCODER 0 ++#define CONFIG_SGI_ENCODER 0 ++#define CONFIG_SNOW_ENCODER 0 ++#define CONFIG_SUNRAST_ENCODER 0 ++#define CONFIG_SVQ1_ENCODER 0 ++#define CONFIG_TARGA_ENCODER 0 ++#define CONFIG_TIFF_ENCODER 0 ++#define CONFIG_UTVIDEO_ENCODER 0 ++#define CONFIG_V210_ENCODER 0 ++#define CONFIG_V308_ENCODER 0 ++#define CONFIG_V408_ENCODER 0 ++#define CONFIG_V410_ENCODER 0 ++#define CONFIG_VC2_ENCODER 0 ++#define CONFIG_WRAPPED_AVFRAME_ENCODER 0 ++#define CONFIG_WMV1_ENCODER 0 ++#define CONFIG_WMV2_ENCODER 0 ++#define CONFIG_XBM_ENCODER 0 ++#define CONFIG_XFACE_ENCODER 0 ++#define CONFIG_XWD_ENCODER 0 ++#define CONFIG_Y41P_ENCODER 0 ++#define CONFIG_YUV4_ENCODER 0 ++#define CONFIG_ZLIB_ENCODER 0 ++#define CONFIG_ZMBV_ENCODER 0 ++#define CONFIG_AAC_ENCODER 0 ++#define CONFIG_AC3_ENCODER 0 ++#define CONFIG_AC3_FIXED_ENCODER 0 ++#define CONFIG_ALAC_ENCODER 0 ++#define CONFIG_APTX_ENCODER 0 ++#define CONFIG_APTX_HD_ENCODER 0 ++#define CONFIG_DCA_ENCODER 0 ++#define CONFIG_EAC3_ENCODER 0 ++#define CONFIG_FLAC_ENCODER 0 ++#define CONFIG_G723_1_ENCODER 0 ++#define CONFIG_MLP_ENCODER 0 ++#define CONFIG_MP2_ENCODER 0 ++#define CONFIG_MP2FIXED_ENCODER 0 ++#define CONFIG_NELLYMOSER_ENCODER 0 ++#define CONFIG_OPUS_ENCODER 0 ++#define CONFIG_RA_144_ENCODER 0 ++#define CONFIG_SBC_ENCODER 0 ++#define CONFIG_SONIC_ENCODER 0 ++#define CONFIG_SONIC_LS_ENCODER 0 ++#define CONFIG_TRUEHD_ENCODER 0 ++#define CONFIG_TTA_ENCODER 0 ++#define CONFIG_VORBIS_ENCODER 0 ++#define CONFIG_WAVPACK_ENCODER 0 ++#define CONFIG_WMAV1_ENCODER 0 ++#define CONFIG_WMAV2_ENCODER 0 ++#define CONFIG_PCM_ALAW_ENCODER 0 ++#define CONFIG_PCM_DVD_ENCODER 0 ++#define CONFIG_PCM_F32BE_ENCODER 0 ++#define CONFIG_PCM_F32LE_ENCODER 0 ++#define CONFIG_PCM_F64BE_ENCODER 0 ++#define CONFIG_PCM_F64LE_ENCODER 0 ++#define CONFIG_PCM_MULAW_ENCODER 0 ++#define CONFIG_PCM_S8_ENCODER 0 ++#define CONFIG_PCM_S8_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16BE_ENCODER 0 ++#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16LE_ENCODER 0 ++#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S24BE_ENCODER 0 ++#define CONFIG_PCM_S24DAUD_ENCODER 0 ++#define CONFIG_PCM_S24LE_ENCODER 0 ++#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S32BE_ENCODER 0 ++#define CONFIG_PCM_S32LE_ENCODER 0 ++#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S64BE_ENCODER 0 ++#define CONFIG_PCM_S64LE_ENCODER 0 ++#define CONFIG_PCM_U8_ENCODER 0 ++#define CONFIG_PCM_U16BE_ENCODER 0 ++#define CONFIG_PCM_U16LE_ENCODER 0 ++#define CONFIG_PCM_U24BE_ENCODER 0 ++#define CONFIG_PCM_U24LE_ENCODER 0 ++#define CONFIG_PCM_U32BE_ENCODER 0 ++#define CONFIG_PCM_U32LE_ENCODER 0 ++#define CONFIG_PCM_VIDC_ENCODER 0 ++#define CONFIG_ROQ_DPCM_ENCODER 0 ++#define CONFIG_ADPCM_ADX_ENCODER 0 ++#define CONFIG_ADPCM_G722_ENCODER 0 ++#define CONFIG_ADPCM_G726_ENCODER 0 ++#define CONFIG_ADPCM_G726LE_ENCODER 0 ++#define CONFIG_ADPCM_IMA_QT_ENCODER 0 ++#define CONFIG_ADPCM_IMA_WAV_ENCODER 0 ++#define CONFIG_ADPCM_MS_ENCODER 0 ++#define CONFIG_ADPCM_SWF_ENCODER 0 ++#define CONFIG_ADPCM_YAMAHA_ENCODER 0 ++#define CONFIG_SSA_ENCODER 0 ++#define CONFIG_ASS_ENCODER 0 ++#define CONFIG_DVBSUB_ENCODER 0 ++#define CONFIG_DVDSUB_ENCODER 0 ++#define CONFIG_MOVTEXT_ENCODER 0 ++#define CONFIG_SRT_ENCODER 0 ++#define CONFIG_SUBRIP_ENCODER 0 ++#define CONFIG_TEXT_ENCODER 0 ++#define CONFIG_WEBVTT_ENCODER 0 ++#define CONFIG_XSUB_ENCODER 0 ++#define CONFIG_AAC_AT_ENCODER 0 ++#define 
CONFIG_ALAC_AT_ENCODER 0 ++#define CONFIG_ILBC_AT_ENCODER 0 ++#define CONFIG_PCM_ALAW_AT_ENCODER 0 ++#define CONFIG_PCM_MULAW_AT_ENCODER 0 ++#define CONFIG_LIBAOM_AV1_ENCODER 0 ++#define CONFIG_LIBCODEC2_ENCODER 0 ++#define CONFIG_LIBFDK_AAC_ENCODER 0 ++#define CONFIG_LIBGSM_ENCODER 0 ++#define CONFIG_LIBGSM_MS_ENCODER 0 ++#define CONFIG_LIBILBC_ENCODER 0 ++#define CONFIG_LIBMP3LAME_ENCODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0 ++#define CONFIG_LIBOPENJPEG_ENCODER 0 ++#define CONFIG_LIBOPUS_ENCODER 0 ++#define CONFIG_LIBRAV1E_ENCODER 0 ++#define CONFIG_LIBSHINE_ENCODER 0 ++#define CONFIG_LIBSPEEX_ENCODER 0 ++#define CONFIG_LIBTHEORA_ENCODER 0 ++#define CONFIG_LIBTWOLAME_ENCODER 0 ++#define CONFIG_LIBVO_AMRWBENC_ENCODER 0 ++#define CONFIG_LIBVORBIS_ENCODER 0 ++#define CONFIG_LIBVPX_VP8_ENCODER 0 ++#define CONFIG_LIBVPX_VP9_ENCODER 0 ++#define CONFIG_LIBWAVPACK_ENCODER 0 ++#define CONFIG_LIBWEBP_ANIM_ENCODER 0 ++#define CONFIG_LIBWEBP_ENCODER 0 ++#define CONFIG_LIBX262_ENCODER 0 ++#define CONFIG_LIBX264_ENCODER 0 ++#define CONFIG_LIBX264RGB_ENCODER 0 ++#define CONFIG_LIBX265_ENCODER 0 ++#define CONFIG_LIBXAVS_ENCODER 0 ++#define CONFIG_LIBXAVS2_ENCODER 0 ++#define CONFIG_LIBXVID_ENCODER 0 ++#define CONFIG_H263_V4L2M2M_ENCODER 0 ++#define CONFIG_LIBOPENH264_ENCODER 0 ++#define CONFIG_H264_AMF_ENCODER 0 ++#define CONFIG_H264_NVENC_ENCODER 0 ++#define CONFIG_H264_OMX_ENCODER 0 ++#define CONFIG_H264_QSV_ENCODER 0 ++#define CONFIG_H264_V4L2M2M_ENCODER 0 ++#define CONFIG_H264_VAAPI_ENCODER 0 ++#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_NVENC_ENCODER 0 ++#define CONFIG_NVENC_H264_ENCODER 0 ++#define CONFIG_NVENC_HEVC_ENCODER 0 ++#define CONFIG_HEVC_AMF_ENCODER 0 ++#define CONFIG_HEVC_NVENC_ENCODER 0 ++#define CONFIG_HEVC_QSV_ENCODER 0 ++#define CONFIG_HEVC_V4L2M2M_ENCODER 0 ++#define CONFIG_HEVC_VAAPI_ENCODER 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_LIBKVAZAAR_ENCODER 0 ++#define CONFIG_MJPEG_QSV_ENCODER 0 ++#define CONFIG_MJPEG_VAAPI_ENCODER 0 ++#define CONFIG_MPEG2_QSV_ENCODER 0 ++#define CONFIG_MPEG2_VAAPI_ENCODER 0 ++#define CONFIG_MPEG4_OMX_ENCODER 0 ++#define CONFIG_MPEG4_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_VAAPI_ENCODER 0 ++#define CONFIG_VP9_VAAPI_ENCODER 0 ++#define CONFIG_VP9_QSV_ENCODER 0 ++#define CONFIG_H263_VAAPI_HWACCEL 0 ++#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_H264_D3D11VA_HWACCEL 0 ++#define CONFIG_H264_D3D11VA2_HWACCEL 0 ++#define CONFIG_H264_DXVA2_HWACCEL 0 ++#define CONFIG_H264_NVDEC_HWACCEL 0 ++#define CONFIG_H264_VAAPI_HWACCEL 0 ++#define CONFIG_H264_VDPAU_HWACCEL 0 ++#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA2_HWACCEL 0 ++#define CONFIG_HEVC_DXVA2_HWACCEL 0 ++#define CONFIG_HEVC_NVDEC_HWACCEL 0 ++#define CONFIG_HEVC_VAAPI_HWACCEL 0 ++#define CONFIG_HEVC_VDPAU_HWACCEL 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MJPEG_NVDEC_HWACCEL 0 ++#define CONFIG_MJPEG_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG1_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG1_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG1_XVMC_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0 ++#define CONFIG_MPEG2_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG2_DXVA2_HWACCEL 0 ++#define CONFIG_MPEG2_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG2_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG2_XVMC_HWACCEL 0 ++#define 
CONFIG_MPEG4_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG4_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG4_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA2_HWACCEL 0 ++#define CONFIG_VC1_DXVA2_HWACCEL 0 ++#define CONFIG_VC1_NVDEC_HWACCEL 0 ++#define CONFIG_VC1_VAAPI_HWACCEL 0 ++#define CONFIG_VC1_VDPAU_HWACCEL 0 ++#define CONFIG_VP8_NVDEC_HWACCEL 0 ++#define CONFIG_VP8_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA2_HWACCEL 0 ++#define CONFIG_VP9_DXVA2_HWACCEL 0 ++#define CONFIG_VP9_NVDEC_HWACCEL 0 ++#define CONFIG_VP9_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_VDPAU_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA2_HWACCEL 0 ++#define CONFIG_WMV3_DXVA2_HWACCEL 0 ++#define CONFIG_WMV3_NVDEC_HWACCEL 0 ++#define CONFIG_WMV3_VAAPI_HWACCEL 0 ++#define CONFIG_WMV3_VDPAU_HWACCEL 0 ++#define CONFIG_AAC_PARSER 0 ++#define CONFIG_AAC_LATM_PARSER 0 ++#define CONFIG_AC3_PARSER 0 ++#define CONFIG_ADX_PARSER 0 ++#define CONFIG_AV1_PARSER 0 ++#define CONFIG_AVS2_PARSER 0 ++#define CONFIG_BMP_PARSER 0 ++#define CONFIG_CAVSVIDEO_PARSER 0 ++#define CONFIG_COOK_PARSER 0 ++#define CONFIG_DCA_PARSER 0 ++#define CONFIG_DIRAC_PARSER 0 ++#define CONFIG_DNXHD_PARSER 0 ++#define CONFIG_DPX_PARSER 0 ++#define CONFIG_DVAUDIO_PARSER 0 ++#define CONFIG_DVBSUB_PARSER 0 ++#define CONFIG_DVDSUB_PARSER 0 ++#define CONFIG_DVD_NAV_PARSER 0 ++#define CONFIG_FLAC_PARSER 1 ++#define CONFIG_G723_1_PARSER 0 ++#define CONFIG_G729_PARSER 0 ++#define CONFIG_GIF_PARSER 0 ++#define CONFIG_GSM_PARSER 0 ++#define CONFIG_H261_PARSER 0 ++#define CONFIG_H263_PARSER 0 ++#define CONFIG_H264_PARSER 0 ++#define CONFIG_HEVC_PARSER 0 ++#define CONFIG_MJPEG_PARSER 0 ++#define CONFIG_MLP_PARSER 0 ++#define CONFIG_MPEG4VIDEO_PARSER 0 ++#define CONFIG_MPEGAUDIO_PARSER 1 ++#define CONFIG_MPEGVIDEO_PARSER 0 ++#define CONFIG_OPUS_PARSER 1 ++#define CONFIG_PNG_PARSER 0 ++#define CONFIG_PNM_PARSER 0 ++#define CONFIG_RV30_PARSER 0 ++#define CONFIG_RV40_PARSER 0 ++#define CONFIG_SBC_PARSER 0 ++#define CONFIG_SIPR_PARSER 0 ++#define CONFIG_TAK_PARSER 0 ++#define CONFIG_VC1_PARSER 0 ++#define CONFIG_VORBIS_PARSER 1 ++#define CONFIG_VP3_PARSER 1 ++#define CONFIG_VP8_PARSER 1 ++#define CONFIG_VP9_PARSER 1 ++#define CONFIG_WEBP_PARSER 0 ++#define CONFIG_XMA_PARSER 0 ++#define CONFIG_ALSA_INDEV 0 ++#define CONFIG_ANDROID_CAMERA_INDEV 0 ++#define CONFIG_AVFOUNDATION_INDEV 0 ++#define CONFIG_BKTR_INDEV 0 ++#define CONFIG_DECKLINK_INDEV 0 ++#define CONFIG_DSHOW_INDEV 0 ++#define CONFIG_FBDEV_INDEV 0 ++#define CONFIG_GDIGRAB_INDEV 0 ++#define CONFIG_IEC61883_INDEV 0 ++#define CONFIG_JACK_INDEV 0 ++#define CONFIG_KMSGRAB_INDEV 0 ++#define CONFIG_LAVFI_INDEV 0 ++#define CONFIG_OPENAL_INDEV 0 ++#define CONFIG_OSS_INDEV 0 ++#define CONFIG_PULSE_INDEV 0 ++#define CONFIG_SNDIO_INDEV 0 ++#define CONFIG_V4L2_INDEV 0 ++#define CONFIG_VFWCAP_INDEV 0 ++#define CONFIG_XCBGRAB_INDEV 0 ++#define CONFIG_LIBCDIO_INDEV 0 ++#define CONFIG_LIBDC1394_INDEV 0 ++#define CONFIG_ALSA_OUTDEV 0 ++#define CONFIG_CACA_OUTDEV 0 ++#define CONFIG_DECKLINK_OUTDEV 0 ++#define CONFIG_FBDEV_OUTDEV 0 ++#define CONFIG_OPENGL_OUTDEV 0 ++#define CONFIG_OSS_OUTDEV 0 ++#define CONFIG_PULSE_OUTDEV 0 ++#define CONFIG_SDL2_OUTDEV 0 ++#define CONFIG_SNDIO_OUTDEV 0 ++#define CONFIG_V4L2_OUTDEV 0 ++#define CONFIG_XV_OUTDEV 0 ++#define CONFIG_ABENCH_FILTER 0 ++#define CONFIG_ACOMPRESSOR_FILTER 0 ++#define CONFIG_ACONTRAST_FILTER 0 ++#define CONFIG_ACOPY_FILTER 0 ++#define 
CONFIG_ACUE_FILTER 0 ++#define CONFIG_ACROSSFADE_FILTER 0 ++#define CONFIG_ACROSSOVER_FILTER 0 ++#define CONFIG_ACRUSHER_FILTER 0 ++#define CONFIG_ADECLICK_FILTER 0 ++#define CONFIG_ADECLIP_FILTER 0 ++#define CONFIG_ADELAY_FILTER 0 ++#define CONFIG_ADERIVATIVE_FILTER 0 ++#define CONFIG_AECHO_FILTER 0 ++#define CONFIG_AEMPHASIS_FILTER 0 ++#define CONFIG_AEVAL_FILTER 0 ++#define CONFIG_AFADE_FILTER 0 ++#define CONFIG_AFFTDN_FILTER 0 ++#define CONFIG_AFFTFILT_FILTER 0 ++#define CONFIG_AFIR_FILTER 0 ++#define CONFIG_AFORMAT_FILTER 0 ++#define CONFIG_AGATE_FILTER 0 ++#define CONFIG_AIIR_FILTER 0 ++#define CONFIG_AINTEGRAL_FILTER 0 ++#define CONFIG_AINTERLEAVE_FILTER 0 ++#define CONFIG_ALIMITER_FILTER 0 ++#define CONFIG_ALLPASS_FILTER 0 ++#define CONFIG_ALOOP_FILTER 0 ++#define CONFIG_AMERGE_FILTER 0 ++#define CONFIG_AMETADATA_FILTER 0 ++#define CONFIG_AMIX_FILTER 0 ++#define CONFIG_AMULTIPLY_FILTER 0 ++#define CONFIG_ANEQUALIZER_FILTER 0 ++#define CONFIG_ANLMDN_FILTER 0 ++#define CONFIG_ANLMS_FILTER 0 ++#define CONFIG_ANULL_FILTER 0 ++#define CONFIG_APAD_FILTER 0 ++#define CONFIG_APERMS_FILTER 0 ++#define CONFIG_APHASER_FILTER 0 ++#define CONFIG_APULSATOR_FILTER 0 ++#define CONFIG_AREALTIME_FILTER 0 ++#define CONFIG_ARESAMPLE_FILTER 0 ++#define CONFIG_AREVERSE_FILTER 0 ++#define CONFIG_ARNNDN_FILTER 0 ++#define CONFIG_ASELECT_FILTER 0 ++#define CONFIG_ASENDCMD_FILTER 0 ++#define CONFIG_ASETNSAMPLES_FILTER 0 ++#define CONFIG_ASETPTS_FILTER 0 ++#define CONFIG_ASETRATE_FILTER 0 ++#define CONFIG_ASETTB_FILTER 0 ++#define CONFIG_ASHOWINFO_FILTER 0 ++#define CONFIG_ASIDEDATA_FILTER 0 ++#define CONFIG_ASOFTCLIP_FILTER 0 ++#define CONFIG_ASPLIT_FILTER 0 ++#define CONFIG_ASR_FILTER 0 ++#define CONFIG_ASTATS_FILTER 0 ++#define CONFIG_ASTREAMSELECT_FILTER 0 ++#define CONFIG_ATEMPO_FILTER 0 ++#define CONFIG_ATRIM_FILTER 0 ++#define CONFIG_AXCORRELATE_FILTER 0 ++#define CONFIG_AZMQ_FILTER 0 ++#define CONFIG_BANDPASS_FILTER 0 ++#define CONFIG_BANDREJECT_FILTER 0 ++#define CONFIG_BASS_FILTER 0 ++#define CONFIG_BIQUAD_FILTER 0 ++#define CONFIG_BS2B_FILTER 0 ++#define CONFIG_CHROMABER_VULKAN_FILTER 0 ++#define CONFIG_CHANNELMAP_FILTER 0 ++#define CONFIG_CHANNELSPLIT_FILTER 0 ++#define CONFIG_CHORUS_FILTER 0 ++#define CONFIG_COMPAND_FILTER 0 ++#define CONFIG_COMPENSATIONDELAY_FILTER 0 ++#define CONFIG_CROSSFEED_FILTER 0 ++#define CONFIG_CRYSTALIZER_FILTER 0 ++#define CONFIG_DCSHIFT_FILTER 0 ++#define CONFIG_DEESSER_FILTER 0 ++#define CONFIG_DRMETER_FILTER 0 ++#define CONFIG_DYNAUDNORM_FILTER 0 ++#define CONFIG_EARWAX_FILTER 0 ++#define CONFIG_EBUR128_FILTER 0 ++#define CONFIG_EQUALIZER_FILTER 0 ++#define CONFIG_EXTRASTEREO_FILTER 0 ++#define CONFIG_FIREQUALIZER_FILTER 0 ++#define CONFIG_FLANGER_FILTER 0 ++#define CONFIG_HAAS_FILTER 0 ++#define CONFIG_HDCD_FILTER 0 ++#define CONFIG_HEADPHONE_FILTER 0 ++#define CONFIG_HIGHPASS_FILTER 0 ++#define CONFIG_HIGHSHELF_FILTER 0 ++#define CONFIG_JOIN_FILTER 0 ++#define CONFIG_LADSPA_FILTER 0 ++#define CONFIG_LOUDNORM_FILTER 0 ++#define CONFIG_LOWPASS_FILTER 0 ++#define CONFIG_LOWSHELF_FILTER 0 ++#define CONFIG_LV2_FILTER 0 ++#define CONFIG_MCOMPAND_FILTER 0 ++#define CONFIG_PAN_FILTER 0 ++#define CONFIG_REPLAYGAIN_FILTER 0 ++#define CONFIG_RESAMPLE_FILTER 0 ++#define CONFIG_RUBBERBAND_FILTER 0 ++#define CONFIG_SIDECHAINCOMPRESS_FILTER 0 ++#define CONFIG_SIDECHAINGATE_FILTER 0 ++#define CONFIG_SILENCEDETECT_FILTER 0 ++#define CONFIG_SILENCEREMOVE_FILTER 0 ++#define CONFIG_SOFALIZER_FILTER 0 ++#define CONFIG_STEREOTOOLS_FILTER 0 ++#define CONFIG_STEREOWIDEN_FILTER 0 
++#define CONFIG_SUPEREQUALIZER_FILTER 0 ++#define CONFIG_SURROUND_FILTER 0 ++#define CONFIG_TREBLE_FILTER 0 ++#define CONFIG_TREMOLO_FILTER 0 ++#define CONFIG_VIBRATO_FILTER 0 ++#define CONFIG_VOLUME_FILTER 0 ++#define CONFIG_VOLUMEDETECT_FILTER 0 ++#define CONFIG_AEVALSRC_FILTER 0 ++#define CONFIG_AFIRSRC_FILTER 0 ++#define CONFIG_ANOISESRC_FILTER 0 ++#define CONFIG_ANULLSRC_FILTER 0 ++#define CONFIG_FLITE_FILTER 0 ++#define CONFIG_HILBERT_FILTER 0 ++#define CONFIG_SINC_FILTER 0 ++#define CONFIG_SINE_FILTER 0 ++#define CONFIG_ANULLSINK_FILTER 0 ++#define CONFIG_ADDROI_FILTER 0 ++#define CONFIG_ALPHAEXTRACT_FILTER 0 ++#define CONFIG_ALPHAMERGE_FILTER 0 ++#define CONFIG_AMPLIFY_FILTER 0 ++#define CONFIG_ASS_FILTER 0 ++#define CONFIG_ATADENOISE_FILTER 0 ++#define CONFIG_AVGBLUR_FILTER 0 ++#define CONFIG_AVGBLUR_OPENCL_FILTER 0 ++#define CONFIG_AVGBLUR_VULKAN_FILTER 0 ++#define CONFIG_BBOX_FILTER 0 ++#define CONFIG_BENCH_FILTER 0 ++#define CONFIG_BILATERAL_FILTER 0 ++#define CONFIG_BITPLANENOISE_FILTER 0 ++#define CONFIG_BLACKDETECT_FILTER 0 ++#define CONFIG_BLACKFRAME_FILTER 0 ++#define CONFIG_BLEND_FILTER 0 ++#define CONFIG_BM3D_FILTER 0 ++#define CONFIG_BOXBLUR_FILTER 0 ++#define CONFIG_BOXBLUR_OPENCL_FILTER 0 ++#define CONFIG_BWDIF_FILTER 0 ++#define CONFIG_CAS_FILTER 0 ++#define CONFIG_CHROMAHOLD_FILTER 0 ++#define CONFIG_CHROMAKEY_FILTER 0 ++#define CONFIG_CHROMASHIFT_FILTER 0 ++#define CONFIG_CIESCOPE_FILTER 0 ++#define CONFIG_CODECVIEW_FILTER 0 ++#define CONFIG_COLORBALANCE_FILTER 0 ++#define CONFIG_COLORCHANNELMIXER_FILTER 0 ++#define CONFIG_COLORKEY_FILTER 0 ++#define CONFIG_COLORKEY_OPENCL_FILTER 0 ++#define CONFIG_COLORHOLD_FILTER 0 ++#define CONFIG_COLORLEVELS_FILTER 0 ++#define CONFIG_COLORMATRIX_FILTER 0 ++#define CONFIG_COLORSPACE_FILTER 0 ++#define CONFIG_CONVOLUTION_FILTER 0 ++#define CONFIG_CONVOLUTION_OPENCL_FILTER 0 ++#define CONFIG_CONVOLVE_FILTER 0 ++#define CONFIG_COPY_FILTER 0 ++#define CONFIG_COREIMAGE_FILTER 0 ++#define CONFIG_COVER_RECT_FILTER 0 ++#define CONFIG_CROP_FILTER 0 ++#define CONFIG_CROPDETECT_FILTER 0 ++#define CONFIG_CUE_FILTER 0 ++#define CONFIG_CURVES_FILTER 0 ++#define CONFIG_DATASCOPE_FILTER 0 ++#define CONFIG_DCTDNOIZ_FILTER 0 ++#define CONFIG_DEBAND_FILTER 0 ++#define CONFIG_DEBLOCK_FILTER 0 ++#define CONFIG_DECIMATE_FILTER 0 ++#define CONFIG_DECONVOLVE_FILTER 0 ++#define CONFIG_DEDOT_FILTER 0 ++#define CONFIG_DEFLATE_FILTER 0 ++#define CONFIG_DEFLICKER_FILTER 0 ++#define CONFIG_DEINTERLACE_QSV_FILTER 0 ++#define CONFIG_DEINTERLACE_VAAPI_FILTER 0 ++#define CONFIG_DEJUDDER_FILTER 0 ++#define CONFIG_DELOGO_FILTER 0 ++#define CONFIG_DENOISE_VAAPI_FILTER 0 ++#define CONFIG_DERAIN_FILTER 0 ++#define CONFIG_DESHAKE_FILTER 0 ++#define CONFIG_DESHAKE_OPENCL_FILTER 0 ++#define CONFIG_DESPILL_FILTER 0 ++#define CONFIG_DETELECINE_FILTER 0 ++#define CONFIG_DILATION_FILTER 0 ++#define CONFIG_DILATION_OPENCL_FILTER 0 ++#define CONFIG_DISPLACE_FILTER 0 ++#define CONFIG_DNN_PROCESSING_FILTER 0 ++#define CONFIG_DOUBLEWEAVE_FILTER 0 ++#define CONFIG_DRAWBOX_FILTER 0 ++#define CONFIG_DRAWGRAPH_FILTER 0 ++#define CONFIG_DRAWGRID_FILTER 0 ++#define CONFIG_DRAWTEXT_FILTER 0 ++#define CONFIG_EDGEDETECT_FILTER 0 ++#define CONFIG_ELBG_FILTER 0 ++#define CONFIG_ENTROPY_FILTER 0 ++#define CONFIG_EQ_FILTER 0 ++#define CONFIG_EROSION_FILTER 0 ++#define CONFIG_EROSION_OPENCL_FILTER 0 ++#define CONFIG_EXTRACTPLANES_FILTER 0 ++#define CONFIG_FADE_FILTER 0 ++#define CONFIG_FFTDNOIZ_FILTER 0 ++#define CONFIG_FFTFILT_FILTER 0 ++#define CONFIG_FIELD_FILTER 0 ++#define 
CONFIG_FIELDHINT_FILTER 0 ++#define CONFIG_FIELDMATCH_FILTER 0 ++#define CONFIG_FIELDORDER_FILTER 0 ++#define CONFIG_FILLBORDERS_FILTER 0 ++#define CONFIG_FIND_RECT_FILTER 0 ++#define CONFIG_FLOODFILL_FILTER 0 ++#define CONFIG_FORMAT_FILTER 0 ++#define CONFIG_FPS_FILTER 0 ++#define CONFIG_FRAMEPACK_FILTER 0 ++#define CONFIG_FRAMERATE_FILTER 0 ++#define CONFIG_FRAMESTEP_FILTER 0 ++#define CONFIG_FREEZEDETECT_FILTER 0 ++#define CONFIG_FREEZEFRAMES_FILTER 0 ++#define CONFIG_FREI0R_FILTER 0 ++#define CONFIG_FSPP_FILTER 0 ++#define CONFIG_GBLUR_FILTER 0 ++#define CONFIG_GEQ_FILTER 0 ++#define CONFIG_GRADFUN_FILTER 0 ++#define CONFIG_GRAPHMONITOR_FILTER 0 ++#define CONFIG_GREYEDGE_FILTER 0 ++#define CONFIG_HALDCLUT_FILTER 0 ++#define CONFIG_HFLIP_FILTER 0 ++#define CONFIG_HISTEQ_FILTER 0 ++#define CONFIG_HISTOGRAM_FILTER 0 ++#define CONFIG_HQDN3D_FILTER 0 ++#define CONFIG_HQX_FILTER 0 ++#define CONFIG_HSTACK_FILTER 0 ++#define CONFIG_HUE_FILTER 0 ++#define CONFIG_HWDOWNLOAD_FILTER 0 ++#define CONFIG_HWMAP_FILTER 0 ++#define CONFIG_HWUPLOAD_FILTER 0 ++#define CONFIG_HWUPLOAD_CUDA_FILTER 0 ++#define CONFIG_HYSTERESIS_FILTER 0 ++#define CONFIG_IDET_FILTER 0 ++#define CONFIG_IL_FILTER 0 ++#define CONFIG_INFLATE_FILTER 0 ++#define CONFIG_INTERLACE_FILTER 0 ++#define CONFIG_INTERLEAVE_FILTER 0 ++#define CONFIG_KERNDEINT_FILTER 0 ++#define CONFIG_LAGFUN_FILTER 0 ++#define CONFIG_LENSCORRECTION_FILTER 0 ++#define CONFIG_LENSFUN_FILTER 0 ++#define CONFIG_LIBVMAF_FILTER 0 ++#define CONFIG_LIMITER_FILTER 0 ++#define CONFIG_LOOP_FILTER 0 ++#define CONFIG_LUMAKEY_FILTER 0 ++#define CONFIG_LUT_FILTER 0 ++#define CONFIG_LUT1D_FILTER 0 ++#define CONFIG_LUT2_FILTER 0 ++#define CONFIG_LUT3D_FILTER 0 ++#define CONFIG_LUTRGB_FILTER 0 ++#define CONFIG_LUTYUV_FILTER 0 ++#define CONFIG_MASKEDCLAMP_FILTER 0 ++#define CONFIG_MASKEDMAX_FILTER 0 ++#define CONFIG_MASKEDMERGE_FILTER 0 ++#define CONFIG_MASKEDMIN_FILTER 0 ++#define CONFIG_MASKEDTHRESHOLD_FILTER 0 ++#define CONFIG_MASKFUN_FILTER 0 ++#define CONFIG_MCDEINT_FILTER 0 ++#define CONFIG_MEDIAN_FILTER 0 ++#define CONFIG_MERGEPLANES_FILTER 0 ++#define CONFIG_MESTIMATE_FILTER 0 ++#define CONFIG_METADATA_FILTER 0 ++#define CONFIG_MIDEQUALIZER_FILTER 0 ++#define CONFIG_MINTERPOLATE_FILTER 0 ++#define CONFIG_MIX_FILTER 0 ++#define CONFIG_MPDECIMATE_FILTER 0 ++#define CONFIG_NEGATE_FILTER 0 ++#define CONFIG_NLMEANS_FILTER 0 ++#define CONFIG_NLMEANS_OPENCL_FILTER 0 ++#define CONFIG_NNEDI_FILTER 0 ++#define CONFIG_NOFORMAT_FILTER 0 ++#define CONFIG_NOISE_FILTER 0 ++#define CONFIG_NORMALIZE_FILTER 0 ++#define CONFIG_NULL_FILTER 0 ++#define CONFIG_OCR_FILTER 0 ++#define CONFIG_OCV_FILTER 0 ++#define CONFIG_OSCILLOSCOPE_FILTER 0 ++#define CONFIG_OVERLAY_FILTER 0 ++#define CONFIG_OVERLAY_OPENCL_FILTER 0 ++#define CONFIG_OVERLAY_QSV_FILTER 0 ++#define CONFIG_OVERLAY_VULKAN_FILTER 0 ++#define CONFIG_OVERLAY_CUDA_FILTER 0 ++#define CONFIG_OWDENOISE_FILTER 0 ++#define CONFIG_PAD_FILTER 0 ++#define CONFIG_PAD_OPENCL_FILTER 0 ++#define CONFIG_PALETTEGEN_FILTER 0 ++#define CONFIG_PALETTEUSE_FILTER 0 ++#define CONFIG_PERMS_FILTER 0 ++#define CONFIG_PERSPECTIVE_FILTER 0 ++#define CONFIG_PHASE_FILTER 0 ++#define CONFIG_PHOTOSENSITIVITY_FILTER 0 ++#define CONFIG_PIXDESCTEST_FILTER 0 ++#define CONFIG_PIXSCOPE_FILTER 0 ++#define CONFIG_PP_FILTER 0 ++#define CONFIG_PP7_FILTER 0 ++#define CONFIG_PREMULTIPLY_FILTER 0 ++#define CONFIG_PREWITT_FILTER 0 ++#define CONFIG_PREWITT_OPENCL_FILTER 0 ++#define CONFIG_PROCAMP_VAAPI_FILTER 0 ++#define CONFIG_PROGRAM_OPENCL_FILTER 0 ++#define 
CONFIG_PSEUDOCOLOR_FILTER 0 ++#define CONFIG_PSNR_FILTER 0 ++#define CONFIG_PULLUP_FILTER 0 ++#define CONFIG_QP_FILTER 0 ++#define CONFIG_RANDOM_FILTER 0 ++#define CONFIG_READEIA608_FILTER 0 ++#define CONFIG_READVITC_FILTER 0 ++#define CONFIG_REALTIME_FILTER 0 ++#define CONFIG_REMAP_FILTER 0 ++#define CONFIG_REMOVEGRAIN_FILTER 0 ++#define CONFIG_REMOVELOGO_FILTER 0 ++#define CONFIG_REPEATFIELDS_FILTER 0 ++#define CONFIG_REVERSE_FILTER 0 ++#define CONFIG_RGBASHIFT_FILTER 0 ++#define CONFIG_ROBERTS_FILTER 0 ++#define CONFIG_ROBERTS_OPENCL_FILTER 0 ++#define CONFIG_ROTATE_FILTER 0 ++#define CONFIG_SAB_FILTER 0 ++#define CONFIG_SCALE_FILTER 0 ++#define CONFIG_SCALE_CUDA_FILTER 0 ++#define CONFIG_SCALE_NPP_FILTER 0 ++#define CONFIG_SCALE_QSV_FILTER 0 ++#define CONFIG_SCALE_VAAPI_FILTER 0 ++#define CONFIG_SCALE_VULKAN_FILTER 0 ++#define CONFIG_SCALE2REF_FILTER 0 ++#define CONFIG_SCROLL_FILTER 0 ++#define CONFIG_SELECT_FILTER 0 ++#define CONFIG_SELECTIVECOLOR_FILTER 0 ++#define CONFIG_SENDCMD_FILTER 0 ++#define CONFIG_SEPARATEFIELDS_FILTER 0 ++#define CONFIG_SETDAR_FILTER 0 ++#define CONFIG_SETFIELD_FILTER 0 ++#define CONFIG_SETPARAMS_FILTER 0 ++#define CONFIG_SETPTS_FILTER 0 ++#define CONFIG_SETRANGE_FILTER 0 ++#define CONFIG_SETSAR_FILTER 0 ++#define CONFIG_SETTB_FILTER 0 ++#define CONFIG_SHARPNESS_VAAPI_FILTER 0 ++#define CONFIG_SHOWINFO_FILTER 0 ++#define CONFIG_SHOWPALETTE_FILTER 0 ++#define CONFIG_SHUFFLEFRAMES_FILTER 0 ++#define CONFIG_SHUFFLEPLANES_FILTER 0 ++#define CONFIG_SIDEDATA_FILTER 0 ++#define CONFIG_SIGNALSTATS_FILTER 0 ++#define CONFIG_SIGNATURE_FILTER 0 ++#define CONFIG_SMARTBLUR_FILTER 0 ++#define CONFIG_SOBEL_FILTER 0 ++#define CONFIG_SOBEL_OPENCL_FILTER 0 ++#define CONFIG_SPLIT_FILTER 0 ++#define CONFIG_SPP_FILTER 0 ++#define CONFIG_SR_FILTER 0 ++#define CONFIG_SSIM_FILTER 0 ++#define CONFIG_STEREO3D_FILTER 0 ++#define CONFIG_STREAMSELECT_FILTER 0 ++#define CONFIG_SUBTITLES_FILTER 0 ++#define CONFIG_SUPER2XSAI_FILTER 0 ++#define CONFIG_SWAPRECT_FILTER 0 ++#define CONFIG_SWAPUV_FILTER 0 ++#define CONFIG_TBLEND_FILTER 0 ++#define CONFIG_TELECINE_FILTER 0 ++#define CONFIG_THISTOGRAM_FILTER 0 ++#define CONFIG_THRESHOLD_FILTER 0 ++#define CONFIG_THUMBNAIL_FILTER 0 ++#define CONFIG_THUMBNAIL_CUDA_FILTER 0 ++#define CONFIG_TILE_FILTER 0 ++#define CONFIG_TINTERLACE_FILTER 0 ++#define CONFIG_TLUT2_FILTER 0 ++#define CONFIG_TMEDIAN_FILTER 0 ++#define CONFIG_TMIX_FILTER 0 ++#define CONFIG_TONEMAP_FILTER 0 ++#define CONFIG_TONEMAP_OPENCL_FILTER 0 ++#define CONFIG_TONEMAP_VAAPI_FILTER 0 ++#define CONFIG_TPAD_FILTER 0 ++#define CONFIG_TRANSPOSE_FILTER 0 ++#define CONFIG_TRANSPOSE_NPP_FILTER 0 ++#define CONFIG_TRANSPOSE_OPENCL_FILTER 0 ++#define CONFIG_TRANSPOSE_VAAPI_FILTER 0 ++#define CONFIG_TRIM_FILTER 0 ++#define CONFIG_UNPREMULTIPLY_FILTER 0 ++#define CONFIG_UNSHARP_FILTER 0 ++#define CONFIG_UNSHARP_OPENCL_FILTER 0 ++#define CONFIG_USPP_FILTER 0 ++#define CONFIG_V360_FILTER 0 ++#define CONFIG_VAGUEDENOISER_FILTER 0 ++#define CONFIG_VECTORSCOPE_FILTER 0 ++#define CONFIG_VFLIP_FILTER 0 ++#define CONFIG_VFRDET_FILTER 0 ++#define CONFIG_VIBRANCE_FILTER 0 ++#define CONFIG_VIDSTABDETECT_FILTER 0 ++#define CONFIG_VIDSTABTRANSFORM_FILTER 0 ++#define CONFIG_VIGNETTE_FILTER 0 ++#define CONFIG_VMAFMOTION_FILTER 0 ++#define CONFIG_VPP_QSV_FILTER 0 ++#define CONFIG_VSTACK_FILTER 0 ++#define CONFIG_W3FDIF_FILTER 0 ++#define CONFIG_WAVEFORM_FILTER 0 ++#define CONFIG_WEAVE_FILTER 0 ++#define CONFIG_XBR_FILTER 0 ++#define CONFIG_XFADE_FILTER 0 ++#define CONFIG_XFADE_OPENCL_FILTER 0 ++#define 
CONFIG_XMEDIAN_FILTER 0 ++#define CONFIG_XSTACK_FILTER 0 ++#define CONFIG_YADIF_FILTER 0 ++#define CONFIG_YADIF_CUDA_FILTER 0 ++#define CONFIG_YAEPBLUR_FILTER 0 ++#define CONFIG_ZMQ_FILTER 0 ++#define CONFIG_ZOOMPAN_FILTER 0 ++#define CONFIG_ZSCALE_FILTER 0 ++#define CONFIG_ALLRGB_FILTER 0 ++#define CONFIG_ALLYUV_FILTER 0 ++#define CONFIG_CELLAUTO_FILTER 0 ++#define CONFIG_COLOR_FILTER 0 ++#define CONFIG_COREIMAGESRC_FILTER 0 ++#define CONFIG_FREI0R_SRC_FILTER 0 ++#define CONFIG_HALDCLUTSRC_FILTER 0 ++#define CONFIG_LIFE_FILTER 0 ++#define CONFIG_MANDELBROT_FILTER 0 ++#define CONFIG_MPTESTSRC_FILTER 0 ++#define CONFIG_NULLSRC_FILTER 0 ++#define CONFIG_OPENCLSRC_FILTER 0 ++#define CONFIG_PAL75BARS_FILTER 0 ++#define CONFIG_PAL100BARS_FILTER 0 ++#define CONFIG_RGBTESTSRC_FILTER 0 ++#define CONFIG_SIERPINSKI_FILTER 0 ++#define CONFIG_SMPTEBARS_FILTER 0 ++#define CONFIG_SMPTEHDBARS_FILTER 0 ++#define CONFIG_TESTSRC_FILTER 0 ++#define CONFIG_TESTSRC2_FILTER 0 ++#define CONFIG_YUVTESTSRC_FILTER 0 ++#define CONFIG_NULLSINK_FILTER 0 ++#define CONFIG_ABITSCOPE_FILTER 0 ++#define CONFIG_ADRAWGRAPH_FILTER 0 ++#define CONFIG_AGRAPHMONITOR_FILTER 0 ++#define CONFIG_AHISTOGRAM_FILTER 0 ++#define CONFIG_APHASEMETER_FILTER 0 ++#define CONFIG_AVECTORSCOPE_FILTER 0 ++#define CONFIG_CONCAT_FILTER 0 ++#define CONFIG_SHOWCQT_FILTER 0 ++#define CONFIG_SHOWFREQS_FILTER 0 ++#define CONFIG_SHOWSPATIAL_FILTER 0 ++#define CONFIG_SHOWSPECTRUM_FILTER 0 ++#define CONFIG_SHOWSPECTRUMPIC_FILTER 0 ++#define CONFIG_SHOWVOLUME_FILTER 0 ++#define CONFIG_SHOWWAVES_FILTER 0 ++#define CONFIG_SHOWWAVESPIC_FILTER 0 ++#define CONFIG_SPECTRUMSYNTH_FILTER 0 ++#define CONFIG_AMOVIE_FILTER 0 ++#define CONFIG_MOVIE_FILTER 0 ++#define CONFIG_AFIFO_FILTER 0 ++#define CONFIG_FIFO_FILTER 0 ++#define CONFIG_AA_DEMUXER 0 ++#define CONFIG_AAC_DEMUXER 0 ++#define CONFIG_AC3_DEMUXER 0 ++#define CONFIG_ACM_DEMUXER 0 ++#define CONFIG_ACT_DEMUXER 0 ++#define CONFIG_ADF_DEMUXER 0 ++#define CONFIG_ADP_DEMUXER 0 ++#define CONFIG_ADS_DEMUXER 0 ++#define CONFIG_ADX_DEMUXER 0 ++#define CONFIG_AEA_DEMUXER 0 ++#define CONFIG_AFC_DEMUXER 0 ++#define CONFIG_AIFF_DEMUXER 0 ++#define CONFIG_AIX_DEMUXER 0 ++#define CONFIG_ALP_DEMUXER 0 ++#define CONFIG_AMR_DEMUXER 0 ++#define CONFIG_AMRNB_DEMUXER 0 ++#define CONFIG_AMRWB_DEMUXER 0 ++#define CONFIG_ANM_DEMUXER 0 ++#define CONFIG_APC_DEMUXER 0 ++#define CONFIG_APE_DEMUXER 0 ++#define CONFIG_APM_DEMUXER 0 ++#define CONFIG_APNG_DEMUXER 0 ++#define CONFIG_APTX_DEMUXER 0 ++#define CONFIG_APTX_HD_DEMUXER 0 ++#define CONFIG_AQTITLE_DEMUXER 0 ++#define CONFIG_ARGO_ASF_DEMUXER 0 ++#define CONFIG_ASF_DEMUXER 0 ++#define CONFIG_ASF_O_DEMUXER 0 ++#define CONFIG_ASS_DEMUXER 0 ++#define CONFIG_AST_DEMUXER 0 ++#define CONFIG_AU_DEMUXER 0 ++#define CONFIG_AV1_DEMUXER 0 ++#define CONFIG_AVI_DEMUXER 0 ++#define CONFIG_AVISYNTH_DEMUXER 0 ++#define CONFIG_AVR_DEMUXER 0 ++#define CONFIG_AVS_DEMUXER 0 ++#define CONFIG_AVS2_DEMUXER 0 ++#define CONFIG_BETHSOFTVID_DEMUXER 0 ++#define CONFIG_BFI_DEMUXER 0 ++#define CONFIG_BINTEXT_DEMUXER 0 ++#define CONFIG_BINK_DEMUXER 0 ++#define CONFIG_BIT_DEMUXER 0 ++#define CONFIG_BMV_DEMUXER 0 ++#define CONFIG_BFSTM_DEMUXER 0 ++#define CONFIG_BRSTM_DEMUXER 0 ++#define CONFIG_BOA_DEMUXER 0 ++#define CONFIG_C93_DEMUXER 0 ++#define CONFIG_CAF_DEMUXER 0 ++#define CONFIG_CAVSVIDEO_DEMUXER 0 ++#define CONFIG_CDG_DEMUXER 0 ++#define CONFIG_CDXL_DEMUXER 0 ++#define CONFIG_CINE_DEMUXER 0 ++#define CONFIG_CODEC2_DEMUXER 0 ++#define CONFIG_CODEC2RAW_DEMUXER 0 ++#define CONFIG_CONCAT_DEMUXER 0 ++#define 
CONFIG_DASH_DEMUXER 0 ++#define CONFIG_DATA_DEMUXER 0 ++#define CONFIG_DAUD_DEMUXER 0 ++#define CONFIG_DCSTR_DEMUXER 0 ++#define CONFIG_DERF_DEMUXER 0 ++#define CONFIG_DFA_DEMUXER 0 ++#define CONFIG_DHAV_DEMUXER 0 ++#define CONFIG_DIRAC_DEMUXER 0 ++#define CONFIG_DNXHD_DEMUXER 0 ++#define CONFIG_DSF_DEMUXER 0 ++#define CONFIG_DSICIN_DEMUXER 0 ++#define CONFIG_DSS_DEMUXER 0 ++#define CONFIG_DTS_DEMUXER 0 ++#define CONFIG_DTSHD_DEMUXER 0 ++#define CONFIG_DV_DEMUXER 0 ++#define CONFIG_DVBSUB_DEMUXER 0 ++#define CONFIG_DVBTXT_DEMUXER 0 ++#define CONFIG_DXA_DEMUXER 0 ++#define CONFIG_EA_DEMUXER 0 ++#define CONFIG_EA_CDATA_DEMUXER 0 ++#define CONFIG_EAC3_DEMUXER 0 ++#define CONFIG_EPAF_DEMUXER 0 ++#define CONFIG_FFMETADATA_DEMUXER 0 ++#define CONFIG_FILMSTRIP_DEMUXER 0 ++#define CONFIG_FITS_DEMUXER 0 ++#define CONFIG_FLAC_DEMUXER 1 ++#define CONFIG_FLIC_DEMUXER 0 ++#define CONFIG_FLV_DEMUXER 0 ++#define CONFIG_LIVE_FLV_DEMUXER 0 ++#define CONFIG_FOURXM_DEMUXER 0 ++#define CONFIG_FRM_DEMUXER 0 ++#define CONFIG_FSB_DEMUXER 0 ++#define CONFIG_FWSE_DEMUXER 0 ++#define CONFIG_G722_DEMUXER 0 ++#define CONFIG_G723_1_DEMUXER 0 ++#define CONFIG_G726_DEMUXER 0 ++#define CONFIG_G726LE_DEMUXER 0 ++#define CONFIG_G729_DEMUXER 0 ++#define CONFIG_GDV_DEMUXER 0 ++#define CONFIG_GENH_DEMUXER 0 ++#define CONFIG_GIF_DEMUXER 0 ++#define CONFIG_GSM_DEMUXER 0 ++#define CONFIG_GXF_DEMUXER 0 ++#define CONFIG_H261_DEMUXER 0 ++#define CONFIG_H263_DEMUXER 0 ++#define CONFIG_H264_DEMUXER 0 ++#define CONFIG_HCA_DEMUXER 0 ++#define CONFIG_HCOM_DEMUXER 0 ++#define CONFIG_HEVC_DEMUXER 0 ++#define CONFIG_HLS_DEMUXER 0 ++#define CONFIG_HNM_DEMUXER 0 ++#define CONFIG_ICO_DEMUXER 0 ++#define CONFIG_IDCIN_DEMUXER 0 ++#define CONFIG_IDF_DEMUXER 0 ++#define CONFIG_IFF_DEMUXER 0 ++#define CONFIG_IFV_DEMUXER 0 ++#define CONFIG_ILBC_DEMUXER 0 ++#define CONFIG_IMAGE2_DEMUXER 0 ++#define CONFIG_IMAGE2PIPE_DEMUXER 0 ++#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0 ++#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0 ++#define CONFIG_INGENIENT_DEMUXER 0 ++#define CONFIG_IPMOVIE_DEMUXER 0 ++#define CONFIG_IRCAM_DEMUXER 0 ++#define CONFIG_ISS_DEMUXER 0 ++#define CONFIG_IV8_DEMUXER 0 ++#define CONFIG_IVF_DEMUXER 0 ++#define CONFIG_IVR_DEMUXER 0 ++#define CONFIG_JACOSUB_DEMUXER 0 ++#define CONFIG_JV_DEMUXER 0 ++#define CONFIG_KUX_DEMUXER 0 ++#define CONFIG_KVAG_DEMUXER 0 ++#define CONFIG_LMLM4_DEMUXER 0 ++#define CONFIG_LOAS_DEMUXER 0 ++#define CONFIG_LRC_DEMUXER 0 ++#define CONFIG_LVF_DEMUXER 0 ++#define CONFIG_LXF_DEMUXER 0 ++#define CONFIG_M4V_DEMUXER 0 ++#define CONFIG_MATROSKA_DEMUXER 1 ++#define CONFIG_MGSTS_DEMUXER 0 ++#define CONFIG_MICRODVD_DEMUXER 0 ++#define CONFIG_MJPEG_DEMUXER 0 ++#define CONFIG_MJPEG_2000_DEMUXER 0 ++#define CONFIG_MLP_DEMUXER 0 ++#define CONFIG_MLV_DEMUXER 0 ++#define CONFIG_MM_DEMUXER 0 ++#define CONFIG_MMF_DEMUXER 0 ++#define CONFIG_MOV_DEMUXER 1 ++#define CONFIG_MP3_DEMUXER 1 ++#define CONFIG_MPC_DEMUXER 0 ++#define CONFIG_MPC8_DEMUXER 0 ++#define CONFIG_MPEGPS_DEMUXER 0 ++#define CONFIG_MPEGTS_DEMUXER 0 ++#define CONFIG_MPEGTSRAW_DEMUXER 0 ++#define CONFIG_MPEGVIDEO_DEMUXER 0 ++#define CONFIG_MPJPEG_DEMUXER 0 ++#define CONFIG_MPL2_DEMUXER 0 ++#define CONFIG_MPSUB_DEMUXER 0 ++#define CONFIG_MSF_DEMUXER 0 ++#define CONFIG_MSNWC_TCP_DEMUXER 0 ++#define CONFIG_MTAF_DEMUXER 0 ++#define CONFIG_MTV_DEMUXER 0 ++#define CONFIG_MUSX_DEMUXER 0 ++#define CONFIG_MV_DEMUXER 0 ++#define CONFIG_MVI_DEMUXER 0 ++#define CONFIG_MXF_DEMUXER 0 ++#define CONFIG_MXG_DEMUXER 0 ++#define CONFIG_NC_DEMUXER 0 ++#define CONFIG_NISTSPHERE_DEMUXER 0 
++#define CONFIG_NSP_DEMUXER 0 ++#define CONFIG_NSV_DEMUXER 0 ++#define CONFIG_NUT_DEMUXER 0 ++#define CONFIG_NUV_DEMUXER 0 ++#define CONFIG_OGG_DEMUXER 1 ++#define CONFIG_OMA_DEMUXER 0 ++#define CONFIG_PAF_DEMUXER 0 ++#define CONFIG_PCM_ALAW_DEMUXER 0 ++#define CONFIG_PCM_MULAW_DEMUXER 0 ++#define CONFIG_PCM_VIDC_DEMUXER 0 ++#define CONFIG_PCM_F64BE_DEMUXER 0 ++#define CONFIG_PCM_F64LE_DEMUXER 0 ++#define CONFIG_PCM_F32BE_DEMUXER 0 ++#define CONFIG_PCM_F32LE_DEMUXER 0 ++#define CONFIG_PCM_S32BE_DEMUXER 0 ++#define CONFIG_PCM_S32LE_DEMUXER 0 ++#define CONFIG_PCM_S24BE_DEMUXER 0 ++#define CONFIG_PCM_S24LE_DEMUXER 0 ++#define CONFIG_PCM_S16BE_DEMUXER 0 ++#define CONFIG_PCM_S16LE_DEMUXER 0 ++#define CONFIG_PCM_S8_DEMUXER 0 ++#define CONFIG_PCM_U32BE_DEMUXER 0 ++#define CONFIG_PCM_U32LE_DEMUXER 0 ++#define CONFIG_PCM_U24BE_DEMUXER 0 ++#define CONFIG_PCM_U24LE_DEMUXER 0 ++#define CONFIG_PCM_U16BE_DEMUXER 0 ++#define CONFIG_PCM_U16LE_DEMUXER 0 ++#define CONFIG_PCM_U8_DEMUXER 0 ++#define CONFIG_PJS_DEMUXER 0 ++#define CONFIG_PMP_DEMUXER 0 ++#define CONFIG_PVA_DEMUXER 0 ++#define CONFIG_PVF_DEMUXER 0 ++#define CONFIG_QCP_DEMUXER 0 ++#define CONFIG_R3D_DEMUXER 0 ++#define CONFIG_RAWVIDEO_DEMUXER 0 ++#define CONFIG_REALTEXT_DEMUXER 0 ++#define CONFIG_REDSPARK_DEMUXER 0 ++#define CONFIG_RL2_DEMUXER 0 ++#define CONFIG_RM_DEMUXER 0 ++#define CONFIG_ROQ_DEMUXER 0 ++#define CONFIG_RPL_DEMUXER 0 ++#define CONFIG_RSD_DEMUXER 0 ++#define CONFIG_RSO_DEMUXER 0 ++#define CONFIG_RTP_DEMUXER 0 ++#define CONFIG_RTSP_DEMUXER 0 ++#define CONFIG_S337M_DEMUXER 0 ++#define CONFIG_SAMI_DEMUXER 0 ++#define CONFIG_SAP_DEMUXER 0 ++#define CONFIG_SBC_DEMUXER 0 ++#define CONFIG_SBG_DEMUXER 0 ++#define CONFIG_SCC_DEMUXER 0 ++#define CONFIG_SDP_DEMUXER 0 ++#define CONFIG_SDR2_DEMUXER 0 ++#define CONFIG_SDS_DEMUXER 0 ++#define CONFIG_SDX_DEMUXER 0 ++#define CONFIG_SEGAFILM_DEMUXER 0 ++#define CONFIG_SER_DEMUXER 0 ++#define CONFIG_SHORTEN_DEMUXER 0 ++#define CONFIG_SIFF_DEMUXER 0 ++#define CONFIG_SLN_DEMUXER 0 ++#define CONFIG_SMACKER_DEMUXER 0 ++#define CONFIG_SMJPEG_DEMUXER 0 ++#define CONFIG_SMUSH_DEMUXER 0 ++#define CONFIG_SOL_DEMUXER 0 ++#define CONFIG_SOX_DEMUXER 0 ++#define CONFIG_SPDIF_DEMUXER 0 ++#define CONFIG_SRT_DEMUXER 0 ++#define CONFIG_STR_DEMUXER 0 ++#define CONFIG_STL_DEMUXER 0 ++#define CONFIG_SUBVIEWER1_DEMUXER 0 ++#define CONFIG_SUBVIEWER_DEMUXER 0 ++#define CONFIG_SUP_DEMUXER 0 ++#define CONFIG_SVAG_DEMUXER 0 ++#define CONFIG_SWF_DEMUXER 0 ++#define CONFIG_TAK_DEMUXER 0 ++#define CONFIG_TEDCAPTIONS_DEMUXER 0 ++#define CONFIG_THP_DEMUXER 0 ++#define CONFIG_THREEDOSTR_DEMUXER 0 ++#define CONFIG_TIERTEXSEQ_DEMUXER 0 ++#define CONFIG_TMV_DEMUXER 0 ++#define CONFIG_TRUEHD_DEMUXER 0 ++#define CONFIG_TTA_DEMUXER 0 ++#define CONFIG_TXD_DEMUXER 0 ++#define CONFIG_TTY_DEMUXER 0 ++#define CONFIG_TY_DEMUXER 0 ++#define CONFIG_V210_DEMUXER 0 ++#define CONFIG_V210X_DEMUXER 0 ++#define CONFIG_VAG_DEMUXER 0 ++#define CONFIG_VC1_DEMUXER 0 ++#define CONFIG_VC1T_DEMUXER 0 ++#define CONFIG_VIVIDAS_DEMUXER 0 ++#define CONFIG_VIVO_DEMUXER 0 ++#define CONFIG_VMD_DEMUXER 0 ++#define CONFIG_VOBSUB_DEMUXER 0 ++#define CONFIG_VOC_DEMUXER 0 ++#define CONFIG_VPK_DEMUXER 0 ++#define CONFIG_VPLAYER_DEMUXER 0 ++#define CONFIG_VQF_DEMUXER 0 ++#define CONFIG_W64_DEMUXER 0 ++#define CONFIG_WAV_DEMUXER 1 ++#define CONFIG_WC3_DEMUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0 ++#define CONFIG_WEBVTT_DEMUXER 0 ++#define CONFIG_WSAUD_DEMUXER 0 ++#define CONFIG_WSD_DEMUXER 0 ++#define CONFIG_WSVQA_DEMUXER 0 ++#define CONFIG_WTV_DEMUXER 0 
++#define CONFIG_WVE_DEMUXER 0 ++#define CONFIG_WV_DEMUXER 0 ++#define CONFIG_XA_DEMUXER 0 ++#define CONFIG_XBIN_DEMUXER 0 ++#define CONFIG_XMV_DEMUXER 0 ++#define CONFIG_XVAG_DEMUXER 0 ++#define CONFIG_XWMA_DEMUXER 0 ++#define CONFIG_YOP_DEMUXER 0 ++#define CONFIG_YUV4MPEGPIPE_DEMUXER 0 ++#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_GIF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0 ++#define CONFIG_LIBGME_DEMUXER 0 ++#define CONFIG_LIBMODPLUG_DEMUXER 0 ++#define CONFIG_LIBOPENMPT_DEMUXER 0 ++#define CONFIG_VAPOURSYNTH_DEMUXER 0 ++#define CONFIG_A64_MUXER 0 ++#define CONFIG_AC3_MUXER 0 ++#define CONFIG_ADTS_MUXER 0 ++#define CONFIG_ADX_MUXER 0 ++#define CONFIG_AIFF_MUXER 0 ++#define CONFIG_AMR_MUXER 0 ++#define CONFIG_APNG_MUXER 0 ++#define CONFIG_APTX_MUXER 0 ++#define CONFIG_APTX_HD_MUXER 0 ++#define CONFIG_ASF_MUXER 0 ++#define CONFIG_ASS_MUXER 0 ++#define CONFIG_AST_MUXER 0 ++#define CONFIG_ASF_STREAM_MUXER 0 ++#define CONFIG_AU_MUXER 0 ++#define CONFIG_AVI_MUXER 0 ++#define CONFIG_AVM2_MUXER 0 ++#define CONFIG_AVS2_MUXER 0 ++#define CONFIG_BIT_MUXER 0 ++#define CONFIG_CAF_MUXER 0 ++#define CONFIG_CAVSVIDEO_MUXER 0 ++#define CONFIG_CODEC2_MUXER 0 ++#define CONFIG_CODEC2RAW_MUXER 0 ++#define CONFIG_CRC_MUXER 0 ++#define CONFIG_DASH_MUXER 0 ++#define CONFIG_DATA_MUXER 0 ++#define CONFIG_DAUD_MUXER 0 ++#define CONFIG_DIRAC_MUXER 0 ++#define CONFIG_DNXHD_MUXER 0 ++#define CONFIG_DTS_MUXER 0 ++#define CONFIG_DV_MUXER 0 ++#define CONFIG_EAC3_MUXER 0 ++#define CONFIG_F4V_MUXER 0 ++#define CONFIG_FFMETADATA_MUXER 0 ++#define CONFIG_FIFO_MUXER 0 ++#define CONFIG_FIFO_TEST_MUXER 0 ++#define CONFIG_FILMSTRIP_MUXER 0 ++#define CONFIG_FITS_MUXER 0 ++#define CONFIG_FLAC_MUXER 0 ++#define CONFIG_FLV_MUXER 0 ++#define CONFIG_FRAMECRC_MUXER 0 ++#define CONFIG_FRAMEHASH_MUXER 0 ++#define CONFIG_FRAMEMD5_MUXER 0 ++#define CONFIG_G722_MUXER 0 ++#define CONFIG_G723_1_MUXER 0 ++#define CONFIG_G726_MUXER 0 ++#define CONFIG_G726LE_MUXER 0 ++#define CONFIG_GIF_MUXER 0 ++#define CONFIG_GSM_MUXER 0 ++#define CONFIG_GXF_MUXER 0 ++#define CONFIG_H261_MUXER 0 ++#define CONFIG_H263_MUXER 0 ++#define CONFIG_H264_MUXER 0 ++#define CONFIG_HASH_MUXER 0 ++#define CONFIG_HDS_MUXER 0 ++#define CONFIG_HEVC_MUXER 0 ++#define CONFIG_HLS_MUXER 0 ++#define CONFIG_ICO_MUXER 0 ++#define CONFIG_ILBC_MUXER 0 ++#define CONFIG_IMAGE2_MUXER 0 ++#define CONFIG_IMAGE2PIPE_MUXER 0 ++#define CONFIG_IPOD_MUXER 0 ++#define CONFIG_IRCAM_MUXER 0 ++#define CONFIG_ISMV_MUXER 0 ++#define CONFIG_IVF_MUXER 0 ++#define CONFIG_JACOSUB_MUXER 0 ++#define CONFIG_LATM_MUXER 0 ++#define 
CONFIG_LRC_MUXER 0 ++#define CONFIG_M4V_MUXER 0 ++#define CONFIG_MD5_MUXER 0 ++#define CONFIG_MATROSKA_MUXER 0 ++#define CONFIG_MATROSKA_AUDIO_MUXER 0 ++#define CONFIG_MICRODVD_MUXER 0 ++#define CONFIG_MJPEG_MUXER 0 ++#define CONFIG_MLP_MUXER 0 ++#define CONFIG_MMF_MUXER 0 ++#define CONFIG_MOV_MUXER 0 ++#define CONFIG_MP2_MUXER 0 ++#define CONFIG_MP3_MUXER 0 ++#define CONFIG_MP4_MUXER 0 ++#define CONFIG_MPEG1SYSTEM_MUXER 0 ++#define CONFIG_MPEG1VCD_MUXER 0 ++#define CONFIG_MPEG1VIDEO_MUXER 0 ++#define CONFIG_MPEG2DVD_MUXER 0 ++#define CONFIG_MPEG2SVCD_MUXER 0 ++#define CONFIG_MPEG2VIDEO_MUXER 0 ++#define CONFIG_MPEG2VOB_MUXER 0 ++#define CONFIG_MPEGTS_MUXER 0 ++#define CONFIG_MPJPEG_MUXER 0 ++#define CONFIG_MXF_MUXER 0 ++#define CONFIG_MXF_D10_MUXER 0 ++#define CONFIG_MXF_OPATOM_MUXER 0 ++#define CONFIG_NULL_MUXER 0 ++#define CONFIG_NUT_MUXER 0 ++#define CONFIG_OGA_MUXER 0 ++#define CONFIG_OGG_MUXER 0 ++#define CONFIG_OGV_MUXER 0 ++#define CONFIG_OMA_MUXER 0 ++#define CONFIG_OPUS_MUXER 0 ++#define CONFIG_PCM_ALAW_MUXER 0 ++#define CONFIG_PCM_MULAW_MUXER 0 ++#define CONFIG_PCM_VIDC_MUXER 0 ++#define CONFIG_PCM_F64BE_MUXER 0 ++#define CONFIG_PCM_F64LE_MUXER 0 ++#define CONFIG_PCM_F32BE_MUXER 0 ++#define CONFIG_PCM_F32LE_MUXER 0 ++#define CONFIG_PCM_S32BE_MUXER 0 ++#define CONFIG_PCM_S32LE_MUXER 0 ++#define CONFIG_PCM_S24BE_MUXER 0 ++#define CONFIG_PCM_S24LE_MUXER 0 ++#define CONFIG_PCM_S16BE_MUXER 0 ++#define CONFIG_PCM_S16LE_MUXER 0 ++#define CONFIG_PCM_S8_MUXER 0 ++#define CONFIG_PCM_U32BE_MUXER 0 ++#define CONFIG_PCM_U32LE_MUXER 0 ++#define CONFIG_PCM_U24BE_MUXER 0 ++#define CONFIG_PCM_U24LE_MUXER 0 ++#define CONFIG_PCM_U16BE_MUXER 0 ++#define CONFIG_PCM_U16LE_MUXER 0 ++#define CONFIG_PCM_U8_MUXER 0 ++#define CONFIG_PSP_MUXER 0 ++#define CONFIG_RAWVIDEO_MUXER 0 ++#define CONFIG_RM_MUXER 0 ++#define CONFIG_ROQ_MUXER 0 ++#define CONFIG_RSO_MUXER 0 ++#define CONFIG_RTP_MUXER 0 ++#define CONFIG_RTP_MPEGTS_MUXER 0 ++#define CONFIG_RTSP_MUXER 0 ++#define CONFIG_SAP_MUXER 0 ++#define CONFIG_SBC_MUXER 0 ++#define CONFIG_SCC_MUXER 0 ++#define CONFIG_SEGAFILM_MUXER 0 ++#define CONFIG_SEGMENT_MUXER 0 ++#define CONFIG_STREAM_SEGMENT_MUXER 0 ++#define CONFIG_SINGLEJPEG_MUXER 0 ++#define CONFIG_SMJPEG_MUXER 0 ++#define CONFIG_SMOOTHSTREAMING_MUXER 0 ++#define CONFIG_SOX_MUXER 0 ++#define CONFIG_SPX_MUXER 0 ++#define CONFIG_SPDIF_MUXER 0 ++#define CONFIG_SRT_MUXER 0 ++#define CONFIG_STREAMHASH_MUXER 0 ++#define CONFIG_SUP_MUXER 0 ++#define CONFIG_SWF_MUXER 0 ++#define CONFIG_TEE_MUXER 0 ++#define CONFIG_TG2_MUXER 0 ++#define CONFIG_TGP_MUXER 0 ++#define CONFIG_MKVTIMESTAMP_V2_MUXER 0 ++#define CONFIG_TRUEHD_MUXER 0 ++#define CONFIG_TTA_MUXER 0 ++#define CONFIG_UNCODEDFRAMECRC_MUXER 0 ++#define CONFIG_VC1_MUXER 0 ++#define CONFIG_VC1T_MUXER 0 ++#define CONFIG_VOC_MUXER 0 ++#define CONFIG_W64_MUXER 0 ++#define CONFIG_WAV_MUXER 0 ++#define CONFIG_WEBM_MUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_MUXER 0 ++#define CONFIG_WEBM_CHUNK_MUXER 0 ++#define CONFIG_WEBP_MUXER 0 ++#define CONFIG_WEBVTT_MUXER 0 ++#define CONFIG_WTV_MUXER 0 ++#define CONFIG_WV_MUXER 0 ++#define CONFIG_YUV4MPEGPIPE_MUXER 0 ++#define CONFIG_CHROMAPRINT_MUXER 0 ++#define CONFIG_ASYNC_PROTOCOL 0 ++#define CONFIG_BLURAY_PROTOCOL 0 ++#define CONFIG_CACHE_PROTOCOL 0 ++#define CONFIG_CONCAT_PROTOCOL 0 ++#define CONFIG_CRYPTO_PROTOCOL 0 ++#define CONFIG_DATA_PROTOCOL 0 ++#define CONFIG_FFRTMPCRYPT_PROTOCOL 0 ++#define CONFIG_FFRTMPHTTP_PROTOCOL 0 ++#define CONFIG_FILE_PROTOCOL 0 ++#define CONFIG_FTP_PROTOCOL 0 ++#define CONFIG_GOPHER_PROTOCOL 0 
++#define CONFIG_HLS_PROTOCOL 0 ++#define CONFIG_HTTP_PROTOCOL 0 ++#define CONFIG_HTTPPROXY_PROTOCOL 0 ++#define CONFIG_HTTPS_PROTOCOL 0 ++#define CONFIG_ICECAST_PROTOCOL 0 ++#define CONFIG_MMSH_PROTOCOL 0 ++#define CONFIG_MMST_PROTOCOL 0 ++#define CONFIG_MD5_PROTOCOL 0 ++#define CONFIG_PIPE_PROTOCOL 0 ++#define CONFIG_PROMPEG_PROTOCOL 0 ++#define CONFIG_RTMP_PROTOCOL 0 ++#define CONFIG_RTMPE_PROTOCOL 0 ++#define CONFIG_RTMPS_PROTOCOL 0 ++#define CONFIG_RTMPT_PROTOCOL 0 ++#define CONFIG_RTMPTE_PROTOCOL 0 ++#define CONFIG_RTMPTS_PROTOCOL 0 ++#define CONFIG_RTP_PROTOCOL 0 ++#define CONFIG_SCTP_PROTOCOL 0 ++#define CONFIG_SRTP_PROTOCOL 0 ++#define CONFIG_SUBFILE_PROTOCOL 0 ++#define CONFIG_TEE_PROTOCOL 0 ++#define CONFIG_TCP_PROTOCOL 0 ++#define CONFIG_TLS_PROTOCOL 0 ++#define CONFIG_UDP_PROTOCOL 0 ++#define CONFIG_UDPLITE_PROTOCOL 0 ++#define CONFIG_UNIX_PROTOCOL 0 ++#define CONFIG_LIBAMQP_PROTOCOL 0 ++#define CONFIG_LIBRTMP_PROTOCOL 0 ++#define CONFIG_LIBRTMPE_PROTOCOL 0 ++#define CONFIG_LIBRTMPS_PROTOCOL 0 ++#define CONFIG_LIBRTMPT_PROTOCOL 0 ++#define CONFIG_LIBRTMPTE_PROTOCOL 0 ++#define CONFIG_LIBSRT_PROTOCOL 0 ++#define CONFIG_LIBSSH_PROTOCOL 0 ++#define CONFIG_LIBSMBCLIENT_PROTOCOL 0 ++#define CONFIG_LIBZMQ_PROTOCOL 0 ++#endif /* FFMPEG_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/bsf_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/bsf_list.c +new file mode 100644 +index 00000000000..d31ece942a7 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/bsf_list.c +@@ -0,0 +1,3 @@ ++static const AVBitStreamFilter * const bitstream_filters[] = { ++ &ff_null_bsf, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/codec_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/codec_list.c +new file mode 100644 +index 00000000000..9407bd2775e +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/codec_list.c +@@ -0,0 +1,18 @@ ++static const AVCodec * const codec_list[] = { ++ &ff_theora_decoder, ++ &ff_vp3_decoder, ++ &ff_vp8_decoder, ++ &ff_flac_decoder, ++ &ff_mp3_decoder, ++ &ff_vorbis_decoder, ++ &ff_pcm_alaw_decoder, ++ &ff_pcm_f32le_decoder, ++ &ff_pcm_mulaw_decoder, ++ &ff_pcm_s16be_decoder, ++ &ff_pcm_s16le_decoder, ++ &ff_pcm_s24be_decoder, ++ &ff_pcm_s24le_decoder, ++ &ff_pcm_s32le_decoder, ++ &ff_pcm_u8_decoder, ++ &ff_libopus_decoder, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/parser_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/parser_list.c +new file mode 100644 +index 00000000000..f81fbe8bbcf +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/parser_list.c +@@ -0,0 +1,9 @@ ++static const AVCodecParser * const parser_list[] = { ++ &ff_flac_parser, ++ &ff_mpegaudio_parser, ++ &ff_opus_parser, ++ &ff_vorbis_parser, ++ &ff_vp3_parser, ++ &ff_vp8_parser, ++ &ff_vp9_parser, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/demuxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/demuxer_list.c +new file mode 100644 +index 00000000000..1908ba19e77 +--- /dev/null ++++ 
b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/demuxer_list.c +@@ -0,0 +1,8 @@ ++static const AVInputFormat * const demuxer_list[] = { ++ &ff_flac_demuxer, ++ &ff_matroska_demuxer, ++ &ff_mov_demuxer, ++ &ff_mp3_demuxer, ++ &ff_ogg_demuxer, ++ &ff_wav_demuxer, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/muxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/muxer_list.c +new file mode 100644 +index 00000000000..f36d9499c6f +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/muxer_list.c +@@ -0,0 +1,2 @@ ++static const AVOutputFormat * const muxer_list[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/protocol_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/protocol_list.c +new file mode 100644 +index 00000000000..247e1e4c3a2 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/protocol_list.c +@@ -0,0 +1,2 @@ ++static const URLProtocol * const url_protocols[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/avconfig.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/avconfig.h +new file mode 100644 +index 00000000000..8558b35027f +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/avconfig.h +@@ -0,0 +1,6 @@ ++/* Generated by ffmpeg configure */ ++#ifndef AVUTIL_AVCONFIG_H ++#define AVUTIL_AVCONFIG_H ++#define AV_HAVE_BIGENDIAN 0 ++#define AV_HAVE_FAST_UNALIGNED 0 ++#endif /* AVUTIL_AVCONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/ffversion.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/ffversion.h +new file mode 100644 +index 00000000000..31e5b5036dc +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/ffversion.h +@@ -0,0 +1,5 @@ ++/* Automatically generated by version.sh, do not manually edit! 
*/ ++#ifndef AVUTIL_FFVERSION_H ++#define AVUTIL_FFVERSION_H ++#define FFMPEG_VERSION "git-2020-06-16-23b2a15c25" ++#endif /* AVUTIL_FFVERSION_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py +index 23d5c0f5739..dfe821557de 100755 +--- a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py +@@ -36,7 +36,7 @@ ARCH_MAP = { + 'android': ['ia32', 'x64', 'arm-neon', 'arm64'], + 'linux': [ + 'ia32', 'x64', 'mipsel', 'mips64el', 'noasm-x64', 'arm', 'arm-neon', +- 'arm64' ++ 'arm64', 'la64' + ], + 'mac': ['x64'], + 'win': ['ia32', 'x64', 'arm64'], +@@ -126,6 +126,8 @@ def DetermineHostOsAndArch(): + host_arch = 'mipsel' + elif platform.machine() == 'mips64': + host_arch = 'mips64el' ++ elif platform.machine() == 'loongarch64': ++ host_arch = 'la64' + elif platform.machine().startswith('arm'): + host_arch = 'arm' + else: +@@ -197,6 +199,10 @@ def SetupAndroidToolchain(target_arch): + elif target_arch == 'mipsel': + sysroot_arch = 'mips' + toolchain_bin_prefix = toolchain_dir_prefix = 'mipsel-linux-android' ++ elif target_arch == 'la64': ++ toolchain_level = api64_level ++ sysroot_arch = 'la64' ++ toolchain_bin_prefix = toolchain_dir_prefix = 'la64-linux-android' + elif target_arch == 'mips64el': + toolchain_level = api64_level + sysroot_arch = 'mips64' +@@ -789,6 +795,21 @@ def ConfigureAndBuild(target_arch, target_os, host_os, host_arch, parallel_jobs, + '--extra-cflags=--target=mips64el-linux-gnuabi64', + '--extra-ldflags=--target=mips64el-linux-gnuabi64', + ]) ++ elif target_arch == 'la64': ++ # These flags taken from android chrome build with target_cpu='mips64el' ++ configure_flags['Common'].extend([ ++ ]) ++ if target_os == 'android': ++ configure_flags['Common'].extend([ ++ '--enable-mips64r6', ++ '--extra-cflags=-mcpu=mips64r6', ++ '--disable-mips64r2', ++ '--enable-msa', ++ ]) ++ if target_os == 'linux': ++ configure_flags['Common'].extend([ ++ '--target-os=linux', ++ ]) + else: + print( + 'Error: Unknown target arch %r for target OS %r!' % (target_arch, +@@ -814,8 +835,8 @@ def ConfigureAndBuild(target_arch, target_os, host_os, host_arch, parallel_jobs, + # typically be the system one, so explicitly configure use of Clang's + # ld.lld, to ensure that things like cross-compilation and LTO work. + # This does not work for ia32 and is always used on mac. +- if target_arch != 'ia32' and target_os != 'mac': +- configure_flags['Common'].append('--extra-ldflags=-fuse-ld=lld') ++ #if target_arch != 'ia32' and target_os != 'mac': ++ # configure_flags['Common'].append('--extra-ldflags=-fuse-ld=lld') + + # Should be run on Mac, unless we're cross-compiling on Linux. 
+ if target_os == 'mac': +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh +index 0e5159d6f40..a982a3bd45d 100755 +--- a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh +@@ -10,7 +10,7 @@ for os in android linux linux-noasm mac win; do + # Copy config files for various architectures: + # - ia32/x64 have config.asm, config.h + # - arm/arm-neon have config.h +- for arch in arm arm-neon arm64 ia32 x64 mipsel mips64el; do ++ for arch in arm arm-neon arm64 ia32 x64 mipsel mips64el la64; do + # Don't waste time on non-existent configs, if no config.h then skip. + [ ! -e "build.$arch.$os/$target/config.h" ] && continue + for f in config.h config.asm libavutil/avconfig.h libavutil/ffversion.h libavcodec/bsf_list.c libavcodec/codec_list.c libavcodec/parser_list.c libavformat/demuxer_list.c libavformat/muxer_list.c libavformat/protocol_list.c; do +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py +index d2b3d1052aa..5b4dd10e5c6 100755 +--- a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py +@@ -77,7 +77,7 @@ GN_SOURCE_END = """] + Attr = enum('ARCHITECTURE', 'TARGET', 'PLATFORM') + SUPPORT_MATRIX = { + Attr.ARCHITECTURE: +- set(['ia32', 'x64', 'arm', 'arm64', 'arm-neon', 'mipsel', 'mips64el']), ++ set(['ia32', 'x64', 'arm', 'arm64', 'arm-neon', 'mipsel', 'mips64el', 'la64']), + Attr.TARGET: + set(['Chromium', 'Chrome', 'ChromeOS']), + Attr.PLATFORM: +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni b/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni +index fa2b74e0963..fdecb888d90 100644 +--- a/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni ++++ b/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni +@@ -14,17 +14,14 @@ ffmpeg_asm_sources = [] + + use_linux_config = is_linux || is_fuchsia + +-if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && current_cpu == "arm64") || (is_android && current_cpu == "x64") || (is_android && current_cpu == "x86") || (is_mac) || (is_win) || (use_linux_config)) { ++if (use_linux_config && current_cpu == "la64") { + ffmpeg_c_sources += [ + "libavcodec/ac3_parser.c", + "libavcodec/ac3tab.c", + "libavcodec/adts_parser.c", + "libavcodec/allcodecs.c", + "libavcodec/autorename_libavcodec_flacdec.c", +- "libavcodec/autorename_libavcodec_flacdsp.c", +- "libavcodec/autorename_libavcodec_mpegaudiodsp.c", + "libavcodec/autorename_libavcodec_utils.c", +- "libavcodec/autorename_libavcodec_vorbisdsp.c", + "libavcodec/avdct.c", + "libavcodec/avfft.c", + "libavcodec/avpacket.c", +@@ -49,7 +46,10 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavcodec/flac.c", + "libavcodec/flac_parser.c", + "libavcodec/flacdata.c", ++ "libavcodec/flacdsp.c", + "libavcodec/golomb.c", ++ "libavcodec/h264pred.c", ++ "libavcodec/hpeldsp.c", + "libavcodec/imgconvert.c", + "libavcodec/jni.c", + "libavcodec/libopus.c", +@@ -67,6 +67,7 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavcodec/mpegaudiodata.c", + "libavcodec/mpegaudiodec_fixed.c", + "libavcodec/mpegaudiodecheader.c", ++ "libavcodec/mpegaudiodsp.c", + 
"libavcodec/mpegaudiodsp_data.c", + "libavcodec/mpegaudiodsp_fixed.c", + "libavcodec/mpegaudiodsp_float.c", +@@ -86,10 +87,19 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavcodec/qsv_api.c", + "libavcodec/raw.c", + "libavcodec/rdft.c", ++ "libavcodec/videodsp.c", + "libavcodec/vorbis.c", + "libavcodec/vorbis_data.c", + "libavcodec/vorbis_parser.c", + "libavcodec/vorbisdec.c", ++ "libavcodec/vorbisdsp.c", ++ "libavcodec/vp3.c", ++ "libavcodec/vp3_parser.c", ++ "libavcodec/vp3dsp.c", ++ "libavcodec/vp56rac.c", ++ "libavcodec/vp8.c", ++ "libavcodec/vp8_parser.c", ++ "libavcodec/vp8dsp.c", + "libavcodec/vp9_parser.c", + "libavcodec/xiph.c", + "libavformat/allformats.c", +@@ -135,10 +145,6 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavformat/wavdec.c", + "libavutil/aes.c", + "libavutil/aes_ctr.c", +- "libavutil/autorename_libavutil_cpu.c", +- "libavutil/autorename_libavutil_fixed_dsp.c", +- "libavutil/autorename_libavutil_float_dsp.c", +- "libavutil/autorename_libavutil_imgutils.c", + "libavutil/avsscanf.c", + "libavutil/avstring.c", + "libavutil/base64.c", +@@ -147,6 +153,7 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavutil/camellia.c", + "libavutil/channel_layout.c", + "libavutil/color_utils.c", ++ "libavutil/cpu.c", + "libavutil/crc.c", + "libavutil/dict.c", + "libavutil/display.c", +@@ -156,9 +163,12 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavutil/eval.c", + "libavutil/fifo.c", + "libavutil/file_open.c", ++ "libavutil/fixed_dsp.c", ++ "libavutil/float_dsp.c", + "libavutil/frame.c", + "libavutil/hdr_dynamic_metadata.c", + "libavutil/hwcontext.c", ++ "libavutil/imgutils.c", + "libavutil/integer.c", + "libavutil/intmath.c", + "libavutil/lfg.c", +@@ -193,22 +203,7 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + ] + } + +-if ((is_mac) || (is_win) || (use_linux_config)) { +- ffmpeg_c_sources += [ +- "libavcodec/autorename_libavcodec_hpeldsp.c", +- "libavcodec/autorename_libavcodec_videodsp.c", +- "libavcodec/autorename_libavcodec_vp3dsp.c", +- "libavcodec/autorename_libavcodec_vp8dsp.c", +- "libavcodec/h264pred.c", +- "libavcodec/vp3.c", +- "libavcodec/vp3_parser.c", +- "libavcodec/vp56rac.c", +- "libavcodec/vp8.c", +- "libavcodec/vp8_parser.c", +- ] +-} +- +-if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "arm" && arm_use_neon && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "arm64" && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (is_win && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "ChromeOS")) { ++if ((use_linux_config && current_cpu == "la64" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "la64" && ffmpeg_branding == "ChromeOS")) { + ffmpeg_c_sources += [ + "libavcodec/aac_ac3_parser.c", + "libavcodec/aac_parser.c", +@@ -218,21 +213,8 @@ if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_android && curr + "libavcodec/aactab.c", + "libavcodec/adts_header.c", + "libavcodec/autorename_libavcodec_aacdec.c", +- "libavcodec/autorename_libavcodec_mdct15.c", +- "libavcodec/autorename_libavcodec_sbrdsp.c", +- "libavcodec/cbrt_data.c", +- "libavcodec/kbdwin.c", +- "libavcodec/sinewin.c", +- "libavcodec/sinewin_fixed.c", +- "libavformat/aacdec.c", +- 
"libavformat/apetag.c", +- "libavformat/img2.c", +- ] +-} +- +-if ((is_mac && ffmpeg_branding == "Chrome") || (is_win && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "ChromeOS")) { +- ffmpeg_c_sources += [ + "libavcodec/cabac.c", ++ "libavcodec/cbrt_data.c", + "libavcodec/h2645_parse.c", + "libavcodec/h264_cabac.c", + "libavcodec/h264_cavlc.c", +@@ -252,122 +234,26 @@ if ((is_mac && ffmpeg_branding == "Chrome") || (is_win && ffmpeg_branding == "Ch + "libavcodec/h264dsp.c", + "libavcodec/h264idct.c", + "libavcodec/h264qpel.c", ++ "libavcodec/kbdwin.c", ++ "libavcodec/mdct15.c", ++ "libavcodec/sbrdsp.c", ++ "libavcodec/sinewin.c", ++ "libavcodec/sinewin_fixed.c", + "libavcodec/startcode.c", ++ "libavformat/aacdec.c", ++ "libavformat/apetag.c", ++ "libavformat/img2.c", + ] + } + +-if ((is_android && current_cpu == "x64") || (is_android && current_cpu == "x86") || (is_mac) || (is_win && current_cpu == "x64") || (is_win && current_cpu == "x86") || (use_linux_config && current_cpu == "x64") || (use_linux_config && current_cpu == "x86")) { +- ffmpeg_c_sources += [ +- "libavcodec/x86/autorename_libavcodec_x86_vorbisdsp_init.c", +- "libavcodec/x86/constants.c", +- "libavcodec/x86/dct_init.c", +- "libavcodec/x86/fft_init.c", +- "libavcodec/x86/flacdsp_init.c", +- "libavcodec/x86/mpegaudiodsp.c", +- "libavutil/x86/autorename_libavutil_x86_cpu.c", +- "libavutil/x86/autorename_libavutil_x86_float_dsp_init.c", +- "libavutil/x86/fixed_dsp_init.c", +- "libavutil/x86/imgutils_init.c", +- "libavutil/x86/lls_init.c", +- ] +-} +- +-if ((is_android && current_cpu == "arm" && arm_use_neon) || (use_linux_config && current_cpu == "arm" && arm_use_neon) || (use_linux_config && current_cpu == "arm")) { +- ffmpeg_c_sources += [ +- "libavcodec/arm/fft_fixed_init_arm.c", +- "libavcodec/arm/fft_init_arm.c", +- "libavcodec/arm/flacdsp_init_arm.c", +- "libavcodec/arm/mpegaudiodsp_init_arm.c", +- "libavcodec/arm/rdft_init_arm.c", +- "libavcodec/arm/vorbisdsp_init_arm.c", +- "libavutil/arm/autorename_libavutil_arm_cpu.c", +- "libavutil/arm/float_dsp_init_arm.c", +- "libavutil/arm/float_dsp_init_vfp.c", +- ] +- ffmpeg_gas_sources += [ +- "libavcodec/arm/fft_vfp.S", +- "libavcodec/arm/flacdsp_arm.S", +- "libavcodec/arm/mdct_vfp.S", +- "libavcodec/arm/mpegaudiodsp_fixed_armv6.S", +- "libavutil/arm/float_dsp_vfp.S", +- ] +-} +- +-if ((is_android && current_cpu == "x64") || (is_mac) || (is_win && current_cpu == "x64") || (is_win && current_cpu == "x86") || (use_linux_config && current_cpu == "x64") || (use_linux_config && current_cpu == "x86")) { +- ffmpeg_asm_sources += [ +- "libavcodec/x86/dct32.asm", +- "libavcodec/x86/fft.asm", +- "libavcodec/x86/flacdsp.asm", +- "libavcodec/x86/imdct36.asm", +- "libavcodec/x86/vorbisdsp.asm", +- "libavutil/x86/cpuid.asm", +- "libavutil/x86/fixed_dsp.asm", +- "libavutil/x86/float_dsp.asm", +- "libavutil/x86/imgutils.asm", +- "libavutil/x86/lls.asm", +- ] +-} +- +-if ((is_mac) || (is_win && current_cpu == "x64") || (is_win && current_cpu == "x86") || (use_linux_config && current_cpu == "x64") || (use_linux_config && current_cpu == "x86")) { +- ffmpeg_c_sources += [ +- "libavcodec/x86/autorename_libavcodec_x86_videodsp_init.c", +- "libavcodec/x86/h264_intrapred_init.c", +- "libavcodec/x86/hpeldsp_init.c", +- "libavcodec/x86/hpeldsp_vp3_init.c", +- "libavcodec/x86/vp3dsp_init.c", +- "libavcodec/x86/vp8dsp_init.c", +- ] +- ffmpeg_asm_sources += [ +- "libavcodec/x86/autorename_libavcodec_x86_videodsp.asm", +- 
"libavcodec/x86/fpel.asm", +- "libavcodec/x86/h264_intrapred.asm", +- "libavcodec/x86/h264_intrapred_10bit.asm", +- "libavcodec/x86/hpeldsp.asm", +- "libavcodec/x86/hpeldsp_vp3.asm", +- "libavcodec/x86/vp3dsp.asm", +- "libavcodec/x86/vp8dsp.asm", +- "libavcodec/x86/vp8dsp_loopfilter.asm", +- ] +-} +- +-if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (is_win && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x64" && ffmpeg_branding == "ChromeOS") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "ChromeOS")) { +- ffmpeg_c_sources += [ +- "libavcodec/x86/aacpsdsp_init.c", +- "libavcodec/x86/mdct15_init.c", +- "libavcodec/x86/sbrdsp_init.c", +- ] +-} +- +-if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && current_cpu == "arm64") || (is_android && current_cpu == "x64") || (is_android && current_cpu == "x86")) { +- ffmpeg_c_sources += [ +- "compat/strtod.c", +- ] +-} +- +-if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_win && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x64" && ffmpeg_branding == "ChromeOS") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "ChromeOS")) { +- ffmpeg_asm_sources += [ +- "libavcodec/x86/aacpsdsp.asm", +- "libavcodec/x86/mdct15.asm", +- "libavcodec/x86/sbrdsp.asm", +- ] +-} +- +-if (use_linux_config && ffmpeg_branding == "ChromeOS") { ++if (use_linux_config && current_cpu == "la64" && ffmpeg_branding == "ChromeOS") { + ffmpeg_c_sources += [ + "libavcodec/acelp_filters.c", + "libavcodec/acelp_pitch_delay.c", + "libavcodec/acelp_vectors.c", + "libavcodec/amrnbdec.c", + "libavcodec/amrwbdec.c", +- "libavcodec/autorename_libavcodec_blockdsp.c", +- "libavcodec/autorename_libavcodec_idctdsp.c", +- "libavcodec/autorename_libavcodec_me_cmp.c", +- "libavcodec/autorename_libavcodec_mpegvideo.c", +- "libavcodec/autorename_libavcodec_mpegvideodsp.c", +- "libavcodec/autorename_libavcodec_pixblockdsp.c", +- "libavcodec/autorename_libavcodec_qpeldsp.c", +- "libavcodec/autorename_libavcodec_simple_idct.c", +- "libavcodec/autorename_libavcodec_xvididct.c", ++ "libavcodec/blockdsp.c", + "libavcodec/celp_filters.c", + "libavcodec/celp_math.c", + "libavcodec/error_resilience.c", +@@ -382,23 +268,31 @@ if (use_linux_config && ffmpeg_branding == "ChromeOS") { + "libavcodec/h263data.c", + "libavcodec/h263dec.c", + "libavcodec/h263dsp.c", ++ "libavcodec/idctdsp.c", + "libavcodec/intelh263dec.c", + "libavcodec/ituh263dec.c", + "libavcodec/jfdctfst.c", + "libavcodec/jfdctint.c", + "libavcodec/jrevdct.c", + "libavcodec/lsp.c", ++ "libavcodec/me_cmp.c", + "libavcodec/mpeg4video.c", + "libavcodec/mpeg4video_parser.c", + "libavcodec/mpeg4videodec.c", + "libavcodec/mpeg_er.c", + "libavcodec/mpegpicture.c", + "libavcodec/mpegutils.c", ++ "libavcodec/mpegvideo.c", + "libavcodec/mpegvideo_motion.c", + "libavcodec/mpegvideodata.c", ++ "libavcodec/mpegvideodsp.c", + "libavcodec/msgsmdec.c", ++ "libavcodec/pixblockdsp.c", ++ "libavcodec/qpeldsp.c", + "libavcodec/rl.c", ++ "libavcodec/simple_idct.c", + "libavcodec/tiff_common.c", ++ "libavcodec/xvididct.c", + "libavformat/amr.c", + "libavformat/avidec.c", + ] +diff --git a/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h 
b/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h +index c5439ca150d..ed6fb54632b 100644 +--- a/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h ++++ b/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h +@@ -103,7 +103,7 @@ int main(int argc, char** argv) { + #if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ +- defined(__mips__) || \ ++ defined(__mips__) || defined(__loongarch__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ +diff --git a/src/3rdparty/chromium/third_party/libvpx/BUILD.gn b/src/3rdparty/chromium/third_party/libvpx/BUILD.gn +index 9b92313b41e..75f0869ae10 100644 +--- a/src/3rdparty/chromium/third_party/libvpx/BUILD.gn ++++ b/src/3rdparty/chromium/third_party/libvpx/BUILD.gn +@@ -326,6 +326,8 @@ static_library("bundled_libvpx") { + } + } else if (current_cpu == "mipsel" || current_cpu == "mips64el") { + sources = libvpx_srcs_mips ++ } else if (current_cpu == "la64") { ++ sources = libvpx_srcs_generic + } else if (current_cpu == "arm") { + if (is_chromeos) { + sources = libvpx_srcs_arm_neon_highbd +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp8_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp8_rtcd.h +new file mode 100644 +index 00000000000..aa475b55faf +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp8_rtcd.h +@@ -0,0 +1,357 @@ ++// This file is generated. Do not edit. 
++#ifndef VP8_RTCD_H_ ++#define VP8_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++/* ++ * VP8 ++ */ ++ ++struct blockd; ++struct macroblockd; ++struct loop_filter_info; ++ ++/* Encoder forward decls */ ++struct block; ++struct macroblock; ++struct variance_vtable; ++union int_mv; ++struct yv12_buffer_config; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void vp8_bilinear_predict16x16_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c ++ ++void vp8_bilinear_predict4x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c ++ ++void vp8_bilinear_predict8x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c ++ ++void vp8_bilinear_predict8x8_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c ++ ++void vp8_blend_b_c(unsigned char* y, ++ unsigned char* u, ++ unsigned char* v, ++ int y_1, ++ int u_1, ++ int v_1, ++ int alpha, ++ int stride); ++#define vp8_blend_b vp8_blend_b_c ++ ++void vp8_blend_mb_inner_c(unsigned char* y, ++ unsigned char* u, ++ unsigned char* v, ++ int y_1, ++ int u_1, ++ int v_1, ++ int alpha, ++ int stride); ++#define vp8_blend_mb_inner vp8_blend_mb_inner_c ++ ++void vp8_blend_mb_outer_c(unsigned char* y, ++ unsigned char* u, ++ unsigned char* v, ++ int y_1, ++ int u_1, ++ int v_1, ++ int alpha, ++ int stride); ++#define vp8_blend_mb_outer vp8_blend_mb_outer_c ++ ++int vp8_block_error_c(short* coeff, short* dqcoeff); ++#define vp8_block_error vp8_block_error_c ++ ++void vp8_copy32xn_c(const unsigned char* src_ptr, ++ int src_stride, ++ unsigned char* dst_ptr, ++ int dst_stride, ++ int height); ++#define vp8_copy32xn vp8_copy32xn_c ++ ++void vp8_copy_mem16x16_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride); ++#define vp8_copy_mem16x16 vp8_copy_mem16x16_c ++ ++void vp8_copy_mem8x4_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride); ++#define vp8_copy_mem8x4 vp8_copy_mem8x4_c ++ ++void vp8_copy_mem8x8_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride); ++#define vp8_copy_mem8x8 vp8_copy_mem8x8_c ++ ++void vp8_dc_only_idct_add_c(short input_dc, ++ unsigned char* pred_ptr, ++ int pred_stride, ++ unsigned char* dst_ptr, ++ int dst_stride); ++#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c ++ ++int vp8_denoiser_filter_c(unsigned char* mc_running_avg_y, ++ int mc_avg_y_stride, ++ unsigned char* running_avg_y, ++ int avg_y_stride, ++ unsigned char* sig, ++ int sig_stride, ++ unsigned int motion_magnitude, ++ int increase_denoising); ++#define vp8_denoiser_filter vp8_denoiser_filter_c ++ ++int vp8_denoiser_filter_uv_c(unsigned char* mc_running_avg, ++ int mc_avg_stride, ++ unsigned char* running_avg, ++ int avg_stride, ++ unsigned char* sig, ++ int sig_stride, ++ unsigned int motion_magnitude, ++ int increase_denoising); ++#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_c ++ ++void vp8_dequant_idct_add_c(short* input, ++ short* dq, ++ unsigned char* dest, ++ int 
stride); ++#define vp8_dequant_idct_add vp8_dequant_idct_add_c ++ ++void vp8_dequant_idct_add_uv_block_c(short* q, ++ short* dq, ++ unsigned char* dst_u, ++ unsigned char* dst_v, ++ int stride, ++ char* eobs); ++#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c ++ ++void vp8_dequant_idct_add_y_block_c(short* q, ++ short* dq, ++ unsigned char* dst, ++ int stride, ++ char* eobs); ++#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c ++ ++void vp8_dequantize_b_c(struct blockd*, short* DQC); ++#define vp8_dequantize_b vp8_dequantize_b_c ++ ++int vp8_diamond_search_sad_c(struct macroblock* x, ++ struct block* b, ++ struct blockd* d, ++ union int_mv* ref_mv, ++ union int_mv* best_mv, ++ int search_param, ++ int sad_per_bit, ++ int* num00, ++ struct variance_vtable* fn_ptr, ++ int* mvcost[2], ++ union int_mv* center_mv); ++#define vp8_diamond_search_sad vp8_diamond_search_sad_c ++ ++void vp8_fast_quantize_b_c(struct block*, struct blockd*); ++#define vp8_fast_quantize_b vp8_fast_quantize_b_c ++ ++void vp8_filter_by_weight16x16_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride, ++ int src_weight); ++#define vp8_filter_by_weight16x16 vp8_filter_by_weight16x16_c ++ ++void vp8_filter_by_weight4x4_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride, ++ int src_weight); ++#define vp8_filter_by_weight4x4 vp8_filter_by_weight4x4_c ++ ++void vp8_filter_by_weight8x8_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride, ++ int src_weight); ++#define vp8_filter_by_weight8x8 vp8_filter_by_weight8x8_c ++ ++int vp8_full_search_sad_c(struct macroblock* x, ++ struct block* b, ++ struct blockd* d, ++ union int_mv* ref_mv, ++ int sad_per_bit, ++ int distance, ++ struct variance_vtable* fn_ptr, ++ int* mvcost[2], ++ union int_mv* center_mv); ++#define vp8_full_search_sad vp8_full_search_sad_c ++ ++void vp8_loop_filter_bh_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_bh vp8_loop_filter_bh_c ++ ++void vp8_loop_filter_bv_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_bv vp8_loop_filter_bv_c ++ ++void vp8_loop_filter_mbh_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c ++ ++void vp8_loop_filter_mbv_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c ++ ++void vp8_loop_filter_bhs_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c ++ ++void vp8_loop_filter_bvs_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c ++ ++void vp8_loop_filter_simple_horizontal_edge_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c ++ ++void vp8_loop_filter_simple_vertical_edge_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c ++ ++int 
vp8_mbblock_error_c(struct macroblock* mb, int dc); ++#define vp8_mbblock_error vp8_mbblock_error_c ++ ++int vp8_mbuverror_c(struct macroblock* mb); ++#define vp8_mbuverror vp8_mbuverror_c ++ ++int vp8_refining_search_sad_c(struct macroblock* x, ++ struct block* b, ++ struct blockd* d, ++ union int_mv* ref_mv, ++ int error_per_bit, ++ int search_range, ++ struct variance_vtable* fn_ptr, ++ int* mvcost[2], ++ union int_mv* center_mv); ++#define vp8_refining_search_sad vp8_refining_search_sad_c ++ ++void vp8_regular_quantize_b_c(struct block*, struct blockd*); ++#define vp8_regular_quantize_b vp8_regular_quantize_b_c ++ ++void vp8_short_fdct4x4_c(short* input, short* output, int pitch); ++#define vp8_short_fdct4x4 vp8_short_fdct4x4_c ++ ++void vp8_short_fdct8x4_c(short* input, short* output, int pitch); ++#define vp8_short_fdct8x4 vp8_short_fdct8x4_c ++ ++void vp8_short_idct4x4llm_c(short* input, ++ unsigned char* pred_ptr, ++ int pred_stride, ++ unsigned char* dst_ptr, ++ int dst_stride); ++#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c ++ ++void vp8_short_inv_walsh4x4_c(short* input, short* mb_dqcoeff); ++#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c ++ ++void vp8_short_inv_walsh4x4_1_c(short* input, short* mb_dqcoeff); ++#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c ++ ++void vp8_short_walsh4x4_c(short* input, short* output, int pitch); ++#define vp8_short_walsh4x4 vp8_short_walsh4x4_c ++ ++void vp8_sixtap_predict16x16_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c ++ ++void vp8_sixtap_predict4x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c ++ ++void vp8_sixtap_predict8x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c ++ ++void vp8_sixtap_predict8x8_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c ++ ++void vp8_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp9_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp9_rtcd.h +new file mode 100644 +index 00000000000..00913931484 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp9_rtcd.h +@@ -0,0 +1,275 @@ ++// This file is generated. Do not edit. 
++#ifndef VP9_RTCD_H_ ++#define VP9_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++/* ++ * VP9 ++ */ ++ ++#include "vp9/common/vp9_common.h" ++#include "vp9/common/vp9_enums.h" ++#include "vp9/common/vp9_filter.h" ++#include "vpx/vpx_integer.h" ++ ++struct macroblockd; ++ ++/* Encoder forward decls */ ++struct macroblock; ++struct vp9_variance_vtable; ++struct search_site_config; ++struct mv; ++union int_mv; ++struct yv12_buffer_config; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++int64_t vp9_block_error_c(const tran_low_t* coeff, ++ const tran_low_t* dqcoeff, ++ intptr_t block_size, ++ int64_t* ssz); ++#define vp9_block_error vp9_block_error_c ++ ++int64_t vp9_block_error_fp_c(const tran_low_t* coeff, ++ const tran_low_t* dqcoeff, ++ int block_size); ++#define vp9_block_error_fp vp9_block_error_fp_c ++ ++int vp9_denoiser_filter_c(const uint8_t* sig, ++ int sig_stride, ++ const uint8_t* mc_avg, ++ int mc_avg_stride, ++ uint8_t* avg, ++ int avg_stride, ++ int increase_denoising, ++ BLOCK_SIZE bs, ++ int motion_magnitude); ++#define vp9_denoiser_filter vp9_denoiser_filter_c ++ ++int vp9_diamond_search_sad_c(const struct macroblock* x, ++ const struct search_site_config* cfg, ++ struct mv* ref_mv, ++ struct mv* best_mv, ++ int search_param, ++ int sad_per_bit, ++ int* num00, ++ const struct vp9_variance_vtable* fn_ptr, ++ const struct mv* center_mv); ++#define vp9_diamond_search_sad vp9_diamond_search_sad_c ++ ++void vp9_fht16x16_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_fht16x16 vp9_fht16x16_c ++ ++void vp9_fht4x4_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_fht4x4 vp9_fht4x4_c ++ ++void vp9_fht8x8_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_fht8x8 vp9_fht8x8_c ++ ++void vp9_filter_by_weight16x16_c(const uint8_t* src, ++ int src_stride, ++ uint8_t* dst, ++ int dst_stride, ++ int src_weight); ++#define vp9_filter_by_weight16x16 vp9_filter_by_weight16x16_c ++ ++void vp9_filter_by_weight8x8_c(const uint8_t* src, ++ int src_stride, ++ uint8_t* dst, ++ int dst_stride, ++ int src_weight); ++#define vp9_filter_by_weight8x8 vp9_filter_by_weight8x8_c ++ ++void vp9_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vp9_fwht4x4 vp9_fwht4x4_c ++ ++int64_t vp9_highbd_block_error_c(const tran_low_t* coeff, ++ const tran_low_t* dqcoeff, ++ intptr_t block_size, ++ int64_t* ssz, ++ int bd); ++#define vp9_highbd_block_error vp9_highbd_block_error_c ++ ++void vp9_highbd_fht16x16_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_highbd_fht16x16 vp9_highbd_fht16x16_c ++ ++void vp9_highbd_fht4x4_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_highbd_fht4x4 vp9_highbd_fht4x4_c ++ ++void vp9_highbd_fht8x8_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_highbd_fht8x8 vp9_highbd_fht8x8_c ++ ++void vp9_highbd_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vp9_highbd_fwht4x4 vp9_highbd_fwht4x4_c ++ ++void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int tx_type, ++ int bd); ++#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c ++ ++void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int tx_type, ++ int bd); ++#define 
vp9_highbd_iht4x4_16_add vp9_highbd_iht4x4_16_add_c ++ ++void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int tx_type, ++ int bd); ++#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c ++ ++void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vp9_highbd_mbpost_proc_across_ip vp9_highbd_mbpost_proc_across_ip_c ++ ++void vp9_highbd_mbpost_proc_down_c(uint16_t* dst, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vp9_highbd_mbpost_proc_down vp9_highbd_mbpost_proc_down_c ++ ++void vp9_highbd_post_proc_down_and_across_c(const uint16_t* src_ptr, ++ uint16_t* dst_ptr, ++ int src_pixels_per_line, ++ int dst_pixels_per_line, ++ int rows, ++ int cols, ++ int flimit); ++#define vp9_highbd_post_proc_down_and_across \ ++ vp9_highbd_post_proc_down_and_across_c ++ ++void vp9_highbd_quantize_fp_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_highbd_quantize_fp vp9_highbd_quantize_fp_c ++ ++void vp9_highbd_quantize_fp_32x32_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_highbd_quantize_fp_32x32 vp9_highbd_quantize_fp_32x32_c ++ ++void vp9_highbd_temporal_filter_apply_c(const uint8_t* frame1, ++ unsigned int stride, ++ const uint8_t* frame2, ++ unsigned int block_width, ++ unsigned int block_height, ++ int strength, ++ int* blk_fw, ++ int use_32x32, ++ uint32_t* accumulator, ++ uint16_t* count); ++#define vp9_highbd_temporal_filter_apply vp9_highbd_temporal_filter_apply_c ++ ++void vp9_iht16x16_256_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride, ++ int tx_type); ++#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c ++ ++void vp9_iht4x4_16_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride, ++ int tx_type); ++#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c ++ ++void vp9_iht8x8_64_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride, ++ int tx_type); ++#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c ++ ++void vp9_quantize_fp_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_quantize_fp vp9_quantize_fp_c ++ ++void vp9_quantize_fp_32x32_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c ++ ++void vp9_scale_and_extend_frame_c(const struct yv12_buffer_config* src, ++ struct yv12_buffer_config* dst, ++ INTERP_FILTER filter_type, ++ int phase_scaler); ++#define vp9_scale_and_extend_frame vp9_scale_and_extend_frame_c ++ ++void vp9_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void 
setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.asm b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.asm +new file mode 100644 +index 00000000000..00712e52bbb +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.asm +@@ -0,0 +1,98 @@ ++@ This file was created from a .asm file ++@ using the ads2gas.pl script. ++ .syntax unified ++.equ VPX_ARCH_ARM , 0 ++.equ ARCH_ARM , 0 ++.equ VPX_ARCH_MIPS , 0 ++.equ ARCH_MIPS , 0 ++.equ VPX_ARCH_X86 , 0 ++.equ ARCH_X86 , 0 ++.equ VPX_ARCH_X86_64 , 0 ++.equ ARCH_X86_64 , 0 ++.equ VPX_ARCH_PPC , 0 ++.equ ARCH_PPC , 0 ++.equ HAVE_NEON , 0 ++.equ HAVE_NEON_ASM , 0 ++.equ HAVE_MIPS32 , 0 ++.equ HAVE_DSPR2 , 0 ++.equ HAVE_MSA , 0 ++.equ HAVE_MIPS64 , 0 ++.equ HAVE_MMX , 0 ++.equ HAVE_SSE , 0 ++.equ HAVE_SSE2 , 0 ++.equ HAVE_SSE3 , 0 ++.equ HAVE_SSSE3 , 0 ++.equ HAVE_SSE4_1 , 0 ++.equ HAVE_AVX , 0 ++.equ HAVE_AVX2 , 0 ++.equ HAVE_AVX512 , 0 ++.equ HAVE_VSX , 0 ++.equ HAVE_MMI , 0 ++.equ HAVE_VPX_PORTS , 1 ++.equ HAVE_PTHREAD_H , 1 ++.equ HAVE_UNISTD_H , 0 ++.equ CONFIG_DEPENDENCY_TRACKING , 1 ++.equ CONFIG_EXTERNAL_BUILD , 1 ++.equ CONFIG_INSTALL_DOCS , 0 ++.equ CONFIG_INSTALL_BINS , 1 ++.equ CONFIG_INSTALL_LIBS , 1 ++.equ CONFIG_INSTALL_SRCS , 0 ++.equ CONFIG_DEBUG , 0 ++.equ CONFIG_GPROF , 0 ++.equ CONFIG_GCOV , 0 ++.equ CONFIG_RVCT , 0 ++.equ CONFIG_GCC , 1 ++.equ CONFIG_MSVS , 0 ++.equ CONFIG_PIC , 0 ++.equ CONFIG_BIG_ENDIAN , 0 ++.equ CONFIG_CODEC_SRCS , 0 ++.equ CONFIG_DEBUG_LIBS , 0 ++.equ CONFIG_DEQUANT_TOKENS , 0 ++.equ CONFIG_DC_RECON , 0 ++.equ CONFIG_RUNTIME_CPU_DETECT , 0 ++.equ CONFIG_POSTPROC , 1 ++.equ CONFIG_VP9_POSTPROC , 1 ++.equ CONFIG_MULTITHREAD , 1 ++.equ CONFIG_INTERNAL_STATS , 0 ++.equ CONFIG_VP8_ENCODER , 1 ++.equ CONFIG_VP8_DECODER , 1 ++.equ CONFIG_VP9_ENCODER , 1 ++.equ CONFIG_VP9_DECODER , 1 ++.equ CONFIG_VP8 , 1 ++.equ CONFIG_VP9 , 1 ++.equ CONFIG_ENCODERS , 1 ++.equ CONFIG_DECODERS , 1 ++.equ CONFIG_STATIC_MSVCRT , 0 ++.equ CONFIG_SPATIAL_RESAMPLING , 1 ++.equ CONFIG_REALTIME_ONLY , 1 ++.equ CONFIG_ONTHEFLY_BITPACKING , 0 ++.equ CONFIG_ERROR_CONCEALMENT , 0 ++.equ CONFIG_SHARED , 0 ++.equ CONFIG_STATIC , 1 ++.equ CONFIG_SMALL , 0 ++.equ CONFIG_POSTPROC_VISUALIZER , 0 ++.equ CONFIG_OS_SUPPORT , 1 ++.equ CONFIG_UNIT_TESTS , 1 ++.equ CONFIG_WEBM_IO , 1 ++.equ CONFIG_LIBYUV , 0 ++.equ CONFIG_DECODE_PERF_TESTS , 0 ++.equ CONFIG_ENCODE_PERF_TESTS , 0 ++.equ CONFIG_MULTI_RES_ENCODING , 1 ++.equ CONFIG_TEMPORAL_DENOISING , 1 ++.equ CONFIG_VP9_TEMPORAL_DENOISING , 1 ++.equ CONFIG_CONSISTENT_RECODE , 0 ++.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 ++.equ CONFIG_VP9_HIGHBITDEPTH , 1 ++.equ CONFIG_BETTER_HW_COMPATIBILITY , 0 ++.equ CONFIG_EXPERIMENTAL , 0 ++.equ CONFIG_SIZE_LIMIT , 1 ++.equ CONFIG_ALWAYS_ADJUST_BPM , 0 ++.equ CONFIG_BITSTREAM_DEBUG , 0 ++.equ CONFIG_MISMATCH_DEBUG , 0 ++.equ CONFIG_FP_MB_STATS , 0 ++.equ CONFIG_EMULATE_HARDWARE , 0 ++.equ CONFIG_NON_GREEDY_MV , 0 ++.equ CONFIG_RATE_CTRL , 0 ++.equ DECODE_WIDTH_LIMIT , 16384 ++.equ DECODE_HEIGHT_LIMIT , 16384 ++ .section .note.GNU-stack,"",%progbits +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.c b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.c +new file mode 100644 +index 00000000000..8aad25ff174 +--- /dev/null ++++ 
b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.c +@@ -0,0 +1,10 @@ ++/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ ++/* */ ++/* Use of this source code is governed by a BSD-style license */ ++/* that can be found in the LICENSE file in the root of the source */ ++/* tree. An additional intellectual property rights grant can be found */ ++/* in the file PATENTS. All contributing project authors may */ ++/* be found in the AUTHORS file in the root of the source tree. */ ++#include "vpx/vpx_codec.h" ++static const char* const cfg = "--target=generic-gnu --enable-vp9-highbitdepth --enable-external-build --enable-postproc --enable-multi-res-encoding --enable-temporal-denoising --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384 --enable-realtime-only --disable-install-docs --disable-libyuv"; ++const char *vpx_codec_build_config(void) {return cfg;} +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.h +new file mode 100644 +index 00000000000..fddb76bd2f9 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.h +@@ -0,0 +1,107 @@ ++/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ ++/* */ ++/* Use of this source code is governed by a BSD-style license */ ++/* that can be found in the LICENSE file in the root of the source */ ++/* tree. An additional intellectual property rights grant can be found */ ++/* in the file PATENTS. All contributing project authors may */ ++/* be found in the AUTHORS file in the root of the source tree. */ ++/* This file automatically generated by configure. Do not edit! 
*/ ++#ifndef VPX_CONFIG_H ++#define VPX_CONFIG_H ++#define RESTRICT ++#define INLINE inline ++#define VPX_ARCH_ARM 0 ++#define ARCH_ARM 0 ++#define VPX_ARCH_MIPS 0 ++#define ARCH_MIPS 0 ++#define VPX_ARCH_X86 0 ++#define ARCH_X86 0 ++#define VPX_ARCH_X86_64 0 ++#define ARCH_X86_64 0 ++#define VPX_ARCH_PPC 0 ++#define ARCH_PPC 0 ++#define HAVE_NEON 0 ++#define HAVE_NEON_ASM 0 ++#define HAVE_MIPS32 0 ++#define HAVE_DSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MIPS64 0 ++#define HAVE_MMX 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 ++#define HAVE_SSE3 0 ++#define HAVE_SSSE3 0 ++#define HAVE_SSE4_1 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_VSX 0 ++#define HAVE_MMI 0 ++#define HAVE_VPX_PORTS 1 ++#define HAVE_PTHREAD_H 1 ++#define HAVE_UNISTD_H 0 ++#define CONFIG_DEPENDENCY_TRACKING 1 ++#define CONFIG_EXTERNAL_BUILD 1 ++#define CONFIG_INSTALL_DOCS 0 ++#define CONFIG_INSTALL_BINS 1 ++#define CONFIG_INSTALL_LIBS 1 ++#define CONFIG_INSTALL_SRCS 0 ++#define CONFIG_DEBUG 0 ++#define CONFIG_GPROF 0 ++#define CONFIG_GCOV 0 ++#define CONFIG_RVCT 0 ++#define CONFIG_GCC 1 ++#define CONFIG_MSVS 0 ++#define CONFIG_PIC 0 ++#define CONFIG_BIG_ENDIAN 0 ++#define CONFIG_CODEC_SRCS 0 ++#define CONFIG_DEBUG_LIBS 0 ++#define CONFIG_DEQUANT_TOKENS 0 ++#define CONFIG_DC_RECON 0 ++#define CONFIG_RUNTIME_CPU_DETECT 0 ++#define CONFIG_POSTPROC 1 ++#define CONFIG_VP9_POSTPROC 1 ++#define CONFIG_MULTITHREAD 1 ++#define CONFIG_INTERNAL_STATS 0 ++#define CONFIG_VP8_ENCODER 1 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP9_ENCODER 1 ++#define CONFIG_VP9_DECODER 1 ++#define CONFIG_VP8 1 ++#define CONFIG_VP9 1 ++#define CONFIG_ENCODERS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_STATIC_MSVCRT 0 ++#define CONFIG_SPATIAL_RESAMPLING 1 ++#define CONFIG_REALTIME_ONLY 1 ++#define CONFIG_ONTHEFLY_BITPACKING 0 ++#define CONFIG_ERROR_CONCEALMENT 0 ++#define CONFIG_SHARED 0 ++#define CONFIG_STATIC 1 ++#define CONFIG_SMALL 0 ++#define CONFIG_POSTPROC_VISUALIZER 0 ++#define CONFIG_OS_SUPPORT 1 ++#define CONFIG_UNIT_TESTS 1 ++#define CONFIG_WEBM_IO 1 ++#define CONFIG_LIBYUV 0 ++#define CONFIG_DECODE_PERF_TESTS 0 ++#define CONFIG_ENCODE_PERF_TESTS 0 ++#define CONFIG_MULTI_RES_ENCODING 1 ++#define CONFIG_TEMPORAL_DENOISING 1 ++#define CONFIG_VP9_TEMPORAL_DENOISING 1 ++#define CONFIG_CONSISTENT_RECODE 0 ++#define CONFIG_COEFFICIENT_RANGE_CHECKING 0 ++#define CONFIG_VP9_HIGHBITDEPTH 1 ++#define CONFIG_BETTER_HW_COMPATIBILITY 0 ++#define CONFIG_EXPERIMENTAL 0 ++#define CONFIG_SIZE_LIMIT 1 ++#define CONFIG_ALWAYS_ADJUST_BPM 0 ++#define CONFIG_BITSTREAM_DEBUG 0 ++#define CONFIG_MISMATCH_DEBUG 0 ++#define CONFIG_FP_MB_STATS 0 ++#define CONFIG_EMULATE_HARDWARE 0 ++#define CONFIG_NON_GREEDY_MV 0 ++#define CONFIG_RATE_CTRL 0 ++#define DECODE_WIDTH_LIMIT 16384 ++#define DECODE_HEIGHT_LIMIT 16384 ++#endif /* VPX_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_dsp_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_dsp_rtcd.h +new file mode 100644 +index 00000000000..8ba4d88055d +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_dsp_rtcd.h +@@ -0,0 +1,3868 @@ ++// This file is generated. Do not edit. 
++#ifndef VPX_DSP_RTCD_H_ ++#define VPX_DSP_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++/* ++ * DSP ++ */ ++ ++#include "vpx/vpx_integer.h" ++#include "vpx_dsp/vpx_dsp_common.h" ++#include "vpx_dsp/vpx_filter.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++unsigned int vpx_avg_4x4_c(const uint8_t*, int p); ++#define vpx_avg_4x4 vpx_avg_4x4_c ++ ++unsigned int vpx_avg_8x8_c(const uint8_t*, int p); ++#define vpx_avg_8x8 vpx_avg_8x8_c ++ ++void vpx_comp_avg_pred_c(uint8_t* comp_pred, ++ const uint8_t* pred, ++ int width, ++ int height, ++ const uint8_t* ref, ++ int ref_stride); ++#define vpx_comp_avg_pred vpx_comp_avg_pred_c ++ ++void vpx_convolve8_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8 vpx_convolve8_c ++ ++void vpx_convolve8_avg_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_avg vpx_convolve8_avg_c ++ ++void vpx_convolve8_avg_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c ++ ++void vpx_convolve8_avg_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c ++ ++void vpx_convolve8_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_horiz vpx_convolve8_horiz_c ++ ++void vpx_convolve8_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_vert vpx_convolve8_vert_c ++ ++void vpx_convolve_avg_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve_avg vpx_convolve_avg_c ++ ++void vpx_convolve_copy_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve_copy vpx_convolve_copy_c ++ ++void vpx_d117_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c ++ ++void vpx_d117_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c ++ ++void vpx_d117_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c ++ ++void 
vpx_d117_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c ++ ++void vpx_d135_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c ++ ++void vpx_d135_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c ++ ++void vpx_d135_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c ++ ++void vpx_d135_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c ++ ++void vpx_d153_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c ++ ++void vpx_d153_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c ++ ++void vpx_d153_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c ++ ++void vpx_d153_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c ++ ++void vpx_d207_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c ++ ++void vpx_d207_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c ++ ++void vpx_d207_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c ++ ++void vpx_d207_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c ++ ++void vpx_d45_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c ++ ++void vpx_d45_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c ++ ++void vpx_d45_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c ++ ++void vpx_d45_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c ++ ++void vpx_d45e_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c ++ ++void vpx_d63_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c ++ ++void vpx_d63_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define 
vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c ++ ++void vpx_d63_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c ++ ++void vpx_d63_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c ++ ++void vpx_d63e_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c ++ ++void vpx_dc_128_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c ++ ++void vpx_dc_128_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c ++ ++void vpx_dc_128_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c ++ ++void vpx_dc_128_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c ++ ++void vpx_dc_left_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c ++ ++void vpx_dc_left_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c ++ ++void vpx_dc_left_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c ++ ++void vpx_dc_left_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c ++ ++void vpx_dc_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c ++ ++void vpx_dc_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c ++ ++void vpx_dc_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c ++ ++void vpx_dc_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c ++ ++void vpx_dc_top_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c ++ ++void vpx_dc_top_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c ++ ++void vpx_dc_top_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c ++ ++void vpx_dc_top_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c ++ ++void 
vpx_fdct16x16_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct16x16 vpx_fdct16x16_c ++ ++void vpx_fdct16x16_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct16x16_1 vpx_fdct16x16_1_c ++ ++void vpx_fdct32x32_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct32x32 vpx_fdct32x32_c ++ ++void vpx_fdct32x32_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct32x32_1 vpx_fdct32x32_1_c ++ ++void vpx_fdct32x32_rd_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct32x32_rd vpx_fdct32x32_rd_c ++ ++void vpx_fdct4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct4x4 vpx_fdct4x4_c ++ ++void vpx_fdct4x4_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct4x4_1 vpx_fdct4x4_1_c ++ ++void vpx_fdct8x8_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct8x8 vpx_fdct8x8_c ++ ++void vpx_fdct8x8_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct8x8_1 vpx_fdct8x8_1_c ++ ++void vpx_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_get16x16var vpx_get16x16var_c ++ ++unsigned int vpx_get4x4sse_cs_c(const unsigned char* src_ptr, ++ int src_stride, ++ const unsigned char* ref_ptr, ++ int ref_stride); ++#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c ++ ++void vpx_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_get8x8var vpx_get8x8var_c ++ ++unsigned int vpx_get_mb_ss_c(const int16_t*); ++#define vpx_get_mb_ss vpx_get_mb_ss_c ++ ++void vpx_h_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c ++ ++void vpx_h_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c ++ ++void vpx_h_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c ++ ++void vpx_h_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c ++ ++void vpx_hadamard_16x16_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_hadamard_16x16 vpx_hadamard_16x16_c ++ ++void vpx_hadamard_32x32_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_hadamard_32x32 vpx_hadamard_32x32_c ++ ++void vpx_hadamard_8x8_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_hadamard_8x8 vpx_hadamard_8x8_c ++ ++void vpx_he_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c ++ ++void vpx_highbd_10_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_10_get16x16var vpx_highbd_10_get16x16var_c ++ ++void vpx_highbd_10_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_10_get8x8var vpx_highbd_10_get8x8var_c ++ ++unsigned int 
vpx_highbd_10_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse16x16 vpx_highbd_10_mse16x16_c ++ ++unsigned int vpx_highbd_10_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse16x8 vpx_highbd_10_mse16x8_c ++ ++unsigned int vpx_highbd_10_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse8x16 vpx_highbd_10_mse8x16_c ++ ++unsigned int vpx_highbd_10_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse8x8 vpx_highbd_10_mse8x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance16x16 \ ++ vpx_highbd_10_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance16x32 \ ++ vpx_highbd_10_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance16x8 \ ++ vpx_highbd_10_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance32x16 \ ++ vpx_highbd_10_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance32x32 \ ++ vpx_highbd_10_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance32x64 \ ++ vpx_highbd_10_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance4x4 \ ++ vpx_highbd_10_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance4x8 \ ++ vpx_highbd_10_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32_c( ++ const 
uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance64x32 \ ++ vpx_highbd_10_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance64x64 \ ++ vpx_highbd_10_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance8x16 \ ++ vpx_highbd_10_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance8x4 \ ++ vpx_highbd_10_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance8x8 \ ++ vpx_highbd_10_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance16x16 \ ++ vpx_highbd_10_sub_pixel_variance16x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance16x32 \ ++ vpx_highbd_10_sub_pixel_variance16x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance16x8 \ ++ vpx_highbd_10_sub_pixel_variance16x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance32x16 \ ++ vpx_highbd_10_sub_pixel_variance32x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance32x32 \ ++ vpx_highbd_10_sub_pixel_variance32x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance32x64 \ ++ vpx_highbd_10_sub_pixel_variance32x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define 
vpx_highbd_10_sub_pixel_variance4x4 \ ++ vpx_highbd_10_sub_pixel_variance4x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance4x8 \ ++ vpx_highbd_10_sub_pixel_variance4x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance64x32 \ ++ vpx_highbd_10_sub_pixel_variance64x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance64x64 \ ++ vpx_highbd_10_sub_pixel_variance64x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance8x16 \ ++ vpx_highbd_10_sub_pixel_variance8x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance8x4 \ ++ vpx_highbd_10_sub_pixel_variance8x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance8x8 \ ++ vpx_highbd_10_sub_pixel_variance8x8_c ++ ++unsigned int vpx_highbd_10_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance16x16 vpx_highbd_10_variance16x16_c ++ ++unsigned int vpx_highbd_10_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance16x32 vpx_highbd_10_variance16x32_c ++ ++unsigned int vpx_highbd_10_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance16x8 vpx_highbd_10_variance16x8_c ++ ++unsigned int vpx_highbd_10_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance32x16 vpx_highbd_10_variance32x16_c ++ ++unsigned int vpx_highbd_10_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance32x32 vpx_highbd_10_variance32x32_c ++ ++unsigned int vpx_highbd_10_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance32x64 vpx_highbd_10_variance32x64_c ++ ++unsigned int vpx_highbd_10_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance4x4 vpx_highbd_10_variance4x4_c ++ ++unsigned int vpx_highbd_10_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define 
vpx_highbd_10_variance4x8 vpx_highbd_10_variance4x8_c ++ ++unsigned int vpx_highbd_10_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance64x32 vpx_highbd_10_variance64x32_c ++ ++unsigned int vpx_highbd_10_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance64x64 vpx_highbd_10_variance64x64_c ++ ++unsigned int vpx_highbd_10_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance8x16 vpx_highbd_10_variance8x16_c ++ ++unsigned int vpx_highbd_10_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance8x4 vpx_highbd_10_variance8x4_c ++ ++unsigned int vpx_highbd_10_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance8x8 vpx_highbd_10_variance8x8_c ++ ++void vpx_highbd_12_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_12_get16x16var vpx_highbd_12_get16x16var_c ++ ++void vpx_highbd_12_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_12_get8x8var vpx_highbd_12_get8x8var_c ++ ++unsigned int vpx_highbd_12_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse16x16 vpx_highbd_12_mse16x16_c ++ ++unsigned int vpx_highbd_12_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse16x8 vpx_highbd_12_mse16x8_c ++ ++unsigned int vpx_highbd_12_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse8x16 vpx_highbd_12_mse8x16_c ++ ++unsigned int vpx_highbd_12_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse8x8 vpx_highbd_12_mse8x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance16x16 \ ++ vpx_highbd_12_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance16x32 \ ++ vpx_highbd_12_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance16x8 \ ++ vpx_highbd_12_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16_c( ++ const uint8_t* src_ptr, 
++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance32x16 \ ++ vpx_highbd_12_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance32x32 \ ++ vpx_highbd_12_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance32x64 \ ++ vpx_highbd_12_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance4x4 \ ++ vpx_highbd_12_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance4x8 \ ++ vpx_highbd_12_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance64x32 \ ++ vpx_highbd_12_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance64x64 \ ++ vpx_highbd_12_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance8x16 \ ++ vpx_highbd_12_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance8x4 \ ++ vpx_highbd_12_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance8x8 \ ++ vpx_highbd_12_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance16x16 \ ++ vpx_highbd_12_sub_pixel_variance16x16_c ++ ++uint32_t 
vpx_highbd_12_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance16x32 \ ++ vpx_highbd_12_sub_pixel_variance16x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance16x8 \ ++ vpx_highbd_12_sub_pixel_variance16x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance32x16 \ ++ vpx_highbd_12_sub_pixel_variance32x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance32x32 \ ++ vpx_highbd_12_sub_pixel_variance32x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance32x64 \ ++ vpx_highbd_12_sub_pixel_variance32x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance4x4 \ ++ vpx_highbd_12_sub_pixel_variance4x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance4x8 \ ++ vpx_highbd_12_sub_pixel_variance4x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance64x32 \ ++ vpx_highbd_12_sub_pixel_variance64x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance64x64 \ ++ vpx_highbd_12_sub_pixel_variance64x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance8x16 \ ++ vpx_highbd_12_sub_pixel_variance8x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance8x4 \ ++ vpx_highbd_12_sub_pixel_variance8x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance8x8 \ ++ vpx_highbd_12_sub_pixel_variance8x8_c ++ ++unsigned int vpx_highbd_12_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int 
ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance16x16 vpx_highbd_12_variance16x16_c ++ ++unsigned int vpx_highbd_12_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance16x32 vpx_highbd_12_variance16x32_c ++ ++unsigned int vpx_highbd_12_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance16x8 vpx_highbd_12_variance16x8_c ++ ++unsigned int vpx_highbd_12_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance32x16 vpx_highbd_12_variance32x16_c ++ ++unsigned int vpx_highbd_12_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance32x32 vpx_highbd_12_variance32x32_c ++ ++unsigned int vpx_highbd_12_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance32x64 vpx_highbd_12_variance32x64_c ++ ++unsigned int vpx_highbd_12_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance4x4 vpx_highbd_12_variance4x4_c ++ ++unsigned int vpx_highbd_12_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance4x8 vpx_highbd_12_variance4x8_c ++ ++unsigned int vpx_highbd_12_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance64x32 vpx_highbd_12_variance64x32_c ++ ++unsigned int vpx_highbd_12_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance64x64 vpx_highbd_12_variance64x64_c ++ ++unsigned int vpx_highbd_12_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance8x16 vpx_highbd_12_variance8x16_c ++ ++unsigned int vpx_highbd_12_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance8x4 vpx_highbd_12_variance8x4_c ++ ++unsigned int vpx_highbd_12_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance8x8 vpx_highbd_12_variance8x8_c ++ ++void vpx_highbd_8_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_8_get16x16var vpx_highbd_8_get16x16var_c ++ ++void vpx_highbd_8_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_8_get8x8var vpx_highbd_8_get8x8var_c ++ ++unsigned int vpx_highbd_8_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse16x16 vpx_highbd_8_mse16x16_c ++ ++unsigned int vpx_highbd_8_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const 
uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse16x8 vpx_highbd_8_mse16x8_c ++ ++unsigned int vpx_highbd_8_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse8x16 vpx_highbd_8_mse8x16_c ++ ++unsigned int vpx_highbd_8_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse8x8 vpx_highbd_8_mse8x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance16x16 \ ++ vpx_highbd_8_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance16x32 \ ++ vpx_highbd_8_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance16x8 \ ++ vpx_highbd_8_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance32x16 \ ++ vpx_highbd_8_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance32x32 \ ++ vpx_highbd_8_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance32x64 \ ++ vpx_highbd_8_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance4x4 \ ++ vpx_highbd_8_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance4x8 \ ++ vpx_highbd_8_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance64x32 \ ++ vpx_highbd_8_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64_c(const uint8_t* src_ptr, ++ 
int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance64x64 \ ++ vpx_highbd_8_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance8x16 \ ++ vpx_highbd_8_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance8x4 \ ++ vpx_highbd_8_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance8x8 \ ++ vpx_highbd_8_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance16x16 \ ++ vpx_highbd_8_sub_pixel_variance16x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance16x32 \ ++ vpx_highbd_8_sub_pixel_variance16x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance16x8 \ ++ vpx_highbd_8_sub_pixel_variance16x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance32x16 \ ++ vpx_highbd_8_sub_pixel_variance32x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance32x32 \ ++ vpx_highbd_8_sub_pixel_variance32x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance32x64 \ ++ vpx_highbd_8_sub_pixel_variance32x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance4x4 vpx_highbd_8_sub_pixel_variance4x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance4x8 vpx_highbd_8_sub_pixel_variance4x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance64x32_c(const uint8_t* 
src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance64x32 \ ++ vpx_highbd_8_sub_pixel_variance64x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance64x64 \ ++ vpx_highbd_8_sub_pixel_variance64x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance8x16 \ ++ vpx_highbd_8_sub_pixel_variance8x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance8x4 vpx_highbd_8_sub_pixel_variance8x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance8x8 vpx_highbd_8_sub_pixel_variance8x8_c ++ ++unsigned int vpx_highbd_8_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance16x16 vpx_highbd_8_variance16x16_c ++ ++unsigned int vpx_highbd_8_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance16x32 vpx_highbd_8_variance16x32_c ++ ++unsigned int vpx_highbd_8_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance16x8 vpx_highbd_8_variance16x8_c ++ ++unsigned int vpx_highbd_8_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance32x16 vpx_highbd_8_variance32x16_c ++ ++unsigned int vpx_highbd_8_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance32x32 vpx_highbd_8_variance32x32_c ++ ++unsigned int vpx_highbd_8_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance32x64 vpx_highbd_8_variance32x64_c ++ ++unsigned int vpx_highbd_8_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance4x4 vpx_highbd_8_variance4x4_c ++ ++unsigned int vpx_highbd_8_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance4x8 vpx_highbd_8_variance4x8_c ++ ++unsigned int vpx_highbd_8_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance64x32 vpx_highbd_8_variance64x32_c ++ ++unsigned int vpx_highbd_8_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance64x64 
vpx_highbd_8_variance64x64_c ++ ++unsigned int vpx_highbd_8_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance8x16 vpx_highbd_8_variance8x16_c ++ ++unsigned int vpx_highbd_8_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance8x4 vpx_highbd_8_variance8x4_c ++ ++unsigned int vpx_highbd_8_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance8x8 vpx_highbd_8_variance8x8_c ++ ++unsigned int vpx_highbd_avg_4x4_c(const uint8_t* s8, int p); ++#define vpx_highbd_avg_4x4 vpx_highbd_avg_4x4_c ++ ++unsigned int vpx_highbd_avg_8x8_c(const uint8_t* s8, int p); ++#define vpx_highbd_avg_8x8 vpx_highbd_avg_8x8_c ++ ++void vpx_highbd_comp_avg_pred_c(uint16_t* comp_pred, ++ const uint16_t* pred, ++ int width, ++ int height, ++ const uint16_t* ref, ++ int ref_stride); ++#define vpx_highbd_comp_avg_pred vpx_highbd_comp_avg_pred_c ++ ++void vpx_highbd_convolve8_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8 vpx_highbd_convolve8_c ++ ++void vpx_highbd_convolve8_avg_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_avg vpx_highbd_convolve8_avg_c ++ ++void vpx_highbd_convolve8_avg_horiz_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_avg_horiz vpx_highbd_convolve8_avg_horiz_c ++ ++void vpx_highbd_convolve8_avg_vert_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_avg_vert vpx_highbd_convolve8_avg_vert_c ++ ++void vpx_highbd_convolve8_horiz_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_horiz vpx_highbd_convolve8_horiz_c ++ ++void vpx_highbd_convolve8_vert_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_vert vpx_highbd_convolve8_vert_c ++ ++void vpx_highbd_convolve_avg_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve_avg vpx_highbd_convolve_avg_c ++ ++void vpx_highbd_convolve_copy_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int 
x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve_copy vpx_highbd_convolve_copy_c ++ ++void vpx_highbd_d117_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_16x16 vpx_highbd_d117_predictor_16x16_c ++ ++void vpx_highbd_d117_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_32x32 vpx_highbd_d117_predictor_32x32_c ++ ++void vpx_highbd_d117_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_4x4 vpx_highbd_d117_predictor_4x4_c ++ ++void vpx_highbd_d117_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_8x8 vpx_highbd_d117_predictor_8x8_c ++ ++void vpx_highbd_d135_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_16x16 vpx_highbd_d135_predictor_16x16_c ++ ++void vpx_highbd_d135_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_32x32 vpx_highbd_d135_predictor_32x32_c ++ ++void vpx_highbd_d135_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_4x4 vpx_highbd_d135_predictor_4x4_c ++ ++void vpx_highbd_d135_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_8x8 vpx_highbd_d135_predictor_8x8_c ++ ++void vpx_highbd_d153_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_16x16 vpx_highbd_d153_predictor_16x16_c ++ ++void vpx_highbd_d153_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_32x32 vpx_highbd_d153_predictor_32x32_c ++ ++void vpx_highbd_d153_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_4x4 vpx_highbd_d153_predictor_4x4_c ++ ++void vpx_highbd_d153_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_8x8 vpx_highbd_d153_predictor_8x8_c ++ ++void vpx_highbd_d207_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d207_predictor_16x16 vpx_highbd_d207_predictor_16x16_c ++ ++void vpx_highbd_d207_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d207_predictor_32x32 vpx_highbd_d207_predictor_32x32_c ++ ++void vpx_highbd_d207_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d207_predictor_4x4 vpx_highbd_d207_predictor_4x4_c ++ ++void vpx_highbd_d207_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define 
vpx_highbd_d207_predictor_8x8 vpx_highbd_d207_predictor_8x8_c ++ ++void vpx_highbd_d45_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_16x16 vpx_highbd_d45_predictor_16x16_c ++ ++void vpx_highbd_d45_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_32x32 vpx_highbd_d45_predictor_32x32_c ++ ++void vpx_highbd_d45_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_4x4 vpx_highbd_d45_predictor_4x4_c ++ ++void vpx_highbd_d45_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_8x8 vpx_highbd_d45_predictor_8x8_c ++ ++void vpx_highbd_d63_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_16x16 vpx_highbd_d63_predictor_16x16_c ++ ++void vpx_highbd_d63_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_32x32 vpx_highbd_d63_predictor_32x32_c ++ ++void vpx_highbd_d63_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_4x4 vpx_highbd_d63_predictor_4x4_c ++ ++void vpx_highbd_d63_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_8x8 vpx_highbd_d63_predictor_8x8_c ++ ++void vpx_highbd_dc_128_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_16x16 vpx_highbd_dc_128_predictor_16x16_c ++ ++void vpx_highbd_dc_128_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_32x32 vpx_highbd_dc_128_predictor_32x32_c ++ ++void vpx_highbd_dc_128_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_4x4 vpx_highbd_dc_128_predictor_4x4_c ++ ++void vpx_highbd_dc_128_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_8x8 vpx_highbd_dc_128_predictor_8x8_c ++ ++void vpx_highbd_dc_left_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_16x16 vpx_highbd_dc_left_predictor_16x16_c ++ ++void vpx_highbd_dc_left_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_32x32 vpx_highbd_dc_left_predictor_32x32_c ++ ++void vpx_highbd_dc_left_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_4x4 vpx_highbd_dc_left_predictor_4x4_c ++ ++void vpx_highbd_dc_left_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_8x8 vpx_highbd_dc_left_predictor_8x8_c 
++ ++void vpx_highbd_dc_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_16x16 vpx_highbd_dc_predictor_16x16_c ++ ++void vpx_highbd_dc_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_32x32 vpx_highbd_dc_predictor_32x32_c ++ ++void vpx_highbd_dc_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_4x4 vpx_highbd_dc_predictor_4x4_c ++ ++void vpx_highbd_dc_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_8x8 vpx_highbd_dc_predictor_8x8_c ++ ++void vpx_highbd_dc_top_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_16x16 vpx_highbd_dc_top_predictor_16x16_c ++ ++void vpx_highbd_dc_top_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_32x32 vpx_highbd_dc_top_predictor_32x32_c ++ ++void vpx_highbd_dc_top_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_4x4 vpx_highbd_dc_top_predictor_4x4_c ++ ++void vpx_highbd_dc_top_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_8x8 vpx_highbd_dc_top_predictor_8x8_c ++ ++void vpx_highbd_fdct16x16_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct16x16 vpx_highbd_fdct16x16_c ++ ++void vpx_highbd_fdct16x16_1_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct16x16_1 vpx_highbd_fdct16x16_1_c ++ ++void vpx_highbd_fdct32x32_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct32x32 vpx_highbd_fdct32x32_c ++ ++void vpx_highbd_fdct32x32_1_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct32x32_1 vpx_highbd_fdct32x32_1_c ++ ++void vpx_highbd_fdct32x32_rd_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct32x32_rd vpx_highbd_fdct32x32_rd_c ++ ++void vpx_highbd_fdct4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_highbd_fdct4x4 vpx_highbd_fdct4x4_c ++ ++void vpx_highbd_fdct8x8_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_highbd_fdct8x8 vpx_highbd_fdct8x8_c ++ ++void vpx_highbd_fdct8x8_1_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct8x8_1 vpx_highbd_fdct8x8_1_c ++ ++void vpx_highbd_h_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_16x16 vpx_highbd_h_predictor_16x16_c ++ ++void vpx_highbd_h_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_32x32 vpx_highbd_h_predictor_32x32_c ++ ++void vpx_highbd_h_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_4x4 vpx_highbd_h_predictor_4x4_c ++ ++void 
vpx_highbd_h_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_8x8 vpx_highbd_h_predictor_8x8_c ++ ++void vpx_highbd_hadamard_16x16_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_highbd_hadamard_16x16 vpx_highbd_hadamard_16x16_c ++ ++void vpx_highbd_hadamard_32x32_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_highbd_hadamard_32x32 vpx_highbd_hadamard_32x32_c ++ ++void vpx_highbd_hadamard_8x8_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_highbd_hadamard_8x8 vpx_highbd_hadamard_8x8_c ++ ++void vpx_highbd_idct16x16_10_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_10_add vpx_highbd_idct16x16_10_add_c ++ ++void vpx_highbd_idct16x16_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_1_add vpx_highbd_idct16x16_1_add_c ++ ++void vpx_highbd_idct16x16_256_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_256_add vpx_highbd_idct16x16_256_add_c ++ ++void vpx_highbd_idct16x16_38_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_38_add vpx_highbd_idct16x16_38_add_c ++ ++void vpx_highbd_idct32x32_1024_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_1024_add vpx_highbd_idct32x32_1024_add_c ++ ++void vpx_highbd_idct32x32_135_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_135_add vpx_highbd_idct32x32_135_add_c ++ ++void vpx_highbd_idct32x32_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_1_add vpx_highbd_idct32x32_1_add_c ++ ++void vpx_highbd_idct32x32_34_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_34_add vpx_highbd_idct32x32_34_add_c ++ ++void vpx_highbd_idct4x4_16_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct4x4_16_add vpx_highbd_idct4x4_16_add_c ++ ++void vpx_highbd_idct4x4_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct4x4_1_add vpx_highbd_idct4x4_1_add_c ++ ++void vpx_highbd_idct8x8_12_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct8x8_12_add vpx_highbd_idct8x8_12_add_c ++ ++void vpx_highbd_idct8x8_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct8x8_1_add vpx_highbd_idct8x8_1_add_c ++ ++void vpx_highbd_idct8x8_64_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct8x8_64_add vpx_highbd_idct8x8_64_add_c ++ ++void vpx_highbd_iwht4x4_16_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_iwht4x4_16_add vpx_highbd_iwht4x4_16_add_c ++ ++void vpx_highbd_iwht4x4_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_iwht4x4_1_add vpx_highbd_iwht4x4_1_add_c ++ ++void vpx_highbd_lpf_horizontal_16_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_16 
vpx_highbd_lpf_horizontal_16_c ++ ++void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_16_dual vpx_highbd_lpf_horizontal_16_dual_c ++ ++void vpx_highbd_lpf_horizontal_4_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_4 vpx_highbd_lpf_horizontal_4_c ++ ++void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_horizontal_4_dual vpx_highbd_lpf_horizontal_4_dual_c ++ ++void vpx_highbd_lpf_horizontal_8_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_8 vpx_highbd_lpf_horizontal_8_c ++ ++void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_horizontal_8_dual vpx_highbd_lpf_horizontal_8_dual_c ++ ++void vpx_highbd_lpf_vertical_16_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_16 vpx_highbd_lpf_vertical_16_c ++ ++void vpx_highbd_lpf_vertical_16_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_16_dual vpx_highbd_lpf_vertical_16_dual_c ++ ++void vpx_highbd_lpf_vertical_4_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_4 vpx_highbd_lpf_vertical_4_c ++ ++void vpx_highbd_lpf_vertical_4_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_vertical_4_dual vpx_highbd_lpf_vertical_4_dual_c ++ ++void vpx_highbd_lpf_vertical_8_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_8 vpx_highbd_lpf_vertical_8_c ++ ++void vpx_highbd_lpf_vertical_8_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_vertical_8_dual vpx_highbd_lpf_vertical_8_dual_c ++ ++void vpx_highbd_minmax_8x8_c(const uint8_t* s8, ++ int p, ++ const uint8_t* d8, ++ int dp, ++ int* min, ++ int* max); ++#define vpx_highbd_minmax_8x8 vpx_highbd_minmax_8x8_c ++ ++void vpx_highbd_quantize_b_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_highbd_quantize_b vpx_highbd_quantize_b_c ++ ++void vpx_highbd_quantize_b_32x32_c(const 
tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_highbd_quantize_b_32x32 vpx_highbd_quantize_b_32x32_c ++ ++unsigned int vpx_highbd_sad16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad16x16 vpx_highbd_sad16x16_c ++ ++unsigned int vpx_highbd_sad16x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad16x16_avg vpx_highbd_sad16x16_avg_c ++ ++void vpx_highbd_sad16x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad16x16x4d vpx_highbd_sad16x16x4d_c ++ ++unsigned int vpx_highbd_sad16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad16x32 vpx_highbd_sad16x32_c ++ ++unsigned int vpx_highbd_sad16x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad16x32_avg vpx_highbd_sad16x32_avg_c ++ ++void vpx_highbd_sad16x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad16x32x4d vpx_highbd_sad16x32x4d_c ++ ++unsigned int vpx_highbd_sad16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad16x8 vpx_highbd_sad16x8_c ++ ++unsigned int vpx_highbd_sad16x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad16x8_avg vpx_highbd_sad16x8_avg_c ++ ++void vpx_highbd_sad16x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad16x8x4d vpx_highbd_sad16x8x4d_c ++ ++unsigned int vpx_highbd_sad32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad32x16 vpx_highbd_sad32x16_c ++ ++unsigned int vpx_highbd_sad32x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad32x16_avg vpx_highbd_sad32x16_avg_c ++ ++void vpx_highbd_sad32x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad32x16x4d vpx_highbd_sad32x16x4d_c ++ ++unsigned int vpx_highbd_sad32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad32x32 vpx_highbd_sad32x32_c ++ ++unsigned int vpx_highbd_sad32x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad32x32_avg vpx_highbd_sad32x32_avg_c ++ ++void vpx_highbd_sad32x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad32x32x4d vpx_highbd_sad32x32x4d_c ++ ++unsigned int 
vpx_highbd_sad32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad32x64 vpx_highbd_sad32x64_c ++ ++unsigned int vpx_highbd_sad32x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad32x64_avg vpx_highbd_sad32x64_avg_c ++ ++void vpx_highbd_sad32x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad32x64x4d vpx_highbd_sad32x64x4d_c ++ ++unsigned int vpx_highbd_sad4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad4x4 vpx_highbd_sad4x4_c ++ ++unsigned int vpx_highbd_sad4x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad4x4_avg vpx_highbd_sad4x4_avg_c ++ ++void vpx_highbd_sad4x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad4x4x4d vpx_highbd_sad4x4x4d_c ++ ++unsigned int vpx_highbd_sad4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad4x8 vpx_highbd_sad4x8_c ++ ++unsigned int vpx_highbd_sad4x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad4x8_avg vpx_highbd_sad4x8_avg_c ++ ++void vpx_highbd_sad4x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad4x8x4d vpx_highbd_sad4x8x4d_c ++ ++unsigned int vpx_highbd_sad64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad64x32 vpx_highbd_sad64x32_c ++ ++unsigned int vpx_highbd_sad64x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad64x32_avg vpx_highbd_sad64x32_avg_c ++ ++void vpx_highbd_sad64x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad64x32x4d vpx_highbd_sad64x32x4d_c ++ ++unsigned int vpx_highbd_sad64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad64x64 vpx_highbd_sad64x64_c ++ ++unsigned int vpx_highbd_sad64x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad64x64_avg vpx_highbd_sad64x64_avg_c ++ ++void vpx_highbd_sad64x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad64x64x4d vpx_highbd_sad64x64x4d_c ++ ++unsigned int vpx_highbd_sad8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad8x16 vpx_highbd_sad8x16_c ++ ++unsigned int vpx_highbd_sad8x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad8x16_avg vpx_highbd_sad8x16_avg_c ++ ++void vpx_highbd_sad8x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const 
uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad8x16x4d vpx_highbd_sad8x16x4d_c ++ ++unsigned int vpx_highbd_sad8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad8x4 vpx_highbd_sad8x4_c ++ ++unsigned int vpx_highbd_sad8x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad8x4_avg vpx_highbd_sad8x4_avg_c ++ ++void vpx_highbd_sad8x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad8x4x4d vpx_highbd_sad8x4x4d_c ++ ++unsigned int vpx_highbd_sad8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad8x8 vpx_highbd_sad8x8_c ++ ++unsigned int vpx_highbd_sad8x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad8x8_avg vpx_highbd_sad8x8_avg_c ++ ++void vpx_highbd_sad8x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad8x8x4d vpx_highbd_sad8x8x4d_c ++ ++int vpx_highbd_satd_c(const tran_low_t* coeff, int length); ++#define vpx_highbd_satd vpx_highbd_satd_c ++ ++void vpx_highbd_subtract_block_c(int rows, ++ int cols, ++ int16_t* diff_ptr, ++ ptrdiff_t diff_stride, ++ const uint8_t* src8_ptr, ++ ptrdiff_t src_stride, ++ const uint8_t* pred8_ptr, ++ ptrdiff_t pred_stride, ++ int bd); ++#define vpx_highbd_subtract_block vpx_highbd_subtract_block_c ++ ++void vpx_highbd_tm_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_16x16 vpx_highbd_tm_predictor_16x16_c ++ ++void vpx_highbd_tm_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_32x32 vpx_highbd_tm_predictor_32x32_c ++ ++void vpx_highbd_tm_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_4x4 vpx_highbd_tm_predictor_4x4_c ++ ++void vpx_highbd_tm_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_8x8 vpx_highbd_tm_predictor_8x8_c ++ ++void vpx_highbd_v_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_16x16 vpx_highbd_v_predictor_16x16_c ++ ++void vpx_highbd_v_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_32x32 vpx_highbd_v_predictor_32x32_c ++ ++void vpx_highbd_v_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_4x4 vpx_highbd_v_predictor_4x4_c ++ ++void vpx_highbd_v_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_8x8 vpx_highbd_v_predictor_8x8_c ++ ++void vpx_idct16x16_10_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c ++ 
++void vpx_idct16x16_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c ++ ++void vpx_idct16x16_256_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride); ++#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c ++ ++void vpx_idct16x16_38_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct16x16_38_add vpx_idct16x16_38_add_c ++ ++void vpx_idct32x32_1024_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride); ++#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c ++ ++void vpx_idct32x32_135_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride); ++#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c ++ ++void vpx_idct32x32_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c ++ ++void vpx_idct32x32_34_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct32x32_34_add vpx_idct32x32_34_add_c ++ ++void vpx_idct4x4_16_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c ++ ++void vpx_idct4x4_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c ++ ++void vpx_idct8x8_12_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c ++ ++void vpx_idct8x8_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c ++ ++void vpx_idct8x8_64_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c ++ ++int16_t vpx_int_pro_col_c(const uint8_t* ref, const int width); ++#define vpx_int_pro_col vpx_int_pro_col_c ++ ++void vpx_int_pro_row_c(int16_t* hbuf, ++ const uint8_t* ref, ++ const int ref_stride, ++ const int height); ++#define vpx_int_pro_row vpx_int_pro_row_c ++ ++void vpx_iwht4x4_16_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c ++ ++void vpx_iwht4x4_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c ++ ++void vpx_lpf_horizontal_16_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_16 vpx_lpf_horizontal_16_c ++ ++void vpx_lpf_horizontal_16_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_16_dual vpx_lpf_horizontal_16_dual_c ++ ++void vpx_lpf_horizontal_4_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c ++ ++void vpx_lpf_horizontal_4_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c ++ ++void vpx_lpf_horizontal_8_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c ++ ++void vpx_lpf_horizontal_8_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_horizontal_8_dual 
vpx_lpf_horizontal_8_dual_c ++ ++void vpx_lpf_vertical_16_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c ++ ++void vpx_lpf_vertical_16_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c ++ ++void vpx_lpf_vertical_4_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c ++ ++void vpx_lpf_vertical_4_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c ++ ++void vpx_lpf_vertical_8_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c ++ ++void vpx_lpf_vertical_8_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c ++ ++void vpx_mbpost_proc_across_ip_c(unsigned char* src, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vpx_mbpost_proc_across_ip vpx_mbpost_proc_across_ip_c ++ ++void vpx_mbpost_proc_down_c(unsigned char* dst, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vpx_mbpost_proc_down vpx_mbpost_proc_down_c ++ ++void vpx_minmax_8x8_c(const uint8_t* s, ++ int p, ++ const uint8_t* d, ++ int dp, ++ int* min, ++ int* max); ++#define vpx_minmax_8x8 vpx_minmax_8x8_c ++ ++unsigned int vpx_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse16x16 vpx_mse16x16_c ++ ++unsigned int vpx_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse16x8 vpx_mse16x8_c ++ ++unsigned int vpx_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse8x16 vpx_mse8x16_c ++ ++unsigned int vpx_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse8x8 vpx_mse8x8_c ++ ++void vpx_plane_add_noise_c(uint8_t* start, ++ const int8_t* noise, ++ int blackclamp, ++ int whiteclamp, ++ int width, ++ int height, ++ int pitch); ++#define vpx_plane_add_noise vpx_plane_add_noise_c ++ ++void vpx_post_proc_down_and_across_mb_row_c(unsigned char* src, ++ unsigned char* dst, ++ int src_pitch, ++ int dst_pitch, ++ int cols, ++ unsigned char* flimits, ++ int size); ++#define vpx_post_proc_down_and_across_mb_row \ ++ vpx_post_proc_down_and_across_mb_row_c ++ ++void vpx_quantize_b_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_quantize_b vpx_quantize_b_c ++ ++void vpx_quantize_b_32x32_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, 
++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_quantize_b_32x32 vpx_quantize_b_32x32_c ++ ++unsigned int vpx_sad16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad16x16 vpx_sad16x16_c ++ ++unsigned int vpx_sad16x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad16x16_avg vpx_sad16x16_avg_c ++ ++void vpx_sad16x16x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x16x3 vpx_sad16x16x3_c ++ ++void vpx_sad16x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x16x4d vpx_sad16x16x4d_c ++ ++void vpx_sad16x16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x16x8 vpx_sad16x16x8_c ++ ++unsigned int vpx_sad16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad16x32 vpx_sad16x32_c ++ ++unsigned int vpx_sad16x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad16x32_avg vpx_sad16x32_avg_c ++ ++void vpx_sad16x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x32x4d vpx_sad16x32x4d_c ++ ++unsigned int vpx_sad16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad16x8 vpx_sad16x8_c ++ ++unsigned int vpx_sad16x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad16x8_avg vpx_sad16x8_avg_c ++ ++void vpx_sad16x8x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x8x3 vpx_sad16x8x3_c ++ ++void vpx_sad16x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x8x4d vpx_sad16x8x4d_c ++ ++void vpx_sad16x8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x8x8 vpx_sad16x8x8_c ++ ++unsigned int vpx_sad32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad32x16 vpx_sad32x16_c ++ ++unsigned int vpx_sad32x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad32x16_avg vpx_sad32x16_avg_c ++ ++void vpx_sad32x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x16x4d vpx_sad32x16x4d_c ++ ++unsigned int vpx_sad32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad32x32 vpx_sad32x32_c ++ ++unsigned int vpx_sad32x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, 
++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad32x32_avg vpx_sad32x32_avg_c ++ ++void vpx_sad32x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x32x4d vpx_sad32x32x4d_c ++ ++void vpx_sad32x32x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x32x8 vpx_sad32x32x8_c ++ ++unsigned int vpx_sad32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad32x64 vpx_sad32x64_c ++ ++unsigned int vpx_sad32x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad32x64_avg vpx_sad32x64_avg_c ++ ++void vpx_sad32x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x64x4d vpx_sad32x64x4d_c ++ ++unsigned int vpx_sad4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad4x4 vpx_sad4x4_c ++ ++unsigned int vpx_sad4x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad4x4_avg vpx_sad4x4_avg_c ++ ++void vpx_sad4x4x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x4x3 vpx_sad4x4x3_c ++ ++void vpx_sad4x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x4x4d vpx_sad4x4x4d_c ++ ++void vpx_sad4x4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x4x8 vpx_sad4x4x8_c ++ ++unsigned int vpx_sad4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad4x8 vpx_sad4x8_c ++ ++unsigned int vpx_sad4x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad4x8_avg vpx_sad4x8_avg_c ++ ++void vpx_sad4x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x8x4d vpx_sad4x8x4d_c ++ ++unsigned int vpx_sad64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad64x32 vpx_sad64x32_c ++ ++unsigned int vpx_sad64x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad64x32_avg vpx_sad64x32_avg_c ++ ++void vpx_sad64x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad64x32x4d vpx_sad64x32x4d_c ++ ++unsigned int vpx_sad64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad64x64 vpx_sad64x64_c ++ ++unsigned int vpx_sad64x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad64x64_avg vpx_sad64x64_avg_c ++ ++void vpx_sad64x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int 
ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad64x64x4d vpx_sad64x64x4d_c ++ ++unsigned int vpx_sad8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad8x16 vpx_sad8x16_c ++ ++unsigned int vpx_sad8x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad8x16_avg vpx_sad8x16_avg_c ++ ++void vpx_sad8x16x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x16x3 vpx_sad8x16x3_c ++ ++void vpx_sad8x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x16x4d vpx_sad8x16x4d_c ++ ++void vpx_sad8x16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x16x8 vpx_sad8x16x8_c ++ ++unsigned int vpx_sad8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad8x4 vpx_sad8x4_c ++ ++unsigned int vpx_sad8x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad8x4_avg vpx_sad8x4_avg_c ++ ++void vpx_sad8x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x4x4d vpx_sad8x4x4d_c ++ ++unsigned int vpx_sad8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad8x8 vpx_sad8x8_c ++ ++unsigned int vpx_sad8x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad8x8_avg vpx_sad8x8_avg_c ++ ++void vpx_sad8x8x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x8x3 vpx_sad8x8x3_c ++ ++void vpx_sad8x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x8x4d vpx_sad8x8x4d_c ++ ++void vpx_sad8x8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x8x8 vpx_sad8x8x8_c ++ ++int vpx_satd_c(const tran_low_t* coeff, int length); ++#define vpx_satd vpx_satd_c ++ ++void vpx_scaled_2d_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_2d vpx_scaled_2d_c ++ ++void vpx_scaled_avg_2d_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c ++ ++void vpx_scaled_avg_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c ++ ++void vpx_scaled_avg_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int 
y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c ++ ++void vpx_scaled_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_horiz vpx_scaled_horiz_c ++ ++void vpx_scaled_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_vert vpx_scaled_vert_c ++ ++uint32_t vpx_sub_pixel_avg_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance16x16 vpx_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_sub_pixel_avg_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance16x32 vpx_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance16x8 vpx_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_sub_pixel_avg_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance32x16 vpx_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_sub_pixel_avg_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance32x32 vpx_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_sub_pixel_avg_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance32x64 vpx_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance4x4 vpx_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance4x8 vpx_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_sub_pixel_avg_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance64x32 vpx_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_sub_pixel_avg_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define 
vpx_sub_pixel_avg_variance64x64 vpx_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance8x16 vpx_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance8x4 vpx_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance8x8 vpx_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance16x16 vpx_sub_pixel_variance16x16_c ++ ++uint32_t vpx_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance16x32 vpx_sub_pixel_variance16x32_c ++ ++uint32_t vpx_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance16x8 vpx_sub_pixel_variance16x8_c ++ ++uint32_t vpx_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance32x16 vpx_sub_pixel_variance32x16_c ++ ++uint32_t vpx_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance32x32 vpx_sub_pixel_variance32x32_c ++ ++uint32_t vpx_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance32x64 vpx_sub_pixel_variance32x64_c ++ ++uint32_t vpx_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance4x4 vpx_sub_pixel_variance4x4_c ++ ++uint32_t vpx_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance4x8 vpx_sub_pixel_variance4x8_c ++ ++uint32_t vpx_sub_pixel_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance64x32 vpx_sub_pixel_variance64x32_c ++ ++uint32_t vpx_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance64x64 vpx_sub_pixel_variance64x64_c ++ ++uint32_t vpx_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, 
++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance8x16 vpx_sub_pixel_variance8x16_c ++ ++uint32_t vpx_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance8x4 vpx_sub_pixel_variance8x4_c ++ ++uint32_t vpx_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance8x8 vpx_sub_pixel_variance8x8_c ++ ++void vpx_subtract_block_c(int rows, ++ int cols, ++ int16_t* diff_ptr, ++ ptrdiff_t diff_stride, ++ const uint8_t* src_ptr, ++ ptrdiff_t src_stride, ++ const uint8_t* pred_ptr, ++ ptrdiff_t pred_stride); ++#define vpx_subtract_block vpx_subtract_block_c ++ ++uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); ++#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c ++ ++void vpx_tm_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c ++ ++void vpx_tm_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c ++ ++void vpx_tm_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c ++ ++void vpx_tm_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c ++ ++void vpx_v_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c ++ ++void vpx_v_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c ++ ++void vpx_v_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c ++ ++void vpx_v_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c ++ ++unsigned int vpx_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance16x16 vpx_variance16x16_c ++ ++unsigned int vpx_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance16x32 vpx_variance16x32_c ++ ++unsigned int vpx_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance16x8 vpx_variance16x8_c ++ ++unsigned int vpx_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance32x16 vpx_variance32x16_c ++ ++unsigned int vpx_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance32x32 vpx_variance32x32_c ++ ++unsigned int vpx_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int 
ref_stride, ++ unsigned int* sse); ++#define vpx_variance32x64 vpx_variance32x64_c ++ ++unsigned int vpx_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance4x4 vpx_variance4x4_c ++ ++unsigned int vpx_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance4x8 vpx_variance4x8_c ++ ++unsigned int vpx_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance64x32 vpx_variance64x32_c ++ ++unsigned int vpx_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance64x64 vpx_variance64x64_c ++ ++unsigned int vpx_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance8x16 vpx_variance8x16_c ++ ++unsigned int vpx_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance8x4 vpx_variance8x4_c ++ ++unsigned int vpx_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance8x8 vpx_variance8x8_c ++ ++void vpx_ve_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c ++ ++int vpx_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl); ++#define vpx_vector_var vpx_vector_var_c ++ ++void vpx_dsp_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_scale_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_scale_rtcd.h +new file mode 100644 +index 00000000000..c5196db4dc6 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_scale_rtcd.h +@@ -0,0 +1,96 @@ ++// This file is generated. Do not edit. 
++#ifndef VPX_SCALE_RTCD_H_ ++#define VPX_SCALE_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++struct yv12_buffer_config; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void vp8_horizontal_line_2_1_scale_c(const unsigned char* source, ++ unsigned int source_width, ++ unsigned char* dest, ++ unsigned int dest_width); ++#define vp8_horizontal_line_2_1_scale vp8_horizontal_line_2_1_scale_c ++ ++void vp8_horizontal_line_5_3_scale_c(const unsigned char* source, ++ unsigned int source_width, ++ unsigned char* dest, ++ unsigned int dest_width); ++#define vp8_horizontal_line_5_3_scale vp8_horizontal_line_5_3_scale_c ++ ++void vp8_horizontal_line_5_4_scale_c(const unsigned char* source, ++ unsigned int source_width, ++ unsigned char* dest, ++ unsigned int dest_width); ++#define vp8_horizontal_line_5_4_scale vp8_horizontal_line_5_4_scale_c ++ ++void vp8_vertical_band_2_1_scale_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_2_1_scale vp8_vertical_band_2_1_scale_c ++ ++void vp8_vertical_band_2_1_scale_i_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_2_1_scale_i vp8_vertical_band_2_1_scale_i_c ++ ++void vp8_vertical_band_5_3_scale_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_5_3_scale vp8_vertical_band_5_3_scale_c ++ ++void vp8_vertical_band_5_4_scale_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_5_4_scale vp8_vertical_band_5_4_scale_c ++ ++void vp8_yv12_copy_frame_c(const struct yv12_buffer_config* src_ybc, ++ struct yv12_buffer_config* dst_ybc); ++#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c ++ ++void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config* ybf); ++#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c ++ ++void vpx_extend_frame_borders_c(struct yv12_buffer_config* ybf); ++#define vpx_extend_frame_borders vpx_extend_frame_borders_c ++ ++void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config* ybf); ++#define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c ++ ++void vpx_yv12_copy_frame_c(const struct yv12_buffer_config* src_ybc, ++ struct yv12_buffer_config* dst_ybc); ++#define vpx_yv12_copy_frame vpx_yv12_copy_frame_c ++ ++void vpx_yv12_copy_y_c(const struct yv12_buffer_config* src_ybc, ++ struct yv12_buffer_config* dst_ybc); ++#define vpx_yv12_copy_y vpx_yv12_copy_y_c ++ ++void vpx_scale_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h b/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h +index d2baee9d243..fed8f2ed2c2 100644 +--- a/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h ++++ b/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h +@@ -88,7 +88,7 @@ + */ + #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__) || defined(__ARM_EABI__) || \ +- defined(__aarch64__) || defined(__s390__)) \ ++ defined(__aarch64__) || 
defined(__s390__) || defined(__loongarch64)) \ + && (defined(__linux) || defined(__ANDROID__)) + + #ifndef SYS_CPLUSPLUS +@@ -299,7 +299,7 @@ struct kernel_old_sigaction { + } __attribute__((packed,aligned(4))); + #elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__loongarch64) + // No kernel_old_sigaction defined for arm64. + #endif + +@@ -312,7 +312,7 @@ struct kernel_old_sigaction { + * actual number of signals is obviously the same, but the constants + * differ by one. + */ +-#ifdef __mips__ ++#if defined(__mips__) || defined(__loongarch64) + #define KERNEL_NSIG 128 + #else + #define KERNEL_NSIG 64 +@@ -517,7 +517,7 @@ struct kernel_stat { + int st_blocks; + int st_pad4[14]; + }; +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__loongarch64) + struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; +@@ -604,7 +604,7 @@ struct kernel_statfs64 { + unsigned long f_spare[6]; + }; + #endif +-#elif defined(__s390__) ++#elif defined(__s390__) || defined(__loongarch64) + /* See also arch/s390/include/asm/compat.h */ + struct kernel_statfs64 { + unsigned int f_type; +@@ -668,7 +668,7 @@ struct kernel_statfs { + uint64_t f_frsize; + uint64_t f_spare[5]; + }; +-#elif defined(__s390__) ++#elif defined(__s390__) || defined(__loongarch64) + struct kernel_statfs { + unsigned int f_type; + unsigned int f_bsize; +@@ -1057,7 +1057,7 @@ struct kernel_statfs { + #define __NR_getcpu (__NR_SYSCALL_BASE + 345) + #endif + /* End of ARM 3/EABI definitions */ +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__loongarch64) + #ifndef __NR_setxattr + #define __NR_setxattr 5 + #endif +@@ -1860,7 +1860,8 @@ struct kernel_statfs { + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) \ +- || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__)) ++ || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__) \ ++ || defined(__loongarch64)) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ +@@ -1960,7 +1961,7 @@ struct kernel_statfs { + LSS_ENTRYPOINT \ + "pop %%ebx" \ + args \ +- : "esp", "memory"); \ ++ : "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ +@@ -2017,7 +2018,7 @@ struct kernel_statfs { + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ +- : "esp", "memory"); \ ++ : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall6 +@@ -2039,7 +2040,7 @@ struct kernel_statfs { + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ +- : "esp", "memory"); \ ++ : "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, +@@ -2125,7 +2126,7 @@ struct kernel_statfs { + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) +- : "esp", "memory", "ecx", "edx", "esi", "edi"); ++ : "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + +@@ -2813,6 +2814,126 @@ struct kernel_statfs { + } + LSS_RETURN(int, __res); + } ++ #elif defined(__loongarch64) ++ /* Most definitions of _syscallX() neglect to mark "memory" as being ++ * clobbered. 
This causes problems with compilers, that do a better job ++ * at optimizing across __asm__ calls. ++ * So, we just have to redefine all of the _syscallX() macros. ++ */ ++ #undef LSS_REG ++ #define LSS_REG(ar,a) register int64_t __a##ar __asm__("a"#ar) = (int64_t)a ++ #undef LSS_BODY ++ #define LSS_BODY(type,name,args...) \ ++ register int64_t __res_a0 __asm__("a0"); \ ++ int64_t __res; \ ++ __asm__ __volatile__ ("li $a7, %1\n" \ ++ "syscall 0x0\n" \ ++ : "=r"(__res_a0) \ ++ : "i"(__NR_##name) , ## args \ ++ : "$a7", "memory"); \ ++ __res = __res_a0; \ ++ LSS_RETURN(type, __res) ++ #undef _syscall0 ++ #define _syscall0(type, name) \ ++ type LSS_NAME(name)(void) { \ ++ LSS_BODY(type, name); \ ++ } ++ #undef _syscall1 ++ #define _syscall1(type, name, type1, arg1) \ ++ type LSS_NAME(name)(type1 arg1) { \ ++ LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__a0)); \ ++ } ++ #undef _syscall2 ++ #define _syscall2(type, name, type1, arg1, type2, arg2) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1)); \ ++ } ++ #undef _syscall3 ++ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2)); \ ++ } ++ #undef _syscall4 ++ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_REG(3, arg4); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3)); \ ++ } ++ #undef _syscall5 ++ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ ++ type5,arg5) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ ++ type5 arg5) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_REG(3, arg4); LSS_REG(4, arg5); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3), \ ++ "r"(__a4)); \ ++ } ++ #undef _syscall6 ++ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ ++ type5,arg5,type6,arg6) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ ++ type5 arg5, type6 arg6) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3), \ ++ "r"(__a4), "r"(__a5)); \ ++ } ++ ++ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, ++ int flags, void *arg, int *parent_tidptr, ++ void *newtls, int *child_tidptr) { ++ int64_t __res; ++ { ++ register uint64_t __flags __asm__("a0") = flags; ++ register void *__stack __asm__("a1") = child_stack; ++ register void *__ptid __asm__("a2") = parent_tidptr; ++ register void *__tls __asm__("a3") = newtls; ++ register int *__ctid __asm__("a4") = child_tidptr; ++ __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be ++ * used by the child. ++ */ ++ "sub.d $%2, 16\n" ++ "st.d %1, %2, 8\n" ++ "st.d %4, %2, 0\n" ++ ++ /* %a0 = syscall(%a0 = flags, ++ * %a1 = child_stack, ++ * %a2 = parent_tidptr, ++ * %a3 = newtls, ++ * %a4 = child_tidptr) ++ */ ++ "li a7, %8\n" ++ "syscall 0x0\n" ++ ++ /* if (%a0 != 0) ++ * return %a0; ++ */ ++ "bnz a0, 1f\n" ++ ++ /* In the child, now. Call "fn(arg)". 
++ */ ++ "ld.d a0, $sp, 0\n" ++ "ld.d a1, $sp, 8\n" ++ "add.d $sp, 16\n" ++ "bl $a1\n" ++ ++ /* Call _exit(%a0). ++ */ ++ "li $a7, %9\n" ++ "syscall 0x0\n" ++ "1:\n" ++ : "=r" (__res) ++ : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), ++ "r"(__ptid), "r"(__tls), "r"(__ctid), ++ "i"(__NR_clone), "i"(__NR_exit) ++ : "cc", "a7", "memory"); ++ } ++ LSS_RETURN(int, __res); ++ } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ +@@ -3396,9 +3517,10 @@ struct kernel_statfs { + LSS_INLINE _syscall2(int, ftruncate, int, f, + off_t, l) + #endif +- LSS_INLINE _syscall4(int, futex, int*, a, +- int, o, int, v, +- struct kernel_timespec*, t) ++ LSS_INLINE _syscall6(int, futex, int*, u, ++ int, o, int, v, ++ struct kernel_timespec*, t, ++ int*, u2, int, v2) + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) + LSS_INLINE _syscall3(int, getdents64, int, f, +@@ -4156,7 +4278,7 @@ struct kernel_statfs { + LSS_SC_BODY(4, int, 8, d, type, protocol, sv); + } + #endif +- #if defined(__ARM_EABI__) || defined (__aarch64__) ++ #if defined(__ARM_EABI__) || defined (__aarch64__) || defined (__loongarch64) + LSS_INLINE _syscall3(ssize_t, recvmsg, int, s, struct kernel_msghdr*, msg, + int, flags) + LSS_INLINE _syscall3(ssize_t, sendmsg, int, s, const struct kernel_msghdr*, +@@ -4478,7 +4600,7 @@ struct kernel_statfs { + // TODO: define this in an arch-independant way instead of inlining the clone + // syscall body. + +-# if defined(__aarch64__) ++# if defined(__aarch64__) || defined(__loongarch64) + LSS_INLINE pid_t LSS_NAME(fork)(void) { + // No fork syscall on aarch64 - implement by means of the clone syscall. + // Note that this does not reset glibc's cached view of the PID/TID, so +@@ -4494,8 +4616,8 @@ struct kernel_statfs { + LSS_REG(2, parent_tidptr); + LSS_REG(3, newtls); + LSS_REG(4, child_tidptr); +- LSS_BODY(pid_t, clone, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), +- "r"(__r4)); ++ LSS_BODY(pid_t, clone, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3), ++ "r"(__a4)); + } + # elif defined(__x86_64__) + LSS_INLINE pid_t LSS_NAME(fork)(void) { +diff --git a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h +index 567e3a3b91c..2208d4f207a 100644 +--- a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h ++++ b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h +@@ -13,7 +13,7 @@ namespace pdfium { + namespace base { + #if defined(OS_WIN) || defined(ARCH_CPU_PPC64) + static constexpr size_t kPageAllocationGranularityShift = 16; // 64KB +-#elif defined(_MIPS_ARCH_LOONGSON) ++#elif defined(_MIPS_ARCH_LOONGSON) || defined(__loongarch__) + static constexpr size_t kPageAllocationGranularityShift = 14; // 16KB + #else + static constexpr size_t kPageAllocationGranularityShift = 12; // 4KB +@@ -25,7 +25,7 @@ static constexpr size_t kPageAllocationGranularityOffsetMask = + static constexpr size_t kPageAllocationGranularityBaseMask = + ~kPageAllocationGranularityOffsetMask; + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(__loongarch__) + static constexpr size_t kSystemPageSize = 16384; + #elif defined(ARCH_CPU_PPC64) + // Modern ppc64 systems support 4KB and 64KB page sizes. 
+diff --git a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h +index 8ebc4f5bac7..600dbdb5134 100644 +--- a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h ++++ b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h +@@ -35,7 +35,7 @@ static const size_t kBucketShift = (kAllocationGranularity == 8) ? 3 : 2; + // other constant values, we pack _all_ `PartitionRootGeneric::Alloc` sizes + // perfectly up against the end of a system page. + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(__loongarch__) + static const size_t kPartitionPageShift = 16; // 64 KiB + #elif defined(ARCH_CPU_PPC64) + static const size_t kPartitionPageShift = 18; // 256 KiB +diff --git a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn +index 1154dba4288..403d53aaae5 100644 +--- a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn ++++ b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn +@@ -19,7 +19,7 @@ declare_args() { + # PPC64. + use_swiftshader_with_subzero = + current_cpu != "arm64" && current_cpu != "mips64el" && current_cpu != "ppc64" +- supports_llvm = is_linux || is_fuchsia || is_win || is_android || is_mac ++ supports_llvm = (is_linux || is_fuchsia || is_win || is_android || is_mac) && current_cpu != "la64" + } + + config("swiftshader_reactor_private_config") { +diff --git a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp +index 522b5668772..f34d37b73d5 100644 +--- a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp ++++ b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp +@@ -324,6 +324,8 @@ private: + return false; + #elif defined(__mips__) + return false; ++#elif defined(__loongarch__) ++ return false; + #else + # error "Unknown architecture" + #endif +@@ -665,6 +667,7 @@ std::vector loadImage(uint8_t *const elfImage, const std::vectore_machine == EM_AARCH64); + #elif defined(__mips__) + ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS); ++#elif defined(__loongarch__) + #else + # error "Unsupported platform" + #endif +diff --git a/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.S b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.S +new file mode 100644 +index 00000000000..a41e0be09e3 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.S +@@ -0,0 +1,86 @@ ++// Copyright 2020 The Marl Authors. ++// ++// Licensed under the Apache License, Version 2.0 (the "License"); ++// you may not use this file except in compliance with the License. ++// You may obtain a copy of the License at ++// ++// https://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, software ++// distributed under the License is distributed on an "AS IS" BASIS, ++// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
++// See the License for the specific language governing permissions and ++// limitations under the License. ++ ++#if defined(__loongarch__) ++ ++#define MARL_BUILD_ASM 1 ++#include "osfiber_asm_la64.h" ++ ++// void marl_fiber_swap(marl_fiber_context* from, const marl_fiber_context* to) ++// a0: from ++// v0: to ++.text ++.global MARL_ASM_SYMBOL(marl_fiber_swap) ++.align 4 ++MARL_ASM_SYMBOL(marl_fiber_swap): ++ ++ // Save context 'from' ++ ++ // Store callee-preserved registers ++ sd $s0, MARL_REG_s0($a0) ++ sd $s1, MARL_REG_s1($a0) ++ sd $s2, MARL_REG_s2($a0) ++ sd $s3, MARL_REG_s3($a0) ++ sd $s4, MARL_REG_s4($a0) ++ sd $s5, MARL_REG_s5($a0) ++ sd $s6, MARL_REG_s6($a0) ++ sd $s7, MARL_REG_s7($a0) ++ ++ s.d $f24, MARL_REG_f24($a0) ++ s.d $f25, MARL_REG_f25($a0) ++ s.d $f26, MARL_REG_f26($a0) ++ s.d $f27, MARL_REG_f27($a0) ++ s.d $f28, MARL_REG_f28($a0) ++ s.d $f29, MARL_REG_f29($a0) ++ s.d $f30, MARL_REG_f30($a0) ++ s.d $f31, MARL_REG_f31($a0) ++ ++ sd $gp, MARL_REG_gp($a0) ++ sd $sp, MARL_REG_sp($a0) ++ sd $fp, MARL_REG_fp($a0) ++ sd $ra, MARL_REG_ra($a0) ++ ++ move $v0, $a1 // Function has no return, so safe to touch v0 ++ ++ // Recover callee-preserved registers ++ ld $s0, MARL_REG_s0($v0) ++ ld $s1, MARL_REG_s1($v0) ++ ld $s2, MARL_REG_s2($v0) ++ ld $s3, MARL_REG_s3($v0) ++ ld $s4, MARL_REG_s4($v0) ++ ld $s5, MARL_REG_s5($v0) ++ ld $s6, MARL_REG_s6($v0) ++ ld $s7, MARL_REG_s7($v0) ++ ++ l.d $f24, MARL_REG_f24($v0) ++ l.d $f25, MARL_REG_f25($v0) ++ l.d $f26, MARL_REG_f26($v0) ++ l.d $f27, MARL_REG_f27($v0) ++ l.d $f28, MARL_REG_f28($v0) ++ l.d $f29, MARL_REG_f29($v0) ++ l.d $f30, MARL_REG_f30($v0) ++ l.d $f31, MARL_REG_f31($v0) ++ ++ ld $gp, MARL_REG_gp($v0) ++ ld $sp, MARL_REG_sp($v0) ++ ld $fp, MARL_REG_fp($v0) ++ ld $ra, MARL_REG_ra($v0) ++ ++ // Recover arguments ++ ld $a0, MARL_REG_a0($v0) ++ ld $a1, MARL_REG_a1($v0) ++ ++ jr $ra ++ ++#endif // defined(__loongarch__) +diff --git a/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.h b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.h +new file mode 100644 +index 00000000000..e444e1c78bf +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.h +@@ -0,0 +1,126 @@ ++// Copyright 2020 The Marl Authors. ++// ++// Licensed under the Apache License, Version 2.0 (the "License"); ++// you may not use this file except in compliance with the License. ++// You may obtain a copy of the License at ++// ++// https://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, software ++// distributed under the License is distributed on an "AS IS" BASIS, ++// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++// See the License for the specific language governing permissions and ++// limitations under the License.
++ ++#define MARL_REG_a0 0x00 ++#define MARL_REG_a1 0x08 ++#define MARL_REG_s0 0x10 ++#define MARL_REG_s1 0x18 ++#define MARL_REG_s2 0x20 ++#define MARL_REG_s3 0x28 ++#define MARL_REG_s4 0x30 ++#define MARL_REG_s5 0x38 ++#define MARL_REG_s6 0x40 ++#define MARL_REG_s7 0x48 ++#define MARL_REG_f24 0x50 ++#define MARL_REG_f25 0x58 ++#define MARL_REG_f26 0x60 ++#define MARL_REG_f27 0x68 ++#define MARL_REG_f28 0x70 ++#define MARL_REG_f29 0x78 ++#define MARL_REG_f30 0x80 ++#define MARL_REG_f31 0x88 ++#define MARL_REG_gp 0x90 ++#define MARL_REG_sp 0x98 ++#define MARL_REG_fp 0xa0 ++#define MARL_REG_ra 0xa8 ++ ++#if defined(__APPLE__) ++#define MARL_ASM_SYMBOL(x) _##x ++#else ++#define MARL_ASM_SYMBOL(x) x ++#endif ++ ++#ifndef MARL_BUILD_ASM ++ ++#include ++ ++struct marl_fiber_context { ++ // parameter registers (First two) ++ uintptr_t a0; ++ uintptr_t a1; ++ ++ // callee-saved registers ++ uintptr_t s0; ++ uintptr_t s1; ++ uintptr_t s2; ++ uintptr_t s3; ++ uintptr_t s4; ++ uintptr_t s5; ++ uintptr_t s6; ++ uintptr_t s7; ++ ++ uintptr_t f24; ++ uintptr_t f25; ++ uintptr_t f26; ++ uintptr_t f27; ++ uintptr_t f28; ++ uintptr_t f29; ++ uintptr_t f30; ++ uintptr_t f31; ++ ++ uintptr_t gp; ++ uintptr_t sp; ++ uintptr_t fp; ++ uintptr_t ra; ++}; ++ ++#ifdef __cplusplus ++#include ++static_assert(offsetof(marl_fiber_context, a0) == MARL_REG_a0, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, a1) == MARL_REG_a1, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s0) == MARL_REG_s0, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s1) == MARL_REG_s1, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s2) == MARL_REG_s2, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s3) == MARL_REG_s3, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s4) == MARL_REG_s4, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s5) == MARL_REG_s5, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s6) == MARL_REG_s6, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s7) == MARL_REG_s7, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f24) == MARL_REG_f24, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f25) == MARL_REG_f25, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f26) == MARL_REG_f26, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f27) == MARL_REG_f27, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f28) == MARL_REG_f28, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f29) == MARL_REG_f29, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f30) == MARL_REG_f30, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f31) == MARL_REG_f31, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, gp) == MARL_REG_gp, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, sp) == MARL_REG_sp, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, fp) == MARL_REG_fp, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, ra) == MARL_REG_ra, ++ "Bad register offset"); ++#endif // __cplusplus ++ ++#endif // MARL_BUILD_ASM +diff --git a/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_la64.c 
b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_la64.c +new file mode 100644 +index 00000000000..4838f9f0964 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_la64.c +@@ -0,0 +1,35 @@ ++// Copyright 2020 The Marl Authors. ++// ++// Licensed under the Apache License, Version 2.0 (the "License"); ++// you may not use this file except in compliance with the License. ++// You may obtain a copy of the License at ++// ++// https://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, software ++// distributed under the License is distributed on an "AS IS" BASIS, ++// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++// See the License for the specific language governing permissions and ++// limitations under the License. ++ ++#if defined(__loongarch__) ++ ++#include "osfiber_asm_la64.h" ++ ++void marl_fiber_trampoline(void (*target)(void*), void* arg) { ++ target(arg); ++} ++ ++void marl_fiber_set_target(struct marl_fiber_context* ctx, ++ void* stack, ++ uint32_t stack_size, ++ void (*target)(void*), ++ void* arg) { ++ uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size); ++ ctx->ra = (uintptr_t)&marl_fiber_trampoline; ++ ctx->a0 = (uintptr_t)target; ++ ctx->a1 = (uintptr_t)arg; ++ ctx->sp = ((uintptr_t)stack_top) & ~(uintptr_t)15; ++} ++ ++#endif // defined(__loongarch__) +diff --git a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h +index 3bf59f4e5f7..8e723831d63 100644 +--- a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h ++++ b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h +@@ -379,6 +379,8 @@ class AssignAttributeStartEnd { + // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned + # elif (defined(__mips__)) + # define CACHELINE_ALIGNED __attribute__((aligned(128))) ++# elif (defined(__loongarch__)) ++# define CACHELINE_ALIGNED __attribute__((aligned(128))) + # elif (defined(__aarch64__)) + # define CACHELINE_ALIGNED __attribute__((aligned(64))) + // implementation specific, Cortex-A53 and 57 should have 64 bytes +diff --git a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h +index ece8477b972..c4a4ab715d8 100644 +--- a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h ++++ b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h +@@ -41,6 +41,7 @@ + #define FUTEX_WAIT 0 + #define FUTEX_WAKE 1 + #define FUTEX_PRIVATE_FLAG 128 ++#define __NR_futex 98 + + // Note: Instead of making direct system calls that are inlined, we rely + // on the syscall() function in glibc to do the right thing. 
This +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc b/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc +index dd9ab457e0f..ce0d26c953f 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc +@@ -29,7 +29,7 @@ bool VectorDifference_C(const uint8_t* image1, const uint8_t* image2) { + bool VectorDifference(const uint8_t* image1, const uint8_t* image2) { + static bool (*diff_proc)(const uint8_t*, const uint8_t*) = nullptr; + +- if (!diff_proc) { ++/* if (!diff_proc) { + #if defined(WEBRTC_ARCH_ARM_FAMILY) || defined(WEBRTC_ARCH_MIPS_FAMILY) + // For ARM and MIPS processors, always use C version. + // TODO(hclam): Implement a NEON version. +@@ -47,7 +47,8 @@ bool VectorDifference(const uint8_t* image1, const uint8_t* image2) { + #endif + } + +- return diff_proc(image1, image2); ++ return diff_proc(image1, image2);*/ ++ return false; + } + + bool BlockDifference(const uint8_t* image1, +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc +index 1a3df403aec..6bbede54d5e 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc +@@ -26,6 +26,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + unsigned int d_w, + unsigned int d_h, + unsigned int align) const override { ++#if defined(__loongarch__) ++ return NULL; ++#endif + return ::vpx_img_alloc(img, fmt, d_w, d_h, align); + } + +@@ -35,20 +38,34 @@ class LibvpxVp8Facade : public LibvpxInterface { + unsigned int d_h, + unsigned int stride_align, + unsigned char* img_data) const override { ++#if defined(__loongarch__) ++ return NULL; ++#endif + return ::vpx_img_wrap(img, fmt, d_w, d_h, stride_align, img_data); + } + +- void img_free(vpx_image_t* img) const override { ::vpx_img_free(img); } ++ void img_free(vpx_image_t* img) const override { ++#if defined(__loongarch__) ++ return ; ++#endif ++ ::vpx_img_free(img); ++ } + + vpx_codec_err_t codec_enc_config_set( + vpx_codec_ctx_t* ctx, + const vpx_codec_enc_cfg_t* cfg) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_config_set(ctx, cfg); + } + + vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface, + vpx_codec_enc_cfg_t* cfg, + unsigned int usage) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_config_default(iface, cfg, usage); + } + +@@ -56,6 +73,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_iface_t* iface, + const vpx_codec_enc_cfg_t* cfg, + vpx_codec_flags_t flags) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_init(ctx, iface, cfg, flags); + } + +@@ -65,10 +85,16 @@ class LibvpxVp8Facade : public LibvpxInterface { + int num_enc, + vpx_codec_flags_t flags, + vpx_rational_t* dsf) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf); + } + + vpx_codec_err_t codec_destroy(vpx_codec_ctx_t* ctx) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return 
::vpx_codec_destroy(ctx); + } + +@@ -78,6 +104,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + uint32_t param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + // We need an explicit call for each type since vpx_codec_control is a + // macro that gets expanded into another call based on the parameter name. + switch (ctrl_id) { +@@ -113,6 +142,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_FRAME_FLAGS: + return vpx_codec_control(ctx, VP8E_SET_FRAME_FLAGS, param); +@@ -134,6 +166,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_GET_LAST_QUANTIZER: + return vpx_codec_control(ctx, VP8E_GET_LAST_QUANTIZER, param); +@@ -148,6 +183,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_roi_map* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_ROI_MAP: + return vpx_codec_control(ctx, VP8E_SET_ROI_MAP, param); +@@ -160,6 +198,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_active_map* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_ACTIVEMAP: + return vpx_codec_control(ctx, VP8E_SET_ACTIVEMAP, param); +@@ -172,6 +213,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_scaling_mode* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_SCALEMODE: + return vpx_codec_control(ctx, VP8E_SET_SCALEMODE, param); +@@ -187,12 +231,18 @@ class LibvpxVp8Facade : public LibvpxInterface { + uint64_t duration, + vpx_enc_frame_flags_t flags, + uint64_t deadline) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_encode(ctx, img, pts, duration, flags, deadline); + } + + const vpx_codec_cx_pkt_t* codec_get_cx_data( + vpx_codec_ctx_t* ctx, + vpx_codec_iter_t* iter) const override { ++#if defined(__loongarch__) ++ return NULL; ++#endif + return ::vpx_codec_get_cx_data(ctx, iter); + } + }; +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc +index a3ee2c0c41d..782c5653b07 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc +@@ -118,6 +118,9 @@ LibvpxVp8Decoder::~LibvpxVp8Decoder() { + } + + int LibvpxVp8Decoder::InitDecode(const VideoCodec* inst, int number_of_cores) { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_OK; ++#endif + int ret_val = Release(); + if (ret_val < 0) { + return ret_val; 
+@@ -160,6 +163,9 @@ int LibvpxVp8Decoder::InitDecode(const VideoCodec* inst, int number_of_cores) { + int LibvpxVp8Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_UNINITIALIZED; ++#endif + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } +@@ -342,7 +348,9 @@ int LibvpxVp8Decoder::RegisterDecodeCompleteCallback( + + int LibvpxVp8Decoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; +- ++#if defined(__loongarch__) ++ return ret_val; ++#endif + if (decoder_ != NULL) { + if (inited_) { + if (vpx_codec_destroy(decoder_)) { +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +index 7694dae6ece..0933cfa2d7c 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +@@ -452,6 +452,9 @@ void LibvpxVp8Encoder::SetFecControllerOverride( + // TODO(eladalon): s/inst/codec_settings/g. + int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, + const VideoEncoder::Settings& settings) { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; ++#endif + if (inst == NULL) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } +@@ -793,6 +796,9 @@ int LibvpxVp8Encoder::NumberOfThreads(int width, int height, int cpus) { + } + + int LibvpxVp8Encoder::InitAndSetControlSettings() { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_UNINITIALIZED; ++#endif + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + +@@ -943,6 +949,9 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, + const std::vector* frame_types) { + RTC_DCHECK_EQ(frame.width(), codec_.width); + RTC_DCHECK_EQ(frame.height(), codec_.height); ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_UNINITIALIZED; ++#endif + + if (!inited_) + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; +diff --git a/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h b/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h +index ed216e660fd..01cb78c186e 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h ++++ b/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h +@@ -38,6 +38,8 @@ + #define WEBRTC_ARCH_LITTLE_ENDIAN + #elif defined(__MIPSEL__) + #define WEBRTC_ARCH_MIPS_FAMILY ++#elif defined(__loongarch__) ++#define WEBRTC_ARCH_LOONGARCH_FAMILY + #if defined(__LP64__) + #define WEBRTC_ARCH_64_BITS + #else +diff --git a/src/3rdparty/chromium/ui/base/x/BUILD.gn b/src/3rdparty/chromium/ui/base/x/BUILD.gn +index 782f009ddcc..6941d79df85 100644 +--- a/src/3rdparty/chromium/ui/base/x/BUILD.gn ++++ b/src/3rdparty/chromium/ui/base/x/BUILD.gn +@@ -57,6 +57,8 @@ component("x") { + "//build/config/linux:xrandr", + ] + ++ cflags = ["-fpermissive", ] ++ + defines = [ "IS_UI_BASE_X_IMPL" ] + + deps = [ +diff --git a/src/3rdparty/chromium/ui/gl/BUILD.gn b/src/3rdparty/chromium/ui/gl/BUILD.gn +index adc9c08b0be..e3fbeb64b4b 100644 +--- a/src/3rdparty/chromium/ui/gl/BUILD.gn ++++ b/src/3rdparty/chromium/ui/gl/BUILD.gn +@@ -33,6 +33,7 @@ buildflag_header("buildflags") { + + config("gl_config") { + defines = [] ++ cflags = [] + if (use_glx) { + defines += [ + "GL_GLEXT_PROTOTYPES", +@@ -42,6 +43,8 @@ config("gl_config") { + if (use_egl) { + defines 
+= [ "USE_EGL" ] + } ++ ++ cflags += [ "-fpermissive", ] + } + + jumbo_component("gl") { +diff --git a/src/3rdparty/chromium/ui/views/layout/layout_types.h b/src/3rdparty/chromium/ui/views/layout/layout_types.h +index b349e63825f..35142a56277 100644 +--- a/src/3rdparty/chromium/ui/views/layout/layout_types.h ++++ b/src/3rdparty/chromium/ui/views/layout/layout_types.h +@@ -46,10 +46,10 @@ class VIEWS_EXPORT SizeBounds { + ~SizeBounds() = default; + + constexpr const base::Optional& width() const { return width_; } +- void set_width(base::Optional width) { width_ = std::move(width); } ++ void set_width(base::Optional width) { width_ = width; } + + constexpr const base::Optional& height() const { return height_; } +- void set_height(base::Optional height) { height_ = std::move(height); } ++ void set_height(base::Optional height) { height_ = height; } + + constexpr bool is_fully_bounded() const { return width_ && height_; } + +diff --git a/src/3rdparty/chromium/v8/BUILD.gn b/src/3rdparty/chromium/v8/BUILD.gn +index ab20142de9a..f7763f54c15 100644 +--- a/src/3rdparty/chromium/v8/BUILD.gn ++++ b/src/3rdparty/chromium/v8/BUILD.gn +@@ -670,6 +670,16 @@ config("toolchain") { + cflags += [ "-march=z196" ] + } + } ++ ++ # la64 simulators. ++ if (target_is_simulator && v8_current_cpu == "la64") { ++ defines += [ "_LA64_TARGET_SIMULATOR" ] ++ } ++ ++ if (v8_current_cpu == "la64") { ++ defines += [ "V8_TARGET_ARCH_LA64" ] ++ } ++ + if (v8_current_cpu == "ppc" || v8_current_cpu == "ppc64") { + if (v8_current_cpu == "ppc") { + defines += [ "V8_TARGET_ARCH_PPC" ] +@@ -1695,6 +1705,11 @@ v8_source_set("v8_initializers") { + ### gcmole(arch:mips64el) ### + "src/builtins/mips64/builtins-mips64.cc", + ] ++ } else if (v8_current_cpu == "la64") { ++ sources += [ ++ ### gcmole(arch:la64) ### ++ "src/builtins/la64/builtins-la64.cc", ++ ] + } else if (v8_current_cpu == "ppc") { + sources += [ + ### gcmole(arch:ppc) ### +@@ -3362,6 +3377,33 @@ v8_source_set("v8_base_without_compiler") { + "src/regexp/mips64/regexp-macro-assembler-mips64.h", + "src/wasm/baseline/mips64/liftoff-assembler-mips64.h", + ] ++ } else if (v8_current_cpu == "la64") { ++ sources += [ ### gcmole(arch:la64) ### ++ "src/codegen/la64/assembler-la64-inl.h", ++ "src/codegen/la64/assembler-la64.cc", ++ "src/codegen/la64/assembler-la64.h", ++ "src/codegen/la64/constants-la64.cc", ++ "src/codegen/la64/constants-la64.h", ++ "src/codegen/la64/cpu-la64.cc", ++ "src/codegen/la64/interface-descriptors-la64.cc", ++ "src/codegen/la64/macro-assembler-la64.cc", ++ "src/codegen/la64/macro-assembler-la64.h", ++ "src/codegen/la64/register-la64.h", ++ "src/compiler/backend/la64/code-generator-la64.cc", ++ "src/compiler/backend/la64/instruction-codes-la64.h", ++ "src/compiler/backend/la64/instruction-scheduler-la64.cc", ++ "src/compiler/backend/la64/instruction-selector-la64.cc", ++ "src/debug/la64/debug-la64.cc", ++ "src/deoptimizer/la64/deoptimizer-la64.cc", ++ "src/diagnostics/la64/disasm-la64.cc", ++ "src/execution/la64/frame-constants-la64.cc", ++ "src/execution/la64/frame-constants-la64.h", ++ "src/execution/la64/simulator-la64.cc", ++ "src/execution/la64/simulator-la64.h", ++ "src/regexp/la64/regexp-macro-assembler-la64.cc", ++ "src/regexp/la64/regexp-macro-assembler-la64.h", ++ "src/wasm/baseline/la64/liftoff-assembler-la64.h", ++ ] + } else if (v8_current_cpu == "ppc") { + sources += [ ### gcmole(arch:ppc) ### + "src/codegen/ppc/assembler-ppc-inl.h", +diff --git a/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni 
b/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni +index b5fb1823b38..8ada11d790d 100644 +--- a/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni ++++ b/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni +@@ -79,7 +79,8 @@ if (v8_snapshot_toolchain == "") { + + if (v8_current_cpu == "x64" || v8_current_cpu == "x86") { + _cpus = v8_current_cpu +- } else if (v8_current_cpu == "arm64" || v8_current_cpu == "mips64el") { ++ } else if (v8_current_cpu == "arm64" || v8_current_cpu == "mips64el" || ++ v8_current_cpu == "la64") { + if (is_win && v8_current_cpu == "arm64") { + # set _cpus to blank for Windows ARM64 so host_toolchain could be + # selected as snapshot toolchain later. +diff --git a/src/3rdparty/chromium/v8/src/base/build_config.h b/src/3rdparty/chromium/v8/src/base/build_config.h +index 8d142c456c9..a9a8a07b12e 100644 +--- a/src/3rdparty/chromium/v8/src/base/build_config.h ++++ b/src/3rdparty/chromium/v8/src/base/build_config.h +@@ -33,6 +33,9 @@ + #elif defined(__MIPSEB__) || defined(__MIPSEL__) + #define V8_HOST_ARCH_MIPS 1 + #define V8_HOST_ARCH_32_BIT 1 ++#elif defined(__loongarch64) ++#define V8_HOST_ARCH_LA64 1 ++#define V8_HOST_ARCH_64_BIT 1 + #elif defined(__PPC64__) || defined(_ARCH_PPC64) + #define V8_HOST_ARCH_PPC64 1 + #define V8_HOST_ARCH_64_BIT 1 +@@ -77,7 +80,8 @@ + // environment as presented by the compiler. + #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && \ + !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 && \ +- !V8_TARGET_ARCH_PPC && !V8_TARGET_ARCH_PPC64 && !V8_TARGET_ARCH_S390 ++ !V8_TARGET_ARCH_PPC && !V8_TARGET_ARCH_PPC64 && !V8_TARGET_ARCH_S390 && \ ++ !V8_TARGET_ARCH_LA64 + #if defined(_M_X64) || defined(__x86_64__) + #define V8_TARGET_ARCH_X64 1 + #elif defined(_M_IX86) || defined(__i386__) +@@ -118,6 +122,8 @@ + #define V8_TARGET_ARCH_32_BIT 1 + #elif V8_TARGET_ARCH_MIPS64 + #define V8_TARGET_ARCH_64_BIT 1 ++#elif V8_TARGET_ARCH_LA64 ++#define V8_TARGET_ARCH_64_BIT 1 + #elif V8_TARGET_ARCH_PPC + #define V8_TARGET_ARCH_32_BIT 1 + #elif V8_TARGET_ARCH_PPC64 +@@ -156,6 +162,9 @@ + #if (V8_TARGET_ARCH_MIPS64 && !(V8_HOST_ARCH_X64 || V8_HOST_ARCH_MIPS64)) + #error Target architecture mips64 is only supported on mips64 and x64 host + #endif ++#if (V8_TARGET_ARCH_LA64 && !(V8_HOST_ARCH_X64 || V8_HOST_ARCH_LA64)) ++#error Target architecture la64 is only supported on la64 and x64 host ++#endif + + // Determine architecture endianness. + #if V8_TARGET_ARCH_IA32 +@@ -166,6 +175,8 @@ + #define V8_TARGET_LITTLE_ENDIAN 1 + #elif V8_TARGET_ARCH_ARM64 + #define V8_TARGET_LITTLE_ENDIAN 1 ++#elif V8_TARGET_ARCH_LA64 ++#define V8_TARGET_LITTLE_ENDIAN 1 + #elif V8_TARGET_ARCH_MIPS + #if defined(__MIPSEB__) + #define V8_TARGET_BIG_ENDIAN 1 +diff --git a/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc b/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc +index 1e600c7891b..f0e95f6443b 100644 +--- a/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc ++++ b/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc +@@ -297,6 +297,10 @@ void* OS::GetRandomMmapAddr() { + // 42 bits of virtual addressing. Truncate to 40 bits to allow kernel chance + // to fulfill request. + raw_addr &= uint64_t{0xFFFFFF0000}; ++#elif V8_TARGET_ARCH_LA64 ++ // 42 bits of virtual addressing. Truncate to 40 bits to allow kernel chance ++ // to fulfill request. 
++ raw_addr &= uint64_t{0xFFFFFF0000}; + #else + raw_addr &= 0x3FFFF000; + +@@ -470,6 +474,8 @@ void OS::DebugBreak() { + asm("break"); + #elif V8_HOST_ARCH_MIPS64 + asm("break"); ++#elif V8_HOST_ARCH_LA64 ++ asm("break 0"); + #elif V8_HOST_ARCH_PPC || V8_HOST_ARCH_PPC64 + asm("twge 2,2"); + #elif V8_HOST_ARCH_IA32 +diff --git a/src/3rdparty/chromium/v8/src/builtins/builtins.cc b/src/3rdparty/chromium/v8/src/builtins/builtins.cc +index 34f7ddc18ad..72f28d08e98 100644 +--- a/src/3rdparty/chromium/v8/src/builtins/builtins.cc ++++ b/src/3rdparty/chromium/v8/src/builtins/builtins.cc +@@ -466,7 +466,7 @@ bool Builtins::CodeObjectIsExecutable(int builtin_index) { + case Builtins::kCEntry_Return1_DontSaveFPRegs_ArgvOnStack_NoBuiltinExit: + return true; + default: +-#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + // TODO(Loongson): Move non-JS linkage builtins code objects into RO_SPACE + // caused MIPS platform to crash, and we need some time to handle it. Now + // disable this change temporarily on MIPS platform. +diff --git a/src/3rdparty/chromium/v8/src/builtins/la64/builtins-la64.cc b/src/3rdparty/chromium/v8/src/builtins/la64/builtins-la64.cc +new file mode 100644 +index 00000000000..cdfb9abed10 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/builtins/la64/builtins-la64.cc +@@ -0,0 +1,3173 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/api/api-arguments.h" ++#include "src/codegen/code-factory.h" ++#include "src/debug/debug.h" ++#include "src/deoptimizer/deoptimizer.h" ++#include "src/execution/frame-constants.h" ++#include "src/execution/frames.h" ++#include "src/logging/counters.h" ++// For interpreter_entry_return_pc_offset. TODO(jkummerow): Drop. ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler-inl.h" ++#include "src/codegen/register-configuration.h" ++#include "src/heap/heap-inl.h" ++#include "src/objects/cell.h" ++#include "src/objects/foreign.h" ++#include "src/objects/heap-number.h" ++#include "src/objects/js-generator.h" ++#include "src/objects/objects-inl.h" ++#include "src/objects/smi.h" ++#include "src/runtime/runtime.h" ++#include "src/wasm/wasm-linkage.h" ++#include "src/wasm/wasm-objects.h" ++ ++namespace v8 { ++namespace internal { ++ ++#define __ ACCESS_MASM(masm) ++ ++void Builtins::Generate_Adaptor(MacroAssembler* masm, Address address) { ++ __ li(kJavaScriptCallExtraArg1Register, ExternalReference::Create(address)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), AdaptorWithBuiltinExitFrame), ++ RelocInfo::CODE_TARGET); ++} ++ ++static void GenerateTailCallToReturnedCode(MacroAssembler* masm, ++ Runtime::FunctionId function_id) { ++ // ----------- S t a t e ------------- ++ // -- a1 : target function (preserved for callee) ++ // -- a3 : new target (preserved for callee) ++ // ----------------------------------- ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ // Push a copy of the function onto the stack. ++ // Push a copy of the target function and the new target. ++ __ Push(a1, a3, a1); ++ ++ __ CallRuntime(function_id, 1); ++ // Restore target function and new target. 
++ __ Pop(a1, a3); ++ } ++ ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Add_d(a2, a0, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++} ++ ++namespace { ++ ++enum StackLimitKind { kInterruptStackLimit, kRealStackLimit }; ++ ++void LoadStackLimit(MacroAssembler* masm, Register destination, ++ StackLimitKind kind) { ++ DCHECK(masm->root_array_available()); ++ Isolate* isolate = masm->isolate(); ++ ExternalReference limit = ++ kind == StackLimitKind::kRealStackLimit ++ ? ExternalReference::address_of_real_jslimit(isolate) ++ : ExternalReference::address_of_jslimit(isolate); ++ DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit)); ++ ++ intptr_t offset = ++ TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit); ++ CHECK(is_int32(offset)); ++ __ Ld_d(destination, MemOperand(kRootRegister, static_cast(offset))); ++} ++ ++void Generate_JSBuiltinsConstructStubHelper(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : number of arguments ++ // -- a1 : constructor function ++ // -- a3 : new target ++ // -- cp : context ++ // -- ra : return address ++ // -- sp[...]: constructor arguments ++ // ----------------------------------- ++ ++ // Enter a construct frame. ++ { ++ FrameScope scope(masm, StackFrame::CONSTRUCT); ++ ++ // Preserve the incoming parameters on the stack. ++ __ SmiTag(a0); ++ __ Push(cp, a0); ++ __ SmiUntag(a0); ++ ++ // The receiver for the builtin/api call. ++ __ PushRoot(RootIndex::kTheHoleValue); ++ ++ // Set up pointer to last argument. ++ __ Add_d(t2, fp, Operand(StandardFrameConstants::kCallerSPOffset)); ++ ++ // Copy arguments and receiver to the expression stack. ++ Label loop, entry; ++ __ mov(t3, a0); ++ // ----------- S t a t e ------------- ++ // -- a0: number of arguments (untagged) ++ // -- a3: new target ++ // -- t2: pointer to last argument ++ // -- t3: counter ++ // -- sp[0*kPointerSize]: the hole (receiver) ++ // -- sp[1*kPointerSize]: number of arguments (tagged) ++ // -- sp[2*kPointerSize]: context ++ // ----------------------------------- ++ __ jmp(&entry); ++ __ bind(&loop); ++ __ Alsl_d(t0, t3, t2, kPointerSizeLog2, t7); ++ __ Ld_d(t1, MemOperand(t0, 0)); ++ __ push(t1); ++ __ bind(&entry); ++ __ Add_d(t3, t3, Operand(-1)); ++ __ Branch(&loop, greater_equal, t3, Operand(zero_reg)); ++ ++ // Call the function. ++ // a0: number of arguments (untagged) ++ // a1: constructor function ++ // a3: new target ++ __ InvokeFunctionWithNewTarget(a1, a3, a0, CALL_FUNCTION); ++ ++ // Restore context from the frame. ++ __ Ld_d(cp, MemOperand(fp, ConstructFrameConstants::kContextOffset)); ++ // Restore smi-tagged arguments count from the frame. ++ __ Ld_d(a1, MemOperand(fp, ConstructFrameConstants::kLengthOffset)); ++ // Leave construct frame. ++ } ++ ++ // Remove caller arguments from the stack and return. ++ __ SmiScale(a4, a1, kPointerSizeLog2); ++ __ Add_d(sp, sp, a4); ++ __ Add_d(sp, sp, kPointerSize); ++ __ Ret(); ++} ++ ++static void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args, ++ Register scratch1, Register scratch2, ++ Label* stack_overflow) { ++ // Check the stack for overflow. We are not trying to catch ++ // interruptions (e.g. debug break and preemption) here, so the "real stack ++ // limit" is checked. ++ LoadStackLimit(masm, scratch1, StackLimitKind::kRealStackLimit); ++ // Make scratch1 the space we have left. The stack might already be overflowed ++ // here which will cause scratch1 to become negative. 
++ __ sub_d(scratch1, sp, scratch1); ++ // Check if the arguments will overflow the stack. ++ __ slli_d(scratch2, num_args, kPointerSizeLog2); ++ // Signed comparison. ++ __ Branch(stack_overflow, le, scratch1, Operand(scratch2)); ++} ++ ++} // namespace ++ ++// The construct stub for ES5 constructor functions and ES6 class constructors. ++void Builtins::Generate_JSConstructStubGeneric(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0: number of arguments (untagged) ++ // -- a1: constructor function ++ // -- a3: new target ++ // -- cp: context ++ // -- ra: return address ++ // -- sp[...]: constructor arguments ++ // ----------------------------------- ++ ++ // Enter a construct frame. ++ { ++ FrameScope scope(masm, StackFrame::CONSTRUCT); ++ Label post_instantiation_deopt_entry, not_create_implicit_receiver; ++ ++ // Preserve the incoming parameters on the stack. ++ __ SmiTag(a0); ++ __ Push(cp, a0, a1); ++ __ PushRoot(RootIndex::kTheHoleValue); ++ __ Push(a3); ++ ++ // ----------- S t a t e ------------- ++ // -- sp[0*kPointerSize]: new target ++ // -- sp[1*kPointerSize]: padding ++ // -- a1 and sp[2*kPointerSize]: constructor function ++ // -- sp[3*kPointerSize]: number of arguments (tagged) ++ // -- sp[4*kPointerSize]: context ++ // ----------------------------------- ++ ++ __ Ld_d(t2, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_wu(t2, FieldMemOperand(t2, SharedFunctionInfo::kFlagsOffset)); ++ __ DecodeField(t2); ++ __ JumpIfIsInRange(t2, kDefaultDerivedConstructor, kDerivedConstructor, ++ ¬_create_implicit_receiver); ++ ++ // If not derived class constructor: Allocate the new receiver object. ++ __ IncrementCounter(masm->isolate()->counters()->constructed_objects(), 1, ++ t2, t3); ++ __ Call(BUILTIN_CODE(masm->isolate(), FastNewObject), ++ RelocInfo::CODE_TARGET); ++ __ Branch(&post_instantiation_deopt_entry); ++ ++ // Else: use TheHoleValue as receiver for constructor call ++ __ bind(¬_create_implicit_receiver); ++ __ LoadRoot(a0, RootIndex::kTheHoleValue); ++ ++ // ----------- S t a t e ------------- ++ // -- a0: receiver ++ // -- Slot 4 / sp[0*kPointerSize]: new target ++ // -- Slot 3 / sp[1*kPointerSize]: padding ++ // -- Slot 2 / sp[2*kPointerSize]: constructor function ++ // -- Slot 1 / sp[3*kPointerSize]: number of arguments (tagged) ++ // -- Slot 0 / sp[4*kPointerSize]: context ++ // ----------------------------------- ++ // Deoptimizer enters here. ++ masm->isolate()->heap()->SetConstructStubCreateDeoptPCOffset( ++ masm->pc_offset()); ++ __ bind(&post_instantiation_deopt_entry); ++ ++ // Restore new target. ++ __ Pop(a3); ++ // Push the allocated receiver to the stack. We need two copies ++ // because we may have to return the original one and the calling ++ // conventions dictate that the called function pops the receiver. ++ __ Push(a0, a0); ++ ++ // ----------- S t a t e ------------- ++ // -- r3: new target ++ // -- sp[0*kPointerSize]: implicit receiver ++ // -- sp[1*kPointerSize]: implicit receiver ++ // -- sp[2*kPointerSize]: padding ++ // -- sp[3*kPointerSize]: constructor function ++ // -- sp[4*kPointerSize]: number of arguments (tagged) ++ // -- sp[5*kPointerSize]: context ++ // ----------------------------------- ++ ++ // Restore constructor function and argument count. ++ __ Ld_d(a1, MemOperand(fp, ConstructFrameConstants::kConstructorOffset)); ++ __ Ld_d(a0, MemOperand(fp, ConstructFrameConstants::kLengthOffset)); ++ __ SmiUntag(a0); ++ ++ // Set up pointer to last argument. 
++ __ Add_d(t2, fp, Operand(StandardFrameConstants::kCallerSPOffset)); ++ ++ Label enough_stack_space, stack_overflow; ++ Generate_StackOverflowCheck(masm, a0, t0, t1, &stack_overflow); ++ __ Branch(&enough_stack_space); ++ ++ __ bind(&stack_overflow); ++ // Restore the context from the frame. ++ __ Ld_d(cp, MemOperand(fp, ConstructFrameConstants::kContextOffset)); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++ ++ __ bind(&enough_stack_space); ++ ++ // Copy arguments and receiver to the expression stack. ++ Label loop, entry; ++ __ mov(t3, a0); ++ // ----------- S t a t e ------------- ++ // -- a0: number of arguments (untagged) ++ // -- a3: new target ++ // -- t2: pointer to last argument ++ // -- t3: counter ++ // -- sp[0*kPointerSize]: implicit receiver ++ // -- sp[1*kPointerSize]: implicit receiver ++ // -- sp[2*kPointerSize]: padding ++ // -- a1 and sp[3*kPointerSize]: constructor function ++ // -- sp[4*kPointerSize]: number of arguments (tagged) ++ // -- sp[5*kPointerSize]: context ++ // ----------------------------------- ++ __ jmp(&entry); ++ __ bind(&loop); ++ __ Alsl_d(t0, t3, t2, kPointerSizeLog2, t7); ++ __ Ld_d(t1, MemOperand(t0, 0)); ++ __ push(t1); ++ __ bind(&entry); ++ __ Add_d(t3, t3, Operand(-1)); ++ __ Branch(&loop, greater_equal, t3, Operand(zero_reg)); ++ ++ // Call the function. ++ __ InvokeFunctionWithNewTarget(a1, a3, a0, CALL_FUNCTION); ++ ++ // ----------- S t a t e ------------- ++ // -- t5: constructor result ++ // -- sp[0*kPointerSize]: implicit receiver ++ // -- sp[1*kPointerSize]: padding ++ // -- sp[2*kPointerSize]: constructor function ++ // -- sp[3*kPointerSize]: number of arguments ++ // -- sp[4*kPointerSize]: context ++ // ----------------------------------- ++ ++ // Store offset of return address for deoptimizer. ++ masm->isolate()->heap()->SetConstructStubInvokeDeoptPCOffset( ++ masm->pc_offset()); ++ ++ // Restore the context from the frame. ++ __ Ld_d(cp, MemOperand(fp, ConstructFrameConstants::kContextOffset)); ++ ++ // If the result is an object (in the ECMA sense), we should get rid ++ // of the receiver and use the result; see ECMA-262 section 13.2.2-7 ++ // on page 74. ++ Label use_receiver, do_throw, leave_frame; ++ ++ // If the result is undefined, we jump out to using the implicit receiver. ++ __ JumpIfRoot(a0, RootIndex::kUndefinedValue, &use_receiver); ++ ++ // Otherwise we do a smi check and fall through to check if the return value ++ // is a valid receiver. ++ ++ // If the result is a smi, it is *not* an object in the ECMA sense. ++ __ JumpIfSmi(a0, &use_receiver); ++ ++ // If the type of the result (stored in its map) is less than ++ // FIRST_JS_RECEIVER_TYPE, it is not an object in the ECMA sense. ++ __ GetObjectType(a0, t2, t2); ++ STATIC_ASSERT(LAST_JS_RECEIVER_TYPE == LAST_TYPE); ++ __ Branch(&leave_frame, greater_equal, t2, Operand(FIRST_JS_RECEIVER_TYPE)); ++ __ Branch(&use_receiver); ++ ++ __ bind(&do_throw); ++ __ CallRuntime(Runtime::kThrowConstructorReturnedNonObject); ++ ++ // Throw away the result of the constructor invocation and use the ++ // on-stack receiver as the result. ++ __ bind(&use_receiver); ++ __ Ld_d(a0, MemOperand(sp, 0 * kPointerSize)); ++ __ JumpIfRoot(a0, RootIndex::kTheHoleValue, &do_throw); ++ ++ __ bind(&leave_frame); ++ // Restore smi-tagged arguments count from the frame. ++ __ Ld_d(a1, MemOperand(fp, ConstructFrameConstants::kLengthOffset)); ++ // Leave construct frame. ++ } ++ // Remove caller arguments from the stack and return. 
++ __ SmiScale(a4, a1, kPointerSizeLog2); ++ __ Add_d(sp, sp, a4); ++ __ Add_d(sp, sp, kPointerSize); ++ __ Ret(); ++} ++ ++void Builtins::Generate_JSBuiltinsConstructStub(MacroAssembler* masm) { ++ Generate_JSBuiltinsConstructStubHelper(masm); ++} ++ ++static void GetSharedFunctionInfoBytecode(MacroAssembler* masm, ++ Register sfi_data, ++ Register scratch1) { ++ Label done; ++ ++ __ GetObjectType(sfi_data, scratch1, scratch1); ++ __ Branch(&done, ne, scratch1, Operand(INTERPRETER_DATA_TYPE)); ++ __ Ld_d(sfi_data, ++ FieldMemOperand(sfi_data, InterpreterData::kBytecodeArrayOffset)); ++ ++ __ bind(&done); ++} ++ ++// static ++void Builtins::Generate_ResumeGeneratorTrampoline(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the value to pass to the generator ++ // -- a1 : the JSGeneratorObject to resume ++ // -- ra : return address ++ // ----------------------------------- ++ __ AssertGeneratorObject(a1); ++ ++ // Store input value into generator object. ++ __ St_d(a0, FieldMemOperand(a1, JSGeneratorObject::kInputOrDebugPosOffset)); ++ __ RecordWriteField(a1, JSGeneratorObject::kInputOrDebugPosOffset, a0, a3, ++ kRAHasNotBeenSaved, kDontSaveFPRegs); ++ ++ // Load suspended function and context. ++ __ Ld_d(a4, FieldMemOperand(a1, JSGeneratorObject::kFunctionOffset)); ++ __ Ld_d(cp, FieldMemOperand(a4, JSFunction::kContextOffset)); ++ ++ // Flood function if we are stepping. ++ Label prepare_step_in_if_stepping, prepare_step_in_suspended_generator; ++ Label stepping_prepared; ++ ExternalReference debug_hook = ++ ExternalReference::debug_hook_on_function_call_address(masm->isolate()); ++ __ li(a5, debug_hook); ++ __ Ld_b(a5, MemOperand(a5, 0)); ++ __ Branch(&prepare_step_in_if_stepping, ne, a5, Operand(zero_reg)); ++ ++ // Flood function if we need to continue stepping in the suspended generator. ++ ExternalReference debug_suspended_generator = ++ ExternalReference::debug_suspended_generator_address(masm->isolate()); ++ __ li(a5, debug_suspended_generator); ++ __ Ld_d(a5, MemOperand(a5, 0)); ++ __ Branch(&prepare_step_in_suspended_generator, eq, a1, Operand(a5)); ++ __ bind(&stepping_prepared); ++ ++ // Check the stack for overflow. We are not trying to catch interruptions ++ // (i.e. debug break and preemption) here, so check the "real stack limit". ++ Label stack_overflow; ++ LoadStackLimit(masm, kScratchReg, StackLimitKind::kRealStackLimit); ++ __ Branch(&stack_overflow, lo, sp, Operand(kScratchReg)); ++ ++ // Push receiver. ++ __ Ld_d(a5, FieldMemOperand(a1, JSGeneratorObject::kReceiverOffset)); ++ __ Push(a5); ++ ++ // ----------- S t a t e ------------- ++ // -- a1 : the JSGeneratorObject to resume ++ // -- a4 : generator function ++ // -- cp : generator context ++ // -- ra : return address ++ // -- sp[0] : generator receiver ++ // ----------------------------------- ++ ++ // Push holes for arguments to generator function. Since the parser forced ++ // context allocation for any variables in generators, the actual argument ++ // values have already been copied into the context and these dummy values ++ // will never be used. 
++ __ Ld_d(a3, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu( ++ a3, FieldMemOperand(a3, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ Ld_d(t1, FieldMemOperand( ++ a1, JSGeneratorObject::kParametersAndRegistersOffset)); ++ { ++ Label done_loop, loop; ++ __ Move(t2, zero_reg); ++ __ bind(&loop); ++ __ Sub_d(a3, a3, Operand(1)); ++ __ Branch(&done_loop, lt, a3, Operand(zero_reg)); ++ __ Alsl_d(kScratchReg, t2, t1, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, FieldMemOperand(kScratchReg, FixedArray::kHeaderSize)); ++ __ Push(kScratchReg); ++ __ Add_d(t2, t2, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Underlying function needs to have bytecode available. ++ if (FLAG_debug_code) { ++ __ Ld_d(a3, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_d(a3, FieldMemOperand(a3, SharedFunctionInfo::kFunctionDataOffset)); ++ GetSharedFunctionInfoBytecode(masm, a3, t5); ++ __ GetObjectType(a3, a3, a3); ++ __ Assert(eq, AbortReason::kMissingBytecodeArray, a3, ++ Operand(BYTECODE_ARRAY_TYPE)); ++ } ++ ++ // Resume (Ignition/TurboFan) generator object. ++ { ++ __ Ld_d(a0, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu(a0, FieldMemOperand( ++ a0, SharedFunctionInfo::kFormalParameterCountOffset)); ++ // We abuse new.target both to indicate that this is a resume call and to ++ // pass in the generator object. In ordinary calls, new.target is always ++ // undefined because generator functions are non-constructable. ++ __ Move(a3, a1); ++ __ Move(a1, a4); ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++ } ++ ++ __ bind(&prepare_step_in_if_stepping); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1, a4); ++ // Push hole as receiver since we do not use it for stepping. ++ __ PushRoot(RootIndex::kTheHoleValue); ++ __ CallRuntime(Runtime::kDebugOnFunctionCall); ++ __ Pop(a1); ++ } ++ __ Ld_d(a4, FieldMemOperand(a1, JSGeneratorObject::kFunctionOffset)); ++ __ Branch(&stepping_prepared); ++ ++ __ bind(&prepare_step_in_suspended_generator); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kDebugPrepareStepInSuspendedGenerator); ++ __ Pop(a1); ++ } ++ __ Ld_d(a4, FieldMemOperand(a1, JSGeneratorObject::kFunctionOffset)); ++ __ Branch(&stepping_prepared); ++ ++ __ bind(&stack_overflow); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ __ break_(0xCC); // This should be unreachable. ++ } ++} ++ ++void Builtins::Generate_ConstructedNonConstructable(MacroAssembler* masm) { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kThrowConstructedNonConstructable); ++} ++ ++// Clobbers scratch1 and scratch2; preserves all other registers. ++static void Generate_CheckStackOverflow(MacroAssembler* masm, Register argc, ++ Register scratch1, Register scratch2) { ++ // Check the stack for overflow. We are not trying to catch ++ // interruptions (e.g. debug break and preemption) here, so the "real stack ++ // limit" is checked. ++ Label okay; ++ LoadStackLimit(masm, scratch1, StackLimitKind::kRealStackLimit); ++ // Make a2 the space we have left. The stack might already be overflowed ++ // here which will cause r2 to become negative. 
++ __ sub_d(scratch1, sp, scratch1); ++ // Check if the arguments will overflow the stack. ++ __ slli_d(scratch2, argc, kPointerSizeLog2); ++ __ Branch(&okay, gt, scratch1, Operand(scratch2)); // Signed comparison. ++ ++ // Out of stack space. ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ ++ __ bind(&okay); ++} ++ ++namespace { ++ ++// Called with the native C calling convention. The corresponding function ++// signature is either: ++// ++// using JSEntryFunction = GeneratedCode; ++// or ++// using JSEntryFunction = GeneratedCode; ++void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type, ++ Builtins::Name entry_trampoline) { ++ Label invoke, handler_entry, exit; ++ ++ { ++ NoRootArrayScope no_root_array(masm); ++ ++ // TODO(plind): unify the ABI description here. ++ // Registers: ++ // either ++ // a0: root register value ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ // or ++ // a0: root register value ++ // a1: microtask_queue ++ // ++ // Stack: ++ // 0 arg slots on mips64 (4 args slots on mips) ++ ++ // Save callee saved registers on the stack. ++ __ MultiPush(kCalleeSaved | ra.bit()); ++ ++ // Save callee-saved FPU registers. ++ __ MultiPushFPU(kCalleeSavedFPU); ++ // Set up the reserved register for 0.0. ++ __ Move(kDoubleRegZero, 0.0); ++ ++ // Initialize the root register. ++ // C calling convention. The first argument is passed in a0. ++ __ mov(kRootRegister, a0); ++ } ++ ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ ++ // We build an EntryFrame. ++ __ li(s1, Operand(-1)); // Push a bad frame pointer to fail if it is used. ++ __ li(s2, Operand(StackFrame::TypeToMarker(type))); ++ __ li(s3, Operand(StackFrame::TypeToMarker(type))); ++ ExternalReference c_entry_fp = ExternalReference::Create( ++ IsolateAddressId::kCEntryFPAddress, masm->isolate()); ++ __ li(s4, c_entry_fp); ++ __ Ld_d(s4, MemOperand(s4, 0)); ++ __ Push(s1, s2, s3, s4); ++ // Set up frame pointer for the frame to be pushed. ++ __ addi_d(fp, sp, -EntryFrameConstants::kCallerFPOffset); ++ ++ // Registers: ++ // either ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ // or ++ // a1: microtask_queue ++ // ++ // Stack: ++ // caller fp | ++ // function slot | entry frame ++ // context slot | ++ // bad fp (0xFF...F) | ++ // callee saved registers + ra ++ // [ O32: 4 args slots] ++ // args ++ ++ // If this is the outermost JS call, set js_entry_sp value. ++ Label non_outermost_js; ++ ExternalReference js_entry_sp = ExternalReference::Create( ++ IsolateAddressId::kJSEntrySPAddress, masm->isolate()); ++ __ li(s1, js_entry_sp); ++ __ Ld_d(s2, MemOperand(s1, 0)); ++ __ Branch(&non_outermost_js, ne, s2, Operand(zero_reg)); ++ __ St_d(fp, MemOperand(s1, 0)); ++ __ li(s3, Operand(StackFrame::OUTERMOST_JSENTRY_FRAME)); ++ Label cont; ++ __ b(&cont); ++ __ nop(); // Branch delay slot nop. ++ __ bind(&non_outermost_js); ++ __ li(s3, Operand(StackFrame::INNER_JSENTRY_FRAME)); ++ __ bind(&cont); ++ __ push(s3); ++ ++ // Jump to a faked try block that does the invoke, with a faked catch ++ // block that sets the pending exception. ++ __ jmp(&invoke); ++ __ bind(&handler_entry); ++ ++ // Store the current pc as the handler offset. It's used later to create the ++ // handler table. 
++ masm->isolate()->builtins()->SetJSEntryHandlerOffset(handler_entry.pos()); ++ ++ // Caught exception: Store result (exception) in the pending exception ++ // field in the JSEnv and return a failure sentinel. Coming in here the ++ // fp will be invalid because the PushStackHandler below sets it to 0 to ++ // signal the existence of the JSEntry frame. ++ __ li(s1, ExternalReference::Create( ++ IsolateAddressId::kPendingExceptionAddress, masm->isolate())); ++ __ St_d(a0, ++ MemOperand(s1, 0)); // We come back from 'invoke'. result is in a0. ++ __ LoadRoot(a0, RootIndex::kException); ++ __ b(&exit); // b exposes branch delay slot. ++ __ nop(); // Branch delay slot nop. ++ ++ // Invoke: Link this frame into the handler chain. ++ __ bind(&invoke); ++ __ PushStackHandler(); ++ // If an exception not caught by another handler occurs, this handler ++ // returns control to the code after the bal(&invoke) above, which ++ // restores all kCalleeSaved registers (including cp and fp) to their ++ // saved values before returning a failure to C. ++ // ++ // Registers: ++ // either ++ // a0: root register value ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ // or ++ // a0: root register value ++ // a1: microtask_queue ++ // ++ // Stack: ++ // handler frame ++ // entry frame ++ // callee saved registers + ra ++ // [ O32: 4 args slots] ++ // args ++ // ++ // Invoke the function by calling through JS entry trampoline builtin and ++ // pop the faked function when we return. ++ ++ Handle trampoline_code = ++ masm->isolate()->builtins()->builtin_handle(entry_trampoline); ++ __ Call(trampoline_code, RelocInfo::CODE_TARGET); ++ ++ // Unlink this frame from the handler chain. ++ __ PopStackHandler(); ++ ++ __ bind(&exit); // a0 holds result ++ // Check if the current stack frame is marked as the outermost JS frame. ++ Label non_outermost_js_2; ++ __ pop(a5); ++ __ Branch(&non_outermost_js_2, ne, a5, ++ Operand(StackFrame::OUTERMOST_JSENTRY_FRAME)); ++ __ li(a5, js_entry_sp); ++ __ St_d(zero_reg, MemOperand(a5, 0)); ++ __ bind(&non_outermost_js_2); ++ ++ // Restore the top frame descriptors from the stack. ++ __ pop(a5); ++ __ li(a4, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, ++ masm->isolate())); ++ __ St_d(a5, MemOperand(a4, 0)); ++ ++ // Reset the stack to the callee saved registers. ++ __ addi_d(sp, sp, -EntryFrameConstants::kCallerFPOffset); ++ ++ // Restore callee-saved fpu registers. ++ __ MultiPopFPU(kCalleeSavedFPU); ++ ++ // Restore callee saved registers from the stack. ++ __ MultiPop(kCalleeSaved | ra.bit()); ++ // Return. ++ __ Jump(ra); ++} ++ ++} // namespace ++ ++void Builtins::Generate_JSEntry(MacroAssembler* masm) { ++ Generate_JSEntryVariant(masm, StackFrame::ENTRY, ++ Builtins::kJSEntryTrampoline); ++} ++ ++void Builtins::Generate_JSConstructEntry(MacroAssembler* masm) { ++ Generate_JSEntryVariant(masm, StackFrame::CONSTRUCT_ENTRY, ++ Builtins::kJSConstructEntryTrampoline); ++} ++ ++void Builtins::Generate_JSRunMicrotasksEntry(MacroAssembler* masm) { ++ Generate_JSEntryVariant(masm, StackFrame::ENTRY, ++ Builtins::kRunMicrotasksTrampoline); ++} ++ ++static void Generate_JSEntryTrampolineHelper(MacroAssembler* masm, ++ bool is_construct) { ++ // ----------- S t a t e ------------- ++ // -- a1: new.target ++ // -- a2: function ++ // -- a3: receiver_pointer ++ // -- a4: argc ++ // -- a5: argv ++ // ----------------------------------- ++ ++ // Enter an internal frame. 
++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ ++ // Setup the context (we need to use the caller context from the isolate). ++ ExternalReference context_address = ExternalReference::Create( ++ IsolateAddressId::kContextAddress, masm->isolate()); ++ __ li(cp, context_address); ++ __ Ld_d(cp, MemOperand(cp, 0)); ++ ++ // Push the function and the receiver onto the stack. ++ __ Push(a2, a3); ++ ++ // Check if we have enough stack space to push all arguments. ++ // Clobbers a0 and a3. ++ Generate_CheckStackOverflow(masm, a4, t5, a3); ++ ++ // Setup new.target, function and argc. ++ __ mov(a3, a1); ++ __ mov(a1, a2); ++ __ mov(a0, a4); ++ ++ // a0: argc ++ // a1: function ++ // a3: new.target ++ // a5: argv ++ ++ // Copy arguments to the stack in a loop. ++ // a3: argc ++ // a5: argv, i.e. points to first arg ++ Label loop, entry; ++ __ Alsl_d(s1, a4, a5, kPointerSizeLog2, t7); ++ __ b(&entry); ++ __ nop(); // Branch delay slot nop. ++ // s1 points past last arg. ++ __ bind(&loop); ++ __ Ld_d(s2, MemOperand(a5, 0)); // Read next parameter. ++ __ addi_d(a5, a5, kPointerSize); ++ __ Ld_d(s2, MemOperand(s2, 0)); // Dereference handle. ++ __ push(s2); // Push parameter. ++ __ bind(&entry); ++ __ Branch(&loop, ne, a5, Operand(s1)); ++ ++ // a0: argc ++ // a1: function ++ // a3: new.target ++ ++ // Initialize all JavaScript callee-saved registers, since they will be seen ++ // by the garbage collector as part of handlers. ++ __ LoadRoot(a4, RootIndex::kUndefinedValue); ++ __ mov(a5, a4); ++ __ mov(s1, a4); ++ __ mov(s2, a4); ++ __ mov(s3, a4); ++ __ mov(s4, a4); ++ __ mov(s5, a4); ++ // s6 holds the root address. Do not clobber. ++ // s7 is cp. Do not init. ++ ++ // Invoke the code. ++ Handle builtin = is_construct ++ ? BUILTIN_CODE(masm->isolate(), Construct) ++ : masm->isolate()->builtins()->Call(); ++ __ Call(builtin, RelocInfo::CODE_TARGET); ++ ++ // Leave internal frame. ++ } ++ __ Jump(ra); ++} ++ ++void Builtins::Generate_JSEntryTrampoline(MacroAssembler* masm) { ++ Generate_JSEntryTrampolineHelper(masm, false); ++} ++ ++void Builtins::Generate_JSConstructEntryTrampoline(MacroAssembler* masm) { ++ Generate_JSEntryTrampolineHelper(masm, true); ++} ++ ++void Builtins::Generate_RunMicrotasksTrampoline(MacroAssembler* masm) { ++ // a1: microtask_queue ++ __ mov(RunMicrotasksDescriptor::MicrotaskQueueRegister(), a1); ++ __ Jump(BUILTIN_CODE(masm->isolate(), RunMicrotasks), RelocInfo::CODE_TARGET); ++} ++ ++static void ReplaceClosureCodeWithOptimizedCode(MacroAssembler* masm, ++ Register optimized_code, ++ Register closure, ++ Register scratch1, ++ Register scratch2) { ++ // Store code entry in the closure. ++ __ St_d(optimized_code, FieldMemOperand(closure, JSFunction::kCodeOffset)); ++ __ mov(scratch1, optimized_code); // Write barrier clobbers scratch1 below. ++ __ RecordWriteField(closure, JSFunction::kCodeOffset, scratch1, scratch2, ++ kRAHasNotBeenSaved, kDontSaveFPRegs, OMIT_REMEMBERED_SET, ++ OMIT_SMI_CHECK); ++} ++ ++static void LeaveInterpreterFrame(MacroAssembler* masm, Register scratch) { ++ Register args_count = scratch; ++ ++ // Get the arguments + receiver count. ++ __ Ld_d(args_count, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ Ld_w(t0, FieldMemOperand(args_count, BytecodeArray::kParameterSizeOffset)); ++ ++ // Leave the frame (also dropping the register file). ++ __ LeaveFrame(StackFrame::INTERPRETED); ++ ++ // Drop receiver + arguments. 
++ __ Add_d(sp, sp, args_count); ++} ++ ++// Tail-call |function_id| if |smi_entry| == |marker| ++static void TailCallRuntimeIfMarkerEquals(MacroAssembler* masm, ++ Register smi_entry, ++ OptimizationMarker marker, ++ Runtime::FunctionId function_id) { ++ Label no_match; ++ __ Branch(&no_match, ne, smi_entry, Operand(Smi::FromEnum(marker))); ++ GenerateTailCallToReturnedCode(masm, function_id); ++ __ bind(&no_match); ++} ++ ++static void TailCallOptimizedCodeSlot(MacroAssembler* masm, ++ Register optimized_code_entry, ++ Register scratch1, Register scratch2) { ++ // ----------- S t a t e ------------- ++ // -- a3 : new target (preserved for callee if needed, and caller) ++ // -- a1 : target function (preserved for callee if needed, and caller) ++ // ----------------------------------- ++ DCHECK(!AreAliased(optimized_code_entry, a1, a3, scratch1, scratch2)); ++ ++ Register closure = a1; ++ ++ // Check if the optimized code is marked for deopt. If it is, call the ++ // runtime to clear it. ++ Label found_deoptimized_code; ++ __ Ld_d(a5, FieldMemOperand(optimized_code_entry, ++ Code::kCodeDataContainerOffset)); ++ __ Ld_w(a5, FieldMemOperand(a5, CodeDataContainer::kKindSpecificFlagsOffset)); ++ __ And(a5, a5, Operand(1 << Code::kMarkedForDeoptimizationBit)); ++ __ Branch(&found_deoptimized_code, ne, a5, Operand(zero_reg)); ++ ++ // Optimized code is good, get it into the closure and link the closure into ++ // the optimized functions list, then tail call the optimized code. ++ // The feedback vector is no longer used, so re-use it as a scratch ++ // register. ++ ReplaceClosureCodeWithOptimizedCode(masm, optimized_code_entry, closure, ++ scratch1, scratch2); ++ ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Add_d(a2, optimized_code_entry, ++ Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++ ++ // Optimized code slot contains deoptimized code, evict it and re-enter the ++ // closure's code. ++ __ bind(&found_deoptimized_code); ++ GenerateTailCallToReturnedCode(masm, Runtime::kEvictOptimizedCodeSlot); ++} ++ ++static void MaybeOptimizeCode(MacroAssembler* masm, Register feedback_vector, ++ Register optimization_marker) { ++ // ----------- S t a t e ------------- ++ // -- a3 : new target (preserved for callee if needed, and caller) ++ // -- a1 : target function (preserved for callee if needed, and caller) ++ // -- feedback vector (preserved for caller if needed) ++ // -- optimization_marker : a Smi containing a non-zero optimization marker. ++ // ----------------------------------- ++ DCHECK(!AreAliased(feedback_vector, a1, a3, optimization_marker)); ++ ++ // TODO(v8:8394): The logging of first execution will break if ++ // feedback vectors are not allocated. We need to find a different way of ++ // logging these events if required. ++ TailCallRuntimeIfMarkerEquals(masm, optimization_marker, ++ OptimizationMarker::kLogFirstExecution, ++ Runtime::kFunctionFirstExecution); ++ TailCallRuntimeIfMarkerEquals(masm, optimization_marker, ++ OptimizationMarker::kCompileOptimized, ++ Runtime::kCompileOptimized_NotConcurrent); ++ TailCallRuntimeIfMarkerEquals(masm, optimization_marker, ++ OptimizationMarker::kCompileOptimizedConcurrent, ++ Runtime::kCompileOptimized_Concurrent); ++ ++ // Otherwise, the marker is InOptimizationQueue, so fall through hoping ++ // that an interrupt will eventually update the slot with optimized code. 
++ if (FLAG_debug_code) { ++ __ Assert(eq, AbortReason::kExpectedOptimizationSentinel, ++ optimization_marker, ++ Operand(Smi::FromEnum(OptimizationMarker::kInOptimizationQueue))); ++ } ++} ++ ++// Advance the current bytecode offset. This simulates what all bytecode ++// handlers do upon completion of the underlying operation. Will bail out to a ++// label if the bytecode (without prefix) is a return bytecode. ++static void AdvanceBytecodeOffsetOrReturn(MacroAssembler* masm, ++ Register bytecode_array, ++ Register bytecode_offset, ++ Register bytecode, Register scratch1, ++ Register scratch2, Label* if_return) { ++ Register bytecode_size_table = scratch1; ++ DCHECK(!AreAliased(bytecode_array, bytecode_offset, bytecode_size_table, ++ bytecode)); ++ ++ __ li(bytecode_size_table, ExternalReference::bytecode_size_table_address()); ++ ++ // Check if the bytecode is a Wide or ExtraWide prefix bytecode. ++ Label process_bytecode, extra_wide; ++ STATIC_ASSERT(0 == static_cast(interpreter::Bytecode::kWide)); ++ STATIC_ASSERT(1 == static_cast(interpreter::Bytecode::kExtraWide)); ++ STATIC_ASSERT(2 == static_cast(interpreter::Bytecode::kDebugBreakWide)); ++ STATIC_ASSERT(3 == ++ static_cast(interpreter::Bytecode::kDebugBreakExtraWide)); ++ __ Branch(&process_bytecode, hi, bytecode, Operand(3)); ++ __ And(scratch2, bytecode, Operand(1)); ++ __ Branch(&extra_wide, ne, scratch2, Operand(zero_reg)); ++ ++ // Load the next bytecode and update table to the wide scaled table. ++ __ Add_d(bytecode_offset, bytecode_offset, Operand(1)); ++ __ Add_d(scratch2, bytecode_array, bytecode_offset); ++ __ Ld_bu(bytecode, MemOperand(scratch2, 0)); ++ __ Add_d(bytecode_size_table, bytecode_size_table, ++ Operand(kIntSize * interpreter::Bytecodes::kBytecodeCount)); ++ __ jmp(&process_bytecode); ++ ++ __ bind(&extra_wide); ++ // Load the next bytecode and update table to the extra wide scaled table. ++ __ Add_d(bytecode_offset, bytecode_offset, Operand(1)); ++ __ Add_d(scratch2, bytecode_array, bytecode_offset); ++ __ Ld_bu(bytecode, MemOperand(scratch2, 0)); ++ __ Add_d(bytecode_size_table, bytecode_size_table, ++ Operand(2 * kIntSize * interpreter::Bytecodes::kBytecodeCount)); ++ ++ __ bind(&process_bytecode); ++ ++// Bailout to the return label if this is a return bytecode. ++#define JUMP_IF_EQUAL(NAME) \ ++ __ Branch(if_return, eq, bytecode, \ ++ Operand(static_cast(interpreter::Bytecode::k##NAME))); ++ RETURN_BYTECODE_LIST(JUMP_IF_EQUAL) ++#undef JUMP_IF_EQUAL ++ ++ // Otherwise, load the size of the current bytecode and advance the offset. ++ __ Alsl_d(scratch2, bytecode, bytecode_size_table, 2, t7); ++ __ Ld_w(scratch2, MemOperand(scratch2, 0)); ++ __ Add_d(bytecode_offset, bytecode_offset, scratch2); ++} ++ ++// Generate code for entering a JS function with the interpreter. ++// On entry to the function the receiver and arguments have been pushed on the ++// stack left to right. The actual argument count matches the formal parameter ++// count expected by the function. ++// ++// The live registers are: ++// o a1: the JS function object being called. ++// o a3: the incoming new target or generator object ++// o cp: our context ++// o fp: the caller's frame pointer ++// o sp: stack pointer ++// o ra: return address ++// ++// The function builds an interpreter frame. See InterpreterFrameConstants in ++// frames.h for its layout. 
++void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ++ Register closure = a1; ++ Register feedback_vector = a2; ++ ++ // Get the bytecode array from the function object and load it into ++ // kInterpreterBytecodeArrayRegister. ++ __ Ld_d(t5, FieldMemOperand(closure, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ FieldMemOperand(t5, SharedFunctionInfo::kFunctionDataOffset)); ++ GetSharedFunctionInfoBytecode(masm, kInterpreterBytecodeArrayRegister, a4); ++ ++ // The bytecode array could have been flushed from the shared function info, ++ // if so, call into CompileLazy. ++ Label compile_lazy; ++ __ GetObjectType(kInterpreterBytecodeArrayRegister, t5, t5); ++ __ Branch(&compile_lazy, ne, t5, Operand(BYTECODE_ARRAY_TYPE)); ++ ++ // Load the feedback vector from the closure. ++ __ Ld_d(feedback_vector, ++ FieldMemOperand(closure, JSFunction::kFeedbackCellOffset)); ++ __ Ld_d(feedback_vector, ++ FieldMemOperand(feedback_vector, Cell::kValueOffset)); ++ ++ Label push_stack_frame; ++ // Check if feedback vector is valid. If valid, check for optimized code ++ // and update invocation count. Otherwise, setup the stack frame. ++ __ Ld_d(a4, FieldMemOperand(feedback_vector, HeapObject::kMapOffset)); ++ __ Ld_hu(a4, FieldMemOperand(a4, Map::kInstanceTypeOffset)); ++ __ Branch(&push_stack_frame, ne, a4, Operand(FEEDBACK_VECTOR_TYPE)); ++ ++ // Read off the optimized code slot in the feedback vector, and if there ++ // is optimized code or an optimization marker, call that instead. ++ Register optimized_code_entry = a4; ++ __ Ld_d(optimized_code_entry, ++ FieldMemOperand(feedback_vector, ++ FeedbackVector::kOptimizedCodeWeakOrSmiOffset)); ++ ++ // Check if the optimized code slot is not empty. ++ Label optimized_code_slot_not_empty; ++ ++ __ Branch(&optimized_code_slot_not_empty, ne, optimized_code_entry, ++ Operand(Smi::FromEnum(OptimizationMarker::kNone))); ++ ++ Label not_optimized; ++ __ bind(¬_optimized); ++ ++ // Increment invocation count for the function. ++ __ Ld_w(a4, FieldMemOperand(feedback_vector, ++ FeedbackVector::kInvocationCountOffset)); ++ __ Add_w(a4, a4, Operand(1)); ++ __ St_w(a4, FieldMemOperand(feedback_vector, ++ FeedbackVector::kInvocationCountOffset)); ++ ++ // Open a frame scope to indicate that there is a frame on the stack. The ++ // MANUAL indicates that the scope shouldn't actually generate code to set up ++ // the frame (that is done below). ++ __ bind(&push_stack_frame); ++ FrameScope frame_scope(masm, StackFrame::MANUAL); ++ __ PushStandardFrame(closure); ++ ++ // Reset code age and the OSR arming. The OSR field and BytecodeAgeOffset are ++ // 8-bit fields next to each other, so we could just optimize by writing a ++ // 16-bit. These static asserts guard our assumption is valid. ++ STATIC_ASSERT(BytecodeArray::kBytecodeAgeOffset == ++ BytecodeArray::kOsrNestingLevelOffset + kCharSize); ++ STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0); ++ __ St_h(zero_reg, FieldMemOperand(kInterpreterBytecodeArrayRegister, ++ BytecodeArray::kOsrNestingLevelOffset)); ++ ++ // Load initial bytecode offset. ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ ++ // Push bytecode array and Smi tagged bytecode array offset. ++ __ SmiTag(a4, kInterpreterBytecodeOffsetRegister); ++ __ Push(kInterpreterBytecodeArrayRegister, a4); ++ ++ // Allocate the local and temporary register file on the stack. 
++ Label stack_overflow; ++ { ++ // Load frame size (word) from the BytecodeArray object. ++ __ Ld_w(a4, FieldMemOperand(kInterpreterBytecodeArrayRegister, ++ BytecodeArray::kFrameSizeOffset)); ++ ++ // Do a stack check to ensure we don't go over the limit. ++ __ Sub_d(a5, sp, Operand(a4)); ++ LoadStackLimit(masm, a2, StackLimitKind::kRealStackLimit); ++ __ Branch(&stack_overflow, lo, a5, Operand(a2)); ++ ++ // If ok, push undefined as the initial value for all register file entries. ++ Label loop_header; ++ Label loop_check; ++ __ LoadRoot(a5, RootIndex::kUndefinedValue); ++ __ Branch(&loop_check); ++ __ bind(&loop_header); ++ // TODO(rmcilroy): Consider doing more than one push per loop iteration. ++ __ push(a5); ++ // Continue loop if not done. ++ __ bind(&loop_check); ++ __ Sub_d(a4, a4, Operand(kPointerSize)); ++ __ Branch(&loop_header, ge, a4, Operand(zero_reg)); ++ } ++ ++ // If the bytecode array has a valid incoming new target or generator object ++ // register, initialize it with incoming value which was passed in r3. ++ Label no_incoming_new_target_or_generator_register; ++ __ Ld_w(a5, FieldMemOperand( ++ kInterpreterBytecodeArrayRegister, ++ BytecodeArray::kIncomingNewTargetOrGeneratorRegisterOffset)); ++ __ Branch(&no_incoming_new_target_or_generator_register, eq, a5, ++ Operand(zero_reg)); ++ __ Alsl_d(a5, a5, fp, kPointerSizeLog2, t7); ++ __ St_d(a3, MemOperand(a5, 0)); ++ __ bind(&no_incoming_new_target_or_generator_register); ++ ++ // Perform interrupt stack check. ++ // TODO(solanes): Merge with the real stack limit check above. ++ Label stack_check_interrupt, after_stack_check_interrupt; ++ LoadStackLimit(masm, a5, StackLimitKind::kInterruptStackLimit); ++ __ Branch(&stack_check_interrupt, lo, sp, Operand(a5)); ++ __ bind(&after_stack_check_interrupt); ++ ++ // Load accumulator as undefined. ++ __ LoadRoot(kInterpreterAccumulatorRegister, RootIndex::kUndefinedValue); ++ ++ // Load the dispatch table into a register and dispatch to the bytecode ++ // handler at the current bytecode offset. ++ Label do_dispatch; ++ __ bind(&do_dispatch); ++ __ li(kInterpreterDispatchTableRegister, ++ ExternalReference::interpreter_dispatch_table_address(masm->isolate())); ++ __ Add_d(t5, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a7, MemOperand(t5, 0)); ++ __ Alsl_d(kScratchReg, a7, kInterpreterDispatchTableRegister, ++ kPointerSizeLog2, t7); ++ __ Ld_d(kJavaScriptCallCodeStartRegister, MemOperand(kScratchReg, 0)); ++ __ Call(kJavaScriptCallCodeStartRegister); ++ masm->isolate()->heap()->SetInterpreterEntryReturnPCOffset(masm->pc_offset()); ++ ++ // Any returns to the entry trampoline are either due to the return bytecode ++ // or the interpreter tail calling a builtin and then a dispatch. ++ ++ // Get bytecode array and bytecode offset from the stack frame. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ Ld_d(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ __ SmiUntag(kInterpreterBytecodeOffsetRegister); ++ ++ // Either return, or advance to the next bytecode and dispatch. 
++ Label do_return; ++ __ Add_d(a1, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a1, MemOperand(a1, 0)); ++ AdvanceBytecodeOffsetOrReturn(masm, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister, a1, a2, a3, ++ &do_return); ++ __ jmp(&do_dispatch); ++ ++ __ bind(&do_return); ++ // The return value is in a0. ++ LeaveInterpreterFrame(masm, t0); ++ __ Jump(ra); ++ ++ __ bind(&stack_check_interrupt); ++ // Modify the bytecode offset in the stack to be kFunctionEntryBytecodeOffset ++ // for the call to the StackGuard. ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(Smi::FromInt(BytecodeArray::kHeaderSize - kHeapObjectTag + ++ kFunctionEntryBytecodeOffset))); ++ __ St_d(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ __ CallRuntime(Runtime::kStackGuard); ++ ++ // After the call, restore the bytecode array, bytecode offset and accumulator ++ // registers again. Also, restore the bytecode offset in the stack to its ++ // previous value. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ __ LoadRoot(kInterpreterAccumulatorRegister, RootIndex::kUndefinedValue); ++ ++ __ SmiTag(a5, kInterpreterBytecodeOffsetRegister); ++ __ St_d(a5, MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ ++ __ jmp(&after_stack_check_interrupt); ++ ++ __ bind(&optimized_code_slot_not_empty); ++ Label maybe_has_optimized_code; ++ // Check if optimized code marker is actually a weak reference to the ++ // optimized code as opposed to an optimization marker. ++ __ JumpIfNotSmi(optimized_code_entry, &maybe_has_optimized_code, t7); ++ MaybeOptimizeCode(masm, feedback_vector, optimized_code_entry); ++ // Fall through if there's no runnable optimized code. ++ __ jmp(¬_optimized); ++ ++ __ bind(&maybe_has_optimized_code); ++ // Load code entry from the weak reference, if it was cleared, resume ++ // execution of unoptimized code. ++ __ LoadWeakValue(optimized_code_entry, optimized_code_entry, ¬_optimized); ++ TailCallOptimizedCodeSlot(masm, optimized_code_entry, t3, a5); ++ ++ __ bind(&compile_lazy); ++ GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy); ++ // Unreachable code. ++ __ break_(0xCC); ++ ++ __ bind(&stack_overflow); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++} ++ ++static void Generate_InterpreterPushArgs(MacroAssembler* masm, ++ Register num_args, Register index, ++ Register scratch, Register scratch2) { ++ // Find the address of the last argument. ++ __ mov(scratch2, num_args); ++ __ slli_d(scratch2, scratch2, kPointerSizeLog2); ++ __ Sub_d(scratch2, index, Operand(scratch2)); ++ ++ // Push the arguments. 
++ Label loop_header, loop_check; ++ __ Branch(&loop_check); ++ __ bind(&loop_header); ++ __ Ld_d(scratch, MemOperand(index, 0)); ++ __ Add_d(index, index, Operand(-kPointerSize)); ++ __ push(scratch); ++ __ bind(&loop_check); ++ __ Branch(&loop_header, hi, index, Operand(scratch2)); ++} ++ ++// static ++void Builtins::Generate_InterpreterPushArgsThenCallImpl( ++ MacroAssembler* masm, ConvertReceiverMode receiver_mode, ++ InterpreterPushArgsMode mode) { ++ DCHECK(mode != InterpreterPushArgsMode::kArrayFunction); ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a2 : the address of the first argument to be pushed. Subsequent ++ // arguments should be consecutive above this, in the same order as ++ // they are to be pushed onto the stack. ++ // -- a1 : the target to call (can be any Object). ++ // ----------------------------------- ++ Label stack_overflow; ++ ++ __ Add_d(a3, a0, Operand(1)); // Add one for receiver. ++ ++ // Push "undefined" as the receiver arg if we need to. ++ if (receiver_mode == ConvertReceiverMode::kNullOrUndefined) { ++ __ PushRoot(RootIndex::kUndefinedValue); ++ __ Sub_d(a3, a3, Operand(1)); // Subtract one for receiver. ++ } ++ ++ Generate_StackOverflowCheck(masm, a3, a4, t0, &stack_overflow); ++ ++ // This function modifies a2, t0 and a4. ++ Generate_InterpreterPushArgs(masm, a3, a2, a4, t0); ++ ++ if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ __ Pop(a2); // Pass the spread in a register ++ __ Sub_d(a0, a0, Operand(1)); // Subtract one for spread ++ } ++ ++ // Call the target. ++ if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallWithSpread), ++ RelocInfo::CODE_TARGET); ++ } else { ++ __ Jump(masm->isolate()->builtins()->Call(ConvertReceiverMode::kAny), ++ RelocInfo::CODE_TARGET); ++ } ++ ++ __ bind(&stack_overflow); ++ { ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++ } ++} ++ ++// static ++void Builtins::Generate_InterpreterPushArgsThenConstructImpl( ++ MacroAssembler* masm, InterpreterPushArgsMode mode) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argument count (not including receiver) ++ // -- a3 : new target ++ // -- a1 : constructor to call ++ // -- a2 : allocation site feedback if available, undefined otherwise. ++ // -- a4 : address of the first argument ++ // ----------------------------------- ++ Label stack_overflow; ++ ++ // Push a slot for the receiver. ++ __ push(zero_reg); ++ ++ Generate_StackOverflowCheck(masm, a0, a5, t0, &stack_overflow); ++ ++ // This function modifies t0, a4 and a5. ++ Generate_InterpreterPushArgs(masm, a0, a4, a5, t0); ++ ++ if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ __ Pop(a2); // Pass the spread in a register ++ __ Sub_d(a0, a0, Operand(1)); // Subtract one for spread ++ } else { ++ __ AssertUndefinedOrAllocationSite(a2, t0); ++ } ++ ++ if (mode == InterpreterPushArgsMode::kArrayFunction) { ++ __ AssertFunction(a1); ++ ++ // Tail call to the function-specific construct stub (still in the caller ++ // context at this point). ++ __ Jump(BUILTIN_CODE(masm->isolate(), ArrayConstructorImpl), ++ RelocInfo::CODE_TARGET); ++ } else if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ // Call the constructor with a0, a1, and a3 unmodified. 
++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructWithSpread), ++ RelocInfo::CODE_TARGET); ++ } else { ++ DCHECK_EQ(InterpreterPushArgsMode::kOther, mode); ++ // Call the constructor with a0, a1, and a3 unmodified. ++ __ Jump(BUILTIN_CODE(masm->isolate(), Construct), RelocInfo::CODE_TARGET); ++ } ++ ++ __ bind(&stack_overflow); ++ { ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++ } ++} ++ ++static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) { ++ // Set the return address to the correct point in the interpreter entry ++ // trampoline. ++ Label builtin_trampoline, trampoline_loaded; ++ Smi interpreter_entry_return_pc_offset( ++ masm->isolate()->heap()->interpreter_entry_return_pc_offset()); ++ DCHECK_NE(interpreter_entry_return_pc_offset, Smi::zero()); ++ ++ // If the SFI function_data is an InterpreterData, the function will have a ++ // custom copy of the interpreter entry trampoline for profiling. If so, ++ // get the custom trampoline, otherwise grab the entry address of the global ++ // trampoline. ++ __ Ld_d(t0, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ __ Ld_d(t0, FieldMemOperand(t0, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_d(t0, FieldMemOperand(t0, SharedFunctionInfo::kFunctionDataOffset)); ++ __ GetObjectType(t0, kInterpreterDispatchTableRegister, ++ kInterpreterDispatchTableRegister); ++ __ Branch(&builtin_trampoline, ne, kInterpreterDispatchTableRegister, ++ Operand(INTERPRETER_DATA_TYPE)); ++ ++ __ Ld_d(t0, ++ FieldMemOperand(t0, InterpreterData::kInterpreterTrampolineOffset)); ++ __ Add_d(t0, t0, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Branch(&trampoline_loaded); ++ ++ __ bind(&builtin_trampoline); ++ __ li(t0, ExternalReference:: ++ address_of_interpreter_entry_trampoline_instruction_start( ++ masm->isolate())); ++ __ Ld_d(t0, MemOperand(t0, 0)); ++ ++ __ bind(&trampoline_loaded); ++ __ Add_d(ra, t0, Operand(interpreter_entry_return_pc_offset.value())); ++ ++ // Initialize the dispatch table register. ++ __ li(kInterpreterDispatchTableRegister, ++ ExternalReference::interpreter_dispatch_table_address(masm->isolate())); ++ ++ // Get the bytecode array pointer from the frame. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ ++ if (FLAG_debug_code) { ++ // Check function data field is actually a BytecodeArray object. ++ __ SmiTst(kInterpreterBytecodeArrayRegister, kScratchReg); ++ __ Assert(ne, ++ AbortReason::kFunctionDataShouldBeBytecodeArrayOnInterpreterEntry, ++ kScratchReg, Operand(zero_reg)); ++ __ GetObjectType(kInterpreterBytecodeArrayRegister, a1, a1); ++ __ Assert(eq, ++ AbortReason::kFunctionDataShouldBeBytecodeArrayOnInterpreterEntry, ++ a1, Operand(BYTECODE_ARRAY_TYPE)); ++ } ++ ++ // Get the target bytecode offset from the frame. ++ __ SmiUntag(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ ++ if (FLAG_debug_code) { ++ Label okay; ++ __ Branch(&okay, ge, kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ // Unreachable code. ++ __ break_(0xCC); ++ __ bind(&okay); ++ } ++ ++ // Dispatch to the target bytecode. 
++ __ Add_d(a1, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a7, MemOperand(a1, 0)); ++ __ Alsl_d(a1, a7, kInterpreterDispatchTableRegister, kPointerSizeLog2, t7); ++ __ Ld_d(kJavaScriptCallCodeStartRegister, MemOperand(a1, 0)); ++ __ Jump(kJavaScriptCallCodeStartRegister); ++} ++ ++void Builtins::Generate_InterpreterEnterBytecodeAdvance(MacroAssembler* masm) { ++ // Advance the current bytecode offset stored within the given interpreter ++ // stack frame. This simulates what all bytecode handlers do upon completion ++ // of the underlying operation. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ Ld_d(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ __ SmiUntag(kInterpreterBytecodeOffsetRegister); ++ ++ Label enter_bytecode, function_entry_bytecode; ++ __ Branch(&function_entry_bytecode, eq, kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag + ++ kFunctionEntryBytecodeOffset)); ++ ++ // Load the current bytecode. ++ __ Add_d(a1, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a1, MemOperand(a1, 0)); ++ ++ // Advance to the next bytecode. ++ Label if_return; ++ AdvanceBytecodeOffsetOrReturn(masm, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister, a1, a2, a3, ++ &if_return); ++ ++ __ bind(&enter_bytecode); ++ // Convert new bytecode offset to a Smi and save in the stackframe. ++ __ SmiTag(a2, kInterpreterBytecodeOffsetRegister); ++ __ St_d(a2, MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ ++ Generate_InterpreterEnterBytecode(masm); ++ ++ __ bind(&function_entry_bytecode); ++ // If the code deoptimizes during the implicit function entry stack interrupt ++ // check, it will have a bailout ID of kFunctionEntryBytecodeOffset, which is ++ // not a valid bytecode offset. Detect this case and advance to the first ++ // actual bytecode. ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ __ Branch(&enter_bytecode); ++ ++ // We should never take the if_return path. ++ __ bind(&if_return); ++ __ Abort(AbortReason::kInvalidBytecodeAdvance); ++} ++ ++void Builtins::Generate_InterpreterEnterBytecodeDispatch(MacroAssembler* masm) { ++ Generate_InterpreterEnterBytecode(masm); ++} ++ ++namespace { ++void Generate_ContinueToBuiltinHelper(MacroAssembler* masm, ++ bool java_script_builtin, ++ bool with_result) { ++ const RegisterConfiguration* config(RegisterConfiguration::Default()); ++ int allocatable_register_count = config->num_allocatable_general_registers(); ++ if (with_result) { ++ // Overwrite the hole inserted by the deoptimizer with the return value from ++ // the LAZY deopt point. ++ __ St_d(a0, ++ MemOperand( ++ sp, config->num_allocatable_general_registers() * kPointerSize + ++ BuiltinContinuationFrameConstants::kFixedFrameSize)); ++ } ++ for (int i = allocatable_register_count - 1; i >= 0; --i) { ++ int code = config->GetAllocatableGeneralCode(i); ++ __ Pop(Register::from_code(code)); ++ if (java_script_builtin && code == kJavaScriptCallArgCountRegister.code()) { ++ __ SmiUntag(Register::from_code(code)); ++ } ++ } ++ __ Ld_d( ++ fp, ++ MemOperand(sp, BuiltinContinuationFrameConstants::kFixedFrameSizeFromFp)); ++ // Load builtin index (stored as a Smi) and use it to get the builtin start ++ // address from the builtins table. 
++ __ Pop(t0); ++ __ Add_d(sp, sp, ++ Operand(BuiltinContinuationFrameConstants::kFixedFrameSizeFromFp)); ++ __ Pop(ra); ++ __ LoadEntryFromBuiltinIndex(t0); ++ __ Jump(t0); ++} ++} // namespace ++ ++void Builtins::Generate_ContinueToCodeStubBuiltin(MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, false, false); ++} ++ ++void Builtins::Generate_ContinueToCodeStubBuiltinWithResult( ++ MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, false, true); ++} ++ ++void Builtins::Generate_ContinueToJavaScriptBuiltin(MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, true, false); ++} ++ ++void Builtins::Generate_ContinueToJavaScriptBuiltinWithResult( ++ MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, true, true); ++} ++ ++void Builtins::Generate_NotifyDeoptimized(MacroAssembler* masm) { ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kNotifyDeoptimized); ++ } ++ ++ DCHECK_EQ(kInterpreterAccumulatorRegister.code(), a0.code()); ++ __ Ld_d(a0, MemOperand(sp, 0 * kPointerSize)); ++ __ Add_d(sp, sp, Operand(1 * kPointerSize)); // Remove state. ++ __ Ret(); ++} ++ ++void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) { ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kCompileForOnStackReplacement); ++ } ++ ++ // If the code object is null, just return to the caller. ++ __ Ret(eq, a0, Operand(Smi::zero())); ++ ++ // Drop the handler frame that is be sitting on top of the actual ++ // JavaScript frame. This is the case then OSR is triggered from bytecode. ++ __ LeaveFrame(StackFrame::STUB); ++ ++ // Load deoptimization data from the code object. ++ // = [#deoptimization_data_offset] ++ __ Ld_d(a1, MemOperand(a0, Code::kDeoptimizationDataOffset - kHeapObjectTag)); ++ ++ // Load the OSR entrypoint offset from the deoptimization data. ++ // = [#header_size + #osr_pc_offset] ++ __ SmiUntag(a1, MemOperand(a1, FixedArray::OffsetOfElementAt( ++ DeoptimizationData::kOsrPcOffsetIndex) - ++ kHeapObjectTag)); ++ ++ // Compute the target address = code_obj + header_size + osr_offset ++ // = + #header_size + ++ __ Add_d(a0, a0, a1); ++ __ addi_d(ra, a0, Code::kHeaderSize - kHeapObjectTag); ++ ++ // And "return" to the OSR entry point of the function. ++ __ Ret(); ++} ++ ++// static ++void Builtins::Generate_FunctionPrototypeApply(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argc ++ // -- sp[0] : argArray ++ // -- sp[4] : thisArg ++ // -- sp[8] : receiver ++ // ----------------------------------- ++ ++ Register argc = a0; ++ Register arg_array = a2; ++ Register receiver = a1; ++ Register this_arg = a5; ++ Register undefined_value = a3; ++ Register scratch = a4; ++ ++ __ LoadRoot(undefined_value, RootIndex::kUndefinedValue); ++ ++ // 1. Load receiver into a1, argArray into a2 (if present), remove all ++ // arguments from the stack (including the receiver), and push thisArg (if ++ // present) instead. ++ { ++ // Claim (2 - argc) dummy arguments form the stack, to put the stack in a ++ // consistent state for a simple pop operation. 
++ ++ __ Sub_d(sp, sp, Operand(2 * kPointerSize)); ++ __ Alsl_d(sp, argc, sp, kPointerSizeLog2, t7); ++ __ mov(scratch, argc); ++ __ Pop(this_arg, arg_array); // Overwrite argc ++ __ Movz(arg_array, undefined_value, scratch); // if argc == 0 ++ __ Movz(this_arg, undefined_value, scratch); // if argc == 0 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arg_array, undefined_value, scratch); // if argc == 1 ++ __ Ld_d(receiver, MemOperand(sp, 0)); ++ __ St_d(this_arg, MemOperand(sp, 0)); ++ } ++ ++ // ----------- S t a t e ------------- ++ // -- a2 : argArray ++ // -- a1 : receiver ++ // -- a3 : undefined root value ++ // -- sp[0] : thisArg ++ // ----------------------------------- ++ ++ // 2. We don't need to check explicitly for callable receiver here, ++ // since that's the first thing the Call/CallWithArrayLike builtins ++ // will do. ++ ++ // 3. Tail call with no arguments if argArray is null or undefined. ++ Label no_arguments; ++ __ JumpIfRoot(arg_array, RootIndex::kNullValue, &no_arguments); ++ __ Branch(&no_arguments, eq, arg_array, Operand(undefined_value)); ++ ++ // 4a. Apply the receiver to the given argArray. ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallWithArrayLike), ++ RelocInfo::CODE_TARGET); ++ ++ // 4b. The argArray is either null or undefined, so we tail call without any ++ // arguments to the receiver. ++ __ bind(&no_arguments); ++ { ++ __ mov(a0, zero_reg); ++ DCHECK(receiver == a1); ++ __ Jump(masm->isolate()->builtins()->Call(), RelocInfo::CODE_TARGET); ++ } ++} ++ ++// static ++void Builtins::Generate_FunctionPrototypeCall(MacroAssembler* masm) { ++ // 1. Make sure we have at least one argument. ++ // a0: actual number of arguments ++ { ++ Label done; ++ __ Branch(&done, ne, a0, Operand(zero_reg)); ++ __ PushRoot(RootIndex::kUndefinedValue); ++ __ Add_d(a0, a0, Operand(1)); ++ __ bind(&done); ++ } ++ ++ // 2. Get the function to call (passed as receiver) from the stack. ++ // a0: actual number of arguments ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ Ld_d(a1, MemOperand(kScratchReg, 0)); ++ ++ // 3. Shift arguments and return address one slot down on the stack ++ // (overwriting the original receiver). Adjust argument count to make ++ // the original first argument the new receiver. ++ // a0: actual number of arguments ++ // a1: function ++ { ++ Label loop; ++ // Calculate the copy start address (destination). Copy end address is sp. ++ __ Alsl_d(a2, a0, sp, kPointerSizeLog2, t7); ++ ++ __ bind(&loop); ++ __ Ld_d(kScratchReg, MemOperand(a2, -kPointerSize)); ++ __ St_d(kScratchReg, MemOperand(a2, 0)); ++ __ Sub_d(a2, a2, Operand(kPointerSize)); ++ __ Branch(&loop, ne, a2, Operand(sp)); ++ // Adjust the actual number of arguments and remove the top element ++ // (which is a copy of the last argument). ++ __ Sub_d(a0, a0, Operand(1)); ++ __ Pop(); ++ } ++ ++ // 4. Call the callable. ++ __ Jump(masm->isolate()->builtins()->Call(), RelocInfo::CODE_TARGET); ++} ++ ++void Builtins::Generate_ReflectApply(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argc ++ // -- sp[0] : argumentsList (if argc ==3) ++ // -- sp[4] : thisArgument (if argc >=2) ++ // -- sp[8] : target (if argc >=1) ++ // -- sp[12] : receiver ++ // ----------------------------------- ++ ++ Register argc = a0; ++ Register arguments_list = a2; ++ Register target = a1; ++ Register this_argument = a5; ++ Register undefined_value = a3; ++ Register scratch = a4; ++ ++ __ LoadRoot(undefined_value, RootIndex::kUndefinedValue); ++ ++ // 1. 
Load target into a1 (if present), argumentsList into a2 (if present), ++ // remove all arguments from the stack (including the receiver), and push ++ // thisArgument (if present) instead. ++ { ++ // Claim (3 - argc) dummy arguments form the stack, to put the stack in a ++ // consistent state for a simple pop operation. ++ ++ __ Sub_d(sp, sp, Operand(3 * kPointerSize)); ++ __ Alsl_d(sp, argc, sp, kPointerSizeLog2, t7); ++ __ mov(scratch, argc); ++ __ Pop(target, this_argument, arguments_list); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 0 ++ __ Movz(this_argument, undefined_value, scratch); // if argc == 0 ++ __ Movz(target, undefined_value, scratch); // if argc == 0 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 1 ++ __ Movz(this_argument, undefined_value, scratch); // if argc == 1 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 2 ++ ++ __ St_d(this_argument, MemOperand(sp, 0)); // Overwrite receiver ++ } ++ ++ // ----------- S t a t e ------------- ++ // -- a2 : argumentsList ++ // -- a1 : target ++ // -- a3 : undefined root value ++ // -- sp[0] : thisArgument ++ // ----------------------------------- ++ ++ // 2. We don't need to check explicitly for callable target here, ++ // since that's the first thing the Call/CallWithArrayLike builtins ++ // will do. ++ ++ // 3. Apply the target to the given argumentsList. ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallWithArrayLike), ++ RelocInfo::CODE_TARGET); ++} ++ ++void Builtins::Generate_ReflectConstruct(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argc ++ // -- sp[0] : new.target (optional) (dummy value if argc <= 2) ++ // -- sp[4] : argumentsList (dummy value if argc <= 1) ++ // -- sp[8] : target (dummy value if argc == 0) ++ // -- sp[12] : receiver ++ // ----------------------------------- ++ Register argc = a0; ++ Register arguments_list = a2; ++ Register target = a1; ++ Register new_target = a3; ++ Register undefined_value = a4; ++ Register scratch = a5; ++ ++ __ LoadRoot(undefined_value, RootIndex::kUndefinedValue); ++ ++ // 1. Load target into a1 (if present), argumentsList into a2 (if present), ++ // new.target into a3 (if present, otherwise use target), remove all ++ // arguments from the stack (including the receiver), and push thisArgument ++ // (if present) instead. ++ { ++ // Claim (3 - argc) dummy arguments form the stack, to put the stack in a ++ // consistent state for a simple pop operation. ++ ++ __ Sub_d(sp, sp, Operand(3 * kPointerSize)); ++ __ Alsl_d(sp, argc, sp, kPointerSizeLog2, t7); ++ __ mov(scratch, argc); ++ __ Pop(target, arguments_list, new_target); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 0 ++ __ Movz(new_target, undefined_value, scratch); // if argc == 0 ++ __ Movz(target, undefined_value, scratch); // if argc == 0 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 1 ++ __ Movz(new_target, target, scratch); // if argc == 1 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(new_target, target, scratch); // if argc == 2 ++ ++ __ St_d(undefined_value, MemOperand(sp, 0)); // Overwrite receiver ++ } ++ ++ // ----------- S t a t e ------------- ++ // -- a2 : argumentsList ++ // -- a1 : target ++ // -- a3 : new.target ++ // -- sp[0] : receiver (undefined) ++ // ----------------------------------- ++ ++ // 2. 
We don't need to check explicitly for constructor target here, ++ // since that's the first thing the Construct/ConstructWithArrayLike ++ // builtins will do. ++ ++ // 3. We don't need to check explicitly for constructor new.target here, ++ // since that's the second thing the Construct/ConstructWithArrayLike ++ // builtins will do. ++ ++ // 4. Construct the target with the given new.target and argumentsList. ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructWithArrayLike), ++ RelocInfo::CODE_TARGET); ++} ++ ++static void EnterArgumentsAdaptorFrame(MacroAssembler* masm) { ++ __ SmiTag(a0); ++ __ li(a4, Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); ++ __ Push(ra, fp, a4, a1, a0); ++ __ Push(Smi::zero()); // Padding. ++ __ Add_d(fp, sp, ++ Operand(ArgumentsAdaptorFrameConstants::kFixedFrameSizeFromFp)); ++} ++ ++static void LeaveArgumentsAdaptorFrame(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : result being passed through ++ // ----------------------------------- ++ // Get the number of arguments passed (as a smi), tear down the frame and ++ // then tear down the parameters. ++ __ Ld_d(a1, MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset)); ++ __ mov(sp, fp); ++ __ Pop(ra, fp); ++ __ SmiScale(a4, a1, kPointerSizeLog2); ++ __ Add_d(sp, sp, a4); ++ // Adjust for the receiver. ++ __ Add_d(sp, sp, Operand(kPointerSize)); ++} ++ ++// static ++void Builtins::Generate_CallOrConstructVarargs(MacroAssembler* masm, ++ Handle<Code> code) { ++ // ----------- S t a t e ------------- ++ // -- a1 : target ++ // -- a0 : number of parameters on the stack (not including the receiver) ++ // -- a2 : arguments list (a FixedArray) ++ // -- a4 : len (number of elements to push from args) ++ // -- a3 : new.target (for [[Construct]]) ++ // ----------------------------------- ++ if (masm->emit_debug_code()) { ++ // Allow a2 to be a FixedArray, or a FixedDoubleArray if a4 == 0. ++ Label ok, fail; ++ __ AssertNotSmi(a2); ++ __ GetObjectType(a2, t8, t8); ++ __ Branch(&ok, eq, t8, Operand(FIXED_ARRAY_TYPE)); ++ __ Branch(&fail, ne, t8, Operand(FIXED_DOUBLE_ARRAY_TYPE)); ++ __ Branch(&ok, eq, a4, Operand(zero_reg)); ++ // Fall through. ++ __ bind(&fail); ++ __ Abort(AbortReason::kOperandIsNotAFixedArray); ++ ++ __ bind(&ok); ++ } ++ ++ Register args = a2; ++ Register len = a4; ++ ++ // Check for stack overflow. ++ Label stack_overflow; ++ Generate_StackOverflowCheck(masm, len, kScratchReg, a5, &stack_overflow); ++ ++ // Push arguments onto the stack (thisArgument is already on the stack). ++ { ++ Label done, push, loop; ++ Register src = a6; ++ Register scratch = len; ++ ++ __ addi_d(src, args, FixedArray::kHeaderSize - kHeapObjectTag); ++ __ Add_d(a0, a0, len); // The 'len' argument for Call() or Construct(). ++ __ Branch(&done, eq, len, Operand(zero_reg)); ++ __ slli_d(scratch, len, kPointerSizeLog2); ++ __ Sub_d(scratch, sp, Operand(scratch)); ++ __ LoadRoot(t1, RootIndex::kTheHoleValue); ++ __ bind(&loop); ++ __ Ld_d(a5, MemOperand(src, 0)); ++ __ Branch(&push, ne, a5, Operand(t1)); ++ __ LoadRoot(a5, RootIndex::kUndefinedValue); ++ __ bind(&push); ++ __ addi_d(src, src, kPointerSize); ++ __ Push(a5); ++ __ Branch(&loop, ne, scratch, Operand(sp)); ++ __ bind(&done); ++ } ++ ++ // Tail-call to the actual Call or Construct builtin.
++ __ Jump(code, RelocInfo::CODE_TARGET); ++ ++ __ bind(&stack_overflow); ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++} ++ ++// static ++void Builtins::Generate_CallOrConstructForwardVarargs(MacroAssembler* masm, ++ CallOrConstructMode mode, ++ Handle<Code> code) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a3 : the new.target (for [[Construct]] calls) ++ // -- a1 : the target to call (can be any Object) ++ // -- a2 : start index (to support rest parameters) ++ // ----------------------------------- ++ ++ // Check if new.target has a [[Construct]] internal method. ++ if (mode == CallOrConstructMode::kConstruct) { ++ Label new_target_constructor, new_target_not_constructor; ++ __ JumpIfSmi(a3, &new_target_not_constructor); ++ __ Ld_d(t1, FieldMemOperand(a3, HeapObject::kMapOffset)); ++ __ Ld_bu(t1, FieldMemOperand(t1, Map::kBitFieldOffset)); ++ __ And(t1, t1, Operand(Map::Bits1::IsConstructorBit::kMask)); ++ __ Branch(&new_target_constructor, ne, t1, Operand(zero_reg)); ++ __ bind(&new_target_not_constructor); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterFrame(StackFrame::INTERNAL); ++ __ Push(a3); ++ __ CallRuntime(Runtime::kThrowNotConstructor); ++ } ++ __ bind(&new_target_constructor); ++ } ++ ++ // Check if we have an arguments adaptor frame below the function frame. ++ Label arguments_adaptor, arguments_done; ++ __ Ld_d(a6, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); ++ __ Ld_d(a7, MemOperand(a6, CommonFrameConstants::kContextOrFrameTypeOffset)); ++ __ Branch(&arguments_adaptor, eq, a7, ++ Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); ++ { ++ __ Ld_d(a7, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ __ Ld_d(a7, FieldMemOperand(a7, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu(a7, FieldMemOperand( ++ a7, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ mov(a6, fp); ++ } ++ __ Branch(&arguments_done); ++ __ bind(&arguments_adaptor); ++ { ++ // Just get the length from the ArgumentsAdaptorFrame. ++ __ SmiUntag(a7, ++ MemOperand(a6, ArgumentsAdaptorFrameConstants::kLengthOffset)); ++ } ++ __ bind(&arguments_done); ++ ++ Label stack_done, stack_overflow; ++ __ Sub_w(a7, a7, a2); ++ __ Branch(&stack_done, le, a7, Operand(zero_reg)); ++ { ++ // Check for stack overflow. ++ Generate_StackOverflowCheck(masm, a7, a4, a5, &stack_overflow); ++ ++ // Forward the arguments from the caller frame. ++ { ++ Label loop; ++ __ Add_d(a0, a0, a7); ++ __ bind(&loop); ++ { ++ __ Alsl_d(kScratchReg, a7, a6, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(kScratchReg, 1 * kPointerSize)); ++ __ push(kScratchReg); ++ __ Sub_w(a7, a7, Operand(1)); ++ __ Branch(&loop, ne, a7, Operand(zero_reg)); ++ } ++ } ++ } ++ __ Branch(&stack_done); ++ __ bind(&stack_overflow); ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++ __ bind(&stack_done); ++ ++ // Tail-call to the {code} handler. ++ __ Jump(code, RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_CallFunction(MacroAssembler* masm, ++ ConvertReceiverMode mode) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSFunction) ++ // ----------------------------------- ++ __ AssertFunction(a1); ++ ++ // See ES6 section 9.2.1 [[Call]] ( thisArgument, argumentsList) ++ // Check that function is not a "classConstructor".
++ Label class_constructor; ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_wu(a3, FieldMemOperand(a2, SharedFunctionInfo::kFlagsOffset)); ++ __ And(kScratchReg, a3, ++ Operand(SharedFunctionInfo::IsClassConstructorBit::kMask)); ++ __ Branch(&class_constructor, ne, kScratchReg, Operand(zero_reg)); ++ ++ // Enter the context of the function; ToObject has to run in the function ++ // context, and we also need to take the global proxy from the function ++ // context in case of conversion. ++ __ Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); ++ // We need to convert the receiver for non-native sloppy mode functions. ++ Label done_convert; ++ __ Ld_wu(a3, FieldMemOperand(a2, SharedFunctionInfo::kFlagsOffset)); ++ __ And(kScratchReg, a3, ++ Operand(SharedFunctionInfo::IsNativeBit::kMask | ++ SharedFunctionInfo::IsStrictBit::kMask)); ++ __ Branch(&done_convert, ne, kScratchReg, Operand(zero_reg)); ++ { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSFunction) ++ // -- a2 : the shared function info. ++ // -- cp : the function context. ++ // ----------------------------------- ++ ++ if (mode == ConvertReceiverMode::kNullOrUndefined) { ++ // Patch receiver to global proxy. ++ __ LoadGlobalProxy(a3); ++ } else { ++ Label convert_to_object, convert_receiver; ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ Ld_d(a3, MemOperand(kScratchReg, 0)); ++ __ JumpIfSmi(a3, &convert_to_object); ++ STATIC_ASSERT(LAST_JS_RECEIVER_TYPE == LAST_TYPE); ++ __ GetObjectType(a3, a4, a4); ++ __ Branch(&done_convert, hs, a4, Operand(FIRST_JS_RECEIVER_TYPE)); ++ if (mode != ConvertReceiverMode::kNotNullOrUndefined) { ++ Label convert_global_proxy; ++ __ JumpIfRoot(a3, RootIndex::kUndefinedValue, &convert_global_proxy); ++ __ JumpIfNotRoot(a3, RootIndex::kNullValue, &convert_to_object); ++ __ bind(&convert_global_proxy); ++ { ++ // Patch receiver to global proxy. ++ __ LoadGlobalProxy(a3); ++ } ++ __ Branch(&convert_receiver); ++ } ++ __ bind(&convert_to_object); ++ { ++ // Convert receiver using ToObject. ++ // TODO(bmeurer): Inline the allocation here to avoid building the frame ++ // in the fast case? (fall back to AllocateInNewSpace?) ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ SmiTag(a0); ++ __ Push(a0, a1); ++ __ mov(a0, a3); ++ __ Push(cp); ++ __ Call(BUILTIN_CODE(masm->isolate(), ToObject), ++ RelocInfo::CODE_TARGET); ++ __ Pop(cp); ++ __ mov(a3, a0); ++ __ Pop(a0, a1); ++ __ SmiUntag(a0); ++ } ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ bind(&convert_receiver); ++ } ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(a3, MemOperand(kScratchReg, 0)); ++ } ++ __ bind(&done_convert); ++ ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSFunction) ++ // -- a2 : the shared function info. ++ // -- cp : the function context. ++ // ----------------------------------- ++ ++ __ Ld_hu( ++ a2, FieldMemOperand(a2, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ InvokeFunctionCode(a1, no_reg, a2, a0, JUMP_FUNCTION); ++ ++ // The function is a "classConstructor", need to raise an exception. 
++ __ bind(&class_constructor); ++ { ++ FrameScope frame(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kThrowConstructorNonCallableError); ++ } ++} ++ ++// static ++void Builtins::Generate_CallBoundFunctionImpl(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // ----------------------------------- ++ __ AssertBoundFunction(a1); ++ ++ // Patch the receiver to [[BoundThis]]. ++ { ++ __ Ld_d(kScratchReg, ++ FieldMemOperand(a1, JSBoundFunction::kBoundThisOffset)); ++ __ Alsl_d(a4, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a4, 0)); ++ } ++ ++ // Load [[BoundArguments]] into a2 and length of that into a4. ++ __ Ld_d(a2, FieldMemOperand(a1, JSBoundFunction::kBoundArgumentsOffset)); ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // -- a2 : the [[BoundArguments]] (implemented as FixedArray) ++ // -- a4 : the number of [[BoundArguments]] ++ // ----------------------------------- ++ ++ // Reserve stack space for the [[BoundArguments]]. ++ { ++ Label done; ++ __ slli_d(a5, a4, kPointerSizeLog2); ++ __ Sub_d(sp, sp, Operand(a5)); ++ // Check the stack for overflow. We are not trying to catch interruptions ++ // (i.e. debug break and preemption) here, so check the "real stack limit". ++ LoadStackLimit(masm, kScratchReg, StackLimitKind::kRealStackLimit); ++ __ Branch(&done, hs, sp, Operand(kScratchReg)); ++ // Restore the stack pointer. ++ __ Add_d(sp, sp, Operand(a5)); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterFrame(StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ } ++ __ bind(&done); ++ } ++ ++ // Relocate arguments down the stack. ++ { ++ Label loop, done_loop; ++ __ mov(a5, zero_reg); ++ __ bind(&loop); ++ __ Branch(&done_loop, gt, a5, Operand(a0)); ++ __ Alsl_d(a6, a4, sp, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a6, 0)); ++ __ Alsl_d(a6, a5, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a6, 0)); ++ __ Add_d(a4, a4, Operand(1)); ++ __ Add_d(a5, a5, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Copy [[BoundArguments]] to the stack (below the arguments). ++ { ++ Label loop, done_loop; ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ __ Add_d(a2, a2, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); ++ __ bind(&loop); ++ __ Sub_d(a4, a4, Operand(1)); ++ __ Branch(&done_loop, lt, a4, Operand(zero_reg)); ++ __ Alsl_d(a5, a4, a2, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a5, 0)); ++ __ Alsl_d(a5, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a5, 0)); ++ __ Add_d(a0, a0, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Call the [[BoundTargetFunction]] via the Call builtin. ++ __ Ld_d(a1, FieldMemOperand(a1, JSBoundFunction::kBoundTargetFunctionOffset)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), Call_ReceiverIsAny), ++ RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the target to call (can be any Object). 
++ // ----------------------------------- ++ ++ Label non_callable, non_smi; ++ __ JumpIfSmi(a1, &non_callable); ++ __ bind(&non_smi); ++ __ GetObjectType(a1, t1, t2); ++ __ Jump(masm->isolate()->builtins()->CallFunction(mode), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_FUNCTION_TYPE)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallBoundFunction), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_BOUND_FUNCTION_TYPE)); ++ ++ // Check if target has a [[Call]] internal method. ++ __ Ld_bu(t1, FieldMemOperand(t1, Map::kBitFieldOffset)); ++ __ And(t1, t1, Operand(Map::Bits1::IsCallableBit::kMask)); ++ __ Branch(&non_callable, eq, t1, Operand(zero_reg)); ++ ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), RelocInfo::CODE_TARGET, eq, ++ t2, Operand(JS_PROXY_TYPE)); ++ ++ // 2. Call to something else, which might have a [[Call]] internal method (if ++ // not we raise an exception). ++ // Overwrite the original receiver with the (original) target. ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(a1, MemOperand(kScratchReg, 0)); ++ // Let the "call_as_function_delegate" take care of the rest. ++ __ LoadNativeContextSlot(Context::CALL_AS_FUNCTION_DELEGATE_INDEX, a1); ++ __ Jump(masm->isolate()->builtins()->CallFunction( ++ ConvertReceiverMode::kNotNullOrUndefined), ++ RelocInfo::CODE_TARGET); ++ ++ // 3. Call to something that is not callable. ++ __ bind(&non_callable); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kThrowCalledNonCallable); ++ } ++} ++ ++void Builtins::Generate_ConstructFunction(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the constructor to call (checked to be a JSFunction) ++ // -- a3 : the new target (checked to be a constructor) ++ // ----------------------------------- ++ __ AssertConstructor(a1); ++ __ AssertFunction(a1); ++ ++ // Calling convention for function specific ConstructStubs require ++ // a2 to contain either an AllocationSite or undefined. ++ __ LoadRoot(a2, RootIndex::kUndefinedValue); ++ ++ Label call_generic_stub; ++ ++ // Jump to JSBuiltinsConstructStub or JSConstructStubGeneric. ++ __ Ld_d(a4, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_wu(a4, FieldMemOperand(a4, SharedFunctionInfo::kFlagsOffset)); ++ __ And(a4, a4, Operand(SharedFunctionInfo::ConstructAsBuiltinBit::kMask)); ++ __ Branch(&call_generic_stub, eq, a4, Operand(zero_reg)); ++ ++ __ Jump(BUILTIN_CODE(masm->isolate(), JSBuiltinsConstructStub), ++ RelocInfo::CODE_TARGET); ++ ++ __ bind(&call_generic_stub); ++ __ Jump(BUILTIN_CODE(masm->isolate(), JSConstructStubGeneric), ++ RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_ConstructBoundFunction(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // -- a3 : the new target (checked to be a constructor) ++ // ----------------------------------- ++ __ AssertConstructor(a1); ++ __ AssertBoundFunction(a1); ++ ++ // Load [[BoundArguments]] into a2 and length of that into a4. 
++ __ Ld_d(a2, FieldMemOperand(a1, JSBoundFunction::kBoundArgumentsOffset)); ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // -- a2 : the [[BoundArguments]] (implemented as FixedArray) ++ // -- a3 : the new target (checked to be a constructor) ++ // -- a4 : the number of [[BoundArguments]] ++ // ----------------------------------- ++ ++ // Reserve stack space for the [[BoundArguments]]. ++ { ++ Label done; ++ __ slli_d(a5, a4, kPointerSizeLog2); ++ __ Sub_d(sp, sp, Operand(a5)); ++ // Check the stack for overflow. We are not trying to catch interruptions ++ // (i.e. debug break and preemption) here, so check the "real stack limit". ++ LoadStackLimit(masm, kScratchReg, StackLimitKind::kRealStackLimit); ++ __ Branch(&done, hs, sp, Operand(kScratchReg)); ++ // Restore the stack pointer. ++ __ Add_d(sp, sp, Operand(a5)); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterFrame(StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ } ++ __ bind(&done); ++ } ++ ++ // Relocate arguments down the stack. ++ { ++ Label loop, done_loop; ++ __ mov(a5, zero_reg); ++ __ bind(&loop); ++ __ Branch(&done_loop, ge, a5, Operand(a0)); ++ __ Alsl_d(a6, a4, sp, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a6, 0)); ++ __ Alsl_d(a6, a5, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a6, 0)); ++ __ Add_d(a4, a4, Operand(1)); ++ __ Add_d(a5, a5, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Copy [[BoundArguments]] to the stack (below the arguments). ++ { ++ Label loop, done_loop; ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ __ Add_d(a2, a2, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); ++ __ bind(&loop); ++ __ Sub_d(a4, a4, Operand(1)); ++ __ Branch(&done_loop, lt, a4, Operand(zero_reg)); ++ __ Alsl_d(a5, a4, a2, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a5, 0)); ++ __ Alsl_d(a5, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a5, 0)); ++ __ Add_d(a0, a0, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Patch new.target to [[BoundTargetFunction]] if new.target equals target. ++ { ++ Label skip_load; ++ __ Branch(&skip_load, ne, a1, Operand(a3)); ++ __ Ld_d(a3, ++ FieldMemOperand(a1, JSBoundFunction::kBoundTargetFunctionOffset)); ++ __ bind(&skip_load); ++ } ++ ++ // Construct the [[BoundTargetFunction]] via the Construct builtin. ++ __ Ld_d(a1, FieldMemOperand(a1, JSBoundFunction::kBoundTargetFunctionOffset)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), Construct), RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_Construct(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the constructor to call (can be any Object) ++ // -- a3 : the new target (either the same as the constructor or ++ // the JSFunction on which new was invoked initially) ++ // ----------------------------------- ++ ++ // Check if target is a Smi. ++ Label non_constructor, non_proxy; ++ __ JumpIfSmi(a1, &non_constructor); ++ ++ // Check if target has a [[Construct]] internal method. 
++ __ Ld_d(t1, FieldMemOperand(a1, HeapObject::kMapOffset)); ++ __ Ld_bu(t3, FieldMemOperand(t1, Map::kBitFieldOffset)); ++ __ And(t3, t3, Operand(Map::Bits1::IsConstructorBit::kMask)); ++ __ Branch(&non_constructor, eq, t3, Operand(zero_reg)); ++ ++ // Dispatch based on instance type. ++ __ Ld_hu(t2, FieldMemOperand(t1, Map::kInstanceTypeOffset)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructFunction), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_FUNCTION_TYPE)); ++ ++ // Only dispatch to bound functions after checking whether they are ++ // constructors. ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructBoundFunction), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_BOUND_FUNCTION_TYPE)); ++ ++ // Only dispatch to proxies after checking whether they are constructors. ++ __ Branch(&non_proxy, ne, t2, Operand(JS_PROXY_TYPE)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructProxy), ++ RelocInfo::CODE_TARGET); ++ ++ // Called Construct on an exotic Object with a [[Construct]] internal method. ++ __ bind(&non_proxy); ++ { ++ // Overwrite the original receiver with the (original) target. ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(a1, MemOperand(kScratchReg, 0)); ++ // Let the "call_as_constructor_delegate" take care of the rest. ++ __ LoadNativeContextSlot(Context::CALL_AS_CONSTRUCTOR_DELEGATE_INDEX, a1); ++ __ Jump(masm->isolate()->builtins()->CallFunction(), ++ RelocInfo::CODE_TARGET); ++ } ++ ++ // Called Construct on an Object that doesn't have a [[Construct]] internal ++ // method. ++ __ bind(&non_constructor); ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructedNonConstructable), ++ RelocInfo::CODE_TARGET); ++} ++ ++void Builtins::Generate_ArgumentsAdaptorTrampoline(MacroAssembler* masm) { ++ // State setup as expected by MacroAssembler::InvokePrologue. ++ // ----------- S t a t e ------------- ++ // -- a0: actual arguments count ++ // -- a1: function (passed through to callee) ++ // -- a2: expected arguments count ++ // -- a3: new target (passed through to callee) ++ // ----------------------------------- ++ ++ Label invoke, dont_adapt_arguments, stack_overflow; ++ ++ Label enough, too_few; ++ __ Branch(&dont_adapt_arguments, eq, a2, ++ Operand(kDontAdaptArgumentsSentinel)); ++ // We use Uless as the number of argument should always be greater than 0. ++ __ Branch(&too_few, Uless, a0, Operand(a2)); ++ ++ { // Enough parameters: actual >= expected. ++ // a0: actual number of arguments as a smi ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ __ bind(&enough); ++ EnterArgumentsAdaptorFrame(masm); ++ Generate_StackOverflowCheck(masm, a2, a5, kScratchReg, &stack_overflow); ++ ++ // Calculate copy start address into a0 and copy end address into a4. ++ __ SmiScale(a0, a0, kPointerSizeLog2); ++ __ Add_d(a0, fp, a0); ++ // Adjust for return address and receiver. ++ __ Add_d(a0, a0, Operand(2 * kPointerSize)); ++ // Compute copy end address. ++ __ slli_d(a4, a2, kPointerSizeLog2); ++ __ sub_d(a4, a0, a4); ++ ++ // Copy the arguments (including the receiver) to the new stack frame. ++ // a0: copy start address ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ // a4: copy end address ++ ++ Label copy; ++ __ bind(©); ++ __ Ld_d(a5, MemOperand(a0, 0)); ++ __ push(a5); ++ __ addi_d(a0, a0, -kPointerSize); ++ __ Branch(©, ge, a0, Operand(a4)); ++ ++ __ jmp(&invoke); ++ } ++ ++ { // Too few parameters: Actual < expected. 
++ __ bind(&too_few); ++ EnterArgumentsAdaptorFrame(masm); ++ Generate_StackOverflowCheck(masm, a2, a5, kScratchReg, &stack_overflow); ++ ++ // Calculate copy start address into a0 and copy end address into a7. ++ // a0: actual number of arguments as a smi ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ __ SmiScale(a0, a0, kPointerSizeLog2); ++ __ Add_d(a0, fp, a0); ++ // Adjust for return address and receiver. ++ __ Add_d(a0, a0, Operand(2 * kPointerSize)); ++ // Compute copy end address. Also adjust for return address. ++ __ Add_d(a7, fp, kPointerSize); ++ ++ // Copy the arguments (including the receiver) to the new stack frame. ++ // a0: copy start address ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ // a7: copy end address ++ Label copy; ++ __ bind(©); ++ __ Ld_d(a4, ++ MemOperand(a0, 0)); // Adjusted above for return addr and receiver. ++ __ Sub_d(sp, sp, kPointerSize); ++ __ Sub_d(a0, a0, kPointerSize); ++ __ St_d(a4, MemOperand(sp, 0)); ++ __ Branch(©, ne, a0, Operand(a7)); ++ ++ // Fill the remaining expected arguments with undefined. ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ __ LoadRoot(a5, RootIndex::kUndefinedValue); ++ __ slli_d(a6, a2, kPointerSizeLog2); ++ __ Sub_d(a4, fp, Operand(a6)); ++ // Adjust for frame. ++ __ Sub_d(a4, a4, ++ Operand(ArgumentsAdaptorFrameConstants::kFixedFrameSizeFromFp + ++ kPointerSize)); ++ ++ Label fill; ++ __ bind(&fill); ++ __ Sub_d(sp, sp, kPointerSize); ++ __ St_d(a5, MemOperand(sp, 0)); ++ __ Branch(&fill, ne, sp, Operand(a4)); ++ } ++ ++ // Call the entry point. ++ __ bind(&invoke); ++ __ mov(a0, a2); ++ // a0 : expected number of arguments ++ // a1 : function (passed through to callee) ++ // a3: new target (passed through to callee) ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Call(a2); ++ ++ // Store offset of return address for deoptimizer. ++ masm->isolate()->heap()->SetArgumentsAdaptorDeoptPCOffset(masm->pc_offset()); ++ ++ // Exit frame and return. ++ LeaveArgumentsAdaptorFrame(masm); ++ __ Ret(); ++ ++ // ------------------------------------------- ++ // Don't adapt arguments. ++ // ------------------------------------------- ++ __ bind(&dont_adapt_arguments); ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++ ++ __ bind(&stack_overflow); ++ { ++ FrameScope frame(masm, StackFrame::MANUAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ __ break_(0xCC); ++ } ++} ++ ++void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) { ++ // The function index was put in t0 by the jump table trampoline. ++ // Convert to Smi for the runtime call ++ __ SmiTag(kWasmCompileLazyFuncIndexRegister); ++ { ++ HardAbortScope hard_abort(masm); // Avoid calls to Abort. ++ FrameScope scope(masm, StackFrame::WASM_COMPILE_LAZY); ++ ++ // Save all parameter registers (see wasm-linkage.cc). They might be ++ // overwritten in the runtime call below. We don't have any callee-saved ++ // registers in wasm, so no need to store anything else. 
++ constexpr RegList gp_regs = Register::ListOf(a0, a2, a3, a4, a5, a6, a7); ++ constexpr RegList fp_regs = ++ DoubleRegister::ListOf(f2, f4, f6, f8, f10, f12, f14); ++ __ MultiPush(gp_regs); ++ __ MultiPushFPU(fp_regs); ++ ++ // Pass instance and function index as an explicit arguments to the runtime ++ // function. ++ __ Push(kWasmInstanceRegister, kWasmCompileLazyFuncIndexRegister); ++ // Initialize the JavaScript context with 0. CEntry will use it to ++ // set the current context on the isolate. ++ __ Move(kContextRegister, Smi::zero()); ++ __ CallRuntime(Runtime::kWasmCompileLazy, 2); ++ __ mov(t8, a0); ++ ++ // Restore registers. ++ __ MultiPopFPU(fp_regs); ++ __ MultiPop(gp_regs); ++ } ++ // Finally, jump to the entrypoint. ++ __ Jump(t8); ++} ++ ++void Builtins::Generate_WasmDebugBreak(MacroAssembler* masm) { ++ HardAbortScope hard_abort(masm); // Avoid calls to Abort. ++ { ++ FrameScope scope(masm, StackFrame::WASM_DEBUG_BREAK); ++ ++ // Save all parameter registers. They might hold live values, we restore ++ // them after the runtime call. ++ __ MultiPush(WasmDebugBreakFrameConstants::kPushedGpRegs); ++ __ MultiPushFPU(WasmDebugBreakFrameConstants::kPushedFpRegs); ++ ++ // Initialize the JavaScript context with 0. CEntry will use it to ++ // set the current context on the isolate. ++ __ Move(cp, Smi::zero()); ++ __ CallRuntime(Runtime::kWasmDebugBreak, 0); ++ ++ // Restore registers. ++ __ MultiPopFPU(WasmDebugBreakFrameConstants::kPushedFpRegs); ++ __ MultiPop(WasmDebugBreakFrameConstants::kPushedGpRegs); ++ } ++ __ Ret(); ++} ++ ++void Builtins::Generate_CEntry(MacroAssembler* masm, int result_size, ++ SaveFPRegsMode save_doubles, ArgvMode argv_mode, ++ bool builtin_exit_frame) { ++ // Called from JavaScript; parameters are on stack as if calling JS function ++ // a0: number of arguments including receiver ++ // a1: pointer to builtin function ++ // fp: frame pointer (restored after C call) ++ // sp: stack pointer (restored as callee's sp after C call) ++ // cp: current context (C callee-saved) ++ // ++ // If argv_mode == kArgvInRegister: ++ // a2: pointer to the first argument ++ ++ if (argv_mode == kArgvInRegister) { ++ // Move argv into the correct register. ++ __ mov(s1, a2); ++ } else { ++ // Compute the argv pointer in a callee-saved register. ++ __ Alsl_d(s1, a0, sp, kPointerSizeLog2, t7); ++ __ Sub_d(s1, s1, kPointerSize); ++ } ++ ++ // Enter the exit frame that transitions from JavaScript to C++. ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterExitFrame( ++ save_doubles == kSaveFPRegs, 0, ++ builtin_exit_frame ? StackFrame::BUILTIN_EXIT : StackFrame::EXIT); ++ ++ // s0: number of arguments including receiver (C callee-saved) ++ // s1: pointer to first argument (C callee-saved) ++ // s2: pointer to builtin function (C callee-saved) ++ ++ // Prepare arguments for C routine. ++ // a0 = argc ++ __ mov(s0, a0); ++ __ mov(s2, a1); ++ ++ // We are calling compiled C/C++ code. a0 and a1 hold our two arguments. We ++ // also need to reserve the 4 argument slots on the stack. ++ ++ __ AssertStackIsAligned(); ++ ++ // a0 = argc, a1 = argv, a2 = isolate ++ __ li(a2, ExternalReference::isolate_address(masm->isolate())); ++ __ mov(a1, s1); ++ ++ __ StoreReturnAddressAndCall(s2); ++ ++ // Result returned in a0 or a1:a0 - do not destroy these registers! ++ ++ // Check result for exception sentinel. 
++ Label exception_returned; ++ __ LoadRoot(a4, RootIndex::kException); ++ __ Branch(&exception_returned, eq, a4, Operand(a0)); ++ ++ // Check that there is no pending exception, otherwise we ++ // should have returned the exception sentinel. ++ if (FLAG_debug_code) { ++ Label okay; ++ ExternalReference pending_exception_address = ExternalReference::Create( ++ IsolateAddressId::kPendingExceptionAddress, masm->isolate()); ++ __ li(a2, pending_exception_address); ++ __ Ld_d(a2, MemOperand(a2, 0)); ++ __ LoadRoot(a4, RootIndex::kTheHoleValue); ++ // Cannot use check here as it attempts to generate call into runtime. ++ __ Branch(&okay, eq, a4, Operand(a2)); ++ __ stop(); ++ __ bind(&okay); ++ } ++ ++ // Exit C frame and return. ++ // a0:a1: result ++ // sp: stack pointer ++ // fp: frame pointer ++ Register argc = argv_mode == kArgvInRegister ++ // We don't want to pop arguments so set argc to no_reg. ++ ? no_reg ++ // s0: still holds argc (callee-saved). ++ : s0; ++ __ LeaveExitFrame(save_doubles == kSaveFPRegs, argc, EMIT_RETURN); ++ ++ // Handling of exception. ++ __ bind(&exception_returned); ++ ++ ExternalReference pending_handler_context_address = ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerContextAddress, masm->isolate()); ++ ExternalReference pending_handler_entrypoint_address = ++ ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerEntrypointAddress, masm->isolate()); ++ ExternalReference pending_handler_fp_address = ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerFPAddress, masm->isolate()); ++ ExternalReference pending_handler_sp_address = ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerSPAddress, masm->isolate()); ++ ++ // Ask the runtime for help to determine the handler. This will set a0 to ++ // contain the current pending exception, don't clobber it. ++ ExternalReference find_handler = ++ ExternalReference::Create(Runtime::kUnwindAndFindExceptionHandler); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ PrepareCallCFunction(3, 0, a0); ++ __ mov(a0, zero_reg); ++ __ mov(a1, zero_reg); ++ __ li(a2, ExternalReference::isolate_address(masm->isolate())); ++ __ CallCFunction(find_handler, 3); ++ } ++ ++ // Retrieve the handler context, SP and FP. ++ __ li(cp, pending_handler_context_address); ++ __ Ld_d(cp, MemOperand(cp, 0)); ++ __ li(sp, pending_handler_sp_address); ++ __ Ld_d(sp, MemOperand(sp, 0)); ++ __ li(fp, pending_handler_fp_address); ++ __ Ld_d(fp, MemOperand(fp, 0)); ++ ++ // If the handler is a JS frame, restore the context to the frame. Note that ++ // the context will be set to (cp == 0) for non-JS frames. ++ Label zero; ++ __ Branch(&zero, eq, cp, Operand(zero_reg)); ++ __ St_d(cp, MemOperand(fp, StandardFrameConstants::kContextOffset)); ++ __ bind(&zero); ++ ++ // Reset the masking register. This is done independent of the underlying ++ // feature flag {FLAG_untrusted_code_mitigations} to make the snapshot work ++ // with both configurations. It is safe to always do this, because the ++ // underlying register is caller-saved and can be arbitrarily clobbered. ++ __ ResetSpeculationPoisonRegister(); ++ ++ // Compute the handler entry address and jump to it. 
++ __ li(t7, pending_handler_entrypoint_address); ++ __ Ld_d(t7, MemOperand(t7, 0)); ++ __ Jump(t7); ++} ++ ++void Builtins::Generate_DoubleToI(MacroAssembler* masm) { ++ Label done; ++ Register result_reg = t0; ++ ++ Register scratch = GetRegisterThatIsNotOneOf(result_reg); ++ Register scratch2 = GetRegisterThatIsNotOneOf(result_reg, scratch); ++ Register scratch3 = GetRegisterThatIsNotOneOf(result_reg, scratch, scratch2); ++ DoubleRegister double_scratch = kScratchDoubleReg; ++ ++ // Account for saved regs. ++ const int kArgumentOffset = 4 * kPointerSize; ++ ++ __ Push(result_reg); ++ __ Push(scratch, scratch2, scratch3); ++ ++ // Load double input. ++ __ Fld_d(double_scratch, MemOperand(sp, kArgumentOffset)); ++ ++ // Clear cumulative exception flags and save the FCSR. ++ // __ movfcsr2gr(scratch2, FCSR); ++ // __ movgr2fcsr(FCSR, zero_reg); ++ ++ // Try a conversion to a signed integer. ++ __ ftintrz_w_d(double_scratch, double_scratch); ++ // Move the converted value into the result register. ++ __ movfr2gr_s(scratch3, double_scratch); ++ ++ // Retrieve and restore the FCSR. ++ __ movfcsr2gr(scratch); // __ cfc1(scratch, FCSR); ++ // __ ctc1(scratch2, FCSR); ++ ++ // Check for overflow and NaNs. ++ __ And( ++ scratch, scratch, ++ kFCSROverflowFlagMask | kFCSRUnderflowFlagMask | kFCSRInvalidOpFlagMask); ++ // If we had no exceptions then set result_reg and we are done. ++ Label error; ++ __ Branch(&error, ne, scratch, Operand(zero_reg)); ++ __ Move(result_reg, scratch3); ++ __ Branch(&done); ++ __ bind(&error); ++ ++ // Load the double value and perform a manual truncation. ++ Register input_high = scratch2; ++ Register input_low = scratch3; ++ ++ __ Ld_w(input_low, ++ MemOperand(sp, kArgumentOffset + Register::kMantissaOffset)); ++ __ Ld_w(input_high, ++ MemOperand(sp, kArgumentOffset + Register::kExponentOffset)); ++ ++ Label normal_exponent; ++ // Extract the biased exponent in result. ++ __ bstrpick_w(result_reg, input_high, ++ HeapNumber::kExponentShift + HeapNumber::kExponentBits - 1, ++ HeapNumber::kExponentShift); ++ ++ // Check for Infinity and NaNs, which should return 0. ++ __ Sub_w(scratch, result_reg, HeapNumber::kExponentMask); ++ __ Movz(result_reg, zero_reg, scratch); ++ __ Branch(&done, eq, scratch, Operand(zero_reg)); ++ ++ // Express exponent as delta to (number of mantissa bits + 31). ++ __ Sub_w(result_reg, result_reg, ++ Operand(HeapNumber::kExponentBias + HeapNumber::kMantissaBits + 31)); ++ ++ // If the delta is strictly positive, all bits would be shifted away, ++ // which means that we can return 0. ++ __ Branch(&normal_exponent, le, result_reg, Operand(zero_reg)); ++ __ mov(result_reg, zero_reg); ++ __ Branch(&done); ++ ++ __ bind(&normal_exponent); ++ const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1; ++ // Calculate shift. ++ __ Add_w(scratch, result_reg, ++ Operand(kShiftBase + HeapNumber::kMantissaBits)); ++ ++ // Save the sign. ++ Register sign = result_reg; ++ result_reg = no_reg; ++ __ And(sign, input_high, Operand(HeapNumber::kSignMask)); ++ ++ // On ARM shifts > 31 bits are valid and will result in zero. On MIPS we need ++ // to check for this specific case. ++ Label high_shift_needed, high_shift_done; ++ __ Branch(&high_shift_needed, lt, scratch, Operand(32)); ++ __ mov(input_high, zero_reg); ++ __ Branch(&high_shift_done); ++ __ bind(&high_shift_needed); ++ ++ // Set the implicit 1 before the mantissa part in input_high. 
++ __ Or(input_high, input_high, ++ Operand(1 << HeapNumber::kMantissaBitsInTopWord)); ++ // Shift the mantissa bits to the correct position. ++ // We don't need to clear non-mantissa bits as they will be shifted away. ++ // If they weren't, it would mean that the answer is in the 32bit range. ++ __ sll_w(input_high, input_high, scratch); ++ ++ __ bind(&high_shift_done); ++ ++ // Replace the shifted bits with bits from the lower mantissa word. ++ Label pos_shift, shift_done; ++ __ li(kScratchReg, 32); ++ __ sub_w(scratch, kScratchReg, scratch); ++ __ Branch(&pos_shift, ge, scratch, Operand(zero_reg)); ++ ++ // Negate scratch. ++ __ Sub_w(scratch, zero_reg, scratch); ++ __ sll_w(input_low, input_low, scratch); ++ __ Branch(&shift_done); ++ ++ __ bind(&pos_shift); ++ __ srl_w(input_low, input_low, scratch); ++ ++ __ bind(&shift_done); ++ __ Or(input_high, input_high, Operand(input_low)); ++ // Restore sign if necessary. ++ __ mov(scratch, sign); ++ result_reg = sign; ++ sign = no_reg; ++ __ Sub_w(result_reg, zero_reg, input_high); ++ __ Movz(result_reg, input_high, scratch); ++ ++ __ bind(&done); ++ ++ __ St_d(result_reg, MemOperand(sp, kArgumentOffset)); ++ __ Pop(scratch, scratch2, scratch3); ++ __ Pop(result_reg); ++ __ Ret(); ++} ++ ++namespace { ++ ++int AddressOffset(ExternalReference ref0, ExternalReference ref1) { ++ int64_t offset = (ref0.address() - ref1.address()); ++ DCHECK(static_cast<int>(offset) == offset); ++ return static_cast<int>(offset); ++} ++ ++// Calls an API function. Allocates HandleScope, extracts returned value ++// from handle and propagates exceptions. Restores context. stack_space ++// - space to be unwound on exit (includes the call JS arguments space and ++// the additional space allocated for the fast call). ++void CallApiFunctionAndReturn(MacroAssembler* masm, Register function_address, ++ ExternalReference thunk_ref, int stack_space, ++ MemOperand* stack_space_operand, ++ MemOperand return_value_operand) { ++ Isolate* isolate = masm->isolate(); ++ ExternalReference next_address = ++ ExternalReference::handle_scope_next_address(isolate); ++ const int kNextOffset = 0; ++ const int kLimitOffset = AddressOffset( ++ ExternalReference::handle_scope_limit_address(isolate), next_address); ++ const int kLevelOffset = AddressOffset( ++ ExternalReference::handle_scope_level_address(isolate), next_address); ++ ++ DCHECK(function_address == a1 || function_address == a2); ++ ++ Label profiler_enabled, end_profiler_check; ++ __ li(t7, ExternalReference::is_profiling_address(isolate)); ++ __ Ld_b(t7, MemOperand(t7, 0)); ++ __ Branch(&profiler_enabled, ne, t7, Operand(zero_reg)); ++ __ li(t7, ExternalReference::address_of_runtime_stats_flag()); ++ __ Ld_w(t7, MemOperand(t7, 0)); ++ __ Branch(&profiler_enabled, ne, t7, Operand(zero_reg)); ++ { ++ // Call the api function directly. ++ __ mov(t7, function_address); ++ __ Branch(&end_profiler_check); ++ } ++ ++ __ bind(&profiler_enabled); ++ { ++ // Additional parameter is the address of the actual callback. ++ __ li(t7, thunk_ref); ++ } ++ __ bind(&end_profiler_check); ++ ++ // Allocate HandleScope in callee-save registers.
++ __ li(s5, next_address); ++ __ Ld_d(s0, MemOperand(s5, kNextOffset)); ++ __ Ld_d(s1, MemOperand(s5, kLimitOffset)); ++ __ Ld_w(s2, MemOperand(s5, kLevelOffset)); ++ __ Add_w(s2, s2, Operand(1)); ++ __ St_w(s2, MemOperand(s5, kLevelOffset)); ++ ++ __ StoreReturnAddressAndCall(t7); ++ ++ Label promote_scheduled_exception; ++ Label delete_allocated_handles; ++ Label leave_exit_frame; ++ Label return_value_loaded; ++ ++ // Load value from ReturnValue. ++ __ Ld_d(a0, return_value_operand); ++ __ bind(&return_value_loaded); ++ ++ // No more valid handles (the result handle was the last one). Restore ++ // previous handle scope. ++ __ St_d(s0, MemOperand(s5, kNextOffset)); ++ if (__ emit_debug_code()) { ++ __ Ld_w(a1, MemOperand(s5, kLevelOffset)); ++ __ Check(eq, AbortReason::kUnexpectedLevelAfterReturnFromApiCall, a1, ++ Operand(s2)); ++ } ++ __ Sub_w(s2, s2, Operand(1)); ++ __ St_w(s2, MemOperand(s5, kLevelOffset)); ++ __ Ld_d(kScratchReg, MemOperand(s5, kLimitOffset)); ++ __ Branch(&delete_allocated_handles, ne, s1, Operand(kScratchReg)); ++ ++ // Leave the API exit frame. ++ __ bind(&leave_exit_frame); ++ ++ if (stack_space_operand == nullptr) { ++ DCHECK_NE(stack_space, 0); ++ __ li(s0, Operand(stack_space)); ++ } else { ++ DCHECK_EQ(stack_space, 0); ++ STATIC_ASSERT(kCArgSlotCount == 0); ++ __ Ld_d(s0, *stack_space_operand); ++ } ++ ++ static constexpr bool kDontSaveDoubles = false; ++ static constexpr bool kRegisterContainsSlotCount = false; ++ __ LeaveExitFrame(kDontSaveDoubles, s0, NO_EMIT_RETURN, ++ kRegisterContainsSlotCount); ++ ++ // Check if the function scheduled an exception. ++ __ LoadRoot(a4, RootIndex::kTheHoleValue); ++ __ li(kScratchReg, ExternalReference::scheduled_exception_address(isolate)); ++ __ Ld_d(a5, MemOperand(kScratchReg, 0)); ++ __ Branch(&promote_scheduled_exception, ne, a4, Operand(a5)); ++ ++ __ Ret(); ++ ++ // Re-throw by promoting a scheduled exception. ++ __ bind(&promote_scheduled_exception); ++ __ TailCallRuntime(Runtime::kPromoteScheduledException); ++ ++ // HandleScope limit has changed. Delete allocated extensions. ++ __ bind(&delete_allocated_handles); ++ __ St_d(s1, MemOperand(s5, kLimitOffset)); ++ __ mov(s0, a0); ++ __ PrepareCallCFunction(1, s1); ++ __ li(a0, ExternalReference::isolate_address(isolate)); ++ __ CallCFunction(ExternalReference::delete_handle_scope_extensions(), 1); ++ __ mov(a0, s0); ++ __ jmp(&leave_exit_frame); ++} ++ ++} // namespace ++ ++void Builtins::Generate_CallApiCallback(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- cp : context ++ // -- a1 : api function address ++ // -- a2 : arguments count (not including the receiver) ++ // -- a3 : call data ++ // -- a0 : holder ++ // -- ++ // -- sp[0] : last argument ++ // -- ... ++ // -- sp[(argc - 1) * 8] : first argument ++ // -- sp[(argc + 0) * 8] : receiver ++ // ----------------------------------- ++ ++ Register api_function_address = a1; ++ Register argc = a2; ++ Register call_data = a3; ++ Register holder = a0; ++ Register scratch = t0; ++ Register base = t1; // For addressing MemOperands on the stack. 
++ ++ DCHECK(!AreAliased(api_function_address, argc, call_data, holder, scratch, ++ base)); ++ ++ using FCA = FunctionCallbackArguments; ++ ++ STATIC_ASSERT(FCA::kArgsLength == 6); ++ STATIC_ASSERT(FCA::kNewTargetIndex == 5); ++ STATIC_ASSERT(FCA::kDataIndex == 4); ++ STATIC_ASSERT(FCA::kReturnValueOffset == 3); ++ STATIC_ASSERT(FCA::kReturnValueDefaultValueIndex == 2); ++ STATIC_ASSERT(FCA::kIsolateIndex == 1); ++ STATIC_ASSERT(FCA::kHolderIndex == 0); ++ ++ // Set up FunctionCallbackInfo's implicit_args on the stack as follows: ++ // ++ // Target state: ++ // sp[0 * kPointerSize]: kHolder ++ // sp[1 * kPointerSize]: kIsolate ++ // sp[2 * kPointerSize]: undefined (kReturnValueDefaultValue) ++ // sp[3 * kPointerSize]: undefined (kReturnValue) ++ // sp[4 * kPointerSize]: kData ++ // sp[5 * kPointerSize]: undefined (kNewTarget) ++ ++ // Set up the base register for addressing through MemOperands. It will point ++ // at the receiver (located at sp + argc * kPointerSize). ++ __ Alsl_d(base, argc, sp, kPointerSizeLog2, t7); ++ ++ // Reserve space on the stack. ++ __ Sub_d(sp, sp, Operand(FCA::kArgsLength * kPointerSize)); ++ ++ // kHolder. ++ __ St_d(holder, MemOperand(sp, 0 * kPointerSize)); ++ ++ // kIsolate. ++ __ li(scratch, ExternalReference::isolate_address(masm->isolate())); ++ __ St_d(scratch, MemOperand(sp, 1 * kPointerSize)); ++ ++ // kReturnValueDefaultValue and kReturnValue. ++ __ LoadRoot(scratch, RootIndex::kUndefinedValue); ++ __ St_d(scratch, MemOperand(sp, 2 * kPointerSize)); ++ __ St_d(scratch, MemOperand(sp, 3 * kPointerSize)); ++ ++ // kData. ++ __ St_d(call_data, MemOperand(sp, 4 * kPointerSize)); ++ ++ // kNewTarget. ++ __ St_d(scratch, MemOperand(sp, 5 * kPointerSize)); ++ ++ // Keep a pointer to kHolder (= implicit_args) in a scratch register. ++ // We use it below to set up the FunctionCallbackInfo object. ++ __ mov(scratch, sp); ++ ++ // Allocate the v8::Arguments structure in the arguments' space since ++ // it's not controlled by GC. ++ static constexpr int kApiStackSpace = 4; ++ static constexpr bool kDontSaveDoubles = false; ++ FrameScope frame_scope(masm, StackFrame::MANUAL); ++ __ EnterExitFrame(kDontSaveDoubles, kApiStackSpace); ++ ++ // EnterExitFrame may align the sp. ++ ++ // FunctionCallbackInfo::implicit_args_ (points at kHolder as set up above). ++ // Arguments are after the return address (pushed by EnterExitFrame()). ++ __ St_d(scratch, MemOperand(sp, 1 * kPointerSize)); ++ ++ // FunctionCallbackInfo::values_ (points at the first varargs argument passed ++ // on the stack). ++ __ Sub_d(scratch, base, Operand(1 * kPointerSize)); ++ __ St_d(scratch, MemOperand(sp, 2 * kPointerSize)); ++ ++ // FunctionCallbackInfo::length_. ++ // Stored as int field, 32-bit integers within struct on stack always left ++ // justified by n64 ABI. ++ __ St_w(argc, MemOperand(sp, 3 * kPointerSize)); ++ ++ // We also store the number of bytes to drop from the stack after returning ++ // from the API function here. ++ // Note: Unlike on other architectures, this stores the number of slots to ++ // drop, not the number of bytes. ++ __ Add_d(scratch, argc, Operand(FCA::kArgsLength + 1 /* receiver */)); ++ __ St_d(scratch, MemOperand(sp, 4 * kPointerSize)); ++ ++ // v8::InvocationCallback's argument. ++ DCHECK(!AreAliased(api_function_address, scratch, a0)); ++ __ Add_d(a0, sp, Operand(1 * kPointerSize)); ++ ++ ExternalReference thunk_ref = ExternalReference::invoke_function_callback(); ++ ++ // There are two stack slots above the arguments we constructed on the stack. 
++ // TODO(jgruber): Document what these arguments are. ++ static constexpr int kStackSlotsAboveFCA = 2; ++ MemOperand return_value_operand( ++ fp, (kStackSlotsAboveFCA + FCA::kReturnValueOffset) * kPointerSize); ++ ++ static constexpr int kUseStackSpaceOperand = 0; ++ MemOperand stack_space_operand(sp, 4 * kPointerSize); ++ ++ AllowExternalCallThatCantCauseGC scope(masm); ++ CallApiFunctionAndReturn(masm, api_function_address, thunk_ref, ++ kUseStackSpaceOperand, &stack_space_operand, ++ return_value_operand); ++} ++ ++void Builtins::Generate_CallApiGetter(MacroAssembler* masm) { ++ // Build v8::PropertyCallbackInfo::args_ array on the stack and push property ++ // name below the exit frame to make GC aware of them. ++ STATIC_ASSERT(PropertyCallbackArguments::kShouldThrowOnErrorIndex == 0); ++ STATIC_ASSERT(PropertyCallbackArguments::kHolderIndex == 1); ++ STATIC_ASSERT(PropertyCallbackArguments::kIsolateIndex == 2); ++ STATIC_ASSERT(PropertyCallbackArguments::kReturnValueDefaultValueIndex == 3); ++ STATIC_ASSERT(PropertyCallbackArguments::kReturnValueOffset == 4); ++ STATIC_ASSERT(PropertyCallbackArguments::kDataIndex == 5); ++ STATIC_ASSERT(PropertyCallbackArguments::kThisIndex == 6); ++ STATIC_ASSERT(PropertyCallbackArguments::kArgsLength == 7); ++ ++ Register receiver = ApiGetterDescriptor::ReceiverRegister(); ++ Register holder = ApiGetterDescriptor::HolderRegister(); ++ Register callback = ApiGetterDescriptor::CallbackRegister(); ++ Register scratch = a4; ++ DCHECK(!AreAliased(receiver, holder, callback, scratch)); ++ ++ Register api_function_address = a2; ++ ++ // Here and below +1 is for name() pushed after the args_ array. ++ using PCA = PropertyCallbackArguments; ++ __ Sub_d(sp, sp, (PCA::kArgsLength + 1) * kPointerSize); ++ __ St_d(receiver, MemOperand(sp, (PCA::kThisIndex + 1) * kPointerSize)); ++ __ Ld_d(scratch, FieldMemOperand(callback, AccessorInfo::kDataOffset)); ++ __ St_d(scratch, MemOperand(sp, (PCA::kDataIndex + 1) * kPointerSize)); ++ __ LoadRoot(scratch, RootIndex::kUndefinedValue); ++ __ St_d(scratch, ++ MemOperand(sp, (PCA::kReturnValueOffset + 1) * kPointerSize)); ++ __ St_d(scratch, MemOperand(sp, (PCA::kReturnValueDefaultValueIndex + 1) * ++ kPointerSize)); ++ __ li(scratch, ExternalReference::isolate_address(masm->isolate())); ++ __ St_d(scratch, MemOperand(sp, (PCA::kIsolateIndex + 1) * kPointerSize)); ++ __ St_d(holder, MemOperand(sp, (PCA::kHolderIndex + 1) * kPointerSize)); ++ // should_throw_on_error -> false ++ DCHECK_EQ(0, Smi::zero().ptr()); ++ __ St_d(zero_reg, ++ MemOperand(sp, (PCA::kShouldThrowOnErrorIndex + 1) * kPointerSize)); ++ __ Ld_d(scratch, FieldMemOperand(callback, AccessorInfo::kNameOffset)); ++ __ St_d(scratch, MemOperand(sp, 0 * kPointerSize)); ++ ++ // v8::PropertyCallbackInfo::args_ array and name handle. ++ const int kStackUnwindSpace = PropertyCallbackArguments::kArgsLength + 1; ++ ++ // Load address of v8::PropertyAccessorInfo::args_ array and name handle. ++ __ mov(a0, sp); // a0 = Handle ++ __ Add_d(a1, a0, Operand(1 * kPointerSize)); // a1 = v8::PCI::args_ ++ ++ const int kApiStackSpace = 1; ++ FrameScope frame_scope(masm, StackFrame::MANUAL); ++ __ EnterExitFrame(false, kApiStackSpace); ++ ++ // Create v8::PropertyCallbackInfo object on the stack and initialize ++ // it's args_ field. 
++ __ St_d(a1, MemOperand(sp, 1 * kPointerSize)); ++ __ Add_d(a1, sp, Operand(1 * kPointerSize)); ++ // a1 = v8::PropertyCallbackInfo& ++ ++ ExternalReference thunk_ref = ++ ExternalReference::invoke_accessor_getter_callback(); ++ ++ __ Ld_d(scratch, FieldMemOperand(callback, AccessorInfo::kJsGetterOffset)); ++ __ Ld_d(api_function_address, ++ FieldMemOperand(scratch, Foreign::kForeignAddressOffset)); ++ ++ // +3 is to skip prolog, return address and name handle. ++ MemOperand return_value_operand( ++ fp, (PropertyCallbackArguments::kReturnValueOffset + 3) * kPointerSize); ++ MemOperand* const kUseStackSpaceConstant = nullptr; ++ CallApiFunctionAndReturn(masm, api_function_address, thunk_ref, ++ kStackUnwindSpace, kUseStackSpaceConstant, ++ return_value_operand); ++} ++ ++void Builtins::Generate_DirectCEntry(MacroAssembler* masm) { ++ // The sole purpose of DirectCEntry is for movable callers (e.g. any general ++ // purpose Code object) to be able to call into C functions that may trigger ++ // GC and thus move the caller. ++ // ++ // DirectCEntry places the return address on the stack (updated by the GC), ++ // making the call GC safe. The irregexp backend relies on this. ++ ++ // Make place for arguments to fit C calling convention. Callers use ++ // EnterExitFrame/LeaveExitFrame so they handle stack restoring and we don't ++ // have to do that here. Any caller must drop kCArgsSlotsSize stack space ++ // after the call. ++ __ addi_d(sp, sp, -kCArgsSlotsSize); ++ ++ __ St_d(ra, MemOperand(sp, kCArgsSlotsSize)); // Store the return address. ++ __ Call(t7); // Call the C++ function. ++ __ Ld_d(t7, MemOperand(sp, kCArgsSlotsSize)); // Return to calling code. ++ ++ if (FLAG_debug_code && FLAG_enable_slow_asserts) { ++ // In case of an error the return address may point to a memory area ++ // filled with kZapValue by the GC. Dereference the address and check for ++ // this. 
++ __ Ld_d(a4, MemOperand(t7, 0)); ++ __ Assert(ne, AbortReason::kReceivedInvalidReturnAddress, a4, ++ Operand(reinterpret_cast(kZapValue))); ++ } ++ ++ __ Jump(t7); ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h b/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h +index d56b3725046..6d5ad8bbf19 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h ++++ b/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h +@@ -21,6 +21,8 @@ + #include "src/codegen/mips/assembler-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/assembler-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h b/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h +index 8c81315d50d..304eed44f06 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h ++++ b/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h +@@ -21,6 +21,8 @@ + #include "src/codegen/mips/assembler-mips-inl.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/assembler-mips64-inl.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/assembler-la64-inl.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/assembler-s390-inl.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/constants-arch.h b/src/3rdparty/chromium/v8/src/codegen/constants-arch.h +index 7a222c960ff..701c3c08a92 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/constants-arch.h ++++ b/src/3rdparty/chromium/v8/src/codegen/constants-arch.h +@@ -15,6 +15,8 @@ + #include "src/codegen/mips/constants-mips.h" // NOLINT + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/constants-mips64.h" // NOLINT ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/constants-la64.h" // NOLINT + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 + #include "src/codegen/ppc/constants-ppc.h" // NOLINT + #elif V8_TARGET_ARCH_S390 +diff --git a/src/3rdparty/chromium/v8/src/codegen/cpu-features.h b/src/3rdparty/chromium/v8/src/codegen/cpu-features.h +index 14c94ebae9a..d0bb89367ef 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/cpu-features.h ++++ b/src/3rdparty/chromium/v8/src/codegen/cpu-features.h +@@ -47,6 +47,9 @@ enum CpuFeature { + MIPSr6, + MIPS_SIMD, // MSA instructions + ++#elif V8_TARGET_ARCH_LA64 ++ FPU, // TODO ++ + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 + FPU, + FPR_GPR_MOV, +diff --git a/src/3rdparty/chromium/v8/src/codegen/external-reference.cc b/src/3rdparty/chromium/v8/src/codegen/external-reference.cc +index 7a42e40461c..3bf4edef3bd 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/external-reference.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/external-reference.cc +@@ -472,6 +472,8 @@ ExternalReference ExternalReference::invoke_accessor_getter_callback() { + #define re_stack_check_func RegExpMacroAssemblerMIPS::CheckStackGuardState + #elif V8_TARGET_ARCH_MIPS64 + #define re_stack_check_func RegExpMacroAssemblerMIPS::CheckStackGuardState ++#elif V8_TARGET_ARCH_LA64 ++#define re_stack_check_func RegExpMacroAssemblerLA64::CheckStackGuardState + #elif V8_TARGET_ARCH_S390 + #define re_stack_check_func RegExpMacroAssemblerS390::CheckStackGuardState + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc b/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc 
+index 42b45c0f33a..d0b2bfe1e7e 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc +@@ -128,7 +128,8 @@ const char* CallInterfaceDescriptor::DebugName() const { + return ""; + } + +-#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) ++#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) && \ ++ !defined(V8_TARGET_ARCH_LA64) + bool CallInterfaceDescriptor::IsValidFloatParameterRegister(Register reg) { + return true; + } +@@ -412,7 +413,8 @@ void WasmAtomicNotifyDescriptor::InitializePlatformSpecific( + DefaultInitializePlatformSpecific(data, kParameterCount); + } + +-#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) ++#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) && \ ++ !defined(V8_TARGET_ARCH_LA64) + void WasmI32AtomicWait32Descriptor::InitializePlatformSpecific( + CallInterfaceDescriptorData* data) { + DefaultInitializePlatformSpecific(data, kParameterCount); +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64-inl.h b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64-inl.h +new file mode 100644 +index 00000000000..e2ead3948c9 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64-inl.h +@@ -0,0 +1,268 @@ ++// Copyright (c) 1994-2006 Sun Microsystems Inc. ++// All Rights Reserved. ++// ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// - Redistributions of source code must retain the above copyright notice, ++// this list of conditions and the following disclaimer. ++// ++// - Redistribution in binary form must reproduce the above copyright ++// notice, this list of conditions and the following disclaimer in the ++// documentation and/or other materials provided with the distribution. ++// ++// - Neither the name of Sun Microsystems or the names of contributors may ++// be used to endorse or promote products derived from this software without ++// specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS ++// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ++// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR ++// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++// The original source code covered by the above license above has been ++// modified significantly by Google Inc. ++// Copyright 2012 the V8 project authors. All rights reserved. 
++ ++#ifndef V8_CODEGEN_LA64_ASSEMBLER_LA64_INL_H_ ++#define V8_CODEGEN_LA64_ASSEMBLER_LA64_INL_H_ ++ ++#include "src/codegen/la64/assembler-la64.h" ++ ++#include "src/codegen/assembler.h" ++#include "src/debug/debug.h" ++#include "src/objects/objects-inl.h" ++ ++namespace v8 { ++namespace internal { ++ ++bool CpuFeatures::SupportsOptimizer() { return IsSupported(FPU); } ++ ++bool CpuFeatures::SupportsWasmSimd128() { return false; } ++ ++// ----------------------------------------------------------------------------- ++// Operand and MemOperand. ++ ++bool Operand::is_reg() const { return rm_.is_valid(); } ++ ++int64_t Operand::immediate() const { ++ DCHECK(!is_reg()); ++ DCHECK(!IsHeapObjectRequest()); ++ return value_.immediate; ++} ++ ++// ----------------------------------------------------------------------------- ++// RelocInfo. ++ ++void RelocInfo::apply(intptr_t delta) { ++ if (IsInternalReference(rmode_) || IsInternalReferenceEncoded(rmode_)) { ++ // Absolute code pointer inside code object moves with the code object. ++ Assembler::RelocateInternalReference(rmode_, pc_, delta); ++ } ++} ++ ++Address RelocInfo::target_address() { ++ DCHECK(IsCodeTarget(rmode_) || IsRuntimeEntry(rmode_) || IsWasmCall(rmode_)); ++ return Assembler::target_address_at(pc_, constant_pool_); ++} ++ ++Address RelocInfo::target_address_address() { ++ DCHECK(HasTargetAddressAddress()); ++ // Read the address of the word containing the target_address in an ++ // instruction stream. ++ // The only architecture-independent user of this function is the serializer. ++ // The serializer uses it to find out how many raw bytes of instruction to ++ // output before the next target. ++ // For an instruction like LUI/ORI where the target bits are mixed into the ++ // instruction bits, the size of the target will be zero, indicating that the ++ // serializer should not step forward in memory after a target is resolved ++ // and written. In this case the target_address_address function should ++ // return the end of the instructions to be patched, allowing the ++ // deserializer to deserialize the instructions as raw bytes and put them in ++ // place, ready to be patched with the target. After jump optimization, ++ // that is the address of the instruction that follows J/JAL/JR/JALR ++ // instruction. ++ return pc_ + Assembler::kInstructionsFor64BitConstant * kInstrSize; ++} ++ ++Address RelocInfo::constant_pool_entry_address() { UNREACHABLE(); } ++ ++int RelocInfo::target_address_size() { return Assembler::kSpecialTargetSize; } ++ ++void Assembler::deserialization_set_special_target_at( ++ Address instruction_payload, Code code, Address target) { ++ set_target_address_at(instruction_payload, ++ !code.is_null() ? code.constant_pool() : kNullAddress, ++ target); ++} ++ ++int Assembler::deserialization_special_target_size( ++ Address instruction_payload) { ++ return kSpecialTargetSize; ++} ++ ++void Assembler::set_target_internal_reference_encoded_at(Address pc, ++ Address target) { ++ // TODO, see AssembleJumpTable, la64 does not generate internal reference? ++ abort(); ++} ++ ++void Assembler::deserialization_set_target_internal_reference_at( ++ Address pc, Address target, RelocInfo::Mode mode) { ++ if (mode == RelocInfo::INTERNAL_REFERENCE_ENCODED) { ++ DCHECK(IsJ(instr_at(pc))); ++ set_target_internal_reference_encoded_at(pc, target); ++ } else { ++ DCHECK(mode == RelocInfo::INTERNAL_REFERENCE); ++ Memory
<Address>(pc) = target;
++  }
++}
++
++HeapObject RelocInfo::target_object() {
++  DCHECK(IsCodeTarget(rmode_) || IsFullEmbeddedObject(rmode_));
++  return HeapObject::cast(
++      Object(Assembler::target_address_at(pc_, constant_pool_)));
++}
++
++HeapObject RelocInfo::target_object_no_host(Isolate* isolate) {
++  return target_object();
++}
++
++Handle<HeapObject> RelocInfo::target_object_handle(Assembler* origin) {
++  DCHECK(IsCodeTarget(rmode_) || IsFullEmbeddedObject(rmode_));
++  return Handle<HeapObject>(reinterpret_cast<Address*>(
++      Assembler::target_address_at(pc_, constant_pool_)));
++}
++
++void RelocInfo::set_target_object(Heap* heap, HeapObject target,
++                                  WriteBarrierMode write_barrier_mode,
++                                  ICacheFlushMode icache_flush_mode) {
++  DCHECK(IsCodeTarget(rmode_) || IsFullEmbeddedObject(rmode_));
++  Assembler::set_target_address_at(pc_, constant_pool_, target.ptr(),
++                                   icache_flush_mode);
++  if (write_barrier_mode == UPDATE_WRITE_BARRIER && !host().is_null() &&
++      !FLAG_disable_write_barriers) {
++    WriteBarrierForCode(host(), this, target);
++  }
++}
++
++Address RelocInfo::target_external_reference() {
++  DCHECK(rmode_ == EXTERNAL_REFERENCE);
++  return Assembler::target_address_at(pc_, constant_pool_);
++}
++
++void RelocInfo::set_target_external_reference(
++    Address target, ICacheFlushMode icache_flush_mode) {
++  DCHECK(rmode_ == RelocInfo::EXTERNAL_REFERENCE);
++  Assembler::set_target_address_at(pc_, constant_pool_, target,
++                                   icache_flush_mode);
++}
++
++Address RelocInfo::target_internal_reference() {
++  if (rmode_ == INTERNAL_REFERENCE) {
++    return Memory<Address>
(pc_); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++Address RelocInfo::target_internal_reference_address() { ++ DCHECK(rmode_ == INTERNAL_REFERENCE || rmode_ == INTERNAL_REFERENCE_ENCODED); ++ return pc_; ++} ++ ++Address RelocInfo::target_runtime_entry(Assembler* origin) { ++ DCHECK(IsRuntimeEntry(rmode_)); ++ return target_address(); ++} ++ ++void RelocInfo::set_target_runtime_entry(Address target, ++ WriteBarrierMode write_barrier_mode, ++ ICacheFlushMode icache_flush_mode) { ++ DCHECK(IsRuntimeEntry(rmode_)); ++ if (target_address() != target) ++ set_target_address(target, write_barrier_mode, icache_flush_mode); ++} ++ ++Address RelocInfo::target_off_heap_target() { ++ DCHECK(IsOffHeapTarget(rmode_)); ++ return Assembler::target_address_at(pc_, constant_pool_); ++} ++ ++void RelocInfo::WipeOut() { ++ DCHECK(IsFullEmbeddedObject(rmode_) || IsCodeTarget(rmode_) || ++ IsRuntimeEntry(rmode_) || IsExternalReference(rmode_) || ++ IsInternalReference(rmode_) || IsInternalReferenceEncoded(rmode_) || ++ IsOffHeapTarget(rmode_)); ++ if (IsInternalReference(rmode_)) { ++ Memory
<Address>(pc_) = kNullAddress;
++  } else if (IsInternalReferenceEncoded(rmode_)) {
++    Assembler::set_target_internal_reference_encoded_at(pc_, kNullAddress);
++  } else {
++    Assembler::set_target_address_at(pc_, constant_pool_, kNullAddress);
++  }
++}
++
++// -----------------------------------------------------------------------------
++// Assembler.
++
++void Assembler::CheckBuffer() {
++  if (buffer_space() <= kGap) {
++    GrowBuffer();
++  }
++}
++
++void Assembler::EmitHelper(Instr x) {
++  *reinterpret_cast<Instr*>(pc_) = x;
++  pc_ += kInstrSize;
++  CheckTrampolinePoolQuick();
++}
++
++template <>
++inline void Assembler::EmitHelper(uint8_t x);
++
++template <typename T>
++void Assembler::EmitHelper(T x) {
++  *reinterpret_cast<T*>(pc_) = x;
++  pc_ += sizeof(x);
++  CheckTrampolinePoolQuick();
++}
++
++template <>
++void Assembler::EmitHelper(uint8_t x) {
++  *reinterpret_cast<uint8_t*>(pc_) = x;
++  pc_ += sizeof(x);
++  if (reinterpret_cast<intptr_t>(pc_) % kInstrSize == 0) {
++    CheckTrampolinePoolQuick();
++  }
++}
++
++void Assembler::emit(Instr x) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  EmitHelper(x);
++}
++
++void Assembler::emit(uint64_t data) {
++  // CheckForEmitInForbiddenSlot();
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  EmitHelper(data);
++}
++
++EnsureSpace::EnsureSpace(Assembler* assembler) { assembler->CheckBuffer(); }
++
++}  // namespace internal
++}  // namespace v8
++
++#endif  // V8_CODEGEN_LA64_ASSEMBLER_LA64_INL_H_
+diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.cc
+new file mode 100644
+index 00000000000..0272caeaaf7
+--- /dev/null
++++ b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.cc
+@@ -0,0 +1,2856 @@
++// Copyright (c) 1994-2006 Sun Microsystems Inc.
++// All Rights Reserved.
++//
++// Redistribution and use in source and binary forms, with or without
++// modification, are permitted provided that the following conditions are
++// met:
++//
++// - Redistributions of source code must retain the above copyright notice,
++// this list of conditions and the following disclaimer.
++//
++// - Redistribution in binary form must reproduce the above copyright
++// notice, this list of conditions and the following disclaimer in the
++// documentation and/or other materials provided with the distribution.
++//
++// - Neither the name of Sun Microsystems or the names of contributors may
++// be used to endorse or promote products derived from this software without
++// specific prior written permission.
++//
++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
++// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
++// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
++// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++// The original source code covered by the above license above has been
++// modified significantly by Google Inc.
++// Copyright 2012 the V8 project authors. All rights reserved.
++ ++#include "src/codegen/la64/assembler-la64.h" ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/base/cpu.h" ++#include "src/codegen/la64/assembler-la64-inl.h" ++#include "src/codegen/safepoint-table.h" ++#include "src/codegen/string-constants.h" ++#include "src/deoptimizer/deoptimizer.h" ++#include "src/objects/heap-number-inl.h" ++ ++namespace v8 { ++namespace internal { ++ ++void CpuFeatures::ProbeImpl(bool cross_compile) { ++ supported_ |= 1u << FPU; ++ ++ // Only use statically determined features for cross compile (snapshot). ++ if (cross_compile) return; ++ ++#if defined(_loongisa_vec) ++ supported_ |= 0u; ++#endif ++ // If the compiler is allowed to use fpu then we can use fpu too in our ++ // code generation. ++#ifdef __loongarch__ ++ // Probe for additional features at runtime. ++ base::CPU cpu; ++ supported_ |= 0u; ++#endif ++} ++ ++void CpuFeatures::PrintTarget() {} ++void CpuFeatures::PrintFeatures() {} ++ ++int ToNumber(Register reg) { ++ DCHECK(reg.is_valid()); ++ const int kNumbers[] = { ++ 0, // zero_reg ++ 1, // r1 ra ++ 2, // r2 gp ++ 3, // r3 sp ++ 4, // a0 v0 ++ 5, // a1 v1 ++ 6, // a2 ++ 7, // a3 ++ 8, // a4 ++ 9, // a5 ++ 10, // a6 ++ 11, // a7 ++ 12, // t0 ++ 13, // t1 ++ 14, // t2 ++ 15, // t3 ++ 16, // t4 ++ 17, // t5 ++ 18, // t6 ++ 19, // t7 ++ 20, // t8 ++ 21, // tp ++ 22, // fp ++ 23, // s0 ++ 24, // s1 ++ 25, // s2 ++ 26, // s3 ++ 27, // s4 ++ 28, // s5 ++ 29, // s6 ++ 30, // s7 ++ 31, // s8 ++ }; ++ return kNumbers[reg.code()]; ++} ++ ++Register ToRegister(int num) { ++ DCHECK(num >= 0 && num < kNumRegisters); ++ const Register kRegisters[] = { ++ zero_reg, ra, gp, sp, a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, tp, fp, s0, s1, s2, s3, s4, s5, s6, s7, s8}; ++ return kRegisters[num]; ++} ++ ++// ----------------------------------------------------------------------------- ++// Implementation of RelocInfo. ++ ++const int RelocInfo::kApplyMask = ++ RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE) | ++ RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE_ENCODED); ++ ++bool RelocInfo::IsCodedSpecially() { ++ // The deserializer needs to know whether a pointer is specially coded. Being ++ // specially coded on loongisa means that it is a lui/ori instruction, and ++ // that is always the case inside code objects. ++ return true; ++} ++ ++bool RelocInfo::IsInConstantPool() { return false; } ++ ++uint32_t RelocInfo::wasm_call_tag() const { ++ DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL); ++ return static_cast( ++ Assembler::target_address_at(pc_, constant_pool_)); ++} ++ ++// ----------------------------------------------------------------------------- ++// Implementation of Operand and MemOperand. ++// See assembler-la64-inl.h for inlined constructors. 
++
++Operand::Operand(Handle<HeapObject> handle)
++    : rm_(no_reg), rmode_(RelocInfo::FULL_EMBEDDED_OBJECT) {
++  value_.immediate = static_cast<intptr_t>(handle.address());
++}
++
++Operand Operand::EmbeddedNumber(double value) {
++  int32_t smi;
++  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
++  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
++  result.is_heap_object_request_ = true;
++  result.value_.heap_object_request = HeapObjectRequest(value);
++  return result;
++}
++
++Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
++  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
++  result.is_heap_object_request_ = true;
++  result.value_.heap_object_request = HeapObjectRequest(str);
++  return result;
++}
++
++MemOperand::MemOperand(Register base, int32_t offset)
++    : base_(base), index_(no_reg), offset_(offset) {}
++
++MemOperand::MemOperand(Register base, Register index)
++    : base_(base), index_(index), offset_(0) {}
++
++void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
++  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
++  for (auto& request : heap_object_requests_) {
++    Handle<HeapObject> object;
++    switch (request.kind()) {
++      case HeapObjectRequest::kHeapNumber:
++        object = isolate->factory()->NewHeapNumber<AllocationType::kOld>(
++            request.heap_number());
++        break;
++      case HeapObjectRequest::kStringConstant:
++        const StringConstantBase* str = request.string();
++        CHECK_NOT_NULL(str);
++        object = str->AllocateStringConstant(isolate);
++        break;
++    }
++    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
++    set_target_value_at(pc, reinterpret_cast<uint64_t>(object.location()));
++  }
++}
++
++// -----------------------------------------------------------------------------
++// Specific instructions, constants, and masks.
++
++// addi_d(sp, sp, 8) aka Pop() operation or part of Pop(r)
++// operations as post-increment of sp.
++const Instr kPopInstruction = ADDI_D | (kPointerSize & kImm12Mask) << kRkShift |
++                              (sp.code() << kRjShift) | sp.code();  // NOLINT
++// addi_d(sp, sp, -8) part of Push(r) operation as pre-decrement of sp.
++const Instr kPushInstruction = ADDI_D |
++                               (-kPointerSize & kImm12Mask) << kRkShift |
++                               (sp.code() << kRjShift) | sp.code();  // NOLINT
++// St_d(r, MemOperand(sp, 0))
++const Instr kPushRegPattern = ST_D | (sp.code() << kRjShift);  // NOLINT
++// Ld_d(r, MemOperand(sp, 0))
++const Instr kPopRegPattern = LD_D | (sp.code() << kRjShift);  // NOLINT
++
++Assembler::Assembler(const AssemblerOptions& options,
++                     std::unique_ptr<AssemblerBuffer> buffer)
++    : AssemblerBase(options, std::move(buffer)),
++      scratch_register_list_(t7.bit() | t6.bit()) {
++  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
++
++  last_trampoline_pool_end_ = 0;
++  no_trampoline_pool_before_ = 0;
++  trampoline_pool_blocked_nesting_ = 0;
++  // We leave space (16 * kTrampolineSlotsSize)
++  // for BlockTrampolinePoolScope buffer.
++  next_buffer_check_ = FLAG_force_long_branches
++                           ? kMaxInt
++                           : kMax16BranchOffset - kTrampolineSlotsSize * 16;
++  internal_trampoline_exception_ = false;
++  last_bound_pos_ = 0;
++
++  trampoline_emitted_ = FLAG_force_long_branches;  // TODO remove this
++  unbound_labels_count_ = 0;
++  block_buffer_growth_ = false;
++}
++
++void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
++                        SafepointTableBuilder* safepoint_table_builder,
++                        int handler_table_offset) {
++  // EmitForbiddenSlotInstruction();  // TODO why?
++
++  int code_comments_size = WriteCodeComments();
++
++  DCHECK(pc_ <= reloc_info_writer.pos());  // No overlap.
++
++  AllocateAndInstallRequestedHeapObjects(isolate);
++
++  // Set up code descriptor.
++  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
++  // this point to make CodeDesc initialization less fiddly.
++
++  static constexpr int kConstantPoolSize = 0;
++  const int instruction_size = pc_offset();
++  const int code_comments_offset = instruction_size - code_comments_size;
++  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
++  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
++                                        ? constant_pool_offset
++                                        : handler_table_offset;
++  const int safepoint_table_offset =
++      (safepoint_table_builder == kNoSafepointTable)
++          ? handler_table_offset2
++          : safepoint_table_builder->GetCodeOffset();
++  const int reloc_info_offset =
++      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
++  CodeDesc::Initialize(desc, this, safepoint_table_offset,
++                       handler_table_offset2, constant_pool_offset,
++                       code_comments_offset, reloc_info_offset);
++}
++
++void Assembler::Align(int m) {
++  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
++  while ((pc_offset() & (m - 1)) != 0) {
++    nop();
++  }
++}
++
++void Assembler::CodeTargetAlign() {
++  // No advantage to aligning branch/call targets to more than
++  // single instruction, that I am aware of.
++ Align(4); ++} ++ ++Register Assembler::GetRkReg(Instr instr) { ++ return Register::from_code((instr & kRkFieldMask) >> kRkShift); ++} ++ ++Register Assembler::GetRjReg(Instr instr) { ++ return Register::from_code((instr & kRjFieldMask) >> kRjShift); ++} ++ ++Register Assembler::GetRdReg(Instr instr) { ++ return Register::from_code((instr & kRdFieldMask) >> kRdShift); ++} ++ ++uint32_t Assembler::GetRk(Instr instr) { ++ return (instr & kRkFieldMask) >> kRkShift; ++} ++ ++uint32_t Assembler::GetRkField(Instr instr) { return instr & kRkFieldMask; } ++ ++uint32_t Assembler::GetRj(Instr instr) { ++ return (instr & kRjFieldMask) >> kRjShift; ++} ++ ++uint32_t Assembler::GetRjField(Instr instr) { return instr & kRjFieldMask; } ++ ++uint32_t Assembler::GetRd(Instr instr) { ++ return (instr & kRdFieldMask) >> kRdShift; ++} ++ ++uint32_t Assembler::GetRdField(Instr instr) { return instr & kRdFieldMask; } ++ ++uint32_t Assembler::GetSa2(Instr instr) { ++ return (instr & kSa2FieldMask) >> kSaShift; ++} ++ ++uint32_t Assembler::GetSa2Field(Instr instr) { return instr & kSa2FieldMask; } ++ ++uint32_t Assembler::GetSa3(Instr instr) { ++ return (instr & kSa3FieldMask) >> kSaShift; ++} ++ ++uint32_t Assembler::GetSa3Field(Instr instr) { return instr & kSa3FieldMask; } ++ ++bool Assembler::IsPop(Instr instr) { ++ return (instr & 0xffc003e0) == kPopRegPattern; ++} ++ ++bool Assembler::IsPush(Instr instr) { ++ return (instr & 0xffc003e0) == kPushRegPattern; ++} ++ ++// Labels refer to positions in the (to be) generated code. ++// There are bound, linked, and unused labels. ++// ++// Bound labels refer to known positions in the already ++// generated code. pos() is the position the label refers to. ++// ++// Linked labels refer to unknown positions in the code ++// to be generated; pos() is the position of the last ++// instruction using the label. ++ ++// The link chain is terminated by a value in the instruction of -1, ++// which is an otherwise illegal value (branch -1 is inf loop). ++// The instruction 16-bit offset field addresses 32-bit words, but in ++// code is conv to an 18-bit value addressing bytes, hence the -4 value. ++ ++const int kEndOfChain = 0; ++// Determines the end of the Jump chain (a subset of the label link chain). ++const int kEndOfJumpChain = 0; ++ ++bool Assembler::IsBranch(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a branch. ++ bool isBranch = opcode == BEQZ || opcode == BNEZ || opcode == BCZ || ++ opcode == B || opcode == BL || opcode == BEQ || ++ opcode == BNE || opcode == BLT || opcode == BGE || ++ opcode == BLTU || opcode == BGEU; ++ return isBranch; ++} ++ ++bool Assembler::IsB(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a b. ++ bool isBranch = opcode == B || opcode == BL; ++ return isBranch; ++} ++ ++bool Assembler::IsBz(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a branch. ++ bool isBranch = opcode == BEQZ || opcode == BNEZ || opcode == BCZ; ++ return isBranch; ++} ++ ++bool Assembler::IsEmittedConstant(Instr instr) { ++ // Add GetLabelConst function? ++ uint32_t label_constant = instr & ~kImm16Mask; ++ return label_constant == 0; // Emitted label const in reg-exp engine. ++} ++ ++bool Assembler::IsJ(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a jump. 
++ return opcode == JIRL; ++} ++ ++bool Assembler::IsLu12i_w(Instr instr) { ++ uint32_t opcode = (instr >> 25) << 25; ++ return opcode == LU12I_W; ++} ++ ++bool Assembler::IsOri(Instr instr) { ++ uint32_t opcode = (instr >> 22) << 22; ++ return opcode == ORI; ++} ++ ++bool Assembler::IsLu32i_d(Instr instr) { ++ uint32_t opcode = (instr >> 25) << 25; ++ return opcode == LU32I_D; ++} ++ ++bool Assembler::IsLu52i_d(Instr instr) { ++ uint32_t opcode = (instr >> 22) << 22; ++ return opcode == LU52I_D; ++} ++ ++bool Assembler::IsMov(Instr instr, Register rd, Register rj) { ++ // Checks if the instruction is a OR with zero_reg argument (aka MOV). ++ Instr instr1 = ++ OR | zero_reg.code() << kRkShift | rj.code() << kRjShift | rd.code(); ++ return instr == instr1; ++} ++ ++bool Assembler::IsPcAddi(Instr instr, Register rd, int32_t si20) { ++ DCHECK(is_int20(si20)); ++ Instr instr1 = PCADDI | (si20 & 0xfffff) << kRjShift | rd.code(); ++ return instr == instr1; ++} ++ ++bool Assembler::IsNop(Instr instr, unsigned int type) { ++ // See Assembler::nop(type). ++ DCHECK_LT(type, 32); ++ // Traditional loongisa nop == andi(zero_reg, zero_reg, 0) ++ // When marking non-zero type, use andi(zero_reg, t7, type) ++ // to avoid use of ssnop and ehb special encodings of the ++ // andi instruction. ++ ++ Register nop_rt_reg = (type == 0) ? zero_reg : t7; ++ Instr instr1 = ANDI | ((type & kImm12Mask) << kRkShift) | ++ (nop_rt_reg.code() << kRjShift); ++ ++ return instr == instr1; ++} ++ ++static inline int32_t GetOffsetOfBranch(Instr instr, ++ Assembler::OffsetSize bits) { ++ int32_t result = 0; ++ if (bits == 16) { ++ result = (instr << 6) >> 16; ++ } else if (bits == 21) { ++ uint32_t low16 = instr << 6; ++ low16 = low16 >> 16; ++ low16 &= 0xffff; ++ int32_t hi5 = (instr << 27) >> 11; ++ result = hi5 | low16; ++ } else { ++ uint32_t low16 = instr << 6; ++ low16 = low16 >> 16; ++ low16 &= 0xffff; ++ int32_t hi10 = (instr << 22) >> 6; ++ result = hi10 | low16; ++ DCHECK_EQ(bits, 26); ++ } ++ return result << 2; ++} ++ ++static Assembler::OffsetSize OffsetSizeInBits(Instr instr) { ++ if (Assembler::IsB(instr)) { ++ return Assembler::OffsetSize::kOffset26; ++ } else if (Assembler::IsBz(instr)) { ++ return Assembler::OffsetSize::kOffset21; ++ } else { ++ DCHECK(Assembler::IsBranch(instr)); ++ return Assembler::OffsetSize::kOffset16; ++ } ++} ++ ++static inline int32_t AddBranchOffset(int pos, Instr instr) { ++ Assembler::OffsetSize bits = OffsetSizeInBits(instr); ++ ++ int32_t imm = GetOffsetOfBranch(instr, bits); ++ ++ if (imm == kEndOfChain) { ++ // EndOfChain sentinel is returned directly, not relative to pc or pos. ++ return kEndOfChain; ++ } else { ++ // Handle the case that next branch position is 0. ++ // TODO: Define -4 as a constant ++ int32_t offset = pos + Assembler::kBranchPCOffset + imm; ++ return offset == 0 ? -4 : offset; ++ } ++} ++ ++int Assembler::target_at(int pos, bool is_internal) { ++ if (is_internal) { ++ int64_t* p = reinterpret_cast(buffer_start_ + pos); ++ int64_t address = *p; ++ if (address == kEndOfJumpChain) { ++ return kEndOfChain; ++ } else { ++ int64_t instr_address = reinterpret_cast(p); ++ DCHECK(instr_address - address < INT_MAX); ++ int delta = static_cast(instr_address - address); ++ DCHECK(pos > delta); ++ return pos - delta; ++ } ++ } ++ Instr instr = instr_at(pos); ++ ++ // TODO remove after remove label_at_put? ++ if ((instr & ~kImm16Mask) == 0) { ++ // Emitted label constant, not part of a branch. 
++ if (instr == 0) { ++ return kEndOfChain; ++ } else { ++ int32_t imm18 = ((instr & static_cast(kImm16Mask)) << 16) >> 14; ++ return (imm18 + pos); ++ } ++ } ++ ++ // Check we have a branch or jump instruction. ++ DCHECK(IsBranch(instr) || IsJ(instr) || IsLu12i_w(instr) || ++ IsPcAddi(instr, t8, 16)); ++ // Do NOT change this to <<2. We rely on arithmetic shifts here, assuming ++ // the compiler uses arithmetic shifts for signed integers. ++ if (IsBranch(instr)) { ++ return AddBranchOffset(pos, instr); ++ } else if (IsPcAddi(instr, t8, 16)) { ++ // see BranchLong(Label* L) and BranchAndLinkLong ?? ++ int32_t imm32; ++ Instr instr_lu12i_w = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 2 * kInstrSize); ++ DCHECK(IsLu12i_w(instr_lu12i_w)); ++ // DCHECK(IsOri(instr_ori)); ++ imm32 = ((instr_lu12i_w >> 5) & 0xfffff) << 12; ++ imm32 |= ((instr_ori >> 10) & static_cast(kImm12Mask)); ++ if (imm32 == kEndOfJumpChain) { ++ // EndOfChain sentinel is returned directly, not relative to pc or pos. ++ return kEndOfChain; ++ } ++ return pos + imm32; ++ } else if (IsLu12i_w(instr)) { ++ abort(); ++ // TODO no used?? ++ /* Instr instr_lui = instr_at(pos + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori2 = instr_at(pos + 3 * kInstrSize); ++ DCHECK(IsOri(instr_ori)); ++ DCHECK(IsOri(instr_ori2)); ++ ++ // TODO(plind) create named constants for shift values. ++ int64_t imm = static_cast(instr_lui & kImm16Mask) << 48; ++ imm |= static_cast(instr_ori & kImm16Mask) << 32; ++ imm |= static_cast(instr_ori2 & kImm16Mask) << 16; ++ // Sign extend address; ++ imm >>= 16; ++ ++ if (imm == kEndOfJumpChain) { ++ // EndOfChain sentinel is returned directly, not relative to pc or ++ pos. return kEndOfChain; } else { uint64_t instr_address = ++ reinterpret_cast(buffer_start_ + pos); DCHECK(instr_address - ++ imm < INT_MAX); int delta = static_cast(instr_address - imm); ++ DCHECK(pos > delta); ++ return pos - delta; ++ }*/ ++ } else { ++ DCHECK(IsJ(instr)); ++ // TODO not used??? ++ abort(); ++ } ++} ++ ++static inline Instr SetBranchOffset(int32_t pos, int32_t target_pos, ++ Instr instr) { ++ int32_t bits = OffsetSizeInBits(instr); ++ int32_t imm = target_pos - pos; ++ DCHECK_EQ(imm & 3, 0); ++ imm >>= 2; ++ ++ DCHECK(is_intn(imm, bits)); ++ ++ if (bits == 16) { ++ const int32_t mask = ((1 << 16) - 1) << 10; ++ instr &= ~mask; ++ return instr | ((imm << 10) & mask); ++ } else if (bits == 21) { ++ const int32_t mask = 0x3fffc1f; ++ instr &= ~mask; ++ uint32_t low16 = (imm & kImm16Mask) << 10; ++ int32_t hi5 = (imm >> 16) & 0x1f; ++ return instr | low16 | hi5; ++ } else { ++ DCHECK_EQ(bits, 26); ++ const int32_t mask = 0x3ffffff; ++ instr &= ~mask; ++ uint32_t low16 = (imm & kImm16Mask) << 10; ++ int32_t hi10 = (imm >> 16) & 0x3ff; ++ return instr | low16 | hi10; ++ } ++} ++ ++void Assembler::target_at_put(int pos, int target_pos, bool is_internal) { ++ if (is_internal) { ++ uint64_t imm = reinterpret_cast(buffer_start_) + target_pos; ++ *reinterpret_cast(buffer_start_ + pos) = imm; ++ return; ++ } ++ Instr instr = instr_at(pos); ++ if ((instr & ~kImm16Mask) == 0) { ++ DCHECK(target_pos == kEndOfChain || target_pos >= 0); ++ // Emitted label constant, not part of a branch. ++ // Make label relative to Code pointer of generated Code object. 
++ instr_at_put(pos, target_pos + (Code::kHeaderSize - kHeapObjectTag)); ++ return; ++ } ++ ++ if (IsBranch(instr)) { ++ instr = SetBranchOffset(pos, target_pos, instr); ++ instr_at_put(pos, instr); ++ } else if (0 == 1 /*IsLui(instr)*/) { ++ /* if (IsPcAddi(instr, t8, 16)) { ++ Instr instr_lui = instr_at(pos + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 2 * kInstrSize); ++ DCHECK(IsLui(instr_lui)); ++ DCHECK(IsOri(instr_ori)); ++ int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset); ++ DCHECK_EQ(imm & 3, 0); ++ if (is_int16(imm + Assembler::kLongBranchPCOffset - ++ Assembler::kBranchPCOffset)) { ++ // Optimize by converting to regular branch and link with 16-bit ++ // offset. ++ Instr instr_b = REGIMM | BGEZAL; // Branch and link. ++ instr_b = SetBranchOffset(pos, target_pos, instr_b); ++ // Correct ra register to point to one instruction after jalr from ++ // TurboAssembler::BranchAndLinkLong. ++ Instr instr_a = DADDIU | ra.code() << kRsShift | ra.code() << kRtShift ++ | kOptimizedBranchAndLinkLongReturnOffset; ++ ++ instr_at_put(pos, instr_b); ++ instr_at_put(pos + 1 * kInstrSize, instr_a); ++ } else { ++ instr_lui &= ~kImm16Mask; ++ instr_ori &= ~kImm16Mask; ++ ++ instr_at_put(pos + 0 * kInstrSize, ++ instr_lui | ((imm >> kLuiShift) & kImm16Mask)); ++ instr_at_put(pos + 2 * kInstrSize, instr_ori | (imm & kImm16Mask)); ++ } ++ } else { ++ Instr instr_lui = instr_at(pos + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori2 = instr_at(pos + 3 * kInstrSize); ++ DCHECK(IsOri(instr_ori)); ++ DCHECK(IsOri(instr_ori2)); ++ ++ uint64_t imm = reinterpret_cast(buffer_start_) + target_pos; ++ DCHECK_EQ(imm & 3, 0); ++ ++ instr_lui &= ~kImm16Mask; ++ instr_ori &= ~kImm16Mask; ++ instr_ori2 &= ~kImm16Mask; ++ ++ instr_at_put(pos + 0 * kInstrSize, ++ instr_lui | ((imm >> 32) & kImm16Mask)); ++ instr_at_put(pos + 1 * kInstrSize, ++ instr_ori | ((imm >> 16) & kImm16Mask)); ++ instr_at_put(pos + 3 * kInstrSize, instr_ori2 | (imm & kImm16Mask)); ++ }*/ ++ } else if (IsPcAddi(instr, t8, 16)) { ++ abort(); /* ++ Instr instr_lu12i_w = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 2 * kInstrSize); ++ DCHECK(IsLu12i_w(instr_lu12i_w)); ++ //DCHECK(IsOri(instr_ori)); ++ ++ int32_t imm_short = target_pos - (pos + Assembler::kBranchPCOffset); ++ ++ if (is_int21(imm_short)) { ++ // Optimize by converting to regular branch with 21-bit ++ // offset ++ Instr instr_b = B; ++ instr_b = SetBranchOffset(pos, target_pos, instr_b); ++ ++ instr_at_put(pos, instr_b); ++ } else { ++ int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset); ++ DCHECK_EQ(imm & 3, 0); ++ ++ instr_lu12i_w &= 0xfe00001fu; // opcode:7 | bit20 | rd:5 ++ instr_ori &= 0xffc003ffu; // opcode:10 | bit12 | rj:5 | rd:5 ++ ++ instr_at_put(pos + 1 * kInstrSize, ++ instr_lu12i_w | (((imm >> 12) & 0xfffff) << 5)); ++ instr_at_put(pos + 2 * kInstrSize, instr_ori | ++ ((imm & 0xfff) << 10)); ++ }*/ ++ } else if (IsJ(instr)) { ++ /* ++ int32_t imm28 = target_pos - pos; ++ DCHECK_EQ(imm28 & 3, 0); ++ ++ uint32_t imm26 = static_cast(imm28 >> 2); ++ DCHECK(is_uint26(imm26)); ++ // Place 26-bit signed offset with markings. ++ // When code is committed it will be resolved to j/jal. ++ int32_t mark = IsJ(instr) ? 
kJRawMark : kJalRawMark; ++ instr_at_put(pos, mark | (imm26 & kImm26Mask));*/ ++ abort(); ++ } else { ++ /* int32_t imm28 = target_pos - pos; ++ DCHECK_EQ(imm28 & 3, 0); ++ ++ uint32_t imm26 = static_cast(imm28 >> 2); ++ DCHECK(is_uint26(imm26)); ++ // Place raw 26-bit signed offset. ++ // When code is committed it will be resolved to j/jal. ++ instr &= ~kImm26Mask; ++ instr_at_put(pos, instr | (imm26 & kImm26Mask));*/ ++ abort(); ++ } ++} ++ ++void Assembler::print(const Label* L) { ++ if (L->is_unused()) { ++ PrintF("unused label\n"); ++ } else if (L->is_bound()) { ++ PrintF("bound label to %d\n", L->pos()); ++ } else if (L->is_linked()) { ++ Label l; ++ l.link_to(L->pos()); ++ PrintF("unbound label"); ++ while (l.is_linked()) { ++ PrintF("@ %d ", l.pos()); ++ Instr instr = instr_at(l.pos()); ++ if ((instr & ~kImm16Mask) == 0) { ++ PrintF("value\n"); ++ } else { ++ PrintF("%d\n", instr); ++ } ++ next(&l, is_internal_reference(&l)); ++ } ++ } else { ++ PrintF("label in inconsistent state (pos = %d)\n", L->pos_); ++ } ++} ++ ++void Assembler::bind_to(Label* L, int pos) { ++ DCHECK(0 <= pos && pos <= pc_offset()); // Must have valid binding position. ++ int trampoline_pos = kInvalidSlotPos; ++ bool is_internal = false; ++ if (L->is_linked() && !trampoline_emitted_) { ++ unbound_labels_count_--; ++ if (!is_internal_reference(L)) { ++ next_buffer_check_ += kTrampolineSlotsSize; ++ } ++ } ++ ++ while (L->is_linked()) { ++ int fixup_pos = L->pos(); ++ int dist = pos - fixup_pos; ++ is_internal = is_internal_reference(L); ++ next(L, is_internal); // Call next before overwriting link with target at ++ // fixup_pos. ++ Instr instr = instr_at(fixup_pos); ++ if (is_internal) { ++ target_at_put(fixup_pos, pos, is_internal); ++ } else { ++ if (IsBranch(instr)) { ++ int branch_offset = BranchOffset(instr); ++ if (dist > branch_offset) { ++ if (trampoline_pos == kInvalidSlotPos) { ++ trampoline_pos = get_trampoline_entry(fixup_pos); ++ CHECK_NE(trampoline_pos, kInvalidSlotPos); ++ } ++ CHECK((trampoline_pos - fixup_pos) <= branch_offset); ++ target_at_put(fixup_pos, trampoline_pos, false); ++ fixup_pos = trampoline_pos; ++ } ++ target_at_put(fixup_pos, pos, false); ++ } else { ++ DCHECK(IsJ(instr) || IsLu12i_w(instr) || IsEmittedConstant(instr) || ++ IsPcAddi(instr, t8, 8)); ++ target_at_put(fixup_pos, pos, false); ++ } ++ } ++ } ++ L->bind_to(pos); ++ ++ // Keep track of the last bound label so we don't eliminate any instructions ++ // before a bound label. ++ if (pos > last_bound_pos_) last_bound_pos_ = pos; ++} ++ ++void Assembler::bind(Label* L) { ++ DCHECK(!L->is_bound()); // Label can only be bound once. 
++ bind_to(L, pc_offset()); ++} ++ ++void Assembler::next(Label* L, bool is_internal) { ++ DCHECK(L->is_linked()); ++ int link = target_at(L->pos(), is_internal); ++ if (link == kEndOfChain) { ++ L->Unuse(); ++ } else if (link == -4) { ++ // Next position is pc_offset == 0 ++ L->link_to(0); ++ } else { ++ DCHECK_GE(link, 0); ++ L->link_to(link); ++ } ++} ++ ++bool Assembler::is_near_c(Label* L) { ++ DCHECK(L->is_bound()); ++ return pc_offset() - L->pos() < kMax16BranchOffset - 4 * kInstrSize; ++} ++ ++bool Assembler::is_near(Label* L, OffsetSize bits) { ++ DCHECK(L->is_bound()); ++ return ((pc_offset() - L->pos()) < ++ (1 << (bits + 2 - 1)) - 1 - 5 * kInstrSize); ++} ++ ++bool Assembler::is_near_a(Label* L) { ++ DCHECK(L->is_bound()); ++ return pc_offset() - L->pos() <= kMax26BranchOffset - 4 * kInstrSize; ++} ++ ++int Assembler::BranchOffset(Instr instr) { ++ int bits = OffsetSize::kOffset16; ++ ++ uint32_t opcode = (instr >> 26) << 26; ++ switch (opcode) { ++ case B: ++ case BL: ++ bits = OffsetSize::kOffset26; ++ break; ++ case BNEZ: ++ case BEQZ: ++ case BCZ: ++ bits = OffsetSize::kOffset21; ++ break; ++ case BNE: ++ case BEQ: ++ case BLT: ++ case BGE: ++ case BLTU: ++ case BGEU: ++ case JIRL: ++ bits = OffsetSize::kOffset16; ++ break; ++ default: ++ break; ++ } ++ ++ return (1 << (bits + 2 - 1)) - 1; ++} ++ ++// We have to use a temporary register for things that can be relocated even ++// if they can be encoded in the LA's 16 bits of immediate-offset instruction ++// space. There is no guarantee that the relocated location can be similarly ++// encoded. ++bool Assembler::MustUseReg(RelocInfo::Mode rmode) { ++ return !RelocInfo::IsNone(rmode); ++} ++ ++void Assembler::GenB(Opcode opcode, Register rj, int32_t si21) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK((BEQZ == opcode || BNEZ == opcode) && is_int21(si21) && rj.is_valid()); ++ Instr instr = opcode | (si21 & kImm16Mask) << kRkShift | ++ (rj.code() << kRjShift) | ((si21 & 0x1fffff) >> 16); ++ emit(instr); ++} ++ ++void Assembler::GenB(Opcode opcode, CFRegister cj, int32_t si21, bool isEq) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(BCZ == opcode && is_int21(si21)); ++ DCHECK(cj >= 0 && cj <= 7); ++ int32_t sc = (isEq ? 
cj : cj + 8); ++ Instr instr = opcode | (si21 & kImm16Mask) << kRkShift | (sc << kRjShift) | ++ ((si21 & 0x1fffff) >> 16); ++ emit(instr); ++} ++ ++void Assembler::GenB(Opcode opcode, int32_t si26) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK((B == opcode || BL == opcode) && is_int26(si26)); ++ Instr instr = ++ opcode | ((si26 & kImm16Mask) << kRkShift) | ((si26 & kImm26Mask) >> 16); ++ emit(instr); ++} ++ ++void Assembler::GenBJ(Opcode opcode, Register rj, Register rd, int32_t si16) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(is_int16(si16)); ++ Instr instr = opcode | ((si16 & kImm16Mask) << kRkShift) | ++ (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenCmp(Opcode opcode, FPUCondition cond, FPURegister fk, ++ FPURegister fj, CFRegister cd) { ++ DCHECK(opcode == FCMP_COND_S || opcode == FCMP_COND_D); ++ Instr instr = opcode | cond << kCondShift | (fk.code() << kFkShift) | ++ (fj.code() << kFjShift) | cd; ++ emit(instr); ++} ++ ++void Assembler::GenSel(Opcode opcode, CFRegister ca, FPURegister fk, ++ FPURegister fj, FPURegister rd) { ++ DCHECK((opcode == FSEL)); ++ Instr instr = opcode | ca << kCondShift | (fk.code() << kFkShift) | ++ (fj.code() << kFjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, Register rd, ++ bool rjrd) { ++ Instr instr = 0; ++ if (rjrd) { ++ instr = opcode | (rj.code() << kRjShift) | rd.code(); ++ } else { ++ DCHECK(opcode == ASRTLE_D || opcode == ASRTGT_D); ++ instr = opcode | (rj.code() << kRkShift) | rd.code() << kRjShift; ++ } ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fj, FPURegister fd) { ++ Instr instr = opcode | (fj.code() << kFjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, FPURegister fd) { ++ DCHECK((opcode == MOVGR2FR_W) || (opcode == MOVGR2FR_D) || ++ (opcode == MOVGR2FRH_W)); ++ Instr instr = opcode | (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fj, Register rd) { ++ DCHECK((opcode == MOVFR2GR_S) || (opcode == MOVFR2GR_D) || ++ (opcode == MOVFRH2GR_S)); ++ Instr instr = opcode | (fj.code() << kFjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, FPUControlRegister fd) { ++ DCHECK((opcode == MOVGR2FCSR)); ++ Instr instr = opcode | (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPUControlRegister fj, Register rd) { ++ DCHECK((opcode == MOVFCSR2GR)); ++ Instr instr = opcode | (fj.code() << kFjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fj, CFRegister cd) { ++ DCHECK((opcode == MOVFR2CF)); ++ Instr instr = opcode | (fj.code() << kFjShift) | cd; ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, CFRegister cj, FPURegister fd) { ++ DCHECK((opcode == MOVCF2FR)); ++ Instr instr = opcode | cj << kFjShift | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, CFRegister cd) { ++ DCHECK((opcode == MOVGR2CF)); ++ Instr instr = opcode | (rj.code() << kRjShift) | cd; ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, CFRegister cj, Register rd) { ++ DCHECK((opcode == MOVCF2GR)); ++ Instr instr = opcode | cj << kFjShift | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rk, Register rj, ++ Register rd) { 
++ Instr instr = ++ opcode | (rk.code() << kRkShift) | (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fk, FPURegister fj, ++ FPURegister fd) { ++ Instr instr = ++ opcode | (fk.code() << kFkShift) | (fj.code() << kFjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fa, FPURegister fk, ++ FPURegister fj, FPURegister fd) { ++ Instr instr = opcode | (fa.code() << kFaShift) | (fk.code() << kFkShift) | ++ (fj.code() << kFjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rk, Register rj, ++ FPURegister fd) { ++ Instr instr = ++ opcode | (rk.code() << kRkShift) | (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit3, Register rk, Register rj, ++ Register rd) { ++ DCHECK(is_uint3(bit3)); ++ Instr instr = opcode | (bit3 & 0x7) << kSaShift | (rk.code() << kRkShift) | ++ (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit6m, int32_t bit6l, Register rj, ++ Register rd) { ++ DCHECK(is_uint6(bit6m) && is_uint6(bit6l)); ++ Instr instr = opcode | (bit6m & 0x3f) << 16 | (bit6l & 0x3f) << kRkShift | ++ (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit20, Register rd) { ++ // DCHECK(is_uint20(bit20) || is_int20(bit20)); ++ Instr instr = opcode | (bit20 & 0xfffff) << kRjShift | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit15) { ++ DCHECK(is_uint15(bit15)); ++ Instr instr = opcode | (bit15 & 0x7fff); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t value, Register rj, Register rd, ++ int32_t value_bits) { ++ DCHECK(value_bits == 6 || value_bits == 12 || value_bits == 14 || ++ value_bits == 16); ++ uint32_t imm = value & 0x3f; ++ if (value_bits == 12) { ++ imm = value & kImm12Mask; ++ } else if (value_bits == 14) { ++ imm = value & 0x3fff; ++ } else if (value_bits == 16) { ++ imm = value & kImm16Mask; ++ } ++ Instr instr = opcode | imm << kRkShift | (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit12, Register rj, ++ FPURegister fd) { ++ DCHECK(is_int12(bit12)); ++ Instr instr = opcode | ((bit12 & kImm12Mask) << kRkShift) | ++ (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++// Returns the next free trampoline entry. ++int32_t Assembler::get_trampoline_entry(int32_t pos) { ++ int32_t trampoline_entry = kInvalidSlotPos; ++ if (!internal_trampoline_exception_) { ++ if (trampoline_.start() > pos) { ++ trampoline_entry = trampoline_.take_slot(); ++ } ++ ++ if (kInvalidSlotPos == trampoline_entry) { ++ internal_trampoline_exception_ = true; ++ } ++ } ++ return trampoline_entry; ++} ++ ++uint64_t Assembler::jump_address(Label* L) { ++ int64_t target_pos; ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); // L's link. ++ L->link_to(pc_offset()); ++ } else { ++ L->link_to(pc_offset()); ++ return kEndOfJumpChain; ++ } ++ } ++ uint64_t imm = reinterpret_cast(buffer_start_) + target_pos; ++ DCHECK_EQ(imm & 3, 0); ++ ++ return imm; ++} ++ ++uint64_t Assembler::branch_long_offset(Label* L) { ++ int64_t target_pos; ++ ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); // L's link. 
++ L->link_to(pc_offset()); ++ } else { ++ L->link_to(pc_offset()); ++ return kEndOfJumpChain; ++ } ++ } ++ int64_t offset = target_pos - (pc_offset() + kLongBranchPCOffset); ++ DCHECK_EQ(offset & 3, 0); ++ ++ return static_cast(offset); ++} ++ ++int32_t Assembler::branch_offset_helper(Label* L, OffsetSize bits) { ++ int32_t target_pos; ++ ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); ++ L->link_to(pc_offset()); ++ } else { ++ L->link_to(pc_offset()); ++ if (!trampoline_emitted_) { ++ unbound_labels_count_++; ++ next_buffer_check_ -= kTrampolineSlotsSize; ++ } ++ return kEndOfChain; ++ } ++ } ++ ++ int32_t offset = target_pos - (pc_offset() + kBranchPCOffset); ++ DCHECK(is_intn(offset, bits + 2)); ++ DCHECK_EQ(offset & 3, 0); ++ ++ return offset; ++} ++ ++void Assembler::label_at_put(Label* L, int at_offset) { ++ int target_pos; ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ instr_at_put(at_offset, target_pos + (Code::kHeaderSize - kHeapObjectTag)); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); // L's link. ++ int32_t imm18 = target_pos - at_offset; ++ DCHECK_EQ(imm18 & 3, 0); ++ int32_t imm16 = imm18 >> 2; ++ DCHECK(is_int16(imm16)); ++ instr_at_put(at_offset, (imm16 & kImm16Mask)); ++ } else { ++ target_pos = kEndOfChain; ++ instr_at_put(at_offset, 0); ++ if (!trampoline_emitted_) { ++ unbound_labels_count_++; ++ next_buffer_check_ -= kTrampolineSlotsSize; ++ } ++ } ++ L->link_to(at_offset); ++ } ++ // TODO PushBackTrack() ++} ++ ++//------- Branch and jump instructions -------- ++ ++void Assembler::b(int32_t offset) { GenB(B, offset); } ++ ++void Assembler::bl(int32_t offset) { GenB(BL, offset); } ++ ++void Assembler::beq(Register rj, Register rd, int32_t offset) { ++ GenBJ(BEQ, rj, rd, offset); ++} ++ ++void Assembler::bne(Register rj, Register rd, int32_t offset) { ++ GenBJ(BNE, rj, rd, offset); ++} ++ ++void Assembler::blt(Register rj, Register rd, int32_t offset) { ++ GenBJ(BLT, rj, rd, offset); ++} ++ ++void Assembler::bge(Register rj, Register rd, int32_t offset) { ++ GenBJ(BGE, rj, rd, offset); ++} ++ ++void Assembler::bltu(Register rj, Register rd, int32_t offset) { ++ GenBJ(BLTU, rj, rd, offset); ++} ++ ++void Assembler::bgeu(Register rj, Register rd, int32_t offset) { ++ GenBJ(BGEU, rj, rd, offset); ++} ++ ++void Assembler::beqz(Register rj, int32_t offset) { GenB(BEQZ, rj, offset); } ++void Assembler::bnez(Register rj, int32_t offset) { GenB(BNEZ, rj, offset); } ++ ++void Assembler::jirl(Register rd, Register rj, int32_t offset) { ++ GenBJ(JIRL, rj, rd, offset); ++} ++ ++void Assembler::bceqz(CFRegister cj, int32_t si21) { ++ GenB(BCZ, cj, si21, true); ++} ++ ++void Assembler::bcnez(CFRegister cj, int32_t si21) { ++ GenB(BCZ, cj, si21, false); ++} ++ ++// -------Data-processing-instructions--------- ++ ++// Arithmetic. 
++void Assembler::add_w(Register rd, Register rj, Register rk) { ++ GenRegister(ADD_W, rk, rj, rd); ++} ++ ++void Assembler::add_d(Register rd, Register rj, Register rk) { ++ GenRegister(ADD_D, rk, rj, rd); ++} ++ ++void Assembler::sub_w(Register rd, Register rj, Register rk) { ++ GenRegister(SUB_W, rk, rj, rd); ++} ++ ++void Assembler::sub_d(Register rd, Register rj, Register rk) { ++ GenRegister(SUB_D, rk, rj, rd); ++} ++ ++void Assembler::addi_w(Register rd, Register rj, int32_t si12) { ++ GenImm(ADDI_W, si12, rj, rd, 12); ++} ++ ++void Assembler::addi_d(Register rd, Register rj, int32_t si12) { ++ GenImm(ADDI_D, si12, rj, rd, 12); ++} ++ ++void Assembler::addu16i_d(Register rd, Register rj, int32_t si16) { ++ GenImm(ADDU16I_D, si16, rj, rd, 16); ++} ++ ++void Assembler::alsl_w(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2 - 1)); ++ GenImm(ALSL_W, sa2 - 1, rk, rj, rd); ++} ++ ++void Assembler::alsl_wu(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2 - 1)); ++ GenImm(ALSL_WU, sa2 + 3, rk, rj, rd); ++} ++ ++void Assembler::alsl_d(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2 - 1)); ++ GenImm(ALSL_D, sa2 - 1, rk, rj, rd); ++} ++ ++void Assembler::lu12i_w(Register rd, int32_t si20) { ++ GenImm(LU12I_W, si20, rd); ++} ++ ++void Assembler::lu32i_d(Register rd, int32_t si20) { ++ GenImm(LU32I_D, si20, rd); ++} ++ ++void Assembler::lu52i_d(Register rd, Register rj, int32_t si12) { ++ GenImm(LU52I_D, si12, rj, rd, 12); ++} ++ ++void Assembler::slt(Register rd, Register rj, Register rk) { ++ GenRegister(SLT, rk, rj, rd); ++} ++ ++void Assembler::sltu(Register rd, Register rj, Register rk) { ++ GenRegister(SLTU, rk, rj, rd); ++} ++ ++void Assembler::slti(Register rd, Register rj, int32_t si12) { ++ GenImm(SLTI, si12, rj, rd, 12); ++} ++ ++void Assembler::sltui(Register rd, Register rj, int32_t si12) { ++ GenImm(SLTUI, si12, rj, rd, 12); ++} ++ ++void Assembler::pcaddi(Register rd, int32_t si20) { GenImm(PCADDI, si20, rd); } ++ ++void Assembler::pcaddu12i(Register rd, int32_t si20) { ++ GenImm(PCADDU12I, si20, rd); ++} ++ ++void Assembler::pcaddu18i(Register rd, int32_t si20) { ++ GenImm(PCADDU18I, si20, rd); ++} ++ ++void Assembler::pcalau12i(Register rd, int32_t si20) { ++ GenImm(PCALAU12I, si20, rd); ++} ++ ++void Assembler::and_(Register rd, Register rj, Register rk) { ++ GenRegister(AND, rk, rj, rd); ++} ++ ++void Assembler::or_(Register rd, Register rj, Register rk) { ++ GenRegister(OR, rk, rj, rd); ++} ++ ++void Assembler::xor_(Register rd, Register rj, Register rk) { ++ GenRegister(XOR, rk, rj, rd); ++} ++ ++void Assembler::nor(Register rd, Register rj, Register rk) { ++ GenRegister(NOR, rk, rj, rd); ++} ++ ++void Assembler::andn(Register rd, Register rj, Register rk) { ++ GenRegister(ANDN, rk, rj, rd); ++} ++ ++void Assembler::orn(Register rd, Register rj, Register rk) { ++ GenRegister(ORN, rk, rj, rd); ++} ++ ++void Assembler::andi(Register rd, Register rj, int32_t ui12) { ++ GenImm(ANDI, ui12, rj, rd, 12); ++} ++ ++void Assembler::ori(Register rd, Register rj, int32_t ui12) { ++ GenImm(ORI, ui12, rj, rd, 12); ++} ++ ++void Assembler::xori(Register rd, Register rj, int32_t ui12) { ++ GenImm(XORI, ui12, rj, rd, 12); ++} ++ ++void Assembler::mul_w(Register rd, Register rj, Register rk) { ++ GenRegister(MUL_W, rk, rj, rd); ++} ++ ++void Assembler::mulh_w(Register rd, Register rj, Register rk) { ++ GenRegister(MULH_W, rk, rj, rd); ++} ++ ++void Assembler::mulh_wu(Register rd, Register rj, Register rk) { 
++ GenRegister(MULH_WU, rk, rj, rd); ++} ++ ++void Assembler::mul_d(Register rd, Register rj, Register rk) { ++ GenRegister(MUL_D, rk, rj, rd); ++} ++ ++void Assembler::mulh_d(Register rd, Register rj, Register rk) { ++ GenRegister(MULH_D, rk, rj, rd); ++} ++ ++void Assembler::mulh_du(Register rd, Register rj, Register rk) { ++ GenRegister(MULH_DU, rk, rj, rd); ++} ++ ++void Assembler::mulw_d_w(Register rd, Register rj, Register rk) { ++ GenRegister(MULW_D_W, rk, rj, rd); ++} ++ ++void Assembler::mulw_d_wu(Register rd, Register rj, Register rk) { ++ GenRegister(MULW_D_WU, rk, rj, rd); ++} ++ ++void Assembler::div_w(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_W, rk, rj, rd); ++} ++ ++void Assembler::mod_w(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_W, rk, rj, rd); ++} ++ ++void Assembler::div_wu(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_WU, rk, rj, rd); ++} ++ ++void Assembler::mod_wu(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_WU, rk, rj, rd); ++} ++ ++void Assembler::div_d(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_D, rk, rj, rd); ++} ++ ++void Assembler::mod_d(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_D, rk, rj, rd); ++} ++ ++void Assembler::div_du(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_DU, rk, rj, rd); ++} ++ ++void Assembler::mod_du(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_DU, rk, rj, rd); ++} ++ ++// Shifts. ++void Assembler::sll_w(Register rd, Register rj, Register rk) { ++ GenRegister(SLL_W, rk, rj, rd); ++} ++ ++void Assembler::srl_w(Register rd, Register rj, Register rk) { ++ GenRegister(SRL_W, rk, rj, rd); ++} ++ ++void Assembler::sra_w(Register rd, Register rj, Register rk) { ++ GenRegister(SRA_W, rk, rj, rd); ++} ++ ++void Assembler::rotr_w(Register rd, Register rj, Register rk) { ++ GenRegister(ROTR_W, rk, rj, rd); ++} ++ ++void Assembler::slli_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(SLLI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::srli_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(SRLI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::srai_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(SRAI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::rotri_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(ROTRI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::sll_d(Register rd, Register rj, Register rk) { ++ GenRegister(SLL_D, rk, rj, rd); ++} ++ ++void Assembler::srl_d(Register rd, Register rj, Register rk) { ++ GenRegister(SRL_D, rk, rj, rd); ++} ++ ++void Assembler::sra_d(Register rd, Register rj, Register rk) { ++ GenRegister(SRA_D, rk, rj, rd); ++} ++ ++void Assembler::rotr_d(Register rd, Register rj, Register rk) { ++ GenRegister(ROTR_D, rk, rj, rd); ++} ++ ++void Assembler::slli_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(SLLI_D, ui6, rj, rd, 6); ++} ++ ++void Assembler::srli_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(SRLI_D, ui6, rj, rd, 6); ++} ++ ++void Assembler::srai_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(SRAI_D, ui6, rj, rd, 6); ++} ++ ++void Assembler::rotri_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(ROTRI_D, ui6, rj, rd, 6); ++} ++ ++// Bit twiddling. 
++void Assembler::ext_w_b(Register rd, Register rj) { ++ GenRegister(EXT_W_B, rj, rd); ++} ++ ++void Assembler::ext_w_h(Register rd, Register rj) { ++ GenRegister(EXT_W_H, rj, rd); ++} ++ ++void Assembler::clo_w(Register rd, Register rj) { GenRegister(CLO_W, rj, rd); } ++ ++void Assembler::clz_w(Register rd, Register rj) { GenRegister(CLZ_W, rj, rd); } ++ ++void Assembler::cto_w(Register rd, Register rj) { GenRegister(CTO_W, rj, rd); } ++ ++void Assembler::ctz_w(Register rd, Register rj) { GenRegister(CTZ_W, rj, rd); } ++ ++void Assembler::clo_d(Register rd, Register rj) { GenRegister(CLO_D, rj, rd); } ++ ++void Assembler::clz_d(Register rd, Register rj) { GenRegister(CLZ_D, rj, rd); } ++ ++void Assembler::cto_d(Register rd, Register rj) { GenRegister(CTO_D, rj, rd); } ++ ++void Assembler::ctz_d(Register rd, Register rj) { GenRegister(CTZ_D, rj, rd); } ++ ++void Assembler::bytepick_w(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2)); ++ GenImm(BYTEPICK_W, sa2, rk, rj, rd); ++} ++ ++void Assembler::bytepick_d(Register rd, Register rj, Register rk, int32_t sa3) { ++ GenImm(BYTEPICK_D, sa3, rk, rj, rd); ++} ++ ++void Assembler::revb_2h(Register rd, Register rj) { ++ GenRegister(REVB_2H, rj, rd); ++} ++ ++void Assembler::revb_4h(Register rd, Register rj) { ++ GenRegister(REVB_4H, rj, rd); ++} ++ ++void Assembler::revb_2w(Register rd, Register rj) { ++ GenRegister(REVB_2W, rj, rd); ++} ++ ++void Assembler::revb_d(Register rd, Register rj) { ++ GenRegister(REVB_D, rj, rd); ++} ++ ++void Assembler::revh_2w(Register rd, Register rj) { ++ GenRegister(REVH_2W, rj, rd); ++} ++ ++void Assembler::revh_d(Register rd, Register rj) { ++ GenRegister(REVH_D, rj, rd); ++} ++ ++void Assembler::bitrev_4b(Register rd, Register rj) { ++ GenRegister(BITREV_4B, rj, rd); ++} ++ ++void Assembler::bitrev_8b(Register rd, Register rj) { ++ GenRegister(BITREV_8B, rj, rd); ++} ++ ++void Assembler::bitrev_w(Register rd, Register rj) { ++ GenRegister(BITREV_W, rj, rd); ++} ++ ++void Assembler::bitrev_d(Register rd, Register rj) { ++ GenRegister(BITREV_D, rj, rd); ++} ++ ++void Assembler::bstrins_w(Register rd, Register rj, int32_t msbw, ++ int32_t lsbw) { ++ DCHECK(is_uint5(msbw) && is_uint5(lsbw)); ++ GenImm(BSTR_W, msbw + 0x20, lsbw, rj, rd); ++} ++ ++void Assembler::bstrins_d(Register rd, Register rj, int32_t msbd, ++ int32_t lsbd) { ++ GenImm(BSTRINS_D, msbd, lsbd, rj, rd); ++} ++ ++void Assembler::bstrpick_w(Register rd, Register rj, int32_t msbw, ++ int32_t lsbw) { ++ DCHECK(is_uint5(msbw) && is_uint5(lsbw)); ++ GenImm(BSTR_W, msbw + 0x20, lsbw + 0x20, rj, rd); ++} ++ ++void Assembler::bstrpick_d(Register rd, Register rj, int32_t msbd, ++ int32_t lsbd) { ++ GenImm(BSTRPICK_D, msbd, lsbd, rj, rd); ++} ++ ++void Assembler::maskeqz(Register rd, Register rj, Register rk) { ++ GenRegister(MASKEQZ, rk, rj, rd); ++} ++ ++void Assembler::masknez(Register rd, Register rj, Register rk) { ++ GenRegister(MASKNEZ, rk, rj, rd); ++} ++ ++// Memory-instructions ++void Assembler::ld_b(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_B, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_h(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_H, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_w(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_W, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_d(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_D, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_bu(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_BU, si12, rj, rd, 12); ++} ++ ++void 
Assembler::ld_hu(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_HU, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_wu(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_WU, si12, rj, rd, 12); ++} ++ ++void Assembler::st_b(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_B, si12, rj, rd, 12); ++} ++ ++void Assembler::st_h(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_H, si12, rj, rd, 12); ++} ++ ++void Assembler::st_w(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_W, si12, rj, rd, 12); ++} ++ ++void Assembler::st_d(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_D, si12, rj, rd, 12); ++} ++ ++void Assembler::ldx_b(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_B, rk, rj, rd); ++} ++ ++void Assembler::ldx_h(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_H, rk, rj, rd); ++} ++ ++void Assembler::ldx_w(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_W, rk, rj, rd); ++} ++ ++void Assembler::ldx_d(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_D, rk, rj, rd); ++} ++ ++void Assembler::ldx_bu(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_BU, rk, rj, rd); ++} ++ ++void Assembler::ldx_hu(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_HU, rk, rj, rd); ++} ++ ++void Assembler::ldx_wu(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_WU, rk, rj, rd); ++} ++ ++void Assembler::stx_b(Register rd, Register rj, Register rk) { ++ GenRegister(STX_B, rk, rj, rd); ++} ++ ++void Assembler::stx_h(Register rd, Register rj, Register rk) { ++ GenRegister(STX_H, rk, rj, rd); ++} ++ ++void Assembler::stx_w(Register rd, Register rj, Register rk) { ++ GenRegister(STX_W, rk, rj, rd); ++} ++ ++void Assembler::stx_d(Register rd, Register rj, Register rk) { ++ GenRegister(STX_D, rk, rj, rd); ++} ++ ++void Assembler::ldptr_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LDPTR_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::ldptr_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LDPTR_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::stptr_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(STPTR_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::stptr_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(STPTR_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::preld(int32_t hint, Register rj, int32_t si12) { ++ DCHECK(is_uint5(hint)); ++ GenImm(PRELD, si12, rj, Register::from_code(hint), 12); ++} ++ ++void Assembler::preldx(int32_t hint, Register rj, Register rk) { ++ DCHECK(is_uint5(hint)); ++ GenRegister(PRELDX, rk, rj, Register::from_code(hint)); ++} ++ ++void Assembler::ldgt_b(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_B, rk, rj, rd); ++} ++ ++void Assembler::ldgt_h(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_H, rk, rj, rd); ++} ++ ++void Assembler::ldgt_w(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_W, rk, rj, rd); ++} ++ ++void Assembler::ldgt_d(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_D, rk, rj, rd); ++} ++ ++void Assembler::ldle_b(Register rd, Register rj, Register rk) { ++ GenRegister(LDLE_B, rk, rj, rd); ++} ++ ++void Assembler::ldle_h(Register rd, Register rj, Register rk) { ++ GenRegister(LDLE_H, rk, rj, rd); ++} ++ ++void Assembler::ldle_w(Register rd, Register rj, Register rk) { ++ 
GenRegister(LDLE_W, rk, rj, rd); ++} ++ ++void Assembler::ldle_d(Register rd, Register rj, Register rk) { ++ GenRegister(LDLE_D, rk, rj, rd); ++} ++ ++void Assembler::stgt_b(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_B, rk, rj, rd); ++} ++ ++void Assembler::stgt_h(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_H, rk, rj, rd); ++} ++ ++void Assembler::stgt_w(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_W, rk, rj, rd); ++} ++ ++void Assembler::stgt_d(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_D, rk, rj, rd); ++} ++ ++void Assembler::stle_b(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_B, rk, rj, rd); ++} ++ ++void Assembler::stle_h(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_H, rk, rj, rd); ++} ++ ++void Assembler::stle_w(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_W, rk, rj, rd); ++} ++ ++void Assembler::stle_d(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_D, rk, rj, rd); ++} ++ ++void Assembler::amswap_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_W, rk, rj, rd); ++} ++ ++void Assembler::amswap_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_D, rk, rj, rd); ++} ++ ++void Assembler::amadd_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_W, rk, rj, rd); ++} ++ ++void Assembler::amadd_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_D, rk, rj, rd); ++} ++ ++void Assembler::amand_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMAND_W, rk, rj, rd); ++} ++ ++void Assembler::amand_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMAND_D, rk, rj, rd); ++} ++ ++void Assembler::amor_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_W, rk, rj, rd); ++} ++ ++void Assembler::amor_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_D, rk, rj, rd); ++} ++ ++void Assembler::amxor_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_W, rk, rj, rd); ++} ++ ++void Assembler::amxor_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_W, rk, rj, rd); ++} ++ ++void Assembler::ammax_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_D, rk, rj, rd); ++} ++ ++void Assembler::ammin_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_W, rk, rj, rd); ++} ++ ++void Assembler::ammin_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_WU, rk, rj, rd); ++} ++ ++void Assembler::ammax_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DU, rk, rj, rd); ++} ++ ++void Assembler::ammin_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_WU, rk, rj, rd); ++} ++ ++void Assembler::ammin_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DU, rk, rj, rd); ++} ++ ++void Assembler::amswap_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amswap_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amadd_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amadd_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amand_db_w(Register rd, Register rk, 
Register rj) { ++ GenRegister(AMAND_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amand_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMAND_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amor_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amor_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amxor_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amxor_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_DB_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_W, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_D, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_W, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_WU, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_DU, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_WU, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_DU, rk, rj, rd); ++} ++ ++void Assembler::ll_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LL_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::ll_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LL_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::sc_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(SC_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::sc_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(SC_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::dbar(int32_t hint) { GenImm(DBAR, hint); } ++ ++void Assembler::ibar(int32_t hint) { GenImm(IBAR, hint); } ++ ++void Assembler::crc_w_b_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_B_W, rk, rj, rd); ++} ++ ++void Assembler::crc_w_h_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_H_W, rk, rj, rd); ++} ++ ++void Assembler::crc_w_w_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_W_W, rk, rj, rd); ++} ++ ++void Assembler::crc_w_d_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_D_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_b_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_B_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_h_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_H_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_w_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_W_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_d_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_D_W, rk, rj, rd); ++} ++ ++void Assembler::syscall(int32_t code) { GenImm(SYSCALL, code); } ++ ++void Assembler::asrtle_d(Register rj, Register rk) { ++ GenRegister(ASRTLE_D, rk, rj, false); ++} ++ ++void Assembler::asrtgt_d(Register rj, Register rk) { ++ GenRegister(ASRTGT_D, rk, rj, 
false); ++} ++ ++void Assembler::rdtimel_w(Register rd, Register rj) { ++ GenRegister(RDTIMEL_W, rj, rd); ++} ++ ++void Assembler::rdtimeh_w(Register rd, Register rj) { ++ GenRegister(RDTIMEH_W, rj, rd); ++} ++ ++void Assembler::rdtime_d(Register rd, Register rj) { ++ GenRegister(RDTIME_D, rj, rd); ++} ++ ++void Assembler::cpucfg(Register rd, Register rj) { ++ GenRegister(CPUCFG_W, rj, rd); ++} ++ ++// Break / Trap instructions. ++void Assembler::break_(uint32_t code, bool break_as_stop) { ++ DCHECK( ++ (break_as_stop && code <= kMaxStopCode && code > kMaxWatchpointCode) || ++ (!break_as_stop && (code > kMaxStopCode || code <= kMaxWatchpointCode))); ++ GenImm(BREAK, code); ++} ++ ++void Assembler::stop(uint32_t code) { ++ DCHECK_GT(code, kMaxWatchpointCode); ++ DCHECK_LE(code, kMaxStopCode); ++#if defined(V8_HOST_ARCH_LA64) ++ break_(0x4321); ++#else // V8_HOST_ARCH_LA64 ++ break_(code, true); ++#endif ++} ++ ++void Assembler::fadd_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FADD_S, fk, fj, fd); ++} ++ ++void Assembler::fadd_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FADD_D, fk, fj, fd); ++} ++ ++void Assembler::fsub_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSUB_S, fk, fj, fd); ++} ++ ++void Assembler::fsub_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSUB_D, fk, fj, fd); ++} ++ ++void Assembler::fmul_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMUL_S, fk, fj, fd); ++} ++ ++void Assembler::fmul_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMUL_D, fk, fj, fd); ++} ++ ++void Assembler::fdiv_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FDIV_S, fk, fj, fd); ++} ++ ++void Assembler::fdiv_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FDIV_D, fk, fj, fd); ++} ++ ++void Assembler::fmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMADD_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMADD_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMSUB_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMSUB_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMADD_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMADD_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMSUB_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMSUB_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fmax_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMAX_S, fk, fj, fd); ++} ++ ++void Assembler::fmax_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMAX_D, fk, fj, fd); ++} ++ ++void Assembler::fmin_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMIN_S, fk, fj, fd); ++} ++ ++void Assembler::fmin_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMIN_D, fk, fj, fd); ++} ++ ++void Assembler::fmaxa_s(FPURegister fd, FPURegister fj, 
FPURegister fk) { ++ GenRegister(FMAXA_S, fk, fj, fd); ++} ++ ++void Assembler::fmaxa_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMAXA_D, fk, fj, fd); ++} ++ ++void Assembler::fmina_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMINA_S, fk, fj, fd); ++} ++ ++void Assembler::fmina_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMINA_D, fk, fj, fd); ++} ++ ++void Assembler::fabs_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FABS_S, fj, fd); ++} ++ ++void Assembler::fabs_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FABS_D, fj, fd); ++} ++ ++void Assembler::fneg_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FNEG_S, fj, fd); ++} ++ ++void Assembler::fneg_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FNEG_D, fj, fd); ++} ++ ++void Assembler::fsqrt_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FSQRT_S, fj, fd); ++} ++ ++void Assembler::fsqrt_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FSQRT_D, fj, fd); ++} ++ ++void Assembler::frecip_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FRECIP_S, fj, fd); ++} ++ ++void Assembler::frecip_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FRECIP_D, fj, fd); ++} ++ ++void Assembler::frsqrt_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FRSQRT_S, fj, fd); ++} ++ ++void Assembler::frsqrt_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FRSQRT_D, fj, fd); ++} ++ ++void Assembler::fscaleb_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSCALEB_S, fk, fj, fd); ++} ++ ++void Assembler::fscaleb_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSCALEB_D, fk, fj, fd); ++} ++ ++void Assembler::flogb_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FLOGB_S, fj, fd); ++} ++ ++void Assembler::flogb_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FLOGB_D, fj, fd); ++} ++ ++void Assembler::fcopysign_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FCOPYSIGN_S, fk, fj, fd); ++} ++ ++void Assembler::fcopysign_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FCOPYSIGN_D, fk, fj, fd); ++} ++ ++void Assembler::fclass_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FCLASS_S, fj, fd); ++} ++ ++void Assembler::fclass_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FCLASS_D, fj, fd); ++} ++ ++void Assembler::fcmp_cond_s(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd) { ++ GenCmp(FCMP_COND_S, cc, fk, fj, cd); ++} ++ ++void Assembler::fcmp_cond_d(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd) { ++ GenCmp(FCMP_COND_D, cc, fk, fj, cd); ++} ++ ++void Assembler::fcvt_s_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FCVT_S_D, fj, fd); ++} ++ ++void Assembler::fcvt_d_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FCVT_D_S, fj, fd); ++} ++ ++void Assembler::ffint_s_w(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_S_W, fj, fd); ++} ++ ++void Assembler::ffint_s_l(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_S_L, fj, fd); ++} ++ ++void Assembler::ffint_d_w(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_D_W, fj, fd); ++} ++ ++void Assembler::ffint_d_l(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_D_L, fj, fd); ++} ++ ++void Assembler::ftint_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_W_S, fj, fd); ++} ++ ++void Assembler::ftint_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_W_D, fj, fd); ++} ++ ++void Assembler::ftint_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_L_S, fj, fd); ++} ++ 
++void Assembler::ftint_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_L_D, fj, fd); ++} ++ ++void Assembler::ftintrm_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_W_S, fj, fd); ++} ++ ++void Assembler::ftintrm_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_W_D, fj, fd); ++} ++ ++void Assembler::ftintrm_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_L_S, fj, fd); ++} ++ ++void Assembler::ftintrm_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_L_D, fj, fd); ++} ++ ++void Assembler::ftintrp_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_W_S, fj, fd); ++} ++ ++void Assembler::ftintrp_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_W_D, fj, fd); ++} ++ ++void Assembler::ftintrp_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_L_S, fj, fd); ++} ++ ++void Assembler::ftintrp_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_L_D, fj, fd); ++} ++ ++void Assembler::ftintrz_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_W_S, fj, fd); ++} ++ ++void Assembler::ftintrz_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_W_D, fj, fd); ++} ++ ++void Assembler::ftintrz_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_L_S, fj, fd); ++} ++ ++void Assembler::ftintrz_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_L_D, fj, fd); ++} ++ ++void Assembler::ftintrne_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_W_S, fj, fd); ++} ++ ++void Assembler::ftintrne_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_W_D, fj, fd); ++} ++ ++void Assembler::ftintrne_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_L_S, fj, fd); ++} ++ ++void Assembler::ftintrne_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_L_D, fj, fd); ++} ++ ++void Assembler::frint_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FRINT_S, fj, fd); ++} ++ ++void Assembler::frint_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FRINT_D, fj, fd); ++} ++ ++void Assembler::fmov_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FMOV_S, fj, fd); ++} ++ ++void Assembler::fmov_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FMOV_D, fj, fd); ++} ++ ++void Assembler::fsel(CFRegister ca, FPURegister fd, FPURegister fj, ++ FPURegister fk) { ++ GenSel(FSEL, ca, fk, fj, fd); ++} ++ ++void Assembler::movgr2fr_w(FPURegister fd, Register rj) { ++ GenRegister(MOVGR2FR_W, rj, fd); ++} ++ ++void Assembler::movgr2fr_d(FPURegister fd, Register rj) { ++ GenRegister(MOVGR2FR_D, rj, fd); ++} ++ ++void Assembler::movgr2frh_w(FPURegister fd, Register rj) { ++ GenRegister(MOVGR2FRH_W, rj, fd); ++} ++ ++void Assembler::movfr2gr_s(Register rd, FPURegister fj) { ++ GenRegister(MOVFR2GR_S, fj, rd); ++} ++ ++void Assembler::movfr2gr_d(Register rd, FPURegister fj) { ++ GenRegister(MOVFR2GR_D, fj, rd); ++} ++ ++void Assembler::movfrh2gr_s(Register rd, FPURegister fj) { ++ GenRegister(MOVFRH2GR_S, fj, rd); ++} ++ ++void Assembler::movgr2fcsr(Register rj) { GenRegister(MOVGR2FCSR, rj, FCSR); } ++ ++void Assembler::movfcsr2gr(Register rd) { GenRegister(MOVFCSR2GR, FCSR, rd); } ++ ++void Assembler::movfr2cf(CFRegister cd, FPURegister fj) { ++ GenRegister(MOVFR2CF, fj, cd); ++} ++ ++void Assembler::movcf2fr(FPURegister fd, CFRegister cj) { ++ GenRegister(MOVCF2FR, cj, fd); ++} ++ ++void Assembler::movgr2cf(CFRegister cd, Register rj) { ++ GenRegister(MOVGR2CF, rj, cd); ++} ++ ++void Assembler::movcf2gr(Register rd, CFRegister cj) { ++ GenRegister(MOVCF2GR, cj, rd); ++} ++ 
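
All of the emitters above funnel into the same fixed-field packing: the destination register lands in the low five bits, rj at the kRjShift position, and rk at the kRkShift position, with the opcode supplying the remaining bits. A minimal standalone sketch of that packing follows; the shift values (5 and 10) and the demo opcode are assumptions chosen to mirror the constants used in this patch, not values taken from it.

#include <cstdint>
#include <cstdio>

// Illustrative field positions, assumed to mirror the kRjShift/kRkShift
// constants referenced by the emitters above (rd sits in the low five bits).
constexpr uint32_t kRdShiftDemo = 0;
constexpr uint32_t kRjShiftDemo = 5;
constexpr uint32_t kRkShiftDemo = 10;

// Packs a three-register instruction word in the same shape as the
// GenRegister(opcode, rk, rj, rd) overload above (shift values assumed).
constexpr uint32_t Pack3R(uint32_t opcode, uint32_t rk, uint32_t rj,
                          uint32_t rd) {
  return opcode | (rk << kRkShiftDemo) | (rj << kRjShiftDemo) |
         (rd << kRdShiftDemo);
}

int main() {
  // Hypothetical opcode bits, used purely for the demonstration.
  constexpr uint32_t kDemoOpcode = 0x00108000u;
  std::printf("encoded word: 0x%08x\n", Pack3R(kDemoOpcode, 6, 5, 4));
  return 0;
}

The printed word has the same shape as the Instr value those helpers hand to emit().
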
++void Assembler::fld_s(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FLD_S, si12, rj, fd); ++} ++ ++void Assembler::fld_d(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FLD_D, si12, rj, fd); ++} ++ ++void Assembler::fst_s(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FST_S, si12, rj, fd); ++} ++ ++void Assembler::fst_d(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FST_D, si12, rj, fd); ++} ++ ++void Assembler::fldx_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDX_S, rk, rj, fd); ++} ++ ++void Assembler::fldx_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDX_D, rk, rj, fd); ++} ++ ++void Assembler::fstx_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTX_S, rk, rj, fd); ++} ++ ++void Assembler::fstx_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTX_D, rk, rj, fd); ++} ++ ++void Assembler::fldgt_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDGT_S, rk, rj, fd); ++} ++ ++void Assembler::fldgt_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDGT_D, rk, rj, fd); ++} ++ ++void Assembler::fldle_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDLE_S, rk, rj, fd); ++} ++ ++void Assembler::fldle_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDLE_D, rk, rj, fd); ++} ++ ++void Assembler::fstgt_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTGT_S, rk, rj, fd); ++} ++ ++void Assembler::fstgt_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTGT_D, rk, rj, fd); ++} ++ ++void Assembler::fstle_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTLE_S, rk, rj, fd); ++} ++ ++void Assembler::fstle_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTLE_D, rk, rj, fd); ++} ++ ++// ------------Memory-instructions------------- ++ ++/*void Assembler::AdjustBaseAndOffset(MemOperand* src, ++ OffsetAccessType access_type, ++ int second_access_add_to_offset) { ++ // TODO should be optimized. ++ // This method is used to adjust the base register and offset pair ++ // for a load/store when the offset doesn't fit into int12_t. ++ ++ bool doubleword_aligned = (src->offset() & (kDoubleSize - 1)) == 0; ++ bool two_accesses = static_cast(access_type) || !doubleword_aligned; ++ DCHECK_LE(second_access_add_to_offset, 7); // Must be <= 7. ++ ++ // is_int12 must be passed a signed value, hence the static cast below. ++ if (is_int12(src->offset()) && ++ (!two_accesses || is_int12(static_cast( ++ src->offset() + second_access_add_to_offset)))) { ++ // Nothing to do: 'offset' (and, if needed, 'offset + 4', or other specified ++ // value) fits into int16_t. ++ return; ++ } ++ ++ DCHECK(src->rm() != ++ at); // Must not overwrite the register 'base' while loading 'offset'. ++ ++#ifdef DEBUG ++ // Remember the "(mis)alignment" of 'offset', it will be checked at the end. ++ uint32_t misalignment = src->offset() & (kDoubleSize - 1); ++#endif ++ ++ // Do not load the whole 32-bit 'offset' if it can be represented as ++ // a sum of two 16-bit signed offsets. This can save an instruction or two. ++ // To simplify matters, only do this for a symmetric range of offsets from ++ // about -64KB to about +64KB, allowing further addition of 4 when accessing ++ // 64-bit variables with two 32-bit accesses. ++ constexpr int32_t kMinOffsetForSimpleAdjustment = ++ 0x7FF8; // Max int16_t that's a multiple of 8. 
++ constexpr int32_t kMaxOffsetForSimpleAdjustment = ++ 2 * kMinOffsetForSimpleAdjustment; ++ ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ if (0 <= src->offset() && src->offset() <= kMaxOffsetForSimpleAdjustment) { ++ daddiu(scratch, src->rm(), kMinOffsetForSimpleAdjustment); ++ src->offset_ -= kMinOffsetForSimpleAdjustment; ++ } else if (-kMaxOffsetForSimpleAdjustment <= src->offset() && ++ src->offset() < 0) { ++ daddiu(scratch, src->rm(), -kMinOffsetForSimpleAdjustment); ++ src->offset_ += kMinOffsetForSimpleAdjustment; ++ } else if (kArchVariant == kMips64r6) { ++ // On r6 take advantage of the daui instruction, e.g.: ++ // daui at, base, offset_high ++ // [dahi at, 1] // When `offset` is close to +2GB. ++ // lw reg_lo, offset_low(at) ++ // [lw reg_hi, (offset_low+4)(at)] // If misaligned 64-bit load. ++ // or when offset_low+4 overflows int16_t: ++ // daui at, base, offset_high ++ // daddiu at, at, 8 ++ // lw reg_lo, (offset_low-8)(at) ++ // lw reg_hi, (offset_low-4)(at) ++ int16_t offset_low = static_cast(src->offset()); ++ int32_t offset_low32 = offset_low; ++ int16_t offset_high = static_cast(src->offset() >> 16); ++ bool increment_hi16 = offset_low < 0; ++ bool overflow_hi16 = false; ++ ++ if (increment_hi16) { ++ offset_high++; ++ overflow_hi16 = (offset_high == -32768); ++ } ++ daui(scratch, src->rm(), static_cast(offset_high)); ++ ++ if (overflow_hi16) { ++ dahi(scratch, 1); ++ } ++ ++ if (two_accesses && !is_int16(static_cast( ++ offset_low32 + second_access_add_to_offset))) { ++ // Avoid overflow in the 16-bit offset of the load/store instruction when ++ // adding 4. ++ daddiu(scratch, scratch, kDoubleSize); ++ offset_low32 -= kDoubleSize; ++ } ++ ++ src->offset_ = offset_low32; ++ } else { ++ // Do not load the whole 32-bit 'offset' if it can be represented as ++ // a sum of three 16-bit signed offsets. This can save an instruction. ++ // To simplify matters, only do this for a symmetric range of offsets from ++ // about -96KB to about +96KB, allowing further addition of 4 when accessing ++ // 64-bit variables with two 32-bit accesses. ++ constexpr int32_t kMinOffsetForMediumAdjustment = ++ 2 * kMinOffsetForSimpleAdjustment; ++ constexpr int32_t kMaxOffsetForMediumAdjustment = ++ 3 * kMinOffsetForSimpleAdjustment; ++ if (0 <= src->offset() && src->offset() <= kMaxOffsetForMediumAdjustment) { ++ daddiu(scratch, src->rm(), kMinOffsetForMediumAdjustment / 2); ++ daddiu(scratch, scratch, kMinOffsetForMediumAdjustment / 2); ++ src->offset_ -= kMinOffsetForMediumAdjustment; ++ } else if (-kMaxOffsetForMediumAdjustment <= src->offset() && ++ src->offset() < 0) { ++ daddiu(scratch, src->rm(), -kMinOffsetForMediumAdjustment / 2); ++ daddiu(scratch, scratch, -kMinOffsetForMediumAdjustment / 2); ++ src->offset_ += kMinOffsetForMediumAdjustment; ++ } else { ++ // Now that all shorter options have been exhausted, load the full 32-bit ++ // offset. ++ int32_t loaded_offset = RoundDown(src->offset(), kDoubleSize); ++ lui(scratch, (loaded_offset >> kLuiShift) & kImm16Mask); ++ ori(scratch, scratch, loaded_offset & kImm16Mask); // Load 32-bit offset. 
++ daddu(scratch, scratch, src->rm()); ++ src->offset_ -= loaded_offset; ++ } ++ } ++ src->rm_ = scratch; ++ ++ DCHECK(is_int16(src->offset())); ++ if (two_accesses) { ++ DCHECK(is_int16( ++ static_cast(src->offset() + second_access_add_to_offset))); ++ } ++ DCHECK(misalignment == (src->offset() & (kDoubleSize - 1))); ++}*/ ++ ++void Assembler::AdjustBaseAndOffset(MemOperand* src) { ++ // is_int12 must be passed a signed value, hence the static cast below. ++ if ((!src->hasIndexReg() && is_int12(src->offset())) || src->hasIndexReg()) { ++ return; ++ } ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ if (is_uint12(static_cast(src->offset()))) { ++ ori(scratch, zero_reg, src->offset() & kImm12Mask); ++ } else { ++ lu12i_w(scratch, src->offset() >> 12 & 0xfffff); ++ if (src->offset() & kImm12Mask) { ++ ori(scratch, scratch, src->offset() & kImm12Mask); ++ } ++ } ++ src->index_ = scratch; ++ src->offset_ = 0; ++ // TODO can be optimized, for example 2 * [int12_min, int12_max] ++ // addi_d scratch base, offset/2 only on instr ++ // base = scratch ++ // offset = offset - offset / 2 ++} ++ ++int Assembler::RelocateInternalReference(RelocInfo::Mode rmode, Address pc, ++ intptr_t pc_delta) { ++ if (RelocInfo::IsInternalReference(rmode)) { ++ int64_t* p = reinterpret_cast(pc); ++ if (*p == kEndOfJumpChain) { ++ return 0; // Number of instructions patched. ++ } ++ *p += pc_delta; ++ return 2; // Number of instructions patched. ++ } ++ abort(); ++ /* Instr instr = instr_at(pc); ++ DCHECK(RelocInfo::IsInternalReferenceEncoded(rmode)); ++ if (IsLui(instr)) { ++ Instr instr_lui = instr_at(pc + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pc + 1 * kInstrSize); ++ Instr instr_ori2 = instr_at(pc + 3 * kInstrSize); ++ DCHECK(IsOri(instr_ori)); ++ DCHECK(IsOri(instr_ori2)); ++ // TODO(plind): symbolic names for the shifts. ++ int64_t imm = (instr_lui & static_cast(kImm16Mask)) << 48; ++ imm |= (instr_ori & static_cast(kImm16Mask)) << 32; ++ imm |= (instr_ori2 & static_cast(kImm16Mask)) << 16; ++ // Sign extend address. ++ imm >>= 16; ++ ++ if (imm == kEndOfJumpChain) { ++ return 0; // Number of instructions patched. ++ } ++ imm += pc_delta; ++ DCHECK_EQ(imm & 3, 0); ++ ++ instr_lui &= ~kImm16Mask; ++ instr_ori &= ~kImm16Mask; ++ instr_ori2 &= ~kImm16Mask; ++ ++ instr_at_put(pc + 0 * kInstrSize, instr_lui | ((imm >> 32) & kImm16Mask)); ++ instr_at_put(pc + 1 * kInstrSize, instr_ori | (imm >> 16 & kImm16Mask)); ++ instr_at_put(pc + 3 * kInstrSize, instr_ori2 | (imm & kImm16Mask)); ++ return 4; // Number of instructions patched. ++ } else if (IsJ(instr) || IsJal(instr)) { ++ // Regular j/jal relocation. ++ uint32_t imm28 = (instr & static_cast(kImm26Mask)) << 2; ++ imm28 += pc_delta; ++ imm28 &= kImm28Mask; ++ instr &= ~kImm26Mask; ++ DCHECK_EQ(imm28 & 3, 0); ++ uint32_t imm26 = static_cast(imm28 >> 2); ++ instr_at_put(pc, instr | (imm26 & kImm26Mask)); ++ return 1; // Number of instructions patched. ++ } else { ++ DCHECK(((instr & kJumpRawMask) == kJRawMark) || ++ ((instr & kJumpRawMask) == kJalRawMark)); ++ // Unbox raw offset and emit j/jal. ++ int32_t imm28 = (instr & static_cast(kImm26Mask)) << 2; ++ // Sign extend 28-bit offset to 32-bit. ++ imm28 = (imm28 << 4) >> 4; ++ uint64_t target = ++ static_cast(imm28) + reinterpret_cast(pc); ++ target &= kImm28Mask; ++ DCHECK_EQ(imm28 & 3, 0); ++ uint32_t imm26 = static_cast(target >> 2); ++ // Check markings whether to emit j or jal. ++ uint32_t unbox = (instr & kJRawMark) ? 
J : JAL;
++      instr_at_put(pc, unbox | (imm26 & kImm26Mask));
++      return 1;  // Number of instructions patched.
++    }*/
++}
++
++void Assembler::GrowBuffer() {
++  // Compute new buffer size.
++  int old_size = buffer_->size();
++  int new_size = std::min(2 * old_size, old_size + 1 * MB);
++
++  // Some internal data structures overflow for very large buffers,
++  // they must ensure that kMaximalBufferSize is not too large.
++  if (new_size > kMaximalBufferSize) {
++    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
++  }
++
++  // Set up new buffer.
++  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
++  DCHECK_EQ(new_size, new_buffer->size());
++  byte* new_start = new_buffer->start();
++
++  // Copy the data.
++  intptr_t pc_delta = new_start - buffer_start_;
++  intptr_t rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
++  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
++  MemMove(new_start, buffer_start_, pc_offset());
++  MemMove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
++          reloc_size);
++
++  // Switch buffers.
++  buffer_ = std::move(new_buffer);
++  buffer_start_ = new_start;
++  pc_ += pc_delta;
++  reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
++                               reloc_info_writer.last_pc() + pc_delta);
++
++  // Relocate runtime entries.
++  Vector<byte> instructions{buffer_start_, pc_offset()};
++  Vector<const byte> reloc_info{reloc_info_writer.pos(), reloc_size};
++  for (RelocIterator it(instructions, reloc_info, 0); !it.done(); it.next()) {
++    RelocInfo::Mode rmode = it.rinfo()->rmode();
++    if (rmode == RelocInfo::INTERNAL_REFERENCE) {
++      RelocateInternalReference(rmode, it.rinfo()->pc(), pc_delta);
++    }
++  }
++  DCHECK(!overflow());
++}
++
++void Assembler::db(uint8_t data) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  *reinterpret_cast<uint8_t*>(pc_) = data;
++  pc_ += sizeof(uint8_t);
++}
++
++void Assembler::dd(uint32_t data) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  *reinterpret_cast<uint32_t*>(pc_) = data;
++  pc_ += sizeof(uint32_t);
++}
++
++void Assembler::dq(uint64_t data) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  *reinterpret_cast<uint64_t*>(pc_) = data;
++  pc_ += sizeof(uint64_t);
++}
++
++void Assembler::dd(Label* label) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  uint64_t data;
++  if (label->is_bound()) {
++    data = reinterpret_cast<uint64_t>(buffer_start_ + label->pos());
++  } else {
++    data = jump_address(label);
++    unbound_labels_count_++;
++    internal_reference_positions_.insert(label->pos());
++  }
++  RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
++  EmitHelper(data);
++}
++
++void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
++  if (!ShouldRecordRelocInfo(rmode)) return;
++  // We do not try to reuse pool constants.
++  RelocInfo rinfo(reinterpret_cast<Address>
(pc_), rmode, data, Code()); ++ DCHECK_GE(buffer_space(), kMaxRelocSize); // Too late to grow buffer here. ++ reloc_info_writer.Write(&rinfo); ++} ++ ++void Assembler::BlockTrampolinePoolFor(int instructions) { ++ CheckTrampolinePoolQuick(instructions); ++ BlockTrampolinePoolBefore(pc_offset() + instructions * kInstrSize); ++} ++ ++void Assembler::CheckTrampolinePool() { ++ // Some small sequences of instructions must not be broken up by the ++ // insertion of a trampoline pool; such sequences are protected by setting ++ // either trampoline_pool_blocked_nesting_ or no_trampoline_pool_before_, ++ // which are both checked here. Also, recursive calls to CheckTrampolinePool ++ // are blocked by trampoline_pool_blocked_nesting_. ++ if ((trampoline_pool_blocked_nesting_ > 0) || ++ (pc_offset() < no_trampoline_pool_before_)) { ++ // Emission is currently blocked; make sure we try again as soon as ++ // possible. ++ if (trampoline_pool_blocked_nesting_ > 0) { ++ next_buffer_check_ = pc_offset() + kInstrSize; ++ } else { ++ next_buffer_check_ = no_trampoline_pool_before_; ++ } ++ return; ++ } ++ ++ DCHECK(!trampoline_emitted_); ++ DCHECK_GE(unbound_labels_count_, 0); ++ if (unbound_labels_count_ > 0) { ++ // First we emit jump (2 instructions), then we emit trampoline pool. ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Label after_pool; ++ b(&after_pool); ++ nop(); // TODO remove this ++ ++ int pool_start = pc_offset(); ++ for (int i = 0; i < unbound_labels_count_; i++) { ++ { ++ b(&after_pool); ++ nop(); // TODO remove this ++ } ++ } ++ nop(); ++ bind(&after_pool); ++ trampoline_ = Trampoline(pool_start, unbound_labels_count_); ++ ++ trampoline_emitted_ = true; ++ // As we are only going to emit trampoline once, we need to prevent any ++ // further emission. ++ next_buffer_check_ = kMaxInt; ++ } ++ } else { ++ // Number of branches to unbound label at this point is zero, so we can ++ // move next buffer check to maximum. ++ next_buffer_check_ = ++ pc_offset() + kMax16BranchOffset - kTrampolineSlotsSize * 16; ++ } ++ return; ++} ++ ++Address Assembler::target_address_at(Address pc) { ++ Instr instr0 = instr_at(pc); ++ Instr instr1 = instr_at(pc + 1 * kInstrSize); ++ Instr instr2 = instr_at(pc + 2 * kInstrSize); ++ ++ // Interpret 4 instructions for address generated by li: See listing in ++ // Assembler::set_target_address_at() just below. ++ DCHECK((IsLu12i_w(instr0) && (IsOri(instr1)) && (IsLu32i_d(instr2)))); ++ ++ // Assemble the 48 bit value. ++ uint64_t hi20 = ((uint64_t)(instr2 >> 5) & 0xfffff) << 32; ++ uint64_t mid20 = ((uint64_t)(instr0 >> 5) & 0xfffff) << 12; ++ uint64_t low12 = ((uint64_t)(instr1 >> 10) & 0xfff); ++ int64_t addr = static_cast(hi20 | mid20 | low12); ++ ++ // Sign extend to get canonical address. ++ addr = (addr << 16) >> 16; ++ // printf("add : 0x%lx 0x%lx 0x%lx 0x%lx\n", addr, hi20, mid20, low12); ++ return static_cast
(addr); ++} ++ ++// On la64, a target address is stored in a 3-instruction sequence: ++// 0: lu12i_w(rd, (j.imm64_ >> 12) & kImm20Mask); ++// 1: ori(rd, rd, j.imm64_ & kImm12Mask); ++// 2: lu32i_d(rd, (j.imm64_ >> 32) & kImm20Mask); ++// ++// Patching the address must replace all the lui & ori instructions, ++// and flush the i-cache. ++// ++// There is an optimization below, which emits a nop when the address ++// fits in just 16 bits. This is unlikely to help, and should be benchmarked, ++// and possibly removed. ++void Assembler::set_target_value_at(Address pc, uint64_t target, ++ ICacheFlushMode icache_flush_mode) { ++ // There is an optimization where only 3 instructions are used to load address ++ // in code on LA64 because only 48-bits of address is effectively used. ++ // It relies on fact the upper [63:48] bits are not used for virtual address ++ // translation and they have to be set according to value of bit 47 in order ++ // get canonical address. ++#ifdef DEBUG ++ // Check we have the result from a li macro-instruction. ++ Instr instr0 = instr_at(pc); ++ Instr instr1 = instr_at(pc + kInstrSize); ++ Instr instr2 = instr_at(pc + kInstrSize * 2); ++ DCHECK(IsLu12i_w(instr0) && IsOri(instr1) && IsLu32i_d(instr2)); ++#endif ++ ++ Instr instr = instr_at(pc); ++ uint32_t rd_code = GetRd(instr); ++ uint32_t* p = reinterpret_cast(pc); ++ ++ // Must use 3 instructions to insure patchable code. ++ // lu12i_w rd, middle-20. ++ // ori rd, rd, low-12. ++ // li32i_d rd, high-20. ++ *p = LU12I_W | (((target >> 12) & 0xfffff) << kRjShift) | rd_code; ++ *(p + 1) = ++ ORI | (target & 0xfff) << kRkShift | (rd_code << kRjShift) | rd_code; ++ *(p + 2) = LU32I_D | (((target >> 32) & 0xfffff) << kRjShift) | rd_code; ++ ++ if (icache_flush_mode != SKIP_ICACHE_FLUSH) { ++ FlushInstructionCache(pc, 3 * kInstrSize); ++ } ++} ++ ++UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler) ++ : available_(assembler->GetScratchRegisterList()), ++ old_available_(*available_) {} ++ ++UseScratchRegisterScope::~UseScratchRegisterScope() { ++ *available_ = old_available_; ++} ++ ++Register UseScratchRegisterScope::Acquire() { ++ DCHECK_NOT_NULL(available_); ++ DCHECK_NE(*available_, 0); ++ int index = static_cast(base::bits::CountTrailingZeros32(*available_)); ++ *available_ &= ~(1UL << index); ++ ++ return Register::from_code(index); ++} ++ ++bool UseScratchRegisterScope::hasAvailable() const { return *available_ != 0; } ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.h +new file mode 100644 +index 00000000000..03a0103b1ca +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.h +@@ -0,0 +1,1171 @@ ++// Copyright (c) 1994-2006 Sun Microsystems Inc. ++// All Rights Reserved. ++// ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// - Redistributions of source code must retain the above copyright notice, ++// this list of conditions and the following disclaimer. ++// ++// - Redistribution in binary form must reproduce the above copyright ++// notice, this list of conditions and the following disclaimer in the ++// documentation and/or other materials provided with the distribution. 
++// ++// - Neither the name of Sun Microsystems or the names of contributors may ++// be used to endorse or promote products derived from this software without ++// specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS ++// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ++// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR ++// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++// The original source code covered by the above license above has been ++// modified significantly by Google Inc. ++// Copyright 2012 the V8 project authors. All rights reserved. ++ ++#ifndef V8_CODEGEN_LA64_ASSEMBLER_LA64_H_ ++#define V8_CODEGEN_LA64_ASSEMBLER_LA64_H_ ++ ++#include ++#include ++#include ++ ++#include "src/codegen/assembler.h" ++#include "src/codegen/external-reference.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/la64/register-la64.h" ++#include "src/codegen/label.h" ++#include "src/objects/contexts.h" ++#include "src/objects/smi.h" ++ ++namespace v8 { ++namespace internal { ++ ++class SafepointTableBuilder; ++ ++// ----------------------------------------------------------------------------- ++// Machine instruction Operands. ++constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize; ++constexpr uint64_t kSmiShiftMask = (1UL << kSmiShift) - 1; ++// Class Operand represents a shifter operand in data processing instructions. ++class Operand { ++ public: ++ // Immediate. ++ V8_INLINE explicit Operand(int64_t immediate, ++ RelocInfo::Mode rmode = RelocInfo::NONE) ++ : rm_(no_reg), rmode_(rmode) { ++ value_.immediate = immediate; ++ } ++ V8_INLINE explicit Operand(const ExternalReference& f) ++ : rm_(no_reg), rmode_(RelocInfo::EXTERNAL_REFERENCE) { ++ value_.immediate = static_cast(f.address()); ++ } ++ V8_INLINE explicit Operand(const char* s); ++ explicit Operand(Handle handle); ++ V8_INLINE explicit Operand(Smi value) : rm_(no_reg), rmode_(RelocInfo::NONE) { ++ value_.immediate = static_cast(value.ptr()); ++ } ++ ++ static Operand EmbeddedNumber(double number); // Smi or HeapNumber. ++ static Operand EmbeddedStringConstant(const StringConstantBase* str); ++ ++ // Register. ++ V8_INLINE explicit Operand(Register rm) : rm_(rm) {} ++ ++ // Return true if this is a register operand. 
++ V8_INLINE bool is_reg() const; ++ ++ inline int64_t immediate() const; ++ ++ bool IsImmediate() const { return !rm_.is_valid(); } ++ ++ HeapObjectRequest heap_object_request() const { ++ DCHECK(IsHeapObjectRequest()); ++ return value_.heap_object_request; ++ } ++ ++ bool IsHeapObjectRequest() const { ++ DCHECK_IMPLIES(is_heap_object_request_, IsImmediate()); ++ DCHECK_IMPLIES(is_heap_object_request_, ++ rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT || ++ rmode_ == RelocInfo::CODE_TARGET); ++ return is_heap_object_request_; ++ } ++ ++ Register rm() const { return rm_; } ++ ++ RelocInfo::Mode rmode() const { return rmode_; } ++ ++ private: ++ Register rm_; ++ union Value { ++ Value() {} ++ HeapObjectRequest heap_object_request; // if is_heap_object_request_ ++ int64_t immediate; // otherwise ++ } value_; // valid if rm_ == no_reg ++ bool is_heap_object_request_ = false; ++ RelocInfo::Mode rmode_; ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++}; ++ ++// Class MemOperand represents a memory operand in load and store instructions. ++// 1: base_reg + off_imm( si12 | si14<<2) ++// 2: base_reg + offset_reg ++class V8_EXPORT_PRIVATE MemOperand { ++ public: ++ explicit MemOperand(Register rj, int32_t offset = 0); ++ explicit MemOperand(Register rj, Register offset = no_reg); ++ Register base() const { return base_; } ++ Register index() const { return index_; } ++ int32_t offset() const { return offset_; } ++ ++ bool hasIndexReg() const { return index_ != no_reg; } ++ ++ private: ++ Register base_; // base ++ Register index_; // index ++ int32_t offset_; // offset ++ ++ friend class Assembler; ++}; ++ ++class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ++ public: ++ // Create an assembler. Instructions and relocation information are emitted ++ // into a buffer, with the instructions starting from the beginning and the ++ // relocation information starting from the end of the buffer. See CodeDesc ++ // for a detailed comment on the layout (globals.h). ++ // ++ // If the provided buffer is nullptr, the assembler allocates and grows its ++ // own buffer. Otherwise it takes ownership of the provided buffer. ++ explicit Assembler(const AssemblerOptions&, ++ std::unique_ptr = {}); ++ ++ virtual ~Assembler() {} ++ ++ // GetCode emits any pending (non-emitted) code and fills the descriptor desc. ++ static constexpr int kNoHandlerTable = 0; ++ static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; ++ void GetCode(Isolate* isolate, CodeDesc* desc, ++ SafepointTableBuilder* safepoint_table_builder, ++ int handler_table_offset); ++ ++ // Convenience wrapper for code without safepoint or handler tables. ++ void GetCode(Isolate* isolate, CodeDesc* desc) { ++ GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); ++ } ++ ++ // Unused on this architecture. ++ void MaybeEmitOutOfLineConstantPool() {} ++ ++ // Label operations & relative jumps (PPUM Appendix D). ++ // ++ // Takes a branch opcode (cc) and a label (L) and generates ++ // either a backward branch or a forward branch and links it ++ // to the label fixup chain. Usage: ++ // ++ // Label L; // unbound label ++ // j(cc, &L); // forward branch to unbound label ++ // bind(&L); // bind label to the current pc ++ // j(cc, &L); // backward branch to bound label ++ // bind(&L); // illegal: a label may be bound only once ++ // ++ // Note: The same Label can be used for forward and backward branches ++ // but it may be bound only once. ++ void bind(Label* L); // Binds an unbound label L to current code position. 
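
The comment above describes the link-then-bind discipline: a branch to a still-unbound label records itself on the label's chain and receives its real offset only once bind() fixes the target position. The toy model below illustrates that flow; MiniLabel, MiniAssembler, and branch_to are invented names for illustration and are not part of the V8 API.

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy model of the link-then-bind scheme: each unresolved branch remembers
// its own position; bind() walks the list and patches the final offsets.
struct MiniLabel {
  int bound_pos = -1;                 // -1 while unbound
  std::vector<int> pending_branches;  // positions of branches waiting on us
};

struct MiniAssembler {
  std::vector<int32_t> offsets;  // stands in for emitted branch offsets

  void branch_to(MiniLabel& l) {
    int pc = static_cast<int>(offsets.size());
    if (l.bound_pos >= 0) {
      offsets.push_back(l.bound_pos - pc);  // backward branch: offset known now
    } else {
      offsets.push_back(0);                 // forward branch: patched at bind()
      l.pending_branches.push_back(pc);
    }
  }

  void bind(MiniLabel& l) {
    l.bound_pos = static_cast<int>(offsets.size());
    for (int pc : l.pending_branches) offsets[pc] = l.bound_pos - pc;
    l.pending_branches.clear();
  }
};

int main() {
  MiniAssembler masm;
  MiniLabel l;
  masm.branch_to(l);          // forward branch to the still-unbound label
  masm.offsets.push_back(0);  // stand-in for an unrelated instruction
  masm.bind(l);               // label now points at the current position
  masm.offsets.push_back(0);  // another unrelated instruction
  masm.branch_to(l);          // backward branch: offset is known immediately
  for (int32_t off : masm.offsets) std::printf("%d\n", off);
  return 0;
}

The printed offsets show the forward branch patched to a positive displacement and the backward branch emitted with a negative one, which is the same distinction the real branch_offset_helper() and bind() take care of.
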
++ ++ enum OffsetSize : int { kOffset26 = 26, kOffset21 = 21, kOffset16 = 16 }; ++ ++ // Determines if Label is bound and near enough so that branch instruction ++ // can be used to reach it, instead of jump instruction. ++ // c means conditinal branch, a means always branch. ++ bool is_near_c(Label* L); ++ bool is_near(Label* L, OffsetSize bits); ++ bool is_near_a(Label* L); ++ ++ int BranchOffset(Instr instr); ++ ++ // Returns the branch offset to the given label from the current code ++ // position. Links the label to the current position if it is still unbound. ++ // Manages the jump elimination optimization if the second parameter is true. ++ int32_t branch_offset_helper(Label* L, OffsetSize bits); ++ inline int32_t branch_offset(Label* L) { ++ return branch_offset_helper(L, OffsetSize::kOffset16); ++ } ++ inline int32_t branch_offset21(Label* L) { ++ return branch_offset_helper(L, OffsetSize::kOffset21); ++ } ++ inline int32_t branch_offset26(Label* L) { ++ return branch_offset_helper(L, OffsetSize::kOffset26); ++ } ++ inline int32_t shifted_branch_offset(Label* L) { ++ return branch_offset(L) >> 2; ++ } ++ inline int32_t shifted_branch_offset21(Label* L) { ++ return branch_offset21(L) >> 2; ++ } ++ inline int32_t shifted_branch_offset26(Label* L) { ++ return branch_offset26(L) >> 2; ++ } ++ uint64_t jump_address(Label* L); ++ uint64_t jump_offset(Label* L); ++ uint64_t branch_long_offset(Label* L); ++ ++ // Puts a labels target address at the given position. ++ // The high 8 bits are set to zero. ++ void label_at_put(Label* L, int at_offset); ++ ++ // Read/Modify the code target address in the branch/call instruction at pc. ++ // The isolate argument is unused (and may be nullptr) when skipping flushing. ++ static Address target_address_at(Address pc); ++ V8_INLINE static void set_target_address_at( ++ Address pc, Address target, ++ ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) { ++ set_target_value_at(pc, target, icache_flush_mode); ++ } ++ // On MIPS there is no Constant Pool so we skip that parameter. ++ V8_INLINE static Address target_address_at(Address pc, ++ Address constant_pool) { ++ return target_address_at(pc); ++ } ++ V8_INLINE static void set_target_address_at( ++ Address pc, Address constant_pool, Address target, ++ ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) { ++ set_target_address_at(pc, target, icache_flush_mode); ++ } ++ ++ static void set_target_value_at( ++ Address pc, uint64_t target, ++ ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); ++ ++ static void JumpLabelToJumpRegister(Address pc); ++ ++ // This sets the branch destination (which gets loaded at the call address). ++ // This is for calls and branches within generated code. The serializer ++ // has already deserialized the lui/ori instructions etc. ++ inline static void deserialization_set_special_target_at( ++ Address instruction_payload, Code code, Address target); ++ ++ // Get the size of the special target encoded at 'instruction_payload'. ++ inline static int deserialization_special_target_size( ++ Address instruction_payload); ++ ++ // This sets the internal reference at the pc. ++ inline static void deserialization_set_target_internal_reference_at( ++ Address pc, Address target, ++ RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); ++ ++ // Here we are patching the address in the LUI/ORI instruction pair. 
++ // These values are used in the serialization process and must be zero for ++ // LA platform, as Code, Embedded Object or External-reference pointers ++ // are split across two consecutive instructions and don't exist separately ++ // in the code, so the serializer should not step forwards in memory after ++ // a target is resolved and written. ++ static constexpr int kSpecialTargetSize = 0; ++ ++ // Number of consecutive instructions used to store 32bit/64bit constant. ++ // This constant was used in RelocInfo::target_address_address() function ++ // to tell serializer address of the instruction that follows ++ // LUI/ORI instruction pair. ++ // TODO check this ++ static constexpr int kInstructionsFor64BitConstant = 4; ++ ++ // Difference between address of current opcode and target address offset. ++ static constexpr int kBranchPCOffset = 0; ++ ++ // Difference between address of current opcode and target address offset, ++ // when we are generatinga sequence of instructions for long relative PC ++ // branches ++ static constexpr int kLongBranchPCOffset = 0; // 3 * kInstrSize; ++ ++ // Max offset for instructions with 16-bit offset field ++ static constexpr int kMax16BranchOffset = (1 << (18 - 1)) - 1; ++ ++ // Max offset for instructions with 21-bit offset field ++ static constexpr int kMax21BranchOffset = (1 << (23 - 1)) - 1; ++ ++ // Max offset for compact branch instructions with 26-bit offset field ++ static constexpr int kMax26BranchOffset = (1 << (28 - 1)) - 1; ++ ++ static constexpr int kTrampolineSlotsSize = 2 * kInstrSize; ++ ++ RegList* GetScratchRegisterList() { return &scratch_register_list_; } ++ ++ // --------------------------------------------------------------------------- ++ // Code generation. ++ ++ // Insert the smallest number of nop instructions ++ // possible to align the pc offset to a multiple ++ // of m. m must be a power of 2 (>= 4). ++ void Align(int m); ++ // Insert the smallest number of zero bytes possible to align the pc offset ++ // to a mulitple of m. m must be a power of 2 (>= 2). ++ void DataAlign(int m); ++ // Aligns code to something that's optimal for a jump target for the platform. ++ void CodeTargetAlign(); ++ ++ // Different nop operations are used by the code generator to detect certain ++ // states of the generated code. ++ enum NopMarkerTypes { ++ NON_MARKING_NOP = 0, ++ DEBUG_BREAK_NOP, ++ // IC markers. ++ PROPERTY_ACCESS_INLINED, ++ PROPERTY_ACCESS_INLINED_CONTEXT, ++ PROPERTY_ACCESS_INLINED_CONTEXT_DONT_DELETE, ++ // Helper values. ++ LAST_CODE_MARKER, ++ FIRST_IC_MARKER = PROPERTY_ACCESS_INLINED, ++ }; ++ ++ // Type == 0 is the default non-marking nop. For loongisa this is a ++ // andi(zero_reg, zero_reg, 0). We use rt_reg == r1 for non-zero ++ // marking, to avoid conflict with ssnop and ehb instructions. ++ void nop(unsigned int type = 0) { ++ DCHECK_LT(type, 32); ++ Register nop_rt_reg = (type == 0) ? zero_reg : t7; ++ andi(zero_reg, nop_rt_reg, type); ++ } ++ ++ // --------Branch-and-jump-instructions---------- ++ // We don't use likely variant of instructions. 
++ void b(int32_t offset); ++ inline void b(Label* L) { b(shifted_branch_offset26(L)); } ++ void bl(int32_t offset); ++ inline void bl(Label* L) { bl(shifted_branch_offset26(L)); } ++ ++ void beq(Register rj, Register rd, int32_t offset); ++ inline void beq(Register rj, Register rd, Label* L) { ++ beq(rj, rd, shifted_branch_offset(L)); ++ } ++ void bne(Register rj, Register rd, int32_t offset); ++ inline void bne(Register rj, Register rd, Label* L) { ++ bne(rj, rd, shifted_branch_offset(L)); ++ } ++ void blt(Register rj, Register rd, int32_t offset); ++ inline void blt(Register rj, Register rd, Label* L) { ++ blt(rj, rd, shifted_branch_offset(L)); ++ } ++ void bge(Register rj, Register rd, int32_t offset); ++ inline void bge(Register rj, Register rd, Label* L) { ++ bge(rj, rd, shifted_branch_offset(L)); ++ } ++ void bltu(Register rj, Register rd, int32_t offset); ++ inline void bltu(Register rj, Register rd, Label* L) { ++ bltu(rj, rd, shifted_branch_offset(L)); ++ } ++ void bgeu(Register rj, Register rd, int32_t offset); ++ inline void bgeu(Register rj, Register rd, Label* L) { ++ bgeu(rj, rd, shifted_branch_offset(L)); ++ } ++ void beqz(Register rj, int32_t offset); ++ inline void beqz(Register rj, Label* L) { ++ beqz(rj, shifted_branch_offset21(L)); ++ } ++ void bnez(Register rj, int32_t offset); ++ inline void bnez(Register rj, Label* L) { ++ bnez(rj, shifted_branch_offset21(L)); ++ } ++ ++ void jirl(Register rd, Register rj, int32_t offset); ++ ++ void bceqz(CFRegister cj, int32_t si21); ++ inline void bceqz(CFRegister cj, Label* L) { ++ bceqz(cj, shifted_branch_offset21(L)); ++ } ++ void bcnez(CFRegister cj, int32_t si21); ++ inline void bcnez(CFRegister cj, Label* L) { ++ bcnez(cj, shifted_branch_offset21(L)); ++ } ++ ++ // -------Data-processing-instructions--------- ++ ++ // Arithmetic. 
++ void add_w(Register rd, Register rj, Register rk); ++ void add_d(Register rd, Register rj, Register rk); ++ void sub_w(Register rd, Register rj, Register rk); ++ void sub_d(Register rd, Register rj, Register rk); ++ ++ void addi_w(Register rd, Register rj, int32_t si12); ++ void addi_d(Register rd, Register rj, int32_t si12); ++ ++ void addu16i_d(Register rd, Register rj, int32_t si16); ++ ++ void alsl_w(Register rd, Register rj, Register rk, int32_t sa2); ++ void alsl_wu(Register rd, Register rj, Register rk, int32_t sa2); ++ void alsl_d(Register rd, Register rj, Register rk, int32_t sa2); ++ ++ void lu12i_w(Register rd, int32_t si20); ++ void lu32i_d(Register rd, int32_t si20); ++ void lu52i_d(Register rd, Register rj, int32_t si12); ++ ++ void slt(Register rd, Register rj, Register rk); ++ void sltu(Register rd, Register rj, Register rk); ++ void slti(Register rd, Register rj, int32_t si12); ++ void sltui(Register rd, Register rj, int32_t si12); ++ ++ void pcaddi(Register rd, int32_t si20); ++ void pcaddu12i(Register rd, int32_t si20); ++ void pcaddu18i(Register rd, int32_t si20); ++ void pcalau12i(Register rd, int32_t si20); ++ ++ void and_(Register rd, Register rj, Register rk); ++ void or_(Register rd, Register rj, Register rk); ++ void xor_(Register rd, Register rj, Register rk); ++ void nor(Register rd, Register rj, Register rk); ++ void andn(Register rd, Register rj, Register rk); ++ void orn(Register rd, Register rj, Register rk); ++ ++ void andi(Register rd, Register rj, int32_t ui12); ++ void ori(Register rd, Register rj, int32_t ui12); ++ void xori(Register rd, Register rj, int32_t ui12); ++ ++ void mul_w(Register rd, Register rj, Register rk); ++ void mulh_w(Register rd, Register rj, Register rk); ++ void mulh_wu(Register rd, Register rj, Register rk); ++ void mul_d(Register rd, Register rj, Register rk); ++ void mulh_d(Register rd, Register rj, Register rk); ++ void mulh_du(Register rd, Register rj, Register rk); ++ ++ void mulw_d_w(Register rd, Register rj, Register rk); ++ void mulw_d_wu(Register rd, Register rj, Register rk); ++ ++ void div_w(Register rd, Register rj, Register rk); ++ void mod_w(Register rd, Register rj, Register rk); ++ void div_wu(Register rd, Register rj, Register rk); ++ void mod_wu(Register rd, Register rj, Register rk); ++ void div_d(Register rd, Register rj, Register rk); ++ void mod_d(Register rd, Register rj, Register rk); ++ void div_du(Register rd, Register rj, Register rk); ++ void mod_du(Register rd, Register rj, Register rk); ++ ++ // Shifts. ++ void sll_w(Register rd, Register rj, Register rk); ++ void srl_w(Register rd, Register rj, Register rk); ++ void sra_w(Register rd, Register rj, Register rk); ++ void rotr_w(Register rd, Register rj, Register rk); ++ ++ void slli_w(Register rd, Register rj, int32_t ui5); ++ void srli_w(Register rd, Register rj, int32_t ui5); ++ void srai_w(Register rd, Register rj, int32_t ui5); ++ void rotri_w(Register rd, Register rj, int32_t ui5); ++ ++ void sll_d(Register rd, Register rj, Register rk); ++ void srl_d(Register rd, Register rj, Register rk); ++ void sra_d(Register rd, Register rj, Register rk); ++ void rotr_d(Register rd, Register rj, Register rk); ++ ++ void slli_d(Register rd, Register rj, int32_t ui6); ++ void srli_d(Register rd, Register rj, int32_t ui6); ++ void srai_d(Register rd, Register rj, int32_t ui6); ++ void rotri_d(Register rd, Register rj, int32_t ui6); ++ ++ // Bit twiddling. 
++ void ext_w_b(Register rd, Register rj); ++ void ext_w_h(Register rd, Register rj); ++ ++ void clo_w(Register rd, Register rj); ++ void clz_w(Register rd, Register rj); ++ void cto_w(Register rd, Register rj); ++ void ctz_w(Register rd, Register rj); ++ void clo_d(Register rd, Register rj); ++ void clz_d(Register rd, Register rj); ++ void cto_d(Register rd, Register rj); ++ void ctz_d(Register rd, Register rj); ++ ++ void bytepick_w(Register rd, Register rj, Register rk, int32_t sa2); ++ void bytepick_d(Register rd, Register rj, Register rk, int32_t sa3); ++ ++ void revb_2h(Register rd, Register rj); ++ void revb_4h(Register rd, Register rj); ++ void revb_2w(Register rd, Register rj); ++ void revb_d(Register rd, Register rj); ++ ++ void revh_2w(Register rd, Register rj); ++ void revh_d(Register rd, Register rj); ++ ++ void bitrev_4b(Register rd, Register rj); ++ void bitrev_8b(Register rd, Register rj); ++ ++ void bitrev_w(Register rd, Register rj); ++ void bitrev_d(Register rd, Register rj); ++ ++ void bstrins_w(Register rd, Register rj, int32_t msbw, int32_t lsbw); ++ void bstrins_d(Register rd, Register rj, int32_t msbd, int32_t lsbd); ++ ++ void bstrpick_w(Register rd, Register rj, int32_t msbw, int32_t lsbw); ++ void bstrpick_d(Register rd, Register rj, int32_t msbd, int32_t lsbd); ++ ++ void maskeqz(Register rd, Register rj, Register rk); ++ void masknez(Register rd, Register rj, Register rk); ++ ++ // Memory-instructions ++ void ld_b(Register rd, Register rj, int32_t si12); ++ void ld_h(Register rd, Register rj, int32_t si12); ++ void ld_w(Register rd, Register rj, int32_t si12); ++ void ld_d(Register rd, Register rj, int32_t si12); ++ void ld_bu(Register rd, Register rj, int32_t si12); ++ void ld_hu(Register rd, Register rj, int32_t si12); ++ void ld_wu(Register rd, Register rj, int32_t si12); ++ void st_b(Register rd, Register rj, int32_t si12); ++ void st_h(Register rd, Register rj, int32_t si12); ++ void st_w(Register rd, Register rj, int32_t si12); ++ void st_d(Register rd, Register rj, int32_t si12); ++ ++ void ldx_b(Register rd, Register rj, Register rk); ++ void ldx_h(Register rd, Register rj, Register rk); ++ void ldx_w(Register rd, Register rj, Register rk); ++ void ldx_d(Register rd, Register rj, Register rk); ++ void ldx_bu(Register rd, Register rj, Register rk); ++ void ldx_hu(Register rd, Register rj, Register rk); ++ void ldx_wu(Register rd, Register rj, Register rk); ++ void stx_b(Register rd, Register rj, Register rk); ++ void stx_h(Register rd, Register rj, Register rk); ++ void stx_w(Register rd, Register rj, Register rk); ++ void stx_d(Register rd, Register rj, Register rk); ++ ++ void ldptr_w(Register rd, Register rj, int32_t si14); ++ void ldptr_d(Register rd, Register rj, int32_t si14); ++ void stptr_w(Register rd, Register rj, int32_t si14); ++ void stptr_d(Register rd, Register rj, int32_t si14); ++ ++ void preld(int32_t hint, Register rj, int32_t si12); ++ ++ void preldx(int32_t hint, Register rj, Register rk); ++ ++ void ldgt_b(Register rd, Register rj, Register rk); ++ void ldgt_h(Register rd, Register rj, Register rk); ++ void ldgt_w(Register rd, Register rj, Register rk); ++ void ldgt_d(Register rd, Register rj, Register rk); ++ ++ void ldle_b(Register rd, Register rj, Register rk); ++ void ldle_h(Register rd, Register rj, Register rk); ++ void ldle_w(Register rd, Register rj, Register rk); ++ void ldle_d(Register rd, Register rj, Register rk); ++ ++ void stgt_b(Register rd, Register rj, Register rk); ++ void stgt_h(Register rd, Register rj, Register 
rk); ++ void stgt_w(Register rd, Register rj, Register rk); ++ void stgt_d(Register rd, Register rj, Register rk); ++ ++ void stle_b(Register rd, Register rj, Register rk); ++ void stle_h(Register rd, Register rj, Register rk); ++ void stle_w(Register rd, Register rj, Register rk); ++ void stle_d(Register rd, Register rj, Register rk); ++ ++ void amswap_w(Register rd, Register rk, Register rj); ++ void amswap_d(Register rd, Register rk, Register rj); ++ void amadd_w(Register rd, Register rk, Register rj); ++ void amadd_d(Register rd, Register rk, Register rj); ++ void amand_w(Register rd, Register rk, Register rj); ++ void amand_d(Register rd, Register rk, Register rj); ++ void amor_w(Register rd, Register rk, Register rj); ++ void amor_d(Register rd, Register rk, Register rj); ++ void amxor_w(Register rd, Register rk, Register rj); ++ void amxor_d(Register rd, Register rk, Register rj); ++ void ammax_w(Register rd, Register rk, Register rj); ++ void ammax_d(Register rd, Register rk, Register rj); ++ void ammin_w(Register rd, Register rk, Register rj); ++ void ammin_d(Register rd, Register rk, Register rj); ++ void ammax_wu(Register rd, Register rk, Register rj); ++ void ammax_du(Register rd, Register rk, Register rj); ++ void ammin_wu(Register rd, Register rk, Register rj); ++ void ammin_du(Register rd, Register rk, Register rj); ++ ++ void amswap_db_w(Register rd, Register rk, Register rj); ++ void amswap_db_d(Register rd, Register rk, Register rj); ++ void amadd_db_w(Register rd, Register rk, Register rj); ++ void amadd_db_d(Register rd, Register rk, Register rj); ++ void amand_db_w(Register rd, Register rk, Register rj); ++ void amand_db_d(Register rd, Register rk, Register rj); ++ void amor_db_w(Register rd, Register rk, Register rj); ++ void amor_db_d(Register rd, Register rk, Register rj); ++ void amxor_db_w(Register rd, Register rk, Register rj); ++ void amxor_db_d(Register rd, Register rk, Register rj); ++ void ammax_db_w(Register rd, Register rk, Register rj); ++ void ammax_db_d(Register rd, Register rk, Register rj); ++ void ammin_db_w(Register rd, Register rk, Register rj); ++ void ammin_db_d(Register rd, Register rk, Register rj); ++ void ammax_db_wu(Register rd, Register rk, Register rj); ++ void ammax_db_du(Register rd, Register rk, Register rj); ++ void ammin_db_wu(Register rd, Register rk, Register rj); ++ void ammin_db_du(Register rd, Register rk, Register rj); ++ ++ void ll_w(Register rd, Register rj, int32_t si14); ++ void ll_d(Register rd, Register rj, int32_t si14); ++ void sc_w(Register rd, Register rj, int32_t si14); ++ void sc_d(Register rd, Register rj, int32_t si14); ++ ++ void dbar(int32_t hint); ++ void ibar(int32_t hint); ++ ++ void crc_w_b_w(Register rd, Register rj, Register rk); ++ void crc_w_h_w(Register rd, Register rj, Register rk); ++ void crc_w_w_w(Register rd, Register rj, Register rk); ++ void crc_w_d_w(Register rd, Register rj, Register rk); ++ void crcc_w_b_w(Register rd, Register rj, Register rk); ++ void crcc_w_h_w(Register rd, Register rj, Register rk); ++ void crcc_w_w_w(Register rd, Register rj, Register rk); ++ void crcc_w_d_w(Register rd, Register rj, Register rk); ++ ++ void syscall(int32_t code); ++ ++ void asrtle_d(Register rj, Register rk); ++ void asrtgt_d(Register rj, Register rk); ++ ++ void rdtimel_w(Register rd, Register rj); ++ void rdtimeh_w(Register rd, Register rj); ++ void rdtime_d(Register rd, Register rj); ++ ++ void cpucfg(Register rd, Register rj); ++ ++ // Break / Trap instructions. 
++ void break_(uint32_t code, bool break_as_stop = false); ++ void stop(uint32_t code = kMaxStopCode); ++ ++ // Arithmetic. ++ void fadd_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fadd_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fsub_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fsub_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmul_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmul_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fdiv_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fdiv_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ ++ void fmax_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmax_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmin_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmin_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fmaxa_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmaxa_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmina_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmina_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fabs_s(FPURegister fd, FPURegister fj); ++ void fabs_d(FPURegister fd, FPURegister fj); ++ void fneg_s(FPURegister fd, FPURegister fj); ++ void fneg_d(FPURegister fd, FPURegister fj); ++ ++ void fsqrt_s(FPURegister fd, FPURegister fj); ++ void fsqrt_d(FPURegister fd, FPURegister fj); ++ void frecip_s(FPURegister fd, FPURegister fj); ++ void frecip_d(FPURegister fd, FPURegister fj); ++ void frsqrt_s(FPURegister fd, FPURegister fj); ++ void frsqrt_d(FPURegister fd, FPURegister fj); ++ ++ void fscaleb_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fscaleb_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void flogb_s(FPURegister fd, FPURegister fj); ++ void flogb_d(FPURegister fd, FPURegister fj); ++ void fcopysign_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fcopysign_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fclass_s(FPURegister fd, FPURegister fj); ++ void fclass_d(FPURegister fd, FPURegister fj); ++ ++ void fcmp_cond_s(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd); ++ void fcmp_cond_d(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd); ++ ++ void fcvt_s_d(FPURegister fd, FPURegister fj); ++ void fcvt_d_s(FPURegister fd, FPURegister fj); ++ ++ void ffint_s_w(FPURegister fd, FPURegister fj); ++ void ffint_s_l(FPURegister fd, FPURegister fj); ++ void ffint_d_w(FPURegister fd, FPURegister fj); ++ void ffint_d_l(FPURegister fd, FPURegister fj); ++ void ftint_w_s(FPURegister fd, FPURegister fj); ++ void ftint_w_d(FPURegister fd, FPURegister fj); ++ void ftint_l_s(FPURegister fd, FPURegister fj); ++ void ftint_l_d(FPURegister fd, FPURegister fj); ++ ++ void ftintrm_w_s(FPURegister fd, FPURegister fj); ++ void 
ftintrm_w_d(FPURegister fd, FPURegister fj); ++ void ftintrm_l_s(FPURegister fd, FPURegister fj); ++ void ftintrm_l_d(FPURegister fd, FPURegister fj); ++ void ftintrp_w_s(FPURegister fd, FPURegister fj); ++ void ftintrp_w_d(FPURegister fd, FPURegister fj); ++ void ftintrp_l_s(FPURegister fd, FPURegister fj); ++ void ftintrp_l_d(FPURegister fd, FPURegister fj); ++ void ftintrz_w_s(FPURegister fd, FPURegister fj); ++ void ftintrz_w_d(FPURegister fd, FPURegister fj); ++ void ftintrz_l_s(FPURegister fd, FPURegister fj); ++ void ftintrz_l_d(FPURegister fd, FPURegister fj); ++ void ftintrne_w_s(FPURegister fd, FPURegister fj); ++ void ftintrne_w_d(FPURegister fd, FPURegister fj); ++ void ftintrne_l_s(FPURegister fd, FPURegister fj); ++ void ftintrne_l_d(FPURegister fd, FPURegister fj); ++ ++ void frint_s(FPURegister fd, FPURegister fj); ++ void frint_d(FPURegister fd, FPURegister fj); ++ ++ void fmov_s(FPURegister fd, FPURegister fj); ++ void fmov_d(FPURegister fd, FPURegister fj); ++ ++ void fsel(CFRegister ca, FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void movgr2fr_w(FPURegister fd, Register rj); ++ void movgr2fr_d(FPURegister fd, Register rj); ++ void movgr2frh_w(FPURegister fd, Register rj); ++ ++ void movfr2gr_s(Register rd, FPURegister fj); ++ void movfr2gr_d(Register rd, FPURegister fj); ++ void movfrh2gr_s(Register rd, FPURegister fj); ++ ++ void movgr2fcsr(Register rj); ++ void movfcsr2gr(Register rd); ++ ++ void movfr2cf(CFRegister cd, FPURegister fj); ++ void movcf2fr(FPURegister fd, CFRegister cj); ++ ++ void movgr2cf(CFRegister cd, Register rj); ++ void movcf2gr(Register rd, CFRegister cj); ++ ++ void fld_s(FPURegister fd, Register rj, int32_t si12); ++ void fld_d(FPURegister fd, Register rj, int32_t si12); ++ void fst_s(FPURegister fd, Register rj, int32_t si12); ++ void fst_d(FPURegister fd, Register rj, int32_t si12); ++ ++ void fldx_s(FPURegister fd, Register rj, Register rk); ++ void fldx_d(FPURegister fd, Register rj, Register rk); ++ void fstx_s(FPURegister fd, Register rj, Register rk); ++ void fstx_d(FPURegister fd, Register rj, Register rk); ++ ++ void fldgt_s(FPURegister fd, Register rj, Register rk); ++ void fldgt_d(FPURegister fd, Register rj, Register rk); ++ void fldle_s(FPURegister fd, Register rj, Register rk); ++ void fldle_d(FPURegister fd, Register rj, Register rk); ++ void fstgt_s(FPURegister fd, Register rj, Register rk); ++ void fstgt_d(FPURegister fd, Register rj, Register rk); ++ void fstle_s(FPURegister fd, Register rj, Register rk); ++ void fstle_d(FPURegister fd, Register rj, Register rk); ++ ++ // Check the code size generated from label to here. ++ int SizeOfCodeGeneratedSince(Label* label) { ++ return pc_offset() - label->pos(); ++ } ++ ++ // Check the number of instructions generated from label to here. ++ int InstructionsGeneratedSince(Label* label) { ++ return SizeOfCodeGeneratedSince(label) / kInstrSize; ++ } ++ ++ // Class for scoping postponing the trampoline pool generation. ++ class BlockTrampolinePoolScope { ++ public: ++ explicit BlockTrampolinePoolScope(Assembler* assem) : assem_(assem) { ++ assem_->StartBlockTrampolinePool(); ++ } ++ ~BlockTrampolinePoolScope() { assem_->EndBlockTrampolinePool(); } ++ ++ private: ++ Assembler* assem_; ++ ++ DISALLOW_IMPLICIT_CONSTRUCTORS(BlockTrampolinePoolScope); ++ }; ++ ++ // Class for postponing the assembly buffer growth. Typically used for ++ // sequences of instructions that must be emitted as a unit, before ++ // buffer growth (and relocation) can occur. 
++  // This blocking scope is not nestable.
++  class BlockGrowBufferScope {
++   public:
++    explicit BlockGrowBufferScope(Assembler* assem) : assem_(assem) {
++      assem_->StartBlockGrowBuffer();
++    }
++    ~BlockGrowBufferScope() { assem_->EndBlockGrowBuffer(); }
++
++   private:
++    Assembler* assem_;
++
++    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockGrowBufferScope);
++  };
++
++  // Record a deoptimization reason that can be used by a log or cpu profiler.
++  // Use --trace-deopt to enable.
++  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
++                         int id);
++
++  static int RelocateInternalReference(RelocInfo::Mode rmode, Address pc,
++                                       intptr_t pc_delta);
++
++  // Writes a single byte or word of data in the code stream. Used for
++  // inline tables, e.g., jump-tables.
++  void db(uint8_t data);
++  void dd(uint32_t data);
++  void dq(uint64_t data);
++  void dp(uintptr_t data) { dq(data); }
++  void dd(Label* label);
++
++  // Postpone the generation of the trampoline pool for the specified number of
++  // instructions.
++  void BlockTrampolinePoolFor(int instructions);
++
++  // Check if there is less than kGap bytes available in the buffer.
++  // If this is the case, we need to grow the buffer before emitting
++  // an instruction or relocation information.
++  inline bool overflow() const { return pc_ >= reloc_info_writer.pos() - kGap; }
++
++  // Get the number of bytes available in the buffer.
++  inline intptr_t available_space() const {
++    return reloc_info_writer.pos() - pc_;
++  }
++
++  // Read/patch instructions.
++  static Instr instr_at(Address pc) { return *reinterpret_cast<Instr*>(pc); }
++  static void instr_at_put(Address pc, Instr instr) {
++    *reinterpret_cast<Instr*>(pc) = instr;
++  }
++  Instr instr_at(int pos) {
++    return *reinterpret_cast<Instr*>(buffer_start_ + pos);
++  }
++  void instr_at_put(int pos, Instr instr) {
++    *reinterpret_cast<Instr*>(buffer_start_ + pos) = instr;
++  }
++
++  // Check if an instruction is a branch of some kind.
++ static bool IsBranch(Instr instr); ++ static bool IsB(Instr instr); ++ static bool IsBz(Instr instr); ++ static bool IsNal(Instr instr); ++ ++ static bool IsBeq(Instr instr); ++ static bool IsBne(Instr instr); ++ ++ static bool IsJump(Instr instr); ++ static bool IsMov(Instr instr, Register rd, Register rs); ++ static bool IsPcAddi(Instr instr, Register rd, int32_t si20); ++ ++ static bool IsJ(Instr instr); ++ static bool IsLu12i_w(Instr instr); ++ static bool IsOri(Instr instr); ++ static bool IsLu32i_d(Instr instr); ++ static bool IsLu52i_d(Instr instr); ++ ++ static bool IsNop(Instr instr, unsigned int type); ++ static bool IsPop(Instr instr); ++ static bool IsPush(Instr instr); ++ // static bool IsLwRegFpOffset(Instr instr); ++ // static bool IsSwRegFpOffset(Instr instr); ++ // static bool IsLwRegFpNegOffset(Instr instr); ++ // static bool IsSwRegFpNegOffset(Instr instr); ++ ++ static Register GetRjReg(Instr instr); ++ static Register GetRkReg(Instr instr); ++ static Register GetRdReg(Instr instr); ++ ++ static uint32_t GetRj(Instr instr); ++ static uint32_t GetRjField(Instr instr); ++ static uint32_t GetRk(Instr instr); ++ static uint32_t GetRkField(Instr instr); ++ static uint32_t GetRd(Instr instr); ++ static uint32_t GetRdField(Instr instr); ++ static uint32_t GetSa2(Instr instr); ++ static uint32_t GetSa3(Instr instr); ++ static uint32_t GetSa2Field(Instr instr); ++ static uint32_t GetSa3Field(Instr instr); ++ static uint32_t GetOpcodeField(Instr instr); ++ static uint32_t GetFunction(Instr instr); ++ static uint32_t GetFunctionField(Instr instr); ++ static uint32_t GetImmediate16(Instr instr); ++ static uint32_t GetLabelConst(Instr instr); ++ ++ static bool IsAddImmediate(Instr instr); ++ static Instr SetAddImmediateOffset(Instr instr, int16_t offset); ++ ++ static bool IsAndImmediate(Instr instr); ++ static bool IsEmittedConstant(Instr instr); ++ ++ void CheckTrampolinePool(); ++ ++ inline int UnboundLabelsCount() { return unbound_labels_count_; } ++ ++ protected: ++ // Helper function for memory load/store. ++ void AdjustBaseAndOffset(MemOperand* src); ++ ++ inline static void set_target_internal_reference_encoded_at(Address pc, ++ Address target); ++ ++ int64_t buffer_space() const { return reloc_info_writer.pos() - pc_; } ++ ++ // Decode branch instruction at pos and return branch target pos. ++ int target_at(int pos, bool is_internal); ++ ++ // Patch branch instruction at pos to branch to given branch target pos. ++ void target_at_put(int pos, int target_pos, bool is_internal); ++ ++ // Say if we need to relocate with this mode. ++ bool MustUseReg(RelocInfo::Mode rmode); ++ ++ // Record reloc info for current pc_. ++ void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0); ++ ++ // Block the emission of the trampoline pool before pc_offset. ++ void BlockTrampolinePoolBefore(int pc_offset) { ++ if (no_trampoline_pool_before_ < pc_offset) ++ no_trampoline_pool_before_ = pc_offset; ++ } ++ ++ void StartBlockTrampolinePool() { trampoline_pool_blocked_nesting_++; } ++ ++ void EndBlockTrampolinePool() { ++ trampoline_pool_blocked_nesting_--; ++ if (trampoline_pool_blocked_nesting_ == 0) { ++ CheckTrampolinePoolQuick(1); ++ } ++ } ++ ++ bool is_trampoline_pool_blocked() const { ++ return trampoline_pool_blocked_nesting_ > 0; ++ } ++ ++ bool has_exception() const { return internal_trampoline_exception_; } ++ ++ bool is_trampoline_emitted() const { return trampoline_emitted_; } ++ ++ // Temporarily block automatic assembly buffer growth. 
++ void StartBlockGrowBuffer() { ++ DCHECK(!block_buffer_growth_); ++ block_buffer_growth_ = true; ++ } ++ ++ void EndBlockGrowBuffer() { ++ DCHECK(block_buffer_growth_); ++ block_buffer_growth_ = false; ++ } ++ ++ bool is_buffer_growth_blocked() const { return block_buffer_growth_; } ++ ++ void CheckTrampolinePoolQuick(int extra_instructions = 0) { ++ if (pc_offset() >= next_buffer_check_ - extra_instructions * kInstrSize) { ++ CheckTrampolinePool(); ++ } ++ } ++ ++ private: ++ // Avoid overflows for displacements etc. ++ static const int kMaximalBufferSize = 512 * MB; ++ ++ // Buffer size and constant pool distance are checked together at regular ++ // intervals of kBufferCheckInterval emitted bytes. ++ static constexpr int kBufferCheckInterval = 1 * KB / 2; ++ ++ // Code generation. ++ // The relocation writer's position is at least kGap bytes below the end of ++ // the generated instructions. This is so that multi-instruction sequences do ++ // not have to check for overflow. The same is true for writes of large ++ // relocation info entries. ++ static constexpr int kGap = 64; ++ STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap); ++ ++ // Repeated checking whether the trampoline pool should be emitted is rather ++ // expensive. By default we only check again once a number of instructions ++ // has been generated. ++ static constexpr int kCheckConstIntervalInst = 32; ++ static constexpr int kCheckConstInterval = ++ kCheckConstIntervalInst * kInstrSize; ++ ++ int next_buffer_check_; // pc offset of next buffer check. ++ ++ // Emission of the trampoline pool may be blocked in some code sequences. ++ int trampoline_pool_blocked_nesting_; // Block emission if this is not zero. ++ int no_trampoline_pool_before_; // Block emission before this pc offset. ++ ++ // Keep track of the last emitted pool to guarantee a maximal distance. ++ int last_trampoline_pool_end_; // pc offset of the end of the last pool. ++ ++ // Automatic growth of the assembly buffer may be blocked for some sequences. ++ bool block_buffer_growth_; // Block growth when true. ++ ++ // Relocation information generation. ++ // Each relocation is encoded as a variable size value. ++ static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize; ++ RelocInfoWriter reloc_info_writer; ++ ++ // The bound position, before this we cannot do instruction elimination. ++ int last_bound_pos_; ++ ++ // Code emission. 
++ inline void CheckBuffer(); ++ void GrowBuffer(); ++ inline void emit(Instr x); ++ inline void emit(uint64_t x); ++ // inline void CheckForEmitInForbiddenSlot(); ++ template ++ inline void EmitHelper(T x); ++ inline void EmitHelper(Instr x); ++ ++ void GenB(Opcode opcode, Register rj, int32_t si21); // opcode:6 ++ void GenB(Opcode opcode, CFRegister cj, int32_t si21, bool isEq); ++ void GenB(Opcode opcode, int32_t si26); ++ void GenBJ(Opcode opcode, Register rj, Register rd, int32_t si16); ++ void GenCmp(Opcode opcode, FPUCondition cond, FPURegister fk, FPURegister fj, ++ CFRegister cd); ++ void GenSel(Opcode opcode, CFRegister ca, FPURegister fk, FPURegister fj, ++ FPURegister rd); ++ ++ void GenRegister(Opcode opcode, Register rj, Register rd, bool rjrd = true); ++ void GenRegister(Opcode opcode, FPURegister fj, FPURegister fd); ++ void GenRegister(Opcode opcode, Register rj, FPURegister fd); ++ void GenRegister(Opcode opcode, FPURegister fj, Register rd); ++ void GenRegister(Opcode opcode, Register rj, FPUControlRegister fd); ++ void GenRegister(Opcode opcode, FPUControlRegister fj, Register rd); ++ void GenRegister(Opcode opcode, FPURegister fj, CFRegister cd); ++ void GenRegister(Opcode opcode, CFRegister cj, FPURegister fd); ++ void GenRegister(Opcode opcode, Register rj, CFRegister cd); ++ void GenRegister(Opcode opcode, CFRegister cj, Register rd); ++ ++ void GenRegister(Opcode opcode, Register rk, Register rj, Register rd); ++ void GenRegister(Opcode opcode, FPURegister fk, FPURegister fj, ++ FPURegister fd); ++ ++ void GenRegister(Opcode opcode, FPURegister fa, FPURegister fk, ++ FPURegister fj, FPURegister fd); ++ void GenRegister(Opcode opcode, Register rk, Register rj, FPURegister fd); ++ ++ void GenImm(Opcode opcode, int32_t bit3, Register rk, Register rj, ++ Register rd); ++ void GenImm(Opcode opcode, int32_t bit6m, int32_t bit6l, Register rj, ++ Register rd); ++ void GenImm(Opcode opcode, int32_t bit20, Register rd); ++ void GenImm(Opcode opcode, int32_t bit15); ++ void GenImm(Opcode opcode, int32_t value, Register rj, Register rd, ++ int32_t value_bits); // 6 | 12 | 14 | 16 ++ void GenImm(Opcode opcode, int32_t bit12, Register rj, FPURegister fd); ++ ++ // Labels. ++ void print(const Label* L); ++ void bind_to(Label* L, int pos); ++ void next(Label* L, bool is_internal); ++ ++ // One trampoline consists of: ++ // - space for trampoline slots, ++ // - space for labels. ++ // ++ // Space for trampoline slots is equal to slot_count * 2 * kInstrSize. ++ // Space for trampoline slots precedes space for labels. Each label is of one ++ // instruction size, so total amount for labels is equal to ++ // label_count * kInstrSize. ++ class Trampoline { ++ public: ++ Trampoline() { ++ start_ = 0; ++ next_slot_ = 0; ++ free_slot_count_ = 0; ++ end_ = 0; ++ } ++ Trampoline(int start, int slot_count) { ++ start_ = start; ++ next_slot_ = start; ++ free_slot_count_ = slot_count; ++ end_ = start + slot_count * kTrampolineSlotsSize; ++ } ++ int start() { return start_; } ++ int end() { return end_; } ++ int take_slot() { ++ int trampoline_slot = kInvalidSlotPos; ++ if (free_slot_count_ <= 0) { ++ // We have run out of space on trampolines. ++ // Make sure we fail in debug mode, so we become aware of each case ++ // when this happens. ++ DCHECK(0); ++ // Internal exception will be caught. 
++ } else { ++ trampoline_slot = next_slot_; ++ free_slot_count_--; ++ next_slot_ += kTrampolineSlotsSize; ++ } ++ return trampoline_slot; ++ } ++ ++ private: ++ int start_; ++ int end_; ++ int next_slot_; ++ int free_slot_count_; ++ }; ++ ++ int32_t get_trampoline_entry(int32_t pos); ++ int unbound_labels_count_; ++ // After trampoline is emitted, long branches are used in generated code for ++ // the forward branches whose target offsets could be beyond reach of branch ++ // instruction. We use this information to trigger different mode of ++ // branch instruction generation, where we use jump instructions rather ++ // than regular branch instructions. ++ bool trampoline_emitted_; ++ static constexpr int kInvalidSlotPos = -1; ++ ++ // Internal reference positions, required for unbounded internal reference ++ // labels. ++ std::set internal_reference_positions_; ++ bool is_internal_reference(Label* L) { ++ return internal_reference_positions_.find(L->pos()) != ++ internal_reference_positions_.end(); ++ } ++ ++ void EmittedCompactBranchInstruction() { prev_instr_compact_branch_ = true; } ++ void ClearCompactBranchState() { prev_instr_compact_branch_ = false; } ++ bool prev_instr_compact_branch_ = false; ++ ++ Trampoline trampoline_; ++ bool internal_trampoline_exception_; ++ ++ RegList scratch_register_list_; ++ ++ private: ++ void AllocateAndInstallRequestedHeapObjects(Isolate* isolate); ++ ++ int WriteCodeComments(); ++ ++ friend class RegExpMacroAssemblerMIPS; ++ friend class RelocInfo; ++ friend class BlockTrampolinePoolScope; ++ friend class EnsureSpace; ++}; ++ ++class EnsureSpace { ++ public: ++ explicit inline EnsureSpace(Assembler* assembler); ++}; ++ ++class V8_EXPORT_PRIVATE UseScratchRegisterScope { ++ public: ++ explicit UseScratchRegisterScope(Assembler* assembler); ++ ~UseScratchRegisterScope(); ++ ++ Register Acquire(); ++ bool hasAvailable() const; ++ ++ private: ++ RegList* available_; ++ RegList old_available_; ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.cc +new file mode 100644 +index 00000000000..1a406a8c4d3 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.cc +@@ -0,0 +1,100 @@ ++// Copyright 2011 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/la64/constants-la64.h" ++ ++namespace v8 { ++namespace internal { ++ ++// ----------------------------------------------------------------------------- ++// Registers. ++ ++// These register names are defined in a way to match the native disassembler ++// formatting. See for example the command "objdump -d ". ++const char* Registers::names_[kNumSimuRegisters] = { ++ "zero_reg", "ra", "gp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", ++ "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "tp", ++ "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "pc"}; ++ ++// List of alias names which can be used when referring to MIPS registers. 
++const Registers::RegisterAlias Registers::aliases_[] = { ++ {0, "zero"}, {23, "cp"}, {kInvalidRegister, nullptr}}; ++ ++const char* Registers::Name(int reg) { ++ const char* result; ++ if ((0 <= reg) && (reg < kNumSimuRegisters)) { ++ result = names_[reg]; ++ } else { ++ result = "noreg"; ++ } ++ return result; ++} ++ ++int Registers::Number(const char* name) { ++ // Look through the canonical names. ++ for (int i = 0; i < kNumSimuRegisters; i++) { ++ if (strcmp(names_[i], name) == 0) { ++ return i; ++ } ++ } ++ ++ // Look through the alias names. ++ int i = 0; ++ while (aliases_[i].reg != kInvalidRegister) { ++ if (strcmp(aliases_[i].name, name) == 0) { ++ return aliases_[i].reg; ++ } ++ i++; ++ } ++ ++ // No register with the reguested name found. ++ return kInvalidRegister; ++} ++ ++const char* FPURegisters::names_[kNumFPURegisters] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", ++ "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", ++ "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"}; ++ ++// List of alias names which can be used when referring to MIPS registers. ++const FPURegisters::RegisterAlias FPURegisters::aliases_[] = { ++ {kInvalidRegister, nullptr}}; ++ ++const char* FPURegisters::Name(int creg) { ++ const char* result; ++ if ((0 <= creg) && (creg < kNumFPURegisters)) { ++ result = names_[creg]; ++ } else { ++ result = "nocreg"; ++ } ++ return result; ++} ++ ++int FPURegisters::Number(const char* name) { ++ // Look through the canonical names. ++ for (int i = 0; i < kNumFPURegisters; i++) { ++ if (strcmp(names_[i], name) == 0) { ++ return i; ++ } ++ } ++ ++ // Look through the alias names. ++ int i = 0; ++ while (aliases_[i].creg != kInvalidRegister) { ++ if (strcmp(aliases_[i].name, name) == 0) { ++ return aliases_[i].creg; ++ } ++ i++; ++ } ++ ++ // No Cregister with the reguested name found. ++ return kInvalidFPURegister; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.h +new file mode 100644 +index 00000000000..6cf2ec3b7ec +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.h +@@ -0,0 +1,1479 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_CODEGEN_LA64_CONSTANTS_LA64_H_ ++#define V8_CODEGEN_LA64_CONSTANTS_LA64_H_ ++ ++#include "src/base/logging.h" ++#include "src/base/macros.h" ++#include "src/common/globals.h" ++ ++// UNIMPLEMENTED_ macro for LOONGISA. ++#ifdef DEBUG ++#define UNIMPLEMENTED_LOONGISA() \ ++ v8::internal::PrintF("%s, \tline %d: \tfunction %s not implemented. \n", \ ++ __FILE__, __LINE__, __func__) ++#else ++#define UNIMPLEMENTED_LOONGISA() ++#endif ++ ++#define UNSUPPORTED_LOONGISA() \ ++ v8::internal::PrintF("Unsupported instruction.\n") ++ ++const uint32_t kLeastSignificantByteInInt32Offset = 0; ++const uint32_t kLessSignificantWordInDoublewordOffset = 0; ++ ++#ifndef __STDC_FORMAT_MACROS ++#define __STDC_FORMAT_MACROS ++#endif ++#include ++ ++// Defines constants and accessor classes to assemble, disassemble and ++// simulate LA64 instructions. 
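A minimal sketch of the name-lookup behaviour of the Registers helpers defined in constants-la64.cc above; the expected values are read directly off the names_ and aliases_ tables shown there:

    // Values follow from the names_ and aliases_ tables in constants-la64.cc.
    DCHECK_EQ(3, Registers::Number("sp"));    // position in the canonical names_ table
    DCHECK_EQ(23, Registers::Number("cp"));   // resolved through the aliases_ table
    DCHECK_EQ(kInvalidRegister, Registers::Number("fake"));
    // Out-of-range indices fall back to the placeholder: Registers::Name(40) is "noreg".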
++ ++namespace v8 { ++namespace internal { ++ ++constexpr size_t kMaxPCRelativeCodeRangeInMB = 128; ++ ++// ----------------------------------------------------------------------------- ++// Registers and FPURegisters. ++ ++// Number of general purpose registers. ++const int kNumRegisters = 32; ++const int kInvalidRegister = -1; ++ ++// Number of registers with pc. ++const int kNumSimuRegisters = 33; ++ ++// In the simulator, the PC register is simulated as the 34th register. ++const int kPCRegister = 32; ++ ++// Number coprocessor registers. ++const int kNumFPURegisters = 32; ++const int kInvalidFPURegister = -1; ++ ++// FPU (coprocessor 1) control registers. Currently only FCSR is implemented. ++// TODO fcsr0 fcsr1 fcsr2 fcsr3 ++const int kFCSRRegister = 0; ++const int kInvalidFPUControlRegister = -1; ++const uint32_t kFPUInvalidResult = static_cast(1u << 31) - 1; ++const int32_t kFPUInvalidResultNegative = static_cast(1u << 31); ++const uint64_t kFPU64InvalidResult = ++ static_cast(static_cast(1) << 63) - 1; ++const int64_t kFPU64InvalidResultNegative = ++ static_cast(static_cast(1) << 63); ++ ++// FCSR constants. ++// TODO ++const uint32_t kFCSRInexactFlagBit = 16; ++const uint32_t kFCSRUnderflowFlagBit = 17; ++const uint32_t kFCSROverflowFlagBit = 18; ++const uint32_t kFCSRDivideByZeroFlagBit = 19; ++const uint32_t kFCSRInvalidOpFlagBit = 20; ++ ++const uint32_t kFCSRInexactFlagMask = 1 << kFCSRInexactFlagBit; ++const uint32_t kFCSRUnderflowFlagMask = 1 << kFCSRUnderflowFlagBit; ++const uint32_t kFCSROverflowFlagMask = 1 << kFCSROverflowFlagBit; ++const uint32_t kFCSRDivideByZeroFlagMask = 1 << kFCSRDivideByZeroFlagBit; ++const uint32_t kFCSRInvalidOpFlagMask = 1 << kFCSRInvalidOpFlagBit; ++ ++const uint32_t kFCSRFlagMask = ++ kFCSRInexactFlagMask | kFCSRUnderflowFlagMask | kFCSROverflowFlagMask | ++ kFCSRDivideByZeroFlagMask | kFCSRInvalidOpFlagMask; ++ ++const uint32_t kFCSRExceptionFlagMask = kFCSRFlagMask ^ kFCSRInexactFlagMask; ++ ++// 'preld' instruction hints ++const int32_t kPrefHintLoad = 0; ++const int32_t kPrefHintStore = 8; ++ ++// Actual value of root register is offset from the root array's start ++// to take advantage of negative displacement values. ++// TODO(sigurds): Choose best value. ++constexpr int kRootRegisterBias = 256; ++ ++// Helper functions for converting between register numbers and names. ++class Registers { ++ public: ++ // Return the name of the register. ++ static const char* Name(int reg); ++ ++ // Lookup the register number for the name provided. ++ static int Number(const char* name); ++ ++ struct RegisterAlias { ++ int reg; ++ const char* name; ++ }; ++ ++ static const int64_t kMaxValue = 0x7fffffffffffffffl; ++ static const int64_t kMinValue = 0x8000000000000000l; ++ ++ private: ++ static const char* names_[kNumSimuRegisters]; ++ static const RegisterAlias aliases_[]; ++}; ++ ++// Helper functions for converting between register numbers and names. ++class FPURegisters { ++ public: ++ // Return the name of the register. ++ static const char* Name(int reg); ++ ++ // Lookup the register number for the name provided. ++ static int Number(const char* name); ++ ++ struct RegisterAlias { ++ int creg; ++ const char* name; ++ }; ++ ++ private: ++ static const char* names_[kNumFPURegisters]; ++ static const RegisterAlias aliases_[]; ++}; ++ ++// ----------------------------------------------------------------------------- ++// Instructions encoding constants. ++ ++// On LoongISA all instructions are 32 bits. 
++using Instr = int32_t; ++ ++// Special Software Interrupt codes when used in the presence of the LA64 ++// simulator. ++enum SoftwareInterruptCodes { ++ // Transition to C code. ++ call_rt_redirected = 0x7fff ++}; ++ ++// On LA64 Simulator breakpoints can have different codes: ++// - Breaks between 0 and kMaxWatchpointCode are treated as simple watchpoints, ++// the simulator will run through them and print the registers. ++// - Breaks between kMaxWatchpointCode and kMaxStopCode are treated as stop() ++// instructions (see Assembler::stop()). ++// - Breaks larger than kMaxStopCode are simple breaks, dropping you into the ++// debugger. ++const uint32_t kMaxWatchpointCode = 31; ++const uint32_t kMaxStopCode = 127; ++STATIC_ASSERT(kMaxWatchpointCode < kMaxStopCode); ++ ++// ----- Fields offset and length. ++const int kRjShift = 5; ++const int kRjBits = 5; ++const int kRkShift = 10; ++const int kRkBits = 5; ++const int kRdShift = 0; ++const int kRdBits = 5; ++const int kSaShift = 15; ++const int kSa2Bits = 2; ++const int kSa3Bits = 3; ++const int kCdShift = 0; ++const int kCdBits = 3; ++const int kCjShift = 5; ++const int kCjBits = 3; ++const int kCodeShift = 0; ++const int kCodeBits = 15; ++const int kCondShift = 15; ++const int kCondBits = 5; ++const int kUi5Shift = 10; ++const int kUi5Bits = 5; ++const int kUi6Shift = 10; ++const int kUi6Bits = 6; ++const int kUi12Shift = 10; ++const int kUi12Bits = 12; ++const int kSi12Shift = 10; ++const int kSi12Bits = 12; ++const int kSi14Shift = 10; ++const int kSi14Bits = 14; ++const int kSi16Shift = 10; ++const int kSi16Bits = 16; ++const int kSi20Shift = 5; ++const int kSi20Bits = 20; ++const int kMsbwShift = 16; ++const int kMsbwBits = 5; ++const int kLsbwShift = 10; ++const int kLsbwBits = 5; ++const int kMsbdShift = 16; ++const int kMsbdBits = 6; ++const int kLsbdShift = 10; ++const int kLsbdBits = 6; ++const int kFdShift = 0; ++const int kFdBits = 5; ++const int kFjShift = 5; ++const int kFjBits = 5; ++const int kFkShift = 10; ++const int kFkBits = 5; ++const int kFaShift = 15; ++const int kFaBits = 5; ++const int kCaShift = 15; ++const int kCaBits = 3; ++const int kHint15Shift = 0; ++const int kHint15Bits = 15; ++const int kHint5Shift = 0; ++const int kHint5Bits = 5; ++const int kOffsLowShift = 10; ++const int kOffsLowBits = 16; ++const int kOffs26HighShift = 0; ++const int kOffs26HighBits = 10; ++const int kOffs21HighShift = 0; ++const int kOffs21HighBits = 5; ++const int kImm12Shift = 0; ++const int kImm12Bits = 12; ++const int kImm16Shift = 0; ++const int kImm16Bits = 16; ++const int kImm26Shift = 0; ++const int kImm26Bits = 26; ++const int kImm28Shift = 0; ++const int kImm28Bits = 28; ++const int kImm32Shift = 0; ++const int kImm32Bits = 32; ++ ++// ----- Miscellaneous useful masks. ++// Instruction bit masks. ++const int kRjFieldMask = ((1 << kRjBits) - 1) << kRjShift; ++const int kRkFieldMask = ((1 << kRkBits) - 1) << kRkShift; ++const int kRdFieldMask = ((1 << kRdBits) - 1) << kRdShift; ++const int kSa2FieldMask = ((1 << kSa2Bits) - 1) << kSaShift; ++const int kSa3FieldMask = ((1 << kSa3Bits) - 1) << kSaShift; ++// Misc masks. 
++const int kHiMaskOf32 = 0xffff << 16; // Only to be used with 32-bit values ++const int kLoMaskOf32 = 0xffff; ++const int kSignMaskOf32 = 0x80000000; // Only to be used with 32-bit values ++const int64_t kTop16MaskOf64 = (int64_t)0xffff << 48; ++const int64_t kHigher16MaskOf64 = (int64_t)0xffff << 32; ++const int64_t kUpper16MaskOf64 = (int64_t)0xffff << 16; ++ ++const int kImm12Mask = ((1 << kImm12Bits) - 1) << kImm12Shift; ++const int kImm16Mask = ((1 << kImm16Bits) - 1) << kImm16Shift; ++const int kImm26Mask = ((1 << kImm26Bits) - 1) << kImm26Shift; ++const int kImm28Mask = ((1 << kImm28Bits) - 1) << kImm28Shift; ++ ++// ----- LA64 Opcodes and Function Fields. ++enum Opcode : uint32_t { ++ BEQZ = 0x10U << 26, ++ BNEZ = 0x11U << 26, ++ BCZ = 0x12U << 26, // BCEQZ & BCNEZ ++ JIRL = 0x13U << 26, ++ B = 0x14U << 26, ++ BL = 0x15U << 26, ++ BEQ = 0x16U << 26, ++ BNE = 0x17U << 26, ++ BLT = 0x18U << 26, ++ BGE = 0x19U << 26, ++ BLTU = 0x1aU << 26, ++ BGEU = 0x1bU << 26, ++ ++ ADDU16I_D = 0x4U << 26, ++ ++ LU12I_W = 0xaU << 25, ++ LU32I_D = 0xbU << 25, ++ PCADDI = 0xcU << 25, ++ PCALAU12I = 0xdU << 25, ++ PCADDU12I = 0xeU << 25, ++ PCADDU18I = 0xfU << 25, ++ ++ CSR = 0x4U << 24, // CSRRD & CSRWR & CSRXCHG ++ ++ LL_W = 0x20U << 24, ++ SC_W = 0x21U << 24, ++ LL_D = 0x22U << 24, ++ SC_D = 0x23U << 24, ++ LDPTR_W = 0x24U << 24, ++ STPTR_W = 0x25U << 24, ++ LDPTR_D = 0x26U << 24, ++ STPTR_D = 0x27U << 24, ++ ++ BSTR_W = 0x1U << 22, // BSTRINS_W & BSTRPICK_W ++ BSTRINS_W = BSTR_W, ++ BSTRPICK_W = BSTR_W, ++ BSTRINS_D = 0x2U << 22, ++ BSTRPICK_D = 0x3U << 22, ++ ++ SLTI = 0x8U << 22, ++ SLTUI = 0x9U << 22, ++ ADDI_W = 0xaU << 22, ++ ADDI_D = 0xbU << 22, ++ LU52I_D = 0xcU << 22, ++ ANDI = 0xdU << 22, ++ ORI = 0xeU << 22, ++ XORI = 0xfU << 22, ++ ++ CACHE = 0x18U << 22, ++ ++ LD_B = 0xa0U << 22, ++ LD_H = 0xa1U << 22, ++ LD_W = 0xa2U << 22, ++ LD_D = 0xa3U << 22, ++ ST_B = 0xa4U << 22, ++ ST_H = 0xa5U << 22, ++ ST_W = 0xa6U << 22, ++ ST_D = 0xa7U << 22, ++ LD_BU = 0xa8U << 22, ++ LD_HU = 0xa9U << 22, ++ LD_WU = 0xaaU << 22, ++ PRELD = 0xabU << 22, ++ FLD_S = 0xacU << 22, ++ FST_S = 0xadU << 22, ++ FLD_D = 0xaeU << 22, ++ FST_D = 0xafU << 22, ++ ++ FMADD_S = 0x81U << 20, ++ FMADD_D = 0x82U << 20, ++ FMSUB_S = 0x85U << 20, ++ FMSUB_D = 0x86U << 20, ++ FNMADD_S = 0x89U << 20, ++ FNMADD_D = 0x8aU << 20, ++ FNMSUB_S = 0x8dU << 20, ++ FNMSUB_D = 0x8eU << 20, ++ FCMP_COND_S = 0xc1U << 20, ++ FCMP_COND_D = 0xc2U << 20, ++ ++ BYTEPICK_D = 0x3U << 18, ++ BYTEPICK_W = 0x2U << 18, ++ ++ LDDIR = 0x190U << 18, ++ LDPTE = 0x191U << 18, ++ ++ FSEL = 0x340U << 18, ++ ++ ALSL = 0x1U << 18, ++ ALSL_W = ALSL, ++ ALSL_WU = ALSL, ++ ++ ALSL_D = 0xbU << 18, ++ ++ SLLI_W = 0x40U << 16, ++ SRLI_W = 0x44U << 16, ++ SRAI_W = 0x48U << 16, ++ ROTRI_W = 0x4cU << 16, ++ ++ SLLI_D = 0x41U << 16, ++ SRLI_D = 0x45U << 16, ++ SRAI_D = 0x49U << 16, ++ ROTRI_D = 0x4dU << 16, ++ ++ SLLI = 0x10U << 18, ++ SRLI = 0x11U << 18, ++ SRAI = 0x12U << 18, ++ ROTRI = 0x13U << 18, ++ ++ ASRTLE_D = 0x2U << 15, ++ ASRTGT_D = 0x3U << 15, ++ ++ ADD_W = 0x20U << 15, ++ ADD_D = 0x21U << 15, ++ SUB_W = 0x22U << 15, ++ SUB_D = 0x23U << 15, ++ SLT = 0x24U << 15, ++ SLTU = 0x25U << 15, ++ MASKNEZ = 0x26U << 15, ++ MASKEQZ = 0x27U << 15, ++ NOR = 0x28U << 15, ++ AND = 0x29U << 15, ++ OR = 0x2aU << 15, ++ XOR = 0x2bU << 15, ++ ORN = 0x2cU << 15, ++ ANDN = 0x2dU << 15, ++ SLL_W = 0x2eU << 15, ++ SRL_W = 0x2fU << 15, ++ SRA_W = 0x30U << 15, ++ SLL_D = 0x31U << 15, ++ SRL_D = 0x32U << 15, ++ SRA_D = 0x33U << 15, ++ ROTR_W = 0x36U << 15, ++ ROTR_D = 0x37U << 15, 
++ MUL_W = 0x38U << 15, ++ MULH_W = 0x39U << 15, ++ MULH_WU = 0x3aU << 15, ++ MUL_D = 0x3bU << 15, ++ MULH_D = 0x3cU << 15, ++ MULH_DU = 0x3dU << 15, ++ MULW_D_W = 0x3eU << 15, ++ MULW_D_WU = 0x3fU << 15, ++ ++ DIV_W = 0x40U << 15, ++ MOD_W = 0x41U << 15, ++ DIV_WU = 0x42U << 15, ++ MOD_WU = 0x43U << 15, ++ DIV_D = 0x44U << 15, ++ MOD_D = 0x45U << 15, ++ DIV_DU = 0x46U << 15, ++ MOD_DU = 0x47U << 15, ++ ++ CRC_W_B_W = 0x48U << 15, ++ CRC_W_H_W = 0x49U << 15, ++ CRC_W_W_W = 0x4aU << 15, ++ CRC_W_D_W = 0x4bU << 15, ++ CRCC_W_B_W = 0x4cU << 15, ++ CRCC_W_H_W = 0x4dU << 15, ++ CRCC_W_W_W = 0x4eU << 15, ++ CRCC_W_D_W = 0x4fU << 15, ++ ++ BREAK = 0x54U << 15, ++ DBGCALL = 0x55U << 15, ++ SYSCALL = 0x56U << 15, ++ HYPCALL = 0x57U << 15, ++ ++ FADD_S = 0x201U << 15, ++ FADD_D = 0x202U << 15, ++ FSUB_S = 0x205U << 15, ++ FSUB_D = 0x206U << 15, ++ FMUL_S = 0x209U << 15, ++ FMUL_D = 0x20aU << 15, ++ FDIV_S = 0x20dU << 15, ++ FDIV_D = 0x20eU << 15, ++ FMAX_S = 0x211U << 15, ++ FMAX_D = 0x212U << 15, ++ FMIN_S = 0x215U << 15, ++ FMIN_D = 0x216U << 15, ++ FMAXA_S = 0x219U << 15, ++ FMAXA_D = 0x21aU << 15, ++ FMINA_S = 0x21dU << 15, ++ FMINA_D = 0x21eU << 15, ++ FSCALEB_S = 0x221U << 15, ++ FSCALEB_D = 0x222U << 15, ++ FCOPYSIGN_S = 0x225U << 15, ++ FCOPYSIGN_D = 0x226U << 15, ++ ++ WAIT_INVTLB = 0xc91U << 15, // wait & invtlb ++ ++ LDX_B = 0x7000U << 15, ++ LDX_H = 0x7008U << 15, ++ LDX_W = 0x7010U << 15, ++ LDX_D = 0x7018U << 15, ++ STX_B = 0x7020U << 15, ++ STX_H = 0x7028U << 15, ++ STX_W = 0x7030U << 15, ++ STX_D = 0x7038U << 15, ++ LDX_BU = 0x7040U << 15, ++ LDX_HU = 0x7048U << 15, ++ LDX_WU = 0x7050U << 15, ++ PRELDX = 0x7058U << 15, ++ FLDX_S = 0x7060U << 15, ++ FLDX_D = 0x7068U << 15, ++ FSTX_S = 0x7070U << 15, ++ FSTX_D = 0x7078U << 15, ++ ++ AMSWAP_W = 0x70c0U << 15, ++ AMSWAP_D = 0x70c1U << 15, ++ AMADD_W = 0x70c2U << 15, ++ AMADD_D = 0x70c3U << 15, ++ AMAND_W = 0x70c4U << 15, ++ AMAND_D = 0x70c5U << 15, ++ AMOR_W = 0x70c6U << 15, ++ AMOR_D = 0x70c7U << 15, ++ AMXOR_W = 0x70c8U << 15, ++ AMXOR_D = 0x70c9U << 15, ++ AMMAX_W = 0x70caU << 15, ++ AMMAX_D = 0x70cbU << 15, ++ AMMIN_W = 0x70ccU << 15, ++ AMMIN_D = 0x70cdU << 15, ++ AMMAX_WU = 0x70ceU << 15, ++ AMMAX_DU = 0x70cfU << 15, ++ AMMIN_WU = 0x70d0U << 15, ++ AMMIN_DU = 0x70d1U << 15, ++ AMSWAP_DB_W = 0x70d2U << 15, ++ AMSWAP_DB_D = 0x70d3U << 15, ++ AMADD_DB_W = 0x70d4U << 15, ++ AMADD_DB_D = 0x70d5U << 15, ++ AMAND_DB_W = 0x70d6U << 15, ++ AMAND_DB_D = 0x70d7U << 15, ++ AMOR_DB_W = 0x70d8U << 15, ++ AMOR_DB_D = 0x70d9U << 15, ++ AMXOR_DB_W = 0x70daU << 15, ++ AMXOR_DB_D = 0x70dbU << 15, ++ AMMAX_DB_W = 0x70dcU << 15, ++ AMMAX_DB_D = 0x70ddU << 15, ++ AMMIN_DB_W = 0x70deU << 15, ++ AMMIN_DB_D = 0x70dfU << 15, ++ AMMAX_DB_WU = 0x70e0U << 15, ++ AMMAX_DB_DU = 0x70e1U << 15, ++ AMMIN_DB_WU = 0x70e2U << 15, ++ AMMIN_DB_DU = 0x70e3U << 15, ++ ++ DBAR = 0x70e4U << 15, ++ IBAR = 0x70e5U << 15, ++ ++ FLDGT_S = 0x70e8U << 15, ++ FLDGT_D = 0x70e9U << 15, ++ FLDLE_S = 0x70eaU << 15, ++ FLDLE_D = 0x70ebU << 15, ++ FSTGT_S = 0x70ecU << 15, ++ FSTGT_D = 0x70edU << 15, ++ FSTLE_S = 0x70eeU << 15, ++ FSTLE_D = 0x70efU << 15, ++ LDGT_B = 0x70f0U << 15, ++ LDGT_H = 0x70f1U << 15, ++ LDGT_W = 0x70f2U << 15, ++ LDGT_D = 0x70f3U << 15, ++ LDLE_B = 0x70f4U << 15, ++ LDLE_H = 0x70f5U << 15, ++ LDLE_W = 0x70f6U << 15, ++ LDLE_D = 0x70f7U << 15, ++ STGT_B = 0x70f8U << 15, ++ STGT_H = 0x70f9U << 15, ++ STGT_W = 0x70faU << 15, ++ STGT_D = 0x70fbU << 15, ++ STLE_B = 0x70fcU << 15, ++ STLE_H = 0x70fdU << 15, ++ STLE_W = 0x70feU << 15, ++ STLE_D = 0x70ffU << 15, ++ ++ 
CLO_W = 0X4U << 10, ++ CLZ_W = 0X5U << 10, ++ CTO_W = 0X6U << 10, ++ CTZ_W = 0X7U << 10, ++ CLO_D = 0X8U << 10, ++ CLZ_D = 0X9U << 10, ++ CTO_D = 0XaU << 10, ++ CTZ_D = 0XbU << 10, ++ REVB_2H = 0XcU << 10, ++ REVB_4H = 0XdU << 10, ++ REVB_2W = 0XeU << 10, ++ REVB_D = 0XfU << 10, ++ REVH_2W = 0X10U << 10, ++ REVH_D = 0X11U << 10, ++ BITREV_4B = 0X12U << 10, ++ BITREV_8B = 0X13U << 10, ++ BITREV_W = 0X14U << 10, ++ BITREV_D = 0X15U << 10, ++ EXT_W_H = 0X16U << 10, ++ EXT_W_B = 0X17U << 10, ++ RDTIMEL_W = 0X18U << 10, ++ RDTIMEH_W = 0X19U << 10, ++ RDTIME_D = 0X1aU << 10, ++ CPUCFG_W = 0X1bU << 10, ++ ++ FABS_S = 0X4501U << 10, ++ FABS_D = 0X4502U << 10, ++ FNEG_S = 0X4505U << 10, ++ FNEG_D = 0X4506U << 10, ++ FLOGB_S = 0X4509U << 10, ++ FLOGB_D = 0X450aU << 10, ++ FCLASS_S = 0X450dU << 10, ++ FCLASS_D = 0X450eU << 10, ++ FSQRT_S = 0X4511U << 10, ++ FSQRT_D = 0X4512U << 10, ++ FRECIP_S = 0X4515U << 10, ++ FRECIP_D = 0X4516U << 10, ++ FRSQRT_S = 0X4519U << 10, ++ FRSQRT_D = 0X451aU << 10, ++ FMOV_S = 0X4525U << 10, ++ FMOV_D = 0X4526U << 10, ++ MOVGR2FR_W = 0X4529U << 10, ++ MOVGR2FR_D = 0X452aU << 10, ++ MOVGR2FRH_W = 0X452bU << 10, ++ MOVFR2GR_S = 0X452dU << 10, ++ MOVFR2GR_D = 0X452eU << 10, ++ MOVFRH2GR_S = 0X452fU << 10, ++ MOVGR2FCSR = 0X4530U << 10, ++ MOVFCSR2GR = 0X4532U << 10, ++ MOVFR2CF = 0X4534U << 10, ++ MOVGR2CF = 0X4536U << 10, ++ ++ FCVT_S_D = 0x4646U << 10, ++ FCVT_D_S = 0x4649U << 10, ++ FTINTRM_W_S = 0x4681U << 10, ++ FTINTRM_W_D = 0x4682U << 10, ++ FTINTRM_L_S = 0x4689U << 10, ++ FTINTRM_L_D = 0x468aU << 10, ++ FTINTRP_W_S = 0x4691U << 10, ++ FTINTRP_W_D = 0x4692U << 10, ++ FTINTRP_L_S = 0x4699U << 10, ++ FTINTRP_L_D = 0x469aU << 10, ++ FTINTRZ_W_S = 0x46a1U << 10, ++ FTINTRZ_W_D = 0x46a2U << 10, ++ FTINTRZ_L_S = 0x46a9U << 10, ++ FTINTRZ_L_D = 0x46aaU << 10, ++ FTINTRNE_W_S = 0x46b1U << 10, ++ FTINTRNE_W_D = 0x46b2U << 10, ++ FTINTRNE_L_S = 0x46b9U << 10, ++ FTINTRNE_L_D = 0x46baU << 10, ++ FTINT_W_S = 0x46c1U << 10, ++ FTINT_W_D = 0x46c2U << 10, ++ FTINT_L_S = 0x46c9U << 10, ++ FTINT_L_D = 0x46caU << 10, ++ FFINT_S_W = 0x4744U << 10, ++ FFINT_S_L = 0x4746U << 10, ++ FFINT_D_W = 0x4748U << 10, ++ FFINT_D_L = 0x474aU << 10, ++ FRINT_S = 0x4791U << 10, ++ FRINT_D = 0x4792U << 10, ++ ++ IOCSRRD_B = 0x19200U << 10, ++ IOCSRRD_H = 0x19201U << 10, ++ IOCSRRD_W = 0x19202U << 10, ++ IOCSRRD_D = 0x19203U << 10, ++ IOCSRWR_B = 0x19204U << 10, ++ IOCSRWR_H = 0x19205U << 10, ++ IOCSRWR_W = 0x19206U << 10, ++ IOCSRWR_D = 0x19207U << 10, ++ ++ MOVCF2FR = 0x4535U << 10, ++ MOVCF2GR = 0x4537U << 10, ++ ++ TLBINV = 0x06482000U, ++ TLBFLUSH = 0x06482400U, ++ TLBP = 0x06482800U, ++ TLBR = 0x06482c00U, ++ TLBWI = 0x06483000U, ++ TLBWR = 0x06483400U, ++ ERET = 0x06483800U ++}; ++ ++// ----- Emulated conditions. ++// On LA64 we use this enum to abstract from conditional branch instructions. ++// The 'U' prefix is used to specify unsigned comparisons. ++enum Condition { ++ // Any value < 0 is considered no_condition. ++ kNoCondition = -1, ++ overflow = 0, ++ no_overflow = 1, ++ Uless = 2, ++ Ugreater_equal = 3, ++ Uless_equal = 4, ++ Ugreater = 5, ++ equal = 6, ++ not_equal = 7, // Unordered or Not Equal. ++ negative = 8, ++ positive = 9, ++ parity_even = 10, ++ parity_odd = 11, ++ less = 12, ++ greater_equal = 13, ++ less_equal = 14, ++ greater = 15, ++ ueq = 16, // Unordered or Equal. ++ ogl = 17, // Ordered and Not Equal. ++ cc_always = 18, ++ ++ // Aliases. 
++ carry = Uless, ++ not_carry = Ugreater_equal, ++ zero = equal, ++ eq = equal, ++ not_zero = not_equal, ++ ne = not_equal, ++ nz = not_equal, ++ sign = negative, ++ not_sign = positive, ++ mi = negative, ++ pl = positive, ++ hi = Ugreater, ++ ls = Uless_equal, ++ ge = greater_equal, ++ lt = less, ++ gt = greater, ++ le = less_equal, ++ hs = Ugreater_equal, ++ lo = Uless, ++ al = cc_always, ++ ult = Uless, ++ uge = Ugreater_equal, ++ ule = Uless_equal, ++ ugt = Ugreater, ++ cc_default = kNoCondition ++}; ++ ++// Returns the equivalent of !cc. ++// Negation of the default kNoCondition (-1) results in a non-default ++// no_condition value (-2). As long as tests for no_condition check ++// for condition < 0, this will work as expected. ++inline Condition NegateCondition(Condition cc) { ++ DCHECK(cc != cc_always); ++ return static_cast(cc ^ 1); ++} ++ ++inline Condition NegateFpuCondition(Condition cc) { ++ DCHECK(cc != cc_always); ++ switch (cc) { ++ case ult: ++ return ge; ++ case ugt: ++ return le; ++ case uge: ++ return lt; ++ case ule: ++ return gt; ++ case lt: ++ return uge; ++ case gt: ++ return ule; ++ case ge: ++ return ult; ++ case le: ++ return ugt; ++ case eq: ++ return ne; ++ case ne: ++ return eq; ++ case ueq: ++ return ogl; ++ case ogl: ++ return ueq; ++ default: ++ return cc; ++ } ++} ++ ++// ----- Coprocessor conditions. ++enum FPUCondition { ++ kNoFPUCondition = -1, ++ ++ CAF = 0x00, // False. ++ SAF = 0x01, // False. ++ CLT = 0x02, // Less Than quiet ++ // SLT = 0x03, // Less Than signaling ++ CEQ = 0x04, ++ SEQ = 0x05, ++ CLE = 0x06, ++ SLE = 0x07, ++ CUN = 0x08, ++ SUN = 0x09, ++ CULT = 0x0a, ++ SULT = 0x0b, ++ CUEQ = 0x0c, ++ SUEQ = 0x0d, ++ CULE = 0x0e, ++ SULE = 0x0f, ++ CNE = 0x10, ++ SNE = 0x11, ++ COR = 0x14, ++ SOR = 0x15, ++ CUNE = 0x18, ++ SUNE = 0x19, ++}; ++ ++const uint32_t kFPURoundingModeShift = 8; ++const uint32_t kFPURoundingModeMask = 0b11 << kFPURoundingModeShift; ++ ++// FPU rounding modes. ++enum FPURoundingMode { ++ RN = 0b00 << kFPURoundingModeShift, // Round to Nearest. ++ RZ = 0b01 << kFPURoundingModeShift, // Round towards zero. ++ RP = 0b10 << kFPURoundingModeShift, // Round towards Plus Infinity. ++ RM = 0b11 << kFPURoundingModeShift, // Round towards Minus Infinity. ++ ++ // Aliases. ++ kRoundToNearest = RN, ++ kRoundToZero = RZ, ++ kRoundToPlusInf = RP, ++ kRoundToMinusInf = RM, ++ ++ mode_round = RN, ++ mode_ceil = RP, ++ mode_floor = RM, ++ mode_trunc = RZ ++}; ++ ++enum CheckForInexactConversion { ++ kCheckForInexactConversion, ++ kDontCheckForInexactConversion ++}; ++ ++enum class MaxMinKind : int { kMin = 0, kMax = 1 }; ++ ++// ----------------------------------------------------------------------------- ++// Hints. ++ ++// Branch hints are not used on the LA64. They are defined so that they can ++// appear in shared function signatures, but will be ignored in LA64 ++// implementations. ++enum Hint { no_hint = 0 }; ++ ++inline Hint NegateHint(Hint hint) { return no_hint; } ++ ++// ----------------------------------------------------------------------------- ++// Specific instructions, constants, and masks. ++// These constants are declared in assembler-mips.cc, as they use named ++// registers and other constants. ++ ++// addi_d(sp, sp, 8) aka Pop() operation or part of Pop(r) ++// operations as post-increment of sp. ++extern const Instr kPopInstruction; ++// addi_d(sp, sp, -8) part of Push(r) operation as pre-decrement of sp. 
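The Condition values above are laid out as adjacent even/odd pairs (overflow/no_overflow, Uless/Ugreater_equal, equal/not_equal, ...), which is what lets NegateCondition get away with flipping the lowest bit. A small standalone illustration of that invariant follows; it uses a trimmed copy of the enum values and is my own example, not code from the patch.

#include <cassert>

enum Cond { kEqual = 6, kNotEqual = 7, kLess = 12, kGreaterEqual = 13 };

constexpr Cond Negate(Cond cc) { return static_cast<Cond>(cc ^ 1); }

int main() {
  static_assert(Negate(kEqual) == kNotEqual, "equal pairs with not_equal");
  static_assert(Negate(kLess) == kGreaterEqual, "less pairs with greater_equal");
  assert(Negate(Negate(kLess)) == kLess);  // flipping twice restores the input
  return 0;
}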
++extern const Instr kPushInstruction;
++// St_d(r, MemOperand(sp, 0))
++extern const Instr kPushRegPattern;
++// Ld_d(r, MemOperand(sp, 0))
++extern const Instr kPopRegPattern;
++// extern const Instr kLwRegFpOffsetPattern;
++// extern const Instr kSwRegFpOffsetPattern;
++// extern const Instr kLwRegFpNegOffsetPattern;
++// extern const Instr kSwRegFpNegOffsetPattern;
++// A mask for the Rk register for push, pop, lw, sw instructions.
++extern const Instr kRtMask;
++// extern const Instr kLwSwInstrTypeMask;
++// extern const Instr kLwSwInstrArgumentMask;
++// extern const Instr kLwSwOffsetMask;
++
++// Break 0xfffff, reserved for redirected real time call.
++const Instr rtCallRedirInstr = BREAK | call_rt_redirected;
++// A nop instruction. (Encoding of addi_w 0 0 0).
++const Instr nopInstr = ADDI_W;
++
++constexpr uint8_t kInstrSize = 4;
++constexpr uint8_t kInstrSizeLog2 = 2;
++
++class InstructionBase {
++ public:
++  enum {
++    // On LoongArch, PC cannot actually be directly accessed. We behave as if
++    // PC was always the value of the current instruction being executed.
++    kPCReadOffset = 0
++  };
++
++  enum Type {
++    kOp6Type,
++    kOp7Type,
++    kOp8Type,
++    kOp10Type,
++    kOp12Type,
++    kOp14Type,
++    kOp17Type,
++    kOp22Type,
++    kUnsupported = -1
++  };
++
++  // Get the raw instruction bits.
++  inline Instr InstructionBits() const {
++    return *reinterpret_cast<const Instr*>(this);
++  }
++
++  // Set the raw instruction bits to value.
++  inline void SetInstructionBits(Instr value) {
++    *reinterpret_cast<Instr*>(this) = value;
++  }
++
++  // Read one particular bit out of the instruction bits.
++  inline int Bit(int nr) const { return (InstructionBits() >> nr) & 1; }
++
++  // Read a bit field out of the instruction bits.
++  inline int Bits(int hi, int lo) const {
++    return (InstructionBits() >> lo) & ((2U << (hi - lo)) - 1);
++  }
++
++  // Safe to call within InstructionType().
++  inline int RjFieldRawNoAssert() const {
++    return InstructionBits() & kRjFieldMask;
++  }
++
++  // Get the encoding type of the instruction.
++ inline Type InstructionType() const; ++ ++ protected: ++ InstructionBase() {} ++}; ++ ++template ++class InstructionGetters : public T { ++ public: ++ inline int RjValue() const { ++ return this->Bits(kRjShift + kRjBits - 1, kRjShift); ++ } ++ ++ inline int RkValue() const { ++ return this->Bits(kRkShift + kRkBits - 1, kRkShift); ++ } ++ ++ inline int RdValue() const { ++ return this->Bits(kRdShift + kRdBits - 1, kRdShift); ++ } ++ ++ inline int Sa2Value() const { ++ return this->Bits(kSaShift + kSa2Bits - 1, kSaShift); ++ } ++ ++ inline int Sa3Value() const { ++ return this->Bits(kSaShift + kSa3Bits - 1, kSaShift); ++ } ++ ++ inline int Ui5Value() const { ++ return this->Bits(kUi5Shift + kUi5Bits - 1, kUi5Shift); ++ } ++ ++ inline int Ui6Value() const { ++ return this->Bits(kUi6Shift + kUi6Bits - 1, kUi6Shift); ++ } ++ ++ inline int Ui12Value() const { ++ return this->Bits(kUi12Shift + kUi12Bits - 1, kUi12Shift); ++ } ++ ++ inline int LsbwValue() const { ++ return this->Bits(kLsbwShift + kLsbwBits - 1, kLsbwShift); ++ } ++ ++ inline int MsbwValue() const { ++ return this->Bits(kMsbwShift + kMsbwBits - 1, kMsbwShift); ++ } ++ ++ inline int LsbdValue() const { ++ return this->Bits(kLsbdShift + kLsbdBits - 1, kLsbdShift); ++ } ++ ++ inline int MsbdValue() const { ++ return this->Bits(kMsbdShift + kMsbdBits - 1, kMsbdShift); ++ } ++ ++ inline int CondValue() const { ++ return this->Bits(kCondShift + kCondBits - 1, kCondShift); ++ } ++ ++ inline int Si12Value() const { ++ return this->Bits(kSi12Shift + kSi12Bits - 1, kSi12Shift); ++ } ++ ++ inline int Si14Value() const { ++ return this->Bits(kSi14Shift + kSi14Bits - 1, kSi14Shift); ++ } ++ ++ inline int Si16Value() const { ++ return this->Bits(kSi16Shift + kSi16Bits - 1, kSi16Shift); ++ } ++ ++ inline int Si20Value() const { ++ return this->Bits(kSi20Shift + kSi20Bits - 1, kSi20Shift); ++ } ++ ++ inline int FdValue() const { ++ return this->Bits(kFdShift + kFdBits - 1, kFdShift); ++ } ++ ++ inline int FaValue() const { ++ return this->Bits(kFaShift + kFaBits - 1, kFaShift); ++ } ++ ++ inline int FjValue() const { ++ return this->Bits(kFjShift + kFjBits - 1, kFjShift); ++ } ++ ++ inline int FkValue() const { ++ return this->Bits(kFkShift + kFkBits - 1, kFkShift); ++ } ++ ++ inline int CjValue() const { ++ return this->Bits(kCjShift + kCjBits - 1, kCjShift); ++ } ++ ++ inline int CdValue() const { ++ return this->Bits(kCdShift + kCdBits - 1, kCdShift); ++ } ++ ++ inline int CaValue() const { ++ return this->Bits(kCaShift + kCaBits - 1, kCaShift); ++ } ++ ++ inline int CodeValue() const { ++ return this->Bits(kCodeShift + kCodeBits - 1, kCodeShift); ++ } ++ ++ inline int Hint5Value() const { ++ return this->Bits(kHint5Shift + kHint5Bits - 1, kHint5Shift); ++ } ++ ++ inline int Hint15Value() const { ++ return this->Bits(kHint15Shift + kHint15Bits - 1, kHint15Shift); ++ } ++ ++ inline int Offs16Value() const { ++ return this->Bits(kOffsLowShift + kOffsLowBits - 1, kOffsLowShift); ++ } ++ ++ inline int Offs21Value() const { ++ int low = this->Bits(kOffsLowShift + kOffsLowBits - 1, kOffsLowShift); ++ int high = ++ this->Bits(kOffs21HighShift + kOffs21HighBits - 1, kOffs21HighShift); ++ return ((high << kOffsLowBits) + low); ++ } ++ ++ inline int Offs26Value() const { ++ int low = this->Bits(kOffsLowShift + kOffsLowBits - 1, kOffsLowShift); ++ int high = ++ this->Bits(kOffs26HighShift + kOffs26HighBits - 1, kOffs26HighShift); ++ return ((high << kOffsLowBits) + low); ++ } ++ ++ inline int RjFieldRaw() const { ++ return this->InstructionBits() & 
kRjFieldMask; ++ } ++ ++ inline int RkFieldRaw() const { ++ return this->InstructionBits() & kRkFieldMask; ++ } ++ ++ inline int RdFieldRaw() const { ++ return this->InstructionBits() & kRdFieldMask; ++ } ++ ++ inline int32_t ImmValue(int bits) const { return this->Bits(bits - 1, 0); } ++ ++ /*TODO*/ ++ inline int32_t Imm12Value() const { abort(); } ++ ++ inline int32_t Imm14Value() const { abort(); } ++ ++ inline int32_t Imm16Value() const { abort(); } ++ ++ // Say if the instruction 'links'. e.g. jal, bal. ++ bool IsLinkingInstruction() const; ++ // Say if the instruction is a break or a trap. ++ bool IsTrap() const; ++}; ++ ++class Instruction : public InstructionGetters { ++ public: ++ // Instructions are read of out a code stream. The only way to get a ++ // reference to an instruction is to convert a pointer. There is no way ++ // to allocate or create instances of class Instruction. ++ // Use the At(pc) function to create references to Instruction. ++ static Instruction* At(byte* pc) { ++ return reinterpret_cast(pc); ++ } ++ ++ private: ++ // We need to prevent the creation of instances of class Instruction. ++ DISALLOW_IMPLICIT_CONSTRUCTORS(Instruction); ++}; ++ ++// ----------------------------------------------------------------------------- ++// LA64 assembly various constants. ++ ++// C/C++ argument slots size. ++const int kCArgSlotCount = 0; ++ ++const int kCArgsSlotsSize = kCArgSlotCount * kInstrSize * 2; ++ ++const int kInvalidStackOffset = -1; ++ ++static const int kNegOffset = 0x00008000; ++ ++InstructionBase::Type InstructionBase::InstructionType() const { ++ InstructionBase::Type kType = kUnsupported; ++ ++ // Check for kOp6Type ++ switch (Bits(31, 26) << 26) { ++ case ADDU16I_D: ++ case BEQZ: ++ case BNEZ: ++ case BCZ: ++ case JIRL: ++ case B: ++ case BL: ++ case BEQ: ++ case BNE: ++ case BLT: ++ case BGE: ++ case BLTU: ++ case BGEU: ++ kType = kOp6Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp7Type ++ switch (Bits(31, 25) << 25) { ++ case LU12I_W: ++ case LU32I_D: ++ case PCADDI: ++ case PCALAU12I: ++ case PCADDU12I: ++ case PCADDU18I: ++ kType = kOp7Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp8Type ++ switch (Bits(31, 24) << 24) { ++ case LDPTR_W: ++ case STPTR_W: ++ case LDPTR_D: ++ case STPTR_D: ++ case LL_W: ++ case SC_W: ++ case LL_D: ++ case SC_D: ++ case CSR: ++ kType = kOp8Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp10Type ++ switch (Bits(31, 22) << 22) { ++ case BSTR_W: { ++ // If Bit(21) = 0, then the Opcode is not BSTR_W. 
++ if (Bit(21) == 0) ++ kType = kUnsupported; ++ else ++ kType = kOp10Type; ++ break; ++ } ++ case BSTRINS_D: ++ case BSTRPICK_D: ++ case SLTI: ++ case SLTUI: ++ case ADDI_W: ++ case ADDI_D: ++ case LU52I_D: ++ case ANDI: ++ case ORI: ++ case XORI: ++ case LD_B: ++ case LD_H: ++ case LD_W: ++ case LD_D: ++ case ST_B: ++ case ST_H: ++ case ST_W: ++ case ST_D: ++ case LD_BU: ++ case LD_HU: ++ case LD_WU: ++ case PRELD: ++ case FLD_S: ++ case FST_S: ++ case FLD_D: ++ case FST_D: ++ case CACHE: ++ kType = kOp10Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp12Type ++ switch (Bits(31, 20) << 20) { ++ case FMADD_S: ++ case FMADD_D: ++ case FMSUB_S: ++ case FMSUB_D: ++ case FNMADD_S: ++ case FNMADD_D: ++ case FNMSUB_S: ++ case FNMSUB_D: ++ case FCMP_COND_S: ++ case FCMP_COND_D: ++ case FSEL: ++ kType = kOp12Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp14Type ++ switch (Bits(31, 18) << 18) { ++ case ALSL: ++ case BYTEPICK_W: ++ case BYTEPICK_D: ++ case ALSL_D: ++ case SLLI: ++ case SRLI: ++ case SRAI: ++ case ROTRI: ++ case LDDIR: ++ case LDPTE: ++ kType = kOp14Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp17Type ++ switch (Bits(31, 15) << 15) { ++ case ADD_W: ++ case ADD_D: ++ case SUB_W: ++ case SUB_D: ++ case SLT: ++ case SLTU: ++ case MASKEQZ: ++ case MASKNEZ: ++ case NOR: ++ case AND: ++ case OR: ++ case XOR: ++ case ORN: ++ case ANDN: ++ case SLL_W: ++ case SRL_W: ++ case SRA_W: ++ case SLL_D: ++ case SRL_D: ++ case SRA_D: ++ case ROTR_D: ++ case ROTR_W: ++ case MUL_W: ++ case MULH_W: ++ case MULH_WU: ++ case MUL_D: ++ case MULH_D: ++ case MULH_DU: ++ case MULW_D_W: ++ case MULW_D_WU: ++ case DIV_W: ++ case MOD_W: ++ case DIV_WU: ++ case MOD_WU: ++ case DIV_D: ++ case MOD_D: ++ case DIV_DU: ++ case MOD_DU: ++ case BREAK: ++ case FADD_S: ++ case FADD_D: ++ case FSUB_S: ++ case FSUB_D: ++ case FMUL_S: ++ case FMUL_D: ++ case FDIV_S: ++ case FDIV_D: ++ case FMAX_S: ++ case FMAX_D: ++ case FMIN_S: ++ case FMIN_D: ++ case FMAXA_S: ++ case FMAXA_D: ++ case FMINA_S: ++ case FMINA_D: ++ case LDX_B: ++ case LDX_H: ++ case LDX_W: ++ case LDX_D: ++ case STX_B: ++ case STX_H: ++ case STX_W: ++ case STX_D: ++ case LDX_BU: ++ case LDX_HU: ++ case LDX_WU: ++ case PRELDX: ++ case FLDX_S: ++ case FLDX_D: ++ case FSTX_S: ++ case FSTX_D: ++ case ASRTLE_D: ++ case ASRTGT_D: ++ case DBGCALL: ++ case SYSCALL: ++ case HYPCALL: ++ case AMSWAP_W: ++ case AMSWAP_D: ++ case AMADD_W: ++ case AMADD_D: ++ case AMAND_W: ++ case AMAND_D: ++ case AMOR_W: ++ case AMOR_D: ++ case AMXOR_W: ++ case AMXOR_D: ++ case AMMAX_W: ++ case AMMAX_D: ++ case AMMIN_W: ++ case AMMIN_D: ++ case AMMAX_WU: ++ case AMMAX_DU: ++ case AMMIN_WU: ++ case AMMIN_DU: ++ case AMSWAP_DB_W: ++ case AMSWAP_DB_D: ++ case AMADD_DB_W: ++ case AMADD_DB_D: ++ case AMAND_DB_W: ++ case AMAND_DB_D: ++ case AMOR_DB_W: ++ case AMOR_DB_D: ++ case AMXOR_DB_W: ++ case AMXOR_DB_D: ++ case AMMAX_DB_W: ++ case AMMAX_DB_D: ++ case AMMIN_DB_W: ++ case AMMIN_DB_D: ++ case AMMAX_DB_WU: ++ case AMMAX_DB_DU: ++ case AMMIN_DB_WU: ++ case AMMIN_DB_DU: ++ case DBAR: ++ case IBAR: ++ case FLDGT_S: ++ case FLDGT_D: ++ case FLDLE_S: ++ case FLDLE_D: ++ case FSTGT_S: ++ case FSTGT_D: ++ case FSTLE_S: ++ case FSTLE_D: ++ case LDGT_B: ++ case LDGT_H: ++ case LDGT_W: ++ case LDGT_D: ++ case LDLE_B: ++ case LDLE_H: ++ case LDLE_W: ++ case LDLE_D: ++ case STGT_B: ++ case STGT_H: 
++ case STGT_W: ++ case STGT_D: ++ case STLE_B: ++ case STLE_H: ++ case STLE_W: ++ case STLE_D: ++ case WAIT_INVTLB: ++ case FSCALEB_S: ++ case FSCALEB_D: ++ case FCOPYSIGN_S: ++ case FCOPYSIGN_D: ++ case CRC_W_B_W: ++ case CRC_W_H_W: ++ case CRC_W_W_W: ++ case CRC_W_D_W: ++ case CRCC_W_B_W: ++ case CRCC_W_H_W: ++ case CRCC_W_W_W: ++ case CRCC_W_D_W: ++ kType = kOp17Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp22Type ++ switch (Bits(31, 10) << 10) { ++ case CLZ_W: ++ case CTZ_W: ++ case CLZ_D: ++ case CTZ_D: ++ case REVB_2H: ++ case REVB_4H: ++ case REVB_2W: ++ case REVB_D: ++ case REVH_2W: ++ case REVH_D: ++ case BITREV_4B: ++ case BITREV_8B: ++ case BITREV_W: ++ case BITREV_D: ++ case EXT_W_B: ++ case EXT_W_H: ++ case FABS_S: ++ case FABS_D: ++ case FNEG_S: ++ case FNEG_D: ++ case FSQRT_S: ++ case FSQRT_D: ++ case FMOV_S: ++ case FMOV_D: ++ case MOVGR2FR_W: ++ case MOVGR2FR_D: ++ case MOVGR2FRH_W: ++ case MOVFR2GR_S: ++ case MOVFR2GR_D: ++ case MOVFRH2GR_S: ++ case MOVGR2FCSR: ++ case MOVFCSR2GR: ++ case FCVT_S_D: ++ case FCVT_D_S: ++ case FTINTRM_W_S: ++ case FTINTRM_W_D: ++ case FTINTRM_L_S: ++ case FTINTRM_L_D: ++ case FTINTRP_W_S: ++ case FTINTRP_W_D: ++ case FTINTRP_L_S: ++ case FTINTRP_L_D: ++ case FTINTRZ_W_S: ++ case FTINTRZ_W_D: ++ case FTINTRZ_L_S: ++ case FTINTRZ_L_D: ++ case FTINTRNE_W_S: ++ case FTINTRNE_W_D: ++ case FTINTRNE_L_S: ++ case FTINTRNE_L_D: ++ case FTINT_W_S: ++ case FTINT_W_D: ++ case FTINT_L_S: ++ case FTINT_L_D: ++ case FFINT_S_W: ++ case FFINT_S_L: ++ case FFINT_D_W: ++ case FFINT_D_L: ++ case FRINT_S: ++ case FRINT_D: ++ case MOVFR2CF: ++ case MOVCF2FR: ++ case MOVGR2CF: ++ case MOVCF2GR: ++ case FRECIP_S: ++ case FRECIP_D: ++ case FRSQRT_S: ++ case FRSQRT_D: ++ case FCLASS_S: ++ case FCLASS_D: ++ case FLOGB_S: ++ case FLOGB_D: ++ case CLO_W: ++ case CTO_W: ++ case CLO_D: ++ case CTO_D: ++ case IOCSRRD_B: ++ case IOCSRRD_H: ++ case IOCSRRD_W: ++ case IOCSRRD_D: ++ case IOCSRWR_B: ++ case IOCSRWR_H: ++ case IOCSRWR_W: ++ case IOCSRWR_D: ++ case TLBINV: ++ case TLBFLUSH: ++ case TLBP: ++ case TLBR: ++ case TLBWI: ++ case TLBWR: ++ case ERET: ++ case RDTIMEL_W: ++ case RDTIMEH_W: ++ case RDTIME_D: ++ // case CPUCFG: ++ kType = kOp22Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ return kType; ++} ++ ++// ----------------------------------------------------------------------------- ++// Instructions. ++ ++template ++bool InstructionGetters
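InstructionType() above classifies a raw word by probing ever-narrower opcode fields: first the top 6 bits, then 7, 8, 10, 12, 14, 17 and finally 22, comparing each slice (shifted back into position) against the known opcode constants. A stripped-down sketch of that idea follows, using only two opcodes and made-up helper names; it is an illustration, not the patch's own code.

#include <cstdint>
#include <cstdio>

constexpr uint32_t kBeq  = 0x16u << 26;  // 6-bit opcode group
constexpr uint32_t kAddD = 0x21u << 15;  // 17-bit opcode group

const char* Classify(uint32_t instr) {
  if (((instr >> 26) << 26) == kBeq) return "kOp6Type";
  if (((instr >> 15) << 15) == kAddD) return "kOp17Type";
  return "kUnsupported";
}

int main() {
  std::printf("%s\n", Classify(0x58000085u));  // beq $a0, $a1, 0     -> kOp6Type
  std::printf("%s\n", Classify(0x001098a4u));  // add.d $a0, $a1, $a2 -> kOp17Type
  return 0;
}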

::IsTrap() const { ++ return true; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_CONSTANTS_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/cpu-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/cpu-la64.cc +new file mode 100644 +index 00000000000..3e11a88313e +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/cpu-la64.cc +@@ -0,0 +1,38 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++// CPU specific code for loongisa independent of OS goes here. ++ ++#include ++#include ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/cpu-features.h" ++ ++namespace v8 { ++namespace internal { ++ ++void CpuFeatures::FlushICache(void* start, size_t size) { ++#if !defined(USE_SIMULATOR) ++ // Nothing to do, flushing no instructions. ++ if (size == 0) { ++ return; ++ } ++ ++#if defined(ANDROID) && !defined(__LP64__) ++ // Bionic cacheflush can typically run in userland, avoiding kernel call. ++ char* end = reinterpret_cast(start) + size; ++ cacheflush(reinterpret_cast(start), reinterpret_cast(end), ++ 0); ++#else // ANDROID ++ asm("ibar 0\n"); ++#endif // ANDROID ++#endif // !USE_SIMULATOR. ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/interface-descriptors-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/interface-descriptors-la64.cc +new file mode 100644 +index 00000000000..b72ee4f917f +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/interface-descriptors-la64.cc +@@ -0,0 +1,332 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/interface-descriptors.h" ++ ++#include "src/execution/frames.h" ++ ++namespace v8 { ++namespace internal { ++ ++const Register CallInterfaceDescriptor::ContextRegister() { return cp; } ++ ++void CallInterfaceDescriptor::DefaultInitializePlatformSpecific( ++ CallInterfaceDescriptorData* data, int register_parameter_count) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ CHECK_LE(static_cast(register_parameter_count), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(register_parameter_count, ++ default_stub_registers); ++} ++ ++// On MIPS it is not allowed to use odd numbered floating point registers ++// (e.g. f1, f3, etc.) for parameters. This can happen if we use ++// DefaultInitializePlatformSpecific to assign float registers for parameters. ++// E.g if fourth parameter goes to float register, f7 would be assigned for ++// parameter (a3 casted to int is 7). 
++bool CallInterfaceDescriptor::IsValidFloatParameterRegister(Register reg) { ++ return reg.code() % 2 == 0; ++} ++ ++void WasmI32AtomicWait32Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3}; ++ CHECK_EQ(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void WasmI32AtomicWait64Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2}; ++ CHECK_EQ(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void WasmI64AtomicWait32Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ CHECK_EQ(static_cast(kParameterCount - kStackArgumentsCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount - kStackArgumentsCount, ++ default_stub_registers); ++} ++ ++void WasmI64AtomicWait64Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2}; ++ CHECK_EQ(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void RecordWriteDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ ++ data->RestrictAllocatableRegisters(default_stub_registers, ++ arraysize(default_stub_registers)); ++ ++ CHECK_LE(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void EphemeronKeyBarrierDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ ++ data->RestrictAllocatableRegisters(default_stub_registers, ++ arraysize(default_stub_registers)); ++ ++ CHECK_LE(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++const Register FastNewFunctionContextDescriptor::ScopeInfoRegister() { ++ return a1; ++} ++const Register FastNewFunctionContextDescriptor::SlotsRegister() { return a0; } ++ ++const Register LoadDescriptor::ReceiverRegister() { return a1; } ++const Register LoadDescriptor::NameRegister() { return a2; } ++const Register LoadDescriptor::SlotRegister() { return a0; } ++ ++const Register LoadWithVectorDescriptor::VectorRegister() { return a3; } ++ ++const Register StoreDescriptor::ReceiverRegister() { return a1; } ++const Register StoreDescriptor::NameRegister() { return a2; } ++const Register StoreDescriptor::ValueRegister() { return a0; } ++const Register StoreDescriptor::SlotRegister() { return a4; } ++ ++const Register StoreWithVectorDescriptor::VectorRegister() { return a3; } ++ ++const Register StoreTransitionDescriptor::SlotRegister() { return a4; } ++const Register StoreTransitionDescriptor::VectorRegister() { return a3; } ++const Register StoreTransitionDescriptor::MapRegister() { return a5; } ++ ++const Register ApiGetterDescriptor::HolderRegister() { return a0; } ++const Register ApiGetterDescriptor::CallbackRegister() { return a3; } ++ 
++const Register GrowArrayElementsDescriptor::ObjectRegister() { return a0; } ++const Register GrowArrayElementsDescriptor::KeyRegister() { return a3; } ++ ++// static ++const Register TypeConversionDescriptor::ArgumentRegister() { return a0; } ++ ++void TypeofDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a3}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallTrampolineDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: target ++ // a0: number of arguments ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a4 : arguments list length (untagged) ++ // a2 : arguments list (FixedArray) ++ Register registers[] = {a1, a0, a4, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallForwardVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: the target to call ++ // a0: number of arguments ++ // a2: start index (to support rest parameters) ++ Register registers[] = {a1, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallFunctionTemplateDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1 : function template info ++ // a0 : number of arguments (on the stack, not including receiver) ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallWithSpreadDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a2 : the object to spread ++ Register registers[] = {a1, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallWithArrayLikeDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1 : the target to call ++ // a2 : the arguments list ++ Register registers[] = {a1, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a3 : the new target ++ // a4 : arguments list length (untagged) ++ // a2 : arguments list (FixedArray) ++ Register registers[] = {a1, a3, a0, a4, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructForwardVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: the target to call ++ // a3: new target ++ // a0: number of arguments ++ // a2: start index (to support rest parameters) ++ Register registers[] = {a1, a3, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructWithSpreadDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a3 : the new target ++ // a2 : the object to spread ++ Register registers[] = {a1, a3, a0, a2}; ++ 
data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructWithArrayLikeDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1 : the target to call ++ // a3 : the new target ++ // a2 : the arguments list ++ Register registers[] = {a1, a3, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructStubDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: target ++ // a3: new target ++ // a0: number of arguments ++ // a2: allocation site or undefined ++ Register registers[] = {a1, a3, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void AbortDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void AllocateHeapNumberDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // register state ++ data->InitializePlatformSpecific(0, nullptr); ++} ++ ++void CompareDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void BinaryOpDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ArgumentsAdaptorDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a1, // JSFunction ++ a3, // the new target ++ a0, // actual number of arguments ++ a2, // expected number of arguments ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ApiCallbackDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a1, // kApiFunctionAddress ++ a2, // kArgc ++ a3, // kCallData ++ a0, // kHolder ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void InterpreterDispatchDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ kInterpreterAccumulatorRegister, kInterpreterBytecodeOffsetRegister, ++ kInterpreterBytecodeArrayRegister, kInterpreterDispatchTableRegister}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void InterpreterPushArgsThenCallDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a0, // argument count (not including receiver) ++ a2, // address of first argument ++ a1 // the target callable to be call ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void InterpreterPushArgsThenConstructDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a0, // argument count (not including receiver) ++ a4, // address of the first argument ++ a1, // constructor to call ++ a3, // new target ++ a2, // allocation site feedback if available, undefined otherwise ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ResumeGeneratorDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a0, // the value to pass to the generator ++ a1 // the JSGeneratorObject to resume ++ }; ++ 
data->InitializePlatformSpecific(arraysize(registers), registers);
++}
++
++void FrameDropperTrampolineDescriptor::InitializePlatformSpecific(
++    CallInterfaceDescriptorData* data) {
++  Register registers[] = {
++      a1,  // loaded new FP
++  };
++  data->InitializePlatformSpecific(arraysize(registers), registers);
++}
++
++void RunMicrotasksEntryDescriptor::InitializePlatformSpecific(
++    CallInterfaceDescriptorData* data) {
++  Register registers[] = {a0, a1};
++  data->InitializePlatformSpecific(arraysize(registers), registers);
++}
++
++}  // namespace internal
++}  // namespace v8
++
++#endif  // V8_TARGET_ARCH_LA64
+diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.cc
+new file mode 100644
+index 00000000000..3fde2b9dfa7
+--- /dev/null
++++ b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.cc
+@@ -0,0 +1,3992 @@
++// Copyright 2012 the V8 project authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++#include <limits.h>  // For LONG_MIN, LONG_MAX.
++
++#if V8_TARGET_ARCH_LA64
++
++#include "src/base/bits.h"
++#include "src/base/division-by-constant.h"
++#include "src/codegen/assembler-inl.h"
++#include "src/codegen/callable.h"
++#include "src/codegen/code-factory.h"
++#include "src/codegen/external-reference-table.h"
++#include "src/codegen/macro-assembler.h"
++#include "src/codegen/register-configuration.h"
++#include "src/debug/debug.h"
++#include "src/execution/frames-inl.h"
++#include "src/heap/heap-inl.h"  // For MemoryChunk.
++#include "src/init/bootstrapper.h"
++#include "src/logging/counters.h"
++#include "src/objects/heap-number.h"
++#include "src/runtime/runtime.h"
++#include "src/snapshot/embedded/embedded-data.h"
++#include "src/snapshot/snapshot.h"
++#include "src/wasm/wasm-code-manager.h"
++
++// Satisfy cpplint check, but don't include platform-specific header. It is
++// included recursively via macro-assembler.h.
++#if 0 ++#include "src/codegen/la64/macro-assembler-la64.h" ++#endif ++ ++namespace v8 { ++namespace internal { ++ ++static inline bool IsZero(const Operand& rk) { ++ if (rk.is_reg()) { ++ return rk.rm() == zero_reg; ++ } else { ++ return rk.immediate() == 0; ++ } ++} ++ ++int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, ++ Register exclusion1, ++ Register exclusion2, ++ Register exclusion3) const { ++ int bytes = 0; ++ RegList exclusions = 0; ++ if (exclusion1 != no_reg) { ++ exclusions |= exclusion1.bit(); ++ if (exclusion2 != no_reg) { ++ exclusions |= exclusion2.bit(); ++ if (exclusion3 != no_reg) { ++ exclusions |= exclusion3.bit(); ++ } ++ } ++ } ++ ++ RegList list = kJSCallerSaved & ~exclusions; ++ bytes += NumRegs(list) * kPointerSize; ++ ++ if (fp_mode == kSaveFPRegs) { ++ bytes += NumRegs(kCallerSavedFPU) * kDoubleSize; ++ } ++ ++ return bytes; ++} ++ ++int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ++ Register exclusion2, Register exclusion3) { ++ int bytes = 0; ++ RegList exclusions = 0; ++ if (exclusion1 != no_reg) { ++ exclusions |= exclusion1.bit(); ++ if (exclusion2 != no_reg) { ++ exclusions |= exclusion2.bit(); ++ if (exclusion3 != no_reg) { ++ exclusions |= exclusion3.bit(); ++ } ++ } ++ } ++ ++ RegList list = kJSCallerSaved & ~exclusions; ++ MultiPush(list); ++ bytes += NumRegs(list) * kPointerSize; ++ ++ if (fp_mode == kSaveFPRegs) { ++ MultiPushFPU(kCallerSavedFPU); ++ bytes += NumRegs(kCallerSavedFPU) * kDoubleSize; ++ } ++ ++ return bytes; ++} ++ ++int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ++ Register exclusion2, Register exclusion3) { ++ int bytes = 0; ++ if (fp_mode == kSaveFPRegs) { ++ MultiPopFPU(kCallerSavedFPU); ++ bytes += NumRegs(kCallerSavedFPU) * kDoubleSize; ++ } ++ ++ RegList exclusions = 0; ++ if (exclusion1 != no_reg) { ++ exclusions |= exclusion1.bit(); ++ if (exclusion2 != no_reg) { ++ exclusions |= exclusion2.bit(); ++ if (exclusion3 != no_reg) { ++ exclusions |= exclusion3.bit(); ++ } ++ } ++ } ++ ++ RegList list = kJSCallerSaved & ~exclusions; ++ MultiPop(list); ++ bytes += NumRegs(list) * kPointerSize; ++ ++ return bytes; ++} ++ ++void TurboAssembler::LoadRoot(Register destination, RootIndex index) { ++ Ld_d(destination, MemOperand(s6, RootRegisterOffsetForRootIndex(index))); ++} ++ ++void TurboAssembler::PushCommonFrame(Register marker_reg) { ++ if (marker_reg.is_valid()) { ++ Push(ra, fp, marker_reg); ++ Add_d(fp, sp, Operand(kPointerSize)); ++ } else { ++ Push(ra, fp); ++ mov(fp, sp); ++ } ++} ++ ++void TurboAssembler::PushStandardFrame(Register function_reg) { ++ int offset = -StandardFrameConstants::kContextOffset; ++ if (function_reg.is_valid()) { ++ Push(ra, fp, cp, function_reg); ++ offset += kPointerSize; ++ } else { ++ Push(ra, fp, cp); ++ } ++ Add_d(fp, sp, Operand(offset)); ++} ++ ++int MacroAssembler::SafepointRegisterStackIndex(int reg_code) { ++ // The registers are pushed starting with the highest encoding, ++ // which means that lowest encodings are closest to the stack pointer. ++ return kSafepointRegisterStackIndexMap[reg_code]; ++} ++ ++// Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved) ++// The register 'object' contains a heap object pointer. The heap object ++// tag is shifted away. 
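PushCallerSaved/PopCallerSaved above treat RegList as a plain bitmask: excluded registers are cleared from kJSCallerSaved and the spill area is one pointer-sized slot per remaining bit. Here is a minimal model of that arithmetic, assuming a 64-bit mask and 8-byte pointers as on LA64; the names are illustrative, not V8's own.

#include <bitset>
#include <cstdint>

using RegList = uint64_t;        // one bit per general-purpose register
constexpr int kPointerSize = 8;  // LA64 pointers are 8 bytes

constexpr RegList Bit(int code) { return RegList{1} << code; }

inline int NumRegs(RegList list) {
  return static_cast<int>(std::bitset<64>(list).count());
}

// Stack bytes needed to spill the caller-saved set minus any exclusions,
// mirroring RequiredStackSizeForCallerSaved above (FP registers ignored here).
// e.g. CallerSavedBytes(Bit(4) | Bit(5) | Bit(6), Bit(5)) == 16.
inline int CallerSavedBytes(RegList caller_saved, RegList exclusions) {
  RegList list = caller_saved & ~exclusions;
  return NumRegs(list) * kPointerSize;
}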
++void MacroAssembler::RecordWriteField(Register object, int offset, ++ Register value, Register dst, ++ RAStatus ra_status, ++ SaveFPRegsMode save_fp, ++ RememberedSetAction remembered_set_action, ++ SmiCheck smi_check) { ++ DCHECK(!AreAliased(value, dst, t8, object)); ++ // First, check if a write barrier is even needed. The tests below ++ // catch stores of Smis. ++ Label done; ++ ++ // Skip barrier if writing a smi. ++ if (smi_check == INLINE_SMI_CHECK) { ++ JumpIfSmi(value, &done); ++ } ++ ++ // Although the object register is tagged, the offset is relative to the start ++ // of the object, so so offset must be a multiple of kPointerSize. ++ DCHECK(IsAligned(offset, kPointerSize)); ++ ++ Add_d(dst, object, Operand(offset - kHeapObjectTag)); ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Label ok; ++ And(t8, dst, Operand(kPointerSize - 1)); ++ Branch(&ok, eq, t8, Operand(zero_reg)); ++ stop(); ++ bind(&ok); ++ } ++ ++ RecordWrite(object, dst, value, ra_status, save_fp, remembered_set_action, ++ OMIT_SMI_CHECK); ++ ++ bind(&done); ++ ++ // Clobber clobbered input registers when running with the debug-code flag ++ // turned on to provoke errors. ++ if (emit_debug_code()) { ++ li(value, Operand(bit_cast(kZapValue + 4))); ++ li(dst, Operand(bit_cast(kZapValue + 8))); ++ } ++} ++ ++void TurboAssembler::SaveRegisters(RegList registers) { ++ DCHECK_GT(NumRegs(registers), 0); ++ RegList regs = 0; ++ for (int i = 0; i < Register::kNumRegisters; ++i) { ++ if ((registers >> i) & 1u) { ++ regs |= Register::from_code(i).bit(); ++ } ++ } ++ MultiPush(regs); ++} ++ ++void TurboAssembler::RestoreRegisters(RegList registers) { ++ DCHECK_GT(NumRegs(registers), 0); ++ RegList regs = 0; ++ for (int i = 0; i < Register::kNumRegisters; ++i) { ++ if ((registers >> i) & 1u) { ++ regs |= Register::from_code(i).bit(); ++ } ++ } ++ MultiPop(regs); ++} ++ ++void TurboAssembler::CallEphemeronKeyBarrier(Register object, Register address, ++ SaveFPRegsMode fp_mode) { ++ EphemeronKeyBarrierDescriptor descriptor; ++ RegList registers = descriptor.allocatable_registers(); ++ ++ SaveRegisters(registers); ++ ++ Register object_parameter( ++ descriptor.GetRegisterParameter(EphemeronKeyBarrierDescriptor::kObject)); ++ Register slot_parameter(descriptor.GetRegisterParameter( ++ EphemeronKeyBarrierDescriptor::kSlotAddress)); ++ Register fp_mode_parameter( ++ descriptor.GetRegisterParameter(EphemeronKeyBarrierDescriptor::kFPMode)); ++ ++ Push(object); ++ Push(address); ++ ++ Pop(slot_parameter); ++ Pop(object_parameter); ++ ++ Move(fp_mode_parameter, Smi::FromEnum(fp_mode)); ++ Call(isolate()->builtins()->builtin_handle(Builtins::kEphemeronKeyBarrier), ++ RelocInfo::CODE_TARGET); ++ RestoreRegisters(registers); ++} ++ ++void TurboAssembler::CallRecordWriteStub( ++ Register object, Register address, ++ RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode) { ++ CallRecordWriteStub( ++ object, address, remembered_set_action, fp_mode, ++ isolate()->builtins()->builtin_handle(Builtins::kRecordWrite), ++ kNullAddress); ++} ++ ++void TurboAssembler::CallRecordWriteStub( ++ Register object, Register address, ++ RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode, ++ Address wasm_target) { ++ CallRecordWriteStub(object, address, remembered_set_action, fp_mode, ++ Handle::null(), wasm_target); ++} ++ ++void TurboAssembler::CallRecordWriteStub( ++ Register object, Register address, ++ RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode, ++ Handle 
code_target, Address wasm_target) { ++ DCHECK_NE(code_target.is_null(), wasm_target == kNullAddress); ++ // TODO(albertnetymk): For now we ignore remembered_set_action and fp_mode, ++ // i.e. always emit remember set and save FP registers in RecordWriteStub. If ++ // large performance regression is observed, we should use these values to ++ // avoid unnecessary work. ++ ++ RecordWriteDescriptor descriptor; ++ RegList registers = descriptor.allocatable_registers(); ++ ++ SaveRegisters(registers); ++ Register object_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kObject)); ++ Register slot_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kSlot)); ++ Register remembered_set_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kRememberedSet)); ++ Register fp_mode_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kFPMode)); ++ ++ Push(object); ++ Push(address); ++ ++ Pop(slot_parameter); ++ Pop(object_parameter); ++ ++ Move(remembered_set_parameter, Smi::FromEnum(remembered_set_action)); ++ Move(fp_mode_parameter, Smi::FromEnum(fp_mode)); ++ if (code_target.is_null()) { ++ Call(wasm_target, RelocInfo::WASM_STUB_CALL); ++ } else { ++ Call(code_target, RelocInfo::CODE_TARGET); ++ } ++ ++ RestoreRegisters(registers); ++} ++ ++// Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved) ++// The register 'object' contains a heap object pointer. The heap object ++// tag is shifted away. ++void MacroAssembler::RecordWrite(Register object, Register address, ++ Register value, RAStatus ra_status, ++ SaveFPRegsMode fp_mode, ++ RememberedSetAction remembered_set_action, ++ SmiCheck smi_check) { ++ DCHECK(!AreAliased(object, address, value)); ++ ++ if (emit_debug_code()) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Ld_d(scratch, MemOperand(address, 0)); ++ Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, scratch, ++ Operand(value)); ++ } ++ ++ if ((remembered_set_action == OMIT_REMEMBERED_SET && ++ !FLAG_incremental_marking) || ++ FLAG_disable_write_barriers) { ++ return; ++ } ++ ++ // First, check if a write barrier is even needed. The tests below ++ // catch stores of smis and stores into the young generation. ++ Label done; ++ ++ if (smi_check == INLINE_SMI_CHECK) { ++ DCHECK_EQ(0, kSmiTag); ++ JumpIfSmi(value, &done); ++ } ++ ++ CheckPageFlag(value, ++ value, // Used as scratch. ++ MemoryChunk::kPointersToHereAreInterestingMask, eq, &done); ++ CheckPageFlag(object, ++ value, // Used as scratch. ++ MemoryChunk::kPointersFromHereAreInterestingMask, eq, &done); ++ ++ // Record the actual write. ++ if (ra_status == kRAHasNotBeenSaved) { ++ push(ra); ++ } ++ CallRecordWriteStub(object, address, remembered_set_action, fp_mode); ++ if (ra_status == kRAHasNotBeenSaved) { ++ pop(ra); ++ } ++ ++ bind(&done); ++ ++ // Clobber clobbered registers when running with the debug-code flag ++ // turned on to provoke errors. ++ if (emit_debug_code()) { ++ li(address, Operand(bit_cast(kZapValue + 12))); ++ li(value, Operand(bit_cast(kZapValue + 16))); ++ } ++} ++ ++// --------------------------------------------------------------------------- ++// Instruction macros. ++ ++void TurboAssembler::Add_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ add_w(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_w(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. 
++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ add_w(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Add_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ add_d(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_d(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ add_d(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Sub_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sub_w(rd, rj, rk.rm()); ++ } else { ++ DCHECK(is_int32(rk.immediate())); ++ if (is_int12(-rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_w(rd, rj, ++ static_cast( ++ -rk.immediate())); // No subi_w instr, use addi_w(x, y, -imm). ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ if (-rk.immediate() >> 12 == 0 && !MustUseReg(rk.rmode())) { ++ // Use load -imm and addu when loading -imm generates one instruction. ++ li(scratch, -rk.immediate()); ++ add_w(rd, rj, scratch); ++ } else { ++ // li handles the relocation. ++ li(scratch, rk); ++ sub_w(rd, rj, scratch); ++ } ++ } ++ } ++} ++ ++void TurboAssembler::Sub_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sub_d(rd, rj, rk.rm()); ++ } else if (is_int12(-rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_d(rd, rj, ++ static_cast( ++ -rk.immediate())); // No subi_d instr, use addi_d(x, y, -imm). ++ } else { ++ DCHECK(rj != t7); ++ int li_count = InstrCountForLi64Bit(rk.immediate()); ++ int li_neg_count = InstrCountForLi64Bit(-rk.immediate()); ++ if (li_neg_count < li_count && !MustUseReg(rk.rmode())) { ++ // Use load -imm and add_d when loading -imm generates one instruction. ++ DCHECK(rk.immediate() != std::numeric_limits::min()); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(-rk.immediate())); ++ add_d(rd, rj, scratch); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rk); ++ sub_d(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Mul_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mul_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mul_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mulh_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mulh_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mulh_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mulh_wu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mulh_wu(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mulh_wu(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mul_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mul_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. 
++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mul_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mulh_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mulh_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mulh_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_wu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_wu(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_wu(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_wu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_wu(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_wu(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_du(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_du(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_du(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_du(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_du(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_du(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::And(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ and_(rd, rj, rk.rm()); ++ } else { ++ if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ andi(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. 
++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ and_(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Or(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ or_(rd, rj, rk.rm()); ++ } else { ++ if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ ori(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ or_(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Xor(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ xor_(rd, rj, rk.rm()); ++ } else { ++ if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ xori(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ xor_(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Nor(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ nor(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ nor(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Andn(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ andn(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ andn(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Orn(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ orn(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ orn(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Neg(Register rj, const Operand& rk) { ++ DCHECK(rk.is_reg()); ++ sub_d(rj, zero_reg, rk.rm()); ++} ++ ++void TurboAssembler::Slt(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ slt(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ slti(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ // TODO why?? ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ slt(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Sltu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sltu(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ sltui(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ sltu(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Sle(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ slt(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? 
temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ slt(rd, scratch, rj); ++ } ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sleu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sltu(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ sltu(rd, scratch, rj); ++ } ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sge(Register rd, Register rj, const Operand& rk) { ++ Slt(rd, rj, rk); ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sgeu(Register rd, Register rj, const Operand& rk) { ++ Sltu(rd, rj, rk); ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sgt(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ slt(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ slt(rd, scratch, rj); ++ } ++} ++ ++void TurboAssembler::Sgtu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sltu(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ sltu(rd, scratch, rj); ++ } ++} ++ ++void TurboAssembler::Rotr_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ rotr_w(rd, rj, rk.rm()); ++ } else { ++ int64_t ror_value = rk.immediate() % 32; ++ if (ror_value < 0) { ++ ror_value += 32; ++ } ++ rotri_w(rd, rj, ror_value); ++ } ++} ++ ++void TurboAssembler::Rotr_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ rotr_d(rd, rj, rk.rm()); ++ } else { ++ int64_t dror_value = rk.immediate() % 64; ++ if (dror_value < 0) dror_value += 64; ++ rotri_d(rd, rj, dror_value); ++ } ++} ++ ++void MacroAssembler::Pref(int32_t hint, const MemOperand& rj) { ++ // TODO ++ // pref(hint); ++} ++ ++void TurboAssembler::Alsl_w(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch) { ++ DCHECK(sa >= 1 && sa <= 31); ++ if (sa <= 4) { ++ alsl_w(rd, rj, rk, sa); ++ } else { ++ Register tmp = rd == rk ? scratch : rd; ++ DCHECK(tmp != rk); ++ slli_w(tmp, rj, sa); ++ add_w(rd, rk, tmp); ++ } ++} ++ ++void TurboAssembler::Alsl_d(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch) { ++ DCHECK(sa >= 1 && sa <= 31); ++ if (sa <= 4) { ++ alsl_d(rd, rj, rk, sa); ++ } else { ++ Register tmp = rd == rk ? 
scratch : rd; ++ DCHECK(tmp != rk); ++ slli_d(tmp, rj, sa); ++ add_d(rd, rk, tmp); ++ } ++} ++ ++// ------------Pseudo-instructions------------- ++ ++// Change endianness ++void TurboAssembler::ByteSwapSigned(Register dest, Register src, ++ int operand_size) { ++ DCHECK(operand_size == 2 || operand_size == 4 || operand_size == 8); ++ if (operand_size == 2) { ++ revb_2h(dest, src); ++ ext_w_h(dest, dest); ++ } else if (operand_size == 4) { ++ revb_2w(dest, src); ++ slli_w(dest, dest, 0); ++ } else { ++ revb_d(dest, dest); ++ } ++} ++ ++void TurboAssembler::ByteSwapUnsigned(Register dest, Register src, ++ int operand_size) { ++ DCHECK(operand_size == 2 || operand_size == 4); ++ if (operand_size == 2) { ++ revb_2h(dest, src); ++ bstrins_d(dest, zero_reg, 63, 16); ++ } else { ++ revb_2w(dest, src); ++ bstrins_d(dest, zero_reg, 63, 32); ++ } ++} ++ ++void TurboAssembler::Ld_b(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_b(rd, source.base(), source.index()); ++ } else { ++ ld_b(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_bu(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_bu(rd, source.base(), source.index()); ++ } else { ++ ld_bu(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_b(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_b(rd, source.base(), source.index()); ++ } else { ++ st_b(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_h(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_h(rd, source.base(), source.index()); ++ } else { ++ ld_h(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_hu(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_hu(rd, source.base(), source.index()); ++ } else { ++ ld_hu(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_h(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_h(rd, source.base(), source.index()); ++ } else { ++ st_h(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_w(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); // TODO ldptr_w ?? 
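++  // AdjustBaseAndOffset is expected either to leave an offset that fits the
++  // signed 12-bit field of ld_w or to rewrite the operand into base + index
++  // form, which is why both shapes are handled below.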
++ if (source.hasIndexReg()) { ++ ldx_w(rd, source.base(), source.index()); ++ } else { ++ ld_w(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_wu(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_wu(rd, source.base(), source.index()); ++ } else { ++ ld_wu(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_w(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_w(rd, source.base(), source.index()); ++ } else { ++ st_w(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_d(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_d(rd, source.base(), source.index()); ++ } else { ++ ld_d(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_d(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_d(rd, source.base(), source.index()); ++ } else { ++ st_d(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Fld_s(FPURegister fd, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fldx_s(fd, tmp.base(), tmp.index()); ++ } else { ++ fld_s(fd, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Fst_s(FPURegister fs, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fstx_s(fs, tmp.base(), tmp.index()); ++ } else { ++ fst_s(fs, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Fld_d(FPURegister fd, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fldx_d(fd, tmp.base(), tmp.index()); ++ } else { ++ fld_d(fd, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Fst_d(FPURegister fs, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fstx_d(fs, tmp.base(), tmp.index()); ++ } else { ++ fst_d(fs, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Ll_w(Register rd, const MemOperand& rj) { ++ DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ ll_w(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ ll_w(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::Ll_d(Register rd, const MemOperand& rj) { ++ DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ ll_d(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ ll_d(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::Sc_w(Register rd, const MemOperand& rj) { ++ DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ sc_w(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ sc_w(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::Sc_d(Register rd, const MemOperand& rj) { ++ 
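++  // Descriptive note: per LoongArch sc.d semantics, rd both supplies the value
++  // to store and receives the LL/SC success flag (1 = stored, 0 = failed);
++  // this wrapper only widens the reach of the 14-bit offset.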
DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ sc_d(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ sc_d(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::li(Register dst, Handle value, LiFlags mode) { ++ // TODO(jgruber,v8:8887): Also consider a root-relative load when generating ++ // non-isolate-independent code. In many cases it might be cheaper than ++ // embedding the relocatable value. ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadConstant(dst, value); ++ return; ++ } ++ li(dst, Operand(value), mode); ++} ++ ++void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) { ++ // TODO(jgruber,v8:8887): Also consider a root-relative load when generating ++ // non-isolate-independent code. In many cases it might be cheaper than ++ // embedding the relocatable value. ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadExternalReference(dst, value); ++ return; ++ } ++ li(dst, Operand(value), mode); ++} ++ ++void TurboAssembler::li(Register dst, const StringConstantBase* string, ++ LiFlags mode) { ++ li(dst, Operand::EmbeddedStringConstant(string), mode); ++} ++ ++static inline int InstrCountForLiLower32Bit(int64_t value) { ++ if (is_int12(static_cast(value)) || ++ is_uint12(static_cast(value)) || !(value & kImm12Mask)) { ++ return 1; ++ } else { ++ return 2; ++ } ++} ++ ++void TurboAssembler::LiLower32BitHelper(Register rd, Operand j) { ++ if (is_int12(static_cast(j.immediate()))) { ++ addi_d(rd, zero_reg, j.immediate()); ++ } else if (is_uint12(static_cast(j.immediate()))) { ++ ori(rd, zero_reg, j.immediate() & kImm12Mask); ++ } else { ++ lu12i_w(rd, j.immediate() >> 12 & 0xfffff); ++ if (j.immediate() & kImm12Mask) { ++ ori(rd, rd, j.immediate() & kImm12Mask); ++ } ++ } ++} ++ ++int TurboAssembler::InstrCountForLi64Bit(int64_t value) { ++ if (is_int32(value)) { ++ return InstrCountForLiLower32Bit(value); ++ } else if (is_int52(value)) { ++ return InstrCountForLiLower32Bit(value) + 1; ++ } else if ((value & 0xffffffffL) == 0) { ++ // 32 LSBs (Least Significant Bits) all set to zero. ++ uint8_t tzc = base::bits::CountTrailingZeros32(value >> 32); ++ uint8_t lzc = base::bits::CountLeadingZeros32(value >> 32); ++ if (tzc >= 20) { ++ return 1; ++ } else if (tzc + lzc > 12) { ++ return 2; ++ } else { ++ return 3; ++ } ++ } else { ++ int64_t imm21 = (value >> 31) & 0x1fffffL; ++ if (imm21 != 0x1fffffL && imm21 != 0) { ++ return InstrCountForLiLower32Bit(value) + 2; ++ } else { ++ return InstrCountForLiLower32Bit(value) + 1; ++ } ++ } ++ UNREACHABLE(); ++ return INT_MAX; ++} ++ ++// All changes to if...else conditions here must be added to ++// InstrCountForLi64Bit as well. ++void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) { ++ DCHECK(!j.is_reg()); ++ DCHECK(!MustUseReg(j.rmode())); ++ DCHECK(mode == OPTIMIZE_SIZE); ++ int64_t imm = j.immediate(); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Normal load of an immediate value which does not need Relocation Info. ++ if (is_int32(imm)) { ++ LiLower32BitHelper(rd, j); ++ } else if (is_int52(imm)) { ++ LiLower32BitHelper(rd, j); ++ lu32i_d(rd, imm >> 32 & 0xfffff); ++ } else if ((imm & 0xffffffffL) == 0) { ++ // 32 LSBs (Least Significant Bits) all set to zero. 
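++    // Illustrative example: for imm = 0x0040000000000000 the low 32 bits are
++    // zero and every set bit lies in [63:52], so tzc >= 20 below and a single
++    // lu52i_d(rd, zero_reg, 0x004) materializes the value.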
++ uint8_t tzc = base::bits::CountTrailingZeros32(imm >> 32); ++ uint8_t lzc = base::bits::CountLeadingZeros32(imm >> 32); ++ if (tzc >= 20) { ++ lu52i_d(rd, zero_reg, imm >> 52 & kImm12Mask); ++ } else if (tzc + lzc > 12) { ++ int32_t mask = (1 << (32 - tzc)) - 1; ++ lu12i_w(rd, imm >> (tzc + 32) & mask); ++ slli_d(rd, rd, tzc + 20); ++ } else { ++ xor_(rd, rd, rd); ++ lu32i_d(rd, imm >> 32 & 0xfffff); ++ lu52i_d(rd, rd, imm >> 52 & kImm12Mask); ++ } ++ } else { ++ int64_t imm21 = (imm >> 31) & 0x1fffffL; ++ LiLower32BitHelper(rd, j); ++ if (imm21 != 0x1fffffL && imm21 != 0) lu32i_d(rd, imm >> 32 & 0xfffff); ++ lu52i_d(rd, rd, imm >> 52 & kImm12Mask); ++ } ++} ++ ++void TurboAssembler::li(Register rd, Operand j, LiFlags mode) { ++ DCHECK(!j.is_reg()); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) { ++ li_optimized(rd, j, mode); ++ } else if (MustUseReg(j.rmode())) { ++ int64_t immediate; ++ if (j.IsHeapObjectRequest()) { ++ RequestHeapObject(j.heap_object_request()); ++ immediate = 0; ++ } else { ++ immediate = j.immediate(); ++ } ++ ++ RecordRelocInfo(j.rmode(), immediate); ++ lu12i_w(rd, immediate >> 12 & 0xfffff); ++ ori(rd, rd, immediate & kImm12Mask); ++ lu32i_d(rd, immediate >> 32 & 0xfffff); ++ } else if (mode == ADDRESS_LOAD) { ++ // We always need the same number of instructions as we may need to patch ++ // this code to load another value which may need all 3 instructions. ++ lu12i_w(rd, j.immediate() >> 12 & 0xfffff); ++ ori(rd, rd, j.immediate() & kImm12Mask); ++ lu32i_d(rd, j.immediate() >> 32 & 0xfffff); ++ } else { // mode == CONSTANT_SIZE - always emit the same instruction ++ // sequence. ++ lu12i_w(rd, j.immediate() >> 12 & 0xfffff); ++ ori(rd, rd, j.immediate() & kImm12Mask); ++ lu32i_d(rd, j.immediate() >> 32 & 0xfffff); ++ lu52i_d(rd, rd, j.immediate() >> 52 & kImm12Mask); ++ } ++} ++ ++void TurboAssembler::MultiPush(RegList regs) { ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPush(RegList regs1, RegList regs2) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs1 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs2 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPush(RegList regs1, RegList regs2, RegList regs3) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ DCHECK_EQ(regs1 & regs3, 0); ++ DCHECK_EQ(regs2 & regs3, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs1 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs2 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs3 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ 
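++// Illustrative layout: pushing a RegList holding, say, a0 and a1 stores the
++// higher-numbered a1 at [sp + 8] and a0 at [sp + 0] of the new sp, since the
++// loops above walk register codes downwards; the MultiPop variants below walk
++// them upwards, so the same RegList restores the registers and releases the
++// stack space again.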
++void TurboAssembler::MultiPop(RegList regs) { ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPop(RegList regs1, RegList regs2) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs2 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs1 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPop(RegList regs1, RegList regs2, RegList regs3) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ DCHECK_EQ(regs1 & regs3, 0); ++ DCHECK_EQ(regs2 & regs3, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs3 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs2 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs1 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPushFPU(RegList regs) { ++ int16_t num_to_push = base::bits::CountPopulation(regs); ++ int16_t stack_offset = num_to_push * kDoubleSize; ++ ++ Sub_d(sp, sp, Operand(stack_offset)); ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs & (1 << i)) != 0) { ++ stack_offset -= kDoubleSize; ++ Fst_d(FPURegister::from_code(i), MemOperand(sp, stack_offset)); ++ } ++ } ++} ++ ++void TurboAssembler::MultiPopFPU(RegList regs) { ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs & (1 << i)) != 0) { ++ Fld_d(FPURegister::from_code(i), MemOperand(sp, stack_offset)); ++ stack_offset += kDoubleSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::Bstrpick_w(Register rk, Register rj, uint16_t msbw, ++ uint16_t lsbw) { ++ DCHECK_LT(lsbw, msbw); ++ DCHECK_LT(lsbw, 32); ++ DCHECK_LT(msbw, 32); ++ bstrpick_w(rk, rj, msbw, lsbw); ++} ++ ++void TurboAssembler::Bstrpick_d(Register rk, Register rj, uint16_t msbw, ++ uint16_t lsbw) { ++ DCHECK_LT(lsbw, msbw); ++ DCHECK_LT(lsbw, 64); ++ DCHECK_LT(msbw, 64); ++ bstrpick_d(rk, rj, msbw, lsbw); ++} ++ ++void TurboAssembler::Neg_s(FPURegister fd, FPURegister fj) { fneg_s(fd, fj); } ++ ++void TurboAssembler::Neg_d(FPURegister fd, FPURegister fj) { fneg_d(fd, fj); } ++ ++void TurboAssembler::Ffint_d_uw(FPURegister fd, FPURegister fj) { ++ // Move the data from fs to t8. ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ movfr2gr_s(t8, fj); ++ Ffint_d_uw(fd, t8); ++} ++ ++void TurboAssembler::Ffint_d_uw(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ ++ // Convert rj to a FP value in fd. ++ DCHECK(rj != t7); ++ ++ // Zero extend int32 in rj. 
++ Bstrpick_d(t7, rj, 31, 0); ++ movgr2fr_d(fd, t7); ++ ffint_d_l(fd, fd); ++} ++ ++void TurboAssembler::Ffint_d_ul(FPURegister fd, FPURegister fj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Move the data from fs to t8. ++ movfr2gr_d(t8, fj); ++ Ffint_d_ul(fd, t8); ++} ++ ++void TurboAssembler::Ffint_d_ul(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Convert rj to a FP value in fd. ++ ++ DCHECK(rj != t7); ++ ++ Label msb_clear, conversion_done; ++ ++ Branch(&msb_clear, ge, rj, Operand(zero_reg)); ++ ++ // Rj >= 2^63 ++ andi(t7, rj, 1); ++ srli_d(rj, rj, 1); ++ or_(t7, t7, rj); ++ movgr2fr_d(fd, t7); ++ ffint_d_l(fd, fd); ++ fadd_d(fd, fd, fd); ++ Branch(&conversion_done); ++ ++ bind(&msb_clear); ++ // Rs < 2^63, we can do simple conversion. ++ movgr2fr_d(fd, rj); ++ ffint_d_l(fd, fd); ++ ++ bind(&conversion_done); ++} ++ ++void TurboAssembler::Ffint_s_uw(FPURegister fd, FPURegister fj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Move the data from fs to t8. ++ movfr2gr_d(t8, fj); ++ Ffint_s_uw(fd, t8); ++} ++ ++void TurboAssembler::Ffint_s_uw(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Convert rj to a FP value in fd. ++ DCHECK(rj != t7); ++ ++ // Zero extend int32 in rj. ++ bstrpick_d(t7, rj, 31, 0); ++ movgr2fr_d(fd, t7); ++ ffint_s_l(fd, fd); ++} ++ ++void TurboAssembler::Ffint_s_ul(FPURegister fd, FPURegister fj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Move the data from fs to t8. ++ movfr2gr_d(t8, fj); ++ Ffint_s_ul(fd, t8); ++} ++ ++void TurboAssembler::Ffint_s_ul(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Convert rj to a FP value in fd. ++ ++ DCHECK(rj != t7); ++ ++ Label positive, conversion_done; ++ ++ Branch(&positive, ge, rj, Operand(zero_reg)); ++ ++ // Rs >= 2^31. ++ andi(t7, rj, 1); ++ srli_d(rj, rj, 1); ++ or_(t7, t7, rj); ++ movgr2fr_d(fd, t7); ++ ffint_s_l(fd, fd); ++ fadd_s(fd, fd, fd); ++ Branch(&conversion_done); ++ ++ bind(&positive); ++ // Rs < 2^31, we can do simple conversion. ++ movgr2fr_d(fd, rj); ++ ffint_s_l(fd, fd); ++ ++ bind(&conversion_done); ++} ++ ++void MacroAssembler::Ftintrne_l_d(FPURegister fd, FPURegister fj) { ++ ftintrne_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrm_l_d(FPURegister fd, FPURegister fj) { ++ ftintrm_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrp_l_d(FPURegister fd, FPURegister fj) { ++ ftintrp_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrz_l_d(FPURegister fd, FPURegister fj) { ++ ftintrz_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrz_l_ud(FPURegister fd, FPURegister fj, ++ FPURegister scratch) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Load to GPR. ++ movfr2gr_d(t8, fj); ++ // Reset sign bit. 
++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x7FFFFFFFFFFFFFFFl); ++ and_(t8, t8, scratch1); ++ } ++ movgr2fr_d(scratch, t8); ++ Ftintrz_l_d(fd, scratch); ++} ++ ++void TurboAssembler::Ftintrz_uw_d(FPURegister fd, FPURegister fj, ++ FPURegister scratch) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_uw_d(t8, fj, scratch); ++ movgr2fr_w(fd, t8); ++} ++ ++void TurboAssembler::Ftintrz_uw_s(FPURegister fd, FPURegister fj, ++ FPURegister scratch) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_uw_s(t8, fj, scratch); ++ movgr2fr_w(fd, t8); ++} ++ ++void TurboAssembler::Ftintrz_ul_d(FPURegister fd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_ul_d(t8, fj, scratch, result); ++ movgr2fr_d(fd, t8); ++} ++ ++void TurboAssembler::Ftintrz_ul_s(FPURegister fd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_ul_s(t8, fj, scratch, result); ++ movgr2fr_d(fd, t8); ++} ++ ++void MacroAssembler::Ftintrz_w_d(FPURegister fd, FPURegister fj) { ++ ftintrz_w_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrne_w_d(FPURegister fd, FPURegister fj) { ++ ftintrne_w_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrm_w_d(FPURegister fd, FPURegister fj) { ++ ftintrm_w_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrp_w_d(FPURegister fd, FPURegister fj) { ++ ftintrp_w_d(fd, fj); ++} ++ ++void TurboAssembler::Ftintrz_uw_d(Register rd, FPURegister fj, ++ FPURegister scratch) { ++ DCHECK(fj != scratch); ++ DCHECK(rd != t7); ++ ++ { ++ // Load 2^31 into scratch as its float representation. ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x41E00000); ++ movgr2fr_w(scratch, zero_reg); ++ movgr2frh_w(scratch, scratch1); ++ } ++ // Test if scratch > fd. ++ // If fd < 2^31 we can convert it normally. ++ Label simple_convert; ++ CompareF64(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^31 from fd, then trunc it to rs ++ // and add 2^31 to rj. ++ fsub_d(scratch, fj, scratch); ++ ftintrz_w_d(scratch, scratch); ++ movfr2gr_s(rd, scratch); ++ Or(rd, rd, 1 << 31); ++ ++ Label done; ++ Branch(&done); ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_w_d(scratch, fj); ++ movfr2gr_s(rd, scratch); ++ ++ bind(&done); ++} ++ ++void TurboAssembler::Ftintrz_uw_s(Register rd, FPURegister fj, ++ FPURegister scratch) { ++ DCHECK(fj != scratch); ++ DCHECK(rd != t7); ++ { ++ // Load 2^31 into scratch as its float representation. ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x4F000000); ++ movgr2fr_w(scratch, scratch1); ++ } ++ // Test if scratch > fs. ++ // If fs < 2^31 we can convert it normally. ++ Label simple_convert; ++ CompareF32(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^31 from fs, then trunc it to rd ++ // and add 2^31 to rd. ++ fsub_s(scratch, fj, scratch); ++ ftintrz_w_s(scratch, scratch); ++ movfr2gr_s(rd, scratch); ++ Or(rd, rd, 1 << 31); ++ ++ Label done; ++ Branch(&done); ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_w_s(scratch, fj); ++ movfr2gr_s(rd, scratch); ++ ++ bind(&done); ++} ++ ++void TurboAssembler::Ftintrz_ul_d(Register rd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ DCHECK(fj != scratch); ++ DCHECK(result.is_valid() ? 
!AreAliased(rd, result, t7) : !AreAliased(rd, t7)); ++ ++ Label simple_convert, done, fail; ++ if (result.is_valid()) { ++ mov(result, zero_reg); ++ Move(scratch, -1.0); ++ // If fd =< -1 or unordered, then the conversion fails. ++ CompareF64(fj, scratch, CLE); ++ BranchTrueShortF(&fail); ++ CompareIsNanF64(fj, scratch); ++ BranchTrueShortF(&fail); ++ } ++ ++ // Load 2^63 into scratch as its double representation. ++ li(t7, 0x43E0000000000000); ++ movgr2fr_d(scratch, t7); ++ ++ // Test if scratch > fs. ++ // If fs < 2^63 we can convert it normally. ++ CompareF64(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^63 from fs, then trunc it to rd ++ // and add 2^63 to rd. ++ fsub_d(scratch, fj, scratch); ++ ftintrz_l_d(scratch, scratch); ++ movfr2gr_d(rd, scratch); ++ Or(rd, rd, Operand(1UL << 63)); ++ Branch(&done); ++ ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_l_d(scratch, fj); ++ movfr2gr_d(rd, scratch); ++ ++ bind(&done); ++ if (result.is_valid()) { ++ // Conversion is failed if the result is negative. ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ addi_d(scratch1, zero_reg, -1); ++ srli_d(scratch1, scratch1, 1); // Load 2^62. ++ movfr2gr_d(result, scratch); ++ xor_(result, result, scratch1); ++ } ++ Slt(result, zero_reg, result); ++ } ++ ++ bind(&fail); ++} ++ ++void TurboAssembler::Ftintrz_ul_s(Register rd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ DCHECK(fj != scratch); ++ DCHECK(result.is_valid() ? !AreAliased(rd, result, t7) : !AreAliased(rd, t7)); ++ ++ Label simple_convert, done, fail; ++ if (result.is_valid()) { ++ mov(result, zero_reg); ++ Move(scratch, -1.0f); ++ // If fd =< -1 or unordered, then the conversion fails. ++ CompareF32(fj, scratch, CLE); ++ BranchTrueShortF(&fail); ++ CompareIsNanF32(fj, scratch); ++ BranchTrueShortF(&fail); ++ } ++ ++ { ++ // Load 2^63 into scratch as its float representation. ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x5F000000); ++ movgr2fr_w(scratch, scratch1); ++ } ++ ++ // Test if scratch > fs. ++ // If fs < 2^63 we can convert it normally. ++ CompareF32(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^63 from fs, then trunc it to rd ++ // and add 2^63 to rd. ++ fsub_s(scratch, fj, scratch); ++ ftintrz_l_s(scratch, scratch); ++ movfr2gr_d(rd, scratch); ++ Or(rd, rd, Operand(1UL << 63)); ++ Branch(&done); ++ ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_l_s(scratch, fj); ++ movfr2gr_d(rd, scratch); ++ ++ bind(&done); ++ if (result.is_valid()) { ++ // Conversion is failed if the result is negative or unordered. ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ addi_d(scratch1, zero_reg, -1); ++ srli_d(scratch1, scratch1, 1); // Load 2^62. 
++ movfr2gr_d(result, scratch); ++ xor_(result, result, scratch1); ++ } ++ Slt(result, zero_reg, result); ++ } ++ ++ bind(&fail); ++} ++ ++void TurboAssembler::RoundDouble(FPURegister dst, FPURegister src, ++ FPURoundingMode mode) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = t8; ++ movfcsr2gr(scratch); ++ li(t7, Operand(mode)); ++ movgr2fcsr(t7); ++ frint_d(dst, src); ++ movgr2fcsr(scratch); ++} ++ ++void TurboAssembler::Floor_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_floor); ++} ++ ++void TurboAssembler::Ceil_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_ceil); ++} ++ ++void TurboAssembler::Trunc_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_trunc); ++} ++ ++void TurboAssembler::Round_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_round); ++} ++ ++void TurboAssembler::RoundFloat(FPURegister dst, FPURegister src, ++ FPURoundingMode mode) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = t8; ++ movfcsr2gr(scratch); ++ li(t7, Operand(mode)); ++ movgr2fcsr(t7); ++ frint_s(dst, src); ++ movgr2fcsr(scratch); ++} ++ ++void TurboAssembler::Floor_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_floor); ++} ++ ++void TurboAssembler::Ceil_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_ceil); ++} ++ ++void TurboAssembler::Trunc_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_trunc); ++} ++ ++void TurboAssembler::Round_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_round); ++} ++ ++void TurboAssembler::CompareF(FPURegister cmp1, FPURegister cmp2, ++ FPUCondition cc, CFRegister cd, bool f32) { ++ if (f32) { ++ fcmp_cond_s(cc, cmp1, cmp2, cd); ++ } else { ++ fcmp_cond_d(cc, cmp1, cmp2, cd); ++ } ++} ++ ++void TurboAssembler::CompareIsNanF(FPURegister cmp1, FPURegister cmp2, ++ CFRegister cd, bool f32) { ++ CompareF(cmp1, cmp2, CUN, cd, f32); ++} ++ ++void TurboAssembler::BranchTrueShortF(Label* target, CFRegister cj) { ++ bcnez(cj, target); ++} ++ ++void TurboAssembler::BranchFalseShortF(Label* target, CFRegister cj) { ++ bceqz(cj, target); ++} ++ ++void TurboAssembler::BranchTrueF(Label* target, CFRegister cj) { ++ // TODO can be optimzed ++ bool long_branch = target->is_bound() ++ ? !is_near(target, OffsetSize::kOffset21) ++ : is_trampoline_emitted(); ++ if (long_branch) { ++ Label skip; ++ BranchFalseShortF(&skip, cj); ++ Branch(target); ++ bind(&skip); ++ } else { ++ BranchTrueShortF(target, cj); ++ } ++} ++ ++void TurboAssembler::BranchFalseF(Label* target, CFRegister cj) { ++ bool long_branch = target->is_bound() ++ ? !is_near(target, OffsetSize::kOffset21) ++ : is_trampoline_emitted(); ++ if (long_branch) { ++ Label skip; ++ BranchTrueShortF(&skip, cj); ++ Branch(target); ++ bind(&skip); ++ } else { ++ BranchFalseShortF(target, cj); ++ } ++} ++ ++void TurboAssembler::FmoveLow(FPURegister dst, Register src_low) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(src_low != scratch); ++ movfrh2gr_s(scratch, dst); ++ movgr2fr_w(dst, src_low); ++ movgr2frh_w(dst, scratch); ++} ++ ++void TurboAssembler::Move(FPURegister dst, uint32_t src) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(static_cast(src))); ++ movgr2fr_w(dst, scratch); ++} ++ ++void TurboAssembler::Move(FPURegister dst, uint64_t src) { ++ // Handle special values first. 
++ if (src == bit_cast(0.0) && has_double_zero_reg_set_) { ++ fmov_d(dst, kDoubleRegZero); ++ } else if (src == bit_cast(-0.0) && has_double_zero_reg_set_) { ++ Neg_d(dst, kDoubleRegZero); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(static_cast(src))); ++ movgr2fr_d(dst, scratch); ++ if (dst == kDoubleRegZero) has_double_zero_reg_set_ = true; ++ } ++} ++ ++void TurboAssembler::Movz(Register rd, Register rj, Register rk) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ maskeqz(scratch, rj, rk); ++ masknez(rd, rd, rk); ++ or_(rd, rd, scratch); ++} ++ ++void TurboAssembler::Movn(Register rd, Register rj, Register rk) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ masknez(scratch, rj, rk); ++ maskeqz(rd, rd, rk); ++ or_(rd, rd, scratch); ++} ++ ++void TurboAssembler::LoadZeroOnCondition(Register rd, Register rj, ++ const Operand& rk, Condition cond) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ switch (cond) { ++ case cc_always: ++ mov(rd, zero_reg); ++ break; ++ case eq: ++ if (rj == zero_reg) { ++ if (rk.is_reg()) { ++ LoadZeroIfConditionZero(rd, rk.rm()); ++ } else { ++ if (rk.immediate() == 0) { ++ mov(rd, zero_reg); ++ } else { ++ // nop(); ++ } ++ } ++ } else if (IsZero(rk)) { ++ LoadZeroIfConditionZero(rd, rj); ++ } else { ++ Sub_d(t7, rj, rk); ++ LoadZeroIfConditionZero(rd, t7); ++ } ++ break; ++ case ne: ++ if (rj == zero_reg) { ++ if (rk.is_reg()) { ++ LoadZeroIfConditionNotZero(rd, rk.rm()); ++ } else { ++ if (rk.immediate() != 0) { ++ mov(rd, zero_reg); ++ } else { ++ // nop(); ++ } ++ } ++ } else if (IsZero(rk)) { ++ LoadZeroIfConditionNotZero(rd, rj); ++ } else { ++ Sub_d(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ } ++ break; ++ ++ // Signed comparison. ++ case greater: ++ Sgt(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ break; ++ case greater_equal: ++ Sge(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj >= rk ++ break; ++ case less: ++ Slt(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj < rk ++ break; ++ case less_equal: ++ Sle(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj <= rk ++ break; ++ ++ // Unsigned comparison. 
++ case Ugreater: ++ Sgtu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj > rk ++ break; ++ ++ case Ugreater_equal: ++ Sgeu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj >= rk ++ break; ++ case Uless: ++ Sltu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj < rk ++ break; ++ case Uless_equal: ++ Sleu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj <= rk ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void TurboAssembler::LoadZeroIfConditionNotZero(Register dest, ++ Register condition) { ++ maskeqz(dest, dest, condition); ++} ++ ++void TurboAssembler::LoadZeroIfConditionZero(Register dest, ++ Register condition) { ++ masknez(dest, dest, condition); ++} ++ ++void TurboAssembler::LoadZeroIfFPUCondition(Register dest, CFRegister cc) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ movcf2gr(scratch, cc); ++ LoadZeroIfConditionNotZero(dest, scratch); ++} ++ ++void TurboAssembler::LoadZeroIfNotFPUCondition(Register dest, CFRegister cc) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ movcf2gr(scratch, cc); ++ LoadZeroIfConditionZero(dest, scratch); ++} ++ ++void TurboAssembler::Clz_w(Register rd, Register rj) { clz_w(rd, rj); } ++ ++void TurboAssembler::Clz_d(Register rd, Register rj) { clz_d(rd, rj); } ++ ++void TurboAssembler::Ctz_w(Register rd, Register rj) { ctz_w(rd, rj); } ++ ++void TurboAssembler::Ctz_d(Register rd, Register rj) { ctz_d(rd, rj); } ++ ++// TODO: Optimize like arm64, use simd instruction ++void TurboAssembler::Popcnt_w(Register rd, Register rj) { ++ // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel ++ // ++ // A generalization of the best bit counting method to integers of ++ // bit-widths up to 128 (parameterized by type T) is this: ++ // ++ // v = v - ((v >> 1) & (T)~(T)0/3); // temp ++ // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp ++ // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp ++ // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; //count ++ // ++ // For comparison, for 32-bit quantities, this algorithm can be executed ++ // using 20 MIPS instructions (the calls to LoadConst32() generate two ++ // machine instructions each for the values being used in this algorithm). ++ // A(n unrolled) loop-based algorithm requires 25 instructions. ++ // ++ // For a 64-bit operand this can be performed in 24 instructions compared ++ // to a(n unrolled) loop based algorithm which requires 38 instructions. ++ // ++ // There are algorithms which are faster in the cases where very few ++ // bits are set but the algorithm here attempts to minimize the total ++ // number of instructions executed even when a large number of bits ++ // are set. 
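++  // Worked example (illustrative): for rj = 13 (0b1101) the steps below give
++  // 13 - (6 & 0x55555555) = 9 (each 2-bit field now holds its own popcount),
++  // then (9 & 0x33333333) + ((9 >> 2) & 0x33333333) = 3, then
++  // (3 + (3 >> 4)) & 0x0F0F0F0F = 3, and finally (3 * 0x01010101) >> 24 = 3,
++  // the number of bits set in 13.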
++ int32_t B0 = 0x55555555; // (T)~(T)0/3 ++ int32_t B1 = 0x33333333; // (T)~(T)0/15*3 ++ int32_t B2 = 0x0F0F0F0F; // (T)~(T)0/255*15 ++ int32_t value = 0x01010101; // (T)~(T)0/255 ++ uint32_t shift = 24; // (sizeof(T) - 1) * BITS_PER_BYTE ++ ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.Acquire(); ++ Register scratch2 = t8; ++ srli_w(scratch, rj, 1); ++ li(scratch2, B0); ++ And(scratch, scratch, scratch2); ++ Sub_w(scratch, rj, scratch); ++ li(scratch2, B1); ++ And(rd, scratch, scratch2); ++ srli_w(scratch, scratch, 2); ++ And(scratch, scratch, scratch2); ++ Add_w(scratch, rd, scratch); ++ srli_w(rd, scratch, 4); ++ Add_w(rd, rd, scratch); ++ li(scratch2, B2); ++ And(rd, rd, scratch2); ++ li(scratch, value); ++ Mul_w(rd, rd, scratch); ++ srli_w(rd, rd, shift); ++} ++ ++void TurboAssembler::Popcnt_d(Register rd, Register rj) { ++ int64_t B0 = 0x5555555555555555l; // (T)~(T)0/3 ++ int64_t B1 = 0x3333333333333333l; // (T)~(T)0/15*3 ++ int64_t B2 = 0x0F0F0F0F0F0F0F0Fl; // (T)~(T)0/255*15 ++ int64_t value = 0x0101010101010101l; // (T)~(T)0/255 ++ uint32_t shift = 56; // (sizeof(T) - 1) * BITS_PER_BYTE ++ ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.Acquire(); ++ Register scratch2 = t8; ++ srli_d(scratch, rj, 1); ++ li(scratch2, B0); ++ And(scratch, scratch, scratch2); ++ Sub_d(scratch, rj, scratch); ++ li(scratch2, B1); ++ And(rd, scratch, scratch2); ++ srli_d(scratch, scratch, 2); ++ And(scratch, scratch, scratch2); ++ Add_d(scratch, rd, scratch); ++ srli_d(rd, scratch, 4); ++ Add_d(rd, rd, scratch); ++ li(scratch2, B2); ++ And(rd, rd, scratch2); ++ li(scratch, value); ++ Mul_d(rd, rd, scratch); ++ srli_d(rd, rd, shift); ++} ++ ++void TurboAssembler::ExtractBits(Register dest, Register source, Register pos, ++ int size, bool sign_extend) { ++ sra_d(dest, source, pos); ++ bstrpick_d(dest, dest, size - 1, 0); ++ if (sign_extend) { ++ switch (size) { ++ case 8: ++ ext_w_b(dest, dest); ++ break; ++ case 16: ++ ext_w_h(dest, dest); ++ break; ++ case 32: ++ // sign-extend word ++ slli_w(dest, dest, 0); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++void TurboAssembler::InsertBits(Register dest, Register source, Register pos, ++ int size) { ++ Rotr_d(dest, dest, pos); ++ bstrins_d(dest, source, size - 1, 0); ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Sub_d(scratch, zero_reg, pos); ++ Rotr_d(dest, dest, scratch); ++ } ++} ++ ++void MacroAssembler::EmitFPUTruncate( ++ FPURoundingMode rounding_mode, Register result, DoubleRegister double_input, ++ Register scratch, DoubleRegister double_scratch, Register except_flag, ++ CheckForInexactConversion check_inexact) { ++ break_(3); ++} ++ ++void TurboAssembler::TryInlineTruncateDoubleToI(Register result, ++ DoubleRegister double_input, ++ Label* done) { ++ DoubleRegister single_scratch = kScratchDoubleReg.low(); ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.Acquire(); ++ Register scratch2 = t7; ++ ++ // Clear cumulative exception flags and save the FCSR. ++ /* movfcsr2gr(scratch2, FCSR); ++ movgr2fcsr(FCSR, zero_reg); ++ // Try a conversion to a signed integer. ++ ftintrz_w_d(single_scratch, double_input); ++ movfr2gr_w(result, single_scratch); ++ // Retrieve and restore the FCSR. 
++ movfcsr2gr(scratch, FCSR); ++ movgr2fcsr(FCSR, scratch2); ++ // Check for overflow and NaNs. ++ And(scratch, scratch, ++ kFCSROverflowFlagMask | kFCSRUnderflowFlagMask | ++ kFCSRInvalidOpFlagMask); ++ // If we had no exceptions we are done. ++ Branch(done, eq, scratch, Operand(zero_reg));*/ ++ ++ CompareIsNanF64(double_input, double_input); ++ Move(result, zero_reg); ++ bcnez(FCC0, done); ++ ftintrz_l_d(single_scratch, double_input); ++ movfr2gr_d(scratch2, single_scratch); ++ li(scratch, 1L << 63); ++ Xor(scratch, scratch, scratch2); ++ rotri_d(scratch2, scratch, 1); ++ movfr2gr_s(result, single_scratch); ++ Branch(done, ne, scratch, Operand(scratch2)); ++} ++ ++void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone, ++ Register result, ++ DoubleRegister double_input, ++ StubCallMode stub_mode) { ++ Label done; ++ ++ TryInlineTruncateDoubleToI(result, double_input, &done); ++ ++ // If we fell through then inline version didn't succeed - call stub instead. ++ Sub_d(sp, sp, ++ Operand(kDoubleSize + kSystemPointerSize)); // Put input on stack. ++ St_d(ra, MemOperand(sp, kSystemPointerSize)); ++ Fst_d(double_input, MemOperand(sp, 0)); ++ ++ if (stub_mode == StubCallMode::kCallWasmRuntimeStub) { ++ Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); ++ } else { ++ Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET); ++ } ++ ++ Pop(ra, result); ++ bind(&done); ++} ++ ++// BRANCH_ARGS_CHECK checks that conditional jump arguments are correct. ++#define BRANCH_ARGS_CHECK(cond, rj, rk) \ ++ DCHECK((cond == cc_always && rj == zero_reg && rk.rm() == zero_reg) || \ ++ (cond != cc_always && (rj != zero_reg || rk.rm() != zero_reg))) ++ ++void TurboAssembler::Branch(Label* L, bool need_link) { ++ int offset = GetOffset(L, OffsetSize::kOffset26); ++ if (need_link) { ++ bl(offset); ++ } else { ++ b(offset); ++ } ++} ++ ++void TurboAssembler::Branch(Label* L, Condition cond, Register rj, ++ const Operand& rk, bool need_link) { ++ if (L->is_bound()) { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ if (!BranchShortOrFallback(L, cond, rj, rk, need_link)) { ++ if (cond != cc_always) { ++ Label skip; ++ Condition neg_cond = NegateCondition(cond); ++ BranchShort(&skip, neg_cond, rj, rk, need_link); ++ Branch(L, need_link); ++ bind(&skip); ++ } else { ++ Branch(L); ++ } ++ } ++ } else { ++ if (is_trampoline_emitted()) { ++ if (cond != cc_always) { ++ Label skip; ++ Condition neg_cond = NegateCondition(cond); ++ BranchShort(&skip, neg_cond, rj, rk, need_link); ++ Branch(L, need_link); ++ bind(&skip); ++ } else { ++ Branch(L); ++ } ++ } else { ++ BranchShort(L, cond, rj, rk, need_link); ++ } ++ } ++} ++ ++void TurboAssembler::Branch(Label* L, Condition cond, Register rj, ++ RootIndex index) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Branch(L, cond, rj, Operand(scratch)); ++} ++ ++int32_t TurboAssembler::GetOffset(Label* L, OffsetSize bits) { ++ return branch_offset_helper(L, bits) >> 2; ++} ++ ++Register TurboAssembler::GetRkAsRegisterHelper(const Operand& rk, ++ Register scratch) { ++ Register r2 = no_reg; ++ if (rk.is_reg()) { ++ r2 = rk.rm(); ++ } else { ++ r2 = scratch; ++ li(r2, rk); ++ } ++ ++ return r2; ++} ++ ++bool TurboAssembler::BranchShortOrFallback(Label* L, Condition cond, ++ Register rj, const Operand& rk, ++ bool need_link) { ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.hasAvailable() ? 
temps.Acquire() : t8; ++ ++ // Be careful to always use shifted_branch_offset only just before the ++ // branch instruction, as the location will be remember for patching the ++ // target. ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ int offset = 0; ++ switch (cond) { ++ case cc_always: ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ if (need_link) { ++ bl(offset); ++ } else { ++ b(offset); ++ } ++ break; ++ case eq: ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // beq is used here to make the code patchable. Otherwise b should ++ // be used which has no condition field so is not patchable. ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ beq(rj, rj, offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset21); ++ beqz(rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ // We don't want any other register but scratch clobbered. ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ beq(rj, sc, offset); ++ } ++ break; ++ case ne: ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ // bne is used here to make the code patchable. Otherwise we ++ // should not generate any instruction. ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bne(rj, rj, offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset21); ++ bnez(rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ // We don't want any other register but scratch clobbered. ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bne(rj, sc, offset); ++ } ++ break; ++ ++ // Signed comparison. ++ case greater: ++ // rj > rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. 
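++        // (rj > rj can never hold, so the conditional branch is simply never
++        // taken.)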
++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(zero_reg, rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(sc, rj, offset); ++ } ++ break; ++ case greater_equal: ++ // rj >= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(rj, zero_reg, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(rj, sc, offset); ++ } ++ break; ++ case less: ++ // rj < rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(rj, zero_reg, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(rj, sc, offset); ++ } ++ break; ++ case less_equal: ++ // rj <= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(zero_reg, rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(sc, rj, offset); ++ } ++ break; ++ ++ // Unsigned comparison. ++ case Ugreater: ++ // rj > rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. ++ } else if (rj == zero_reg) { ++ // No code needs to be emitted. 
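++        // (zero_reg can never be unsigned-greater than anything, so again no
++        // branch is needed.)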
++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bltu(sc, rj, offset); ++ } ++ break; ++ case Ugreater_equal: ++ // rj >= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (rj == zero_reg) { ++ // No code needs to be emitted. ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bgeu(rj, sc, offset); ++ } ++ break; ++ case Uless: ++ // rj < rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. ++ } else if (IsZero(rk)) { ++ // No code needs to be emitted. ++ } else if (rj == zero_reg) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bltu(rj, sc, offset); ++ } ++ break; ++ case Uless_equal: ++ // rj <= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (rj == zero_reg) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ // No code needs to be emitted. 
++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bgeu(sc, rj, offset); ++ } ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++ return true; ++} ++ ++void TurboAssembler::BranchShort(Label* L, Condition cond, Register rj, ++ const Operand& rk, bool need_link) { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ bool result = BranchShortOrFallback(L, cond, rj, rk, need_link); ++ DCHECK(result); ++ USE(result); ++} ++ ++void TurboAssembler::LoadFromConstantsTable(Register destination, ++ int constant_index) { ++ DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable)); ++ LoadRoot(destination, RootIndex::kBuiltinsConstantsTable); ++ Ld_d(destination, ++ FieldMemOperand(destination, FixedArray::kHeaderSize + ++ constant_index * kPointerSize)); ++} ++ ++void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) { ++ Ld_d(destination, MemOperand(kRootRegister, offset)); ++} ++ ++void TurboAssembler::LoadRootRegisterOffset(Register destination, ++ intptr_t offset) { ++ if (offset == 0) { ++ Move(destination, kRootRegister); ++ } else { ++ Add_d(destination, kRootRegister, Operand(offset)); ++ } ++} ++ ++void TurboAssembler::Jump(Register target, Condition cond, Register rj, ++ const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (cond == cc_always) { ++ jirl(zero_reg, target, 0); ++ } else { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ Label skip; ++ Branch(&skip, NegateCondition(cond), rj, rk); ++ jirl(zero_reg, target, 0); ++ bind(&skip); ++ } ++} ++ ++void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode, ++ Condition cond, Register rj, const Operand& rk) { ++ Label skip; ++ if (cond != cc_always) { ++ Branch(&skip, NegateCondition(cond), rj, rk); ++ } ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ li(t7, Operand(target, rmode)); ++ jirl(zero_reg, t7, 0); ++ bind(&skip); ++ } ++} ++ ++void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond, ++ Register rj, const Operand& rk) { ++ DCHECK(!RelocInfo::IsCodeTarget(rmode)); ++ Jump(static_cast(target), rmode, cond, rj, rk); ++} ++ ++void TurboAssembler::Jump(Handle code, RelocInfo::Mode rmode, ++ Condition cond, Register rj, const Operand& rk) { ++ DCHECK(RelocInfo::IsCodeTarget(rmode)); ++ ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadConstant(t7, code); ++ Add_d(t7, t7, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Jump(t7, cond, rj, rk); ++ return; ++ } else if (options().inline_offheap_trampolines) { ++ int builtin_index = Builtins::kNoBuiltinId; ++ if (isolate()->builtins()->IsBuiltinHandle(code, &builtin_index) && ++ Builtins::IsIsolateIndependent(builtin_index)) { ++ // Inline the trampoline. 
++ RecordCommentForOffHeapTrampoline(builtin_index); ++ CHECK_NE(builtin_index, Builtins::kNoBuiltinId); ++ EmbeddedData d = EmbeddedData::FromBlob(); ++ Address entry = d.InstructionStartOfBuiltin(builtin_index); ++ li(t7, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ Jump(t7, cond, rj, rk); ++ return; ++ } ++ } ++ ++ Jump(static_cast(code.address()), rmode, cond, rj, rk); ++} ++ ++void TurboAssembler::Jump(const ExternalReference& reference) { ++ li(t7, reference); ++ Jump(t7); ++} ++ ++// Note: To call gcc-compiled C code on loonarch, you must call through t[0-8]. ++void TurboAssembler::Call(Register target, Condition cond, Register rj, ++ const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (cond == cc_always) { ++ jirl(ra, target, 0); ++ } else { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ Label skip; ++ Branch(&skip, NegateCondition(cond), rj, rk); ++ jirl(ra, target, 0); ++ bind(&skip); ++ } ++} ++ ++void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit, ++ unsigned higher_limit, ++ Label* on_in_range) { ++ if (lower_limit != 0) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Sub_d(scratch, value, Operand(lower_limit)); ++ Branch(on_in_range, ls, scratch, Operand(higher_limit - lower_limit)); ++ } else { ++ Branch(on_in_range, ls, value, Operand(higher_limit - lower_limit)); ++ } ++} ++ ++void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond, ++ Register rj, const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ li(t7, Operand(static_cast(target), rmode), ADDRESS_LOAD); ++ Call(t7, cond, rj, rk); ++} ++ ++void TurboAssembler::Call(Handle code, RelocInfo::Mode rmode, ++ Condition cond, Register rj, const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadConstant(t7, code); ++ Add_d(t7, t7, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Call(t7, cond, rj, rk); ++ return; ++ } else if (options().inline_offheap_trampolines) { ++ int builtin_index = Builtins::kNoBuiltinId; ++ if (isolate()->builtins()->IsBuiltinHandle(code, &builtin_index) && ++ Builtins::IsIsolateIndependent(builtin_index)) { ++ // Inline the trampoline. ++ RecordCommentForOffHeapTrampoline(builtin_index); ++ CHECK_NE(builtin_index, Builtins::kNoBuiltinId); ++ EmbeddedData d = EmbeddedData::FromBlob(); ++ Address entry = d.InstructionStartOfBuiltin(builtin_index); ++ li(t7, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ Call(t7, cond, rj, rk); ++ return; ++ } ++ } ++ ++ DCHECK(RelocInfo::IsCodeTarget(rmode)); ++ DCHECK(code->IsExecutable()); ++ Call(code.address(), rmode, cond, rj, rk); ++} ++ ++void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin_index) { ++ STATIC_ASSERT(kSystemPointerSize == 8); ++ STATIC_ASSERT(kSmiTagSize == 1); ++ STATIC_ASSERT(kSmiTag == 0); ++ ++ // The builtin_index register contains the builtin index as a Smi. 
++ SmiUntag(builtin_index, builtin_index); ++ Alsl_d(builtin_index, builtin_index, kRootRegister, kSystemPointerSizeLog2, ++ t7); ++ Ld_d(builtin_index, ++ MemOperand(builtin_index, IsolateData::builtin_entry_table_offset())); ++} ++ ++void TurboAssembler::CallBuiltinByIndex(Register builtin_index) { ++ LoadEntryFromBuiltinIndex(builtin_index); ++ Call(builtin_index); ++} ++ ++void TurboAssembler::PatchAndJump(Address target) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ pcaddi(scratch, 4); ++ Ld_d(t7, MemOperand(scratch, 0)); ++ jirl(zero_reg, t7, 0); ++ nop(); ++ DCHECK_EQ(reinterpret_cast(pc_) % 8, 0); ++ *reinterpret_cast(pc_) = target; // pc_ should be align. ++ pc_ += sizeof(uint64_t); ++} ++ ++void TurboAssembler::StoreReturnAddressAndCall(Register target) { ++ // This generates the final instruction sequence for calls to C functions ++ // once an exit frame has been constructed. ++ // ++ // Note that this assumes the caller code (i.e. the Code object currently ++ // being generated) is immovable or that the callee function cannot trigger ++ // GC, since the callee function will return to it. ++ ++ Assembler::BlockTrampolinePoolScope block_trampoline_pool(this); ++ static constexpr int kNumInstructionsToJump = 2; ++ Label find_ra; ++ // Adjust the value in ra to point to the correct return location, 2nd ++ // instruction past the real call into C code (the jirl)), and push it. ++ // This is the return address of the exit frame. ++ pcaddi(ra, kNumInstructionsToJump + 1); ++ bind(&find_ra); ++ ++ // This spot was reserved in EnterExitFrame. ++ St_d(ra, MemOperand(sp, 0)); ++ // Stack is still aligned. ++ ++ // TODO can be jirl target? a0 -- a7? ++ jirl(zero_reg, target, 0); ++ // Make sure the stored 'ra' points to this position. ++ DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra)); ++} ++ ++void TurboAssembler::Ret(Condition cond, Register rj, const Operand& rk) { ++ Jump(ra, cond, rj, rk); ++} ++ ++void TurboAssembler::DropAndRet(int drop) { ++ DCHECK(is_int16(drop * kPointerSize)); ++ addi_d(sp, sp, drop * kPointerSize); ++ Ret(); ++} ++ ++void TurboAssembler::DropAndRet(int drop, Condition cond, Register r1, ++ const Operand& r2) { ++ // Both Drop and Ret need to be conditional. 
++ Label skip; ++ if (cond != cc_always) { ++ Branch(&skip, NegateCondition(cond), r1, r2); ++ } ++ ++ Drop(drop); ++ Ret(); ++ ++ if (cond != cc_always) { ++ bind(&skip); ++ } ++} ++ ++void TurboAssembler::Drop(int count, Condition cond, Register reg, ++ const Operand& op) { ++ if (count <= 0) { ++ return; ++ } ++ ++ Label skip; ++ ++ if (cond != al) { ++ Branch(&skip, NegateCondition(cond), reg, op); ++ } ++ ++ Add_d(sp, sp, Operand(count * kPointerSize)); ++ ++ if (cond != al) { ++ bind(&skip); ++ } ++} ++ ++void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) { ++ if (scratch == no_reg) { ++ Xor(reg1, reg1, Operand(reg2)); ++ Xor(reg2, reg2, Operand(reg1)); ++ Xor(reg1, reg1, Operand(reg2)); ++ } else { ++ mov(scratch, reg1); ++ mov(reg1, reg2); ++ mov(reg2, scratch); ++ } ++} ++ ++void TurboAssembler::Call(Label* target) { Branch(target, true); } ++ ++void TurboAssembler::Push(Smi smi) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(smi)); ++ push(scratch); ++} ++ ++void TurboAssembler::Push(Handle handle) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(handle)); ++ push(scratch); ++} ++ ++void MacroAssembler::MaybeDropFrames() { ++ // Check whether we need to drop frames to restart a function on the stack. ++ li(a1, ExternalReference::debug_restart_fp_address(isolate())); ++ Ld_d(a1, MemOperand(a1, 0)); ++ Jump(BUILTIN_CODE(isolate(), FrameDropperTrampoline), RelocInfo::CODE_TARGET, ++ ne, a1, Operand(zero_reg)); ++} ++ ++// --------------------------------------------------------------------------- ++// Exception handling. ++ ++void MacroAssembler::PushStackHandler() { ++ // Adjust this code if not the case. ++ STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kPointerSize); ++ STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kPointerSize); ++ ++ Push(Smi::zero()); // Padding. ++ ++ // Link the current handler as the next handler. ++ li(t2, ++ ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate())); ++ Ld_d(t1, MemOperand(t2, 0)); ++ push(t1); ++ ++ // Set this new handler as the current one. ++ St_d(sp, MemOperand(t2, 0)); ++} ++ ++void MacroAssembler::PopStackHandler() { ++ STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0); ++ pop(a1); ++ Add_d(sp, sp, ++ Operand( ++ static_cast(StackHandlerConstants::kSize - kPointerSize))); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, ++ ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate())); ++ St_d(a1, MemOperand(scratch, 0)); ++} ++ ++void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst, ++ const DoubleRegister src) { ++ fsub_d(dst, src, kDoubleRegZero); ++} ++ ++void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) { ++ Move(dst, f0); // Reg f0 is loongarch return value ++} ++ ++void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) { ++ Move(dst, f0); // Reg f0 is loongarch first argument value. 
++} ++ ++void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(f0, src); } ++ ++void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(f0, src); } ++ ++void TurboAssembler::MovToFloatParameters(DoubleRegister src1, ++ DoubleRegister src2) { ++ const DoubleRegister fparg2 = f1; ++ if (src2 == f0) { ++ DCHECK(src1 != fparg2); ++ Move(fparg2, src2); ++ Move(f0, src1); ++ } else { ++ Move(f0, src1); ++ Move(fparg2, src2); ++ } ++} ++ ++// ----------------------------------------------------------------------------- ++// JavaScript invokes. ++ ++void TurboAssembler::PrepareForTailCall(Register callee_args_count, ++ Register caller_args_count, ++ Register scratch0, Register scratch1) { ++ // Calculate the end of destination area where we will put the arguments ++ // after we drop current frame. We add kPointerSize to count the receiver ++ // argument which is not included into formal parameters count. ++ Register dst_reg = scratch0; ++ Alsl_d(dst_reg, caller_args_count, fp, kPointerSizeLog2, t7); ++ Add_d(dst_reg, dst_reg, ++ Operand(StandardFrameConstants::kCallerSPOffset + kPointerSize)); ++ ++ Register src_reg = caller_args_count; ++ // Calculate the end of source area. +kPointerSize is for the receiver. ++ Alsl_d(src_reg, callee_args_count, sp, kPointerSizeLog2, t7); ++ Add_d(src_reg, src_reg, Operand(kPointerSize)); ++ ++ if (FLAG_debug_code) { ++ Check(lo, AbortReason::kStackAccessBelowStackPointer, src_reg, ++ Operand(dst_reg)); ++ } ++ ++ // Restore caller's frame pointer and return address now as they will be ++ // overwritten by the copying loop. ++ Ld_d(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset)); ++ Ld_d(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); ++ ++ // Now copy callee arguments to the caller frame going backwards to avoid ++ // callee arguments corruption (source and destination areas could overlap). ++ ++ // Both src_reg and dst_reg are pointing to the word after the one to copy, ++ // so they must be pre-decremented in the loop. ++ Register tmp_reg = scratch1; ++ Label loop, entry; ++ Branch(&entry); ++ bind(&loop); ++ Sub_d(src_reg, src_reg, Operand(kPointerSize)); ++ Sub_d(dst_reg, dst_reg, Operand(kPointerSize)); ++ Ld_d(tmp_reg, MemOperand(src_reg, 0)); ++ St_d(tmp_reg, MemOperand(dst_reg, 0)); ++ bind(&entry); ++ Branch(&loop, ne, sp, Operand(src_reg)); ++ ++ // Leave current frame. ++ mov(sp, dst_reg); ++} ++ ++void MacroAssembler::InvokePrologue(Register expected_parameter_count, ++ Register actual_parameter_count, ++ Label* done, InvokeFlag flag) { ++ Label regular_invoke; ++ ++ // Check whether the expected and actual arguments count match. The registers ++ // are set up according to contract with ArgumentsAdaptorTrampoline: ++ // a0: actual arguments count ++ // a1: function (passed through to callee) ++ // a2: expected arguments count ++ ++ // The code below is made a lot easier because the calling code already sets ++ // up actual and expected registers according to the contract. 
++ ++ DCHECK_EQ(actual_parameter_count, a0); ++ DCHECK_EQ(expected_parameter_count, a2); ++ ++ Branch(®ular_invoke, eq, expected_parameter_count, ++ Operand(actual_parameter_count)); ++ ++ Handle adaptor = BUILTIN_CODE(isolate(), ArgumentsAdaptorTrampoline); ++ if (flag == CALL_FUNCTION) { ++ Call(adaptor); ++ Branch(done); ++ } else { ++ Jump(adaptor, RelocInfo::CODE_TARGET); ++ } ++ ++ bind(®ular_invoke); ++} ++ ++void MacroAssembler::CheckDebugHook(Register fun, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count) { ++ Label skip_hook; ++ ++ li(t0, ExternalReference::debug_hook_on_function_call_address(isolate())); ++ Ld_b(t0, MemOperand(t0, 0)); ++ Branch(&skip_hook, eq, t0, Operand(zero_reg)); ++ ++ { ++ // Load receiver to pass it later to DebugOnFunctionCall hook. ++ Alsl_d(t0, actual_parameter_count, sp, kPointerSizeLog2, t7); ++ Ld_d(t0, MemOperand(t0, 0)); ++ FrameScope frame(this, ++ has_frame() ? StackFrame::NONE : StackFrame::INTERNAL); ++ SmiTag(expected_parameter_count); ++ Push(expected_parameter_count); ++ ++ SmiTag(actual_parameter_count); ++ Push(actual_parameter_count); ++ ++ if (new_target.is_valid()) { ++ Push(new_target); ++ } ++ // TODO: MultiPush/Pop ++ Push(fun); ++ Push(fun); ++ Push(t0); ++ CallRuntime(Runtime::kDebugOnFunctionCall); ++ Pop(fun); ++ if (new_target.is_valid()) { ++ Pop(new_target); ++ } ++ ++ Pop(actual_parameter_count); ++ SmiUntag(actual_parameter_count); ++ ++ Pop(expected_parameter_count); ++ SmiUntag(expected_parameter_count); ++ } ++ bind(&skip_hook); ++} ++ ++void MacroAssembler::InvokeFunctionCode(Register function, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count, ++ InvokeFlag flag) { ++ // You can't call a function without a valid frame. ++ DCHECK_IMPLIES(flag == CALL_FUNCTION, has_frame()); ++ DCHECK_EQ(function, a1); ++ DCHECK_IMPLIES(new_target.is_valid(), new_target == a3); ++ ++ // On function call, call into the debugger if necessary. ++ CheckDebugHook(function, new_target, expected_parameter_count, ++ actual_parameter_count); ++ ++ // Clear the new.target register if not given. ++ if (!new_target.is_valid()) { ++ LoadRoot(a3, RootIndex::kUndefinedValue); ++ } ++ ++ Label done; ++ InvokePrologue(expected_parameter_count, actual_parameter_count, &done, flag); ++ // We call indirectly through the code field in the function to ++ // allow recompilation to take effect without changing any of the ++ // call sites. ++ Register code = kJavaScriptCallCodeStartRegister; ++ Ld_d(code, FieldMemOperand(function, JSFunction::kCodeOffset)); ++ if (flag == CALL_FUNCTION) { ++ Add_d(code, code, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Call(code); ++ } else { ++ DCHECK(flag == JUMP_FUNCTION); ++ Add_d(code, code, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Jump(code); ++ } ++ ++ // Continue here if InvokePrologue does handle the invocation due to ++ // mismatched parameter counts. ++ bind(&done); ++} ++ ++void MacroAssembler::InvokeFunctionWithNewTarget( ++ Register function, Register new_target, Register actual_parameter_count, ++ InvokeFlag flag) { ++ // You can't call a function without a valid frame. ++ DCHECK_IMPLIES(flag == CALL_FUNCTION, has_frame()); ++ ++ // Contract with called JS functions requires that function is passed in a1. 
++ DCHECK_EQ(function, a1); ++ Register expected_parameter_count = a2; ++ Register temp_reg = t0; ++ Ld_d(temp_reg, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); ++ // The argument count is stored as uint16_t ++ Ld_hu(expected_parameter_count, ++ FieldMemOperand(temp_reg, ++ SharedFunctionInfo::kFormalParameterCountOffset)); ++ ++ InvokeFunctionCode(a1, new_target, expected_parameter_count, ++ actual_parameter_count, flag); ++} ++ ++void MacroAssembler::InvokeFunction(Register function, ++ Register expected_parameter_count, ++ Register actual_parameter_count, ++ InvokeFlag flag) { ++ // You can't call a function without a valid frame. ++ DCHECK_IMPLIES(flag == CALL_FUNCTION, has_frame()); ++ ++ // Contract with called JS functions requires that function is passed in a1. ++ DCHECK_EQ(function, a1); ++ ++ // Get the function and setup the context. ++ Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); ++ ++ InvokeFunctionCode(a1, no_reg, expected_parameter_count, ++ actual_parameter_count, flag); ++} ++ ++// --------------------------------------------------------------------------- ++// Support functions. ++ ++void MacroAssembler::GetObjectType(Register object, Register map, ++ Register type_reg) { ++ LoadMap(map, object); ++ Ld_hu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset)); ++} ++ ++// ----------------------------------------------------------------------------- ++// Runtime calls. ++ ++void TurboAssembler::AdddOverflow(Register dst, Register left, ++ const Operand& right, Register overflow) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register right_reg = no_reg; ++ Register scratch = t8; ++ if (!right.is_reg()) { ++ li(t7, Operand(right)); ++ right_reg = t7; ++ } else { ++ right_reg = right.rm(); ++ } ++ ++ DCHECK(left != scratch && right_reg != scratch && dst != scratch && ++ overflow != scratch); ++ DCHECK(overflow != left && overflow != right_reg); ++ ++ if (dst == left || dst == right_reg) { ++ add_d(scratch, left, right_reg); ++ xor_(overflow, scratch, left); ++ xor_(t7, scratch, right_reg); ++ and_(overflow, overflow, t7); ++ mov(dst, scratch); ++ } else { ++ add_d(dst, left, right_reg); ++ xor_(overflow, dst, left); ++ xor_(t7, dst, right_reg); ++ and_(overflow, overflow, t7); ++ } ++} ++ ++void TurboAssembler::SubdOverflow(Register dst, Register left, ++ const Operand& right, Register overflow) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register right_reg = no_reg; ++ Register scratch = t8; ++ if (!right.is_reg()) { ++ li(t7, Operand(right)); ++ right_reg = t7; ++ } else { ++ right_reg = right.rm(); ++ } ++ ++ DCHECK(left != scratch && right_reg != scratch && dst != scratch && ++ overflow != scratch); ++ DCHECK(overflow != left && overflow != right_reg); ++ ++ if (dst == left || dst == right_reg) { ++ Sub_d(scratch, left, right_reg); ++ xor_(overflow, left, scratch); ++ xor_(t7, left, right_reg); ++ and_(overflow, overflow, t7); ++ mov(dst, scratch); ++ } else { ++ sub_d(dst, left, right_reg); ++ xor_(overflow, left, dst); ++ xor_(t7, left, right_reg); ++ and_(overflow, overflow, t7); ++ } ++} ++ ++void TurboAssembler::MulOverflow(Register dst, Register left, ++ const Operand& right, Register overflow) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register right_reg = no_reg; ++ Register scratch = t8; ++ if (!right.is_reg()) { ++ li(t7, Operand(right)); ++ right_reg = t7; ++ } else { ++ right_reg = right.rm(); ++ } ++ ++ 
DCHECK(left != scratch && right_reg != scratch && dst != scratch && ++ overflow != scratch); ++ DCHECK(overflow != left && overflow != right_reg); ++ ++ if (dst == left || dst == right_reg) { ++ Mul_w(scratch, left, right_reg); ++ Mulh_w(overflow, left, right_reg); ++ mov(dst, scratch); ++ } else { ++ Mul_w(dst, left, right_reg); ++ Mulh_w(overflow, left, right_reg); ++ } ++ ++ srai_d(scratch, dst, 32); ++ xor_(overflow, overflow, scratch); ++} ++ ++void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments, ++ SaveFPRegsMode save_doubles) { ++ // All parameters are on the stack. v0 has the return value after call. ++ ++ // If the expected number of arguments of the runtime function is ++ // constant, we check that the actual number of arguments match the ++ // expectation. ++ CHECK(f->nargs < 0 || f->nargs == num_arguments); ++ ++ // TODO(1236192): Most runtime routines don't need the number of ++ // arguments passed in because it is constant. At some point we ++ // should remove this need and make the runtime routine entry code ++ // smarter. ++ PrepareCEntryArgs(num_arguments); ++ PrepareCEntryFunction(ExternalReference::Create(f)); ++ Handle code = ++ CodeFactory::CEntry(isolate(), f->result_size, save_doubles); ++ Call(code, RelocInfo::CODE_TARGET); ++} ++ ++void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) { ++ const Runtime::Function* function = Runtime::FunctionForId(fid); ++ DCHECK_EQ(1, function->result_size); ++ if (function->nargs >= 0) { ++ PrepareCEntryArgs(function->nargs); ++ } ++ JumpToExternalReference(ExternalReference::Create(fid)); ++} ++ ++void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin, ++ bool builtin_exit_frame) { ++ PrepareCEntryFunction(builtin); ++ Handle code = CodeFactory::CEntry(isolate(), 1, kDontSaveFPRegs, ++ kArgvOnStack, builtin_exit_frame); ++ Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg)); ++} ++ ++void MacroAssembler::JumpToInstructionStream(Address entry) { ++ li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ Jump(kOffHeapTrampolineRegister); ++} ++ ++void MacroAssembler::LoadWeakValue(Register out, Register in, ++ Label* target_if_cleared) { ++ Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32)); ++ ++ And(out, in, Operand(~kWeakHeapObjectMask)); ++} ++ ++void MacroAssembler::IncrementCounter(StatsCounter* counter, int value, ++ Register scratch1, Register scratch2) { ++ DCHECK_GT(value, 0); ++ if (FLAG_native_code_counters && counter->Enabled()) { ++ // This operation has to be exactly 32-bit wide in case the external ++ // reference table redirects the counter to a uint32_t dummy_stats_counter_ ++ // field. ++ li(scratch2, ExternalReference::Create(counter)); ++ Ld_w(scratch1, MemOperand(scratch2, 0)); ++ Add_w(scratch1, scratch1, Operand(value)); ++ St_w(scratch1, MemOperand(scratch2, 0)); ++ } ++} ++ ++void MacroAssembler::DecrementCounter(StatsCounter* counter, int value, ++ Register scratch1, Register scratch2) { ++ DCHECK_GT(value, 0); ++ if (FLAG_native_code_counters && counter->Enabled()) { ++ // This operation has to be exactly 32-bit wide in case the external ++ // reference table redirects the counter to a uint32_t dummy_stats_counter_ ++ // field. 
++ li(scratch2, ExternalReference::Create(counter)); ++ Ld_w(scratch1, MemOperand(scratch2, 0)); ++ Sub_w(scratch1, scratch1, Operand(value)); ++ St_w(scratch1, MemOperand(scratch2, 0)); ++ } ++} ++ ++// ----------------------------------------------------------------------------- ++// Debugging. ++ ++void TurboAssembler::Trap() { stop(); } ++void TurboAssembler::DebugBreak() { stop(); } ++ ++void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs, ++ Operand rk) { ++ if (emit_debug_code()) Check(cc, reason, rs, rk); ++} ++ ++void TurboAssembler::Check(Condition cc, AbortReason reason, Register rj, ++ Operand rk) { ++ Label L; ++ Branch(&L, cc, rj, rk); ++ Abort(reason); ++ // Will not return here. ++ bind(&L); ++} ++ ++void TurboAssembler::Abort(AbortReason reason) { ++ Label abort_start; ++ bind(&abort_start); ++#ifdef DEBUG ++ const char* msg = GetAbortReason(reason); ++ RecordComment("Abort message: "); ++ RecordComment(msg); ++#endif ++ ++ // Avoid emitting call to builtin if requested. ++ if (trap_on_abort()) { ++ stop(); ++ return; ++ } ++ ++ if (should_abort_hard()) { ++ // We don't care if we constructed a frame. Just pretend we did. ++ FrameScope assume_frame(this, StackFrame::NONE); ++ PrepareCallCFunction(0, a0); ++ li(a0, Operand(static_cast(reason))); ++ CallCFunction(ExternalReference::abort_with_reason(), 1); ++ return; ++ } ++ ++ Move(a0, Smi::FromInt(static_cast(reason))); ++ ++ // Disable stub call restrictions to always allow calls to abort. ++ if (!has_frame()) { ++ // We don't actually want to generate a pile of code for this, so just ++ // claim there is a stack frame, without generating one. ++ FrameScope scope(this, StackFrame::NONE); ++ Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET); ++ } else { ++ Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET); ++ } ++ // Will not return here. ++ if (is_trampoline_pool_blocked()) { ++ // If the calling code cares about the exact number of ++ // instructions generated, we insert padding here to keep the size ++ // of the Abort macro constant. ++ // Currently in debug mode with debug_code enabled the number of ++ // generated instructions is 10, so we use this as a maximum value. 
++ static const int kExpectedAbortInstructions = 10; ++ int abort_instructions = InstructionsGeneratedSince(&abort_start); ++ DCHECK_LE(abort_instructions, kExpectedAbortInstructions); ++ while (abort_instructions++ < kExpectedAbortInstructions) { ++ nop(); ++ } ++ } ++} ++ ++void MacroAssembler::LoadMap(Register destination, Register object) { ++ Ld_d(destination, FieldMemOperand(object, HeapObject::kMapOffset)); ++} ++ ++void MacroAssembler::LoadNativeContextSlot(int index, Register dst) { ++ LoadMap(dst, cp); ++ Ld_d(dst, FieldMemOperand( ++ dst, Map::kConstructorOrBackPointerOrNativeContextOffset)); ++ Ld_d(dst, MemOperand(dst, Context::SlotOffset(index))); ++} ++ ++void TurboAssembler::StubPrologue(StackFrame::Type type) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(StackFrame::TypeToMarker(type))); ++ PushCommonFrame(scratch); ++} ++ ++void TurboAssembler::Prologue() { PushStandardFrame(a1); } ++ ++void TurboAssembler::EnterFrame(StackFrame::Type type) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ int stack_offset = -3 * kPointerSize; ++ const int fp_offset = 1 * kPointerSize; ++ addi_d(sp, sp, stack_offset); ++ stack_offset = -stack_offset - kPointerSize; ++ St_d(ra, MemOperand(sp, stack_offset)); ++ stack_offset -= kPointerSize; ++ St_d(fp, MemOperand(sp, stack_offset)); ++ stack_offset -= kPointerSize; ++ li(t7, Operand(StackFrame::TypeToMarker(type))); ++ St_d(t7, MemOperand(sp, stack_offset)); ++ // Adjust FP to point to saved FP. ++ DCHECK_EQ(stack_offset, 0); ++ Add_d(fp, sp, Operand(fp_offset)); ++} ++ ++void TurboAssembler::LeaveFrame(StackFrame::Type type) { ++ addi_d(sp, fp, 2 * kPointerSize); ++ Ld_d(ra, MemOperand(fp, 1 * kPointerSize)); ++ Ld_d(fp, MemOperand(fp, 0 * kPointerSize)); ++} ++ ++void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space, ++ StackFrame::Type frame_type) { ++ DCHECK(frame_type == StackFrame::EXIT || ++ frame_type == StackFrame::BUILTIN_EXIT); ++ ++ // Set up the frame structure on the stack. ++ STATIC_ASSERT(2 * kPointerSize == ExitFrameConstants::kCallerSPDisplacement); ++ STATIC_ASSERT(1 * kPointerSize == ExitFrameConstants::kCallerPCOffset); ++ STATIC_ASSERT(0 * kPointerSize == ExitFrameConstants::kCallerFPOffset); ++ ++ // This is how the stack will look: ++ // fp + 2 (==kCallerSPDisplacement) - old stack's end ++ // [fp + 1 (==kCallerPCOffset)] - saved old ra ++ // [fp + 0 (==kCallerFPOffset)] - saved old fp ++ // [fp - 1 StackFrame::EXIT Smi ++ // [fp - 2 (==kSPOffset)] - sp of the called function ++ // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the ++ // new stack (will contain saved ra) ++ ++ // Save registers and reserve room for saved entry sp. ++ addi_d(sp, sp, -2 * kPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp); ++ St_d(ra, MemOperand(sp, 3 * kPointerSize)); ++ St_d(fp, MemOperand(sp, 2 * kPointerSize)); ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(StackFrame::TypeToMarker(frame_type))); ++ St_d(scratch, MemOperand(sp, 1 * kPointerSize)); ++ } ++ // Set up new frame pointer. ++ addi_d(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp); ++ ++ if (emit_debug_code()) { ++ St_d(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset)); ++ } ++ ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Save the frame pointer and the context in top. 
++ li(t8, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, ++ isolate())); ++ St_d(fp, MemOperand(t8, 0)); ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kContextAddress, isolate())); ++ St_d(cp, MemOperand(t8, 0)); ++ } ++ ++ const int frame_alignment = MacroAssembler::ActivationFrameAlignment(); ++ if (save_doubles) { ++ // The stack is already aligned to 0 modulo 8 for stores with sdc1. ++ int kNumOfSavedRegisters = FPURegister::kNumRegisters / 2; ++ int space = kNumOfSavedRegisters * kDoubleSize; ++ Sub_d(sp, sp, Operand(space)); ++ // Remember: we only need to save every 2nd double FPU value. ++ for (int i = 0; i < kNumOfSavedRegisters; i++) { ++ FPURegister reg = FPURegister::from_code(2 * i); ++ Fst_d(reg, MemOperand(sp, i * kDoubleSize)); ++ } ++ } ++ ++ // Reserve place for the return address, stack space and an optional slot ++ // (used by DirectCEntry to hold the return value if a struct is ++ // returned) and align the frame preparing for calling the runtime function. ++ DCHECK_GE(stack_space, 0); ++ Sub_d(sp, sp, Operand((stack_space + 2) * kPointerSize)); ++ if (frame_alignment > 0) { ++ DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); ++ And(sp, sp, Operand(-frame_alignment)); // Align stack. ++ } ++ ++ // Set the exit frame sp value to point just before the return address ++ // location. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ addi_d(scratch, sp, kPointerSize); ++ St_d(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset)); ++} ++ ++void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count, ++ bool do_return, ++ bool argument_count_is_length) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Optionally restore all double registers. ++ if (save_doubles) { ++ // Remember: we only need to restore every 2nd double FPU value. ++ int kNumOfSavedRegisters = FPURegister::kNumRegisters / 2; ++ Sub_d(t8, fp, ++ Operand(ExitFrameConstants::kFixedFrameSizeFromFp + ++ kNumOfSavedRegisters * kDoubleSize)); ++ for (int i = 0; i < kNumOfSavedRegisters; i++) { ++ FPURegister reg = FPURegister::from_code(2 * i); ++ Fld_d(reg, MemOperand(t8, i * kDoubleSize)); ++ } ++ } ++ ++ // Clear top frame. ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate())); ++ St_d(zero_reg, MemOperand(t8, 0)); ++ ++ // Restore current context from top and clear it in debug mode. ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kContextAddress, isolate())); ++ Ld_d(cp, MemOperand(t8, 0)); ++ ++#ifdef DEBUG ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kContextAddress, isolate())); ++ St_d(a3, MemOperand(t8, 0)); ++#endif ++ ++ // Pop the arguments, restore registers, and return. ++ mov(sp, fp); // Respect ABI stack constraint. ++ Ld_d(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset)); ++ Ld_d(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset)); ++ ++ if (argument_count.is_valid()) { ++ if (argument_count_is_length) { ++ add_d(sp, sp, argument_count); ++ } else { ++ Alsl_d(sp, argument_count, sp, kPointerSizeLog2, t8); ++ } ++ } ++ ++ addi_d(sp, sp, 2 * kPointerSize); ++ if (do_return) { ++ Ret(); ++ } ++} ++ ++int TurboAssembler::ActivationFrameAlignment() { ++#if V8_HOST_ARCH_LA64 ++ // Running on the real platform. Use the alignment as mandated by the local ++ // environment. ++ // Note: This will break if we ever start generating snapshots on one Mips ++ // platform for another Mips platform with a different alignment. 
++ return base::OS::ActivationFrameAlignment(); ++#else // V8_HOST_ARCH_LA64 ++ // If we are using the simulator then we should always align to the expected ++ // alignment. As the simulator is used to generate snapshots we do not know ++ // if the target platform will need alignment, so this is controlled from a ++ // flag. ++ return FLAG_sim_stack_alignment; ++#endif // V8_HOST_ARCH_LA64 ++} ++ ++void MacroAssembler::AssertStackIsAligned() { ++ if (emit_debug_code()) { ++ const int frame_alignment = ActivationFrameAlignment(); ++ const int frame_alignment_mask = frame_alignment - 1; ++ ++ if (frame_alignment > kPointerSize) { ++ Label alignment_as_expected; ++ DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ andi(scratch, sp, frame_alignment_mask); ++ Branch(&alignment_as_expected, eq, scratch, Operand(zero_reg)); ++ } ++ // Don't use Check here, as it will call Runtime_Abort re-entering here. ++ stop(); ++ bind(&alignment_as_expected); ++ } ++ } ++} ++ ++void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) { ++ if (SmiValuesAre32Bits()) { ++ Ld_w(dst, MemOperand(src.base(), SmiWordOffset(src.offset()))); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ Ld_w(dst, src); ++ SmiUntag(dst); ++ } ++} ++ ++void TurboAssembler::JumpIfSmi(Register value, Label* smi_label, ++ Register scratch) { ++ DCHECK_EQ(0, kSmiTag); ++ andi(scratch, value, kSmiTagMask); ++ Branch(smi_label, eq, scratch, Operand(zero_reg)); ++} ++ ++void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label, ++ Register scratch) { ++ DCHECK_EQ(0, kSmiTag); ++ andi(scratch, value, kSmiTagMask); ++ Branch(not_smi_label, ne, scratch, Operand(zero_reg)); ++} ++ ++void MacroAssembler::AssertNotSmi(Register object) { ++ if (emit_debug_code()) { ++ STATIC_ASSERT(kSmiTag == 0); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ andi(scratch, object, kSmiTagMask); ++ Check(ne, AbortReason::kOperandIsASmi, scratch, Operand(zero_reg)); ++ } ++} ++ ++void MacroAssembler::AssertSmi(Register object) { ++ if (emit_debug_code()) { ++ STATIC_ASSERT(kSmiTag == 0); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ andi(scratch, object, kSmiTagMask); ++ Check(eq, AbortReason::kOperandIsASmi, scratch, Operand(zero_reg)); ++ } ++} ++ ++void MacroAssembler::AssertConstructor(Register object) { ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, t8, ++ Operand(zero_reg)); ++ ++ LoadMap(t8, object); ++ Ld_bu(t8, FieldMemOperand(t8, Map::kBitFieldOffset)); ++ And(t8, t8, Operand(Map::Bits1::IsConstructorBit::kMask)); ++ Check(ne, AbortReason::kOperandIsNotAConstructor, t8, Operand(zero_reg)); ++ } ++} ++ ++void MacroAssembler::AssertFunction(Register object) { ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, t8, ++ Operand(zero_reg)); ++ GetObjectType(object, t8, t8); ++ Check(eq, AbortReason::kOperandIsNotAFunction, t8, ++ Operand(JS_FUNCTION_TYPE)); ++ } ++} ++ ++void MacroAssembler::AssertBoundFunction(Register object) { ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, 
AbortReason::kOperandIsASmiAndNotABoundFunction, t8, ++ Operand(zero_reg)); ++ GetObjectType(object, t8, t8); ++ Check(eq, AbortReason::kOperandIsNotABoundFunction, t8, ++ Operand(JS_BOUND_FUNCTION_TYPE)); ++ } ++} ++ ++void MacroAssembler::AssertGeneratorObject(Register object) { ++ if (!emit_debug_code()) return; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, t8, ++ Operand(zero_reg)); ++ ++ GetObjectType(object, t8, t8); ++ ++ Label done; ++ ++ // Check if JSGeneratorObject ++ Branch(&done, eq, t8, Operand(JS_GENERATOR_OBJECT_TYPE)); ++ ++ // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType) ++ Branch(&done, eq, t8, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE)); ++ ++ // Check if JSAsyncGeneratorObject ++ Branch(&done, eq, t8, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE)); ++ ++ Abort(AbortReason::kOperandIsNotAGeneratorObject); ++ ++ bind(&done); ++} ++ ++void MacroAssembler::AssertUndefinedOrAllocationSite(Register object, ++ Register scratch) { ++ if (emit_debug_code()) { ++ Label done_checking; ++ AssertNotSmi(object); ++ LoadRoot(scratch, RootIndex::kUndefinedValue); ++ Branch(&done_checking, eq, object, Operand(scratch)); ++ GetObjectType(object, scratch, scratch); ++ Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch, ++ Operand(ALLOCATION_SITE_TYPE)); ++ bind(&done_checking); ++ } ++} ++ ++void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_s(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. ++ CompareIsNanF32(src1, src2); ++ BranchTrueF(out_of_line); ++ ++ fmax_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float32MaxOutOfLine(FPURegister dst, FPURegister src1, ++ FPURegister src2) { ++ fadd_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_s(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. ++ CompareIsNanF32(src1, src2); ++ BranchTrueF(out_of_line); ++ ++ fmin_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float32MinOutOfLine(FPURegister dst, FPURegister src1, ++ FPURegister src2) { ++ fadd_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_d(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. ++ CompareIsNanF64(src1, src2); ++ BranchTrueF(out_of_line); ++ ++ fmax_d(dst, src1, src2); ++} ++ ++void TurboAssembler::Float64MaxOutOfLine(FPURegister dst, FPURegister src1, ++ FPURegister src2) { ++ fadd_d(dst, src1, src2); ++} ++ ++void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_d(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. 
++  CompareIsNanF64(src1, src2);
++  BranchTrueF(out_of_line);
++
++  fmin_d(dst, src1, src2);
++}
++
++void TurboAssembler::Float64MinOutOfLine(FPURegister dst, FPURegister src1,
++                                         FPURegister src2) {
++  fadd_d(dst, src1, src2);
++}
++
++static const int kRegisterPassedArguments = 8;
++
++int TurboAssembler::CalculateStackPassedWords(int num_reg_arguments,
++                                              int num_double_arguments) {
++  int stack_passed_words = 0;
++  num_reg_arguments += 2 * num_double_arguments;
++
++  // Up to eight simple arguments are passed in registers a0..a7.
++  if (num_reg_arguments > kRegisterPassedArguments) {
++    stack_passed_words += num_reg_arguments - kRegisterPassedArguments;
++  }
++  stack_passed_words += kCArgSlotCount;
++  return stack_passed_words;
++}
++
++void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
++                                          int num_double_arguments,
++                                          Register scratch) {
++  int frame_alignment = ActivationFrameAlignment();
++
++  // Up to eight simple arguments are passed in registers a0..a7. Remaining
++  // arguments are pushed on the stack (the argument slot calculation is
++  // handled by CalculateStackPassedWords()).
++  int stack_passed_arguments =
++      CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
++  if (frame_alignment > kPointerSize) {
++    // Make stack end at alignment and make room for the stack arguments
++    // and the original value of sp.
++    mov(scratch, sp);
++    Sub_d(sp, sp, Operand((stack_passed_arguments + 1) * kPointerSize));
++    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
++    bstrins_d(sp, zero_reg, std::log2(frame_alignment) - 1, 0);
++    St_d(scratch, MemOperand(sp, stack_passed_arguments * kPointerSize));
++  } else {
++    Sub_d(sp, sp, Operand(stack_passed_arguments * kPointerSize));
++  }
++}
++
++void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
++                                          Register scratch) {
++  PrepareCallCFunction(num_reg_arguments, 0, scratch);
++}
++
++void TurboAssembler::CallCFunction(ExternalReference function,
++                                   int num_reg_arguments,
++                                   int num_double_arguments) {
++  BlockTrampolinePoolScope block_trampoline_pool(this);
++  li(t7, function);
++  CallCFunctionHelper(t7, num_reg_arguments, num_double_arguments);
++}
++
++void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
++                                   int num_double_arguments) {
++  CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
++}
++
++void TurboAssembler::CallCFunction(ExternalReference function,
++                                   int num_arguments) {
++  CallCFunction(function, num_arguments, 0);
++}
++
++void TurboAssembler::CallCFunction(Register function, int num_arguments) {
++  CallCFunction(function, num_arguments, 0);
++}
++
++void TurboAssembler::CallCFunctionHelper(Register function,
++                                         int num_reg_arguments,
++                                         int num_double_arguments) {
++  DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
++  DCHECK(has_frame());
++  // Make sure that the stack is aligned before calling a C function unless
++  // running in the simulator. The simulator has its own alignment check which
++  // provides more information.
++  // The argument slots are presumed to have been set up by
++  // PrepareCallCFunction. The C function must be called via t7.
++ ++#if V8_HOST_ARCH_LA64 ++ if (emit_debug_code()) { ++ int frame_alignment = base::OS::ActivationFrameAlignment(); ++ int frame_alignment_mask = frame_alignment - 1; ++ if (frame_alignment > kPointerSize) { ++ DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); ++ Label alignment_as_expected; ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ And(scratch, sp, Operand(frame_alignment_mask)); ++ Branch(&alignment_as_expected, eq, scratch, Operand(zero_reg)); ++ } ++ // Don't use Check here, as it will call Runtime_Abort possibly ++ // re-entering here. ++ stop(); ++ bind(&alignment_as_expected); ++ } ++ } ++#endif // V8_HOST_ARCH_LA64 ++ ++ // Just call directly. The function called cannot cause a GC, or ++ // allow preemption, so the return address in the link register ++ // stays correct. ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (function != t7) { ++ mov(t7, function); ++ function = t7; ++ } ++ ++ // Save the frame pointer and PC so that the stack layout remains iterable, ++ // even without an ExitFrame which normally exists between JS and C frames. ++ // 't' registers are caller-saved so this is safe as a scratch register. ++ Register pc_scratch = t1; ++ Register scratch = t2; ++ DCHECK(!AreAliased(pc_scratch, scratch, function)); ++ ++ pcaddi(pc_scratch, 1); ++ ++ // See x64 code for reasoning about how to address the isolate data fields. ++ if (root_array_available()) { ++ St_d(pc_scratch, MemOperand(kRootRegister, ++ IsolateData::fast_c_call_caller_pc_offset())); ++ St_d(fp, MemOperand(kRootRegister, ++ IsolateData::fast_c_call_caller_fp_offset())); ++ } else { ++ DCHECK_NOT_NULL(isolate()); ++ li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate())); ++ St_d(pc_scratch, MemOperand(scratch, 0)); ++ li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate())); ++ St_d(fp, MemOperand(scratch, 0)); ++ } ++ ++ Call(function); ++ ++ // We don't unset the PC; the FP is the source of truth. 
++ if (root_array_available()) { ++ St_d(zero_reg, MemOperand(kRootRegister, ++ IsolateData::fast_c_call_caller_fp_offset())); ++ } else { ++ DCHECK_NOT_NULL(isolate()); ++ li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate())); ++ St_d(zero_reg, MemOperand(scratch, 0)); ++ } ++ } ++ ++ int stack_passed_arguments = ++ CalculateStackPassedWords(num_reg_arguments, num_double_arguments); ++ ++ if (base::OS::ActivationFrameAlignment() > kPointerSize) { ++ Ld_d(sp, MemOperand(sp, stack_passed_arguments * kPointerSize)); ++ } else { ++ Add_d(sp, sp, Operand(stack_passed_arguments * kPointerSize)); ++ } ++} ++ ++#undef BRANCH_ARGS_CHECK ++ ++void TurboAssembler::CheckPageFlag(Register object, Register scratch, int mask, ++ Condition cc, Label* condition_met) { ++ And(scratch, object, Operand(~kPageAlignmentMask)); ++ Ld_d(scratch, MemOperand(scratch, MemoryChunk::kFlagsOffset)); ++ And(scratch, scratch, Operand(mask)); ++ Branch(condition_met, cc, scratch, Operand(zero_reg)); ++} ++ ++Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3, ++ Register reg4, Register reg5, ++ Register reg6) { ++ RegList regs = 0; ++ if (reg1.is_valid()) regs |= reg1.bit(); ++ if (reg2.is_valid()) regs |= reg2.bit(); ++ if (reg3.is_valid()) regs |= reg3.bit(); ++ if (reg4.is_valid()) regs |= reg4.bit(); ++ if (reg5.is_valid()) regs |= reg5.bit(); ++ if (reg6.is_valid()) regs |= reg6.bit(); ++ ++ const RegisterConfiguration* config = RegisterConfiguration::Default(); ++ for (int i = 0; i < config->num_allocatable_general_registers(); ++i) { ++ int code = config->GetAllocatableGeneralCode(i); ++ Register candidate = Register::from_code(code); ++ if (regs & candidate.bit()) continue; ++ return candidate; ++ } ++ UNREACHABLE(); ++} ++ ++void TurboAssembler::ComputeCodeStartAddress(Register dst) { ++ // TODO: range check, add Pcadd macro function? ++ pcaddi(dst, -pc_offset() >> 2); ++} ++ ++void TurboAssembler::ResetSpeculationPoisonRegister() { ++ li(kSpeculationPoisonRegister, -1); ++} ++ ++void TurboAssembler::CallForDeoptimization(Address target, int deopt_id, ++ Label* exit, DeoptimizeKind kind) { ++ USE(exit, kind); ++ NoRootArrayScope no_root_array(this); ++ ++ // Save the deopt id in kRootRegister (we don't need the roots array from now ++ // on). ++ DCHECK_LE(deopt_id, 0xFFFF); ++ li(kRootRegister, deopt_id); ++ Call(target, RelocInfo::RUNTIME_ENTRY); ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.h +new file mode 100644 +index 00000000000..64116977c09 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.h +@@ -0,0 +1,1084 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef INCLUDED_FROM_MACRO_ASSEMBLER_H ++#error This header must be included via macro-assembler.h ++#endif ++ ++#ifndef V8_CODEGEN_LA64_MACRO_ASSEMBLER_LA64_H_ ++#define V8_CODEGEN_LA64_MACRO_ASSEMBLER_LA64_H_ ++ ++#include "src/codegen/assembler.h" ++#include "src/codegen/la64/assembler-la64.h" ++#include "src/common/globals.h" ++ ++namespace v8 { ++namespace internal { ++ ++// Forward declarations. ++enum class AbortReason : uint8_t; ++ ++// Reserved Register Usage Summary. ++// ++// Registers t8 and t7 are reserved for use by the MacroAssembler. 
++// ++// The programmer should know that the MacroAssembler may clobber these two, ++// but won't touch other registers except in special cases. ++// ++// Per the MIPS ABI, register t0 -- t8 must be used for indirect function call ++// via 'jirl t[0-8]' instructions. gcc? ++ ++// Flags used for LeaveExitFrame function. ++enum LeaveExitFrameMode { EMIT_RETURN = true, NO_EMIT_RETURN = false }; ++ ++// Flags used for the li macro-assembler function. ++enum LiFlags { ++ // If the constant value can be represented in just 12 bits, then ++ // optimize the li to use a single instruction, rather than lu12i_w/lu32i_d/ ++ // lu52i_d/ori sequence. A number of other optimizations that emits less than ++ // maximum number of instructions exists. ++ OPTIMIZE_SIZE = 0, ++ // Always use 4 instructions (lu12i_w/ori/lu32i_d/lu52i_d sequence), ++ // even if the constant could be loaded with just one, so that this value is ++ // patchable later. ++ CONSTANT_SIZE = 1, ++ // For address loads only 3 instruction are required. Used to mark ++ // constant load that will be used as address without relocation ++ // information. It ensures predictable code size, so specific sites ++ // in code are patchable. ++ ADDRESS_LOAD = 2 ++}; ++ ++enum RememberedSetAction { EMIT_REMEMBERED_SET, OMIT_REMEMBERED_SET }; ++enum SmiCheck { INLINE_SMI_CHECK, OMIT_SMI_CHECK }; ++enum RAStatus { kRAHasNotBeenSaved, kRAHasBeenSaved }; ++ ++Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2 = no_reg, ++ Register reg3 = no_reg, ++ Register reg4 = no_reg, ++ Register reg5 = no_reg, ++ Register reg6 = no_reg); ++ ++// ----------------------------------------------------------------------------- ++// Static helper functions. ++ ++#define SmiWordOffset(offset) (offset + kPointerSize / 2) ++ ++// Generate a MemOperand for loading a field from an object. ++inline MemOperand FieldMemOperand(Register object, int offset) { ++ return MemOperand(object, offset - kHeapObjectTag); ++} ++ ++class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ++ public: ++ using TurboAssemblerBase::TurboAssemblerBase; ++ ++ // Activation support. ++ void EnterFrame(StackFrame::Type type); ++ void EnterFrame(StackFrame::Type type, bool load_constant_pool_pointer_reg) { ++ // Out-of-line constant pool not implemented on la64. ++ UNREACHABLE(); ++ } ++ void LeaveFrame(StackFrame::Type type); ++ ++ // Generates function and stub prologue code. ++ void StubPrologue(StackFrame::Type type); ++ void Prologue(); ++ ++ void InitializeRootRegister() { ++ ExternalReference isolate_root = ExternalReference::isolate_root(isolate()); ++ li(kRootRegister, Operand(isolate_root)); ++ } ++ ++ // Jump unconditionally to given label. ++ // Use rather b(Label) for code generation. ++ void jmp(Label* L) { Branch(L); } ++ ++ // ------------------------------------------------------------------------- ++ // Debugging. ++ ++ void Trap() override; ++ void DebugBreak() override; ++ ++ // Calls Abort(msg) if the condition cc is not satisfied. ++ // Use --debug_code to enable. ++ void Assert(Condition cc, AbortReason reason, Register rj, Operand rk); ++ ++ // Like Assert(), but always enabled. ++ void Check(Condition cc, AbortReason reason, Register rj, Operand rk); ++ ++ // Print a message to stdout and abort execution. 
++  void Abort(AbortReason msg);
++
++  void Branch(Label* label, bool need_link = false);
++  void Branch(Label* label, Condition cond, Register r1, const Operand& r2,
++              bool need_link = false);
++  void BranchShort(Label* label, Condition cond, Register r1, const Operand& r2,
++                   bool need_link = false);
++  void Branch(Label* L, Condition cond, Register rj, RootIndex index);
++
++  // Floating point branches
++  void CompareF32(FPURegister cmp1, FPURegister cmp2, FPUCondition cc,
++                  CFRegister cd = FCC0) {
++    CompareF(cmp1, cmp2, cc, cd, true);
++  }
++
++  void CompareIsNanF32(FPURegister cmp1, FPURegister cmp2,
++                       CFRegister cd = FCC0) {
++    CompareIsNanF(cmp1, cmp2, cd, true);
++  }
++
++  void CompareF64(FPURegister cmp1, FPURegister cmp2, FPUCondition cc,
++                  CFRegister cd = FCC0) {
++    CompareF(cmp1, cmp2, cc, cd, false);
++  }
++
++  void CompareIsNanF64(FPURegister cmp1, FPURegister cmp2,
++                       CFRegister cd = FCC0) {
++    CompareIsNanF(cmp1, cmp2, cd, false);
++  }
++
++  void BranchTrueShortF(Label* target, CFRegister cc = FCC0);
++  void BranchFalseShortF(Label* target, CFRegister cc = FCC0);
++
++  void BranchTrueF(Label* target, CFRegister cc = FCC0);
++  void BranchFalseF(Label* target, CFRegister cc = FCC0);
++
++  static int InstrCountForLi64Bit(int64_t value);
++  inline void LiLower32BitHelper(Register rd, Operand j);
++  void li_optimized(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE);
++  void li(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE);
++  inline void li(Register rd, int64_t j, LiFlags mode = OPTIMIZE_SIZE) {
++    li(rd, Operand(j), mode);
++  }
++  inline void li(Register rd, int32_t j, LiFlags mode = OPTIMIZE_SIZE) {
++    li(rd, Operand(static_cast<int64_t>(j)), mode);
++  }
++  void li(Register dst, Handle<HeapObject> value, LiFlags mode = OPTIMIZE_SIZE);
++  void li(Register dst, ExternalReference value, LiFlags mode = OPTIMIZE_SIZE);
++  void li(Register dst, const StringConstantBase* string,
++          LiFlags mode = OPTIMIZE_SIZE);
++
++  void LoadFromConstantsTable(Register destination,
++                              int constant_index) override;
++  void LoadRootRegisterOffset(Register destination, intptr_t offset) override;
++  void LoadRootRelative(Register destination, int32_t offset) override;
++
++// Jump, Call, and Ret pseudo instructions implementing inter-working.
++#define COND_ARGS \
++  Condition cond = al, Register rj = zero_reg, \
++      const Operand &rk = Operand(zero_reg)
++
++  void Jump(Register target, COND_ARGS);
++  void Jump(intptr_t target, RelocInfo::Mode rmode, COND_ARGS);
++  void Jump(Address target, RelocInfo::Mode rmode, COND_ARGS);
++  // Unlike li, this method saves the target to memory and then loads it into
++  // a register with Ld_d; it can be used in the wasm jump table for
++  // concurrent patching.
++  void PatchAndJump(Address target);
++  void Jump(Handle<Code> code, RelocInfo::Mode rmode, COND_ARGS);
++  void Jump(const ExternalReference& reference) override;
++  void Call(Register target, COND_ARGS);
++  void Call(Address target, RelocInfo::Mode rmode, COND_ARGS);
++  void Call(Handle<Code> code, RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
++            COND_ARGS);
++  void Call(Label* target);
++  void LoadAddress(Register dst, Label* target);
++
++  // Load the builtin given by the Smi in |builtin_index| into the same
++  // register.
++  void LoadEntryFromBuiltinIndex(Register builtin_index);
++  void CallBuiltinByIndex(Register builtin_index) override;
++
++  void LoadCodeObjectEntry(Register destination,
++                           Register code_object) override {
++    // TODO(mips): Implement.
++ UNIMPLEMENTED(); ++ } ++ void CallCodeObject(Register code_object) override { ++ // TODO(mips): Implement. ++ UNIMPLEMENTED(); ++ } ++ void JumpCodeObject(Register code_object) override { ++ // TODO(mips): Implement. ++ UNIMPLEMENTED(); ++ } ++ ++ // Generates an instruction sequence s.t. the return address points to the ++ // instruction following the call. ++ // The return address on the stack is used by frame iteration. ++ void StoreReturnAddressAndCall(Register target); ++ ++ void CallForDeoptimization(Address target, int deopt_id, Label* exit, ++ DeoptimizeKind kind); ++ ++ void Ret(COND_ARGS); ++ ++ // Emit code to discard a non-negative number of pointer-sized elements ++ // from the stack, clobbering only the sp register. ++ void Drop(int count, Condition cond = cc_always, Register reg = no_reg, ++ const Operand& op = Operand(no_reg)); ++ ++ // Trivial case of DropAndRet that utilizes the delay slot and only emits ++ // 2 instructions. ++ void DropAndRet(int drop); ++ ++ void DropAndRet(int drop, Condition cond, Register reg, const Operand& op); ++ ++ void Ld_d(Register rd, const MemOperand& rj); ++ void St_d(Register rd, const MemOperand& rj); ++ ++ void push(Register src) { ++ Add_d(sp, sp, Operand(-kPointerSize)); ++ St_d(src, MemOperand(sp, 0)); ++ } ++ void Push(Register src) { push(src); } ++ void Push(Handle handle); ++ void Push(Smi smi); ++ ++ // Push two registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2) { ++ Sub_d(sp, sp, Operand(2 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ // Push three registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2, Register src3) { ++ Sub_d(sp, sp, Operand(3 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 2 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src3, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ // Push four registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2, Register src3, Register src4) { ++ Sub_d(sp, sp, Operand(4 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 3 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 2 * kPointerSize)); ++ St_d(src3, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src4, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ // Push five registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2, Register src3, Register src4, ++ Register src5) { ++ Sub_d(sp, sp, Operand(5 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 4 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 3 * kPointerSize)); ++ St_d(src3, MemOperand(sp, 2 * kPointerSize)); ++ St_d(src4, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src5, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ void Push(Register src, Condition cond, Register tst1, Register tst2) { ++ // Since we don't have conditional execution we use a Branch. 
++ Label skip; ++ Branch(&skip, cond, tst1, Operand(tst2)); ++ addi_d(sp, sp, -kPointerSize); ++ st_d(src, sp, 0); ++ bind(&skip); ++ } ++ ++ void SaveRegisters(RegList registers); ++ void RestoreRegisters(RegList registers); ++ ++ void CallRecordWriteStub(Register object, Register address, ++ RememberedSetAction remembered_set_action, ++ SaveFPRegsMode fp_mode); ++ void CallRecordWriteStub(Register object, Register address, ++ RememberedSetAction remembered_set_action, ++ SaveFPRegsMode fp_mode, Address wasm_target); ++ void CallEphemeronKeyBarrier(Register object, Register address, ++ SaveFPRegsMode fp_mode); ++ ++ // Push multiple registers on the stack. ++ // Registers are saved in numerical order, with higher numbered registers ++ // saved in higher memory addresses. ++ void MultiPush(RegList regs); ++ void MultiPush(RegList regs1, RegList regs2); ++ void MultiPush(RegList regs1, RegList regs2, RegList regs3); ++ void MultiPushFPU(RegList regs); ++ ++ // Calculate how much stack space (in bytes) are required to store caller ++ // registers excluding those specified in the arguments. ++ int RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, ++ Register exclusion1 = no_reg, ++ Register exclusion2 = no_reg, ++ Register exclusion3 = no_reg) const; ++ ++ // Push caller saved registers on the stack, and return the number of bytes ++ // stack pointer is adjusted. ++ int PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1 = no_reg, ++ Register exclusion2 = no_reg, ++ Register exclusion3 = no_reg); ++ // Restore caller saved registers from the stack, and return the number of ++ // bytes stack pointer is adjusted. ++ int PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1 = no_reg, ++ Register exclusion2 = no_reg, ++ Register exclusion3 = no_reg); ++ ++ void pop(Register dst) { ++ Ld_d(dst, MemOperand(sp, 0)); ++ Add_d(sp, sp, Operand(kPointerSize)); ++ } ++ void Pop(Register dst) { pop(dst); } ++ ++ // Pop two registers. Pops rightmost register first (from lower address). ++ void Pop(Register src1, Register src2) { ++ DCHECK(src1 != src2); ++ Ld_d(src2, MemOperand(sp, 0 * kPointerSize)); ++ Ld_d(src1, MemOperand(sp, 1 * kPointerSize)); ++ Add_d(sp, sp, 2 * kPointerSize); ++ } ++ ++ // Pop three registers. Pops rightmost register first (from lower address). ++ void Pop(Register src1, Register src2, Register src3) { ++ Ld_d(src3, MemOperand(sp, 0 * kPointerSize)); ++ Ld_d(src2, MemOperand(sp, 1 * kPointerSize)); ++ Ld_d(src1, MemOperand(sp, 2 * kPointerSize)); ++ Add_d(sp, sp, 3 * kPointerSize); ++ } ++ ++ void Pop(uint32_t count = 1) { Add_d(sp, sp, Operand(count * kPointerSize)); } ++ ++ // Pops multiple values from the stack and load them in the ++ // registers specified in regs. Pop order is the opposite as in MultiPush. 
++ void MultiPop(RegList regs); ++ void MultiPop(RegList regs1, RegList regs2); ++ void MultiPop(RegList regs1, RegList regs2, RegList regs3); ++ ++ void MultiPopFPU(RegList regs); ++ ++#define DEFINE_INSTRUCTION(instr) \ ++ void instr(Register rd, Register rj, const Operand& rk); \ ++ void instr(Register rd, Register rj, Register rk) { \ ++ instr(rd, rj, Operand(rk)); \ ++ } \ ++ void instr(Register rj, Register rk, int32_t j) { instr(rj, rk, Operand(j)); } ++ ++#define DEFINE_INSTRUCTION2(instr) \ ++ void instr(Register rj, const Operand& rk); \ ++ void instr(Register rj, Register rk) { instr(rj, Operand(rk)); } \ ++ void instr(Register rj, int32_t j) { instr(rj, Operand(j)); } ++ ++ DEFINE_INSTRUCTION(Add_w) ++ DEFINE_INSTRUCTION(Add_d) ++ DEFINE_INSTRUCTION(Div_w) ++ DEFINE_INSTRUCTION(Div_wu) ++ DEFINE_INSTRUCTION(Div_du) ++ DEFINE_INSTRUCTION(Mod_w) ++ DEFINE_INSTRUCTION(Mod_wu) ++ DEFINE_INSTRUCTION(Div_d) ++ DEFINE_INSTRUCTION(Sub_w) ++ DEFINE_INSTRUCTION(Sub_d) ++ DEFINE_INSTRUCTION(Mod_d) ++ DEFINE_INSTRUCTION(Mod_du) ++ DEFINE_INSTRUCTION(Mul_w) ++ DEFINE_INSTRUCTION(Mulh_w) ++ DEFINE_INSTRUCTION(Mulh_wu) ++ DEFINE_INSTRUCTION(Mul_d) ++ DEFINE_INSTRUCTION(Mulh_d) ++ DEFINE_INSTRUCTION2(Div_w) ++ DEFINE_INSTRUCTION2(Div_d) ++ DEFINE_INSTRUCTION2(Div_wu) ++ DEFINE_INSTRUCTION2(Div_du) ++ ++ DEFINE_INSTRUCTION(And) ++ DEFINE_INSTRUCTION(Or) ++ DEFINE_INSTRUCTION(Xor) ++ DEFINE_INSTRUCTION(Nor) ++ DEFINE_INSTRUCTION2(Neg) ++ DEFINE_INSTRUCTION(Andn) ++ DEFINE_INSTRUCTION(Orn) ++ ++ DEFINE_INSTRUCTION(Slt) ++ DEFINE_INSTRUCTION(Sltu) ++ DEFINE_INSTRUCTION(Slti) ++ DEFINE_INSTRUCTION(Sltiu) ++ DEFINE_INSTRUCTION(Sle) ++ DEFINE_INSTRUCTION(Sleu) ++ DEFINE_INSTRUCTION(Sgt) ++ DEFINE_INSTRUCTION(Sgtu) ++ DEFINE_INSTRUCTION(Sge) ++ DEFINE_INSTRUCTION(Sgeu) ++ ++ DEFINE_INSTRUCTION(Rotr_w) ++ DEFINE_INSTRUCTION(Rotr_d) ++ ++#undef DEFINE_INSTRUCTION ++#undef DEFINE_INSTRUCTION2 ++#undef DEFINE_INSTRUCTION3 ++ ++ void SmiUntag(Register dst, const MemOperand& src); ++ void SmiUntag(Register dst, Register src) { ++ if (SmiValuesAre32Bits()) { ++ srai_d(dst, src, kSmiShift); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ srai_w(dst, src, kSmiShift); ++ } ++ } ++ ++ void SmiUntag(Register reg) { SmiUntag(reg, reg); } ++ ++ // Removes current frame and its arguments from the stack preserving ++ // the arguments and a return address pushed to the stack for the next call. ++ // Both |callee_args_count| and |caller_args_count| do not include ++ // receiver. |callee_args_count| is not modified. |caller_args_count| ++ // is trashed. ++ void PrepareForTailCall(Register callee_args_count, ++ Register caller_args_count, Register scratch0, ++ Register scratch1); ++ ++ int CalculateStackPassedWords(int num_reg_arguments, ++ int num_double_arguments); ++ ++ // Before calling a C-function from generated code, align arguments on stack ++ // and add space for the four mips argument slots. ++ // After aligning the frame, non-register arguments must be stored on the ++ // stack, after the argument-slots using helper: CFunctionArgumentOperand(). ++ // The argument count assumes all arguments are word sized. ++ // Some compilers/platforms require the stack to be aligned when calling ++ // C++ code. ++ // Needs a scratch register to do some arithmetic. This register will be ++ // trashed. 
++ void PrepareCallCFunction(int num_reg_arguments, int num_double_registers, ++ Register scratch); ++ void PrepareCallCFunction(int num_reg_arguments, Register scratch); ++ ++ // Calls a C function and cleans up the space for arguments allocated ++ // by PrepareCallCFunction. The called function is not allowed to trigger a ++ // garbage collection, since that might move the code and invalidate the ++ // return address (unless this is somehow accounted for by the called ++ // function). ++ void CallCFunction(ExternalReference function, int num_arguments); ++ void CallCFunction(Register function, int num_arguments); ++ void CallCFunction(ExternalReference function, int num_reg_arguments, ++ int num_double_arguments); ++ void CallCFunction(Register function, int num_reg_arguments, ++ int num_double_arguments); ++ void MovFromFloatResult(DoubleRegister dst); ++ void MovFromFloatParameter(DoubleRegister dst); ++ ++ // There are two ways of passing double arguments on MIPS, depending on ++ // whether soft or hard floating point ABI is used. These functions ++ // abstract parameter passing for the three different ways we call ++ // C functions from generated code. ++ void MovToFloatParameter(DoubleRegister src); ++ void MovToFloatParameters(DoubleRegister src1, DoubleRegister src2); ++ void MovToFloatResult(DoubleRegister src); ++ ++ // See comments at the beginning of Builtins::Generate_CEntry. ++ inline void PrepareCEntryArgs(int num_args) { li(a0, num_args); } ++ inline void PrepareCEntryFunction(const ExternalReference& ref) { ++ li(a1, ref); ++ } ++ ++ void CheckPageFlag(Register object, Register scratch, int mask, Condition cc, ++ Label* condition_met); ++#undef COND_ARGS ++ ++ // Performs a truncating conversion of a floating point number as used by ++ // the JS bitwise operations. See ECMA-262 9.5: ToInt32. ++ // Exits with 'result' holding the answer. ++ void TruncateDoubleToI(Isolate* isolate, Zone* zone, Register result, ++ DoubleRegister double_input, StubCallMode stub_mode); ++ ++ // Conditional move. ++ void Movz(Register rd, Register rj, Register rk); ++ void Movn(Register rd, Register rj, Register rk); ++ ++ void LoadZeroIfFPUCondition(Register dest, CFRegister = FCC0); ++ void LoadZeroIfNotFPUCondition(Register dest, CFRegister = FCC0); ++ ++ void LoadZeroIfConditionNotZero(Register dest, Register condition); ++ void LoadZeroIfConditionZero(Register dest, Register condition); ++ void LoadZeroOnCondition(Register rd, Register rj, const Operand& rk, ++ Condition cond); ++ ++ void Clz_w(Register rd, Register rj); ++ void Clz_d(Register rd, Register rj); ++ void Ctz_w(Register rd, Register rj); ++ void Ctz_d(Register rd, Register rj); ++ void Popcnt_w(Register rd, Register rj); ++ void Popcnt_d(Register rd, Register rj); ++ ++ void ExtractBits(Register dest, Register source, Register pos, int size, ++ bool sign_extend = false); ++ void InsertBits(Register dest, Register source, Register pos, int size); ++ ++ void Bstrins_w(Register rk, Register rj, uint16_t msbw, uint16_t lswb); ++ void Bstrins_d(Register rk, Register rj, uint16_t msbw, uint16_t lsbw); ++ void Bstrpick_w(Register rk, Register rj, uint16_t msbw, uint16_t lsbw); ++ void Bstrpick_d(Register rk, Register rj, uint16_t msbw, uint16_t lsbw); ++ void Neg_s(FPURegister fd, FPURegister fj); ++ void Neg_d(FPURegister fd, FPURegister fk); ++ ++ // Convert single to unsigned word. 
++ void Trunc_uw_s(FPURegister fd, FPURegister fj, FPURegister scratch); ++ void Trunc_uw_s(Register rd, FPURegister fj, FPURegister scratch); ++ ++ // Change endianness ++ void ByteSwapSigned(Register dest, Register src, int operand_size); ++ void ByteSwapUnsigned(Register dest, Register src, int operand_size); ++ ++ void Ld_b(Register rd, const MemOperand& rj); ++ void Ld_bu(Register rd, const MemOperand& rj); ++ void St_b(Register rd, const MemOperand& rj); ++ ++ void Ld_h(Register rd, const MemOperand& rj); ++ void Ld_hu(Register rd, const MemOperand& rj); ++ void St_h(Register rd, const MemOperand& rj); ++ ++ void Ld_w(Register rd, const MemOperand& rj); ++ void Ld_wu(Register rd, const MemOperand& rj); ++ void St_w(Register rd, const MemOperand& rj); ++ ++ void Fld_s(FPURegister fd, const MemOperand& src); ++ void Fst_s(FPURegister fj, const MemOperand& dst); ++ ++ void Fld_d(FPURegister fd, const MemOperand& src); ++ void Fst_d(FPURegister fj, const MemOperand& dst); ++ ++ void Ll_w(Register rd, const MemOperand& rj); ++ void Sc_w(Register rd, const MemOperand& rj); ++ ++ void Ll_d(Register rd, const MemOperand& rj); ++ void Sc_d(Register rd, const MemOperand& rj); ++ ++ // These functions assume (and assert) that src1!=src2. It is permitted ++ // for the result to alias either input register. ++ void Float32Max(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ void Float32Min(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ void Float64Max(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ void Float64Min(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ ++ // Generate out-of-line cases for the macros above. ++ void Float32MaxOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ void Float32MinOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ void Float64MaxOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ void Float64MinOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ ++ bool IsDoubleZeroRegSet() { return has_double_zero_reg_set_; } ++ ++ void mov(Register rd, Register rj) { or_(rd, rj, zero_reg); } ++ ++ inline void Move(Register dst, Handle handle) { li(dst, handle); } ++ inline void Move(Register dst, Smi smi) { li(dst, Operand(smi)); } ++ ++ inline void Move(Register dst, Register src) { ++ if (dst != src) { ++ mov(dst, src); ++ } ++ } ++ ++ inline void FmoveLow(Register dst_low, FPURegister src) { ++ movfr2gr_s(dst_low, src); ++ } ++ ++ void FmoveLow(FPURegister dst, Register src_low); ++ ++ inline void Move(FPURegister dst, FPURegister src) { Move_d(dst, src); } ++ ++ inline void Move_d(FPURegister dst, FPURegister src) { ++ if (dst != src) { ++ fmov_d(dst, src); ++ } ++ } ++ ++ inline void Move_s(FPURegister dst, FPURegister src) { ++ if (dst != src) { ++ fmov_s(dst, src); ++ } ++ } ++ ++ void Move(FPURegister dst, float imm) { Move(dst, bit_cast(imm)); } ++ void Move(FPURegister dst, double imm) { Move(dst, bit_cast(imm)); } ++ void Move(FPURegister dst, uint32_t src); ++ void Move(FPURegister dst, uint64_t src); ++ ++ // AdddOverflow sets overflow register to a negative value if ++ // overflow occured, otherwise it is zero or positive ++ void AdddOverflow(Register dst, Register left, const Operand& right, ++ Register overflow); ++ // SubdOverflow sets overflow register to a negative value if ++ // overflow occured, otherwise it is zero or positive ++ void SubdOverflow(Register dst, Register 
left, const Operand& right, ++ Register overflow); ++ // MulOverflow sets overflow register to zero if no overflow occured ++ void MulOverflow(Register dst, Register left, const Operand& right, ++ Register overflow); ++ ++ // Number of instructions needed for calculation of switch table entry address ++ static const int kSwitchTablePrologueSize = 5; ++ ++ // GetLabelFunction must be lambda '[](size_t index) -> Label*' or a ++ // functor/function with 'Label *func(size_t index)' declaration. ++ template ++ void GenerateSwitchTable(Register index, size_t case_count, ++ Func GetLabelFunction); ++ ++ // Load an object from the root table. ++ void LoadRoot(Register destination, RootIndex index) override; ++ void LoadRoot(Register destination, RootIndex index, Condition cond, ++ Register src1, const Operand& src2); ++ ++ // If the value is a NaN, canonicalize the value, src must be nan. ++ void FPUCanonicalizeNaN(const DoubleRegister dst, const DoubleRegister src); ++ ++ // --------------------------------------------------------------------------- ++ // FPU macros. These do not handle special cases like NaN or +- inf. ++ ++ // Convert unsigned word to double. ++ void Ffint_d_uw(FPURegister fd, FPURegister fj); ++ void Ffint_d_uw(FPURegister fd, Register rj); ++ ++ // Convert unsigned long to double. ++ void Ffint_d_ul(FPURegister fd, FPURegister fj); ++ void Ffint_d_ul(FPURegister fd, Register rj); ++ ++ // Convert unsigned word to float. ++ void Ffint_s_uw(FPURegister fd, FPURegister fj); ++ void Ffint_s_uw(FPURegister fd, Register rj); ++ ++ // Convert unsigned long to float. ++ void Ffint_s_ul(FPURegister fd, FPURegister fj); ++ void Ffint_s_ul(FPURegister fd, Register rj); ++ ++ // Convert double to unsigned word. ++ void Ftintrz_uw_d(FPURegister fd, FPURegister fj, FPURegister scratch); ++ void Ftintrz_uw_d(Register rd, FPURegister fj, FPURegister scratch); ++ ++ // Convert single to unsigned word. ++ void Ftintrz_uw_s(FPURegister fd, FPURegister fs, FPURegister scratch); ++ void Ftintrz_uw_s(Register rd, FPURegister fs, FPURegister scratch); ++ ++ // Convert double to unsigned long. ++ void Ftintrz_ul_d(FPURegister fd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ void Ftintrz_ul_d(Register rd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ ++ // Convert single to unsigned long. ++ void Ftintrz_ul_s(FPURegister fd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ void Ftintrz_ul_s(Register rd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ ++ // Round double functions ++ void Trunc_d(FPURegister fd, FPURegister fj); ++ void Round_d(FPURegister fd, FPURegister fj); ++ void Floor_d(FPURegister fd, FPURegister fj); ++ void Ceil_d(FPURegister fd, FPURegister fj); ++ ++ // Round float functions ++ void Trunc_s(FPURegister fd, FPURegister fj); ++ void Round_s(FPURegister fd, FPURegister fj); ++ void Floor_s(FPURegister fd, FPURegister fj); ++ void Ceil_s(FPURegister fd, FPURegister fj); ++ ++ // Jump the register contains a smi. ++ void JumpIfSmi(Register value, Label* smi_label, Register scratch = t7); ++ ++ void JumpIfEqual(Register a, int32_t b, Label* dest) { ++ li(kScratchReg, Operand(b)); ++ Branch(dest, eq, a, Operand(kScratchReg)); ++ } ++ ++ void JumpIfLessThan(Register a, int32_t b, Label* dest) { ++ li(kScratchReg, Operand(b)); ++ Branch(dest, lt, a, Operand(kScratchReg)); ++ } ++ ++ // Push a standard frame, consisting of ra, fp, context and JS function. 
++ void PushStandardFrame(Register function_reg); ++ ++ // Get the actual activation frame alignment for target environment. ++ static int ActivationFrameAlignment(); ++ ++ // Load Scaled Address instructions. Parameter sa (shift argument) must be ++ // between [1, 31] (inclusive). The scratch register may be clobbered. ++ void Alsl_w(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch = t7); ++ void Alsl_d(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch = t7); ++ ++ // Compute the start of the generated instruction stream from the current PC. ++ // This is an alternative to embedding the {CodeObject} handle as a reference. ++ void ComputeCodeStartAddress(Register dst); ++ ++ void ResetSpeculationPoisonRegister(); ++ ++ // Control-flow integrity: ++ ++ // Define a function entrypoint. This doesn't emit any code for this ++ // architecture, as control-flow integrity is not supported for it. ++ void CodeEntry() {} ++ // Define an exception handler. ++ void ExceptionHandler() {} ++ // Define an exception handler and bind a label. ++ void BindExceptionHandler(Label* label) { bind(label); } ++ ++ protected: ++ inline Register GetRkAsRegisterHelper(const Operand& rk, Register scratch); ++ inline int32_t GetOffset(Label* L, OffsetSize bits); ++ ++ private: ++ bool has_double_zero_reg_set_ = false; ++ ++ // Performs a truncating conversion of a floating point number as used by ++ // the JS bitwise operations. See ECMA-262 9.5: ToInt32. Goes to 'done' if it ++ // succeeds, otherwise falls through if result is saturated. On return ++ // 'result' either holds answer, or is clobbered on fall through. ++ void TryInlineTruncateDoubleToI(Register result, DoubleRegister input, ++ Label* done); ++ ++ bool BranchShortOrFallback(Label* L, Condition cond, Register rj, ++ const Operand& rk, bool need_link); ++ ++ // f32 or f64 ++ void CompareF(FPURegister cmp1, FPURegister cmp2, FPUCondition cc, ++ CFRegister cd, bool f32 = true); ++ ++ void CompareIsNanF(FPURegister cmp1, FPURegister cmp2, CFRegister cd, ++ bool f32 = true); ++ ++ void CallCFunctionHelper(Register function, int num_reg_arguments, ++ int num_double_arguments); ++ ++ void RoundDouble(FPURegister dst, FPURegister src, FPURoundingMode mode); ++ ++ void RoundFloat(FPURegister dst, FPURegister src, FPURoundingMode mode); ++ ++ // Push a fixed frame, consisting of ra, fp. ++ void PushCommonFrame(Register marker_reg = no_reg); ++ ++ void CallRecordWriteStub(Register object, Register address, ++ RememberedSetAction remembered_set_action, ++ SaveFPRegsMode fp_mode, Handle code_target, ++ Address wasm_target); ++}; ++ ++// MacroAssembler implements a collection of frequently used macros. ++class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler { ++ public: ++ using TurboAssembler::TurboAssembler; ++ ++ bool IsNear(Label* L, Condition cond, int rs_reg); ++ ++ // Swap two registers. If the scratch register is omitted then a slightly ++ // less efficient form using xor instead of mov is emitted. ++ void Swap(Register reg1, Register reg2, Register scratch = no_reg); ++ ++ void PushRoot(RootIndex index) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Push(scratch); ++ } ++ ++ // Compare the object in a register to a value and jump if they are equal. 
++ void JumpIfRoot(Register with, RootIndex index, Label* if_equal) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Branch(if_equal, eq, with, Operand(scratch)); ++ } ++ ++ // Compare the object in a register to a value and jump if they are not equal. ++ void JumpIfNotRoot(Register with, RootIndex index, Label* if_not_equal) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Branch(if_not_equal, ne, with, Operand(scratch)); ++ } ++ ++ // Checks if value is in range [lower_limit, higher_limit] using a single ++ // comparison. ++ void JumpIfIsInRange(Register value, unsigned lower_limit, ++ unsigned higher_limit, Label* on_in_range); ++ ++ // --------------------------------------------------------------------------- ++ // GC Support ++ ++ // Notify the garbage collector that we wrote a pointer into an object. ++ // |object| is the object being stored into, |value| is the object being ++ // stored. value and scratch registers are clobbered by the operation. ++ // The offset is the offset from the start of the object, not the offset from ++ // the tagged HeapObject pointer. For use with FieldOperand(reg, off). ++ void RecordWriteField( ++ Register object, int offset, Register value, Register scratch, ++ RAStatus ra_status, SaveFPRegsMode save_fp, ++ RememberedSetAction remembered_set_action = EMIT_REMEMBERED_SET, ++ SmiCheck smi_check = INLINE_SMI_CHECK); ++ ++ // For a given |object| notify the garbage collector that the slot |address| ++ // has been written. |value| is the object being stored. The value and ++ // address registers are clobbered by the operation. ++ void RecordWrite( ++ Register object, Register address, Register value, RAStatus ra_status, ++ SaveFPRegsMode save_fp, ++ RememberedSetAction remembered_set_action = EMIT_REMEMBERED_SET, ++ SmiCheck smi_check = INLINE_SMI_CHECK); ++ ++ void Pref(int32_t hint, const MemOperand& rs); ++ ++ // --------------------------------------------------------------------------- ++ // Pseudo-instructions. ++ ++ void LoadWordPair(Register rd, const MemOperand& rj, Register scratch); ++ void StoreWordPair(Register rd, const MemOperand& rj, Register scratch); ++ ++ // Convert double to unsigned long. ++ void Ftintrz_l_ud(FPURegister fd, FPURegister fj, FPURegister scratch); ++ ++ void Ftintrz_l_d(FPURegister fd, FPURegister fj); ++ void Ftintrne_l_d(FPURegister fd, FPURegister fj); ++ void Ftintrm_l_d(FPURegister fd, FPURegister fj); ++ void Ftintrp_l_d(FPURegister fd, FPURegister fj); ++ ++ void Ftintrz_w_d(FPURegister fd, FPURegister fj); ++ void Ftintrne_w_d(FPURegister fd, FPURegister fj); ++ void Ftintrm_w_d(FPURegister fd, FPURegister fj); ++ void Ftintrp_w_d(FPURegister fd, FPURegister fj); ++ ++ void Madd_s(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ void Madd_d(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ void Msub_s(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ void Msub_d(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ ++ // Truncates a double using a specific rounding mode, and writes the value ++ // to the result register. ++ // The except_flag will contain any exceptions caused by the instruction. ++ // If check_inexact is kDontCheckForInexactConversion, then the inexact ++ // exception is masked. 
++ void EmitFPUTruncate( ++ FPURoundingMode rounding_mode, Register result, ++ DoubleRegister double_input, Register scratch, ++ DoubleRegister double_scratch, Register except_flag, ++ CheckForInexactConversion check_inexact = kDontCheckForInexactConversion); ++ ++ // Enter exit frame. ++ // argc - argument count to be dropped by LeaveExitFrame. ++ // save_doubles - saves FPU registers on stack, currently disabled. ++ // stack_space - extra stack space. ++ void EnterExitFrame(bool save_doubles, int stack_space = 0, ++ StackFrame::Type frame_type = StackFrame::EXIT); ++ ++ // Leave the current exit frame. ++ void LeaveExitFrame(bool save_doubles, Register arg_count, ++ bool do_return = NO_EMIT_RETURN, ++ bool argument_count_is_length = false); ++ ++ void LoadMap(Register destination, Register object); ++ ++ // Make sure the stack is aligned. Only emits code in debug mode. ++ void AssertStackIsAligned(); ++ ++ // Load the global proxy from the current context. ++ void LoadGlobalProxy(Register dst) { ++ LoadNativeContextSlot(Context::GLOBAL_PROXY_INDEX, dst); ++ } ++ ++ void LoadNativeContextSlot(int index, Register dst); ++ ++ // Load the initial map from the global function. The registers ++ // function and map can be the same, function is then overwritten. ++ void LoadGlobalFunctionInitialMap(Register function, Register map, ++ Register scratch); ++ ++ // ------------------------------------------------------------------------- ++ // JavaScript invokes. ++ ++ // Invoke the JavaScript function code by either calling or jumping. ++ void InvokeFunctionCode(Register function, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count, InvokeFlag flag); ++ ++ // On function call, call into the debugger if necessary. ++ void CheckDebugHook(Register fun, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count); ++ ++ // Invoke the JavaScript function in the given register. Changes the ++ // current context to the context in the function before invoking. ++ void InvokeFunctionWithNewTarget(Register function, Register new_target, ++ Register actual_parameter_count, ++ InvokeFlag flag); ++ void InvokeFunction(Register function, Register expected_parameter_count, ++ Register actual_parameter_count, InvokeFlag flag); ++ ++ // Frame restart support. ++ void MaybeDropFrames(); ++ ++ // Exception handling. ++ ++ // Push a new stack handler and link into stack handler chain. ++ void PushStackHandler(); ++ ++ // Unlink the stack handler on top of the stack from the stack handler chain. ++ // Must preserve the result register. ++ void PopStackHandler(); ++ ++ // ------------------------------------------------------------------------- ++ // Support functions. ++ ++ void GetObjectType(Register function, Register map, Register type_reg); ++ ++ // ------------------------------------------------------------------------- ++ // Runtime calls. ++ ++ // Call a runtime routine. ++ void CallRuntime(const Runtime::Function* f, int num_arguments, ++ SaveFPRegsMode save_doubles = kDontSaveFPRegs); ++ ++ // Convenience function: Same as above, but takes the fid instead. ++ void CallRuntime(Runtime::FunctionId fid, ++ SaveFPRegsMode save_doubles = kDontSaveFPRegs) { ++ const Runtime::Function* function = Runtime::FunctionForId(fid); ++ CallRuntime(function, function->nargs, save_doubles); ++ } ++ ++ // Convenience function: Same as above, but takes the fid instead. 
++ void CallRuntime(Runtime::FunctionId fid, int num_arguments, ++ SaveFPRegsMode save_doubles = kDontSaveFPRegs) { ++ CallRuntime(Runtime::FunctionForId(fid), num_arguments, save_doubles); ++ } ++ ++ // Convenience function: tail call a runtime routine (jump). ++ void TailCallRuntime(Runtime::FunctionId fid); ++ ++ // Jump to the builtin routine. ++ void JumpToExternalReference(const ExternalReference& builtin, ++ bool builtin_exit_frame = false); ++ ++ // Generates a trampoline to jump to the off-heap instruction stream. ++ void JumpToInstructionStream(Address entry); ++ ++ // --------------------------------------------------------------------------- ++ // In-place weak references. ++ void LoadWeakValue(Register out, Register in, Label* target_if_cleared); ++ ++ // ------------------------------------------------------------------------- ++ // StatsCounter support. ++ ++ void IncrementCounter(StatsCounter* counter, int value, Register scratch1, ++ Register scratch2); ++ void DecrementCounter(StatsCounter* counter, int value, Register scratch1, ++ Register scratch2); ++ ++ // ------------------------------------------------------------------------- ++ // Smi utilities. ++ ++ void SmiTag(Register dst, Register src) { ++ STATIC_ASSERT(kSmiTag == 0); ++ if (SmiValuesAre32Bits()) { ++ slli_d(dst, src, 32); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ add_w(dst, src, src); ++ } ++ } ++ ++ void SmiTag(Register reg) { SmiTag(reg, reg); } ++ ++ // Left-shifted from int32 equivalent of Smi. ++ void SmiScale(Register dst, Register src, int scale) { ++ if (SmiValuesAre32Bits()) { ++ // The int portion is upper 32-bits of 64-bit word. ++ srai_d(dst, src, kSmiShift - scale); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ DCHECK_GE(scale, kSmiTagSize); ++ slli_w(dst, src, scale - kSmiTagSize); ++ } ++ } ++ ++ // Test if the register contains a smi. ++ inline void SmiTst(Register value, Register scratch) { ++ And(scratch, value, Operand(kSmiTagMask)); ++ } ++ ++ // Jump if the register contains a non-smi. ++ void JumpIfNotSmi(Register value, Label* not_smi_label, Register scratch); ++ ++ // Abort execution if argument is a smi, enabled via --debug-code. ++ void AssertNotSmi(Register object); ++ void AssertSmi(Register object); ++ ++ // Abort execution if argument is not a Constructor, enabled via --debug-code. ++ void AssertConstructor(Register object); ++ ++ // Abort execution if argument is not a JSFunction, enabled via --debug-code. ++ void AssertFunction(Register object); ++ ++ // Abort execution if argument is not a JSBoundFunction, ++ // enabled via --debug-code. ++ void AssertBoundFunction(Register object); ++ ++ // Abort execution if argument is not a JSGeneratorObject (or subclass), ++ // enabled via --debug-code. ++ void AssertGeneratorObject(Register object); ++ ++ // Abort execution if argument is not undefined or an AllocationSite, enabled ++ // via --debug-code. ++ void AssertUndefinedOrAllocationSite(Register object, Register scratch); ++ ++ template ++ void DecodeField(Register dst, Register src) { ++ Bstrpick_d(dst, src, Field::kShift + Field::kSize - 1, Field::kShift); ++ } ++ ++ template ++ void DecodeField(Register reg) { ++ DecodeField(reg, reg); ++ } ++ ++ private: ++ // Helper functions for generating invokes. ++ void InvokePrologue(Register expected_parameter_count, ++ Register actual_parameter_count, Label* done, ++ InvokeFlag flag); ++ ++ // Compute memory operands for safepoint stack slots. 
++ static int SafepointRegisterStackIndex(int reg_code); ++ ++ // Needs access to SafepointRegisterStackIndex for compiled frame ++ // traversal. ++ friend class StandardFrame; ++ ++ DISALLOW_IMPLICIT_CONSTRUCTORS(MacroAssembler); ++}; ++ ++template ++void TurboAssembler::GenerateSwitchTable(Register index, size_t case_count, ++ Func GetLabelFunction) { ++ // Ensure that dd-ed labels following this instruction use 8 bytes aligned ++ // addresses. ++ BlockTrampolinePoolFor(static_cast(case_count) * 2 + ++ kSwitchTablePrologueSize); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Align(8); // next is 4 instrs. ++ pcaddi(scratch, 4); ++ // alsl_d will do sa ++ alsl_d(scratch, index, scratch, kPointerSizeLog2); ++ Ld_d(scratch, MemOperand(scratch, 0)); ++ jirl(zero_reg, scratch, 0); ++ for (size_t index = 0; index < case_count; ++index) { ++ dd(GetLabelFunction(index)); ++ } ++} ++ ++#define ACCESS_MASM(masm) masm-> ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_MACRO_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/register-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/register-la64.h +new file mode 100644 +index 00000000000..f2025e28e5f +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/register-la64.h +@@ -0,0 +1,328 @@ ++// Copyright 2018 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_CODEGEN_LA64_REGISTER_LA64_H_ ++#define V8_CODEGEN_LA64_REGISTER_LA64_H_ ++ ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/register.h" ++#include "src/codegen/reglist.h" ++ ++namespace v8 { ++namespace internal { ++ ++// clang-format off ++#define GENERAL_REGISTERS(V) \ ++ V(zero_reg) V(ra) V(gp) V(sp) \ ++ V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7) \ ++ V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(t6) V(t7) V(t8) \ ++ V(tp) V(fp) \ ++ V(s0) V(s1) V(s2) V(s3) V(s4) V(s5) V(s6) V(s7) V(s8) \ ++ ++#define ALLOCATABLE_GENERAL_REGISTERS(V) \ ++ V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7) \ ++ V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(s7) ++ ++#define DOUBLE_REGISTERS(V) \ ++ V(f0) V(f1) V(f2) V(f3) V(f4) V(f5) V(f6) V(f7) \ ++ V(f8) V(f9) V(f10) V(f11) V(f12) V(f13) V(f14) V(f15) \ ++ V(f16) V(f17) V(f18) V(f19) V(f20) V(f21) V(f22) V(f23) \ ++ V(f24) V(f25) V(f26) V(f27) V(f28) V(f29) V(f30) V(f31) ++ ++#define FLOAT_REGISTERS DOUBLE_REGISTERS ++#define SIMD128_REGISTERS(V) \ ++ V(w0) V(w1) V(w2) V(w3) V(w4) V(w5) V(w6) V(w7) \ ++ V(w8) V(w9) V(w10) V(w11) V(w12) V(w13) V(w14) V(w15) \ ++ V(w16) V(w17) V(w18) V(w19) V(w20) V(w21) V(w22) V(w23) \ ++ V(w24) V(w25) V(w26) V(w27) V(w28) V(w29) V(w30) V(w31) ++ ++#define ALLOCATABLE_DOUBLE_REGISTERS(V) \ ++ V(f0) V(f1) V(f2) V(f3) V(f4) V(f5) V(f6) V(f7) \ ++ V(f8) V(f9) V(f10) V(f11) V(f12) V(f13) V(f14) V(f15) V(f16) \ ++ V(f17) V(f18) V(f19) V(f20) V(f21) V(f22) V(f23) ++// clang-format on ++ ++// Note that the bit values must match those used in actual instruction ++// encoding. 
++const int kNumRegs = 32; ++ ++const RegList kJSCallerSaved = 1 << 4 | // a0 ++ 1 << 5 | // a1 ++ 1 << 6 | // a2 ++ 1 << 7 | // a3 ++ 1 << 8 | // a4 ++ 1 << 9 | // a5 ++ 1 << 10 | // a6 ++ 1 << 11 | // a7 ++ 1 << 12 | // t0 ++ 1 << 13 | // t1 ++ 1 << 14 | // t2 ++ 1 << 15 | // t3 ++ 1 << 16 | // t4 ++ 1 << 17 | // t5 ++ 1 << 20; // t8 ++ ++const int kNumJSCallerSaved = 15; ++ ++// Callee-saved registers preserved when switching from C to JavaScript. ++const RegList kCalleeSaved = 1 << 22 | // fp ++ 1 << 23 | // s0 ++ 1 << 24 | // s1 ++ 1 << 25 | // s2 ++ 1 << 26 | // s3 ++ 1 << 27 | // s4 ++ 1 << 28 | // s5 ++ 1 << 29 | // s6 (roots in Javascript code) ++ 1 << 30 | // s7 (cp in Javascript code) ++ 1 << 31; // s8 ++ ++const int kNumCalleeSaved = 10; ++ ++const RegList kCalleeSavedFPU = 1 << 24 | // f24 ++ 1 << 25 | // f25 ++ 1 << 26 | // f26 ++ 1 << 27 | // f27 ++ 1 << 28 | // f28 ++ 1 << 29 | // f29 ++ 1 << 30 | // f30 ++ 1 << 31; // f31 ++ ++const int kNumCalleeSavedFPU = 8; ++ ++const RegList kCallerSavedFPU = 1 << 0 | // f0 ++ 1 << 1 | // f1 ++ 1 << 2 | // f2 ++ 1 << 3 | // f3 ++ 1 << 4 | // f4 ++ 1 << 5 | // f5 ++ 1 << 6 | // f6 ++ 1 << 7 | // f7 ++ 1 << 8 | // f8 ++ 1 << 9 | // f9 ++ 1 << 10 | // f10 ++ 1 << 11 | // f11 ++ 1 << 12 | // f12 ++ 1 << 13 | // f13 ++ 1 << 14 | // f14 ++ 1 << 15 | // f15 ++ 1 << 16 | // f16 ++ 1 << 17 | // f17 ++ 1 << 18 | // f18 ++ 1 << 19 | // f19 ++ 1 << 20 | // f20 ++ 1 << 21 | // f21 ++ 1 << 22 | // f22 ++ 1 << 23; // f23 ++ ++// Number of registers for which space is reserved in safepoints. Must be a ++// multiple of 8. ++const int kNumSafepointRegisters = 32; ++ ++// Define the list of registers actually saved at safepoints. ++// Note that the number of saved registers may be smaller than the reserved ++// space, i.e. kNumSafepointSavedRegisters <= kNumSafepointRegisters. ++const RegList kSafepointSavedRegisters = kJSCallerSaved | kCalleeSaved; ++const int kNumSafepointSavedRegisters = kNumJSCallerSaved + kNumCalleeSaved; ++ ++const int kUndefIndex = -1; ++// Map with indexes on stack that corresponds to codes of saved registers. ++const int kSafepointRegisterStackIndexMap[kNumRegs] = {kUndefIndex, // zero_reg ++ kUndefIndex, // ra ++ kUndefIndex, // gp ++ kUndefIndex, // sp ++ 0, // a0 ++ 1, // a1 ++ 2, // a2 ++ 3, // a3 ++ 4, // a4 ++ 5, // a5 ++ 6, // a6 ++ 7, // a7 ++ 8, // t0 ++ 9, // t1 ++ 10, // t2 ++ 11, // t3 ++ 12, // t4 ++ 13, // t5 ++ kUndefIndex, // t6 ++ kUndefIndex, // t7 ++ 14, // t8 ++ kUndefIndex, // tp ++ 15, // fp ++ 16, // s0 ++ 17, // s1 ++ 28, // s2 ++ 29, // s3 ++ 20, // s4 ++ 21, // s5 ++ 22, // s6 ++ 23, // s7 ++ 24}; // s8 ++ ++// CPU Registers. ++// ++// 1) We would prefer to use an enum, but enum values are assignment- ++// compatible with int, which has caused code-generation bugs. ++// ++// 2) We would prefer to use a class instead of a struct but we don't like ++// the register initialization to depend on the particular initialization ++// order (which appears to be different on OS X, Linux, and Windows for the ++// installed versions of C++ we tried). Using a struct permits C-style ++// "initialization". Also, the Register objects cannot be const as this ++// forces initialization stubs in MSVC, making us dependent on initialization ++// order. ++// ++// 3) By not using an enum, we are possibly preventing the compiler from ++// doing certain constant folds, which may significantly reduce the ++// code generated for some assembly instructions (because they boil down ++// to a few constants). 
If this is a problem, we could change the code ++// such that we use an enum in optimized mode, and the struct in debug ++// mode. This way we get the compile-time error checking in debug mode ++// and best performance in optimized code. ++ ++// ----------------------------------------------------------------------------- ++// Implementation of Register and FPURegister. ++ ++enum RegisterCode { ++#define REGISTER_CODE(R) kRegCode_##R, ++ GENERAL_REGISTERS(REGISTER_CODE) ++#undef REGISTER_CODE ++ kRegAfterLast ++}; ++ ++class Register : public RegisterBase { ++ public: ++ static constexpr int kMantissaOffset = 0; ++ static constexpr int kExponentOffset = 4; ++ ++ private: ++ friend class RegisterBase; ++ explicit constexpr Register(int code) : RegisterBase(code) {} ++}; ++ ++// s7: context register ++// s3: scratch register ++// s4: scratch register 2 ++#define DECLARE_REGISTER(R) \ ++ constexpr Register R = Register::from_code(kRegCode_##R); ++GENERAL_REGISTERS(DECLARE_REGISTER) ++#undef DECLARE_REGISTER ++ ++constexpr Register no_reg = Register::no_reg(); ++ ++int ToNumber(Register reg); ++ ++Register ToRegister(int num); ++ ++constexpr bool kPadArguments = false; ++constexpr bool kSimpleFPAliasing = true; ++constexpr bool kSimdMaskRegisters = false; ++ ++enum DoubleRegisterCode { ++#define REGISTER_CODE(R) kDoubleCode_##R, ++ DOUBLE_REGISTERS(REGISTER_CODE) ++#undef REGISTER_CODE ++ kDoubleAfterLast ++}; ++ ++// Coprocessor register. ++class FPURegister : public RegisterBase { ++ public: ++ FPURegister low() const { ++ // TODO(plind): Create DCHECK for FR=0 mode. This usage suspect for FR=1. ++ // Find low reg of a Double-reg pair, which is the reg itself. ++ DCHECK_EQ(code() % 2, 0); // Specified Double reg must be even. ++ return FPURegister::from_code(code()); ++ } ++ ++ private: ++ friend class RegisterBase; ++ explicit constexpr FPURegister(int code) : RegisterBase(code) {} ++}; ++ ++enum CFRegister { FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7 }; ++ ++using FloatRegister = FPURegister; ++ ++using DoubleRegister = FPURegister; ++ ++// TODO here only for build success ++using Simd128Register = FPURegister; ++ ++#define DECLARE_DOUBLE_REGISTER(R) \ ++ constexpr DoubleRegister R = DoubleRegister::from_code(kDoubleCode_##R); ++DOUBLE_REGISTERS(DECLARE_DOUBLE_REGISTER) ++#undef DECLARE_DOUBLE_REGISTER ++ ++constexpr DoubleRegister no_dreg = DoubleRegister::no_reg(); ++ ++// Register aliases. ++// cp is assumed to be a callee saved register. ++constexpr Register kRootRegister = s6; ++constexpr Register cp = s7; ++constexpr Register kScratchReg = s3; ++constexpr Register kScratchReg2 = s4; ++constexpr DoubleRegister kScratchDoubleReg = f30; ++// FPU zero reg is often used to hold 0.0, but it's not hardwired to 0.0. ++constexpr DoubleRegister kDoubleRegZero = f28; ++ ++// FPU (coprocessor 1) control registers. ++// Currently only FCSR0 is implemented. ++// TODO fscr0 fcsr1 fcsr2 fscsr3 ++struct FPUControlRegister { ++ bool is_valid() const { return reg_code == kFCSRRegister; } ++ bool is(FPUControlRegister creg) const { return reg_code == creg.reg_code; } ++ int code() const { ++ DCHECK(is_valid()); ++ return reg_code; ++ } ++ int bit() const { ++ DCHECK(is_valid()); ++ return 1 << reg_code; ++ } ++ void setcode(int f) { ++ reg_code = f; ++ DCHECK(is_valid()); ++ } ++ // Unfortunately we can't make this private in a struct. 
++ int reg_code; ++}; ++ ++constexpr FPUControlRegister no_fpucreg = {kInvalidFPUControlRegister}; ++constexpr FPUControlRegister FCSR = {kFCSRRegister}; ++ ++// Define {RegisterName} methods for the register types. ++DEFINE_REGISTER_NAMES(Register, GENERAL_REGISTERS) ++DEFINE_REGISTER_NAMES(FPURegister, DOUBLE_REGISTERS) ++ ++// Give alias names to registers for calling conventions. ++constexpr Register kReturnRegister0 = a0; ++constexpr Register kReturnRegister1 = a1; ++constexpr Register kReturnRegister2 = a2; ++constexpr Register kJSFunctionRegister = a1; ++constexpr Register kContextRegister = s7; ++constexpr Register kAllocateSizeRegister = a0; ++constexpr Register kSpeculationPoisonRegister = t3; ++constexpr Register kInterpreterAccumulatorRegister = a0; ++constexpr Register kInterpreterBytecodeOffsetRegister = t0; ++constexpr Register kInterpreterBytecodeArrayRegister = t1; ++constexpr Register kInterpreterDispatchTableRegister = t2; ++ ++constexpr Register kJavaScriptCallArgCountRegister = a0; ++constexpr Register kJavaScriptCallCodeStartRegister = a2; ++constexpr Register kJavaScriptCallTargetRegister = kJSFunctionRegister; ++constexpr Register kJavaScriptCallNewTargetRegister = a3; ++constexpr Register kJavaScriptCallExtraArg1Register = a2; ++ ++constexpr Register kOffHeapTrampolineRegister = t7; ++constexpr Register kRuntimeCallFunctionRegister = a1; ++constexpr Register kRuntimeCallArgCountRegister = a0; ++constexpr Register kRuntimeCallArgvRegister = a2; ++constexpr Register kWasmInstanceRegister = a0; ++constexpr Register kWasmCompileLazyFuncIndexRegister = t0; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_REGISTER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h b/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h +index 01175e585e9..9c2fa9e3108 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h ++++ b/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h +@@ -49,6 +49,9 @@ enum AllocationFlags { + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/constants-mips64.h" + #include "src/codegen/mips64/macro-assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/la64/macro-assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/constants-s390.h" + #include "src/codegen/s390/macro-assembler-s390.h" +diff --git a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc +index 37a05585c4b..cafcfef81d1 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc +@@ -996,7 +996,7 @@ void Assembler::next(Label* L, bool is_internal) { + } + + bool Assembler::is_near(Label* L) { +- DCHECK(L->is_bound()); ++ if (L == nullptr || !L->is_bound()) return true; + return pc_offset() - L->pos() < kMaxBranchOffset - 4 * kInstrSize; + } + +diff --git a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h +index f70e46f81b3..c585840a7ad 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h ++++ b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h +@@ -1864,6 +1864,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { + // instruction. 
We use this information to trigger different mode of + // branch instruction generation, where we use jump instructions rather + // than regular branch instructions. ++ // TODO can this be optimied?????? + bool trampoline_emitted_; + static constexpr int kInvalidSlotPos = -1; + +diff --git a/src/3rdparty/chromium/v8/src/codegen/register-arch.h b/src/3rdparty/chromium/v8/src/codegen/register-arch.h +index 21a72330169..5ee6c4683d9 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/register-arch.h ++++ b/src/3rdparty/chromium/v8/src/codegen/register-arch.h +@@ -22,6 +22,8 @@ + #include "src/codegen/mips/register-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/register-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/register-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/register-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc b/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc +index 5752b463392..2c4bb1426a1 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc +@@ -58,6 +58,8 @@ static int get_num_allocatable_double_registers() { + kMaxAllocatableDoubleRegisterCount; + #elif V8_TARGET_ARCH_MIPS64 + kMaxAllocatableDoubleRegisterCount; ++#elif V8_TARGET_ARCH_LA64 ++ kMaxAllocatableDoubleRegisterCount; + #elif V8_TARGET_ARCH_PPC + kMaxAllocatableDoubleRegisterCount; + #elif V8_TARGET_ARCH_PPC64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc b/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc +index 9f079789326..ccbd7a355c2 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc +@@ -329,7 +329,8 @@ bool RelocInfo::OffHeapTargetIsCodedSpecially() { + return false; + #elif defined(V8_TARGET_ARCH_IA32) || defined(V8_TARGET_ARCH_MIPS) || \ + defined(V8_TARGET_ARCH_MIPS64) || defined(V8_TARGET_ARCH_PPC) || \ +- defined(V8_TARGET_ARCH_PPC64) || defined(V8_TARGET_ARCH_S390) ++ defined(V8_TARGET_ARCH_PPC64) || defined(V8_TARGET_ARCH_S390) || \ ++ defined(V8_TARGET_ARCH_MIPS64) || defined(V8_TARGET_ARCH_LA64) + return true; + #endif + } +diff --git a/src/3rdparty/chromium/v8/src/common/globals.h b/src/3rdparty/chromium/v8/src/common/globals.h +index c79b3b633cd..05078cb3f29 100644 +--- a/src/3rdparty/chromium/v8/src/common/globals.h ++++ b/src/3rdparty/chromium/v8/src/common/globals.h +@@ -58,6 +58,9 @@ constexpr int GB = MB * 1024; + #if (V8_TARGET_ARCH_S390 && !V8_HOST_ARCH_S390) + #define USE_SIMULATOR 1 + #endif ++#if (V8_TARGET_ARCH_LA64 && !V8_HOST_ARCH_LA64) ++#define USE_SIMULATOR 1 ++#endif + #endif + + // Determine whether the architecture uses an embedded constant pool +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h +index 84d5d249b83..353594436e4 100644 +--- a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h +@@ -17,6 +17,8 @@ + #include "src/compiler/backend/mips/instruction-codes-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/compiler/backend/mips64/instruction-codes-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/compiler/backend/la64/instruction-codes-la64.h" + #elif V8_TARGET_ARCH_X64 + #include "src/compiler/backend/x64/instruction-codes-x64.h" + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 
+diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc +index 7d72dbbf2d0..628ba0e7c53 100644 +--- a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc +@@ -2573,7 +2573,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { + #endif // !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS + + #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS64 && \ +- !V8_TARGET_ARCH_S390 && !V8_TARGET_ARCH_PPC64 ++ !V8_TARGET_ARCH_S390 && !V8_TARGET_ARCH_PPC64 && !V8_TARGET_ARCH_LA64 + void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); } + + void InstructionSelector::VisitWord64AtomicStore(Node* node) { +@@ -2598,7 +2598,8 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) { + UNIMPLEMENTED(); + } + #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_PPC64 +- // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390 ++ // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390 && ++ // !V8_TARGET_ARCH_LA64 + + #if !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM + // This is only needed on 32-bit to split the 64-bit value into two operands. +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/code-generator-la64.cc b/src/3rdparty/chromium/v8/src/compiler/backend/la64/code-generator-la64.cc +new file mode 100644 +index 00000000000..29bfffb5f63 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/code-generator-la64.cc +@@ -0,0 +1,2847 @@ ++// Copyright 2014 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/callable.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/codegen/optimized-compilation-info.h" ++#include "src/compiler/backend/code-generator-impl.h" ++#include "src/compiler/backend/code-generator.h" ++#include "src/compiler/backend/gap-resolver.h" ++#include "src/compiler/node-matchers.h" ++#include "src/compiler/osr.h" ++#include "src/heap/heap-inl.h" // crbug.com/v8/8499 ++#include "src/wasm/wasm-code-manager.h" ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++#define __ tasm()-> ++ ++// TODO(plind): consider renaming these macros. ++#define TRACE_MSG(msg) \ ++ PrintF("code_gen: \'%s\' in function %s at line %d\n", msg, __FUNCTION__, \ ++ __LINE__) ++ ++#define TRACE_UNIMPL() \ ++ PrintF("UNIMPLEMENTED code_generator_la64: %s at line %d\n", __FUNCTION__, \ ++ __LINE__) ++ ++// Adds La64-specific methods to convert InstructionOperands. ++class La64OperandConverter final : public InstructionOperandConverter { ++ public: ++ La64OperandConverter(CodeGenerator* gen, Instruction* instr) ++ : InstructionOperandConverter(gen, instr) {} ++ ++ FloatRegister OutputSingleRegister(size_t index = 0) { ++ return ToSingleRegister(instr_->OutputAt(index)); ++ } ++ ++ FloatRegister InputSingleRegister(size_t index) { ++ return ToSingleRegister(instr_->InputAt(index)); ++ } ++ ++ FloatRegister ToSingleRegister(InstructionOperand* op) { ++ // Single (Float) and Double register namespace is same on LA64, ++ // both are typedefs of FPURegister. 
++ return ToDoubleRegister(op); ++ } ++ ++ Register InputOrZeroRegister(size_t index) { ++ if (instr_->InputAt(index)->IsImmediate()) { ++ DCHECK_EQ(0, InputInt32(index)); ++ return zero_reg; ++ } ++ return InputRegister(index); ++ } ++ ++ DoubleRegister InputOrZeroDoubleRegister(size_t index) { ++ if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero; ++ ++ return InputDoubleRegister(index); ++ } ++ ++ DoubleRegister InputOrZeroSingleRegister(size_t index) { ++ if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero; ++ ++ return InputSingleRegister(index); ++ } ++ ++ Operand InputImmediate(size_t index) { ++ Constant constant = ToConstant(instr_->InputAt(index)); ++ switch (constant.type()) { ++ case Constant::kInt32: ++ return Operand(constant.ToInt32()); ++ case Constant::kInt64: ++ return Operand(constant.ToInt64()); ++ case Constant::kFloat32: ++ return Operand::EmbeddedNumber(constant.ToFloat32()); ++ case Constant::kFloat64: ++ return Operand::EmbeddedNumber(constant.ToFloat64().value()); ++ case Constant::kExternalReference: ++ case Constant::kCompressedHeapObject: ++ case Constant::kHeapObject: ++ // TODO(plind): Maybe we should handle ExtRef & HeapObj here? ++ // maybe not done on arm due to const pool ?? ++ break; ++ case Constant::kDelayedStringConstant: ++ return Operand::EmbeddedStringConstant( ++ constant.ToDelayedStringConstant()); ++ case Constant::kRpoNumber: ++ UNREACHABLE(); // TODO(titzer): RPO immediates on la64? ++ break; ++ } ++ UNREACHABLE(); ++ } ++ ++ Operand InputOperand(size_t index) { ++ InstructionOperand* op = instr_->InputAt(index); ++ if (op->IsRegister()) { ++ return Operand(ToRegister(op)); ++ } ++ return InputImmediate(index); ++ } ++ ++ MemOperand MemoryOperand(size_t* first_index) { ++ const size_t index = *first_index; ++ switch (AddressingModeField::decode(instr_->opcode())) { ++ case kMode_None: ++ break; ++ case kMode_MRI: ++ *first_index += 2; ++ return MemOperand(InputRegister(index + 0), InputInt32(index + 1)); ++ case kMode_MRR: ++ *first_index += 2; ++ return MemOperand(InputRegister(index + 0), InputRegister(index + 1)); ++ } ++ UNREACHABLE(); ++ } ++ ++ MemOperand MemoryOperand(size_t index = 0) { return MemoryOperand(&index); } ++ ++ MemOperand ToMemOperand(InstructionOperand* op) const { ++ DCHECK_NOT_NULL(op); ++ DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); ++ return SlotToMemOperand(AllocatedOperand::cast(op)->index()); ++ } ++ ++ MemOperand SlotToMemOperand(int slot) const { ++ FrameOffset offset = frame_access_state()->GetFrameOffset(slot); ++ return MemOperand(offset.from_stack_pointer() ? 
sp : fp, offset.offset()); ++ } ++}; ++ ++static inline bool HasRegisterInput(Instruction* instr, size_t index) { ++ return instr->InputAt(index)->IsRegister(); ++} ++ ++namespace { ++ ++class OutOfLineRecordWrite final : public OutOfLineCode { ++ public: ++ OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index, ++ Register value, Register scratch0, Register scratch1, ++ RecordWriteMode mode, StubCallMode stub_mode) ++ : OutOfLineCode(gen), ++ object_(object), ++ index_(index), ++ value_(value), ++ scratch0_(scratch0), ++ scratch1_(scratch1), ++ mode_(mode), ++ stub_mode_(stub_mode), ++ must_save_lr_(!gen->frame_access_state()->has_frame()), ++ zone_(gen->zone()) {} ++ ++ void Generate() final { ++ if (mode_ > RecordWriteMode::kValueIsPointer) { ++ __ JumpIfSmi(value_, exit()); ++ } ++ __ CheckPageFlag(value_, scratch0_, ++ MemoryChunk::kPointersToHereAreInterestingMask, eq, ++ exit()); ++ __ Add_d(scratch1_, object_, index_); ++ RememberedSetAction const remembered_set_action = ++ mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET ++ : OMIT_REMEMBERED_SET; ++ SaveFPRegsMode const save_fp_mode = ++ frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs; ++ if (must_save_lr_) { ++ // We need to save and restore ra if the frame was elided. ++ __ Push(ra); ++ } ++ if (mode_ == RecordWriteMode::kValueIsEphemeronKey) { ++ __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode); ++ } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { ++ // A direct call to a wasm runtime stub defined in this module. ++ // Just encode the stub index. This will be patched when the code ++ // is added to the native module and copied into wasm code space. ++ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, ++ save_fp_mode, wasm::WasmCode::kRecordWrite); ++ } else { ++ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, ++ save_fp_mode); ++ } ++ if (must_save_lr_) { ++ __ Pop(ra); ++ } ++ } ++ ++ private: ++ Register const object_; ++ Register const index_; ++ Register const value_; ++ Register const scratch0_; ++ Register const scratch1_; ++ RecordWriteMode const mode_; ++ StubCallMode const stub_mode_; ++ bool must_save_lr_; ++ Zone* zone_; ++}; ++ ++#define CREATE_OOL_CLASS(ool_name, tasm_ool_name, T) \ ++ class ool_name final : public OutOfLineCode { \ ++ public: \ ++ ool_name(CodeGenerator* gen, T dst, T src1, T src2) \ ++ : OutOfLineCode(gen), dst_(dst), src1_(src1), src2_(src2) {} \ ++ \ ++ void Generate() final { __ tasm_ool_name(dst_, src1_, src2_); } \ ++ \ ++ private: \ ++ T const dst_; \ ++ T const src1_; \ ++ T const src2_; \ ++ } ++ ++CREATE_OOL_CLASS(OutOfLineFloat32Max, Float32MaxOutOfLine, FPURegister); ++CREATE_OOL_CLASS(OutOfLineFloat32Min, Float32MinOutOfLine, FPURegister); ++CREATE_OOL_CLASS(OutOfLineFloat64Max, Float64MaxOutOfLine, FPURegister); ++CREATE_OOL_CLASS(OutOfLineFloat64Min, Float64MinOutOfLine, FPURegister); ++ ++#undef CREATE_OOL_CLASS ++ ++Condition FlagsConditionToConditionCmp(FlagsCondition condition) { ++ switch (condition) { ++ case kEqual: ++ return eq; ++ case kNotEqual: ++ return ne; ++ case kSignedLessThan: ++ return lt; ++ case kSignedGreaterThanOrEqual: ++ return ge; ++ case kSignedLessThanOrEqual: ++ return le; ++ case kSignedGreaterThan: ++ return gt; ++ case kUnsignedLessThan: ++ return lo; ++ case kUnsignedGreaterThanOrEqual: ++ return hs; ++ case kUnsignedLessThanOrEqual: ++ return ls; ++ case kUnsignedGreaterThan: ++ return hi; ++ case kUnorderedEqual: ++ case kUnorderedNotEqual: 
++ break; ++ default: ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++Condition FlagsConditionToConditionTst(FlagsCondition condition) { ++ switch (condition) { ++ case kNotEqual: ++ return ne; ++ case kEqual: ++ return eq; ++ default: ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++Condition FlagsConditionToConditionOvf(FlagsCondition condition) { ++ switch (condition) { ++ case kOverflow: ++ return ne; ++ case kNotOverflow: ++ return eq; ++ default: ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate, ++ FlagsCondition condition) { ++ switch (condition) { ++ case kEqual: ++ *predicate = true; ++ return CEQ; ++ case kNotEqual: ++ *predicate = false; ++ return CEQ; ++ case kUnsignedLessThan: ++ *predicate = true; ++ return CLT; ++ case kUnsignedGreaterThanOrEqual: ++ *predicate = false; ++ return CLT; ++ case kUnsignedLessThanOrEqual: ++ *predicate = true; ++ return CLE; ++ case kUnsignedGreaterThan: ++ *predicate = false; ++ return CLE; ++ case kUnorderedEqual: ++ case kUnorderedNotEqual: ++ *predicate = true; ++ break; ++ default: ++ *predicate = true; ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ++ InstructionCode opcode, Instruction* instr, ++ La64OperandConverter const& i) { ++ const MemoryAccessMode access_mode = ++ static_cast(MiscField::decode(opcode)); ++ if (access_mode == kMemoryAccessPoisoned) { ++ Register value = i.OutputRegister(); ++ codegen->tasm()->And(value, value, kSpeculationPoisonRegister); ++ } ++} ++ ++} // namespace ++ ++#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \ ++ do { \ ++ __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? ++#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \ ++ do { \ ++ __ dbar(0); \ ++ __ asm_instr(i.InputOrZeroRegister(2), i.MemoryOperand()); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// only use for sub_w and sub_d ++#define ASSEMBLE_ATOMIC_BINOP(load_linked, store_conditional, bin_instr) \ ++ do { \ ++ Label binop; \ ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ __ dbar(0); \ ++ __ bind(&binop); \ ++ __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \ ++ __ bin_instr(i.TempRegister(1), i.OutputRegister(0), \ ++ Operand(i.InputRegister(2))); \ ++ __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg)); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? 
++#define ASSEMBLE_ATOMIC_BINOP_EXT(load_linked, store_conditional, sign_extend, \ ++ size, bin_instr, representation) \ ++ do { \ ++ Label binop; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ if (representation == 32) { \ ++ __ andi(i.TempRegister(3), i.TempRegister(0), 0x3); \ ++ } else { \ ++ DCHECK_EQ(representation, 64); \ ++ __ andi(i.TempRegister(3), i.TempRegister(0), 0x7); \ ++ } \ ++ __ Sub_d(i.TempRegister(0), i.TempRegister(0), \ ++ Operand(i.TempRegister(3))); \ ++ __ slli_w(i.TempRegister(3), i.TempRegister(3), 3); \ ++ __ dbar(0); \ ++ __ bind(&binop); \ ++ __ load_linked(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \ ++ __ ExtractBits(i.OutputRegister(0), i.TempRegister(1), i.TempRegister(3), \ ++ size, sign_extend); \ ++ __ bin_instr(i.TempRegister(2), i.OutputRegister(0), \ ++ Operand(i.InputRegister(2))); \ ++ __ InsertBits(i.TempRegister(1), i.TempRegister(2), i.TempRegister(3), \ ++ size); \ ++ __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg)); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? ++#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT( \ ++ load_linked, store_conditional, sign_extend, size, representation) \ ++ do { \ ++ Label exchange; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ if (representation == 32) { \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x3); \ ++ } else { \ ++ DCHECK_EQ(representation, 64); \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x7); \ ++ } \ ++ __ Sub_d(i.TempRegister(0), i.TempRegister(0), \ ++ Operand(i.TempRegister(1))); \ ++ __ slli_w(i.TempRegister(1), i.TempRegister(1), 3); \ ++ __ dbar(0); \ ++ __ bind(&exchange); \ ++ __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1), \ ++ size, sign_extend); \ ++ __ InsertBits(i.TempRegister(2), i.InputRegister(2), i.TempRegister(1), \ ++ size); \ ++ __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&exchange, eq, i.TempRegister(2), Operand(zero_reg)); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? ++#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_linked, \ ++ store_conditional) \ ++ do { \ ++ Label compareExchange; \ ++ Label exit; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ __ dbar(0); \ ++ __ bind(&compareExchange); \ ++ __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&exit, ne, i.InputRegister(2), \ ++ Operand(i.OutputRegister(0))); \ ++ __ mov(i.TempRegister(2), i.InputRegister(3)); \ ++ __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&compareExchange, eq, i.TempRegister(2), \ ++ Operand(zero_reg)); \ ++ __ bind(&exit); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? 
++#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT( \ ++ load_linked, store_conditional, sign_extend, size, representation) \ ++ do { \ ++ Label compareExchange; \ ++ Label exit; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ if (representation == 32) { \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x3); \ ++ } else { \ ++ DCHECK_EQ(representation, 64); \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x7); \ ++ } \ ++ __ Sub_d(i.TempRegister(0), i.TempRegister(0), \ ++ Operand(i.TempRegister(1))); \ ++ __ slli_w(i.TempRegister(1), i.TempRegister(1), 3); \ ++ __ dbar(0); \ ++ __ bind(&compareExchange); \ ++ __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1), \ ++ size, sign_extend); \ ++ __ ExtractBits(i.InputRegister(2), i.InputRegister(2), i.TempRegister(1), \ ++ size, sign_extend); \ ++ __ BranchShort(&exit, ne, i.InputRegister(2), \ ++ Operand(i.OutputRegister(0))); \ ++ __ InsertBits(i.TempRegister(2), i.InputRegister(3), i.TempRegister(1), \ ++ size); \ ++ __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&compareExchange, eq, i.TempRegister(2), \ ++ Operand(zero_reg)); \ ++ __ bind(&exit); \ ++ __ dbar(0); \ ++ } while (0) ++ ++#define ASSEMBLE_IEEE754_BINOP(name) \ ++ do { \ ++ FrameScope scope(tasm(), StackFrame::MANUAL); \ ++ __ PrepareCallCFunction(0, 2, kScratchReg); \ ++ __ MovToFloatParameters(i.InputDoubleRegister(0), \ ++ i.InputDoubleRegister(1)); \ ++ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \ ++ /* Move the result in the double result register. */ \ ++ __ MovFromFloatResult(i.OutputDoubleRegister()); \ ++ } while (0) ++ ++#define ASSEMBLE_IEEE754_UNOP(name) \ ++ do { \ ++ FrameScope scope(tasm(), StackFrame::MANUAL); \ ++ __ PrepareCallCFunction(0, 1, kScratchReg); \ ++ __ MovToFloatParameter(i.InputDoubleRegister(0)); \ ++ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \ ++ /* Move the result in the double result register. */ \ ++ __ MovFromFloatResult(i.OutputDoubleRegister()); \ ++ } while (0) ++ ++#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \ ++ do { \ ++ __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \ ++ i.InputSimd128Register(1)); \ ++ } while (0) ++ ++void CodeGenerator::AssembleDeconstructFrame() { ++ __ mov(sp, fp); ++ __ Pop(ra, fp); ++} ++ ++void CodeGenerator::AssemblePrepareTailCall() { ++ if (frame_access_state()->has_frame()) { ++ __ Ld_d(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset)); ++ __ Ld_d(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); ++ } ++ frame_access_state()->SetFrameAccessToSP(); ++} ++ ++void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, ++ Register scratch1, ++ Register scratch2, ++ Register scratch3) { ++ DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3)); ++ Label done; ++ ++ // Check if current frame is an arguments adaptor frame. ++ __ Ld_d(scratch3, MemOperand(fp, StandardFrameConstants::kContextOffset)); ++ __ Branch(&done, ne, scratch3, ++ Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); ++ ++ // Load arguments count from current arguments adaptor frame (note, it ++ // does not include receiver). 
++ Register caller_args_count_reg = scratch1; ++ __ Ld_d(caller_args_count_reg, ++ MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset)); ++ __ SmiUntag(caller_args_count_reg); ++ ++ __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3); ++ __ bind(&done); ++} ++ ++namespace { ++ ++void AdjustStackPointerForTailCall(TurboAssembler* tasm, ++ FrameAccessState* state, ++ int new_slot_above_sp, ++ bool allow_shrinkage = true) { ++ int current_sp_offset = state->GetSPToFPSlotCount() + ++ StandardFrameConstants::kFixedSlotCountAboveFp; ++ int stack_slot_delta = new_slot_above_sp - current_sp_offset; ++ if (stack_slot_delta > 0) { ++ tasm->Sub_d(sp, sp, stack_slot_delta * kSystemPointerSize); ++ state->IncreaseSPDelta(stack_slot_delta); ++ } else if (allow_shrinkage && stack_slot_delta < 0) { ++ tasm->Add_d(sp, sp, -stack_slot_delta * kSystemPointerSize); ++ state->IncreaseSPDelta(stack_slot_delta); ++ } ++} ++ ++} // namespace ++ ++void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr, ++ int first_unused_stack_slot) { ++ AdjustStackPointerForTailCall(tasm(), frame_access_state(), ++ first_unused_stack_slot, false); ++} ++ ++void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, ++ int first_unused_stack_slot) { ++ AdjustStackPointerForTailCall(tasm(), frame_access_state(), ++ first_unused_stack_slot); ++} ++ ++// Check that {kJavaScriptCallCodeStartRegister} is correct. ++void CodeGenerator::AssembleCodeStartRegisterCheck() { ++ __ ComputeCodeStartAddress(kScratchReg); ++ __ Assert(eq, AbortReason::kWrongFunctionCodeStart, ++ kJavaScriptCallCodeStartRegister, Operand(kScratchReg)); ++} ++ ++// Check if the code object is marked for deoptimization. If it is, then it ++// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need ++// to: ++// 1. read from memory the word that contains that bit, which can be found in ++// the flags in the referenced {CodeDataContainer} object; ++// 2. test kMarkedForDeoptimizationBit in those flags; and ++// 3. if it is not zero then it jumps to the builtin. ++void CodeGenerator::BailoutIfDeoptimized() { ++ int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize; ++ __ Ld_d(kScratchReg, MemOperand(kJavaScriptCallCodeStartRegister, offset)); ++ __ Ld_w(kScratchReg, ++ FieldMemOperand(kScratchReg, ++ CodeDataContainer::kKindSpecificFlagsOffset)); ++ __ And(kScratchReg, kScratchReg, ++ Operand(1 << Code::kMarkedForDeoptimizationBit)); ++ __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode), ++ RelocInfo::CODE_TARGET, ne, kScratchReg, Operand(zero_reg)); ++} ++ ++void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() { ++ // Calculate a mask which has all bits set in the normal case, but has all ++ // bits cleared if we are speculatively executing the wrong PC. ++ __ li(kSpeculationPoisonRegister, -1); ++ __ ComputeCodeStartAddress(kScratchReg); ++ __ sub_d(kScratchReg, kScratchReg, kJavaScriptCallCodeStartRegister); ++ __ maskeqz(kSpeculationPoisonRegister, kSpeculationPoisonRegister, ++ kScratchReg); ++} ++ ++void CodeGenerator::AssembleRegisterArgumentPoisoning() { ++ __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister); ++ __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister); ++ __ And(sp, sp, kSpeculationPoisonRegister); ++} ++ ++// Assembles an instruction after register allocation, producing machine code. 
++CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ++ Instruction* instr) { ++ La64OperandConverter i(this, instr); ++ InstructionCode opcode = instr->opcode(); ++ ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode); ++ switch (arch_opcode) { ++ case kArchCallCodeObject: { ++ if (instr->InputAt(0)->IsImmediate()) { ++ __ Call(i.InputCode(0), RelocInfo::CODE_TARGET); ++ } else { ++ Register reg = i.InputRegister(0); ++ DCHECK_IMPLIES( ++ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), ++ reg == kJavaScriptCallCodeStartRegister); ++ __ addi_d(reg, reg, Code::kHeaderSize - kHeapObjectTag); ++ __ Call(reg); ++ } ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchCallBuiltinPointer: { ++ DCHECK(!instr->InputAt(0)->IsImmediate()); ++ Register builtin_index = i.InputRegister(0); ++ __ CallBuiltinByIndex(builtin_index); ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchCallWasmFunction: { ++ if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { ++ AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, ++ i.TempRegister(0), i.TempRegister(1), ++ i.TempRegister(2)); ++ } ++ if (instr->InputAt(0)->IsImmediate()) { ++ Constant constant = i.ToConstant(instr->InputAt(0)); ++ Address wasm_code = static_cast
<Address>(constant.ToInt64()); ++ __ Call(wasm_code, constant.rmode()); ++ } else { ++ __ addi_d(kScratchReg, i.InputRegister(0), 0); ++ __ Call(kScratchReg); ++ } ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchTailCallCodeObjectFromJSFunction: ++ case kArchTailCallCodeObject: { ++ if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { ++ AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, ++ i.TempRegister(0), i.TempRegister(1), ++ i.TempRegister(2)); ++ } ++ if (instr->InputAt(0)->IsImmediate()) { ++ __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET); ++ } else { ++ Register reg = i.InputRegister(0); ++ DCHECK_IMPLIES( ++ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), ++ reg == kJavaScriptCallCodeStartRegister); ++ __ addi_d(reg, reg, Code::kHeaderSize - kHeapObjectTag); ++ __ Jump(reg); ++ } ++ frame_access_state()->ClearSPDelta(); ++ frame_access_state()->SetFrameAccessToDefault(); ++ break; ++ } ++ case kArchTailCallWasm: { ++ if (instr->InputAt(0)->IsImmediate()) { ++ Constant constant = i.ToConstant(instr->InputAt(0)); ++ Address wasm_code = static_cast<Address>
(constant.ToInt64()); ++ __ Jump(wasm_code, constant.rmode()); ++ } else { ++ __ addi_d(kScratchReg, i.InputRegister(0), 0); ++ __ Jump(kScratchReg); ++ } ++ frame_access_state()->ClearSPDelta(); ++ frame_access_state()->SetFrameAccessToDefault(); ++ break; ++ } ++ case kArchTailCallAddress: { ++ CHECK(!instr->InputAt(0)->IsImmediate()); ++ Register reg = i.InputRegister(0); ++ DCHECK_IMPLIES( ++ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), ++ reg == kJavaScriptCallCodeStartRegister); ++ __ Jump(reg); ++ frame_access_state()->ClearSPDelta(); ++ frame_access_state()->SetFrameAccessToDefault(); ++ break; ++ } ++ case kArchCallJSFunction: { ++ Register func = i.InputRegister(0); ++ if (FLAG_debug_code) { ++ // Check the function's context matches the context argument. ++ __ Ld_d(kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset)); ++ __ Assert(eq, AbortReason::kWrongFunctionContext, cp, ++ Operand(kScratchReg)); ++ } ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(func, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Call(a2); ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchPrepareCallCFunction: { ++ int const num_parameters = MiscField::decode(instr->opcode()); ++ __ PrepareCallCFunction(num_parameters, kScratchReg); ++ // Frame alignment requires using FP-relative frame addressing. ++ frame_access_state()->SetFrameAccessToFP(); ++ break; ++ } ++ case kArchSaveCallerRegisters: { ++ fp_mode_ = ++ static_cast(MiscField::decode(instr->opcode())); ++ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); ++ // kReturnRegister0 should have been saved before entering the stub. ++ int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); ++ DCHECK(IsAligned(bytes, kSystemPointerSize)); ++ DCHECK_EQ(0, frame_access_state()->sp_delta()); ++ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); ++ DCHECK(!caller_registers_saved_); ++ caller_registers_saved_ = true; ++ break; ++ } ++ case kArchRestoreCallerRegisters: { ++ DCHECK(fp_mode_ == ++ static_cast(MiscField::decode(instr->opcode()))); ++ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); ++ // Don't overwrite the returned value. ++ int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); ++ frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize)); ++ DCHECK_EQ(0, frame_access_state()->sp_delta()); ++ DCHECK(caller_registers_saved_); ++ caller_registers_saved_ = false; ++ break; ++ } ++ case kArchPrepareTailCall: ++ AssemblePrepareTailCall(); ++ break; ++ case kArchCallCFunction: { ++ int const num_parameters = MiscField::decode(instr->opcode()); ++ Label start_call; ++ bool isWasmCapiFunction = ++ linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); ++ // from start_call to return address. ++ int offset = __ root_array_available() ? 44 : 80; // 11 or 20 instrs ++#if V8_HOST_ARCH_LA64 ++ if (__ emit_debug_code()) { ++ offset += 12; // see CallCFunction ++ } ++#endif ++ if (isWasmCapiFunction) { ++ // Put the return address in a stack slot. 
++ // __ mov(kScratchReg, ra); ++ __ bind(&start_call); ++ __ pcaddi(t7, -4); // __ nal(); ++ //__ nop(); ++ //__ Daddu(ra, ra, offset - 8); // 8 = nop + nal ++ __ St_d(t7, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); ++ // __ mov(ra, kScratchReg); ++ } ++ if (instr->InputAt(0)->IsImmediate()) { ++ ExternalReference ref = i.InputExternalReference(0); ++ __ CallCFunction(ref, num_parameters); ++ } else { ++ Register func = i.InputRegister(0); ++ __ CallCFunction(func, num_parameters); ++ } ++ if (isWasmCapiFunction) { ++ CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call)); ++ RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); ++ } ++ ++ frame_access_state()->SetFrameAccessToDefault(); ++ // Ideally, we should decrement SP delta to match the change of stack ++ // pointer in CallCFunction. However, for certain architectures (e.g. ++ // ARM), there may be more strict alignment requirement, causing old SP ++ // to be saved on the stack. In those cases, we can not calculate the SP ++ // delta statically. ++ frame_access_state()->ClearSPDelta(); ++ if (caller_registers_saved_) { ++ // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. ++ // Here, we assume the sequence to be: ++ // kArchSaveCallerRegisters; ++ // kArchCallCFunction; ++ // kArchRestoreCallerRegisters; ++ int bytes = ++ __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); ++ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); ++ } ++ break; ++ } ++ case kArchJmp: ++ AssembleArchJump(i.InputRpo(0)); ++ break; ++ case kArchBinarySearchSwitch: ++ AssembleArchBinarySearchSwitch(instr); ++ break; ++ break; ++ case kArchTableSwitch: ++ AssembleArchTableSwitch(instr); ++ break; ++ case kArchAbortCSAAssert: ++ DCHECK(i.InputRegister(0) == a0); ++ { ++ // We don't actually want to generate a pile of code for this, so just ++ // claim there is a stack frame, without generating one. ++ FrameScope scope(tasm(), StackFrame::NONE); ++ __ Call( ++ isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert), ++ RelocInfo::CODE_TARGET); ++ } ++ __ stop(); ++ break; ++ case kArchDebugBreak: ++ __ DebugBreak(); ++ break; ++ case kArchComment: ++ __ RecordComment(reinterpret_cast(i.InputInt64(0))); ++ break; ++ case kArchNop: ++ case kArchThrowTerminator: ++ // don't emit code for nops. ++ break; ++ case kArchDeoptimize: { ++ DeoptimizationExit* exit = ++ BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); ++ CodeGenResult result = AssembleDeoptimizerCall(exit); ++ if (result != kSuccess) return result; ++ break; ++ } ++ case kArchRet: ++ AssembleReturn(instr->InputAt(0)); ++ break; ++ case kArchStackPointerGreaterThan: ++ // Pseudo-instruction used for cmp/branch. No opcode emitted here. 
++ break; ++ case kArchStackCheckOffset: ++ __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset())); ++ break; ++ case kArchFramePointer: ++ __ mov(i.OutputRegister(), fp); ++ break; ++ case kArchParentFramePointer: ++ if (frame_access_state()->has_frame()) { ++ __ Ld_d(i.OutputRegister(), MemOperand(fp, 0)); ++ } else { ++ __ mov(i.OutputRegister(), fp); ++ } ++ break; ++ case kArchTruncateDoubleToI: ++ __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), ++ i.InputDoubleRegister(0), DetermineStubCallMode()); ++ break; ++ case kArchStoreWithWriteBarrier: { ++ RecordWriteMode mode = ++ static_cast(MiscField::decode(instr->opcode())); ++ Register object = i.InputRegister(0); ++ Register index = i.InputRegister(1); ++ Register value = i.InputRegister(2); ++ Register scratch0 = i.TempRegister(0); ++ Register scratch1 = i.TempRegister(1); ++ auto ool = new (zone()) ++ OutOfLineRecordWrite(this, object, index, value, scratch0, scratch1, ++ mode, DetermineStubCallMode()); ++ __ Add_d(kScratchReg, object, index); ++ __ St_d(value, MemOperand(kScratchReg, 0)); ++ __ CheckPageFlag(object, scratch0, ++ MemoryChunk::kPointersFromHereAreInterestingMask, ne, ++ ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kArchStackSlot: { ++ FrameOffset offset = ++ frame_access_state()->GetFrameOffset(i.InputInt32(0)); ++ Register base_reg = offset.from_stack_pointer() ? sp : fp; ++ __ Add_d(i.OutputRegister(), base_reg, Operand(offset.offset())); ++ int alignment = i.InputInt32(1); ++ DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || ++ alignment == 16); ++ if (FLAG_debug_code && alignment > 0) { ++ // Verify that the output_register is properly aligned ++ __ And(kScratchReg, i.OutputRegister(), ++ Operand(kSystemPointerSize - 1)); ++ __ Assert(eq, AbortReason::kAllocationIsNotDoubleAligned, kScratchReg, ++ Operand(zero_reg)); ++ } ++ if (alignment == 2 * kSystemPointerSize) { ++ Label done; ++ __ Add_d(kScratchReg, base_reg, Operand(offset.offset())); ++ __ And(kScratchReg, kScratchReg, Operand(alignment - 1)); ++ __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg)); ++ __ Add_d(i.OutputRegister(), i.OutputRegister(), kSystemPointerSize); ++ __ bind(&done); ++ } else if (alignment > 2 * kSystemPointerSize) { ++ Label done; ++ __ Add_d(kScratchReg, base_reg, Operand(offset.offset())); ++ __ And(kScratchReg, kScratchReg, Operand(alignment - 1)); ++ __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg)); ++ __ li(kScratchReg2, alignment); ++ __ Sub_d(kScratchReg2, kScratchReg2, Operand(kScratchReg)); ++ __ Add_d(i.OutputRegister(), i.OutputRegister(), kScratchReg2); ++ __ bind(&done); ++ } ++ ++ break; ++ } ++ case kArchWordPoisonOnSpeculation: ++ __ And(i.OutputRegister(), i.InputRegister(0), ++ kSpeculationPoisonRegister); ++ break; ++ case kIeee754Float64Acos: ++ ASSEMBLE_IEEE754_UNOP(acos); ++ break; ++ case kIeee754Float64Acosh: ++ ASSEMBLE_IEEE754_UNOP(acosh); ++ break; ++ case kIeee754Float64Asin: ++ ASSEMBLE_IEEE754_UNOP(asin); ++ break; ++ case kIeee754Float64Asinh: ++ ASSEMBLE_IEEE754_UNOP(asinh); ++ break; ++ case kIeee754Float64Atan: ++ ASSEMBLE_IEEE754_UNOP(atan); ++ break; ++ case kIeee754Float64Atanh: ++ ASSEMBLE_IEEE754_UNOP(atanh); ++ break; ++ case kIeee754Float64Atan2: ++ ASSEMBLE_IEEE754_BINOP(atan2); ++ break; ++ case kIeee754Float64Cos: ++ ASSEMBLE_IEEE754_UNOP(cos); ++ break; ++ case kIeee754Float64Cosh: ++ ASSEMBLE_IEEE754_UNOP(cosh); ++ break; ++ case kIeee754Float64Cbrt: ++ ASSEMBLE_IEEE754_UNOP(cbrt); ++ break; ++ case kIeee754Float64Exp: 
++ ASSEMBLE_IEEE754_UNOP(exp); ++ break; ++ case kIeee754Float64Expm1: ++ ASSEMBLE_IEEE754_UNOP(expm1); ++ break; ++ case kIeee754Float64Log: ++ ASSEMBLE_IEEE754_UNOP(log); ++ break; ++ case kIeee754Float64Log1p: ++ ASSEMBLE_IEEE754_UNOP(log1p); ++ break; ++ case kIeee754Float64Log2: ++ ASSEMBLE_IEEE754_UNOP(log2); ++ break; ++ case kIeee754Float64Log10: ++ ASSEMBLE_IEEE754_UNOP(log10); ++ break; ++ case kIeee754Float64Pow: ++ ASSEMBLE_IEEE754_BINOP(pow); ++ break; ++ case kIeee754Float64Sin: ++ ASSEMBLE_IEEE754_UNOP(sin); ++ break; ++ case kIeee754Float64Sinh: ++ ASSEMBLE_IEEE754_UNOP(sinh); ++ break; ++ case kIeee754Float64Tan: ++ ASSEMBLE_IEEE754_UNOP(tan); ++ break; ++ case kIeee754Float64Tanh: ++ ASSEMBLE_IEEE754_UNOP(tanh); ++ break; ++ case kLa64Add: ++ __ Add_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dadd: ++ __ Add_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DaddOvf: ++ __ AdddOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1), ++ kScratchReg); ++ break; ++ case kLa64Sub: ++ __ Sub_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dsub: ++ __ Sub_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DsubOvf: ++ __ SubdOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1), ++ kScratchReg); ++ break; ++ case kLa64Mul: ++ __ Mul_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64MulOvf: ++ __ MulOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1), ++ kScratchReg); ++ break; ++ case kLa64MulHigh: ++ __ Mulh_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64MulHighU: ++ __ Mulh_wu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DMulHigh: ++ __ Mulh_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Div: ++ __ Div_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64DivU: ++ __ Div_wu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64Mod: ++ __ Mod_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64ModU: ++ __ Mod_wu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dmul: ++ __ Mul_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Ddiv: ++ __ Div_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64DdivU: ++ __ Div_du(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64Dmod: ++ __ Mod_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DmodU: ++ __ Mod_du(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dlsa: ++ DCHECK(instr->InputAt(2)->IsImmediate()); ++ __ Alsl_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), ++ i.InputInt8(2), t7); ++ break; ++ case kLa64Lsa: ++ DCHECK(instr->InputAt(2)->IsImmediate()); ++ __ Alsl_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), ++ i.InputInt8(2), t7); ++ break; ++ case kLa64And: ++ __ And(i.OutputRegister(), 
i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64And32: ++ __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ break; ++ case kLa64Or: ++ __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Or32: ++ __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ break; ++ case kLa64Nor: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ } else { ++ DCHECK_EQ(0, i.InputOperand(1).immediate()); ++ __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg); ++ } ++ break; ++ case kLa64Nor32: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ } else { ++ DCHECK_EQ(0, i.InputOperand(1).immediate()); ++ __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ } ++ break; ++ case kLa64Xor: ++ __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Xor32: ++ __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ break; ++ case kLa64Clz: ++ __ Clz_w(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Dclz: ++ __ clz_d(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Ctz: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Ctz_w(dst, src); ++ } break; ++ case kLa64Dctz: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Ctz_d(dst, src); ++ } break; ++ case kLa64Popcnt: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Popcnt_w(dst, src); ++ } break; ++ case kLa64Dpopcnt: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Popcnt_d(dst, src); ++ } break; ++ case kLa64Shl: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ sll_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_w(i.OutputRegister(), i.InputRegister(0), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Shr: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ slli_w(i.InputRegister(0), i.InputRegister(0), 0x0); ++ __ srl_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_w(i.OutputRegister(), i.InputRegister(0), 0x0); ++ __ srli_w(i.OutputRegister(), i.OutputRegister(), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Sar: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ slli_w(i.InputRegister(0), i.InputRegister(0), 0x0); ++ __ sra_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_w(i.OutputRegister(), i.InputRegister(0), 0x0); ++ __ srai_w(i.OutputRegister(), i.OutputRegister(), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Ext: ++ __ bstrpick_w(i.OutputRegister(), i.InputRegister(0), ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ break; ++ case kLa64Ins: ++ if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) { ++ __ bstrins_w(i.OutputRegister(), zero_reg, ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } else { ++ __ bstrins_w(i.OutputRegister(), i.InputRegister(0), 
++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } ++ break; ++ case kLa64Dext: { ++ __ bstrpick_d(i.OutputRegister(), i.InputRegister(0), ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ break; ++ } ++ case kLa64Dins: ++ if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) { ++ __ bstrins_d(i.OutputRegister(), zero_reg, ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } else { ++ __ bstrins_d(i.OutputRegister(), i.InputRegister(0), ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } ++ break; ++ case kLa64Dshl: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ sll_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_d(i.OutputRegister(), i.InputRegister(0), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Dshr: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ srl_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ srli_d(i.OutputRegister(), i.InputRegister(0), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Dsar: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ sra_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ srai_d(i.OutputRegister(), i.InputRegister(0), imm); ++ } ++ break; ++ case kLa64Ror: ++ __ Rotr_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dror: ++ __ Rotr_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Tst: ++ __ And(kScratchReg, i.InputRegister(0), i.InputOperand(1)); ++ // Pseudo-instruction used for cmp/branch. No opcode emitted here. ++ break; ++ case kLa64Cmp: ++ // Pseudo-instruction used for cmp/branch. No opcode emitted here. ++ break; ++ case kLa64Mov: ++ // TODO(plind): Should we combine mov/li like this, or use separate instr? ++ // - Also see x64 ASSEMBLE_BINOP & RegisterOrOperandType ++ if (HasRegisterInput(instr, 0)) { ++ __ mov(i.OutputRegister(), i.InputRegister(0)); ++ } else { ++ __ li(i.OutputRegister(), i.InputOperand(0)); ++ } ++ break; ++ ++ case kLa64CmpS: { ++ FPURegister left = i.InputOrZeroSingleRegister(0); ++ FPURegister right = i.InputOrZeroSingleRegister(1); ++ bool predicate; ++ FPUCondition cc = ++ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); ++ ++ if ((left == kDoubleRegZero || right == kDoubleRegZero) && ++ !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ CompareF32(left, right, cc); ++ } break; ++ case kLa64AddS: ++ // TODO(plind): add special case: combine mult & add. ++ __ fadd_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64SubS: ++ __ fsub_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MulS: ++ // TODO(plind): add special case: right op is -1.0, see arm port. ++ __ fmul_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64DivS: ++ __ fdiv_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64ModS: { ++ // TODO(bmeurer): We should really get rid of this special instruction, ++ // and generate a CallAddress instruction instead. 
++ FrameScope scope(tasm(), StackFrame::MANUAL); ++ __ PrepareCallCFunction(0, 2, kScratchReg); ++ __ MovToFloatParameters(i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ // TODO(balazs.kilvady): implement mod_two_floats_operation(isolate()) ++ __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); ++ // Move the result in the double result register. ++ __ MovFromFloatResult(i.OutputSingleRegister()); ++ break; ++ } ++ case kLa64AbsS: ++ __ fabs_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ case kLa64NegS: ++ __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ case kLa64SqrtS: { ++ __ fsqrt_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64MaxS: ++ __ fmax_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MinS: ++ __ fmin_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64CmpD: { ++ FPURegister left = i.InputOrZeroDoubleRegister(0); ++ FPURegister right = i.InputOrZeroDoubleRegister(1); ++ bool predicate; ++ FPUCondition cc = ++ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); ++ if ((left == kDoubleRegZero || right == kDoubleRegZero) && ++ !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ CompareF64(left, right, cc); ++ } break; ++ case kLa64AddD: ++ // TODO(plind): add special case: combine mult & add. ++ __ fadd_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64SubD: ++ __ fsub_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MulD: ++ // TODO(plind): add special case: right op is -1.0, see arm port. ++ __ fmul_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64DivD: ++ __ fdiv_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64ModD: { ++ // TODO(bmeurer): We should really get rid of this special instruction, ++ // and generate a CallAddress instruction instead. ++ FrameScope scope(tasm(), StackFrame::MANUAL); ++ __ PrepareCallCFunction(0, 2, kScratchReg); ++ __ MovToFloatParameters(i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); ++ // Move the result in the double result register. 
++ __ MovFromFloatResult(i.OutputDoubleRegister()); ++ break; ++ } ++ case kLa64AbsD: ++ __ fabs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64NegD: ++ __ Neg_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64SqrtD: { ++ __ fsqrt_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64MaxD: ++ __ fmax_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MinD: ++ __ fmin_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64Float64RoundDown: { ++ __ Floor_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundDown: { ++ __ Floor_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float64RoundTruncate: { ++ __ Trunc_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundTruncate: { ++ __ Trunc_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float64RoundUp: { ++ __ Ceil_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundUp: { ++ __ Ceil_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float64RoundTiesEven: { ++ __ Round_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundTiesEven: { ++ __ Round_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float32Max: { ++ FPURegister dst = i.OutputSingleRegister(); ++ FPURegister src1 = i.InputSingleRegister(0); ++ FPURegister src2 = i.InputSingleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat32Max(this, dst, src1, src2); ++ __ Float32Max(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float64Max: { ++ FPURegister dst = i.OutputDoubleRegister(); ++ FPURegister src1 = i.InputDoubleRegister(0); ++ FPURegister src2 = i.InputDoubleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat64Max(this, dst, src1, src2); ++ __ Float64Max(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float32Min: { ++ FPURegister dst = i.OutputSingleRegister(); ++ FPURegister src1 = i.InputSingleRegister(0); ++ FPURegister src2 = i.InputSingleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat32Min(this, dst, src1, src2); ++ __ Float32Min(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float64Min: { ++ FPURegister dst = i.OutputDoubleRegister(); ++ FPURegister src1 = i.InputDoubleRegister(0); ++ FPURegister src2 = i.InputDoubleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat64Min(this, dst, src1, src2); ++ __ Float64Min(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float64SilenceNaN: ++ __ FPUCanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64CvtSD: ++ __ fcvt_s_d(i.OutputSingleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64CvtDS: ++ __ fcvt_d_s(i.OutputDoubleRegister(), i.InputSingleRegister(0)); ++ break; ++ case kLa64CvtDW: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_w(scratch, i.InputRegister(0)); ++ __ ffint_d_w(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtSW: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_w(scratch, i.InputRegister(0)); ++ __ ffint_s_w(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtSUw: 
{ ++ __ Ffint_s_uw(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64CvtSL: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_d(scratch, i.InputRegister(0)); ++ __ ffint_s_l(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtDL: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_d(scratch, i.InputRegister(0)); ++ __ ffint_d_l(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtDUw: { ++ __ Ffint_d_uw(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64CvtDUl: { ++ __ Ffint_d_ul(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64CvtSUl: { ++ __ Ffint_s_ul(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64FloorWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrm_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64CeilWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrp_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64RoundWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrne_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64TruncWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ // Other arches use round to zero here, so we follow. ++ __ ftintrz_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64FloorWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrm_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64CeilWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrp_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64RoundWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrne_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64TruncWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrz_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, ++ // because INT32_MIN allows easier out-of-bounds detection. ++ __ addi_w(kScratchReg, i.OutputRegister(), 1); ++ __ slt(kScratchReg2, kScratchReg, i.OutputRegister()); ++ __ Movn(i.OutputRegister(), kScratchReg, kScratchReg2); ++ break; ++ } ++ case kLa64TruncLS: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register tmp_fcsr = kScratchReg; ++ Register result = kScratchReg2; ++ ++ bool load_status = instr->OutputCount() > 1; ++ if (load_status) { ++ // Save FCSR. ++ __ movfcsr2gr(tmp_fcsr); // __ cfc1(tmp_fcsr, FCSR); ++ // Clear FPU flags. ++ __ movgr2fcsr(zero_reg); // __ ctc1(zero_reg, FCSR); ++ } ++ // Other arches use round to zero here, so we follow. ++ __ ftintrz_l_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_d(i.OutputRegister(), scratch); ++ if (load_status) { ++ __ movfcsr2gr(result); // __ cfc1(result, FCSR); ++ // Check for overflow and NaNs. 
++ __ And(result, result, ++ (kFCSROverflowFlagMask | kFCSRInvalidOpFlagMask)); ++ __ Slt(result, zero_reg, result); ++ __ xori(result, result, 1); ++ __ mov(i.OutputRegister(1), result); ++ // Restore FCSR ++ __ movgr2fcsr(tmp_fcsr); // __ ctc1(tmp_fcsr, FCSR); ++ } ++ break; ++ } ++ case kLa64TruncLD: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register tmp_fcsr = kScratchReg; ++ Register result = kScratchReg2; ++ ++ bool load_status = instr->OutputCount() > 1; ++ if (load_status) { ++ // Save FCSR. ++ __ movfcsr2gr(tmp_fcsr); // __ cfc1(tmp_fcsr, FCSR); ++ // Clear FPU flags. ++ __ movgr2fcsr(zero_reg); // __ ctc1(zero_reg, FCSR); ++ } ++ // Other arches use round to zero here, so we follow. ++ __ ftintrz_l_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_d(i.OutputRegister(0), scratch); ++ if (load_status) { ++ __ movfcsr2gr(result); // __ cfc1(result, FCSR); ++ // Check for overflow and NaNs. ++ __ And(result, result, ++ (kFCSROverflowFlagMask | kFCSRInvalidOpFlagMask)); ++ __ Slt(result, zero_reg, result); ++ __ xori(result, result, 1); ++ __ mov(i.OutputRegister(1), result); ++ // Restore FCSR ++ __ movgr2fcsr(tmp_fcsr); // __ ctc1(tmp_fcsr, FCSR); ++ } ++ break; ++ } ++ case kLa64TruncUwD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ Ftintrz_uw_d(i.OutputRegister(), i.InputDoubleRegister(0), scratch); ++ break; ++ } ++ case kLa64TruncUwS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ Ftintrz_uw_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch); ++ // Avoid UINT32_MAX as an overflow indicator and use 0 instead, ++ // because 0 allows easier out-of-bounds detection. ++ __ addi_w(kScratchReg, i.OutputRegister(), 1); ++ __ Movz(i.OutputRegister(), zero_reg, kScratchReg); ++ break; ++ } ++ case kLa64TruncUlS: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg; ++ __ Ftintrz_ul_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch, ++ result); ++ break; ++ } ++ case kLa64TruncUlD: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg; ++ __ Ftintrz_ul_d(i.OutputRegister(0), i.InputDoubleRegister(0), scratch, ++ result); ++ break; ++ } ++ case kLa64BitcastDL: ++ __ movfr2gr_d(i.OutputRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64BitcastLD: ++ __ movgr2fr_d(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Float64ExtractLowWord32: ++ __ FmoveLow(i.OutputRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64Float64ExtractHighWord32: ++ __ movfrh2gr_s(i.OutputRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64Float64InsertLowWord32: ++ __ FmoveLow(i.OutputDoubleRegister(), i.InputRegister(1)); ++ break; ++ case kLa64Float64InsertHighWord32: ++ __ movgr2frh_w(i.OutputDoubleRegister(), i.InputRegister(1)); ++ break; ++ // ... more basic instructions ... 
++ ++ case kLa64Seb: ++ __ ext_w_b(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Seh: ++ __ ext_w_h(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Lbu: ++ __ Ld_bu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Lb: ++ __ Ld_b(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Sb: ++ __ St_b(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Lhu: ++ __ Ld_hu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulhu: ++ __ Ld_hu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Lh: ++ __ Ld_h(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulh: ++ __ Ld_h(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Sh: ++ __ St_h(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Ush: ++ __ St_h(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Lw: ++ __ Ld_w(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulw: ++ __ Ld_w(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Lwu: ++ __ Ld_wu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulwu: ++ __ Ld_wu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ld: ++ __ Ld_d(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Uld: ++ __ Ld_d(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Sw: ++ __ St_w(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Usw: ++ __ St_w(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Sd: ++ __ St_d(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Usd: ++ __ St_d(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Lwc1: { ++ __ Fld_s(i.OutputSingleRegister(), i.MemoryOperand()); ++ break; ++ } ++ case kLa64Ulwc1: { ++ __ Fld_s(i.OutputSingleRegister(), i.MemoryOperand()); ++ break; ++ } ++ case kLa64Swc1: { ++ size_t index = 0; ++ MemOperand operand = i.MemoryOperand(&index); ++ FPURegister ft = i.InputOrZeroSingleRegister(index); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_s(ft, operand); ++ break; ++ } ++ case kLa64Uswc1: { ++ size_t index = 0; ++ MemOperand operand = i.MemoryOperand(&index); ++ FPURegister ft = i.InputOrZeroSingleRegister(index); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_s(ft, operand); ++ break; ++ } ++ case kLa64Ldc1: ++ __ Fld_d(i.OutputDoubleRegister(), i.MemoryOperand()); ++ break; ++ case kLa64Uldc1: ++ __ Fld_d(i.OutputDoubleRegister(), i.MemoryOperand()); ++ break; ++ case kLa64Sdc1: { ++ FPURegister ft = i.InputOrZeroDoubleRegister(2); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_d(ft, 
i.MemoryOperand()); ++ break; ++ } ++ case kLa64Usdc1: { ++ FPURegister ft = i.InputOrZeroDoubleRegister(2); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_d(ft, i.MemoryOperand()); ++ break; ++ } ++ case kLa64Sync: { ++ __ dbar(0); ++ break; ++ } ++ case kLa64Push: ++ if (instr->InputAt(0)->IsFPRegister()) { ++ __ Fst_d(i.InputDoubleRegister(0), MemOperand(sp, -kDoubleSize)); ++ __ Sub_d(sp, sp, Operand(kDoubleSize)); ++ frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); ++ } else { ++ __ Push(i.InputRegister(0)); ++ frame_access_state()->IncreaseSPDelta(1); ++ } ++ break; ++ case kLa64Peek: { ++ // The incoming value is 0-based, but we need a 1-based value. ++ int reverse_slot = i.InputInt32(0) + 1; ++ int offset = ++ FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); ++ if (instr->OutputAt(0)->IsFPRegister()) { ++ LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); ++ if (op->representation() == MachineRepresentation::kFloat64) { ++ __ Fld_d(i.OutputDoubleRegister(), MemOperand(fp, offset)); ++ } else { ++ DCHECK_EQ(op->representation(), MachineRepresentation::kFloat32); ++ __ Fld_s( ++ i.OutputSingleRegister(0), ++ MemOperand(fp, offset + kLessSignificantWordInDoublewordOffset)); ++ } ++ } else { ++ __ Ld_d(i.OutputRegister(0), MemOperand(fp, offset)); ++ } ++ break; ++ } ++ case kLa64StackClaim: { ++ __ Sub_d(sp, sp, Operand(i.InputInt32(0))); ++ frame_access_state()->IncreaseSPDelta(i.InputInt32(0) / ++ kSystemPointerSize); ++ break; ++ } ++ case kLa64StoreToStackSlot: { ++ if (instr->InputAt(0)->IsFPRegister()) { ++ __ Fst_d(i.InputDoubleRegister(0), MemOperand(sp, i.InputInt32(1))); ++ } else { ++ __ St_d(i.InputRegister(0), MemOperand(sp, i.InputInt32(1))); ++ } ++ break; ++ } ++ case kLa64ByteSwap64: { ++ __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 8); ++ break; ++ } ++ case kLa64ByteSwap32: { ++ __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 4); ++ break; ++ } ++ case kWord32AtomicLoadInt8: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_b); ++ break; ++ case kWord32AtomicLoadUint8: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_bu); ++ break; ++ case kWord32AtomicLoadInt16: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_h); ++ break; ++ case kWord32AtomicLoadUint16: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_hu); ++ break; ++ case kWord32AtomicLoadWord32: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_w); ++ break; ++ case kLa64Word64AtomicLoadUint8: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_bu); ++ break; ++ case kLa64Word64AtomicLoadUint16: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_hu); ++ break; ++ case kLa64Word64AtomicLoadUint32: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_wu); ++ break; ++ case kLa64Word64AtomicLoadUint64: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_d); ++ break; ++ case kWord32AtomicStoreWord8: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_b); ++ break; ++ case kWord32AtomicStoreWord16: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_h); ++ break; ++ case kWord32AtomicStoreWord32: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_w); ++ break; ++ case kLa64Word64AtomicStoreWord8: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_b); ++ break; ++ case kLa64Word64AtomicStoreWord16: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_h); ++ break; ++ case kLa64Word64AtomicStoreWord32: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_w); ++ break; ++ case kLa64Word64AtomicStoreWord64: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_d); ++ break; ++ case kWord32AtomicExchangeInt8: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 8, 32); ++ break; ++ case kWord32AtomicExchangeUint8: ++ 
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 8, 32); ++ break; ++ case kWord32AtomicExchangeInt16: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 16, 32); ++ break; ++ case kWord32AtomicExchangeUint16: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 16, 32); ++ break; ++ case kWord32AtomicExchangeWord32: ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amswap_db_w(i.OutputRegister(0), i.InputRegister(2), ++ i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicExchangeUint8: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 8, 64); ++ break; ++ case kLa64Word64AtomicExchangeUint16: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 16, 64); ++ break; ++ case kLa64Word64AtomicExchangeUint32: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 32, 64); ++ break; ++ case kLa64Word64AtomicExchangeUint64: ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amswap_db_d(i.OutputRegister(0), i.InputRegister(2), ++ i.TempRegister(0)); ++ break; ++ case kWord32AtomicCompareExchangeInt8: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 8, 32); ++ break; ++ case kWord32AtomicCompareExchangeUint8: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 8, 32); ++ break; ++ case kWord32AtomicCompareExchangeInt16: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 16, 32); ++ break; ++ case kWord32AtomicCompareExchangeUint16: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 16, 32); ++ break; ++ case kWord32AtomicCompareExchangeWord32: ++ __ slli_w(i.InputRegister(2), i.InputRegister(2), 0); ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll_w, Sc_w); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint8: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 8, 64); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint16: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 16, 64); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint32: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 32, 64); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint64: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll_d, Sc_d); ++ break; ++ case kWord32AtomicAddWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amadd_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kWord32AtomicSubWord32: ++ ASSEMBLE_ATOMIC_BINOP(Ll_w, Sc_w, Sub_w); ++ break; ++ case kWord32AtomicAndWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amand_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kWord32AtomicOrWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amor_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kWord32AtomicXorWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amxor_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++#define ATOMIC_BINOP_CASE(op, inst) \ ++ case kWord32Atomic##op##Int8: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, true, 8, inst, 32); \ ++ break; \ ++ case kWord32Atomic##op##Uint8: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, false, 8, inst, 32); \ ++ break; \ ++ case kWord32Atomic##op##Int16: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, true, 16, inst, 32); \ ++ break; \ ++ case kWord32Atomic##op##Uint16: \ ++ 
ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, false, 16, inst, 32); \ ++ break; ++ ATOMIC_BINOP_CASE(Add, Add_w) ++ ATOMIC_BINOP_CASE(Sub, Sub_w) ++ ATOMIC_BINOP_CASE(And, And) ++ ATOMIC_BINOP_CASE(Or, Or) ++ ATOMIC_BINOP_CASE(Xor, Xor) ++#undef ATOMIC_BINOP_CASE ++ ++ case kLa64Word64AtomicAddUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amadd_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicSubUint64: ++ ASSEMBLE_ATOMIC_BINOP(Ll_d, Sc_d, Sub_d); ++ break; ++ case kLa64Word64AtomicAndUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amand_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicOrUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amor_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicXorUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amxor_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++#define ATOMIC_BINOP_CASE(op, inst) \ ++ case kLa64Word64Atomic##op##Uint8: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_d, Sc_d, false, 8, inst, 64); \ ++ break; \ ++ case kLa64Word64Atomic##op##Uint16: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_d, Sc_d, false, 16, inst, 64); \ ++ break; \ ++ case kLa64Word64Atomic##op##Uint32: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_d, Sc_d, false, 32, inst, 64); \ ++ break; ++ ATOMIC_BINOP_CASE(Add, Add_d) ++ ATOMIC_BINOP_CASE(Sub, Sub_d) ++ ATOMIC_BINOP_CASE(And, And) ++ ATOMIC_BINOP_CASE(Or, Or) ++ ATOMIC_BINOP_CASE(Xor, Xor) ++#undef ATOMIC_BINOP_CASE ++ case kLa64AssertEqual: ++ __ Assert(eq, static_cast(i.InputOperand(2).immediate()), ++ i.InputRegister(0), Operand(i.InputRegister(1))); ++ break; ++ case kLa64S128Zero: ++ case kLa64I32x4Splat: ++ case kLa64I32x4ExtractLane: ++ case kLa64I32x4AddHoriz: ++ case kLa64I32x4Add: ++ case kLa64I32x4ReplaceLane: ++ case kLa64I32x4Sub: ++ case kLa64F64x2Abs: ++ default: ++ break; ++ } ++ return kSuccess; ++} // NOLINT(readability/fn_size) ++ ++#define UNSUPPORTED_COND(opcode, condition) \ ++ StdoutStream{} << "Unsupported " << #opcode << " condition: \"" << condition \ ++ << "\""; \ ++ UNIMPLEMENTED(); ++ ++void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm, ++ Instruction* instr, FlagsCondition condition, ++ Label* tlabel, Label* flabel, bool fallthru) { ++#undef __ ++#define __ tasm-> ++ La64OperandConverter i(gen, instr); ++ ++ Condition cc = kNoCondition; ++ // LA64 does not have condition code flags, so compare and branch are ++ // implemented differently than on the other arch's. The compare operations ++ // emit la64 pseudo-instructions, which are handled here by branch ++ // instructions that do the actual comparison. Essential that the input ++ // registers to compare pseudo-op are not modified before this branch op, as ++ // they are tested here. 
++ ++ if (instr->arch_opcode() == kLa64Tst) { ++ cc = FlagsConditionToConditionTst(condition); ++ __ Branch(tlabel, cc, kScratchReg, Operand(zero_reg)); ++ } else if (instr->arch_opcode() == kLa64Dadd || ++ instr->arch_opcode() == kLa64Dsub) { ++ cc = FlagsConditionToConditionOvf(condition); ++ __ srai_d(kScratchReg, i.OutputRegister(), 32); ++ __ srai_w(kScratchReg2, i.OutputRegister(), 31); ++ __ Branch(tlabel, cc, kScratchReg2, Operand(kScratchReg)); ++ } else if (instr->arch_opcode() == kLa64DaddOvf || ++ instr->arch_opcode() == kLa64DsubOvf) { ++ switch (condition) { ++ // Overflow occurs if overflow register is negative ++ case kOverflow: ++ __ Branch(tlabel, lt, kScratchReg, Operand(zero_reg)); ++ break; ++ case kNotOverflow: ++ __ Branch(tlabel, ge, kScratchReg, Operand(zero_reg)); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ break; ++ } ++ } else if (instr->arch_opcode() == kLa64MulOvf) { ++ // Overflow occurs if overflow register is not zero ++ switch (condition) { ++ case kOverflow: ++ __ Branch(tlabel, ne, kScratchReg, Operand(zero_reg)); ++ break; ++ case kNotOverflow: ++ __ Branch(tlabel, eq, kScratchReg, Operand(zero_reg)); ++ break; ++ default: ++ UNSUPPORTED_COND(kLa64MulOvf, condition); ++ break; ++ } ++ } else if (instr->arch_opcode() == kLa64Cmp) { ++ cc = FlagsConditionToConditionCmp(condition); ++ __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1)); ++ } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) { ++ cc = FlagsConditionToConditionCmp(condition); ++ Register lhs_register = sp; ++ uint32_t offset; ++ if (gen->ShouldApplyOffsetToStackCheck(instr, &offset)) { ++ lhs_register = i.TempRegister(0); ++ __ Sub_d(lhs_register, sp, offset); ++ } ++ __ Branch(tlabel, cc, lhs_register, Operand(i.InputRegister(0))); ++ } else if (instr->arch_opcode() == kLa64CmpS || ++ instr->arch_opcode() == kLa64CmpD) { ++ bool predicate; ++ FlagsConditionToConditionCmpFPU(&predicate, condition); ++ if (predicate) { ++ __ BranchTrueF(tlabel); ++ } else { ++ __ BranchFalseF(tlabel); ++ } ++ } else { ++ PrintF("AssembleArchBranch Unimplemented arch_opcode: %d\n", ++ instr->arch_opcode()); ++ UNIMPLEMENTED(); ++ } ++ if (!fallthru) __ Branch(flabel); // no fallthru to flabel. ++#undef __ ++#define __ tasm()-> ++} ++ ++// Assembles branches after an instruction. ++void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) { ++ Label* tlabel = branch->true_label; ++ Label* flabel = branch->false_label; ++ ++ AssembleBranchToLabels(this, tasm(), instr, branch->condition, tlabel, flabel, ++ branch->fallthru); ++} ++ ++void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition, ++ Instruction* instr) { ++ // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal). 
++ if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) { ++ return; ++ } ++ ++ La64OperandConverter i(this, instr); ++ condition = NegateFlagsCondition(condition); ++ ++ switch (instr->arch_opcode()) { ++ case kLa64Cmp: { ++ __ LoadZeroOnCondition(kSpeculationPoisonRegister, i.InputRegister(0), ++ i.InputOperand(1), ++ FlagsConditionToConditionCmp(condition)); ++ } ++ return; ++ case kLa64Tst: { ++ switch (condition) { ++ case kEqual: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg); ++ break; ++ case kNotEqual: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++ return; ++ case kLa64Dadd: ++ case kLa64Dsub: { ++ // Check for overflow creates 1 or 0 for result. ++ __ srli_d(kScratchReg, i.OutputRegister(), 63); ++ __ srli_w(kScratchReg2, i.OutputRegister(), 31); ++ __ xor_(kScratchReg2, kScratchReg, kScratchReg2); ++ switch (condition) { ++ case kOverflow: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg2); ++ break; ++ case kNotOverflow: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ } ++ } ++ return; ++ case kLa64DaddOvf: ++ case kLa64DsubOvf: { ++ // Overflow occurs if overflow register is negative ++ __ Slt(kScratchReg2, kScratchReg, zero_reg); ++ switch (condition) { ++ case kOverflow: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg2); ++ break; ++ case kNotOverflow: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ } ++ } ++ return; ++ case kLa64MulOvf: { ++ // Overflow occurs if overflow register is not zero ++ switch (condition) { ++ case kOverflow: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg); ++ break; ++ case kNotOverflow: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ } ++ } ++ return; ++ case kLa64CmpS: ++ case kLa64CmpD: { ++ bool predicate; ++ FlagsConditionToConditionCmpFPU(&predicate, condition); ++ if (predicate) { ++ __ LoadZeroIfFPUCondition(kSpeculationPoisonRegister); ++ } else { ++ __ LoadZeroIfNotFPUCondition(kSpeculationPoisonRegister); ++ } ++ } ++ return; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++#undef UNSUPPORTED_COND ++ ++void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr, ++ BranchInfo* branch) { ++ AssembleArchBranch(instr, branch); ++} ++ ++void CodeGenerator::AssembleArchJump(RpoNumber target) { ++ if (!IsNextInAssemblyOrder(target)) __ Branch(GetLabel(target)); ++} ++ ++void CodeGenerator::AssembleArchTrap(Instruction* instr, ++ FlagsCondition condition) { ++ class OutOfLineTrap final : public OutOfLineCode { ++ public: ++ OutOfLineTrap(CodeGenerator* gen, Instruction* instr) ++ : OutOfLineCode(gen), instr_(instr), gen_(gen) {} ++ void Generate() final { ++ La64OperandConverter i(gen_, instr_); ++ TrapId trap_id = ++ static_cast(i.InputInt32(instr_->InputCount() - 1)); ++ GenerateCallToTrap(trap_id); ++ } ++ ++ private: ++ void GenerateCallToTrap(TrapId trap_id) { ++ if (trap_id == TrapId::kInvalid) { ++ // We cannot test calls to the runtime in cctest/test-run-wasm. ++ // Therefore we emit a call to C here instead of a call to the runtime. 
++ // We use the context register as the scratch register, because we do ++ // not have a context here. ++ __ PrepareCallCFunction(0, 0, cp); ++ __ CallCFunction( ++ ExternalReference::wasm_call_trap_callback_for_testing(), 0); ++ __ LeaveFrame(StackFrame::WASM_COMPILED); ++ auto call_descriptor = gen_->linkage()->GetIncomingDescriptor(); ++ int pop_count = ++ static_cast(call_descriptor->StackParameterCount()); ++ pop_count += (pop_count & 1); // align ++ __ Drop(pop_count); ++ __ Ret(); ++ } else { ++ gen_->AssembleSourcePosition(instr_); ++ // A direct call to a wasm runtime stub defined in this module. ++ // Just encode the stub index. This will be patched when the code ++ // is added to the native module and copied into wasm code space. ++ __ Call(static_cast
(trap_id), RelocInfo::WASM_STUB_CALL); ++ ReferenceMap* reference_map = ++ new (gen_->zone()) ReferenceMap(gen_->zone()); ++ gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); ++ if (FLAG_debug_code) { ++ __ stop(); ++ } ++ } ++ } ++ Instruction* instr_; ++ CodeGenerator* gen_; ++ }; ++ auto ool = new (zone()) OutOfLineTrap(this, instr); ++ Label* tlabel = ool->entry(); ++ AssembleBranchToLabels(this, tasm(), instr, condition, tlabel, nullptr, true); ++} ++ ++// Assembles boolean materializations after an instruction. ++void CodeGenerator::AssembleArchBoolean(Instruction* instr, ++ FlagsCondition condition) { ++ La64OperandConverter i(this, instr); ++ ++ // Materialize a full 32-bit 1 or 0 value. The result register is always the ++ // last output of the instruction. ++ DCHECK_NE(0u, instr->OutputCount()); ++ Register result = i.OutputRegister(instr->OutputCount() - 1); ++ Condition cc = kNoCondition; ++ // La64 does not have condition code flags, so compare and branch are ++ // implemented differently than on the other arch's. The compare operations ++ // emit la64 pseudo-instructions, which are checked and handled here. ++ ++ if (instr->arch_opcode() == kLa64Tst) { ++ cc = FlagsConditionToConditionTst(condition); ++ if (cc == eq) { ++ __ Sltu(result, kScratchReg, 1); ++ } else { ++ __ Sltu(result, zero_reg, kScratchReg); ++ } ++ return; ++ } else if (instr->arch_opcode() == kLa64Dadd || ++ instr->arch_opcode() == kLa64Dsub) { ++ cc = FlagsConditionToConditionOvf(condition); ++ // Check for overflow creates 1 or 0 for result. ++ __ srli_d(kScratchReg, i.OutputRegister(), 63); ++ __ srli_w(kScratchReg2, i.OutputRegister(), 31); ++ __ xor_(result, kScratchReg, kScratchReg2); ++ if (cc == eq) // Toggle result for not overflow. ++ __ xori(result, result, 1); ++ return; ++ } else if (instr->arch_opcode() == kLa64DaddOvf || ++ instr->arch_opcode() == kLa64DsubOvf) { ++ // Overflow occurs if overflow register is negative ++ __ slt(result, kScratchReg, zero_reg); ++ } else if (instr->arch_opcode() == kLa64MulOvf) { ++ // Overflow occurs if overflow register is not zero ++ __ Sgtu(result, kScratchReg, zero_reg); ++ } else if (instr->arch_opcode() == kLa64Cmp) { ++ cc = FlagsConditionToConditionCmp(condition); ++ switch (cc) { ++ case eq: ++ case ne: { ++ Register left = i.InputRegister(0); ++ Operand right = i.InputOperand(1); ++ if (instr->InputAt(1)->IsImmediate()) { ++ if (is_int12(-right.immediate())) { ++ if (right.immediate() == 0) { ++ if (cc == eq) { ++ __ Sltu(result, left, 1); ++ } else { ++ __ Sltu(result, zero_reg, left); ++ } ++ } else { ++ __ Add_d(result, left, Operand(-right.immediate())); ++ if (cc == eq) { ++ __ Sltu(result, result, 1); ++ } else { ++ __ Sltu(result, zero_reg, result); ++ } ++ } ++ } else { ++ if (is_uint12(right.immediate())) { ++ __ Xor(result, left, right); ++ } else { ++ __ li(kScratchReg, right); ++ __ Xor(result, left, kScratchReg); ++ } ++ if (cc == eq) { ++ __ Sltu(result, result, 1); ++ } else { ++ __ Sltu(result, zero_reg, result); ++ } ++ } ++ } else { ++ __ Xor(result, left, right); ++ if (cc == eq) { ++ __ Sltu(result, result, 1); ++ } else { ++ __ Sltu(result, zero_reg, result); ++ } ++ } ++ } break; ++ case lt: ++ case ge: { ++ Register left = i.InputRegister(0); ++ Operand right = i.InputOperand(1); ++ __ Slt(result, left, right); ++ if (cc == ge) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ case gt: ++ case le: { ++ Register left = i.InputRegister(1); ++ Operand right = i.InputOperand(0); ++ __ Slt(result, left, right); ++ if 
(cc == le) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ case lo: ++ case hs: { ++ Register left = i.InputRegister(0); ++ Operand right = i.InputOperand(1); ++ __ Sltu(result, left, right); ++ if (cc == hs) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ case hi: ++ case ls: { ++ Register left = i.InputRegister(1); ++ Operand right = i.InputOperand(0); ++ __ Sltu(result, left, right); ++ if (cc == ls) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ default: ++ UNREACHABLE(); ++ } ++ return; ++ } else if (instr->arch_opcode() == kLa64CmpD || ++ instr->arch_opcode() == kLa64CmpS) { ++ FPURegister left = i.InputOrZeroDoubleRegister(0); ++ FPURegister right = i.InputOrZeroDoubleRegister(1); ++ if ((left == kDoubleRegZero || right == kDoubleRegZero) && ++ !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ bool predicate; ++ FlagsConditionToConditionCmpFPU(&predicate, condition); ++ { ++ __ movcf2gr(result, FCC0); ++ if (!predicate) { ++ __ xori(result, result, 1); ++ } ++ } ++ return; ++ } else { ++ PrintF("AssembleArchBranch Unimplemented arch_opcode is : %d\n", ++ instr->arch_opcode()); ++ TRACE_UNIMPL(); ++ UNIMPLEMENTED(); ++ } ++} ++ ++void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) { ++ La64OperandConverter i(this, instr); ++ Register input = i.InputRegister(0); ++ std::vector> cases; ++ for (size_t index = 2; index < instr->InputCount(); index += 2) { ++ cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))}); ++ } ++ AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(), ++ cases.data() + cases.size()); ++} ++ ++void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) { ++ La64OperandConverter i(this, instr); ++ Register input = i.InputRegister(0); ++ size_t const case_count = instr->InputCount() - 2; ++ ++ __ Branch(GetLabel(i.InputRpo(1)), hs, input, Operand(case_count)); ++ __ GenerateSwitchTable(input, case_count, [&i, this](size_t index) { ++ return GetLabel(i.InputRpo(index + 2)); ++ }); ++} ++ ++void CodeGenerator::FinishFrame(Frame* frame) { ++ auto call_descriptor = linkage()->GetIncomingDescriptor(); ++ ++ const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); ++ if (saves_fpu != 0) { ++ int count = base::bits::CountPopulation(saves_fpu); ++ DCHECK_EQ(kNumCalleeSavedFPU, count); ++ frame->AllocateSavedCalleeRegisterSlots(count * ++ (kDoubleSize / kSystemPointerSize)); ++ } ++ ++ const RegList saves = call_descriptor->CalleeSavedRegisters(); ++ if (saves != 0) { ++ int count = base::bits::CountPopulation(saves); ++ DCHECK_EQ(kNumCalleeSaved, count + 1); ++ frame->AllocateSavedCalleeRegisterSlots(count); ++ } ++} ++ ++void CodeGenerator::AssembleConstructFrame() { ++ auto call_descriptor = linkage()->GetIncomingDescriptor(); ++ ++ if (frame_access_state()->has_frame()) { ++ if (call_descriptor->IsCFunctionCall()) { ++ if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) { ++ __ StubPrologue(StackFrame::C_WASM_ENTRY); ++ // Reserve stack space for saving the c_entry_fp later. 
++ __ Sub_d(sp, sp, Operand(kSystemPointerSize)); ++ } else { ++ __ Push(ra, fp); ++ __ mov(fp, sp); ++ } ++ } else if (call_descriptor->IsJSFunctionCall()) { ++ __ Prologue(); ++ if (call_descriptor->PushArgumentCount()) { ++ __ Push(kJavaScriptCallArgCountRegister); ++ } ++ } else { ++ __ StubPrologue(info()->GetOutputStackFrameType()); ++ if (call_descriptor->IsWasmFunctionCall()) { ++ __ Push(kWasmInstanceRegister); ++ } else if (call_descriptor->IsWasmImportWrapper() || ++ call_descriptor->IsWasmCapiFunction()) { ++ // Wasm import wrappers are passed a tuple in the place of the instance. ++ // Unpack the tuple into the instance and the target callable. ++ // This must be done here in the codegen because it cannot be expressed ++ // properly in the graph. ++ __ Ld_d(kJSFunctionRegister, ++ FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset)); ++ __ Ld_d(kWasmInstanceRegister, ++ FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset)); ++ __ Push(kWasmInstanceRegister); ++ if (call_descriptor->IsWasmCapiFunction()) { ++ // Reserve space for saving the PC later. ++ __ Sub_d(sp, sp, Operand(kSystemPointerSize)); ++ } ++ } ++ } ++ } ++ ++ int required_slots = ++ frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount(); ++ ++ if (info()->is_osr()) { ++ // TurboFan OSR-compiled functions cannot be entered directly. ++ __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction); ++ ++ // Unoptimized code jumps directly to this entrypoint while the unoptimized ++ // frame is still on the stack. Optimized code uses OSR values directly from ++ // the unoptimized frame. Thus, all that needs to be done is to allocate the ++ // remaining stack slots. ++ if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); ++ osr_pc_offset_ = __ pc_offset(); ++ required_slots -= osr_helper()->UnoptimizedFrameSlots(); ++ ResetSpeculationPoison(); ++ } ++ ++ const RegList saves = call_descriptor->CalleeSavedRegisters(); ++ const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); ++ ++ if (required_slots > 0) { ++ DCHECK(frame_access_state()->has_frame()); ++ if (info()->IsWasm() && required_slots > 128) { ++ // For WebAssembly functions with big frames we have to do the stack ++ // overflow check before we construct the frame. Otherwise we may not ++ // have enough space on the stack to call the runtime for the stack ++ // overflow. ++ Label done; ++ ++ // If the frame is bigger than the stack, we throw the stack overflow ++ // exception unconditionally. Thereby we can avoid the integer overflow ++ // check in the condition code. ++ if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) { ++ __ Ld_d( ++ kScratchReg, ++ FieldMemOperand(kWasmInstanceRegister, ++ WasmInstanceObject::kRealStackLimitAddressOffset)); ++ __ Ld_d(kScratchReg, MemOperand(kScratchReg, 0)); ++ __ Add_d(kScratchReg, kScratchReg, ++ Operand(required_slots * kSystemPointerSize)); ++ __ Branch(&done, uge, sp, Operand(kScratchReg)); ++ } ++ ++ __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL); ++ // We come from WebAssembly, there are no references for the GC. ++ ReferenceMap* reference_map = new (zone()) ReferenceMap(zone()); ++ RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); ++ if (FLAG_debug_code) { ++ __ stop(); ++ } ++ ++ __ bind(&done); ++ } ++ } ++ ++ const int returns = frame()->GetReturnSlotCount(); ++ ++ // Skip callee-saved and return slots, which are pushed below. 
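++ // For example, with 10 total frame slots, two callee-saved GPRs, no saved
++ // FPU registers and one return slot, only 10 - 2 - 0 - 1 = 7 slots are
++ // allocated here; the saves and return slots are handled separately below.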
++ required_slots -= base::bits::CountPopulation(saves); ++ required_slots -= base::bits::CountPopulation(saves_fpu); ++ required_slots -= returns; ++ if (required_slots > 0) { ++ __ Sub_d(sp, sp, Operand(required_slots * kSystemPointerSize)); ++ } ++ ++ if (saves_fpu != 0) { ++ // Save callee-saved FPU registers. ++ __ MultiPushFPU(saves_fpu); ++ DCHECK_EQ(kNumCalleeSavedFPU, base::bits::CountPopulation(saves_fpu)); ++ } ++ ++ if (saves != 0) { ++ // Save callee-saved registers. ++ __ MultiPush(saves); ++ DCHECK_EQ(kNumCalleeSaved, base::bits::CountPopulation(saves) + 1); ++ } ++ ++ if (returns != 0) { ++ // Create space for returns. ++ __ Sub_d(sp, sp, Operand(returns * kSystemPointerSize)); ++ } ++} ++ ++void CodeGenerator::AssembleReturn(InstructionOperand* pop) { ++ auto call_descriptor = linkage()->GetIncomingDescriptor(); ++ ++ const int returns = frame()->GetReturnSlotCount(); ++ if (returns != 0) { ++ __ Add_d(sp, sp, Operand(returns * kSystemPointerSize)); ++ } ++ ++ // Restore GP registers. ++ const RegList saves = call_descriptor->CalleeSavedRegisters(); ++ if (saves != 0) { ++ __ MultiPop(saves); ++ } ++ ++ // Restore FPU registers. ++ const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); ++ if (saves_fpu != 0) { ++ __ MultiPopFPU(saves_fpu); ++ } ++ ++ La64OperandConverter g(this, nullptr); ++ if (call_descriptor->IsCFunctionCall()) { ++ AssembleDeconstructFrame(); ++ } else if (frame_access_state()->has_frame()) { ++ // Canonicalize JSFunction return sites for now unless they have an variable ++ // number of stack slot pops. ++ if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) { ++ if (return_label_.is_bound()) { ++ __ Branch(&return_label_); ++ return; ++ } else { ++ __ bind(&return_label_); ++ AssembleDeconstructFrame(); ++ } ++ } else { ++ AssembleDeconstructFrame(); ++ } ++ } ++ int pop_count = static_cast(call_descriptor->StackParameterCount()); ++ if (pop->IsImmediate()) { ++ pop_count += g.ToConstant(pop).ToInt32(); ++ } else { ++ Register pop_reg = g.ToRegister(pop); ++ __ slli_d(pop_reg, pop_reg, kSystemPointerSizeLog2); ++ __ Add_d(sp, sp, pop_reg); ++ } ++ if (pop_count != 0) { ++ __ DropAndRet(pop_count); ++ } else { ++ __ Ret(); ++ } ++} ++ ++void CodeGenerator::FinishCode() {} ++ ++void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} ++ ++void CodeGenerator::AssembleMove(InstructionOperand* source, ++ InstructionOperand* destination) { ++ La64OperandConverter g(this, nullptr); ++ // Dispatch on the source and destination operand kinds. Not all ++ // combinations are possible. ++ if (source->IsRegister()) { ++ DCHECK(destination->IsRegister() || destination->IsStackSlot()); ++ Register src = g.ToRegister(source); ++ if (destination->IsRegister()) { ++ __ mov(g.ToRegister(destination), src); ++ } else { ++ __ St_d(src, g.ToMemOperand(destination)); ++ } ++ } else if (source->IsStackSlot()) { ++ DCHECK(destination->IsRegister() || destination->IsStackSlot()); ++ MemOperand src = g.ToMemOperand(source); ++ if (destination->IsRegister()) { ++ __ Ld_d(g.ToRegister(destination), src); ++ } else { ++ Register temp = kScratchReg; ++ __ Ld_d(temp, src); ++ __ St_d(temp, g.ToMemOperand(destination)); ++ } ++ } else if (source->IsConstant()) { ++ Constant src = g.ToConstant(source); ++ if (destination->IsRegister() || destination->IsStackSlot()) { ++ Register dst = ++ destination->IsRegister() ? 
g.ToRegister(destination) : kScratchReg; ++ switch (src.type()) { ++ case Constant::kInt32: ++ __ li(dst, Operand(src.ToInt32())); ++ break; ++ case Constant::kFloat32: ++ __ li(dst, Operand::EmbeddedNumber(src.ToFloat32())); ++ break; ++ case Constant::kInt64: ++ if (RelocInfo::IsWasmReference(src.rmode())) { ++ __ li(dst, Operand(src.ToInt64(), src.rmode())); ++ } else { ++ __ li(dst, Operand(src.ToInt64())); ++ } ++ break; ++ case Constant::kFloat64: ++ __ li(dst, Operand::EmbeddedNumber(src.ToFloat64().value())); ++ break; ++ case Constant::kExternalReference: ++ __ li(dst, src.ToExternalReference()); ++ break; ++ case Constant::kDelayedStringConstant: ++ __ li(dst, src.ToDelayedStringConstant()); ++ break; ++ case Constant::kHeapObject: { ++ Handle src_object = src.ToHeapObject(); ++ RootIndex index; ++ if (IsMaterializableFromRoot(src_object, &index)) { ++ __ LoadRoot(dst, index); ++ } else { ++ __ li(dst, src_object); ++ } ++ break; ++ } ++ case Constant::kCompressedHeapObject: ++ UNREACHABLE(); ++ case Constant::kRpoNumber: ++ UNREACHABLE(); // TODO(titzer): loading RPO numbers on LA64. ++ break; ++ } ++ if (destination->IsStackSlot()) __ St_d(dst, g.ToMemOperand(destination)); ++ } else if (src.type() == Constant::kFloat32) { ++ if (destination->IsFPStackSlot()) { ++ MemOperand dst = g.ToMemOperand(destination); ++ if (bit_cast(src.ToFloat32()) == 0) { ++ __ St_d(zero_reg, dst); ++ } else { ++ __ li(kScratchReg, Operand(bit_cast(src.ToFloat32()))); ++ __ St_d(kScratchReg, dst); ++ } ++ } else { ++ DCHECK(destination->IsFPRegister()); ++ FloatRegister dst = g.ToSingleRegister(destination); ++ __ Move(dst, src.ToFloat32()); ++ } ++ } else { ++ DCHECK_EQ(Constant::kFloat64, src.type()); ++ DoubleRegister dst = destination->IsFPRegister() ++ ? g.ToDoubleRegister(destination) ++ : kScratchDoubleReg; ++ __ Move(dst, src.ToFloat64().value()); ++ if (destination->IsFPStackSlot()) { ++ __ Fst_d(dst, g.ToMemOperand(destination)); ++ } ++ } ++ } else if (source->IsFPRegister()) { ++ FPURegister src = g.ToDoubleRegister(source); ++ if (destination->IsFPRegister()) { ++ FPURegister dst = g.ToDoubleRegister(destination); ++ __ Move(dst, src); ++ } else { ++ DCHECK(destination->IsFPStackSlot()); ++ __ Fst_d(src, g.ToMemOperand(destination)); ++ } ++ } else if (source->IsFPStackSlot()) { ++ DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot()); ++ MemOperand src = g.ToMemOperand(source); ++ if (destination->IsFPRegister()) { ++ __ Fld_d(g.ToDoubleRegister(destination), src); ++ } else { ++ DCHECK(destination->IsFPStackSlot()); ++ FPURegister temp = kScratchDoubleReg; ++ __ Fld_d(temp, src); ++ __ Fst_d(temp, g.ToMemOperand(destination)); ++ } ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void CodeGenerator::AssembleSwap(InstructionOperand* source, ++ InstructionOperand* destination) { ++ La64OperandConverter g(this, nullptr); ++ // Dispatch on the source and destination operand kinds. Not all ++ // combinations are possible. ++ if (source->IsRegister()) { ++ // Register-register. 
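++ // kScratchReg holds the old source value while the destination value
++ // (register or stack slot) is moved into the source, then the saved value
++ // is written to the destination.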
++ Register temp = kScratchReg; ++ Register src = g.ToRegister(source); ++ if (destination->IsRegister()) { ++ Register dst = g.ToRegister(destination); ++ __ Move(temp, src); ++ __ Move(src, dst); ++ __ Move(dst, temp); ++ } else { ++ DCHECK(destination->IsStackSlot()); ++ MemOperand dst = g.ToMemOperand(destination); ++ __ mov(temp, src); ++ __ Ld_d(src, dst); ++ __ St_d(temp, dst); ++ } ++ } else if (source->IsStackSlot()) { ++ DCHECK(destination->IsStackSlot()); ++ Register temp_0 = kScratchReg; ++ Register temp_1 = kScratchReg2; ++ MemOperand src = g.ToMemOperand(source); ++ MemOperand dst = g.ToMemOperand(destination); ++ __ Ld_d(temp_0, src); ++ __ Ld_d(temp_1, dst); ++ __ St_d(temp_0, dst); ++ __ St_d(temp_1, src); ++ } else if (source->IsFPRegister()) { ++ FPURegister temp = kScratchDoubleReg; ++ FPURegister src = g.ToDoubleRegister(source); ++ if (destination->IsFPRegister()) { ++ FPURegister dst = g.ToDoubleRegister(destination); ++ __ Move(temp, src); ++ __ Move(src, dst); ++ __ Move(dst, temp); ++ } else { ++ DCHECK(destination->IsFPStackSlot()); ++ MemOperand dst = g.ToMemOperand(destination); ++ __ Move(temp, src); ++ __ Fld_d(src, dst); ++ __ Fst_d(temp, dst); ++ } ++ } else if (source->IsFPStackSlot()) { ++ DCHECK(destination->IsFPStackSlot()); ++ Register temp_0 = kScratchReg; ++ MemOperand src0 = g.ToMemOperand(source); ++ MemOperand src1(src0.base(), src0.offset() + kIntSize); ++ MemOperand dst0 = g.ToMemOperand(destination); ++ MemOperand dst1(dst0.base(), dst0.offset() + kIntSize); ++ FPURegister temp_1 = kScratchDoubleReg; ++ __ Fld_d(temp_1, dst0); // Save destination in temp_1. ++ __ Ld_w(temp_0, src0); // Then use temp_0 to copy source to destination. ++ __ St_w(temp_0, dst0); ++ __ Ld_w(temp_0, src1); ++ __ St_w(temp_0, dst1); ++ __ Fst_d(temp_1, src0); ++ } else { ++ // No other combinations are possible. ++ UNREACHABLE(); ++ } ++} ++ ++void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) { ++ // On 64-bit LA64 we emit the jump tables inline. ++ UNREACHABLE(); ++} ++ ++#undef ASSEMBLE_ATOMIC_LOAD_INTEGER ++#undef ASSEMBLE_ATOMIC_STORE_INTEGER ++#undef ASSEMBLE_ATOMIC_BINOP ++#undef ASSEMBLE_ATOMIC_BINOP_EXT ++#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER ++#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT ++#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER ++#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT ++#undef ASSEMBLE_IEEE754_BINOP ++#undef ASSEMBLE_IEEE754_UNOP ++ ++#undef TRACE_MSG ++#undef TRACE_UNIMPL ++#undef __ ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-codes-la64.h b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-codes-la64.h +new file mode 100644 +index 00000000000..b8a2d97961a +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-codes-la64.h +@@ -0,0 +1,412 @@ ++// Copyright 2014 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_COMPILER_BACKEND_LA64_INSTRUCTION_CODES_LA64_H_ ++#define V8_COMPILER_BACKEND_LA64_INSTRUCTION_CODES_LA64_H_ ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++// LA64-specific opcodes that specify which assembly sequence to emit. ++// Most opcodes specify a single instruction. 
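++ // The V(...) entries below form an X-macro list; the architecture-independent
++ // code expands it to generate the ArchOpcode enum values and their mnemonic
++ // strings.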
++#define TARGET_ARCH_OPCODE_LIST(V) \ ++ V(La64Add) \ ++ V(La64Dadd) \ ++ V(La64DaddOvf) \ ++ V(La64Sub) \ ++ V(La64Dsub) \ ++ V(La64DsubOvf) \ ++ V(La64Mul) \ ++ V(La64MulOvf) \ ++ V(La64MulHigh) \ ++ V(La64DMulHigh) \ ++ V(La64MulHighU) \ ++ V(La64Dmul) \ ++ V(La64Div) \ ++ V(La64Ddiv) \ ++ V(La64DivU) \ ++ V(La64DdivU) \ ++ V(La64Mod) \ ++ V(La64Dmod) \ ++ V(La64ModU) \ ++ V(La64DmodU) \ ++ V(La64And) \ ++ V(La64And32) \ ++ V(La64Or) \ ++ V(La64Or32) \ ++ V(La64Nor) \ ++ V(La64Nor32) \ ++ V(La64Xor) \ ++ V(La64Xor32) \ ++ V(La64Clz) \ ++ V(La64Lsa) \ ++ V(La64Dlsa) \ ++ V(La64Shl) \ ++ V(La64Shr) \ ++ V(La64Sar) \ ++ V(La64Ext) \ ++ V(La64Ins) \ ++ V(La64Dext) \ ++ V(La64Dins) \ ++ V(La64Dclz) \ ++ V(La64Ctz) \ ++ V(La64Dctz) \ ++ V(La64Popcnt) \ ++ V(La64Dpopcnt) \ ++ V(La64Dshl) \ ++ V(La64Dshr) \ ++ V(La64Dsar) \ ++ V(La64Ror) \ ++ V(La64Dror) \ ++ V(La64Mov) \ ++ V(La64Tst) \ ++ V(La64Cmp) \ ++ V(La64CmpS) \ ++ V(La64AddS) \ ++ V(La64SubS) \ ++ V(La64MulS) \ ++ V(La64DivS) \ ++ V(La64ModS) \ ++ V(La64AbsS) \ ++ V(La64NegS) \ ++ V(La64SqrtS) \ ++ V(La64MaxS) \ ++ V(La64MinS) \ ++ V(La64CmpD) \ ++ V(La64AddD) \ ++ V(La64SubD) \ ++ V(La64MulD) \ ++ V(La64DivD) \ ++ V(La64ModD) \ ++ V(La64AbsD) \ ++ V(La64NegD) \ ++ V(La64SqrtD) \ ++ V(La64MaxD) \ ++ V(La64MinD) \ ++ V(La64Float64RoundDown) \ ++ V(La64Float64RoundTruncate) \ ++ V(La64Float64RoundUp) \ ++ V(La64Float64RoundTiesEven) \ ++ V(La64Float32RoundDown) \ ++ V(La64Float32RoundTruncate) \ ++ V(La64Float32RoundUp) \ ++ V(La64Float32RoundTiesEven) \ ++ V(La64CvtSD) \ ++ V(La64CvtDS) \ ++ V(La64TruncWD) \ ++ V(La64RoundWD) \ ++ V(La64FloorWD) \ ++ V(La64CeilWD) \ ++ V(La64TruncWS) \ ++ V(La64RoundWS) \ ++ V(La64FloorWS) \ ++ V(La64CeilWS) \ ++ V(La64TruncLS) \ ++ V(La64TruncLD) \ ++ V(La64TruncUwD) \ ++ V(La64TruncUwS) \ ++ V(La64TruncUlS) \ ++ V(La64TruncUlD) \ ++ V(La64CvtDW) \ ++ V(La64CvtSL) \ ++ V(La64CvtSW) \ ++ V(La64CvtSUw) \ ++ V(La64CvtSUl) \ ++ V(La64CvtDL) \ ++ V(La64CvtDUw) \ ++ V(La64CvtDUl) \ ++ V(La64Lb) \ ++ V(La64Lbu) \ ++ V(La64Sb) \ ++ V(La64Lh) \ ++ V(La64Ulh) \ ++ V(La64Lhu) \ ++ V(La64Ulhu) \ ++ V(La64Sh) \ ++ V(La64Ush) \ ++ V(La64Ld) \ ++ V(La64Uld) \ ++ V(La64Lw) \ ++ V(La64Ulw) \ ++ V(La64Lwu) \ ++ V(La64Ulwu) \ ++ V(La64Sw) \ ++ V(La64Usw) \ ++ V(La64Sd) \ ++ V(La64Usd) \ ++ V(La64Lwc1) \ ++ V(La64Ulwc1) \ ++ V(La64Swc1) \ ++ V(La64Uswc1) \ ++ V(La64Ldc1) \ ++ V(La64Uldc1) \ ++ V(La64Sdc1) \ ++ V(La64Usdc1) \ ++ V(La64BitcastDL) \ ++ V(La64BitcastLD) \ ++ V(La64Float64ExtractLowWord32) \ ++ V(La64Float64ExtractHighWord32) \ ++ V(La64Float64InsertLowWord32) \ ++ V(La64Float64InsertHighWord32) \ ++ V(La64Float32Max) \ ++ V(La64Float64Max) \ ++ V(La64Float32Min) \ ++ V(La64Float64Min) \ ++ V(La64Float64SilenceNaN) \ ++ V(La64Push) \ ++ V(La64Peek) \ ++ V(La64StoreToStackSlot) \ ++ V(La64ByteSwap64) \ ++ V(La64ByteSwap32) \ ++ V(La64StackClaim) \ ++ V(La64Seb) \ ++ V(La64Seh) \ ++ V(La64Sync) \ ++ V(La64AssertEqual) \ ++ V(La64S128Zero) \ ++ V(La64I32x4Splat) \ ++ V(La64I32x4ExtractLane) \ ++ V(La64I32x4ReplaceLane) \ ++ V(La64I32x4Add) \ ++ V(La64I32x4AddHoriz) \ ++ V(La64I32x4Sub) \ ++ V(La64F64x2Abs) \ ++ V(La64F64x2Neg) \ ++ V(La64F32x4Splat) \ ++ V(La64F32x4ExtractLane) \ ++ V(La64F32x4ReplaceLane) \ ++ V(La64F32x4SConvertI32x4) \ ++ V(La64F32x4UConvertI32x4) \ ++ V(La64I32x4Mul) \ ++ V(La64I32x4MaxS) \ ++ V(La64I32x4MinS) \ ++ V(La64I32x4Eq) \ ++ V(La64I32x4Ne) \ ++ V(La64I32x4Shl) \ ++ V(La64I32x4ShrS) \ ++ V(La64I32x4ShrU) \ ++ V(La64I32x4MaxU) \ ++ V(La64I32x4MinU) \ ++ V(La64F64x2Sqrt) \ ++ 
V(La64F64x2Add) \ ++ V(La64F64x2Sub) \ ++ V(La64F64x2Mul) \ ++ V(La64F64x2Div) \ ++ V(La64F64x2Min) \ ++ V(La64F64x2Max) \ ++ V(La64F64x2Eq) \ ++ V(La64F64x2Ne) \ ++ V(La64F64x2Lt) \ ++ V(La64F64x2Le) \ ++ V(La64F64x2Splat) \ ++ V(La64F64x2ExtractLane) \ ++ V(La64F64x2ReplaceLane) \ ++ V(La64I64x2Add) \ ++ V(La64I64x2Sub) \ ++ V(La64I64x2Mul) \ ++ V(La64I64x2Neg) \ ++ V(La64I64x2Shl) \ ++ V(La64I64x2ShrS) \ ++ V(La64I64x2ShrU) \ ++ V(La64F32x4Abs) \ ++ V(La64F32x4Neg) \ ++ V(La64F32x4Sqrt) \ ++ V(La64F32x4RecipApprox) \ ++ V(La64F32x4RecipSqrtApprox) \ ++ V(La64F32x4Add) \ ++ V(La64F32x4AddHoriz) \ ++ V(La64F32x4Sub) \ ++ V(La64F32x4Mul) \ ++ V(La64F32x4Div) \ ++ V(La64F32x4Max) \ ++ V(La64F32x4Min) \ ++ V(La64F32x4Eq) \ ++ V(La64F32x4Ne) \ ++ V(La64F32x4Lt) \ ++ V(La64F32x4Le) \ ++ V(La64I32x4SConvertF32x4) \ ++ V(La64I32x4UConvertF32x4) \ ++ V(La64I32x4Neg) \ ++ V(La64I32x4GtS) \ ++ V(La64I32x4GeS) \ ++ V(La64I32x4GtU) \ ++ V(La64I32x4GeU) \ ++ V(La64I32x4Abs) \ ++ V(La64I16x8Splat) \ ++ V(La64I16x8ExtractLaneU) \ ++ V(La64I16x8ExtractLaneS) \ ++ V(La64I16x8ReplaceLane) \ ++ V(La64I16x8Neg) \ ++ V(La64I16x8Shl) \ ++ V(La64I16x8ShrS) \ ++ V(La64I16x8ShrU) \ ++ V(La64I16x8Add) \ ++ V(La64I16x8AddSaturateS) \ ++ V(La64I16x8AddHoriz) \ ++ V(La64I16x8Sub) \ ++ V(La64I16x8SubSaturateS) \ ++ V(La64I16x8Mul) \ ++ V(La64I16x8MaxS) \ ++ V(La64I16x8MinS) \ ++ V(La64I16x8Eq) \ ++ V(La64I16x8Ne) \ ++ V(La64I16x8GtS) \ ++ V(La64I16x8GeS) \ ++ V(La64I16x8AddSaturateU) \ ++ V(La64I16x8SubSaturateU) \ ++ V(La64I16x8MaxU) \ ++ V(La64I16x8MinU) \ ++ V(La64I16x8GtU) \ ++ V(La64I16x8GeU) \ ++ V(La64I16x8RoundingAverageU) \ ++ V(La64I16x8Abs) \ ++ V(La64I8x16Splat) \ ++ V(La64I8x16ExtractLaneU) \ ++ V(La64I8x16ExtractLaneS) \ ++ V(La64I8x16ReplaceLane) \ ++ V(La64I8x16Neg) \ ++ V(La64I8x16Shl) \ ++ V(La64I8x16ShrS) \ ++ V(La64I8x16Add) \ ++ V(La64I8x16AddSaturateS) \ ++ V(La64I8x16Sub) \ ++ V(La64I8x16SubSaturateS) \ ++ V(La64I8x16Mul) \ ++ V(La64I8x16MaxS) \ ++ V(La64I8x16MinS) \ ++ V(La64I8x16Eq) \ ++ V(La64I8x16Ne) \ ++ V(La64I8x16GtS) \ ++ V(La64I8x16GeS) \ ++ V(La64I8x16ShrU) \ ++ V(La64I8x16AddSaturateU) \ ++ V(La64I8x16SubSaturateU) \ ++ V(La64I8x16MaxU) \ ++ V(La64I8x16MinU) \ ++ V(La64I8x16GtU) \ ++ V(La64I8x16GeU) \ ++ V(La64I8x16RoundingAverageU) \ ++ V(La64I8x16Abs) \ ++ V(La64S128And) \ ++ V(La64S128Or) \ ++ V(La64S128Xor) \ ++ V(La64S128Not) \ ++ V(La64S128Select) \ ++ V(La64S128AndNot) \ ++ V(La64S1x4AnyTrue) \ ++ V(La64S1x4AllTrue) \ ++ V(La64S1x8AnyTrue) \ ++ V(La64S1x8AllTrue) \ ++ V(La64S1x16AnyTrue) \ ++ V(La64S1x16AllTrue) \ ++ V(La64S32x4InterleaveRight) \ ++ V(La64S32x4InterleaveLeft) \ ++ V(La64S32x4PackEven) \ ++ V(La64S32x4PackOdd) \ ++ V(La64S32x4InterleaveEven) \ ++ V(La64S32x4InterleaveOdd) \ ++ V(La64S32x4Shuffle) \ ++ V(La64S16x8InterleaveRight) \ ++ V(La64S16x8InterleaveLeft) \ ++ V(La64S16x8PackEven) \ ++ V(La64S16x8PackOdd) \ ++ V(La64S16x8InterleaveEven) \ ++ V(La64S16x8InterleaveOdd) \ ++ V(La64S16x4Reverse) \ ++ V(La64S16x2Reverse) \ ++ V(La64S8x16InterleaveRight) \ ++ V(La64S8x16InterleaveLeft) \ ++ V(La64S8x16PackEven) \ ++ V(La64S8x16PackOdd) \ ++ V(La64S8x16InterleaveEven) \ ++ V(La64S8x16InterleaveOdd) \ ++ V(La64S8x16Shuffle) \ ++ V(La64S8x16Swizzle) \ ++ V(La64S8x16Concat) \ ++ V(La64S8x8Reverse) \ ++ V(La64S8x4Reverse) \ ++ V(La64S8x2Reverse) \ ++ V(La64S8x16LoadSplat) \ ++ V(La64S16x8LoadSplat) \ ++ V(La64S32x4LoadSplat) \ ++ V(La64S64x2LoadSplat) \ ++ V(La64I16x8Load8x8S) \ ++ V(La64I16x8Load8x8U) \ ++ V(La64I32x4Load16x4S) \ ++ V(La64I32x4Load16x4U) \ ++ 
V(La64I64x2Load32x2S) \ ++ V(La64I64x2Load32x2U) \ ++ V(La64I32x4SConvertI16x8Low) \ ++ V(La64I32x4SConvertI16x8High) \ ++ V(La64I32x4UConvertI16x8Low) \ ++ V(La64I32x4UConvertI16x8High) \ ++ V(La64I16x8SConvertI8x16Low) \ ++ V(La64I16x8SConvertI8x16High) \ ++ V(La64I16x8SConvertI32x4) \ ++ V(La64I16x8UConvertI32x4) \ ++ V(La64I16x8UConvertI8x16Low) \ ++ V(La64I16x8UConvertI8x16High) \ ++ V(La64I8x16SConvertI16x8) \ ++ V(La64I8x16UConvertI16x8) \ ++ V(La64Word64AtomicLoadUint8) \ ++ V(La64Word64AtomicLoadUint16) \ ++ V(La64Word64AtomicLoadUint32) \ ++ V(La64Word64AtomicLoadUint64) \ ++ V(La64Word64AtomicStoreWord8) \ ++ V(La64Word64AtomicStoreWord16) \ ++ V(La64Word64AtomicStoreWord32) \ ++ V(La64Word64AtomicStoreWord64) \ ++ V(La64Word64AtomicAddUint8) \ ++ V(La64Word64AtomicAddUint16) \ ++ V(La64Word64AtomicAddUint32) \ ++ V(La64Word64AtomicAddUint64) \ ++ V(La64Word64AtomicSubUint8) \ ++ V(La64Word64AtomicSubUint16) \ ++ V(La64Word64AtomicSubUint32) \ ++ V(La64Word64AtomicSubUint64) \ ++ V(La64Word64AtomicAndUint8) \ ++ V(La64Word64AtomicAndUint16) \ ++ V(La64Word64AtomicAndUint32) \ ++ V(La64Word64AtomicAndUint64) \ ++ V(La64Word64AtomicOrUint8) \ ++ V(La64Word64AtomicOrUint16) \ ++ V(La64Word64AtomicOrUint32) \ ++ V(La64Word64AtomicOrUint64) \ ++ V(La64Word64AtomicXorUint8) \ ++ V(La64Word64AtomicXorUint16) \ ++ V(La64Word64AtomicXorUint32) \ ++ V(La64Word64AtomicXorUint64) \ ++ V(La64Word64AtomicExchangeUint8) \ ++ V(La64Word64AtomicExchangeUint16) \ ++ V(La64Word64AtomicExchangeUint32) \ ++ V(La64Word64AtomicExchangeUint64) \ ++ V(La64Word64AtomicCompareExchangeUint8) \ ++ V(La64Word64AtomicCompareExchangeUint16) \ ++ V(La64Word64AtomicCompareExchangeUint32) \ ++ V(La64Word64AtomicCompareExchangeUint64) ++ ++// Addressing modes represent the "shape" of inputs to an instruction. ++// Many instructions support multiple addressing modes. Addressing modes ++// are encoded into the InstructionCode of the instruction and tell the ++// code generator after register allocation which assembler method to call. ++// ++// We use the following local notation for addressing modes: ++// ++// R = register ++// O = register or stack slot ++// D = double register ++// I = immediate (handle, external, int32) ++// MRI = [register + immediate] ++// MRR = [register + register] ++// TODO(plind): Add the new r6 address modes. ++#define TARGET_ADDRESSING_MODE_LIST(V) \ ++ V(MRI) /* [%r0 + K] */ \ ++ V(MRR) /* [%r0 + %r1] */ ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_COMPILER_BACKEND_LA64_INSTRUCTION_CODES_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-scheduler-la64.cc b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-scheduler-la64.cc +new file mode 100644 +index 00000000000..a1a5a771d32 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-scheduler-la64.cc +@@ -0,0 +1,1534 @@ ++// Copyright 2015 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. 
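++ // Scheduling support for the LA64 backend: classifies each target opcode as
++ // a load, a side-effecting operation or freely reorderable, and provides
++ // rough latency estimates for the instruction scheduler.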
++ ++#include "src/codegen/macro-assembler.h" ++#include "src/compiler/backend/instruction-scheduler.h" ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++bool InstructionScheduler::SchedulerSupported() { return true; } ++ ++int InstructionScheduler::GetTargetInstructionFlags( ++ const Instruction* instr) const { ++ switch (instr->arch_opcode()) { ++ case kLa64AbsD: ++ case kLa64AbsS: ++ case kLa64Add: ++ case kLa64AddD: ++ case kLa64AddS: ++ case kLa64And: ++ case kLa64And32: ++ case kLa64AssertEqual: ++ case kLa64BitcastDL: ++ case kLa64BitcastLD: ++ case kLa64ByteSwap32: ++ case kLa64ByteSwap64: ++ case kLa64CeilWD: ++ case kLa64CeilWS: ++ case kLa64Clz: ++ case kLa64Cmp: ++ case kLa64CmpD: ++ case kLa64CmpS: ++ case kLa64Ctz: ++ case kLa64CvtDL: ++ case kLa64CvtDS: ++ case kLa64CvtDUl: ++ case kLa64CvtDUw: ++ case kLa64CvtDW: ++ case kLa64CvtSD: ++ case kLa64CvtSL: ++ case kLa64CvtSUl: ++ case kLa64CvtSUw: ++ case kLa64CvtSW: ++ case kLa64DMulHigh: ++ case kLa64MulHighU: ++ case kLa64Dadd: ++ case kLa64DaddOvf: ++ case kLa64Dclz: ++ case kLa64Dctz: ++ case kLa64Ddiv: ++ case kLa64DdivU: ++ case kLa64Dext: ++ case kLa64Dins: ++ case kLa64Div: ++ case kLa64DivD: ++ case kLa64DivS: ++ case kLa64DivU: ++ case kLa64Dlsa: ++ case kLa64Dmod: ++ case kLa64DmodU: ++ case kLa64Dmul: ++ case kLa64Dpopcnt: ++ case kLa64Dror: ++ case kLa64Dsar: ++ case kLa64Dshl: ++ case kLa64Dshr: ++ case kLa64Dsub: ++ case kLa64DsubOvf: ++ case kLa64Ext: ++ case kLa64F64x2Abs: ++ case kLa64F64x2Neg: ++ case kLa64F64x2Sqrt: ++ case kLa64F64x2Add: ++ case kLa64F64x2Sub: ++ case kLa64F64x2Mul: ++ case kLa64F64x2Div: ++ case kLa64F64x2Min: ++ case kLa64F64x2Max: ++ case kLa64F64x2Eq: ++ case kLa64F64x2Ne: ++ case kLa64F64x2Lt: ++ case kLa64F64x2Le: ++ case kLa64I64x2Add: ++ case kLa64I64x2Sub: ++ case kLa64I64x2Mul: ++ case kLa64I64x2Neg: ++ case kLa64I64x2Shl: ++ case kLa64I64x2ShrS: ++ case kLa64I64x2ShrU: ++ case kLa64F32x4Abs: ++ case kLa64F32x4Add: ++ case kLa64F32x4AddHoriz: ++ case kLa64F32x4Eq: ++ case kLa64F32x4ExtractLane: ++ case kLa64F32x4Lt: ++ case kLa64F32x4Le: ++ case kLa64F32x4Max: ++ case kLa64F32x4Min: ++ case kLa64F32x4Mul: ++ case kLa64F32x4Div: ++ case kLa64F32x4Ne: ++ case kLa64F32x4Neg: ++ case kLa64F32x4Sqrt: ++ case kLa64F32x4RecipApprox: ++ case kLa64F32x4RecipSqrtApprox: ++ case kLa64F32x4ReplaceLane: ++ case kLa64F32x4SConvertI32x4: ++ case kLa64F32x4Splat: ++ case kLa64F32x4Sub: ++ case kLa64F32x4UConvertI32x4: ++ case kLa64F64x2Splat: ++ case kLa64F64x2ExtractLane: ++ case kLa64F64x2ReplaceLane: ++ case kLa64Float32Max: ++ case kLa64Float32Min: ++ case kLa64Float32RoundDown: ++ case kLa64Float32RoundTiesEven: ++ case kLa64Float32RoundTruncate: ++ case kLa64Float32RoundUp: ++ case kLa64Float64ExtractLowWord32: ++ case kLa64Float64ExtractHighWord32: ++ case kLa64Float64InsertLowWord32: ++ case kLa64Float64InsertHighWord32: ++ case kLa64Float64Max: ++ case kLa64Float64Min: ++ case kLa64Float64RoundDown: ++ case kLa64Float64RoundTiesEven: ++ case kLa64Float64RoundTruncate: ++ case kLa64Float64RoundUp: ++ case kLa64Float64SilenceNaN: ++ case kLa64FloorWD: ++ case kLa64FloorWS: ++ case kLa64I16x8Add: ++ case kLa64I16x8AddHoriz: ++ case kLa64I16x8AddSaturateS: ++ case kLa64I16x8AddSaturateU: ++ case kLa64I16x8Eq: ++ case kLa64I16x8ExtractLaneU: ++ case kLa64I16x8ExtractLaneS: ++ case kLa64I16x8GeS: ++ case kLa64I16x8GeU: ++ case kLa64I16x8GtS: ++ case kLa64I16x8GtU: ++ case kLa64I16x8MaxS: ++ case kLa64I16x8MaxU: ++ case kLa64I16x8MinS: ++ case kLa64I16x8MinU: ++ case 
kLa64I16x8Mul: ++ case kLa64I16x8Ne: ++ case kLa64I16x8Neg: ++ case kLa64I16x8ReplaceLane: ++ case kLa64I8x16SConvertI16x8: ++ case kLa64I16x8SConvertI32x4: ++ case kLa64I16x8SConvertI8x16High: ++ case kLa64I16x8SConvertI8x16Low: ++ case kLa64I16x8Shl: ++ case kLa64I16x8ShrS: ++ case kLa64I16x8ShrU: ++ case kLa64I16x8Splat: ++ case kLa64I16x8Sub: ++ case kLa64I16x8SubSaturateS: ++ case kLa64I16x8SubSaturateU: ++ case kLa64I8x16UConvertI16x8: ++ case kLa64I16x8UConvertI32x4: ++ case kLa64I16x8UConvertI8x16High: ++ case kLa64I16x8UConvertI8x16Low: ++ case kLa64I16x8RoundingAverageU: ++ case kLa64I16x8Abs: ++ case kLa64I32x4Add: ++ case kLa64I32x4AddHoriz: ++ case kLa64I32x4Eq: ++ case kLa64I32x4ExtractLane: ++ case kLa64I32x4GeS: ++ case kLa64I32x4GeU: ++ case kLa64I32x4GtS: ++ case kLa64I32x4GtU: ++ case kLa64I32x4MaxS: ++ case kLa64I32x4MaxU: ++ case kLa64I32x4MinS: ++ case kLa64I32x4MinU: ++ case kLa64I32x4Mul: ++ case kLa64I32x4Ne: ++ case kLa64I32x4Neg: ++ case kLa64I32x4ReplaceLane: ++ case kLa64I32x4SConvertF32x4: ++ case kLa64I32x4SConvertI16x8High: ++ case kLa64I32x4SConvertI16x8Low: ++ case kLa64I32x4Shl: ++ case kLa64I32x4ShrS: ++ case kLa64I32x4ShrU: ++ case kLa64I32x4Splat: ++ case kLa64I32x4Sub: ++ case kLa64I32x4UConvertF32x4: ++ case kLa64I32x4UConvertI16x8High: ++ case kLa64I32x4UConvertI16x8Low: ++ case kLa64I32x4Abs: ++ case kLa64I8x16Add: ++ case kLa64I8x16AddSaturateS: ++ case kLa64I8x16AddSaturateU: ++ case kLa64I8x16Eq: ++ case kLa64I8x16ExtractLaneU: ++ case kLa64I8x16ExtractLaneS: ++ case kLa64I8x16GeS: ++ case kLa64I8x16GeU: ++ case kLa64I8x16GtS: ++ case kLa64I8x16GtU: ++ case kLa64I8x16MaxS: ++ case kLa64I8x16MaxU: ++ case kLa64I8x16MinS: ++ case kLa64I8x16MinU: ++ case kLa64I8x16Mul: ++ case kLa64I8x16Ne: ++ case kLa64I8x16Neg: ++ case kLa64I8x16ReplaceLane: ++ case kLa64I8x16Shl: ++ case kLa64I8x16ShrS: ++ case kLa64I8x16ShrU: ++ case kLa64I8x16Splat: ++ case kLa64I8x16Sub: ++ case kLa64I8x16SubSaturateS: ++ case kLa64I8x16SubSaturateU: ++ case kLa64I8x16RoundingAverageU: ++ case kLa64I8x16Abs: ++ case kLa64Ins: ++ case kLa64Lsa: ++ case kLa64MaxD: ++ case kLa64MaxS: ++ case kLa64MinD: ++ case kLa64MinS: ++ case kLa64Mod: ++ case kLa64ModU: ++ case kLa64Mov: ++ case kLa64Mul: ++ case kLa64MulD: ++ case kLa64MulHigh: ++ case kLa64MulOvf: ++ case kLa64MulS: ++ case kLa64NegD: ++ case kLa64NegS: ++ case kLa64Nor: ++ case kLa64Nor32: ++ case kLa64Or: ++ case kLa64Or32: ++ case kLa64Popcnt: ++ case kLa64Ror: ++ case kLa64RoundWD: ++ case kLa64RoundWS: ++ case kLa64S128And: ++ case kLa64S128Or: ++ case kLa64S128Not: ++ case kLa64S128Select: ++ case kLa64S128AndNot: ++ case kLa64S128Xor: ++ case kLa64S128Zero: ++ case kLa64S16x8InterleaveEven: ++ case kLa64S16x8InterleaveOdd: ++ case kLa64S16x8InterleaveLeft: ++ case kLa64S16x8InterleaveRight: ++ case kLa64S16x8PackEven: ++ case kLa64S16x8PackOdd: ++ case kLa64S16x2Reverse: ++ case kLa64S16x4Reverse: ++ case kLa64S1x16AllTrue: ++ case kLa64S1x16AnyTrue: ++ case kLa64S1x4AllTrue: ++ case kLa64S1x4AnyTrue: ++ case kLa64S1x8AllTrue: ++ case kLa64S1x8AnyTrue: ++ case kLa64S32x4InterleaveEven: ++ case kLa64S32x4InterleaveOdd: ++ case kLa64S32x4InterleaveLeft: ++ case kLa64S32x4InterleaveRight: ++ case kLa64S32x4PackEven: ++ case kLa64S32x4PackOdd: ++ case kLa64S32x4Shuffle: ++ case kLa64S8x16Concat: ++ case kLa64S8x16InterleaveEven: ++ case kLa64S8x16InterleaveOdd: ++ case kLa64S8x16InterleaveLeft: ++ case kLa64S8x16InterleaveRight: ++ case kLa64S8x16PackEven: ++ case kLa64S8x16PackOdd: ++ case kLa64S8x2Reverse: ++ case 
kLa64S8x4Reverse: ++ case kLa64S8x8Reverse: ++ case kLa64S8x16Shuffle: ++ case kLa64S8x16Swizzle: ++ case kLa64Sar: ++ case kLa64Seb: ++ case kLa64Seh: ++ case kLa64Shl: ++ case kLa64Shr: ++ case kLa64SqrtD: ++ case kLa64SqrtS: ++ case kLa64Sub: ++ case kLa64SubD: ++ case kLa64SubS: ++ case kLa64TruncLD: ++ case kLa64TruncLS: ++ case kLa64TruncUlD: ++ case kLa64TruncUlS: ++ case kLa64TruncUwD: ++ case kLa64TruncUwS: ++ case kLa64TruncWD: ++ case kLa64TruncWS: ++ case kLa64Tst: ++ case kLa64Xor: ++ case kLa64Xor32: ++ return kNoOpcodeFlags; ++ ++ case kLa64Lb: ++ case kLa64Lbu: ++ case kLa64Ld: ++ case kLa64Ldc1: ++ case kLa64Lh: ++ case kLa64Lhu: ++ case kLa64Lw: ++ case kLa64Lwc1: ++ case kLa64Lwu: ++ case kLa64Peek: ++ case kLa64Uld: ++ case kLa64Uldc1: ++ case kLa64Ulh: ++ case kLa64Ulhu: ++ case kLa64Ulw: ++ case kLa64Ulwu: ++ case kLa64Ulwc1: ++ case kLa64S8x16LoadSplat: ++ case kLa64S16x8LoadSplat: ++ case kLa64S32x4LoadSplat: ++ case kLa64S64x2LoadSplat: ++ case kLa64I16x8Load8x8S: ++ case kLa64I16x8Load8x8U: ++ case kLa64I32x4Load16x4S: ++ case kLa64I32x4Load16x4U: ++ case kLa64I64x2Load32x2S: ++ case kLa64I64x2Load32x2U: ++ case kLa64Word64AtomicLoadUint8: ++ case kLa64Word64AtomicLoadUint16: ++ case kLa64Word64AtomicLoadUint32: ++ case kLa64Word64AtomicLoadUint64: ++ ++ return kIsLoadOperation; ++ ++ case kLa64ModD: ++ case kLa64ModS: ++ case kLa64Push: ++ case kLa64Sb: ++ case kLa64Sd: ++ case kLa64Sdc1: ++ case kLa64Sh: ++ case kLa64StackClaim: ++ case kLa64StoreToStackSlot: ++ case kLa64Sw: ++ case kLa64Swc1: ++ case kLa64Usd: ++ case kLa64Usdc1: ++ case kLa64Ush: ++ case kLa64Usw: ++ case kLa64Uswc1: ++ case kLa64Sync: ++ case kLa64Word64AtomicStoreWord8: ++ case kLa64Word64AtomicStoreWord16: ++ case kLa64Word64AtomicStoreWord32: ++ case kLa64Word64AtomicStoreWord64: ++ case kLa64Word64AtomicAddUint8: ++ case kLa64Word64AtomicAddUint16: ++ case kLa64Word64AtomicAddUint32: ++ case kLa64Word64AtomicAddUint64: ++ case kLa64Word64AtomicSubUint8: ++ case kLa64Word64AtomicSubUint16: ++ case kLa64Word64AtomicSubUint32: ++ case kLa64Word64AtomicSubUint64: ++ case kLa64Word64AtomicAndUint8: ++ case kLa64Word64AtomicAndUint16: ++ case kLa64Word64AtomicAndUint32: ++ case kLa64Word64AtomicAndUint64: ++ case kLa64Word64AtomicOrUint8: ++ case kLa64Word64AtomicOrUint16: ++ case kLa64Word64AtomicOrUint32: ++ case kLa64Word64AtomicOrUint64: ++ case kLa64Word64AtomicXorUint8: ++ case kLa64Word64AtomicXorUint16: ++ case kLa64Word64AtomicXorUint32: ++ case kLa64Word64AtomicXorUint64: ++ case kLa64Word64AtomicExchangeUint8: ++ case kLa64Word64AtomicExchangeUint16: ++ case kLa64Word64AtomicExchangeUint32: ++ case kLa64Word64AtomicExchangeUint64: ++ case kLa64Word64AtomicCompareExchangeUint8: ++ case kLa64Word64AtomicCompareExchangeUint16: ++ case kLa64Word64AtomicCompareExchangeUint32: ++ case kLa64Word64AtomicCompareExchangeUint64: ++ return kHasSideEffect; ++ ++#define CASE(Name) case k##Name: ++ COMMON_ARCH_OPCODE_LIST(CASE) ++#undef CASE ++ // Already covered in architecture independent code. ++ UNREACHABLE(); ++ } ++ ++ UNREACHABLE(); ++} ++ ++enum Latency { ++ BRANCH = 4, // Estimated max. ++ RINT_S = 4, // Estimated. ++ RINT_D = 4, // Estimated. 
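++ // Latencies are approximate cycle counts; they only steer the scheduler's
++ // heuristics and need not be exact.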
++ ++ MULT = 4, ++ MULTU = 4, ++ DMULT = 4, ++ DMULTU = 4, ++ ++ MUL = 7, ++ DMUL = 7, ++ MUH = 7, ++ MUHU = 7, ++ DMUH = 7, ++ DMUHU = 7, ++ ++ DIV = 50, // Min:11 Max:50 ++ DDIV = 50, ++ DIVU = 50, ++ DDIVU = 50, ++ ++ ABS_S = 4, ++ ABS_D = 4, ++ NEG_S = 4, ++ NEG_D = 4, ++ ADD_S = 4, ++ ADD_D = 4, ++ SUB_S = 4, ++ SUB_D = 4, ++ MAX_S = 4, // Estimated. ++ MIN_S = 4, ++ MAX_D = 4, // Estimated. ++ MIN_D = 4, ++ C_cond_S = 4, ++ C_cond_D = 4, ++ MUL_S = 4, ++ ++ MADD_S = 4, ++ MSUB_S = 4, ++ NMADD_S = 4, ++ NMSUB_S = 4, ++ ++ CABS_cond_S = 4, ++ CABS_cond_D = 4, ++ ++ CVT_D_S = 4, ++ CVT_PS_PW = 4, ++ ++ CVT_S_W = 4, ++ CVT_S_L = 4, ++ CVT_D_W = 4, ++ CVT_D_L = 4, ++ ++ CVT_S_D = 4, ++ ++ CVT_W_S = 4, ++ CVT_W_D = 4, ++ CVT_L_S = 4, ++ CVT_L_D = 4, ++ ++ CEIL_W_S = 4, ++ CEIL_W_D = 4, ++ CEIL_L_S = 4, ++ CEIL_L_D = 4, ++ ++ FLOOR_W_S = 4, ++ FLOOR_W_D = 4, ++ FLOOR_L_S = 4, ++ FLOOR_L_D = 4, ++ ++ ROUND_W_S = 4, ++ ROUND_W_D = 4, ++ ROUND_L_S = 4, ++ ROUND_L_D = 4, ++ ++ TRUNC_W_S = 4, ++ TRUNC_W_D = 4, ++ TRUNC_L_S = 4, ++ TRUNC_L_D = 4, ++ ++ MOV_S = 4, ++ MOV_D = 4, ++ ++ MOVF_S = 4, ++ MOVF_D = 4, ++ ++ MOVN_S = 4, ++ MOVN_D = 4, ++ ++ MOVT_S = 4, ++ MOVT_D = 4, ++ ++ MOVZ_S = 4, ++ MOVZ_D = 4, ++ ++ MUL_D = 5, ++ MADD_D = 5, ++ MSUB_D = 5, ++ NMADD_D = 5, ++ NMSUB_D = 5, ++ ++ RECIP_S = 13, ++ RECIP_D = 26, ++ ++ RSQRT_S = 17, ++ RSQRT_D = 36, ++ ++ DIV_S = 17, ++ SQRT_S = 17, ++ ++ DIV_D = 32, ++ SQRT_D = 32, ++ ++ MTC1 = 4, ++ MTHC1 = 4, ++ DMTC1 = 4, ++ LWC1 = 4, ++ LDC1 = 4, ++ ++ MFC1 = 1, ++ MFHC1 = 1, ++ DMFC1 = 1, ++ MFHI = 1, ++ MFLO = 1, ++ SWC1 = 1, ++ SDC1 = 1, ++}; ++ ++int DadduLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return 1; ++ } else { ++ return 2; // Estimated max. ++ } ++} ++ ++int DsubuLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int AndLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int OrLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int NorLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return 1; ++ } else { ++ return 2; // Estimated max. 
++ } ++} ++ ++int XorLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int MulLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return Latency::MUL; ++ } else { ++ return Latency::MUL + 1; ++ } ++} ++ ++int DmulLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DMUL; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int MulhLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::MUH; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int MulhuLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::MUH; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DMulhLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DMUH; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DivLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return Latency::DIV; ++ } else { ++ return Latency::DIV + 1; ++ } ++} ++ ++int DivuLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return Latency::DIVU; ++ } else { ++ return Latency::DIVU + 1; ++ } ++} ++ ++int DdivLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DDIV; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DdivuLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DDIVU; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int ModLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int ModuLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DmodLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DmoduLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int MovzLatency() { return Latency::BRANCH + 1; } ++ ++int MovnLatency() { return Latency::BRANCH + 1; } ++ ++int DlsaLatency() { ++ // Estimated max. ++ return DadduLatency() + 1; ++} ++ ++int CallLatency() { ++ // Estimated. ++ return DadduLatency(false) + Latency::BRANCH + 5; ++} ++ ++int JumpLatency() { ++ // Estimated max. ++ return 1 + DadduLatency() + Latency::BRANCH + 2; ++} ++ ++int SmiUntagLatency() { return 1; } ++ ++int PrepareForTailCallLatency() { ++ // Estimated max. ++ return 2 * (DlsaLatency() + DadduLatency(false)) + 2 + Latency::BRANCH + ++ Latency::BRANCH + 2 * DsubuLatency(false) + 2 + Latency::BRANCH + 1; ++} ++ ++int AssemblePopArgumentsAdoptFrameLatency() { ++ return 1 + Latency::BRANCH + 1 + SmiUntagLatency() + ++ PrepareForTailCallLatency(); ++} ++ ++int AssertLatency() { return 1; } ++ ++int PrepareCallCFunctionLatency() { ++ int frame_alignment = TurboAssembler::ActivationFrameAlignment(); ++ if (frame_alignment > kSystemPointerSize) { ++ return 1 + DsubuLatency(false) + AndLatency(false) + 1; ++ } else { ++ return DsubuLatency(false); ++ } ++} ++ ++int AdjustBaseAndOffsetLatency() { ++ return 3; // Estimated max. 
++} ++ ++int AlignedMemoryLatency() { return AdjustBaseAndOffsetLatency() + 1; } ++ ++int UlhuLatency() { return AlignedMemoryLatency(); } ++ ++int UlwLatency() { return AlignedMemoryLatency(); } ++ ++int UlwuLatency() { return AlignedMemoryLatency(); } ++ ++int UldLatency() { return AlignedMemoryLatency(); } ++ ++int Ulwc1Latency() { return AlignedMemoryLatency(); } ++ ++int Uldc1Latency() { return AlignedMemoryLatency(); } ++ ++int UshLatency() { return AlignedMemoryLatency(); } ++ ++int UswLatency() { return AlignedMemoryLatency(); } ++ ++int UsdLatency() { return AlignedMemoryLatency(); } ++ ++int Uswc1Latency() { return AlignedMemoryLatency(); } ++ ++int Usdc1Latency() { return AlignedMemoryLatency(); } ++ ++int Lwc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::LWC1; } ++ ++int Swc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::SWC1; } ++ ++int Sdc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::SDC1; } ++ ++int Ldc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::LDC1; } ++ ++int MultiPushLatency() { ++ int latency = DsubuLatency(false); ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ latency++; ++ } ++ return latency; ++} ++ ++int MultiPushFPULatency() { ++ int latency = DsubuLatency(false); ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ latency += Sdc1Latency(); ++ } ++ return latency; ++} ++ ++int PushCallerSavedLatency(SaveFPRegsMode fp_mode) { ++ int latency = MultiPushLatency(); ++ if (fp_mode == kSaveFPRegs) { ++ latency += MultiPushFPULatency(); ++ } ++ return latency; ++} ++ ++int MultiPopLatency() { ++ int latency = DadduLatency(false); ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ latency++; ++ } ++ return latency; ++} ++ ++int MultiPopFPULatency() { ++ int latency = DadduLatency(false); ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ latency += Ldc1Latency(); ++ } ++ return latency; ++} ++ ++int PopCallerSavedLatency(SaveFPRegsMode fp_mode) { ++ int latency = MultiPopLatency(); ++ if (fp_mode == kSaveFPRegs) { ++ latency += MultiPopFPULatency(); ++ } ++ return latency; ++} ++ ++int CallCFunctionHelperLatency() { ++ // Estimated. ++ int latency = AndLatency(false) + Latency::BRANCH + 2 + CallLatency(); ++ if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) { ++ latency++; ++ } else { ++ latency += DadduLatency(false); ++ } ++ return latency; ++} ++ ++int CallCFunctionLatency() { return 1 + CallCFunctionHelperLatency(); } ++ ++int AssembleArchJumpLatency() { ++ // Estimated max. ++ return Latency::BRANCH; ++} ++ ++int GenerateSwitchTableLatency() { ++ int latency = 0; ++ latency = DlsaLatency() + 2; ++ latency += 2; ++ return latency; ++} ++ ++int AssembleArchTableSwitchLatency() { ++ return Latency::BRANCH + GenerateSwitchTableLatency(); ++} ++ ++int DropAndRetLatency() { ++ // Estimated max. ++ return DadduLatency(false) + JumpLatency(); ++} ++ ++int AssemblerReturnLatency() { ++ // Estimated max. ++ return DadduLatency(false) + MultiPopLatency() + MultiPopFPULatency() + ++ Latency::BRANCH + DadduLatency() + 1 + DropAndRetLatency(); ++} ++ ++int TryInlineTruncateDoubleToILatency() { ++ return 2 + Latency::TRUNC_W_D + Latency::MFC1 + 2 + AndLatency(false) + ++ Latency::BRANCH; ++} ++ ++int CallStubDelayedLatency() { return 1 + CallLatency(); } ++ ++int TruncateDoubleToIDelayedLatency() { ++ // TODO(la64): This no longer reflects how TruncateDoubleToI is called. 
++ return TryInlineTruncateDoubleToILatency() + 1 + DsubuLatency(false) + ++ Sdc1Latency() + CallStubDelayedLatency() + DadduLatency(false) + 1; ++} ++ ++int CheckPageFlagLatency() { ++ return AndLatency(false) + AlignedMemoryLatency() + AndLatency(false) + ++ Latency::BRANCH; ++} ++ ++int SltuLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return 1; ++ } else { ++ return 2; // Estimated max. ++ } ++} ++ ++int BranchShortHelperLatency() { ++ return 2; // Estimated max. ++} ++ ++int BranchShortLatency() { return BranchShortHelperLatency(); } ++ ++int MoveLatency() { return 1; } ++ ++int MovToFloatParametersLatency() { return 2 * MoveLatency(); } ++ ++int MovFromFloatResultLatency() { return MoveLatency(); } ++ ++int DaddOverflowLatency() { ++ // Estimated max. ++ return 6; ++} ++ ++int DsubOverflowLatency() { ++ // Estimated max. ++ return 6; ++} ++ ++int MulOverflowLatency() { ++ // Estimated max. ++ return MulLatency() + MulhLatency() + 2; ++} ++ ++int DclzLatency() { return 1; } ++ ++int CtzLatency() { return 3 + DclzLatency(); } ++ ++int DctzLatency() { return 4; } ++ ++int PopcntLatency() { ++ return 2 + AndLatency() + DsubuLatency() + 1 + AndLatency() + 1 + ++ AndLatency() + DadduLatency() + 1 + DadduLatency() + 1 + AndLatency() + ++ 1 + MulLatency() + 1; ++} ++ ++int DpopcntLatency() { ++ return 2 + AndLatency() + DsubuLatency() + 1 + AndLatency() + 1 + ++ AndLatency() + DadduLatency() + 1 + DadduLatency() + 1 + AndLatency() + ++ 1 + DmulLatency() + 1; ++} ++ ++int CompareFLatency() { return Latency::C_cond_S; } ++ ++int CompareF32Latency() { return CompareFLatency(); } ++ ++int CompareF64Latency() { return CompareFLatency(); } ++ ++int CompareIsNanFLatency() { return CompareFLatency(); } ++ ++int CompareIsNanF32Latency() { return CompareIsNanFLatency(); } ++ ++int CompareIsNanF64Latency() { return CompareIsNanFLatency(); } ++ ++int NegsLatency() { return Latency::NEG_S; } ++ ++int NegdLatency() { return Latency::NEG_D; } ++ ++int Float64RoundLatency() { return Latency::RINT_D + 4; } ++ ++int Float32RoundLatency() { return Latency::RINT_S + 4; } ++ ++int Float32MaxLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF32Latency() + Latency::BRANCH; ++ return latency + Latency::MAX_S; ++} ++ ++int Float64MaxLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF64Latency() + Latency::BRANCH; ++ return latency + Latency::MAX_D; ++} ++ ++int Float32MinLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF32Latency() + Latency::BRANCH; ++ return latency + Latency::MIN_S; ++} ++ ++int Float64MinLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF64Latency() + Latency::BRANCH; ++ return latency + Latency::MIN_D; ++} ++ ++int TruncLSLatency(bool load_status) { ++ int latency = Latency::TRUNC_L_S + Latency::DMFC1; ++ if (load_status) { ++ latency += SltuLatency() + 7; ++ } ++ return latency; ++} ++ ++int TruncLDLatency(bool load_status) { ++ int latency = Latency::TRUNC_L_D + Latency::DMFC1; ++ if (load_status) { ++ latency += SltuLatency() + 7; ++ } ++ return latency; ++} ++ ++int TruncUlSLatency() { ++ // Estimated max. ++ return 2 * CompareF32Latency() + CompareIsNanF32Latency() + ++ 4 * Latency::BRANCH + Latency::SUB_S + 2 * Latency::TRUNC_L_S + ++ 3 * Latency::DMFC1 + OrLatency() + Latency::MTC1 + Latency::MOV_S + ++ SltuLatency() + 4; ++} ++ ++int TruncUlDLatency() { ++ // Estimated max. 
++ return 2 * CompareF64Latency() + CompareIsNanF64Latency() + ++ 4 * Latency::BRANCH + Latency::SUB_D + 2 * Latency::TRUNC_L_D + ++ 3 * Latency::DMFC1 + OrLatency() + Latency::DMTC1 + Latency::MOV_D + ++ SltuLatency() + 4; ++} ++ ++int PushLatency() { return DadduLatency() + AlignedMemoryLatency(); } ++ ++int ByteSwapSignedLatency() { return 2; } ++ ++int LlLatency(int offset) { ++ bool is_one_instruction = is_int14(offset); ++ if (is_one_instruction) { ++ return 1; ++ } else { ++ return 3; ++ } ++} ++ ++int ExtractBitsLatency(bool sign_extend, int size) { ++ int latency = 2; ++ if (sign_extend) { ++ switch (size) { ++ case 8: ++ case 16: ++ case 32: ++ latency += 1; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++ return latency; ++} ++ ++int InsertBitsLatency() { return 2 + DsubuLatency(false) + 2; } ++ ++int ScLatency(int offset) { ++ bool is_one_instruction = is_int14(offset); ++ if (is_one_instruction) { ++ return 1; ++ } else { ++ return 3; ++ } ++} ++ ++int Word32AtomicExchangeLatency(bool sign_extend, int size) { ++ return DadduLatency(false) + 1 + DsubuLatency() + 2 + LlLatency(0) + ++ ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() + ++ ScLatency(0) + BranchShortLatency() + 1; ++} ++ ++int Word32AtomicCompareExchangeLatency(bool sign_extend, int size) { ++ return 2 + DsubuLatency() + 2 + LlLatency(0) + ++ ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() + ++ ScLatency(0) + BranchShortLatency() + 1; ++} ++ ++int InstructionScheduler::GetInstructionLatency(const Instruction* instr) { ++ // Basic latency modeling for LA64 instructions. They have been determined ++ // in empirical way. ++ switch (instr->arch_opcode()) { ++ case kArchCallCodeObject: ++ case kArchCallWasmFunction: ++ return CallLatency(); ++ case kArchTailCallCodeObjectFromJSFunction: ++ case kArchTailCallCodeObject: { ++ int latency = 0; ++ if (instr->arch_opcode() == kArchTailCallCodeObjectFromJSFunction) { ++ latency = AssemblePopArgumentsAdoptFrameLatency(); ++ } ++ return latency + JumpLatency(); ++ } ++ case kArchTailCallWasm: ++ case kArchTailCallAddress: ++ return JumpLatency(); ++ case kArchCallJSFunction: { ++ int latency = 0; ++ if (FLAG_debug_code) { ++ latency = 1 + AssertLatency(); ++ } ++ return latency + 1 + DadduLatency(false) + CallLatency(); ++ } ++ case kArchPrepareCallCFunction: ++ return PrepareCallCFunctionLatency(); ++ case kArchSaveCallerRegisters: { ++ auto fp_mode = ++ static_cast(MiscField::decode(instr->opcode())); ++ return PushCallerSavedLatency(fp_mode); ++ } ++ case kArchRestoreCallerRegisters: { ++ auto fp_mode = ++ static_cast(MiscField::decode(instr->opcode())); ++ return PopCallerSavedLatency(fp_mode); ++ } ++ case kArchPrepareTailCall: ++ return 2; ++ case kArchCallCFunction: ++ return CallCFunctionLatency(); ++ case kArchJmp: ++ return AssembleArchJumpLatency(); ++ case kArchTableSwitch: ++ return AssembleArchTableSwitchLatency(); ++ case kArchAbortCSAAssert: ++ return CallLatency() + 1; ++ case kArchDebugBreak: ++ return 1; ++ case kArchComment: ++ case kArchNop: ++ case kArchThrowTerminator: ++ case kArchDeoptimize: ++ return 0; ++ case kArchRet: ++ return AssemblerReturnLatency(); ++ case kArchFramePointer: ++ return 1; ++ case kArchParentFramePointer: ++ // Estimated max. ++ return AlignedMemoryLatency(); ++ case kArchTruncateDoubleToI: ++ return TruncateDoubleToIDelayedLatency(); ++ case kArchStoreWithWriteBarrier: ++ return DadduLatency() + 1 + CheckPageFlagLatency(); ++ case kArchStackSlot: ++ // Estimated max. 
++ return DadduLatency(false) + AndLatency(false) + AssertLatency() + ++ DadduLatency(false) + AndLatency(false) + BranchShortLatency() + ++ 1 + DsubuLatency() + DadduLatency(); ++ case kArchWordPoisonOnSpeculation: ++ return AndLatency(); ++ case kIeee754Float64Acos: ++ case kIeee754Float64Acosh: ++ case kIeee754Float64Asin: ++ case kIeee754Float64Asinh: ++ case kIeee754Float64Atan: ++ case kIeee754Float64Atanh: ++ case kIeee754Float64Atan2: ++ case kIeee754Float64Cos: ++ case kIeee754Float64Cosh: ++ case kIeee754Float64Cbrt: ++ case kIeee754Float64Exp: ++ case kIeee754Float64Expm1: ++ case kIeee754Float64Log: ++ case kIeee754Float64Log1p: ++ case kIeee754Float64Log10: ++ case kIeee754Float64Log2: ++ case kIeee754Float64Pow: ++ case kIeee754Float64Sin: ++ case kIeee754Float64Sinh: ++ case kIeee754Float64Tan: ++ case kIeee754Float64Tanh: ++ return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() + ++ CallCFunctionLatency() + MovFromFloatResultLatency(); ++ case kLa64Add: ++ case kLa64Dadd: ++ return DadduLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64DaddOvf: ++ return DaddOverflowLatency(); ++ case kLa64Sub: ++ case kLa64Dsub: ++ return DsubuLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64DsubOvf: ++ return DsubOverflowLatency(); ++ case kLa64Mul: ++ return MulLatency(); ++ case kLa64MulOvf: ++ return MulOverflowLatency(); ++ case kLa64MulHigh: ++ return MulhLatency(); ++ case kLa64MulHighU: ++ return MulhuLatency(); ++ case kLa64DMulHigh: ++ return DMulhLatency(); ++ case kLa64Div: { ++ int latency = DivLatency(instr->InputAt(1)->IsRegister()); ++ return latency++; ++ } ++ case kLa64DivU: { ++ int latency = DivuLatency(instr->InputAt(1)->IsRegister()); ++ return latency++; ++ } ++ case kLa64Mod: ++ return ModLatency(); ++ case kLa64ModU: ++ return ModuLatency(); ++ case kLa64Dmul: ++ return DmulLatency(); ++ case kLa64Ddiv: { ++ int latency = DdivLatency(); ++ return latency++; ++ } ++ case kLa64DdivU: { ++ int latency = DdivuLatency(); ++ return latency++; ++ } ++ case kLa64Dmod: ++ return DmodLatency(); ++ case kLa64DmodU: ++ return DmoduLatency(); ++ case kLa64Dlsa: ++ case kLa64Lsa: ++ return DlsaLatency(); ++ case kLa64And: ++ return AndLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64And32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = AndLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Or: ++ return OrLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Or32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = OrLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Nor: ++ return NorLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Nor32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = NorLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Xor: ++ return XorLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Xor32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = XorLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Clz: ++ case kLa64Dclz: ++ return DclzLatency(); ++ case kLa64Ctz: ++ return CtzLatency(); ++ case kLa64Dctz: ++ return DctzLatency(); ++ case 
kLa64Popcnt: ++ return PopcntLatency(); ++ case kLa64Dpopcnt: ++ return DpopcntLatency(); ++ case kLa64Shl: ++ return 1; ++ case kLa64Shr: ++ case kLa64Sar: ++ return 2; ++ case kLa64Ext: ++ case kLa64Ins: ++ case kLa64Dext: ++ case kLa64Dins: ++ case kLa64Dshl: ++ case kLa64Dshr: ++ case kLa64Dsar: ++ case kLa64Ror: ++ case kLa64Dror: ++ return 1; ++ case kLa64Tst: ++ return AndLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Mov: ++ return 1; ++ case kLa64CmpS: ++ return MoveLatency() + CompareF32Latency(); ++ case kLa64AddS: ++ return Latency::ADD_S; ++ case kLa64SubS: ++ return Latency::SUB_S; ++ case kLa64MulS: ++ return Latency::MUL_S; ++ case kLa64DivS: ++ return Latency::DIV_S; ++ case kLa64ModS: ++ return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() + ++ CallCFunctionLatency() + MovFromFloatResultLatency(); ++ case kLa64AbsS: ++ return Latency::ABS_S; ++ case kLa64NegS: ++ return NegdLatency(); ++ case kLa64SqrtS: ++ return Latency::SQRT_S; ++ case kLa64MaxS: ++ return Latency::MAX_S; ++ case kLa64MinS: ++ return Latency::MIN_S; ++ case kLa64CmpD: ++ return MoveLatency() + CompareF64Latency(); ++ case kLa64AddD: ++ return Latency::ADD_D; ++ case kLa64SubD: ++ return Latency::SUB_D; ++ case kLa64MulD: ++ return Latency::MUL_D; ++ case kLa64DivD: ++ return Latency::DIV_D; ++ case kLa64ModD: ++ return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() + ++ CallCFunctionLatency() + MovFromFloatResultLatency(); ++ case kLa64AbsD: ++ return Latency::ABS_D; ++ case kLa64NegD: ++ return NegdLatency(); ++ case kLa64SqrtD: ++ return Latency::SQRT_D; ++ case kLa64MaxD: ++ return Latency::MAX_D; ++ case kLa64MinD: ++ return Latency::MIN_D; ++ case kLa64Float64RoundDown: ++ case kLa64Float64RoundTruncate: ++ case kLa64Float64RoundUp: ++ case kLa64Float64RoundTiesEven: ++ return Float64RoundLatency(); ++ case kLa64Float32RoundDown: ++ case kLa64Float32RoundTruncate: ++ case kLa64Float32RoundUp: ++ case kLa64Float32RoundTiesEven: ++ return Float32RoundLatency(); ++ case kLa64Float32Max: ++ return Float32MaxLatency(); ++ case kLa64Float64Max: ++ return Float64MaxLatency(); ++ case kLa64Float32Min: ++ return Float32MinLatency(); ++ case kLa64Float64Min: ++ return Float64MinLatency(); ++ case kLa64Float64SilenceNaN: ++ return Latency::SUB_D; ++ case kLa64CvtSD: ++ return Latency::CVT_S_D; ++ case kLa64CvtDS: ++ return Latency::CVT_D_S; ++ case kLa64CvtDW: ++ return Latency::MTC1 + Latency::CVT_D_W; ++ case kLa64CvtSW: ++ return Latency::MTC1 + Latency::CVT_S_W; ++ case kLa64CvtSUw: ++ return 1 + Latency::DMTC1 + Latency::CVT_S_L; ++ case kLa64CvtSL: ++ return Latency::DMTC1 + Latency::CVT_S_L; ++ case kLa64CvtDL: ++ return Latency::DMTC1 + Latency::CVT_D_L; ++ case kLa64CvtDUw: ++ return 1 + Latency::DMTC1 + Latency::CVT_D_L; ++ case kLa64CvtDUl: ++ return 2 * Latency::BRANCH + 3 + 2 * Latency::DMTC1 + ++ 2 * Latency::CVT_D_L + Latency::ADD_D; ++ case kLa64CvtSUl: ++ return 2 * Latency::BRANCH + 3 + 2 * Latency::DMTC1 + ++ 2 * Latency::CVT_S_L + Latency::ADD_S; ++ case kLa64FloorWD: ++ return Latency::FLOOR_W_D + Latency::MFC1; ++ case kLa64CeilWD: ++ return Latency::CEIL_W_D + Latency::MFC1; ++ case kLa64RoundWD: ++ return Latency::ROUND_W_D + Latency::MFC1; ++ case kLa64TruncWD: ++ return Latency::TRUNC_W_D + Latency::MFC1; ++ case kLa64FloorWS: ++ return Latency::FLOOR_W_S + Latency::MFC1; ++ case kLa64CeilWS: ++ return Latency::CEIL_W_S + Latency::MFC1; ++ case kLa64RoundWS: ++ return Latency::ROUND_W_S + Latency::MFC1; ++ case kLa64TruncWS: ++ return 
Latency::TRUNC_W_S + Latency::MFC1 + 2 + MovnLatency(); ++ case kLa64TruncLS: ++ return TruncLSLatency(instr->OutputCount() > 1); ++ case kLa64TruncLD: ++ return TruncLDLatency(instr->OutputCount() > 1); ++ case kLa64TruncUwD: ++ // Estimated max. ++ return CompareF64Latency() + 2 * Latency::BRANCH + ++ 2 * Latency::TRUNC_W_D + Latency::SUB_D + OrLatency() + ++ Latency::MTC1 + Latency::MFC1 + Latency::MTHC1 + 1; ++ case kLa64TruncUwS: ++ // Estimated max. ++ return CompareF32Latency() + 2 * Latency::BRANCH + ++ 2 * Latency::TRUNC_W_S + Latency::SUB_S + OrLatency() + ++ Latency::MTC1 + 2 * Latency::MFC1 + 2 + MovzLatency(); ++ case kLa64TruncUlS: ++ return TruncUlSLatency(); ++ case kLa64TruncUlD: ++ return TruncUlDLatency(); ++ case kLa64BitcastDL: ++ return Latency::DMFC1; ++ case kLa64BitcastLD: ++ return Latency::DMTC1; ++ case kLa64Float64ExtractLowWord32: ++ return Latency::MFC1; ++ case kLa64Float64InsertLowWord32: ++ return Latency::MFHC1 + Latency::MTC1 + Latency::MTHC1; ++ case kLa64Float64ExtractHighWord32: ++ return Latency::MFHC1; ++ case kLa64Float64InsertHighWord32: ++ return Latency::MTHC1; ++ case kLa64Seb: ++ case kLa64Seh: ++ return 1; ++ case kLa64Lbu: ++ case kLa64Lb: ++ case kLa64Lhu: ++ case kLa64Lh: ++ case kLa64Lwu: ++ case kLa64Lw: ++ case kLa64Ld: ++ case kLa64Sb: ++ case kLa64Sh: ++ case kLa64Sw: ++ case kLa64Sd: ++ return AlignedMemoryLatency(); ++ case kLa64Lwc1: ++ return Lwc1Latency(); ++ case kLa64Ldc1: ++ return Ldc1Latency(); ++ case kLa64Swc1: ++ return Swc1Latency(); ++ case kLa64Sdc1: ++ return Sdc1Latency(); ++ case kLa64Ulhu: ++ case kLa64Ulh: ++ return UlhuLatency(); ++ case kLa64Ulwu: ++ return UlwuLatency(); ++ case kLa64Ulw: ++ return UlwLatency(); ++ case kLa64Uld: ++ return UldLatency(); ++ case kLa64Ulwc1: ++ return Ulwc1Latency(); ++ case kLa64Uldc1: ++ return Uldc1Latency(); ++ case kLa64Ush: ++ return UshLatency(); ++ case kLa64Usw: ++ return UswLatency(); ++ case kLa64Usd: ++ return UsdLatency(); ++ case kLa64Uswc1: ++ return Uswc1Latency(); ++ case kLa64Usdc1: ++ return Usdc1Latency(); ++ case kLa64Push: { ++ int latency = 0; ++ if (instr->InputAt(0)->IsFPRegister()) { ++ latency = Sdc1Latency() + DsubuLatency(false); ++ } else { ++ latency = PushLatency(); ++ } ++ return latency; ++ } ++ case kLa64Peek: { ++ int latency = 0; ++ if (instr->OutputAt(0)->IsFPRegister()) { ++ auto op = LocationOperand::cast(instr->OutputAt(0)); ++ switch (op->representation()) { ++ case MachineRepresentation::kFloat64: ++ latency = Ldc1Latency(); ++ break; ++ case MachineRepresentation::kFloat32: ++ latency = Latency::LWC1; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } else { ++ latency = AlignedMemoryLatency(); ++ } ++ return latency; ++ } ++ case kLa64StackClaim: ++ return DsubuLatency(false); ++ case kLa64StoreToStackSlot: { ++ int latency = 0; ++ if (instr->InputAt(0)->IsFPRegister()) { ++ if (instr->InputAt(0)->IsSimd128Register()) { ++ latency = 1; // Estimated value. 
++ } else { ++ latency = Sdc1Latency(); ++ } ++ } else { ++ latency = AlignedMemoryLatency(); ++ } ++ return latency; ++ } ++ case kLa64ByteSwap64: ++ return ByteSwapSignedLatency(); ++ case kLa64ByteSwap32: ++ return ByteSwapSignedLatency(); ++ case kWord32AtomicLoadInt8: ++ case kWord32AtomicLoadUint8: ++ case kWord32AtomicLoadInt16: ++ case kWord32AtomicLoadUint16: ++ case kWord32AtomicLoadWord32: ++ return 2; ++ case kWord32AtomicStoreWord8: ++ case kWord32AtomicStoreWord16: ++ case kWord32AtomicStoreWord32: ++ return 3; ++ case kWord32AtomicExchangeInt8: ++ return Word32AtomicExchangeLatency(true, 8); ++ case kWord32AtomicExchangeUint8: ++ return Word32AtomicExchangeLatency(false, 8); ++ case kWord32AtomicExchangeInt16: ++ return Word32AtomicExchangeLatency(true, 16); ++ case kWord32AtomicExchangeUint16: ++ return Word32AtomicExchangeLatency(false, 16); ++ case kWord32AtomicExchangeWord32: ++ return 2 + LlLatency(0) + 1 + ScLatency(0) + BranchShortLatency() + 1; ++ case kWord32AtomicCompareExchangeInt8: ++ return Word32AtomicCompareExchangeLatency(true, 8); ++ case kWord32AtomicCompareExchangeUint8: ++ return Word32AtomicCompareExchangeLatency(false, 8); ++ case kWord32AtomicCompareExchangeInt16: ++ return Word32AtomicCompareExchangeLatency(true, 16); ++ case kWord32AtomicCompareExchangeUint16: ++ return Word32AtomicCompareExchangeLatency(false, 16); ++ case kWord32AtomicCompareExchangeWord32: ++ return 3 + LlLatency(0) + BranchShortLatency() + 1 + ScLatency(0) + ++ BranchShortLatency() + 1; ++ case kLa64AssertEqual: ++ return AssertLatency(); ++ default: ++ return 1; ++ } ++} ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-selector-la64.cc b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-selector-la64.cc +new file mode 100644 +index 00000000000..67ea5efe39f +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-selector-la64.cc +@@ -0,0 +1,3096 @@ ++// Copyright 2014 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "src/base/bits.h" ++#include "src/compiler/backend/instruction-selector-impl.h" ++#include "src/compiler/node-matchers.h" ++#include "src/compiler/node-properties.h" ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++#define TRACE_UNIMPL() \ ++ PrintF("UNIMPLEMENTED instr_sel: %s at line %d\n", __FUNCTION__, __LINE__) ++ ++#define TRACE() PrintF("instr_sel: %s at line %d\n", __FUNCTION__, __LINE__) ++ ++// Adds la64-specific methods for generating InstructionOperands. ++class La64OperandGenerator final : public OperandGenerator { ++ public: ++ explicit La64OperandGenerator(InstructionSelector* selector) ++ : OperandGenerator(selector) {} ++ ++ InstructionOperand UseOperand(Node* node, InstructionCode opcode) { ++ if (CanBeImmediate(node, opcode)) { ++ return UseImmediate(node); ++ } ++ return UseRegister(node); ++ } ++ ++ // Use the zero register if the node has the immediate value zero, otherwise ++ // assign a register. 
++ InstructionOperand UseRegisterOrImmediateZero(Node* node) { ++ if ((IsIntegerConstant(node) && (GetIntegerConstantValue(node) == 0)) || ++ (IsFloatConstant(node) && ++ (bit_cast(GetFloatConstantValue(node)) == 0))) { ++ return UseImmediate(node); ++ } ++ return UseRegister(node); ++ } ++ ++ bool IsIntegerConstant(Node* node) { ++ return (node->opcode() == IrOpcode::kInt32Constant) || ++ (node->opcode() == IrOpcode::kInt64Constant); ++ } ++ ++ int64_t GetIntegerConstantValue(Node* node) { ++ if (node->opcode() == IrOpcode::kInt32Constant) { ++ return OpParameter(node->op()); ++ } ++ DCHECK_EQ(IrOpcode::kInt64Constant, node->opcode()); ++ return OpParameter(node->op()); ++ } ++ ++ bool IsFloatConstant(Node* node) { ++ return (node->opcode() == IrOpcode::kFloat32Constant) || ++ (node->opcode() == IrOpcode::kFloat64Constant); ++ } ++ ++ double GetFloatConstantValue(Node* node) { ++ if (node->opcode() == IrOpcode::kFloat32Constant) { ++ return OpParameter(node->op()); ++ } ++ DCHECK_EQ(IrOpcode::kFloat64Constant, node->opcode()); ++ return OpParameter(node->op()); ++ } ++ ++ bool CanBeImmediate(Node* node, InstructionCode mode) { ++ return IsIntegerConstant(node) && ++ CanBeImmediate(GetIntegerConstantValue(node), mode); ++ } ++ ++ bool CanBeImmediate(int64_t value, InstructionCode opcode) { ++ switch (ArchOpcodeField::decode(opcode)) { ++ case kLa64Shl: ++ case kLa64Sar: ++ case kLa64Shr: ++ return is_uint5(value); ++ case kLa64Dshl: ++ case kLa64Dsar: ++ case kLa64Dshr: ++ return is_uint6(value); ++ case kLa64Add: ++ case kLa64And32: ++ case kLa64And: ++ case kLa64Dadd: ++ case kLa64Or32: ++ case kLa64Or: ++ case kLa64Tst: ++ case kLa64Xor: ++ return is_uint12(value); ++ case kLa64Lb: ++ case kLa64Lbu: ++ case kLa64Sb: ++ case kLa64Lh: ++ case kLa64Lhu: ++ case kLa64Sh: ++ case kLa64Lw: ++ case kLa64Sw: ++ case kLa64Ld: ++ case kLa64Sd: ++ case kLa64Lwc1: ++ case kLa64Swc1: ++ case kLa64Ldc1: ++ case kLa64Sdc1: ++ return is_int12(value); ++ default: ++ return is_int12(value); ++ } ++ } ++ ++ private: ++ bool ImmediateFitsAddrMode1Instruction(int32_t imm) const { ++ TRACE_UNIMPL(); ++ return false; ++ } ++}; ++ ++static void VisitRR(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++static void VisitRRI(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ int32_t imm = OpParameter(node->op()); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseImmediate(imm)); ++} ++ ++static void VisitSimdShift(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ if (g.IsIntegerConstant(node->InputAt(1))) { ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseImmediate(node->InputAt(1))); ++ } else { ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1))); ++ } ++} ++ ++static void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ int32_t imm = OpParameter(node->op()); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseImmediate(imm), ++ g.UseRegister(node->InputAt(1))); ++} ++ ++static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator 
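CanBeImmediate() above gates immediate operands on LoongArch's encoding limits: shift amounts are checked with is_uint5/is_uint6, most ALU immediates with is_uint12, and memory offsets (and the default case) with is_int12. Assuming V8's is_intN/is_uintN helpers have their usual meaning, the 12-bit checks are equivalent to:

    #include <cstdint>

    // Signed 12-bit field: [-2048, 2047].
    constexpr bool FitsInt12(int64_t v) { return v >= -2048 && v <= 2047; }
    // Unsigned 12-bit field: [0, 4095].
    constexpr bool FitsUint12(int64_t v) { return v >= 0 && v <= 4095; }

    static_assert(FitsInt12(-2048) && !FitsInt12(2048), "signed 12-bit range");
    static_assert(FitsUint12(4095) && !FitsUint12(4096), "unsigned 12-bit range");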
g(selector); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1))); ++} ++ ++void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { ++ La64OperandGenerator g(selector); ++ selector->Emit( ++ opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2))); ++} ++ ++static void VisitRRO(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseOperand(node->InputAt(1), opcode)); ++} ++ ++struct ExtendingLoadMatcher { ++ ExtendingLoadMatcher(Node* node, InstructionSelector* selector) ++ : matches_(false), selector_(selector), base_(nullptr), immediate_(0) { ++ Initialize(node); ++ } ++ ++ bool Matches() const { return matches_; } ++ ++ Node* base() const { ++ DCHECK(Matches()); ++ return base_; ++ } ++ int64_t immediate() const { ++ DCHECK(Matches()); ++ return immediate_; ++ } ++ ArchOpcode opcode() const { ++ DCHECK(Matches()); ++ return opcode_; ++ } ++ ++ private: ++ bool matches_; ++ InstructionSelector* selector_; ++ Node* base_; ++ int64_t immediate_; ++ ArchOpcode opcode_; ++ ++ void Initialize(Node* node) { ++ Int64BinopMatcher m(node); ++ // When loading a 64-bit value and shifting by 32, we should ++ // just load and sign-extend the interesting 4 bytes instead. ++ // This happens, for example, when we're loading and untagging SMIs. ++ DCHECK(m.IsWord64Sar()); ++ if (m.left().IsLoad() && m.right().Is(32) && ++ selector_->CanCover(m.node(), m.left().node())) { ++ DCHECK_EQ(selector_->GetEffectLevel(node), ++ selector_->GetEffectLevel(m.left().node())); ++ MachineRepresentation rep = ++ LoadRepresentationOf(m.left().node()->op()).representation(); ++ DCHECK_EQ(3, ElementSizeLog2Of(rep)); ++ if (rep != MachineRepresentation::kTaggedSigned && ++ rep != MachineRepresentation::kTaggedPointer && ++ rep != MachineRepresentation::kTagged && ++ rep != MachineRepresentation::kWord64) { ++ return; ++ } ++ ++ La64OperandGenerator g(selector_); ++ Node* load = m.left().node(); ++ Node* offset = load->InputAt(1); ++ base_ = load->InputAt(0); ++ opcode_ = kLa64Lw; ++ if (g.CanBeImmediate(offset, opcode_)) { ++ immediate_ = g.GetIntegerConstantValue(offset) + 4; ++ matches_ = g.CanBeImmediate(immediate_, kLa64Lw); ++ } ++ } ++ } ++}; ++ ++bool TryEmitExtendingLoad(InstructionSelector* selector, Node* node, ++ Node* output_node) { ++ ExtendingLoadMatcher m(node, selector); ++ La64OperandGenerator g(selector); ++ if (m.Matches()) { ++ InstructionOperand inputs[2]; ++ inputs[0] = g.UseRegister(m.base()); ++ InstructionCode opcode = ++ m.opcode() | AddressingModeField::encode(kMode_MRI); ++ DCHECK(is_int32(m.immediate())); ++ inputs[1] = g.TempImmediate(static_cast(m.immediate())); ++ InstructionOperand outputs[] = {g.DefineAsRegister(output_node)}; ++ selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs), ++ inputs); ++ return true; ++ } ++ return false; ++} ++ ++bool TryMatchImmediate(InstructionSelector* selector, ++ InstructionCode* opcode_return, Node* node, ++ size_t* input_count_return, InstructionOperand* inputs) { ++ La64OperandGenerator g(selector); ++ if (g.CanBeImmediate(node, *opcode_return)) { ++ *opcode_return |= AddressingModeField::encode(kMode_MRI); ++ inputs[0] = g.UseImmediate(node); ++ *input_count_return = 1; ++ return true; ++ } ++ return false; ++} ++ ++static 
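ExtendingLoadMatcher/TryEmitExtendingLoad above rewrite Word64Sar(Load(p), 32), the pattern produced when loading and untagging Smis, into a single sign-extending 32-bit load of the upper half of the value, which on a little-endian target sits at offset +4. A plain C++ sketch of the equivalence being exploited (hypothetical helper names, little-endian assumed):

    #include <cstdint>
    #include <cstring>

    // (64-bit load at p) >> 32 ...
    int64_t UpperHalfViaShift(const void* p) {
      int64_t v;
      std::memcpy(&v, p, sizeof(v));
      return v >> 32;                    // arithmetic shift keeps the sign
    }

    // ... equals one sign-extending 32-bit load at p + 4 on little-endian.
    int64_t UpperHalfViaNarrowLoad(const void* p) {
      int32_t hi;
      std::memcpy(&hi, static_cast<const char*>(p) + 4, sizeof(hi));
      return static_cast<int64_t>(hi);
    }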
void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, bool has_reverse_opcode, ++ InstructionCode reverse_opcode, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ Int32BinopMatcher m(node); ++ InstructionOperand inputs[2]; ++ size_t input_count = 0; ++ InstructionOperand outputs[1]; ++ size_t output_count = 0; ++ ++ if (TryMatchImmediate(selector, &opcode, m.right().node(), &input_count, ++ &inputs[1])) { ++ inputs[0] = g.UseRegister(m.left().node()); ++ input_count++; ++ } else if (has_reverse_opcode && ++ TryMatchImmediate(selector, &reverse_opcode, m.left().node(), ++ &input_count, &inputs[1])) { ++ inputs[0] = g.UseRegister(m.right().node()); ++ opcode = reverse_opcode; ++ input_count++; ++ } else { ++ inputs[input_count++] = g.UseRegister(m.left().node()); ++ inputs[input_count++] = g.UseOperand(m.right().node(), opcode); ++ } ++ ++ if (cont->IsDeoptimize()) { ++ // If we can deoptimize as a result of the binop, we need to make sure that ++ // the deopt inputs are not overwritten by the binop result. One way ++ // to achieve that is to declare the output register as same-as-first. ++ outputs[output_count++] = g.DefineSameAsFirst(node); ++ } else { ++ outputs[output_count++] = g.DefineAsRegister(node); ++ } ++ ++ DCHECK_NE(0u, input_count); ++ DCHECK_EQ(1u, output_count); ++ DCHECK_GE(arraysize(inputs), input_count); ++ DCHECK_GE(arraysize(outputs), output_count); ++ ++ selector->EmitWithContinuation(opcode, output_count, outputs, input_count, ++ inputs, cont); ++} ++ ++static void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, bool has_reverse_opcode, ++ InstructionCode reverse_opcode) { ++ FlagsContinuation cont; ++ VisitBinop(selector, node, opcode, has_reverse_opcode, reverse_opcode, &cont); ++} ++ ++static void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, FlagsContinuation* cont) { ++ VisitBinop(selector, node, opcode, false, kArchNop, cont); ++} ++ ++static void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode) { ++ VisitBinop(selector, node, opcode, false, kArchNop); ++} ++ ++void InstructionSelector::VisitStackSlot(Node* node) { ++ StackSlotRepresentation rep = StackSlotRepresentationOf(node->op()); ++ int alignment = rep.alignment(); ++ int slot = frame_->AllocateSpillSlot(rep.size(), alignment); ++ OperandGenerator g(this); ++ ++ Emit(kArchStackSlot, g.DefineAsRegister(node), ++ sequence()->AddImmediate(Constant(slot)), ++ sequence()->AddImmediate(Constant(alignment)), 0, nullptr); ++} ++ ++void InstructionSelector::VisitAbortCSAAssert(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), a0)); ++} ++ ++void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, ++ Node* output = nullptr) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(output == nullptr ? node : output), ++ g.UseRegister(base), g.UseImmediate(index)); ++ } else { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRR), ++ g.DefineAsRegister(output == nullptr ? 
node : output), ++ g.UseRegister(base), g.UseRegister(index)); ++ } ++} ++ ++void InstructionSelector::VisitLoadTransform(Node* node) { ++ LoadTransformParameters params = LoadTransformParametersOf(node->op()); ++ ++ InstructionCode opcode = kArchNop; ++ switch (params.transformation) { ++ case LoadTransformation::kS8x16LoadSplat: ++ opcode = kLa64S8x16LoadSplat; ++ break; ++ case LoadTransformation::kS16x8LoadSplat: ++ opcode = kLa64S16x8LoadSplat; ++ break; ++ case LoadTransformation::kS32x4LoadSplat: ++ opcode = kLa64S32x4LoadSplat; ++ break; ++ case LoadTransformation::kS64x2LoadSplat: ++ opcode = kLa64S64x2LoadSplat; ++ break; ++ case LoadTransformation::kI16x8Load8x8S: ++ opcode = kLa64I16x8Load8x8S; ++ break; ++ case LoadTransformation::kI16x8Load8x8U: ++ opcode = kLa64I16x8Load8x8U; ++ break; ++ case LoadTransformation::kI32x4Load16x4S: ++ opcode = kLa64I32x4Load16x4S; ++ break; ++ case LoadTransformation::kI32x4Load16x4U: ++ opcode = kLa64I32x4Load16x4U; ++ break; ++ case LoadTransformation::kI64x2Load32x2S: ++ opcode = kLa64I64x2Load32x2S; ++ break; ++ case LoadTransformation::kI64x2Load32x2U: ++ opcode = kLa64I64x2Load32x2U; ++ break; ++ default: ++ UNIMPLEMENTED(); ++ } ++ ++ EmitLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ ++ InstructionCode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Lwc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Ldc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ opcode = load_rep.IsUnsigned() ? kLa64Lbu : kLa64Lb; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsUnsigned() ? kLa64Lhu : kLa64Lh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = load_rep.IsUnsigned() ? kLa64Lwu : kLa64Lw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Ld; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. ++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ } ++ if (node->opcode() == IrOpcode::kPoisonedLoad) { ++ CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison); ++ opcode |= MiscField::encode(kMemoryAccessPoisoned); ++ } ++ ++ EmitLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); } ++ ++void InstructionSelector::VisitProtectedLoad(Node* node) { ++ // TODO(eholk) ++ UNIMPLEMENTED(); ++} ++ ++void InstructionSelector::VisitStore(Node* node) { ++ La64OperandGenerator g(this); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ StoreRepresentation store_rep = StoreRepresentationOf(node->op()); ++ WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); ++ MachineRepresentation rep = store_rep.representation(); ++ ++ // TODO(la64): I guess this could be done in a better way. 
++ if (write_barrier_kind != kNoWriteBarrier && ++ V8_LIKELY(!FLAG_disable_write_barriers)) { ++ DCHECK(CanBeTaggedPointer(rep)); ++ InstructionOperand inputs[3]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(value); ++ RecordWriteMode record_write_mode = ++ WriteBarrierKindToRecordWriteMode(write_barrier_kind); ++ InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()}; ++ size_t const temp_count = arraysize(temps); ++ InstructionCode code = kArchStoreWithWriteBarrier; ++ code |= MiscField::encode(static_cast(record_write_mode)); ++ Emit(code, 0, nullptr, input_count, inputs, temp_count, temps); ++ } else { ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Swc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Sdc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ opcode = kLa64Sb; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Sh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Sw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Sd; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. ++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ return; ++ } ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(), ++ g.UseRegister(base), g.UseImmediate(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } else { ++ Emit(opcode | AddressingModeField::encode(kMode_MRR), g.NoOutput(), ++ g.UseRegister(base), g.UseRegister(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } ++ } ++} ++ ++void InstructionSelector::VisitProtectedStore(Node* node) { ++ // TODO(eholk) ++ UNIMPLEMENTED(); ++} ++ ++void InstructionSelector::VisitWord32And(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32Shr() && CanCover(node, m.left().node()) && ++ m.right().HasValue()) { ++ uint32_t mask = m.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros32(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 32)) { ++ // The mask must be contiguous, and occupy the least-significant bits. ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask)); ++ ++ // Select Ext for And(Shr(x, imm), mask) where the mask is in the least ++ // significant bits. ++ Int32BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue()) { ++ // Any shift value can match; int32 shifts use `value % 32`. ++ uint32_t lsb = mleft.right().Value() & 0x1F; ++ ++ // Ext cannot extract bits past the register size, however since ++ // shifting the original value would have introduced some zeros we can ++ // still use Ext with a smaller mask and the remaining bits will be ++ // zeros. 
++ if (lsb + mask_width > 32) mask_width = 32 - lsb; ++ ++ Emit(kLa64Ext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(mask_width)); ++ return; ++ } ++ // Other cases fall through to the normal And operation. ++ } ++ } ++ if (m.right().HasValue()) { ++ uint32_t mask = m.right().Value(); ++ uint32_t shift = base::bits::CountPopulation(~mask); ++ uint32_t msb = base::bits::CountLeadingZeros32(~mask); ++ if (shift != 0 && shift != 32 && msb + shift == 32) { ++ // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction ++ // and remove constant loading of inverted mask. ++ Emit(kLa64Ins, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0), g.TempImmediate(shift)); ++ return; ++ } ++ } ++ VisitBinop(this, node, kLa64And32, true, kLa64And32); ++} ++ ++void InstructionSelector::VisitWord64And(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ if (m.left().IsWord64Shr() && CanCover(node, m.left().node()) && ++ m.right().HasValue()) { ++ uint64_t mask = m.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros64(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 64)) { ++ // The mask must be contiguous, and occupy the least-significant bits. ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask)); ++ ++ // Select Dext for And(Shr(x, imm), mask) where the mask is in the least ++ // significant bits. ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue()) { ++ // Any shift value can match; int64 shifts use `value % 64`. ++ uint32_t lsb = static_cast(mleft.right().Value() & 0x3F); ++ ++ // Dext cannot extract bits past the register size, however since ++ // shifting the original value would have introduced some zeros we can ++ // still use Dext with a smaller mask and the remaining bits will be ++ // zeros. ++ if (lsb + mask_width > 64) mask_width = 64 - lsb; ++ ++ if (lsb == 0 && mask_width == 64) { ++ Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(mleft.left().node())); ++ } else { ++ Emit(kLa64Dext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(static_cast(mask_width))); ++ } ++ return; ++ } ++ // Other cases fall through to the normal And operation. ++ } ++ } ++ if (m.right().HasValue()) { ++ uint64_t mask = m.right().Value(); ++ uint32_t shift = base::bits::CountPopulation(~mask); ++ uint32_t msb = base::bits::CountLeadingZeros64(~mask); ++ if (shift != 0 && shift < 32 && msb + shift == 64) { ++ // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction ++ // and remove constant loading of inverted mask. Dins cannot insert bits ++ // past word size, so shifts smaller than 32 are covered. 
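VisitWord32And/VisitWord64And above turn And(Shr(x, imm), mask) with a contiguous low-order mask into a single bit-field extract (Ext/Dext); contiguity is checked via popcount(mask) + leading_zeros(mask) == word width. A small generic C++ sketch of the rewritten computation (not the V8 matchers):

    #include <cstdint>

    // Generic bit-field extract: `width` bits starting at bit `lsb`.
    uint32_t ExtractBits(uint32_t x, unsigned lsb, unsigned width) {
      return (x >> lsb) & (width == 32 ? ~0u : (1u << width) - 1u);
    }

    // (x >> 3) & 0x1F uses a contiguous 5-bit mask, so it is exactly
    // ExtractBits(x, 3, 5): one Ext instead of a shift plus an and.
    uint32_t Example(uint32_t x) { return ExtractBits(x, 3, 5); }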
++ Emit(kLa64Dins, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0), g.TempImmediate(shift)); ++ return; ++ } ++ } ++ VisitBinop(this, node, kLa64And, true, kLa64And); ++} ++ ++void InstructionSelector::VisitWord32Or(Node* node) { ++ VisitBinop(this, node, kLa64Or32, true, kLa64Or32); ++} ++ ++void InstructionSelector::VisitWord64Or(Node* node) { ++ VisitBinop(this, node, kLa64Or, true, kLa64Or); ++} ++ ++void InstructionSelector::VisitWord32Xor(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32Or() && CanCover(node, m.left().node()) && ++ m.right().Is(-1)) { ++ Int32BinopMatcher mleft(m.left().node()); ++ if (!mleft.right().HasValue()) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor32, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(mleft.right().node())); ++ return; ++ } ++ } ++ if (m.right().Is(-1)) { ++ // Use Nor for bit negation and eliminate constant loading for xori. ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor32, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0)); ++ return; ++ } ++ VisitBinop(this, node, kLa64Xor32, true, kLa64Xor32); ++} ++ ++void InstructionSelector::VisitWord64Xor(Node* node) { ++ Int64BinopMatcher m(node); ++ if (m.left().IsWord64Or() && CanCover(node, m.left().node()) && ++ m.right().Is(-1)) { ++ Int64BinopMatcher mleft(m.left().node()); ++ if (!mleft.right().HasValue()) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(mleft.right().node())); ++ return; ++ } ++ } ++ if (m.right().Is(-1)) { ++ // Use Nor for bit negation and eliminate constant loading for xori. ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0)); ++ return; ++ } ++ VisitBinop(this, node, kLa64Xor, true, kLa64Xor); ++} ++ ++void InstructionSelector::VisitWord32Shl(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32And() && CanCover(node, m.left().node()) && ++ m.right().IsInRange(1, 31)) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher mleft(m.left().node()); ++ // Match Word32Shl(Word32And(x, mask), imm) to Shl where the mask is ++ // contiguous, and the shift immediate non-zero. ++ if (mleft.right().HasValue()) { ++ uint32_t mask = mleft.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros32(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 32)) { ++ uint32_t shift = m.right().Value(); ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask)); ++ DCHECK_NE(0u, shift); ++ if ((shift + mask_width) >= 32) { ++ // If the mask is contiguous and reaches or extends beyond the top ++ // bit, only the shift is needed. ++ Emit(kLa64Shl, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ } ++ } ++ } ++ VisitRRO(this, kLa64Shl, node); ++} ++ ++void InstructionSelector::VisitWord32Shr(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32And() && m.right().HasValue()) { ++ uint32_t lsb = m.right().Value() & 0x1F; ++ Int32BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && mleft.right().Value() != 0) { ++ // Select Ext for Shr(And(x, mask), imm) where the result of the mask is ++ // shifted into the least-significant bits. 
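VisitWord32Xor/VisitWord64Xor above rewrite x ^ -1 as a Nor against a zero operand: bitwise negation is nor(x, 0), and emitting it directly avoids materializing the all-ones constant for xori. In plain C++ terms:

    #include <cstdint>

    uint32_t Nor(uint32_t a, uint32_t b) { return ~(a | b); }

    // x ^ 0xFFFFFFFF, ~x and Nor(x, 0) are the same value for every x.
    bool NegationForms(uint32_t x) {
      return (x ^ 0xFFFFFFFFu) == ~x && ~x == Nor(x, 0u);
    }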
++ uint32_t mask = (mleft.right().Value() >> lsb) << lsb; ++ unsigned mask_width = base::bits::CountPopulation(mask); ++ unsigned mask_msb = base::bits::CountLeadingZeros32(mask); ++ if ((mask_msb + mask_width + lsb) == 32) { ++ La64OperandGenerator g(this); ++ DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(mask)); ++ Emit(kLa64Ext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(mask_width)); ++ return; ++ } ++ } ++ } ++ VisitRRO(this, kLa64Shr, node); ++} ++ ++void InstructionSelector::VisitWord32Sar(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32Shl() && CanCover(node, m.left().node())) { ++ Int32BinopMatcher mleft(m.left().node()); ++ if (m.right().HasValue() && mleft.right().HasValue()) { ++ La64OperandGenerator g(this); ++ uint32_t sar = m.right().Value(); ++ uint32_t shl = mleft.right().Value(); ++ if ((sar == shl) && (sar == 16)) { ++ Emit(kLa64Seh, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node())); ++ return; ++ } else if ((sar == shl) && (sar == 24)) { ++ Emit(kLa64Seb, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node())); ++ return; ++ } else if ((sar == shl) && (sar == 32)) { ++ Emit(kLa64Shl, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(0)); ++ return; ++ } ++ } ++ } ++ VisitRRO(this, kLa64Sar, node); ++} ++ ++void InstructionSelector::VisitWord64Shl(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ if ((m.left().IsChangeInt32ToInt64() || m.left().IsChangeUint32ToUint64()) && ++ m.right().IsInRange(32, 63) && CanCover(node, m.left().node())) { ++ // There's no need to sign/zero-extend to 64-bit if we shift out the upper ++ // 32 bits anyway. ++ Emit(kLa64Dshl, g.DefineSameAsFirst(node), ++ g.UseRegister(m.left().node()->InputAt(0)), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ if (m.left().IsWord64And() && CanCover(node, m.left().node()) && ++ m.right().IsInRange(1, 63)) { ++ // Match Word64Shl(Word64And(x, mask), imm) to Dshl where the mask is ++ // contiguous, and the shift immediate non-zero. ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue()) { ++ uint64_t mask = mleft.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros64(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 64)) { ++ uint64_t shift = m.right().Value(); ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask)); ++ DCHECK_NE(0u, shift); ++ ++ if ((shift + mask_width) >= 64) { ++ // If the mask is contiguous and reaches or extends beyond the top ++ // bit, only the shift is needed. ++ Emit(kLa64Dshl, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ } ++ } ++ } ++ VisitRRO(this, kLa64Dshl, node); ++} ++ ++void InstructionSelector::VisitWord64Shr(Node* node) { ++ Int64BinopMatcher m(node); ++ if (m.left().IsWord64And() && m.right().HasValue()) { ++ uint32_t lsb = m.right().Value() & 0x3F; ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && mleft.right().Value() != 0) { ++ // Select Dext for Shr(And(x, mask), imm) where the result of the mask is ++ // shifted into the least-significant bits. 
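VisitWord32Sar above folds Sar(Shl(x, 16), 16) into a sign-extend-halfword (Seh) and Sar(Shl(x, 24), 24) into a sign-extend-byte (Seb). The shift pair is just the portable way of sign-extending the low bits, so one extension instruction is equivalent:

    #include <cstdint>

    // Shl by 16 then arithmetic Shr by 16 ...
    int32_t SignExtend16ViaShifts(int32_t x) {
      return static_cast<int32_t>(static_cast<uint32_t>(x) << 16) >> 16;
    }

    // ... is the same as sign-extending the low halfword directly (Seh).
    int32_t SignExtend16Direct(int32_t x) {
      return static_cast<int16_t>(x);
    }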
++ uint64_t mask = (mleft.right().Value() >> lsb) << lsb; ++ unsigned mask_width = base::bits::CountPopulation(mask); ++ unsigned mask_msb = base::bits::CountLeadingZeros64(mask); ++ if ((mask_msb + mask_width + lsb) == 64) { ++ La64OperandGenerator g(this); ++ DCHECK_EQ(lsb, base::bits::CountTrailingZeros64(mask)); ++ Emit(kLa64Dext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(mask_width)); ++ return; ++ } ++ } ++ } ++ VisitRRO(this, kLa64Dshr, node); ++} ++ ++void InstructionSelector::VisitWord64Sar(Node* node) { ++ if (TryEmitExtendingLoad(this, node, node)) return; ++ VisitRRO(this, kLa64Dsar, node); ++} ++ ++void InstructionSelector::VisitWord32Ror(Node* node) { ++ VisitRRO(this, kLa64Ror, node); ++} ++ ++void InstructionSelector::VisitWord32Clz(Node* node) { ++ VisitRR(this, kLa64Clz, node); ++} ++ ++void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); } ++ ++void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); } ++ ++void InstructionSelector::VisitWord64ReverseBytes(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64ByteSwap64, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64ByteSwap32, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { ++ UNREACHABLE(); ++} ++ ++void InstructionSelector::VisitWord32Ctz(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord64Ctz(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Dctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord32Popcnt(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Popcnt, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord64Popcnt(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Dpopcnt, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord64Ror(Node* node) { ++ VisitRRO(this, kLa64Dror, node); ++} ++ ++void InstructionSelector::VisitWord64Clz(Node* node) { ++ VisitRR(this, kLa64Dclz, node); ++} ++ ++void InstructionSelector::VisitInt32Add(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ ++ // Select Lsa for (left + (left_of_right << imm)). ++ if (m.right().opcode() == IrOpcode::kWord32Shl && ++ CanCover(node, m.left().node()) && CanCover(node, m.right().node())) { ++ Int32BinopMatcher mright(m.right().node()); ++ if (mright.right().HasValue() && !m.left().HasValue()) { ++ int32_t shift_value = static_cast(mright.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Lsa, g.DefineAsRegister(node), ++ g.UseRegister(mright.left().node()), ++ g.UseRegister(m.left().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ // Select Lsa for ((left_of_left << imm) + right). 
++ if (m.left().opcode() == IrOpcode::kWord32Shl && ++ CanCover(node, m.right().node()) && CanCover(node, m.left().node())) { ++ Int32BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && !m.right().HasValue()) { ++ int32_t shift_value = static_cast(mleft.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Lsa, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(m.right().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ VisitBinop(this, node, kLa64Add, true, kLa64Add); ++} ++ ++void InstructionSelector::VisitInt64Add(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ ++ // Select Dlsa for (left + (left_of_right << imm)). ++ if (m.right().opcode() == IrOpcode::kWord64Shl && ++ CanCover(node, m.left().node()) && CanCover(node, m.right().node())) { ++ Int64BinopMatcher mright(m.right().node()); ++ if (mright.right().HasValue() && !m.left().HasValue()) { ++ int32_t shift_value = static_cast(mright.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Dlsa, g.DefineAsRegister(node), ++ g.UseRegister(mright.left().node()), ++ g.UseRegister(m.left().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ // Select Dlsa for ((left_of_left << imm) + right). ++ if (m.left().opcode() == IrOpcode::kWord64Shl && ++ CanCover(node, m.right().node()) && CanCover(node, m.left().node())) { ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && !m.right().HasValue()) { ++ int32_t shift_value = static_cast(mleft.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Dlsa, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(m.right().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ VisitBinop(this, node, kLa64Dadd, true, kLa64Dadd); ++} ++ ++void InstructionSelector::VisitInt32Sub(Node* node) { ++ VisitBinop(this, node, kLa64Sub); ++} ++ ++void InstructionSelector::VisitInt64Sub(Node* node) { ++ VisitBinop(this, node, kLa64Dsub); ++} ++ ++void InstructionSelector::VisitInt32Mul(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ if (m.right().HasValue() && m.right().Value() > 0) { ++ uint32_t value = static_cast(m.right().Value()); ++ if (base::bits::IsPowerOfTwo(value)) { ++ Emit(kLa64Shl | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value - 1) && /*kArchVariant == kLa64r6 &&*/ ++ value - 1 > 0 && value - 1 <= 31) { ++ Emit(kLa64Lsa, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value + 1)) { ++ InstructionOperand temp = g.TempRegister(); ++ Emit(kLa64Shl | AddressingModeField::encode(kMode_None), temp, ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1))); ++ Emit(kLa64Sub | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), temp, g.UseRegister(m.left().node())); ++ return; ++ } ++ } ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ if (CanCover(node, left) && CanCover(node, right)) { ++ if (left->opcode() == IrOpcode::kWord64Sar && ++ right->opcode() == IrOpcode::kWord64Sar) { ++ Int64BinopMatcher leftInput(left), 
rightInput(right); ++ if (leftInput.right().Is(32) && rightInput.right().Is(32)) { ++ // Combine untagging shifts with Dmul high. ++ Emit(kLa64DMulHigh, g.DefineSameAsFirst(node), ++ g.UseRegister(leftInput.left().node()), ++ g.UseRegister(rightInput.left().node())); ++ return; ++ } ++ } ++ } ++ VisitRRR(this, kLa64Mul, node); ++} ++ ++void InstructionSelector::VisitInt32MulHigh(Node* node) { ++ VisitRRR(this, kLa64MulHigh, node); ++} ++ ++void InstructionSelector::VisitUint32MulHigh(Node* node) { ++ VisitRRR(this, kLa64MulHighU, node); ++} ++ ++void InstructionSelector::VisitInt64Mul(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ // TODO(dusmil): Add optimization for shifts larger than 32. ++ if (m.right().HasValue() && m.right().Value() > 0) { ++ uint32_t value = static_cast(m.right().Value()); ++ if (base::bits::IsPowerOfTwo(value)) { ++ Emit(kLa64Dshl | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value - 1) && /*kArchVariant == kLa64r6 &&*/ ++ value - 1 > 0 && value - 1 <= 31) { ++ // Dlsa macro will handle the shifting value out of bound cases. ++ Emit(kLa64Dlsa, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value + 1)) { ++ InstructionOperand temp = g.TempRegister(); ++ Emit(kLa64Dshl | AddressingModeField::encode(kMode_None), temp, ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1))); ++ Emit(kLa64Dsub | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), temp, g.UseRegister(m.left().node())); ++ return; ++ } ++ } ++ Emit(kLa64Dmul, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt32Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ if (CanCover(node, left) && CanCover(node, right)) { ++ if (left->opcode() == IrOpcode::kWord64Sar && ++ right->opcode() == IrOpcode::kWord64Sar) { ++ Int64BinopMatcher rightInput(right), leftInput(left); ++ if (rightInput.right().Is(32) && leftInput.right().Is(32)) { ++ // Combine both shifted operands with Ddiv. ++ Emit(kLa64Ddiv, g.DefineSameAsFirst(node), ++ g.UseRegister(leftInput.left().node()), ++ g.UseRegister(rightInput.left().node())); ++ return; ++ } ++ } ++ } ++ Emit(kLa64Div, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint32Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Emit(kLa64DivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt32Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ if (CanCover(node, left) && CanCover(node, right)) { ++ if (left->opcode() == IrOpcode::kWord64Sar && ++ right->opcode() == IrOpcode::kWord64Sar) { ++ Int64BinopMatcher rightInput(right), leftInput(left); ++ if (rightInput.right().Is(32) && leftInput.right().Is(32)) { ++ // Combine both shifted operands with Dmod. 
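VisitInt32Mul/VisitInt64Mul above replace multiplication by 2^k with a shift, by 2^k + 1 with a shift-and-add (Lsa/Dlsa), and by 2^k - 1 with a shift followed by a subtract. The rewrites rest on these identities (unsigned arithmetic, so any overflow wraps consistently on both sides):

    #include <cstdint>

    // x * 8 == x << 3, x * 9 == (x << 3) + x, x * 7 == (x << 3) - x
    bool MulStrengthReduction(uint32_t x) {
      return x * 8u == (x << 3) &&
             x * 9u == ((x << 3) + x) &&
             x * 7u == ((x << 3) - x);
    }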
++ Emit(kLa64Dmod, g.DefineSameAsFirst(node), ++ g.UseRegister(leftInput.left().node()), ++ g.UseRegister(rightInput.left().node())); ++ return; ++ } ++ } ++ } ++ Emit(kLa64Mod, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint32Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Emit(kLa64ModU, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt64Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64Ddiv, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint64Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64DdivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt64Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64Dmod, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint64Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64DmodU, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitChangeFloat32ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDS, node); ++} ++ ++void InstructionSelector::VisitRoundInt32ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSW, node); ++} ++ ++void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSUw, node); ++} ++ ++void InstructionSelector::VisitChangeInt32ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDW, node); ++} ++ ++void InstructionSelector::VisitChangeInt64ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDL, node); ++} ++ ++void InstructionSelector::VisitChangeUint32ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDUw, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) { ++ VisitRR(this, kLa64TruncWS, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) { ++ VisitRR(this, kLa64TruncUwS, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ // Match ChangeFloat64ToInt32(Float64Round##OP) to corresponding instruction ++ // which does rounding and conversion to integer format. 
++ if (CanCover(node, value)) { ++ switch (value->opcode()) { ++ case IrOpcode::kFloat64RoundDown: ++ Emit(kLa64FloorWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ case IrOpcode::kFloat64RoundUp: ++ Emit(kLa64CeilWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ case IrOpcode::kFloat64RoundTiesEven: ++ Emit(kLa64RoundWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ case IrOpcode::kFloat64RoundTruncate: ++ Emit(kLa64TruncWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ default: ++ break; ++ } ++ if (value->opcode() == IrOpcode::kChangeFloat32ToFloat64) { ++ Node* next = value->InputAt(0); ++ if (CanCover(value, next)) { ++ // Match ChangeFloat64ToInt32(ChangeFloat32ToFloat64(Float64Round##OP)) ++ switch (next->opcode()) { ++ case IrOpcode::kFloat32RoundDown: ++ Emit(kLa64FloorWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ case IrOpcode::kFloat32RoundUp: ++ Emit(kLa64CeilWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ case IrOpcode::kFloat32RoundTiesEven: ++ Emit(kLa64RoundWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ case IrOpcode::kFloat32RoundTruncate: ++ Emit(kLa64TruncWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ default: ++ Emit(kLa64TruncWS, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ } ++ } else { ++ // Match float32 -> float64 -> int32 representation change path. ++ Emit(kLa64TruncWS, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ } ++ } ++ } ++ VisitRR(this, kLa64TruncWD, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToInt64(Node* node) { ++ VisitRR(this, kLa64TruncLD, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToUint32(Node* node) { ++ VisitRR(this, kLa64TruncUwD, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToUint64(Node* node) { ++ VisitRR(this, kLa64TruncUlD, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToUint32(Node* node) { ++ VisitRR(this, kLa64TruncUwD, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToInt64(Node* node) { ++ VisitRR(this, kLa64TruncLD, node); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; ++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ this->Emit(kLa64TruncLS, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; ++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ Emit(kLa64TruncLD, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; 
++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ Emit(kLa64TruncUlS, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) { ++ La64OperandGenerator g(this); ++ ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; ++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ Emit(kLa64TruncUlD, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) { ++ UNIMPLEMENTED(); ++} ++ ++void InstructionSelector::VisitChangeInt32ToInt64(Node* node) { ++ Node* value = node->InputAt(0); ++ if (value->opcode() == IrOpcode::kLoad && CanCover(node, value)) { ++ // Generate sign-extending load. ++ LoadRepresentation load_rep = LoadRepresentationOf(value->op()); ++ InstructionCode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ opcode = load_rep.IsUnsigned() ? kLa64Lbu : kLa64Lb; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsUnsigned() ? kLa64Lhu : kLa64Lh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Lw; ++ break; ++ default: ++ UNREACHABLE(); ++ return; ++ } ++ EmitLoad(this, value, opcode, node); ++ } else { ++ La64OperandGenerator g(this); ++ Emit(kLa64Shl, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0)); ++ } ++} ++ ++void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ switch (value->opcode()) { ++ // 32-bit operations will write their result in a 64 bit register, ++ // clearing the top 32 bits of the destination register. ++ case IrOpcode::kUint32Div: ++ case IrOpcode::kUint32Mod: ++ case IrOpcode::kUint32MulHigh: { ++ Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); ++ return; ++ } ++ case IrOpcode::kLoad: { ++ LoadRepresentation load_rep = LoadRepresentationOf(value->op()); ++ if (load_rep.IsUnsigned()) { ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kWord8: ++ case MachineRepresentation::kWord16: ++ case MachineRepresentation::kWord32: ++ Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); ++ return; ++ default: ++ break; ++ } ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ Emit(kLa64Dext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0), g.TempImmediate(32)); ++} ++ ++void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ if (CanCover(node, value)) { ++ switch (value->opcode()) { ++ case IrOpcode::kWord64Sar: { ++ if (CanCoverTransitively(node, value, value->InputAt(0)) && ++ TryEmitExtendingLoad(this, value, node)) { ++ return; ++ } else { ++ Int64BinopMatcher m(value); ++ if (m.right().IsInRange(32, 63)) { ++ // After smi untagging no need for truncate. Combine sequence. 
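VisitChangeUint32ToUint64 above zero-extends with a Dext of bits [0, 32), that is, it keeps only the low 32 bits, and skips even that when the producing node (an unsigned narrow load, or a 32-bit Div/Mod/MulHigh) already leaves the upper half clear. The rewrite amounts to:

    #include <cstdint>

    // Zero-extension is a 32-bit-wide extract starting at bit 0 ...
    uint64_t ZeroExtend32(uint64_t x) { return x & 0xFFFFFFFFull; }

    // ... and is a no-op when the value is already a widened unsigned 32-bit result.
    bool AlreadyZeroExtended(uint32_t narrow) {
      uint64_t widened = narrow;   // e.g. the result of an Lbu/Lhu/Lwu load
      return ZeroExtend32(widened) == widened;
    }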
++ Emit(kLa64Dsar, g.DefineSameAsFirst(node), ++ g.UseRegister(m.left().node()), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ } ++ Emit(kLa64Ext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0), g.TempImmediate(32)); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToFloat32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ // Match TruncateFloat64ToFloat32(ChangeInt32ToFloat64) to corresponding ++ // instruction. ++ if (CanCover(node, value) && ++ value->opcode() == IrOpcode::kChangeInt32ToFloat64) { ++ Emit(kLa64CvtSW, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ } ++ VisitRR(this, kLa64CvtSD, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) { ++ VisitRR(this, kArchTruncateDoubleToI, node); ++} ++ ++void InstructionSelector::VisitRoundFloat64ToInt32(Node* node) { ++ VisitRR(this, kLa64TruncWD, node); ++} ++ ++void InstructionSelector::VisitRoundInt64ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSL, node); ++} ++ ++void InstructionSelector::VisitRoundInt64ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDL, node); ++} ++ ++void InstructionSelector::VisitRoundUint64ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSUl, node); ++} ++ ++void InstructionSelector::VisitRoundUint64ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDUl, node); ++} ++ ++void InstructionSelector::VisitBitcastFloat32ToInt32(Node* node) { ++ VisitRR(this, kLa64Float64ExtractLowWord32, node); ++} ++ ++void InstructionSelector::VisitBitcastFloat64ToInt64(Node* node) { ++ VisitRR(this, kLa64BitcastDL, node); ++} ++ ++void InstructionSelector::VisitBitcastInt32ToFloat32(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float64InsertLowWord32, g.DefineAsRegister(node), ++ ImmediateOperand(ImmediateOperand::INLINE, 0), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitBitcastInt64ToFloat64(Node* node) { ++ VisitRR(this, kLa64BitcastLD, node); ++} ++ ++void InstructionSelector::VisitFloat32Add(Node* node) { ++ // Optimization with Madd.S(z, x, y) is intentionally removed. ++ // See explanation for madd_s in assembler-la64.cc. ++ VisitRRR(this, kLa64AddS, node); ++} ++ ++void InstructionSelector::VisitFloat64Add(Node* node) { ++ // Optimization with Madd.D(z, x, y) is intentionally removed. ++ // See explanation for madd_d in assembler-la64.cc. ++ VisitRRR(this, kLa64AddD, node); ++} ++ ++void InstructionSelector::VisitFloat32Sub(Node* node) { ++ // Optimization with Msub.S(z, x, y) is intentionally removed. ++ // See explanation for madd_s in assembler-la64.cc. ++ VisitRRR(this, kLa64SubS, node); ++} ++ ++void InstructionSelector::VisitFloat64Sub(Node* node) { ++ // Optimization with Msub.D(z, x, y) is intentionally removed. ++ // See explanation for madd_d in assembler-la64.cc. 
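++  // (Background note, an assumption rather than upstream text: fused
++  // multiply-add/subtract rounds only once, so its result can differ from the
++  // separate IEEE-754 multiply followed by add/subtract that JavaScript
++  // semantics require; see the referenced comment in assembler-la64.cc for
++  // the authoritative explanation.)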
++ VisitRRR(this, kLa64SubD, node); ++} ++ ++void InstructionSelector::VisitFloat32Mul(Node* node) { ++ VisitRRR(this, kLa64MulS, node); ++} ++ ++void InstructionSelector::VisitFloat64Mul(Node* node) { ++ VisitRRR(this, kLa64MulD, node); ++} ++ ++void InstructionSelector::VisitFloat32Div(Node* node) { ++ VisitRRR(this, kLa64DivS, node); ++} ++ ++void InstructionSelector::VisitFloat64Div(Node* node) { ++ VisitRRR(this, kLa64DivD, node); ++} ++ ++void InstructionSelector::VisitFloat64Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64ModD, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f0), ++ g.UseFixed(node->InputAt(1), f1)) ++ ->MarkAsCall(); ++} ++ ++void InstructionSelector::VisitFloat32Max(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float32Max, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat64Max(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float64Max, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat32Min(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float32Min, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat64Min(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float64Min, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat32Abs(Node* node) { ++ VisitRR(this, kLa64AbsS, node); ++} ++ ++void InstructionSelector::VisitFloat64Abs(Node* node) { ++ VisitRR(this, kLa64AbsD, node); ++} ++ ++void InstructionSelector::VisitFloat32Sqrt(Node* node) { ++ VisitRR(this, kLa64SqrtS, node); ++} ++ ++void InstructionSelector::VisitFloat64Sqrt(Node* node) { ++ VisitRR(this, kLa64SqrtD, node); ++} ++ ++void InstructionSelector::VisitFloat32RoundDown(Node* node) { ++ VisitRR(this, kLa64Float32RoundDown, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundDown(Node* node) { ++ VisitRR(this, kLa64Float64RoundDown, node); ++} ++ ++void InstructionSelector::VisitFloat32RoundUp(Node* node) { ++ VisitRR(this, kLa64Float32RoundUp, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundUp(Node* node) { ++ VisitRR(this, kLa64Float64RoundUp, node); ++} ++ ++void InstructionSelector::VisitFloat32RoundTruncate(Node* node) { ++ VisitRR(this, kLa64Float32RoundTruncate, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundTruncate(Node* node) { ++ VisitRR(this, kLa64Float64RoundTruncate, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) { ++ UNREACHABLE(); ++} ++ ++void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) { ++ VisitRR(this, kLa64Float32RoundTiesEven, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) { ++ VisitRR(this, kLa64Float64RoundTiesEven, node); ++} ++ ++void InstructionSelector::VisitFloat32Neg(Node* node) { ++ VisitRR(this, kLa64NegS, node); ++} ++ ++void InstructionSelector::VisitFloat64Neg(Node* node) { ++ VisitRR(this, kLa64NegD, node); ++} ++ ++void InstructionSelector::VisitFloat64Ieee754Binop(Node* node, ++ InstructionCode opcode) { ++ La64OperandGenerator g(this); ++ Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f2), ++ g.UseFixed(node->InputAt(1), f4)) ++ ->MarkAsCall(); ++} ++ ++void InstructionSelector::VisitFloat64Ieee754Unop(Node* node, 
++ InstructionCode opcode) { ++ La64OperandGenerator g(this); ++ Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f0)) ++ ->MarkAsCall(); ++} ++ ++void InstructionSelector::EmitPrepareArguments( ++ ZoneVector* arguments, const CallDescriptor* call_descriptor, ++ Node* node) { ++ La64OperandGenerator g(this); ++ ++ // Prepare for C function call. ++ if (call_descriptor->IsCFunctionCall()) { ++ Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast( ++ call_descriptor->ParameterCount())), ++ 0, nullptr, 0, nullptr); ++ ++ // Poke any stack arguments. ++ int slot = kCArgSlotCount; ++ for (PushParameter input : (*arguments)) { ++ Emit(kLa64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node), ++ g.TempImmediate(slot << kSystemPointerSizeLog2)); ++ ++slot; ++ } ++ } else { ++ int push_count = static_cast(call_descriptor->StackParameterCount()); ++ if (push_count > 0) { ++ // Calculate needed space ++ int stack_size = 0; ++ for (PushParameter input : (*arguments)) { ++ if (input.node) { ++ stack_size += input.location.GetSizeInPointers(); ++ } ++ } ++ Emit(kLa64StackClaim, g.NoOutput(), ++ g.TempImmediate(stack_size << kSystemPointerSizeLog2)); ++ } ++ for (size_t n = 0; n < arguments->size(); ++n) { ++ PushParameter input = (*arguments)[n]; ++ if (input.node) { ++ Emit(kLa64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node), ++ g.TempImmediate(static_cast(n << kSystemPointerSizeLog2))); ++ } ++ } ++ } ++} ++ ++void InstructionSelector::EmitPrepareResults( ++ ZoneVector* results, const CallDescriptor* call_descriptor, ++ Node* node) { ++ La64OperandGenerator g(this); ++ ++ int reverse_slot = 0; ++ for (PushParameter output : *results) { ++ if (!output.location.IsCallerFrameSlot()) continue; ++ // Skip any alignment holes in nodes. ++ if (output.node != nullptr) { ++ DCHECK(!call_descriptor->IsCFunctionCall()); ++ if (output.location.GetType() == MachineType::Float32()) { ++ MarkAsFloat32(output.node); ++ } else if (output.location.GetType() == MachineType::Float64()) { ++ MarkAsFloat64(output.node); ++ } ++ Emit(kLa64Peek, g.DefineAsRegister(output.node), ++ g.UseImmediate(reverse_slot)); ++ } ++ reverse_slot += output.location.GetSizeInPointers(); ++ } ++} ++ ++bool InstructionSelector::IsTailCallAddressImmediate() { return false; } ++ ++int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; } ++ ++void InstructionSelector::VisitUnalignedLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ La64OperandGenerator g(this); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ ++ ArchOpcode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Ulwc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Uldc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ UNREACHABLE(); ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsUnsigned() ? kLa64Ulhu : kLa64Ulh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = load_rep.IsUnsigned() ? kLa64Ulwu : kLa64Ulw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Uld; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. 
++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ } ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), addr_reg, ++ g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired load opcode, using temp addr_reg. ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), addr_reg, g.TempImmediate(0)); ++ } ++} ++ ++void InstructionSelector::VisitUnalignedStore(Node* node) { ++ La64OperandGenerator g(this); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ UnalignedStoreRepresentation rep = UnalignedStoreRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Uswc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Usdc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ UNREACHABLE(); ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Ush; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Usw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Usd; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. ++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ } ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(), ++ g.UseRegister(base), g.UseImmediate(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), addr_reg, ++ g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired store opcode, using temp addr_reg. ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(), ++ addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value)); ++ } ++} ++ ++namespace { ++ ++// Shared routine for multiple compare operations. ++static void VisitCompare(InstructionSelector* selector, InstructionCode opcode, ++ InstructionOperand left, InstructionOperand right, ++ FlagsContinuation* cont) { ++ selector->EmitWithContinuation(opcode, left, right, cont); ++} ++ ++// Shared routine for multiple float32 compare operations. ++void VisitFloat32Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ Float32BinopMatcher m(node); ++ InstructionOperand lhs, rhs; ++ ++ lhs = m.left().IsZero() ? g.UseImmediate(m.left().node()) ++ : g.UseRegister(m.left().node()); ++ rhs = m.right().IsZero() ? g.UseImmediate(m.right().node()) ++ : g.UseRegister(m.right().node()); ++ VisitCompare(selector, kLa64CmpS, lhs, rhs, cont); ++} ++ ++// Shared routine for multiple float64 compare operations. 
++void VisitFloat64Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ Float64BinopMatcher m(node); ++ InstructionOperand lhs, rhs; ++ ++ lhs = m.left().IsZero() ? g.UseImmediate(m.left().node()) ++ : g.UseRegister(m.left().node()); ++ rhs = m.right().IsZero() ? g.UseImmediate(m.right().node()) ++ : g.UseRegister(m.right().node()); ++ VisitCompare(selector, kLa64CmpD, lhs, rhs, cont); ++} ++ ++// Shared routine for multiple word compare operations. ++void VisitWordCompare(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, FlagsContinuation* cont, ++ bool commutative) { ++ La64OperandGenerator g(selector); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ ++ // Match immediates on left or right side of comparison. ++ if (g.CanBeImmediate(right, opcode)) { ++ if (opcode == kLa64Tst) { ++ VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right), ++ cont); ++ } else { ++ switch (cont->condition()) { ++ case kEqual: ++ case kNotEqual: ++ if (cont->IsSet()) { ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseImmediate(right), cont); ++ } else { ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseRegister(right), cont); ++ } ++ break; ++ case kSignedLessThan: ++ case kSignedGreaterThanOrEqual: ++ case kUnsignedLessThan: ++ case kUnsignedGreaterThanOrEqual: ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseImmediate(right), cont); ++ break; ++ default: ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseRegister(right), cont); ++ } ++ } ++ } else if (g.CanBeImmediate(left, opcode)) { ++ if (!commutative) cont->Commute(); ++ if (opcode == kLa64Tst) { ++ VisitCompare(selector, opcode, g.UseRegister(right), g.UseImmediate(left), ++ cont); ++ } else { ++ switch (cont->condition()) { ++ case kEqual: ++ case kNotEqual: ++ if (cont->IsSet()) { ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseImmediate(left), cont); ++ } else { ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseRegister(left), cont); ++ } ++ break; ++ case kSignedLessThan: ++ case kSignedGreaterThanOrEqual: ++ case kUnsignedLessThan: ++ case kUnsignedGreaterThanOrEqual: ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseImmediate(left), cont); ++ break; ++ default: ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseRegister(left), cont); ++ } ++ } ++ } else { ++ VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right), ++ cont); ++ } ++} ++ ++bool IsNodeUnsigned(Node* n) { ++ NodeMatcher m(n); ++ ++ if (m.IsLoad() || m.IsUnalignedLoad() || m.IsPoisonedLoad() || ++ m.IsProtectedLoad() || m.IsWord32AtomicLoad() || m.IsWord64AtomicLoad()) { ++ LoadRepresentation load_rep = LoadRepresentationOf(n->op()); ++ return load_rep.IsUnsigned(); ++ } else { ++ return m.IsUint32Div() || m.IsUint32LessThan() || ++ m.IsUint32LessThanOrEqual() || m.IsUint32Mod() || ++ m.IsUint32MulHigh() || m.IsChangeFloat64ToUint32() || ++ m.IsTruncateFloat64ToUint32() || m.IsTruncateFloat32ToUint32(); ++ } ++} ++ ++// Shared routine for multiple word compare operations. 
++void VisitFullWord32Compare(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ InstructionOperand leftOp = g.TempRegister(); ++ InstructionOperand rightOp = g.TempRegister(); ++ ++ selector->Emit(kLa64Dshl, leftOp, g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(32)); ++ selector->Emit(kLa64Dshl, rightOp, g.UseRegister(node->InputAt(1)), ++ g.TempImmediate(32)); ++ ++ VisitCompare(selector, opcode, leftOp, rightOp, cont); ++} ++ ++void VisitOptimizedWord32Compare(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, ++ FlagsContinuation* cont) { ++ if (FLAG_debug_code) { ++ La64OperandGenerator g(selector); ++ InstructionOperand leftOp = g.TempRegister(); ++ InstructionOperand rightOp = g.TempRegister(); ++ InstructionOperand optimizedResult = g.TempRegister(); ++ InstructionOperand fullResult = g.TempRegister(); ++ FlagsCondition condition = cont->condition(); ++ InstructionCode testOpcode = opcode | ++ FlagsConditionField::encode(condition) | ++ FlagsModeField::encode(kFlags_set); ++ ++ selector->Emit(testOpcode, optimizedResult, g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1))); ++ ++ selector->Emit(kLa64Dshl, leftOp, g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(32)); ++ selector->Emit(kLa64Dshl, rightOp, g.UseRegister(node->InputAt(1)), ++ g.TempImmediate(32)); ++ selector->Emit(testOpcode, fullResult, leftOp, rightOp); ++ ++ selector->Emit(kLa64AssertEqual, g.NoOutput(), optimizedResult, fullResult, ++ g.TempImmediate(static_cast( ++ AbortReason::kUnsupportedNonPrimitiveCompare))); ++ } ++ ++ VisitWordCompare(selector, node, opcode, cont, false); ++} ++ ++void VisitWord32Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ // LA64 doesn't support Word32 compare instructions. Instead it relies ++ // that the values in registers are correctly sign-extended and uses ++ // Word64 comparison instead. This behavior is correct in most cases, ++ // but doesn't work when comparing signed with unsigned operands. ++ // We could simulate full Word32 compare in all cases but this would ++ // create an unnecessary overhead since unsigned integers are rarely ++ // used in JavaScript. ++ // The solution proposed here tries to match a comparison of signed ++ // with unsigned operand, and perform full Word32Compare only ++ // in those cases. Unfortunately, the solution is not complete because ++ // it might skip cases where Word32 full compare is needed, so ++ // basically it is a hack. ++ // When call to a host function in simulator, if the function return a ++ // int32 value, the simulator do not sign-extended to int64 because in ++ // simulator we do not know the function whether return a int32 or int64. ++ // so we need do a full word32 compare in this case. 
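++  // Illustrative example (added for clarity, not upstream text): take int32
++  // -1 kept sign-extended as 0xFFFFFFFFFFFFFFFF and uint32 0xFFFFFFFF kept
++  // zero-extended as 0x00000000FFFFFFFF.  A Word64 compare reports them
++  // unequal, while a true Word32 compare of the low 32 bits sees the same
++  // bit pattern -- which is why the mixed signed/unsigned case described
++  // above must take the full Word32 compare path.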
++#ifndef USE_SIMULATOR ++ if (IsNodeUnsigned(node->InputAt(0)) != IsNodeUnsigned(node->InputAt(1))) { ++#else ++ if (IsNodeUnsigned(node->InputAt(0)) != IsNodeUnsigned(node->InputAt(1)) || ++ node->InputAt(0)->opcode() == IrOpcode::kCall || ++ node->InputAt(1)->opcode() == IrOpcode::kCall) { ++#endif ++ VisitFullWord32Compare(selector, node, kLa64Cmp, cont); ++ } else { ++ VisitOptimizedWord32Compare(selector, node, kLa64Cmp, cont); ++ } ++} ++ ++void VisitWord64Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ VisitWordCompare(selector, node, kLa64Cmp, cont, false); ++} ++ ++void EmitWordCompareZero(InstructionSelector* selector, Node* value, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ selector->EmitWithContinuation(kLa64Cmp, g.UseRegister(value), ++ g.TempImmediate(0), cont); ++} ++ ++void VisitAtomicLoad(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ if (g.CanBeImmediate(index, opcode)) { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), g.UseRegister(base), ++ g.UseImmediate(index)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ selector->Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), ++ addr_reg, g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired load opcode, using temp addr_reg. ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), addr_reg, g.TempImmediate(0)); ++ } ++} ++ ++void VisitAtomicStore(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.NoOutput(), g.UseRegister(base), g.UseImmediate(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ selector->Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), ++ addr_reg, g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired store opcode, using temp addr_reg. 
++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.NoOutput(), addr_reg, g.TempImmediate(0), ++ g.UseRegisterOrImmediateZero(value)); ++ } ++} ++ ++void VisitAtomicExchange(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ AddressingMode addressing_mode = kMode_MRI; ++ InstructionOperand inputs[3]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(value); ++ InstructionOperand outputs[1]; ++ outputs[0] = g.UseUniqueRegister(node); ++ InstructionOperand temp[3]; ++ temp[0] = g.TempRegister(); ++ temp[1] = g.TempRegister(); ++ temp[2] = g.TempRegister(); ++ InstructionCode code = opcode | AddressingModeField::encode(addressing_mode); ++ selector->Emit(code, 1, outputs, input_count, inputs, 3, temp); ++} ++ ++void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* old_value = node->InputAt(2); ++ Node* new_value = node->InputAt(3); ++ ++ AddressingMode addressing_mode = kMode_MRI; ++ InstructionOperand inputs[4]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(old_value); ++ inputs[input_count++] = g.UseUniqueRegister(new_value); ++ InstructionOperand outputs[1]; ++ outputs[0] = g.UseUniqueRegister(node); ++ InstructionOperand temp[3]; ++ temp[0] = g.TempRegister(); ++ temp[1] = g.TempRegister(); ++ temp[2] = g.TempRegister(); ++ InstructionCode code = opcode | AddressingModeField::encode(addressing_mode); ++ selector->Emit(code, 1, outputs, input_count, inputs, 3, temp); ++} ++ ++void VisitAtomicBinop(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ AddressingMode addressing_mode = kMode_MRI; ++ InstructionOperand inputs[3]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(value); ++ InstructionOperand outputs[1]; ++ outputs[0] = g.UseUniqueRegister(node); ++ InstructionOperand temps[4]; ++ temps[0] = g.TempRegister(); ++ temps[1] = g.TempRegister(); ++ temps[2] = g.TempRegister(); ++ temps[3] = g.TempRegister(); ++ InstructionCode code = opcode | AddressingModeField::encode(addressing_mode); ++ selector->Emit(code, 1, outputs, input_count, inputs, 4, temps); ++} ++ ++} // namespace ++ ++void InstructionSelector::VisitStackPointerGreaterThan( ++ Node* node, FlagsContinuation* cont) { ++ StackCheckKind kind = StackCheckKindOf(node->op()); ++ InstructionCode opcode = ++ kArchStackPointerGreaterThan | MiscField::encode(static_cast(kind)); ++ ++ La64OperandGenerator g(this); ++ ++ // No outputs. ++ InstructionOperand* const outputs = nullptr; ++ const int output_count = 0; ++ ++ // Applying an offset to this stack check requires a temp register. Offsets ++ // are only applied to the first stack check. 
If applying an offset, we must ++ // ensure the input and temp registers do not alias, thus kUniqueRegister. ++ InstructionOperand temps[] = {g.TempRegister()}; ++ const int temp_count = (kind == StackCheckKind::kJSFunctionEntry ? 1 : 0); ++ const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry) ++ ? OperandGenerator::kUniqueRegister ++ : OperandGenerator::kRegister; ++ ++ Node* const value = node->InputAt(0); ++ InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)}; ++ static constexpr int input_count = arraysize(inputs); ++ ++ EmitWithContinuation(opcode, output_count, outputs, input_count, inputs, ++ temp_count, temps, cont); ++} ++ ++// Shared routine for word comparisons against zero. ++void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, ++ FlagsContinuation* cont) { ++ // Try to combine with comparisons against 0 by simply inverting the branch. ++ while (CanCover(user, value)) { ++ if (value->opcode() == IrOpcode::kWord32Equal) { ++ Int32BinopMatcher m(value); ++ if (!m.right().Is(0)) break; ++ user = value; ++ value = m.left().node(); ++ } else if (value->opcode() == IrOpcode::kWord64Equal) { ++ Int64BinopMatcher m(value); ++ if (!m.right().Is(0)) break; ++ user = value; ++ value = m.left().node(); ++ } else { ++ break; ++ } ++ ++ cont->Negate(); ++ } ++ ++ if (CanCover(user, value)) { ++ switch (value->opcode()) { ++ case IrOpcode::kWord32Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kInt32LessThan: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThan); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kInt32LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kUint32LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kUint32LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kWord64Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kInt64LessThan: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThan); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kInt64LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kUint64LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kUint64LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kFloat32Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitFloat32Compare(this, value, cont); ++ case IrOpcode::kFloat32LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitFloat32Compare(this, value, cont); ++ case IrOpcode::kFloat32LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitFloat32Compare(this, value, cont); ++ case IrOpcode::kFloat64Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitFloat64Compare(this, value, cont); ++ case IrOpcode::kFloat64LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitFloat64Compare(this, value, cont); ++ case IrOpcode::kFloat64LessThanOrEqual: ++ 
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitFloat64Compare(this, value, cont); ++ case IrOpcode::kProjection: ++ // Check if this is the overflow output projection of an ++ // WithOverflow node. ++ if (ProjectionIndexOf(value->op()) == 1u) { ++ // We cannot combine the WithOverflow with this branch ++ // unless the 0th projection (the use of the actual value of the ++ // is either nullptr, which means there's no use of the ++ // actual value, or was already defined, which means it is scheduled ++ // *AFTER* this branch). ++ Node* const node = value->InputAt(0); ++ Node* const result = NodeProperties::FindProjection(node, 0); ++ if (result == nullptr || IsDefined(result)) { ++ switch (node->opcode()) { ++ case IrOpcode::kInt32AddWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64Dadd, cont); ++ case IrOpcode::kInt32SubWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64Dsub, cont); ++ case IrOpcode::kInt32MulWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64MulOvf, cont); ++ case IrOpcode::kInt64AddWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64DaddOvf, cont); ++ case IrOpcode::kInt64SubWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64DsubOvf, cont); ++ default: ++ break; ++ } ++ } ++ } ++ break; ++ case IrOpcode::kWord32And: ++ case IrOpcode::kWord64And: ++ return VisitWordCompare(this, value, kLa64Tst, cont, true); ++ case IrOpcode::kStackPointerGreaterThan: ++ cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); ++ return VisitStackPointerGreaterThan(value, cont); ++ default: ++ break; ++ } ++ } ++ ++ // Continuation could not be combined with a compare, emit compare against 0. ++ EmitWordCompareZero(this, value, cont); ++} ++ ++void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) { ++ La64OperandGenerator g(this); ++ InstructionOperand value_operand = g.UseRegister(node->InputAt(0)); ++ ++ // Emit either ArchTableSwitch or ArchBinarySearchSwitch. ++ if (enable_switch_jump_table_ == kEnableSwitchJumpTable) { ++ static const size_t kMaxTableSwitchValueRange = 2 << 16; ++ size_t table_space_cost = 10 + 2 * sw.value_range(); ++ size_t table_time_cost = 3; ++ size_t lookup_space_cost = 2 + 2 * sw.case_count(); ++ size_t lookup_time_cost = sw.case_count(); ++ if (sw.case_count() > 0 && ++ table_space_cost + 3 * table_time_cost <= ++ lookup_space_cost + 3 * lookup_time_cost && ++ sw.min_value() > std::numeric_limits::min() && ++ sw.value_range() <= kMaxTableSwitchValueRange) { ++ InstructionOperand index_operand = value_operand; ++ if (sw.min_value()) { ++ index_operand = g.TempRegister(); ++ Emit(kLa64Sub, index_operand, value_operand, ++ g.TempImmediate(sw.min_value())); ++ } ++ // Generate a table lookup. ++ return EmitTableSwitch(sw, index_operand); ++ } ++ } ++ ++ // Generate a tree of conditional jumps. 
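++  // Worked example (illustrative, not from upstream): 4 cases spread over the
++  // value range 0..100 give a table cost of 10 + 2*101 + 3*3 = 221 against a
++  // lookup cost of 2 + 2*4 + 3*4 = 22, so the jump table is rejected and the
++  // binary-search tree below is emitted instead.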
++ return EmitBinarySearchSwitch(sw, value_operand); ++} ++ ++void InstructionSelector::VisitWord32Equal(Node* const node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ Int32BinopMatcher m(node); ++ if (m.right().Is(0)) { ++ return VisitWordCompareZero(m.node(), m.left().node(), &cont); ++ } ++ ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt32LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kSignedLessThanOrEqual, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitUint32LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt32AddWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64Dadd, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64Dadd, &cont); ++} ++ ++void InstructionSelector::VisitInt32SubWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64Dsub, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64Dsub, &cont); ++} ++ ++void InstructionSelector::VisitInt32MulWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64MulOvf, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64MulOvf, &cont); ++} ++ ++void InstructionSelector::VisitInt64AddWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64DaddOvf, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64DaddOvf, &cont); ++} ++ ++void InstructionSelector::VisitInt64SubWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64DsubOvf, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64DsubOvf, &cont); ++} ++ ++void InstructionSelector::VisitWord64Equal(Node* const node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ Int64BinopMatcher m(node); ++ if (m.right().Is(0)) { ++ return VisitWordCompareZero(m.node(), m.left().node(), &cont); ++ } ++ ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt64LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node); ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kSignedLessThanOrEqual, node); ++ VisitWord64Compare(this, 
node, &cont); ++} ++ ++void InstructionSelector::VisitUint64LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat32Equal(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ VisitFloat32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat32LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitFloat32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitFloat32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64Equal(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ VisitFloat64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitFloat64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitFloat64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64ExtractLowWord32(Node* node) { ++ VisitRR(this, kLa64Float64ExtractLowWord32, node); ++} ++ ++void InstructionSelector::VisitFloat64ExtractHighWord32(Node* node) { ++ VisitRR(this, kLa64Float64ExtractHighWord32, node); ++} ++ ++void InstructionSelector::VisitFloat64SilenceNaN(Node* node) { ++ VisitRR(this, kLa64Float64SilenceNaN, node); ++} ++ ++void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ Emit(kLa64Float64InsertLowWord32, g.DefineSameAsFirst(node), ++ g.UseRegister(left), g.UseRegister(right)); ++} ++ ++void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ Emit(kLa64Float64InsertHighWord32, g.DefineSameAsFirst(node), ++ g.UseRegister(left), g.UseRegister(right)); ++} ++ ++void InstructionSelector::VisitMemoryBarrier(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Sync, g.NoOutput()); ++} ++ ++void InstructionSelector::VisitWord32AtomicLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kWord8: ++ opcode = ++ load_rep.IsSigned() ? kWord32AtomicLoadInt8 : kWord32AtomicLoadUint8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsSigned() ? 
kWord32AtomicLoadInt16 ++ : kWord32AtomicLoadUint16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kWord32AtomicLoadWord32; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ VisitAtomicLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord32AtomicStore(Node* node) { ++ MachineRepresentation rep = AtomicStoreRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kWord8: ++ opcode = kWord32AtomicStoreWord8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kWord32AtomicStoreWord16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kWord32AtomicStoreWord32; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ ++ VisitAtomicStore(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kWord8: ++ opcode = kLa64Word64AtomicLoadUint8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Word64AtomicLoadUint16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Word64AtomicLoadUint32; ++ break; ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Word64AtomicLoadUint64; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ VisitAtomicLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicStore(Node* node) { ++ MachineRepresentation rep = AtomicStoreRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kWord8: ++ opcode = kLa64Word64AtomicStoreWord8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Word64AtomicStoreWord16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Word64AtomicStoreWord32; ++ break; ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Word64AtomicStoreWord64; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ ++ VisitAtomicStore(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord32AtomicExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Int8()) { ++ opcode = kWord32AtomicExchangeInt8; ++ } else if (type == MachineType::Uint8()) { ++ opcode = kWord32AtomicExchangeUint8; ++ } else if (type == MachineType::Int16()) { ++ opcode = kWord32AtomicExchangeInt16; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kWord32AtomicExchangeUint16; ++ } else if (type == MachineType::Int32() || type == MachineType::Uint32()) { ++ opcode = kWord32AtomicExchangeWord32; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ ++ VisitAtomicExchange(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Uint8()) { ++ opcode = kLa64Word64AtomicExchangeUint8; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kLa64Word64AtomicExchangeUint16; ++ } else if (type == MachineType::Uint32()) { ++ opcode = kLa64Word64AtomicExchangeUint32; ++ } else if (type == MachineType::Uint64()) { ++ opcode = kLa64Word64AtomicExchangeUint64; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ VisitAtomicExchange(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == 
MachineType::Int8()) { ++ opcode = kWord32AtomicCompareExchangeInt8; ++ } else if (type == MachineType::Uint8()) { ++ opcode = kWord32AtomicCompareExchangeUint8; ++ } else if (type == MachineType::Int16()) { ++ opcode = kWord32AtomicCompareExchangeInt16; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kWord32AtomicCompareExchangeUint16; ++ } else if (type == MachineType::Int32() || type == MachineType::Uint32()) { ++ opcode = kWord32AtomicCompareExchangeWord32; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ ++ VisitAtomicCompareExchange(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Uint8()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint8; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint16; ++ } else if (type == MachineType::Uint32()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint32; ++ } else if (type == MachineType::Uint64()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint64; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ VisitAtomicCompareExchange(this, node, opcode); ++} ++void InstructionSelector::VisitWord32AtomicBinaryOperation( ++ Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op, ++ ArchOpcode uint16_op, ArchOpcode word32_op) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Int8()) { ++ opcode = int8_op; ++ } else if (type == MachineType::Uint8()) { ++ opcode = uint8_op; ++ } else if (type == MachineType::Int16()) { ++ opcode = int16_op; ++ } else if (type == MachineType::Uint16()) { ++ opcode = uint16_op; ++ } else if (type == MachineType::Int32() || type == MachineType::Uint32()) { ++ opcode = word32_op; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ ++ VisitAtomicBinop(this, node, opcode); ++} ++ ++#define VISIT_ATOMIC_BINOP(op) \ ++ void InstructionSelector::VisitWord32Atomic##op(Node* node) { \ ++ VisitWord32AtomicBinaryOperation( \ ++ node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \ ++ kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16, \ ++ kWord32Atomic##op##Word32); \ ++ } ++VISIT_ATOMIC_BINOP(Add) ++VISIT_ATOMIC_BINOP(Sub) ++VISIT_ATOMIC_BINOP(And) ++VISIT_ATOMIC_BINOP(Or) ++VISIT_ATOMIC_BINOP(Xor) ++#undef VISIT_ATOMIC_BINOP ++ ++void InstructionSelector::VisitWord64AtomicBinaryOperation( ++ Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op, ++ ArchOpcode uint64_op) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Uint8()) { ++ opcode = uint8_op; ++ } else if (type == MachineType::Uint16()) { ++ opcode = uint16_op; ++ } else if (type == MachineType::Uint32()) { ++ opcode = uint32_op; ++ } else if (type == MachineType::Uint64()) { ++ opcode = uint64_op; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ VisitAtomicBinop(this, node, opcode); ++} ++ ++#define VISIT_ATOMIC_BINOP(op) \ ++ void InstructionSelector::VisitWord64Atomic##op(Node* node) { \ ++ VisitWord64AtomicBinaryOperation( \ ++ node, kLa64Word64Atomic##op##Uint8, kLa64Word64Atomic##op##Uint16, \ ++ kLa64Word64Atomic##op##Uint32, kLa64Word64Atomic##op##Uint64); \ ++ } ++VISIT_ATOMIC_BINOP(Add) ++VISIT_ATOMIC_BINOP(Sub) ++VISIT_ATOMIC_BINOP(And) ++VISIT_ATOMIC_BINOP(Or) ++VISIT_ATOMIC_BINOP(Xor) ++#undef VISIT_ATOMIC_BINOP ++ ++void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { ++ 
UNREACHABLE(); ++} ++ ++void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ++ UNREACHABLE(); ++} ++ ++#define SIMD_TYPE_LIST(V) \ ++ V(F32x4) \ ++ V(I32x4) \ ++ V(I16x8) \ ++ V(I8x16) ++ ++#define SIMD_UNOP_LIST(V) \ ++ V(F64x2Abs, kLa64F64x2Abs) \ ++ V(F64x2Neg, kLa64F64x2Neg) \ ++ V(F64x2Sqrt, kLa64F64x2Sqrt) \ ++ V(I64x2Neg, kLa64I64x2Neg) \ ++ V(F32x4SConvertI32x4, kLa64F32x4SConvertI32x4) \ ++ V(F32x4UConvertI32x4, kLa64F32x4UConvertI32x4) \ ++ V(F32x4Abs, kLa64F32x4Abs) \ ++ V(F32x4Neg, kLa64F32x4Neg) \ ++ V(F32x4Sqrt, kLa64F32x4Sqrt) \ ++ V(F32x4RecipApprox, kLa64F32x4RecipApprox) \ ++ V(F32x4RecipSqrtApprox, kLa64F32x4RecipSqrtApprox) \ ++ V(I32x4SConvertF32x4, kLa64I32x4SConvertF32x4) \ ++ V(I32x4UConvertF32x4, kLa64I32x4UConvertF32x4) \ ++ V(I32x4Neg, kLa64I32x4Neg) \ ++ V(I32x4SConvertI16x8Low, kLa64I32x4SConvertI16x8Low) \ ++ V(I32x4SConvertI16x8High, kLa64I32x4SConvertI16x8High) \ ++ V(I32x4UConvertI16x8Low, kLa64I32x4UConvertI16x8Low) \ ++ V(I32x4UConvertI16x8High, kLa64I32x4UConvertI16x8High) \ ++ V(I32x4Abs, kLa64I32x4Abs) \ ++ V(I16x8Neg, kLa64I16x8Neg) \ ++ V(I16x8SConvertI8x16Low, kLa64I16x8SConvertI8x16Low) \ ++ V(I16x8SConvertI8x16High, kLa64I16x8SConvertI8x16High) \ ++ V(I16x8UConvertI8x16Low, kLa64I16x8UConvertI8x16Low) \ ++ V(I16x8UConvertI8x16High, kLa64I16x8UConvertI8x16High) \ ++ V(I16x8Abs, kLa64I16x8Abs) \ ++ V(I8x16Neg, kLa64I8x16Neg) \ ++ V(I8x16Abs, kLa64I8x16Abs) \ ++ V(S128Not, kLa64S128Not) \ ++ V(S1x4AnyTrue, kLa64S1x4AnyTrue) \ ++ V(S1x4AllTrue, kLa64S1x4AllTrue) \ ++ V(S1x8AnyTrue, kLa64S1x8AnyTrue) \ ++ V(S1x8AllTrue, kLa64S1x8AllTrue) \ ++ V(S1x16AnyTrue, kLa64S1x16AnyTrue) \ ++ V(S1x16AllTrue, kLa64S1x16AllTrue) ++ ++#define SIMD_SHIFT_OP_LIST(V) \ ++ V(I64x2Shl) \ ++ V(I64x2ShrS) \ ++ V(I64x2ShrU) \ ++ V(I32x4Shl) \ ++ V(I32x4ShrS) \ ++ V(I32x4ShrU) \ ++ V(I16x8Shl) \ ++ V(I16x8ShrS) \ ++ V(I16x8ShrU) \ ++ V(I8x16Shl) \ ++ V(I8x16ShrS) \ ++ V(I8x16ShrU) ++ ++#define SIMD_BINOP_LIST(V) \ ++ V(F64x2Add, kLa64F64x2Add) \ ++ V(F64x2Sub, kLa64F64x2Sub) \ ++ V(F64x2Mul, kLa64F64x2Mul) \ ++ V(F64x2Div, kLa64F64x2Div) \ ++ V(F64x2Min, kLa64F64x2Min) \ ++ V(F64x2Max, kLa64F64x2Max) \ ++ V(F64x2Eq, kLa64F64x2Eq) \ ++ V(F64x2Ne, kLa64F64x2Ne) \ ++ V(F64x2Lt, kLa64F64x2Lt) \ ++ V(F64x2Le, kLa64F64x2Le) \ ++ V(I64x2Add, kLa64I64x2Add) \ ++ V(I64x2Sub, kLa64I64x2Sub) \ ++ V(I64x2Mul, kLa64I64x2Mul) \ ++ V(F32x4Add, kLa64F32x4Add) \ ++ V(F32x4AddHoriz, kLa64F32x4AddHoriz) \ ++ V(F32x4Sub, kLa64F32x4Sub) \ ++ V(F32x4Mul, kLa64F32x4Mul) \ ++ V(F32x4Div, kLa64F32x4Div) \ ++ V(F32x4Max, kLa64F32x4Max) \ ++ V(F32x4Min, kLa64F32x4Min) \ ++ V(F32x4Eq, kLa64F32x4Eq) \ ++ V(F32x4Ne, kLa64F32x4Ne) \ ++ V(F32x4Lt, kLa64F32x4Lt) \ ++ V(F32x4Le, kLa64F32x4Le) \ ++ V(I32x4Add, kLa64I32x4Add) \ ++ V(I32x4AddHoriz, kLa64I32x4AddHoriz) \ ++ V(I32x4Sub, kLa64I32x4Sub) \ ++ V(I32x4Mul, kLa64I32x4Mul) \ ++ V(I32x4MaxS, kLa64I32x4MaxS) \ ++ V(I32x4MinS, kLa64I32x4MinS) \ ++ V(I32x4MaxU, kLa64I32x4MaxU) \ ++ V(I32x4MinU, kLa64I32x4MinU) \ ++ V(I32x4Eq, kLa64I32x4Eq) \ ++ V(I32x4Ne, kLa64I32x4Ne) \ ++ V(I32x4GtS, kLa64I32x4GtS) \ ++ V(I32x4GeS, kLa64I32x4GeS) \ ++ V(I32x4GtU, kLa64I32x4GtU) \ ++ V(I32x4GeU, kLa64I32x4GeU) \ ++ V(I16x8Add, kLa64I16x8Add) \ ++ V(I16x8AddSaturateS, kLa64I16x8AddSaturateS) \ ++ V(I16x8AddSaturateU, kLa64I16x8AddSaturateU) \ ++ V(I16x8AddHoriz, kLa64I16x8AddHoriz) \ ++ V(I16x8Sub, kLa64I16x8Sub) \ ++ V(I16x8SubSaturateS, kLa64I16x8SubSaturateS) \ ++ V(I16x8SubSaturateU, kLa64I16x8SubSaturateU) \ ++ V(I16x8Mul, kLa64I16x8Mul) \ ++ V(I16x8MaxS, 
kLa64I16x8MaxS) \ ++ V(I16x8MinS, kLa64I16x8MinS) \ ++ V(I16x8MaxU, kLa64I16x8MaxU) \ ++ V(I16x8MinU, kLa64I16x8MinU) \ ++ V(I16x8Eq, kLa64I16x8Eq) \ ++ V(I16x8Ne, kLa64I16x8Ne) \ ++ V(I16x8GtS, kLa64I16x8GtS) \ ++ V(I16x8GeS, kLa64I16x8GeS) \ ++ V(I16x8GtU, kLa64I16x8GtU) \ ++ V(I16x8GeU, kLa64I16x8GeU) \ ++ V(I16x8RoundingAverageU, kLa64I16x8RoundingAverageU) \ ++ V(I16x8SConvertI32x4, kLa64I16x8SConvertI32x4) \ ++ V(I16x8UConvertI32x4, kLa64I16x8UConvertI32x4) \ ++ V(I8x16Add, kLa64I8x16Add) \ ++ V(I8x16AddSaturateS, kLa64I8x16AddSaturateS) \ ++ V(I8x16AddSaturateU, kLa64I8x16AddSaturateU) \ ++ V(I8x16Sub, kLa64I8x16Sub) \ ++ V(I8x16SubSaturateS, kLa64I8x16SubSaturateS) \ ++ V(I8x16SubSaturateU, kLa64I8x16SubSaturateU) \ ++ V(I8x16Mul, kLa64I8x16Mul) \ ++ V(I8x16MaxS, kLa64I8x16MaxS) \ ++ V(I8x16MinS, kLa64I8x16MinS) \ ++ V(I8x16MaxU, kLa64I8x16MaxU) \ ++ V(I8x16MinU, kLa64I8x16MinU) \ ++ V(I8x16Eq, kLa64I8x16Eq) \ ++ V(I8x16Ne, kLa64I8x16Ne) \ ++ V(I8x16GtS, kLa64I8x16GtS) \ ++ V(I8x16GeS, kLa64I8x16GeS) \ ++ V(I8x16GtU, kLa64I8x16GtU) \ ++ V(I8x16GeU, kLa64I8x16GeU) \ ++ V(I8x16RoundingAverageU, kLa64I8x16RoundingAverageU) \ ++ V(I8x16SConvertI16x8, kLa64I8x16SConvertI16x8) \ ++ V(I8x16UConvertI16x8, kLa64I8x16UConvertI16x8) \ ++ V(S128And, kLa64S128And) \ ++ V(S128Or, kLa64S128Or) \ ++ V(S128Xor, kLa64S128Xor) \ ++ V(S128AndNot, kLa64S128AndNot) ++ ++void InstructionSelector::VisitS128Zero(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64S128Zero, g.DefineAsRegister(node)); ++} ++ ++#define SIMD_VISIT_SPLAT(Type) \ ++ void InstructionSelector::Visit##Type##Splat(Node* node) { \ ++ VisitRR(this, kLa64##Type##Splat, node); \ ++ } ++SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) ++SIMD_VISIT_SPLAT(F64x2) ++#undef SIMD_VISIT_SPLAT ++ ++#define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \ ++ void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \ ++ VisitRRI(this, kLa64##Type##ExtractLane##Sign, node); \ ++ } ++SIMD_VISIT_EXTRACT_LANE(F64x2, ) ++SIMD_VISIT_EXTRACT_LANE(F32x4, ) ++SIMD_VISIT_EXTRACT_LANE(I32x4, ) ++SIMD_VISIT_EXTRACT_LANE(I16x8, U) ++SIMD_VISIT_EXTRACT_LANE(I16x8, S) ++SIMD_VISIT_EXTRACT_LANE(I8x16, U) ++SIMD_VISIT_EXTRACT_LANE(I8x16, S) ++#undef SIMD_VISIT_EXTRACT_LANE ++ ++#define SIMD_VISIT_REPLACE_LANE(Type) \ ++ void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \ ++ VisitRRIR(this, kLa64##Type##ReplaceLane, node); \ ++ } ++SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE) ++SIMD_VISIT_REPLACE_LANE(F64x2) ++#undef SIMD_VISIT_REPLACE_LANE ++ ++#define SIMD_VISIT_UNOP(Name, instruction) \ ++ void InstructionSelector::Visit##Name(Node* node) { \ ++ VisitRR(this, instruction, node); \ ++ } ++SIMD_UNOP_LIST(SIMD_VISIT_UNOP) ++#undef SIMD_VISIT_UNOP ++ ++#define SIMD_VISIT_SHIFT_OP(Name) \ ++ void InstructionSelector::Visit##Name(Node* node) { \ ++ VisitSimdShift(this, kLa64##Name, node); \ ++ } ++SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP) ++#undef SIMD_VISIT_SHIFT_OP ++ ++#define SIMD_VISIT_BINOP(Name, instruction) \ ++ void InstructionSelector::Visit##Name(Node* node) { \ ++ VisitRRR(this, instruction, node); \ ++ } ++SIMD_BINOP_LIST(SIMD_VISIT_BINOP) ++#undef SIMD_VISIT_BINOP ++ ++void InstructionSelector::VisitS128Select(Node* node) { ++ VisitRRRR(this, kLa64S128Select, node); ++} ++ ++namespace { ++ ++struct ShuffleEntry { ++ uint8_t shuffle[kSimd128Size]; ++ ArchOpcode opcode; ++}; ++ ++static const ShuffleEntry arch_shuffles[] = { ++ {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}, ++ kLa64S32x4InterleaveRight}, ++ {{8, 9, 10, 11, 24, 25, 26, 27, 
12, 13, 14, 15, 28, 29, 30, 31}, ++ kLa64S32x4InterleaveLeft}, ++ {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}, ++ kLa64S32x4PackEven}, ++ {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}, ++ kLa64S32x4PackOdd}, ++ {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}, ++ kLa64S32x4InterleaveEven}, ++ {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}, ++ kLa64S32x4InterleaveOdd}, ++ ++ {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}, ++ kLa64S16x8InterleaveRight}, ++ {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}, ++ kLa64S16x8InterleaveLeft}, ++ {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}, ++ kLa64S16x8PackEven}, ++ {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}, ++ kLa64S16x8PackOdd}, ++ {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}, ++ kLa64S16x8InterleaveEven}, ++ {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}, ++ kLa64S16x8InterleaveOdd}, ++ {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kLa64S16x4Reverse}, ++ {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kLa64S16x2Reverse}, ++ ++ {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, ++ kLa64S8x16InterleaveRight}, ++ {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, ++ kLa64S8x16InterleaveLeft}, ++ {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, ++ kLa64S8x16PackEven}, ++ {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}, ++ kLa64S8x16PackOdd}, ++ {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}, ++ kLa64S8x16InterleaveEven}, ++ {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}, ++ kLa64S8x16InterleaveOdd}, ++ {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kLa64S8x8Reverse}, ++ {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kLa64S8x4Reverse}, ++ {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kLa64S8x2Reverse}}; ++ ++bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, ++ size_t num_entries, bool is_swizzle, ++ ArchOpcode* opcode) { ++ uint8_t mask = is_swizzle ? 
kSimd128Size - 1 : 2 * kSimd128Size - 1; ++ for (size_t i = 0; i < num_entries; ++i) { ++ const ShuffleEntry& entry = table[i]; ++ int j = 0; ++ for (; j < kSimd128Size; ++j) { ++ if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) { ++ break; ++ } ++ } ++ if (j == kSimd128Size) { ++ *opcode = entry.opcode; ++ return true; ++ } ++ } ++ return false; ++} ++ ++} // namespace ++ ++void InstructionSelector::VisitS8x16Shuffle(Node* node) { ++ uint8_t shuffle[kSimd128Size]; ++ bool is_swizzle; ++ CanonicalizeShuffle(node, shuffle, &is_swizzle); ++ uint8_t shuffle32x4[4]; ++ ArchOpcode opcode; ++ if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), ++ is_swizzle, &opcode)) { ++ VisitRRR(this, opcode, node); ++ return; ++ } ++ Node* input0 = node->InputAt(0); ++ Node* input1 = node->InputAt(1); ++ uint8_t offset; ++ La64OperandGenerator g(this); ++ if (TryMatchConcat(shuffle, &offset)) { ++ Emit(kLa64S8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input1), ++ g.UseRegister(input0), g.UseImmediate(offset)); ++ return; ++ } ++ if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { ++ Emit(kLa64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), ++ g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4))); ++ return; ++ } ++ Emit(kLa64S8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), ++ g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle)), ++ g.UseImmediate(Pack4Lanes(shuffle + 4)), ++ g.UseImmediate(Pack4Lanes(shuffle + 8)), ++ g.UseImmediate(Pack4Lanes(shuffle + 12))); ++} ++ ++void InstructionSelector::VisitS8x16Swizzle(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand temps[] = {g.TempSimd128Register()}; ++ // We don't want input 0 or input 1 to be the same as output, since we will ++ // modify output before do the calculation. 
++ Emit(kLa64S8x16Swizzle, g.DefineAsRegister(node), ++ g.UseUniqueRegister(node->InputAt(0)), ++ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); ++} ++ ++void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord8ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord16ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Shl, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0)); ++} ++ ++// static ++MachineOperatorBuilder::Flags ++InstructionSelector::SupportedMachineOperatorFlags() { ++ MachineOperatorBuilder::Flags flags = MachineOperatorBuilder::kNoFlags; ++ return flags | MachineOperatorBuilder::kWord32Ctz | ++ MachineOperatorBuilder::kWord64Ctz | ++ MachineOperatorBuilder::kWord32Popcnt | ++ MachineOperatorBuilder::kWord64Popcnt | ++ MachineOperatorBuilder::kWord32ShiftIsSafe | ++ MachineOperatorBuilder::kInt32DivIsSafe | ++ MachineOperatorBuilder::kUint32DivIsSafe | ++ MachineOperatorBuilder::kFloat64RoundDown | ++ MachineOperatorBuilder::kFloat32RoundDown | ++ MachineOperatorBuilder::kFloat64RoundUp | ++ MachineOperatorBuilder::kFloat32RoundUp | ++ MachineOperatorBuilder::kFloat64RoundTruncate | ++ MachineOperatorBuilder::kFloat32RoundTruncate | ++ MachineOperatorBuilder::kFloat64RoundTiesEven | ++ MachineOperatorBuilder::kFloat32RoundTiesEven; ++} ++ ++// static ++MachineOperatorBuilder::AlignmentRequirements ++InstructionSelector::AlignmentRequirements() { ++ return MachineOperatorBuilder::AlignmentRequirements:: ++ FullUnalignedAccessSupport(); ++} ++ ++#undef SIMD_BINOP_LIST ++#undef SIMD_SHIFT_OP_LIST ++#undef SIMD_UNOP_LIST ++#undef SIMD_TYPE_LIST ++#undef TRACE_UNIMPL ++#undef TRACE ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc b/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc +index 4967f2bbfa1..c3701e3ef07 100644 +--- a/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc ++++ b/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc +@@ -94,9 +94,22 @@ namespace { + #define PARAM_REGISTERS a0, a1, a2, a3, a4, a5, a6, a7 + #define CALLEE_SAVE_REGISTERS \ + s0.bit() | s1.bit() | s2.bit() | s3.bit() | s4.bit() | s5.bit() | s6.bit() | \ +- s7.bit() +-#define CALLEE_SAVE_FP_REGISTERS \ +- f20.bit() | f22.bit() | f24.bit() | f26.bit() | f28.bit() | f30.bit() ++ s7.bit() | fp.bit() ++#define CALLEE_SAVE_FP_REGISTERS \ ++ f24.bit() | f25.bit() | f26.bit() | f27.bit() | f28.bit() | f29.bit() | \ ++ f30.bit() | f31.bit() ++ ++#elif V8_TARGET_ARCH_LA64 ++// =========================================================================== ++// == la64 ================================================================= ++// =========================================================================== ++#define PARAM_REGISTERS a0, a1, a2, a3, a4, a5, a6, a7 ++#define 
CALLEE_SAVE_REGISTERS \ ++ s0.bit() | s1.bit() | s2.bit() | s3.bit() | s4.bit() | s5.bit() | s6.bit() | \ ++ s7.bit() | fp.bit() ++#define CALLEE_SAVE_FP_REGISTERS \ ++ f24.bit() | f25.bit() | f26.bit() | f27.bit() | f28.bit() | f29.bit() | \ ++ f30.bit() | f31.bit() + + #elif V8_TARGET_ARCH_PPC64 + // =========================================================================== +diff --git a/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc b/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc +index fcf9b8448a8..f704f0b6d84 100644 +--- a/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc ++++ b/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc +@@ -1067,7 +1067,7 @@ void DebugEvaluate::VerifyTransitiveBuiltins(Isolate* isolate) { + } + CHECK(!failed); + #if defined(V8_TARGET_ARCH_PPC) || defined(V8_TARGET_ARCH_PPC64) || \ +- defined(V8_TARGET_ARCH_MIPS64) ++ defined(V8_TARGET_ARCH_MIPS64) || defined(V8_TARGET_ARCH_LA64) + // Isolate-independent builtin calls and jumps do not emit reloc infos + // on PPC. We try to avoid using PC relative code due to performance + // issue with especially older hardwares. +diff --git a/src/3rdparty/chromium/v8/src/debug/la64/debug-la64.cc b/src/3rdparty/chromium/v8/src/debug/la64/debug-la64.cc +new file mode 100644 +index 00000000000..081135d3bb4 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/debug/la64/debug-la64.cc +@@ -0,0 +1,56 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/debug/debug.h" ++ ++#include "src/codegen/macro-assembler.h" ++#include "src/debug/liveedit.h" ++#include "src/execution/frames-inl.h" ++ ++namespace v8 { ++namespace internal { ++ ++#define __ ACCESS_MASM(masm) ++ ++void DebugCodegen::GenerateHandleDebuggerStatement(MacroAssembler* masm) { ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kHandleDebuggerStatement, 0); ++ } ++ __ MaybeDropFrames(); ++ ++ // Return to caller. ++ __ Ret(); ++} ++ ++void DebugCodegen::GenerateFrameDropperTrampoline(MacroAssembler* masm) { ++ // Frame is being dropped: ++ // - Drop to the target frame specified by a1. ++ // - Look up current function on the frame. ++ // - Leave the frame. ++ // - Restart the frame by calling the function. ++ __ mov(fp, a1); ++ __ Ld_d(a1, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ ++ // Pop return address and frame. ++ __ LeaveFrame(StackFrame::INTERNAL); ++ ++ __ Ld_d(a0, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu( ++ a0, FieldMemOperand(a0, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ mov(a2, a0); ++ ++ __ InvokeFunction(a1, a2, a0, JUMP_FUNCTION); ++} ++ ++const bool LiveEdit::kFrameDropperSupported = true; ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/deoptimizer/la64/deoptimizer-la64.cc b/src/3rdparty/chromium/v8/src/deoptimizer/la64/deoptimizer-la64.cc +new file mode 100644 +index 00000000000..23a0051d93d +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/deoptimizer/la64/deoptimizer-la64.cc +@@ -0,0 +1,241 @@ ++// Copyright 2011 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. 
++ ++#include "src/codegen/macro-assembler.h" ++#include "src/codegen/register-configuration.h" ++#include "src/codegen/safepoint-table.h" ++#include "src/deoptimizer/deoptimizer.h" ++ ++namespace v8 { ++namespace internal { ++ ++const bool Deoptimizer::kSupportsFixedDeoptExitSizes = false; ++const int Deoptimizer::kNonLazyDeoptExitSize = 0; ++const int Deoptimizer::kLazyDeoptExitSize = 0; ++ ++#define __ masm-> ++ ++// This code tries to be close to ia32 code so that any changes can be ++// easily ported. ++void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, ++ Isolate* isolate, ++ DeoptimizeKind deopt_kind) { ++ NoRootArrayScope no_root_array(masm); ++ ++ // Unlike on ARM we don't save all the registers, just the useful ones. ++ // For the rest, there are gaps on the stack, so the offsets remain the same. ++ const int kNumberOfRegisters = Register::kNumRegisters; ++ ++ RegList restored_regs = kJSCallerSaved | kCalleeSaved; ++ RegList saved_regs = restored_regs | sp.bit() | ra.bit(); ++ ++ const int kDoubleRegsSize = kDoubleSize * DoubleRegister::kNumRegisters; ++ ++ // Save all double FPU registers before messing with them. ++ __ Sub_d(sp, sp, Operand(kDoubleRegsSize)); ++ const RegisterConfiguration* config = RegisterConfiguration::Default(); ++ for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { ++ int code = config->GetAllocatableDoubleCode(i); ++ const DoubleRegister fpu_reg = DoubleRegister::from_code(code); ++ int offset = code * kDoubleSize; ++ __ Fst_d(fpu_reg, MemOperand(sp, offset)); ++ } ++ ++ // Push saved_regs (needed to populate FrameDescription::registers_). ++ // Leave gaps for other registers. ++ __ Sub_d(sp, sp, kNumberOfRegisters * kPointerSize); ++ for (int16_t i = kNumberOfRegisters - 1; i >= 0; i--) { ++ if ((saved_regs & (1 << i)) != 0) { ++ __ St_d(ToRegister(i), MemOperand(sp, kPointerSize * i)); ++ } ++ } ++ ++ __ li(a2, Operand(ExternalReference::Create( ++ IsolateAddressId::kCEntryFPAddress, isolate))); ++ __ St_d(fp, MemOperand(a2, 0)); ++ ++ const int kSavedRegistersAreaSize = ++ (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize; ++ ++ // Get the bailout is passed as kRootRegister by the caller. ++ __ mov(a2, kRootRegister); ++ ++ // Get the address of the location in the code object (a3) (return ++ // address for lazy deoptimization) and compute the fp-to-sp delta in ++ // register a4. ++ __ mov(a3, ra); ++ __ Add_d(a4, sp, Operand(kSavedRegistersAreaSize)); ++ ++ __ Sub_d(a4, fp, a4); ++ ++ // Allocate a new deoptimizer object. ++ __ PrepareCallCFunction(6, a5); ++ // Pass six arguments, according to n64 ABI. ++ __ mov(a0, zero_reg); ++ Label context_check; ++ __ Ld_d(a1, MemOperand(fp, CommonFrameConstants::kContextOrFrameTypeOffset)); ++ __ JumpIfSmi(a1, &context_check); ++ __ Ld_d(a0, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ __ bind(&context_check); ++ __ li(a1, Operand(static_cast(deopt_kind))); ++ // a2: bailout id already loaded. ++ // a3: code address or 0 already loaded. ++ // a4: already has fp-to-sp delta. ++ __ li(a5, Operand(ExternalReference::isolate_address(isolate))); ++ ++ // Call Deoptimizer::New(). ++ { ++ AllowExternalCallThatCantCauseGC scope(masm); ++ __ CallCFunction(ExternalReference::new_deoptimizer_function(), 6); ++ } ++ ++ // Preserve "deoptimizer" object in register v0 and get the input ++ // frame descriptor pointer to a1 (deoptimizer->input_); ++ // Move deopt-obj to a0 for call to Deoptimizer::ComputeOutputFrames() below. 
++ // TODO save a0 ++ //__ mov(a0, v0); ++ __ Ld_d(a1, MemOperand(a0, Deoptimizer::input_offset())); ++ ++ // Copy core registers into FrameDescription::registers_[kNumRegisters]. ++ DCHECK_EQ(Register::kNumRegisters, kNumberOfRegisters); ++ for (int i = 0; i < kNumberOfRegisters; i++) { ++ int offset = (i * kPointerSize) + FrameDescription::registers_offset(); ++ if ((saved_regs & (1 << i)) != 0) { ++ __ Ld_d(a2, MemOperand(sp, i * kPointerSize)); ++ __ St_d(a2, MemOperand(a1, offset)); ++ } else if (FLAG_debug_code) { ++ __ li(a2, Operand(kDebugZapValue)); ++ __ St_d(a2, MemOperand(a1, offset)); ++ } ++ } ++ ++ int double_regs_offset = FrameDescription::double_registers_offset(); ++ // Copy FPU registers to ++ // double_registers_[DoubleRegister::kNumAllocatableRegisters] ++ for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { ++ int code = config->GetAllocatableDoubleCode(i); ++ int dst_offset = code * kDoubleSize + double_regs_offset; ++ int src_offset = code * kDoubleSize + kNumberOfRegisters * kPointerSize; ++ __ Fld_d(f0, MemOperand(sp, src_offset)); ++ __ Fst_d(f0, MemOperand(a1, dst_offset)); ++ } ++ ++ // Remove the saved registers from the stack. ++ __ Add_d(sp, sp, Operand(kSavedRegistersAreaSize)); ++ ++ // Compute a pointer to the unwinding limit in register a2; that is ++ // the first stack slot not part of the input frame. ++ __ Ld_d(a2, MemOperand(a1, FrameDescription::frame_size_offset())); ++ __ Add_d(a2, a2, sp); ++ ++ // Unwind the stack down to - but not including - the unwinding ++ // limit and copy the contents of the activation frame to the input ++ // frame description. ++ __ Add_d(a3, a1, Operand(FrameDescription::frame_content_offset())); ++ Label pop_loop; ++ Label pop_loop_header; ++ __ Branch(&pop_loop_header); ++ __ bind(&pop_loop); ++ __ pop(a4); ++ __ St_d(a4, MemOperand(a3, 0)); ++ __ addi_d(a3, a3, sizeof(uint64_t)); ++ __ bind(&pop_loop_header); ++ __ BranchShort(&pop_loop, ne, a2, Operand(sp)); ++ // Compute the output frame in the deoptimizer. ++ __ push(a0); // Preserve deoptimizer object across call. ++ // a0: deoptimizer object; a1: scratch. ++ __ PrepareCallCFunction(1, a1); ++ // Call Deoptimizer::ComputeOutputFrames(). ++ { ++ AllowExternalCallThatCantCauseGC scope(masm); ++ __ CallCFunction(ExternalReference::compute_output_frames_function(), 1); ++ } ++ __ pop(a0); // Restore deoptimizer object (class Deoptimizer). ++ ++ __ Ld_d(sp, MemOperand(a0, Deoptimizer::caller_frame_top_offset())); ++ ++ // Replace the current (input) frame with the output frames. ++ Label outer_push_loop, inner_push_loop, outer_loop_header, inner_loop_header; ++ // Outer loop state: a4 = current "FrameDescription** output_", ++ // a1 = one past the last FrameDescription**. ++ __ Ld_w(a1, MemOperand(a0, Deoptimizer::output_count_offset())); ++ __ Ld_d(a4, MemOperand(a0, Deoptimizer::output_offset())); // a4 is output_. ++ __ Alsl_d(a1, a1, a4, kPointerSizeLog2, t7); ++ __ Branch(&outer_loop_header); ++ __ bind(&outer_push_loop); ++ // Inner loop state: a2 = current FrameDescription*, a3 = loop index. 
++ __ Ld_d(a2, MemOperand(a4, 0)); // output_[ix] ++ __ Ld_d(a3, MemOperand(a2, FrameDescription::frame_size_offset())); ++ __ Branch(&inner_loop_header); ++ __ bind(&inner_push_loop); ++ __ Sub_d(a3, a3, Operand(sizeof(uint64_t))); ++ __ Add_d(a6, a2, Operand(a3)); ++ __ Ld_d(a7, MemOperand(a6, FrameDescription::frame_content_offset())); ++ __ push(a7); ++ __ bind(&inner_loop_header); ++ __ BranchShort(&inner_push_loop, ne, a3, Operand(zero_reg)); ++ ++ __ Add_d(a4, a4, Operand(kPointerSize)); ++ __ bind(&outer_loop_header); ++ __ BranchShort(&outer_push_loop, lt, a4, Operand(a1)); ++ ++ __ Ld_d(a1, MemOperand(a0, Deoptimizer::input_offset())); ++ for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { ++ int code = config->GetAllocatableDoubleCode(i); ++ const DoubleRegister fpu_reg = DoubleRegister::from_code(code); ++ int src_offset = code * kDoubleSize + double_regs_offset; ++ __ Fld_d(fpu_reg, MemOperand(a1, src_offset)); ++ } ++ ++ // Push pc and continuation from the last output frame. ++ __ Ld_d(a6, MemOperand(a2, FrameDescription::pc_offset())); ++ __ push(a6); ++ __ Ld_d(a6, MemOperand(a2, FrameDescription::continuation_offset())); ++ __ push(a6); ++ ++ // Technically restoring 'at' should work unless zero_reg is also restored ++ // but it's safer to check for this. ++ DCHECK(!(t7.bit() & restored_regs)); ++ // Restore the registers from the last output frame. ++ __ mov(t7, a2); ++ for (int i = kNumberOfRegisters - 1; i >= 0; i--) { ++ int offset = (i * kPointerSize) + FrameDescription::registers_offset(); ++ if ((restored_regs & (1 << i)) != 0) { ++ __ Ld_d(ToRegister(i), MemOperand(t7, offset)); ++ } ++ } ++ ++ __ pop(t7); // Get continuation, leave pc on stack. ++ __ pop(ra); ++ __ Jump(t7); ++ __ stop(); ++} ++ ++// Maximum size of a table entry generated below. ++const int Deoptimizer::table_entry_size_ = 2 * kInstrSize; ++ ++Float32 RegisterValues::GetFloatRegister(unsigned n) const { ++ return Float32::FromBits( ++ static_cast(double_registers_[n].get_bits())); ++} ++ ++void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) { ++ SetFrameSlot(offset, value); ++} ++ ++void FrameDescription::SetCallerFp(unsigned offset, intptr_t value) { ++ SetFrameSlot(offset, value); ++} ++ ++void FrameDescription::SetCallerConstantPool(unsigned offset, intptr_t value) { ++ // No embedded constant pool support. ++ UNREACHABLE(); ++} ++ ++void FrameDescription::SetPc(intptr_t pc) { pc_ = pc; } ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc b/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc +index 5f364373027..4ef6eba3273 100644 +--- a/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc ++++ b/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc +@@ -1077,6 +1077,8 @@ class DebugInfoSection : public DebugSection { + UNIMPLEMENTED(); + #elif V8_TARGET_ARCH_MIPS64 + UNIMPLEMENTED(); ++#elif V8_TARGET_ARCH_LA64 ++ UNIMPLEMENTED(); + #elif V8_TARGET_ARCH_PPC64 && V8_OS_LINUX + w->Write(DW_OP_reg31); // The frame pointer is here on PPC64. 
+ #elif V8_TARGET_ARCH_S390 +diff --git a/src/3rdparty/chromium/v8/src/diagnostics/la64/disasm-la64.cc b/src/3rdparty/chromium/v8/src/diagnostics/la64/disasm-la64.cc +new file mode 100644 +index 00000000000..0d3e8ee89f7 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/diagnostics/la64/disasm-la64.cc +@@ -0,0 +1,1841 @@ ++#include ++#include ++#include ++#include ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/base/platform/platform.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/diagnostics/disasm.h" ++ ++namespace v8 { ++namespace internal { ++ ++//------------------------------------------------------------------------------ ++ ++// Decoder decodes and disassembles instructions into an output buffer. ++// It uses the converter to convert register names and call destinations into ++// more informative description. ++class Decoder { ++ public: ++ Decoder(const disasm::NameConverter& converter, ++ v8::internal::Vector out_buffer) ++ : converter_(converter), out_buffer_(out_buffer), out_buffer_pos_(0) { ++ out_buffer_[out_buffer_pos_] = '\0'; ++ } ++ ++ ~Decoder() {} ++ ++ // Writes one disassembled instruction into 'buffer' (0-terminated). ++ // Returns the length of the disassembled machine instruction in bytes. ++ int InstructionDecode(byte* instruction); ++ ++ private: ++ // Bottleneck functions to print into the out_buffer. ++ void PrintChar(const char ch); ++ void Print(const char* str); ++ ++ // Printing of common values. ++ void PrintRegister(int reg); ++ void PrintFPURegister(int freg); ++ void PrintFPUStatusRegister(int freg); ++ void PrintRj(Instruction* instr); ++ void PrintRk(Instruction* instr); ++ void PrintRd(Instruction* instr); ++ void PrintFj(Instruction* instr); ++ void PrintFk(Instruction* instr); ++ void PrintFd(Instruction* instr); ++ void PrintFa(Instruction* instr); ++ void PrintSa2(Instruction* instr); ++ void PrintSa3(Instruction* instr); ++ void PrintUi5(Instruction* instr); ++ void PrintUi6(Instruction* instr); ++ void PrintUi12(Instruction* instr); ++ void PrintXi12(Instruction* instr); ++ void PrintMsbw(Instruction* instr); ++ void PrintLsbw(Instruction* instr); ++ void PrintMsbd(Instruction* instr); ++ void PrintLsbd(Instruction* instr); ++ // void PrintCond(Instruction* instr); ++ void PrintSi12(Instruction* instr); ++ void PrintSi14(Instruction* instr); ++ void PrintSi16(Instruction* instr); ++ void PrintSi20(Instruction* instr); ++ void PrintCj(Instruction* instr); ++ void PrintCd(Instruction* instr); ++ void PrintCa(Instruction* instr); ++ void PrintCode(Instruction* instr); ++ void PrintHint5(Instruction* instr); ++ void PrintHint15(Instruction* instr); ++ void PrintPCOffs16(Instruction* instr); ++ void PrintPCOffs21(Instruction* instr); ++ void PrintPCOffs26(Instruction* instr); ++ void PrintOffs16(Instruction* instr); ++ void PrintOffs21(Instruction* instr); ++ void PrintOffs26(Instruction* instr); ++ ++ // Handle formatting of instructions and their options. ++ int FormatRegister(Instruction* instr, const char* option); ++ int FormatFPURegister(Instruction* instr, const char* option); ++ int FormatOption(Instruction* instr, const char* option); ++ void Format(Instruction* instr, const char* format); ++ void Unknown(Instruction* instr); ++ int DecodeBreakInstr(Instruction* instr); ++ ++ // Each of these functions decodes one particular instruction type. 
++ int InstructionDecode(Instruction* instr); ++ void DecodeTypekOp6(Instruction* instr); ++ void DecodeTypekOp7(Instruction* instr); ++ void DecodeTypekOp8(Instruction* instr); ++ void DecodeTypekOp10(Instruction* instr); ++ void DecodeTypekOp12(Instruction* instr); ++ void DecodeTypekOp14(Instruction* instr); ++ int DecodeTypekOp17(Instruction* instr); ++ void DecodeTypekOp22(Instruction* instr); ++ ++ const disasm::NameConverter& converter_; ++ v8::internal::Vector out_buffer_; ++ int out_buffer_pos_; ++ ++ DISALLOW_COPY_AND_ASSIGN(Decoder); ++}; ++ ++// Support for assertions in the Decoder formatting functions. ++#define STRING_STARTS_WITH(string, compare_string) \ ++ (strncmp(string, compare_string, strlen(compare_string)) == 0) ++ ++// Append the ch to the output buffer. ++void Decoder::PrintChar(const char ch) { out_buffer_[out_buffer_pos_++] = ch; } ++ ++// Append the str to the output buffer. ++void Decoder::Print(const char* str) { ++ char cur = *str++; ++ while (cur != '\0' && (out_buffer_pos_ < (out_buffer_.length() - 1))) { ++ PrintChar(cur); ++ cur = *str++; ++ } ++ out_buffer_[out_buffer_pos_] = 0; ++} ++ ++// Print the register name according to the active name converter. ++void Decoder::PrintRegister(int reg) { ++ Print(converter_.NameOfCPURegister(reg)); ++} ++ ++void Decoder::PrintRj(Instruction* instr) { ++ int reg = instr->RjValue(); ++ PrintRegister(reg); ++} ++ ++void Decoder::PrintRk(Instruction* instr) { ++ int reg = instr->RkValue(); ++ PrintRegister(reg); ++} ++ ++void Decoder::PrintRd(Instruction* instr) { ++ int reg = instr->RdValue(); ++ PrintRegister(reg); ++} ++ ++// Print the FPUregister name according to the active name converter. ++void Decoder::PrintFPURegister(int freg) { ++ Print(converter_.NameOfXMMRegister(freg)); ++} ++ ++void Decoder::PrintFj(Instruction* instr) { ++ int freg = instr->FjValue(); ++ PrintFPURegister(freg); ++} ++ ++void Decoder::PrintFk(Instruction* instr) { ++ int freg = instr->FkValue(); ++ PrintFPURegister(freg); ++} ++ ++void Decoder::PrintFd(Instruction* instr) { ++ int freg = instr->FdValue(); ++ PrintFPURegister(freg); ++} ++ ++void Decoder::PrintFa(Instruction* instr) { ++ int freg = instr->FaValue(); ++ PrintFPURegister(freg); ++} ++ ++// Print the integer value of the sa field. 
++void Decoder::PrintSa2(Instruction* instr) { ++ int sa = instr->Sa2Value(); ++ uint32_t opcode = (instr->InstructionBits() >> 18) << 18; ++ if (opcode == ALSL || opcode == ALSL_D) { ++ sa += 1; ++ } ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", sa); ++} ++ ++void Decoder::PrintSa3(Instruction* instr) { ++ int sa = instr->Sa3Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", sa); ++} ++ ++void Decoder::PrintUi5(Instruction* instr) { ++ int ui = instr->Ui5Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ui); ++} ++ ++void Decoder::PrintUi6(Instruction* instr) { ++ int ui = instr->Ui6Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ui); ++} ++ ++void Decoder::PrintUi12(Instruction* instr) { ++ int ui = instr->Ui12Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ui); ++} ++ ++void Decoder::PrintXi12(Instruction* instr) { ++ int xi = instr->Ui12Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x", xi); ++} ++ ++void Decoder::PrintMsbd(Instruction* instr) { ++ int msbd = instr->MsbdValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", msbd); ++} ++ ++void Decoder::PrintLsbd(Instruction* instr) { ++ int lsbd = instr->LsbdValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", lsbd); ++} ++ ++void Decoder::PrintMsbw(Instruction* instr) { ++ int msbw = instr->MsbwValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", msbw); ++} ++ ++void Decoder::PrintLsbw(Instruction* instr) { ++ int lsbw = instr->LsbwValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", lsbw); ++} ++ ++void Decoder::PrintSi12(Instruction* instr) { ++ int si = ((instr->Si12Value()) << (32 - kSi12Bits)) >> (32 - kSi12Bits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintSi14(Instruction* instr) { ++ int si = ((instr->Si14Value()) << (32 - kSi14Bits)) >> (32 - kSi14Bits); ++ si <<= 2; ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintSi16(Instruction* instr) { ++ int si = ((instr->Si16Value()) << (32 - kSi16Bits)) >> (32 - kSi16Bits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintSi20(Instruction* instr) { ++ int si = ((instr->Si20Value()) << (32 - kSi20Bits)) >> (32 - kSi20Bits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintCj(Instruction* instr) { ++ int cj = instr->CjValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", cj); ++} ++ ++void Decoder::PrintCd(Instruction* instr) { ++ int cd = instr->CdValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", cd); ++} ++ ++void Decoder::PrintCa(Instruction* instr) { ++ int ca = instr->CaValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ca); ++} ++ ++void Decoder::PrintCode(Instruction* instr) { ++ int code = instr->CodeValue(); ++ out_buffer_pos_ += ++ SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x(%u)", code, code); ++} ++ ++void Decoder::PrintHint5(Instruction* instr) { ++ int hint = instr->Hint5Value(); ++ out_buffer_pos_ += ++ SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x(%u)", hint, hint); ++} ++ ++void Decoder::PrintHint15(Instruction* instr) { ++ int hint = instr->Hint15Value(); ++ out_buffer_pos_ += ++ SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x(%u)", 
hint, hint); ++} ++ ++void Decoder::PrintPCOffs16(Instruction* instr) { ++ int n_bits = 2; ++ int offs = instr->Offs16Value(); ++ int target = ((offs << n_bits) << (32 - kOffsLowBits - n_bits)) >> ++ (32 - kOffsLowBits - n_bits); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "%s", ++ converter_.NameOfAddress(reinterpret_cast(instr) + target)); ++} ++ ++void Decoder::PrintPCOffs21(Instruction* instr) { ++ int n_bits = 2; ++ int offs = instr->Offs21Value(); ++ int target = ++ ((offs << n_bits) << (32 - kOffsLowBits - kOffs21HighBits - n_bits)) >> ++ (32 - kOffsLowBits - kOffs21HighBits - n_bits); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "%s", ++ converter_.NameOfAddress(reinterpret_cast(instr) + target)); ++} ++ ++void Decoder::PrintPCOffs26(Instruction* instr) { ++ int n_bits = 2; ++ int offs = instr->Offs26Value(); ++ int target = ++ ((offs << n_bits) << (32 - kOffsLowBits - kOffs26HighBits - n_bits)) >> ++ (32 - kOffsLowBits - kOffs26HighBits - n_bits); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "%s", ++ converter_.NameOfAddress(reinterpret_cast(instr) + target)); ++} ++ ++void Decoder::PrintOffs16(Instruction* instr) { ++ int offs = instr->Offs16Value(); ++ offs <<= (32 - kOffsLowBits); ++ offs >>= (32 - kOffsLowBits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs); ++} ++ ++void Decoder::PrintOffs21(Instruction* instr) { ++ int offs = instr->Offs21Value(); ++ offs <<= (32 - kOffsLowBits - kOffs21HighBits); ++ offs >>= (32 - kOffsLowBits - kOffs21HighBits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs); ++} ++ ++void Decoder::PrintOffs26(Instruction* instr) { ++ int offs = instr->Offs26Value(); ++ offs <<= (32 - kOffsLowBits - kOffs26HighBits); ++ offs >>= (32 - kOffsLowBits - kOffs26HighBits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs); ++} ++ ++// Handle all register based formatting in this function to reduce the ++// complexity of FormatOption. ++int Decoder::FormatRegister(Instruction* instr, const char* format) { ++ DCHECK_EQ(format[0], 'r'); ++ if (format[1] == 'j') { // 'rj: Rj register. ++ int reg = instr->RjValue(); ++ PrintRegister(reg); ++ return 2; ++ } else if (format[1] == 'k') { // 'rk: rk register. ++ int reg = instr->RkValue(); ++ PrintRegister(reg); ++ return 2; ++ } else if (format[1] == 'd') { // 'rd: rd register. ++ int reg = instr->RdValue(); ++ PrintRegister(reg); ++ return 2; ++ } ++ UNREACHABLE(); ++ return 0; ++} ++ ++// Handle all FPUregister based formatting in this function to reduce the ++// complexity of FormatOption. ++int Decoder::FormatFPURegister(Instruction* instr, const char* format) { ++ DCHECK_EQ(format[0], 'f'); ++ if (format[1] == 'j') { // 'fj: fj register. ++ int reg = instr->FjValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } else if (format[1] == 'k') { // 'fk: fk register. ++ int reg = instr->FkValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } else if (format[1] == 'd') { // 'fd: fd register. ++ int reg = instr->FdValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } else if (format[1] == 'a') { // 'fa: fa register. ++ int reg = instr->FaValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } ++ UNREACHABLE(); ++ return 0; ++} ++ ++// FormatOption takes a formatting string and interprets it based on ++// the current instructions. The format string points to the first ++// character of the option string (the option escape has already been ++// consumed by the caller.) 
FormatOption returns the number of ++// characters that were consumed from the formatting string. ++int Decoder::FormatOption(Instruction* instr, const char* format) { ++ switch (format[0]) { ++ case 'c': { ++ switch (format[1]) { ++ case 'a': ++ DCHECK(STRING_STARTS_WITH(format, "ca")); ++ PrintCa(instr); ++ return 2; ++ case 'd': ++ DCHECK(STRING_STARTS_WITH(format, "cd")); ++ PrintCd(instr); ++ return 2; ++ case 'j': ++ DCHECK(STRING_STARTS_WITH(format, "cj")); ++ PrintCj(instr); ++ return 2; ++ case 'o': ++ DCHECK(STRING_STARTS_WITH(format, "code")); ++ PrintCode(instr); ++ return 4; ++ } ++ } ++ case 'f': { ++ return FormatFPURegister(instr, format); ++ } ++ case 'h': { ++ if (format[4] == '5') { ++ DCHECK(STRING_STARTS_WITH(format, "hint5")); ++ PrintHint5(instr); ++ return 5; ++ } else if (format[4] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "hint15")); ++ PrintHint15(instr); ++ return 6; ++ } ++ break; ++ } ++ case 'l': { ++ switch (format[3]) { ++ case 'w': ++ DCHECK(STRING_STARTS_WITH(format, "lsbw")); ++ PrintLsbw(instr); ++ return 4; ++ case 'd': ++ DCHECK(STRING_STARTS_WITH(format, "lsbd")); ++ PrintLsbd(instr); ++ return 4; ++ default: ++ return 0; ++ } ++ } ++ case 'm': { ++ if (format[3] == 'w') { ++ DCHECK(STRING_STARTS_WITH(format, "msbw")); ++ PrintMsbw(instr); ++ } else if (format[3] == 'd') { ++ DCHECK(STRING_STARTS_WITH(format, "msbd")); ++ PrintMsbd(instr); ++ } ++ return 4; ++ } ++ case 'o': { ++ if (format[1] == 'f') { ++ if (format[4] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "offs16")); ++ PrintOffs16(instr); ++ return 6; ++ } else if (format[4] == '2') { ++ if (format[5] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "offs21")); ++ PrintOffs21(instr); ++ return 6; ++ } else if (format[5] == '6') { ++ DCHECK(STRING_STARTS_WITH(format, "offs26")); ++ PrintOffs26(instr); ++ return 6; ++ } ++ } ++ } ++ break; ++ } ++ case 'p': { ++ if (format[6] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "pcoffs16")); ++ PrintPCOffs16(instr); ++ return 8; ++ } else if (format[6] == '2') { ++ if (format[7] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "pcoffs21")); ++ PrintPCOffs21(instr); ++ return 8; ++ } else if (format[7] == '6') { ++ DCHECK(STRING_STARTS_WITH(format, "pcoffs26")); ++ PrintPCOffs26(instr); ++ return 8; ++ } ++ } ++ break; ++ } ++ case 'r': { ++ return FormatRegister(instr, format); ++ break; ++ } ++ case 's': { ++ switch (format[1]) { ++ case 'a': ++ if (format[2] == '2') { ++ DCHECK(STRING_STARTS_WITH(format, "sa2")); ++ PrintSa2(instr); ++ } else if (format[2] == '3') { ++ DCHECK(STRING_STARTS_WITH(format, "sa3")); ++ PrintSa3(instr); ++ } ++ return 3; ++ case 'i': ++ if (format[2] == '2') { ++ DCHECK(STRING_STARTS_WITH(format, "si20")); ++ PrintSi20(instr); ++ return 4; ++ } else if (format[2] == '1') { ++ switch (format[3]) { ++ case '2': ++ DCHECK(STRING_STARTS_WITH(format, "si12")); ++ PrintSi12(instr); ++ return 4; ++ case '4': ++ DCHECK(STRING_STARTS_WITH(format, "si14")); ++ PrintSi14(instr); ++ return 4; ++ case '6': ++ DCHECK(STRING_STARTS_WITH(format, "si16")); ++ PrintSi16(instr); ++ return 4; ++ default: ++ break; ++ } ++ } ++ break; ++ default: ++ break; ++ } ++ break; ++ } ++ case 'u': { ++ if (format[2] == '5') { ++ DCHECK(STRING_STARTS_WITH(format, "ui5")); ++ PrintUi5(instr); ++ return 3; ++ } else if (format[2] == '6') { ++ DCHECK(STRING_STARTS_WITH(format, "ui6")); ++ PrintUi6(instr); ++ return 3; ++ } else if (format[2] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "ui12")); ++ PrintUi12(instr); ++ return 4; ++ } ++ break; ++ 
} ++ case 'x': { ++ DCHECK(STRING_STARTS_WITH(format, "xi12")); ++ PrintXi12(instr); ++ return 4; ++ } ++ default: ++ UNREACHABLE(); ++ } ++ return 0; ++} ++ ++// Format takes a formatting string for a whole instruction and prints it into ++// the output buffer. All escaped options are handed to FormatOption to be ++// parsed further. ++void Decoder::Format(Instruction* instr, const char* format) { ++ char cur = *format++; ++ while ((cur != 0) && (out_buffer_pos_ < (out_buffer_.length() - 1))) { ++ if (cur == '\'') { // Single quote is used as the formatting escape. ++ format += FormatOption(instr, format); ++ } else { ++ out_buffer_[out_buffer_pos_++] = cur; ++ } ++ cur = *format++; ++ } ++ out_buffer_[out_buffer_pos_] = '\0'; ++} ++ ++// For currently unimplemented decodings the disassembler calls Unknown(instr) ++// which will just print "unknown" of the instruction bits. ++void Decoder::Unknown(Instruction* instr) { Format(instr, "unknown"); } ++ ++int Decoder::DecodeBreakInstr(Instruction* instr) { ++ // This is already known to be BREAK instr, just extract the code. ++ /*if (instr->Bits(14, 0) == static_cast(kMaxStopCode)) { ++ // This is stop(msg). ++ Format(instr, "break, code: 'code"); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "\n%p %08" PRIx64, ++ static_cast(reinterpret_cast(instr + kInstrSize)), ++ reinterpret_cast( ++ *reinterpret_cast(instr + kInstrSize))); ++ // Size 3: the break_ instr, plus embedded 64-bit char pointer. ++ return 3 * kInstrSize; ++ } else { ++ Format(instr, "break, code: 'code"); ++ return kInstrSize; ++ }*/ ++ Format(instr, "break code: 'code"); ++ return kInstrSize; ++} //=================================================== ++ ++void Decoder::DecodeTypekOp6(Instruction* instr) { ++ switch (instr->Bits(31, 26) << 26) { ++ case ADDU16I_D: ++ Format(instr, "addu16i.d 'rd, 'rj, 'si16"); ++ break; ++ case BEQZ: ++ Format(instr, "beqz 'rj, 'offs21 -> 'pcoffs21"); ++ break; ++ case BNEZ: ++ Format(instr, "bnez 'rj, 'offs21 -> 'pcoffs21"); ++ break; ++ case BCZ: ++ if (instr->Bit(8)) ++ Format(instr, "bcnez fcc'cj, 'offs21 -> 'pcoffs21"); ++ else ++ Format(instr, "bceqz fcc'cj, 'offs21 -> 'pcoffs21"); ++ break; ++ case JIRL: ++ Format(instr, "jirl 'rd, 'rj, 'offs16"); ++ break; ++ case B: ++ Format(instr, "b 'offs26 -> 'pcoffs26"); ++ break; ++ case BL: ++ Format(instr, "bl 'offs26 -> 'pcoffs26"); ++ break; ++ case BEQ: ++ Format(instr, "beq 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BNE: ++ Format(instr, "bne 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BLT: ++ Format(instr, "blt 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BGE: ++ Format(instr, "bge 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BLTU: ++ Format(instr, "bltu 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BGEU: ++ Format(instr, "bgeu 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp7(Instruction* instr) { ++ switch (instr->Bits(31, 25) << 25) { ++ case LU12I_W: ++ Format(instr, "lu12i.w 'rd, 'si20"); ++ break; ++ case LU32I_D: ++ Format(instr, "lu32i.d 'rd, 'si20"); ++ break; ++ case PCADDI: ++ Format(instr, "pcaddi 'rd, 'si20"); ++ break; ++ case PCALAU12I: ++ Format(instr, "pcalau12i 'rd, 'si20"); ++ break; ++ case PCADDU12I: ++ Format(instr, "pcaddu12i 'rd, 'si20"); ++ break; ++ case PCADDU18I: ++ Format(instr, "pcaddu18i 'rd, 'si20"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp8(Instruction* instr) { ++ switch (instr->Bits(31, 24) << 
24) { ++ case LDPTR_W: ++ Format(instr, "ldptr.w 'rd, 'rj, 'si14"); ++ break; ++ case STPTR_W: ++ Format(instr, "stptr.w 'rd, 'rj, 'si14"); ++ break; ++ case LDPTR_D: ++ Format(instr, "ldptr.d 'rd, 'rj, 'si14"); ++ break; ++ case STPTR_D: ++ Format(instr, "stptr.d 'rd, 'rj, 'si14"); ++ break; ++ case LL_W: ++ Format(instr, "ll.w 'rd, 'rj, 'si14"); ++ break; ++ case SC_W: ++ Format(instr, "sc.w 'rd, 'rj, 'si14"); ++ break; ++ case LL_D: ++ Format(instr, "ll.d 'rd, 'rj, 'si14"); ++ break; ++ case SC_D: ++ Format(instr, "sc.d 'rd, 'rj, 'si14"); ++ break; ++ case CSR: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp10(Instruction* instr) { ++ switch (instr->Bits(31, 22) << 22) { ++ case BSTR_W: { ++ if (instr->Bit(21) != 0) { ++ if (instr->Bit(15) == 0) { ++ Format(instr, "bstrins.w 'rd, 'rj, 'msbw, 'lsbw"); ++ } else { ++ Format(instr, "bstrpick.w 'rd, 'rj, 'msbw, 'lsbw"); ++ } ++ } ++ break; ++ } ++ case BSTRINS_D: ++ Format(instr, "bstrins.d 'rd, 'rj, 'msbd, 'lsbd"); ++ break; ++ case BSTRPICK_D: ++ Format(instr, "bstrpick.d 'rd, 'rj, 'msbd, 'lsbd"); ++ break; ++ case SLTI: ++ Format(instr, "slti 'rd, 'rj, 'si12"); ++ break; ++ case SLTUI: ++ Format(instr, "sltui 'rd, 'rj, 'si12"); ++ break; ++ case ADDI_W: ++ Format(instr, "addi.w 'rd, 'rj, 'si12"); ++ break; ++ case ADDI_D: ++ Format(instr, "addi.d 'rd, 'rj, 'si12"); ++ break; ++ case LU52I_D: ++ Format(instr, "lu52i.d 'rd, 'rj, 'si12"); ++ break; ++ case ANDI: ++ Format(instr, "andi 'rd, 'rj, 'xi12"); ++ break; ++ case ORI: ++ Format(instr, "ori 'rd, 'rj, 'xi12"); ++ break; ++ case XORI: ++ Format(instr, "xori 'rd, 'rj, 'xi12"); ++ break; ++ case LD_B: ++ Format(instr, "ld.b 'rd, 'rj, 'si12"); ++ break; ++ case LD_H: ++ Format(instr, "ld.h 'rd, 'rj, 'si12"); ++ break; ++ case LD_W: ++ Format(instr, "ld.w 'rd, 'rj, 'si12"); ++ break; ++ case LD_D: ++ Format(instr, "ld.d 'rd, 'rj, 'si12"); ++ break; ++ case ST_B: ++ Format(instr, "st.b 'rd, 'rj, 'si12"); ++ break; ++ case ST_H: ++ Format(instr, "st.h 'rd, 'rj, 'si12"); ++ break; ++ case ST_W: ++ Format(instr, "st.w 'rd, 'rj, 'si12"); ++ break; ++ case ST_D: ++ Format(instr, "st.d 'rd, 'rj, 'si12"); ++ break; ++ case LD_BU: ++ Format(instr, "ld.bu 'rd, 'rj, 'si12"); ++ break; ++ case LD_HU: ++ Format(instr, "ld.hu 'rd, 'rj, 'si12"); ++ break; ++ case LD_WU: ++ Format(instr, "ld.wu 'rd, 'rj, 'si12"); ++ break; ++ case PRELD: ++ Format(instr, "preld 'hint5, 'rj, 'si12"); ++ break; ++ case FLD_S: ++ Format(instr, "fld.s 'fd, 'rj, 'si12"); ++ break; ++ case FST_S: ++ Format(instr, "fst.s 'fd, 'rj, 'si12"); ++ break; ++ case FLD_D: ++ Format(instr, "fld.d 'fd, 'rj, 'si12"); ++ break; ++ case FST_D: ++ Format(instr, "fst.d 'fd, 'rj, 'si12"); ++ break; ++ case CACHE: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp12(Instruction* instr) { ++ switch (instr->Bits(31, 20) << 20) { ++ case FMADD_S: ++ Format(instr, "fmadd.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FMADD_D: ++ Format(instr, "fmadd.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FMSUB_S: ++ Format(instr, "fmsub.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FMSUB_D: ++ Format(instr, "fmsub.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMADD_S: ++ Format(instr, "fnmadd.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMADD_D: ++ Format(instr, "fnmadd.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMSUB_S: ++ Format(instr, "fnmsub.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMSUB_D: ++ Format(instr, "fnmsub.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FCMP_COND_S: ++ switch (instr->Bits(19, 15)) { ++ case CAF: ++ 
Format(instr, "fcmp.caf.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SAF: ++ Format(instr, "fcmp.saf.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CLT: ++ Format(instr, "fcmp.clt.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CEQ: ++ Format(instr, "fcmp.ceq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SEQ: ++ Format(instr, "fcmp.seq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CLE: ++ Format(instr, "fcmp.cle.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SLE: ++ Format(instr, "fcmp.sle.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CUN: ++ Format(instr, "fcmp.cun.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SUN: ++ Format(instr, "fcmp.sun.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CULT: ++ Format(instr, "fcmp.cult.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SULT: ++ Format(instr, "fcmp.sult.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CUEQ: ++ Format(instr, "fcmp.cueq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SUEQ: ++ Format(instr, "fcmp.sueq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CULE: ++ Format(instr, "fcmp.cule.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SULE: ++ Format(instr, "fcmp.sule.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CNE: ++ Format(instr, "fcmp.cne.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SNE: ++ Format(instr, "fcmp.sne.s fcc'cd, 'fj, 'fk"); ++ break; ++ case COR: ++ Format(instr, "fcmp.cor.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SOR: ++ Format(instr, "fcmp.sor.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CUNE: ++ Format(instr, "fcmp.cune.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SUNE: ++ Format(instr, "fcmp.sune.s fcc'cd, 'fj, 'fk"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ break; ++ case FCMP_COND_D: ++ switch (instr->Bits(19, 15)) { ++ case CAF: ++ Format(instr, "fcmp.caf.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SAF: ++ Format(instr, "fcmp.saf.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CLT: ++ Format(instr, "fcmp.clt.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CEQ: ++ Format(instr, "fcmp.ceq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SEQ: ++ Format(instr, "fcmp.seq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CLE: ++ Format(instr, "fcmp.cle.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SLE: ++ Format(instr, "fcmp.sle.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CUN: ++ Format(instr, "fcmp.cun.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SUN: ++ Format(instr, "fcmp.sun.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CULT: ++ Format(instr, "fcmp.cult.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SULT: ++ Format(instr, "fcmp.sult.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CUEQ: ++ Format(instr, "fcmp.cueq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SUEQ: ++ Format(instr, "fcmp.sueq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CULE: ++ Format(instr, "fcmp.cule.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SULE: ++ Format(instr, "fcmp.sule.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CNE: ++ Format(instr, "fcmp.cne.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SNE: ++ Format(instr, "fcmp.sne.d fcc'cd, 'fj, 'fk"); ++ break; ++ case COR: ++ Format(instr, "fcmp.cor.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SOR: ++ Format(instr, "fcmp.sor.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CUNE: ++ Format(instr, "fcmp.cune.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SUNE: ++ Format(instr, "fcmp.sune.d fcc'cd, 'fj, 'fk"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ break; ++ case FSEL: ++ Format(instr, "fsel 'fd, 'fj, 'fk, fcc'ca"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp14(Instruction* instr) { ++ switch (instr->Bits(31, 18) << 18) { ++ case ALSL: ++ if (instr->Bit(17)) ++ Format(instr, "alsl.wu 'rd, 'rj, 'rk, 'sa2"); ++ else ++ Format(instr, "alsl.w 'rd, 'rj, 'rk, 'sa2"); ++ break; ++ case 
BYTEPICK_W: ++ Format(instr, "bytepick.w 'rd, 'rj, 'rk, 'sa2"); ++ break; ++ case BYTEPICK_D: ++ Format(instr, "bytepick.d 'rd, 'rj, 'rk, 'sa3"); ++ break; ++ case ALSL_D: ++ Format(instr, "alsl.d 'rd, 'rj, 'rk, 'sa2"); ++ break; ++ case SLLI: ++ if (instr->Bit(16)) ++ Format(instr, "slli.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "slli.w 'rd, 'rj, 'ui5"); ++ break; ++ case SRLI: ++ if (instr->Bit(16)) ++ Format(instr, "srli.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "srli.w 'rd, 'rj, 'ui5"); ++ break; ++ case SRAI: ++ if (instr->Bit(16)) ++ Format(instr, "srai.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "srai.w 'rd, 'rj, 'ui5"); ++ break; ++ case ROTRI: ++ if (instr->Bit(16)) ++ Format(instr, "rotri.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "rotri.w 'rd, 'rj, 'ui5"); ++ break; ++ case LDDIR: ++ case LDPTE: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++int Decoder::DecodeTypekOp17(Instruction* instr) { ++ switch (instr->Bits(31, 15) << 15) { ++ case ADD_W: ++ Format(instr, "add.w 'rd, 'rj, 'rk"); ++ break; ++ case ADD_D: ++ Format(instr, "add.d 'rd, 'rj, 'rk"); ++ break; ++ case SUB_W: ++ Format(instr, "sub.w 'rd, 'rj, 'rk"); ++ break; ++ case SUB_D: ++ Format(instr, "sub.d 'rd, 'rj, 'rk"); ++ break; ++ case SLT: ++ Format(instr, "slt 'rd, 'rj, 'rk"); ++ break; ++ case SLTU: ++ Format(instr, "sltu 'rd, 'rj, 'rk"); ++ break; ++ case MASKEQZ: ++ Format(instr, "maskeqz 'rd, 'rj, 'rk"); ++ break; ++ case MASKNEZ: ++ Format(instr, "masknez 'rd, 'rj, 'rk"); ++ break; ++ case NOR: ++ Format(instr, "nor 'rd, 'rj, 'rk"); ++ break; ++ case AND: ++ Format(instr, "and 'rd, 'rj, 'rk"); ++ break; ++ case OR: ++ Format(instr, "or 'rd, 'rj, 'rk"); ++ break; ++ case XOR: ++ Format(instr, "xor 'rd, 'rj, 'rk"); ++ break; ++ case ORN: ++ Format(instr, "orn 'rd, 'rj, 'rk"); ++ break; ++ case ANDN: ++ Format(instr, "andn 'rd, 'rj, 'rk"); ++ break; ++ case SLL_W: ++ Format(instr, "sll.w 'rd, 'rj, 'rk"); ++ break; ++ case SRL_W: ++ Format(instr, "srl.w 'rd, 'rj, 'rk"); ++ break; ++ case SRA_W: ++ Format(instr, "sra.w 'rd, 'rj, 'rk"); ++ break; ++ case SLL_D: ++ Format(instr, "sll.d 'rd, 'rj, 'rk"); ++ break; ++ case SRL_D: ++ Format(instr, "srl.d 'rd, 'rj, 'rk"); ++ break; ++ case SRA_D: ++ Format(instr, "sra.d 'rd, 'rj, 'rk"); ++ break; ++ case ROTR_D: ++ Format(instr, "rotr.d 'rd, 'rj, 'rk"); ++ break; ++ case ROTR_W: ++ Format(instr, "rotr.w 'rd, 'rj, 'rk"); ++ break; ++ case MUL_W: ++ Format(instr, "mul.w 'rd, 'rj, 'rk"); ++ break; ++ case MULH_W: ++ Format(instr, "mulh.w 'rd, 'rj, 'rk"); ++ break; ++ case MULH_WU: ++ Format(instr, "mulh.wu 'rd, 'rj, 'rk"); ++ break; ++ case MUL_D: ++ Format(instr, "mul.d 'rd, 'rj, 'rk"); ++ break; ++ case MULH_D: ++ Format(instr, "mulh.d 'rd, 'rj, 'rk"); ++ break; ++ case MULH_DU: ++ Format(instr, "mulh.du 'rd, 'rj, 'rk"); ++ break; ++ case MULW_D_W: ++ Format(instr, "mulw.d.w 'rd, 'rj, 'rk"); ++ break; ++ case MULW_D_WU: ++ Format(instr, "mulw.d.wu 'rd, 'rj, 'rk"); ++ break; ++ case DIV_W: ++ Format(instr, "div.w 'rd, 'rj, 'rk"); ++ break; ++ case MOD_W: ++ Format(instr, "mod.w 'rd, 'rj, 'rk"); ++ break; ++ case DIV_WU: ++ Format(instr, "div.wu 'rd, 'rj, 'rk"); ++ break; ++ case MOD_WU: ++ Format(instr, "mod.wu 'rd, 'rj, 'rk"); ++ break; ++ case DIV_D: ++ Format(instr, "div.d 'rd, 'rj, 'rk"); ++ break; ++ case MOD_D: ++ Format(instr, "mod.d 'rd, 'rj, 'rk"); ++ break; ++ case DIV_DU: ++ Format(instr, "div.du 'rd, 'rj, 'rk"); ++ break; ++ case MOD_DU: ++ Format(instr, "mod.du 'rd, 'rj, 'rk"); ++ break; ++ case BREAK: ++ return DecodeBreakInstr(instr); ++ case FADD_S: ++ 
Format(instr, "fadd.s 'fd, 'fj, 'fk"); ++ break; ++ case FADD_D: ++ Format(instr, "fadd.d 'fd, 'fj, 'fk"); ++ break; ++ case FSUB_S: ++ Format(instr, "fsub.s 'fd, 'fj, 'fk"); ++ break; ++ case FSUB_D: ++ Format(instr, "fsub.d 'fd, 'fj, 'fk"); ++ break; ++ case FMUL_S: ++ Format(instr, "fmul.s 'fd, 'fj, 'fk"); ++ break; ++ case FMUL_D: ++ Format(instr, "fmul.d 'fd, 'fj, 'fk"); ++ break; ++ case FDIV_S: ++ Format(instr, "fdiv.s 'fd, 'fj, 'fk"); ++ break; ++ case FDIV_D: ++ Format(instr, "fdiv.d 'fd, 'fj, 'fk"); ++ break; ++ case FMAX_S: ++ Format(instr, "fmax.s 'fd, 'fj, 'fk"); ++ break; ++ case FMAX_D: ++ Format(instr, "fmax.d 'fd, 'fj, 'fk"); ++ break; ++ case FMIN_S: ++ Format(instr, "fmin.s 'fd, 'fj, 'fk"); ++ break; ++ case FMIN_D: ++ Format(instr, "fmin.d 'fd, 'fj, 'fk"); ++ break; ++ case FMAXA_S: ++ Format(instr, "fmaxa.s 'fd, 'fj, 'fk"); ++ break; ++ case FMAXA_D: ++ Format(instr, "fmaxa.d 'fd, 'fj, 'fk"); ++ break; ++ case FMINA_S: ++ Format(instr, "fmina.s 'fd, 'fj, 'fk"); ++ break; ++ case FMINA_D: ++ Format(instr, "fmina.d 'fd, 'fj, 'fk"); ++ break; ++ case LDX_B: ++ Format(instr, "ldx.b 'rd, 'rj, 'rk"); ++ break; ++ case LDX_H: ++ Format(instr, "ldx.h 'rd, 'rj, 'rk"); ++ break; ++ case LDX_W: ++ Format(instr, "ldx.w 'rd, 'rj, 'rk"); ++ break; ++ case LDX_D: ++ Format(instr, "ldx.d 'rd, 'rj, 'rk"); ++ break; ++ case STX_B: ++ Format(instr, "stx.b 'rd, 'rj, 'rk"); ++ break; ++ case STX_H: ++ Format(instr, "stx.h 'rd, 'rj, 'rk"); ++ break; ++ case STX_W: ++ Format(instr, "stx.w 'rd, 'rj, 'rk"); ++ break; ++ case STX_D: ++ Format(instr, "stx.d 'rd, 'rj, 'rk"); ++ break; ++ case LDX_BU: ++ Format(instr, "ldx.bu 'rd, 'rj, 'rk"); ++ break; ++ case LDX_HU: ++ Format(instr, "ldx.hu 'rd, 'rj, 'rk"); ++ break; ++ case LDX_WU: ++ Format(instr, "ldx.wu 'rd, 'rj, 'rk"); ++ break; ++ case PRELDX: ++ Format(instr, "preldx 'hint5, 'rj, 'rk"); ++ break; ++ case FLDX_S: ++ Format(instr, "fldx.s 'fd, 'rj, 'rk"); ++ break; ++ case FLDX_D: ++ Format(instr, "fldx.d 'fd, 'rj, 'rk"); ++ break; ++ case FSTX_S: ++ Format(instr, "fstx.s 'fd, 'rj, 'rk"); ++ break; ++ case FSTX_D: ++ Format(instr, "fstx.d 'fd, 'rj, 'rk"); ++ break; ++ case ASRTLE_D: ++ Format(instr, "asrtle.d 'rj, 'rk"); ++ break; ++ case ASRTGT_D: ++ Format(instr, "asrtgt.d 'rj, 'rk"); ++ break; ++ case SYSCALL: ++ Format(instr, "syscall code 'code"); ++ break; ++ case HYPCALL: ++ Format(instr, "hypcall code 'code"); ++ break; ++ case AMSWAP_W: ++ Format(instr, "amswap.w 'rd, 'rk, 'rj"); ++ break; ++ case AMSWAP_D: ++ Format(instr, "amswap.d 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_W: ++ Format(instr, "amadd.w 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_D: ++ Format(instr, "amadd.d 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_W: ++ Format(instr, "amand.w 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_D: ++ Format(instr, "amand.d 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_W: ++ Format(instr, "amor.w 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_D: ++ Format(instr, "amor.d 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_W: ++ Format(instr, "amxor.w 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_D: ++ Format(instr, "amxor.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_W: ++ Format(instr, "ammax.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_D: ++ Format(instr, "ammax.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_W: ++ Format(instr, "ammin.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_D: ++ Format(instr, "ammin.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_WU: ++ Format(instr, "ammax.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DU: ++ Format(instr, "ammax.du 'rd, 'rk, 'rj"); ++ break; ++ case 
AMMIN_WU: ++ Format(instr, "ammin.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DU: ++ Format(instr, "ammin.du 'rd, 'rk, 'rj"); ++ break; ++ case AMSWAP_DB_W: ++ Format(instr, "amswap_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMSWAP_DB_D: ++ Format(instr, "amswap_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_DB_W: ++ Format(instr, "amadd_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_DB_D: ++ Format(instr, "amadd_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_DB_W: ++ Format(instr, "amand_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_DB_D: ++ Format(instr, "amand_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_DB_W: ++ Format(instr, "amor_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_DB_D: ++ Format(instr, "amor_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_DB_W: ++ Format(instr, "amxor_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_DB_D: ++ Format(instr, "amxor_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_W: ++ Format(instr, "ammax_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_D: ++ Format(instr, "ammax_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_W: ++ Format(instr, "ammin_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_D: ++ Format(instr, "ammin_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_WU: ++ Format(instr, "ammax_db.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_DU: ++ Format(instr, "ammax_db.du 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_WU: ++ Format(instr, "ammin_db.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_DU: ++ Format(instr, "ammin_db.du 'rd, 'rk, 'rj"); ++ break; ++ case DBAR: ++ Format(instr, "dbar 'hint15"); ++ break; ++ case IBAR: ++ Format(instr, "ibar 'hint15"); ++ break; ++ case FLDGT_S: ++ Format(instr, "fldgt.s 'fd, 'rj, 'rk"); ++ break; ++ case FLDGT_D: ++ Format(instr, "fldgt.d 'fd, 'rj, 'rk"); ++ break; ++ case FLDLE_S: ++ Format(instr, "fldle.s 'fd, 'rj, 'rk"); ++ break; ++ case FLDLE_D: ++ Format(instr, "fldle.d 'fd, 'rj, 'rk"); ++ break; ++ case FSTGT_S: ++ Format(instr, "fstgt.s 'fd, 'rj, 'rk"); ++ break; ++ case FSTGT_D: ++ Format(instr, "fstgt.d 'fd, 'rj, 'rk"); ++ break; ++ case FSTLE_S: ++ Format(instr, "fstle.s 'fd, 'rj, 'rk"); ++ break; ++ case FSTLE_D: ++ Format(instr, "fstle.d 'fd, 'rj, 'rk"); ++ break; ++ case LDGT_B: ++ Format(instr, "ldgt.b 'rd, 'rj, 'rk"); ++ break; ++ case LDGT_H: ++ Format(instr, "ldgt.h 'rd, 'rj, 'rk"); ++ break; ++ case LDGT_W: ++ Format(instr, "ldgt.w 'rd, 'rj, 'rk"); ++ break; ++ case LDGT_D: ++ Format(instr, "ldgt.d 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_B: ++ Format(instr, "ldle.b 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_H: ++ Format(instr, "ldle.h 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_W: ++ Format(instr, "ldle.w 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_D: ++ Format(instr, "ldle.d 'rd, 'rj, 'rk"); ++ break; ++ case STGT_B: ++ Format(instr, "stgt.b 'rd, 'rj, 'rk"); ++ break; ++ case STGT_H: ++ Format(instr, "stgt.h 'rd, 'rj, 'rk"); ++ break; ++ case STGT_W: ++ Format(instr, "stgt.w 'rd, 'rj, 'rk"); ++ break; ++ case STGT_D: ++ Format(instr, "stgt.d 'rd, 'rj, 'rk"); ++ break; ++ case STLE_B: ++ Format(instr, "stle.b 'rd, 'rj, 'rk"); ++ break; ++ case STLE_H: ++ Format(instr, "stle.h 'rd, 'rj, 'rk"); ++ break; ++ case STLE_W: ++ Format(instr, "stle.w 'rd, 'rj, 'rk"); ++ break; ++ case STLE_D: ++ Format(instr, "stle.d 'rd, 'rj, 'rk"); ++ break; ++ case FSCALEB_S: ++ Format(instr, "fscaleb.s 'fd, 'fj, 'fk"); ++ break; ++ case FSCALEB_D: ++ Format(instr, "fscaleb.d 'fd, 'fj, 'fk"); ++ break; ++ case FCOPYSIGN_S: ++ Format(instr, "fcopysign.s 'fd, 'fj, 'fk"); ++ break; ++ case FCOPYSIGN_D: ++ Format(instr, "fcopysign.d 
'fd, 'fj, 'fk"); ++ break; ++ case CRC_W_B_W: ++ Format(instr, "crc.w.b.w 'rd, 'rj, 'rk"); ++ break; ++ case CRC_W_H_W: ++ Format(instr, "crc.w.h.w 'rd, 'rj, 'rk"); ++ break; ++ case CRC_W_W_W: ++ Format(instr, "crc.w.w.w 'rd, 'rj, 'rk"); ++ break; ++ case CRC_W_D_W: ++ Format(instr, "crc.w.d.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_B_W: ++ Format(instr, "crcc.w.b.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_H_W: ++ Format(instr, "crcc.w.h.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_W_W: ++ Format(instr, "crcc.w.w.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_D_W: ++ Format(instr, "crcc.w.d.w 'rd, 'rj, 'rk"); ++ break; ++ case WAIT_INVTLB: ++ case DBGCALL: ++ default: ++ UNREACHABLE(); ++ } ++ return kInstrSize; ++} ++ ++void Decoder::DecodeTypekOp22(Instruction* instr) { ++ switch (instr->Bits(31, 10) << 10) { ++ case CLZ_W: ++ Format(instr, "clz.w 'rd, 'rj"); ++ break; ++ case CTZ_W: ++ Format(instr, "ctz.w 'rd, 'rj"); ++ break; ++ case CLZ_D: ++ Format(instr, "clz.d 'rd, 'rj"); ++ break; ++ case CTZ_D: ++ Format(instr, "ctz.d 'rd, 'rj"); ++ break; ++ case REVB_2H: ++ Format(instr, "revb.2h 'rd, 'rj"); ++ break; ++ case REVB_4H: ++ Format(instr, "revb.4h 'rd, 'rj"); ++ break; ++ case REVB_2W: ++ Format(instr, "revb.2w 'rd, 'rj"); ++ break; ++ case REVB_D: ++ Format(instr, "revb.d 'rd, 'rj"); ++ break; ++ case REVH_2W: ++ Format(instr, "revh.2w 'rd, 'rj"); ++ break; ++ case REVH_D: ++ Format(instr, "revh.d 'rd, 'rj"); ++ break; ++ case BITREV_4B: ++ Format(instr, "bitrev.4b 'rd, 'rj"); ++ break; ++ case BITREV_8B: ++ Format(instr, "bitrev.8b 'rd, 'rj"); ++ break; ++ case BITREV_W: ++ Format(instr, "bitrev.w 'rd, 'rj"); ++ break; ++ case BITREV_D: ++ Format(instr, "bitrev.d 'rd, 'rj"); ++ break; ++ case EXT_W_B: ++ Format(instr, "ext.w.b 'rd, 'rj"); ++ break; ++ case EXT_W_H: ++ Format(instr, "ext.w.h 'rd, 'rj"); ++ break; ++ case FABS_S: ++ Format(instr, "fabs.s 'fd, 'fj"); ++ break; ++ case FABS_D: ++ Format(instr, "fabs.d 'fd, 'fj"); ++ break; ++ case FNEG_S: ++ Format(instr, "fneg.s 'fd, 'fj"); ++ break; ++ case FNEG_D: ++ Format(instr, "fneg.d 'fd, 'fj"); ++ break; ++ case FSQRT_S: ++ Format(instr, "fsqrt.s 'fd, 'fj"); ++ break; ++ case FSQRT_D: ++ Format(instr, "fsqrt.d 'fd, 'fj"); ++ break; ++ case FMOV_S: ++ Format(instr, "fmov.s 'fd, 'fj"); ++ break; ++ case FMOV_D: ++ Format(instr, "fmov.d 'fd, 'fj"); ++ break; ++ case MOVGR2FR_W: ++ Format(instr, "movgr2fr.w 'fd, 'rj"); ++ break; ++ case MOVGR2FR_D: ++ Format(instr, "movgr2fr.d 'fd, 'rj"); ++ break; ++ case MOVGR2FRH_W: ++ Format(instr, "movgr2frh.w 'fd, 'rj"); ++ break; ++ case MOVFR2GR_S: ++ Format(instr, "movfr2gr.s 'rd, 'fj"); ++ break; ++ case MOVFR2GR_D: ++ Format(instr, "movfr2gr.d 'rd, 'fj"); ++ break; ++ case MOVFRH2GR_S: ++ Format(instr, "movfrh2gr.s 'rd, 'fj"); ++ break; ++ case MOVGR2FCSR: ++ Format(instr, "movgr2fcsr fcsr, 'rj"); ++ break; ++ case MOVFCSR2GR: ++ Format(instr, "movfcsr2gr 'rd, fcsr"); ++ break; ++ case FCVT_S_D: ++ Format(instr, "fcvt.s.d 'fd, 'fj"); ++ break; ++ case FCVT_D_S: ++ Format(instr, "fcvt.d.s 'fd, 'fj"); ++ break; ++ case FTINTRM_W_S: ++ Format(instr, "ftintrm.w.s 'fd, 'fj"); ++ break; ++ case FTINTRM_W_D: ++ Format(instr, "ftintrm.w.d 'fd, 'fj"); ++ break; ++ case FTINTRM_L_S: ++ Format(instr, "ftintrm.l.s 'fd, 'fj"); ++ break; ++ case FTINTRM_L_D: ++ Format(instr, "ftintrm.l.d 'fd, 'fj"); ++ break; ++ case FTINTRP_W_S: ++ Format(instr, "ftintrp.w.s 'fd, 'fj"); ++ break; ++ case FTINTRP_W_D: ++ Format(instr, "ftintrp.w.d 'fd, 'fj"); ++ break; ++ case FTINTRP_L_S: ++ Format(instr, 
"ftintrp.l.s 'fd, 'fj"); ++ break; ++ case FTINTRP_L_D: ++ Format(instr, "ftintrp.l.d 'fd, 'fj"); ++ break; ++ case FTINTRZ_W_S: ++ Format(instr, "ftintrz.w.s 'fd, 'fj"); ++ break; ++ case FTINTRZ_W_D: ++ Format(instr, "ftintrz.w.d 'fd, 'fj"); ++ break; ++ case FTINTRZ_L_S: ++ Format(instr, "ftintrz.l.s 'fd, 'fj"); ++ break; ++ case FTINTRZ_L_D: ++ Format(instr, "ftintrz.l.d 'fd, 'fj"); ++ break; ++ case FTINTRNE_W_S: ++ Format(instr, "ftintrne.w.s 'fd, 'fj"); ++ break; ++ case FTINTRNE_W_D: ++ Format(instr, "ftintrne.w.d 'fd, 'fj"); ++ break; ++ case FTINTRNE_L_S: ++ Format(instr, "ftintrne.l.s 'fd, 'fj"); ++ break; ++ case FTINTRNE_L_D: ++ Format(instr, "ftintrne.l.d 'fd, 'fj"); ++ break; ++ case FTINT_W_S: ++ Format(instr, "ftint.w.s 'fd, 'fj"); ++ break; ++ case FTINT_W_D: ++ Format(instr, "ftint.w.d 'fd, 'fj"); ++ break; ++ case FTINT_L_S: ++ Format(instr, "ftint.l.s 'fd, 'fj"); ++ break; ++ case FTINT_L_D: ++ Format(instr, "ftint.l.d 'fd, 'fj"); ++ break; ++ case FFINT_S_W: ++ Format(instr, "ffint.s.w 'fd, 'fj"); ++ break; ++ case FFINT_S_L: ++ Format(instr, "ffint.s.l 'fd, 'fj"); ++ break; ++ case FFINT_D_W: ++ Format(instr, "ffint.d.w 'fd, 'fj"); ++ break; ++ case FFINT_D_L: ++ Format(instr, "ffint.d.l 'fd, 'fj"); ++ break; ++ case FRINT_S: ++ Format(instr, "frint.s 'fd, 'fj"); ++ break; ++ case FRINT_D: ++ Format(instr, "frint.d 'fd, 'fj"); ++ break; ++ case MOVFR2CF: ++ Format(instr, "movfr2cf fcc'cd, 'fj"); ++ break; ++ case MOVCF2FR: ++ Format(instr, "movcf2fr 'fd, fcc'cj"); ++ break; ++ case MOVGR2CF: ++ Format(instr, "movgr2cf fcc'cd, 'rj"); ++ break; ++ case MOVCF2GR: ++ Format(instr, "movcf2gr 'rd, fcc'cj"); ++ break; ++ case FRECIP_S: ++ Format(instr, "frecip.s 'fd, 'fj"); ++ break; ++ case FRECIP_D: ++ Format(instr, "frecip.d 'fd, 'fj"); ++ break; ++ case FRSQRT_S: ++ Format(instr, "frsqrt.s 'fd, 'fj"); ++ break; ++ case FRSQRT_D: ++ Format(instr, "frsqrt.d 'fd, 'fj"); ++ break; ++ case FCLASS_S: ++ Format(instr, "fclass.s 'fd, 'fj"); ++ break; ++ case FCLASS_D: ++ Format(instr, "fclass.d 'fd, 'fj"); ++ break; ++ case FLOGB_S: ++ Format(instr, "flogb.s 'fd, 'fj"); ++ break; ++ case FLOGB_D: ++ Format(instr, "flogb.d 'fd, 'fj"); ++ break; ++ case CLO_W: ++ Format(instr, "clo.w 'rd, 'rj"); ++ break; ++ case CTO_W: ++ Format(instr, "cto.w 'rd, 'rj"); ++ break; ++ case CLO_D: ++ Format(instr, "clo.d 'rd, 'rj"); ++ break; ++ case CTO_D: ++ Format(instr, "cto.d 'rd, 'rj"); ++ break; ++ case RDTIMEL_W: ++ Format(instr, "rdtimel.w 'rd, 'rj"); ++ break; ++ case RDTIMEH_W: ++ Format(instr, "rdtimeh.w 'rd, 'rj"); ++ break; ++ case RDTIME_D: ++ Format(instr, "rdtime.d 'rd, 'rj"); ++ break; ++ // case CPUCFG: ++ // Format(instr, "cpucfg 'rd, 'rj"); ++ // break; ++ case IOCSRRD_B: ++ case IOCSRRD_H: ++ case IOCSRRD_W: ++ case IOCSRRD_D: ++ case IOCSRWR_B: ++ case IOCSRWR_H: ++ case IOCSRWR_W: ++ case IOCSRWR_D: ++ case TLBINV: ++ case TLBFLUSH: ++ case TLBP: ++ case TLBR: ++ case TLBWI: ++ case TLBWR: ++ case ERET: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++int Decoder::InstructionDecode(byte* instr_ptr) { ++ Instruction* instr = Instruction::At(instr_ptr); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%08x ", ++ instr->InstructionBits()); ++ switch (instr->InstructionType()) { ++ case Instruction::kOp6Type: { ++ DecodeTypekOp6(instr); ++ break; ++ } ++ case Instruction::kOp7Type: { ++ DecodeTypekOp7(instr); ++ break; ++ } ++ case Instruction::kOp8Type: { ++ DecodeTypekOp8(instr); ++ break; ++ } ++ case Instruction::kOp10Type: { ++ DecodeTypekOp10(instr); ++ break; ++ 
} ++ case Instruction::kOp12Type: { ++ DecodeTypekOp12(instr); ++ break; ++ } ++ case Instruction::kOp14Type: { ++ DecodeTypekOp14(instr); ++ break; ++ } ++ case Instruction::kOp17Type: { ++ return DecodeTypekOp17(instr); ++ } ++ case Instruction::kOp22Type: { ++ DecodeTypekOp22(instr); ++ break; ++ } ++ case Instruction::kUnsupported: { ++ Format(instr, "UNSUPPORTED"); ++ break; ++ } ++ default: { ++ Format(instr, "UNSUPPORTED"); ++ break; ++ } ++ } ++ return kInstrSize; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++//------------------------------------------------------------------------------ ++ ++namespace disasm { ++ ++const char* NameConverter::NameOfAddress(byte* addr) const { ++ v8::internal::SNPrintF(tmp_buffer_, "%p", static_cast(addr)); ++ return tmp_buffer_.begin(); ++} ++ ++const char* NameConverter::NameOfConstant(byte* addr) const { ++ return NameOfAddress(addr); ++} ++ ++const char* NameConverter::NameOfCPURegister(int reg) const { ++ return v8::internal::Registers::Name(reg); ++} ++ ++const char* NameConverter::NameOfXMMRegister(int reg) const { ++ return v8::internal::FPURegisters::Name(reg); ++} ++ ++const char* NameConverter::NameOfByteCPURegister(int reg) const { ++ UNREACHABLE(); ++ return "nobytereg"; ++} ++ ++const char* NameConverter::NameInCode(byte* addr) const { ++ // The default name converter is called for unknown code. So we will not try ++ // to access any memory. ++ return ""; ++} ++ ++//------------------------------------------------------------------------------ ++ ++int Disassembler::InstructionDecode(v8::internal::Vector buffer, ++ byte* instruction) { ++ v8::internal::Decoder d(converter_, buffer); ++ return d.InstructionDecode(instruction); ++} ++ ++int Disassembler::ConstantPoolSizeAt(byte* instruction) { return -1; } ++ ++void Disassembler::Disassemble(FILE* f, byte* begin, byte* end, ++ UnimplementedOpcodeAction unimplemented_action) { ++ NameConverter converter; ++ Disassembler d(converter, unimplemented_action); ++ for (byte* pc = begin; pc < end;) { ++ v8::internal::EmbeddedVector buffer; ++ buffer[0] = '\0'; ++ byte* prev_pc = pc; ++ pc += d.InstructionDecode(buffer, pc); ++ v8::internal::PrintF(f, "%p %08x %s\n", static_cast(prev_pc), ++ *reinterpret_cast(prev_pc), buffer.begin()); ++ } ++} ++ ++#undef STRING_STARTS_WITH ++ ++} // namespace disasm ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h b/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h +index dbe78ddf2d8..cb745ef8cc0 100644 +--- a/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h ++++ b/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h +@@ -83,6 +83,7 @@ class PerfJitLogger : public CodeEventLogger { + static const uint32_t kElfMachARM = 40; + static const uint32_t kElfMachMIPS = 8; + static const uint32_t kElfMachMIPS64 = 8; ++ static const uint32_t kElfMachLA64 = 258; + static const uint32_t kElfMachARM64 = 183; + static const uint32_t kElfMachS390x = 22; + static const uint32_t kElfMachPPC64 = 21; +@@ -98,6 +99,8 @@ class PerfJitLogger : public CodeEventLogger { + return kElfMachMIPS; + #elif V8_TARGET_ARCH_MIPS64 + return kElfMachMIPS64; ++#elif V8_TARGET_ARCH_LA64 ++ return kElfMachLA64; + #elif V8_TARGET_ARCH_ARM64 + return kElfMachARM64; + #elif V8_TARGET_ARCH_S390X +diff --git a/src/3rdparty/chromium/v8/src/execution/frame-constants.h b/src/3rdparty/chromium/v8/src/execution/frame-constants.h +index 8c3f7743192..f8508468ae4 100644 +--- a/src/3rdparty/chromium/v8/src/execution/frame-constants.h ++++ 
b/src/3rdparty/chromium/v8/src/execution/frame-constants.h +@@ -389,6 +389,8 @@ inline static int FrameSlotToFPOffset(int slot) { + #include "src/execution/mips/frame-constants-mips.h" // NOLINT + #elif V8_TARGET_ARCH_MIPS64 + #include "src/execution/mips64/frame-constants-mips64.h" // NOLINT ++#elif V8_TARGET_ARCH_LA64 ++#include "src/execution/la64/frame-constants-la64.h" // NOLINT + #elif V8_TARGET_ARCH_S390 + #include "src/execution/s390/frame-constants-s390.h" // NOLINT + #else +diff --git a/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.cc b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.cc +new file mode 100644 +index 00000000000..185f0abe3db +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.cc +@@ -0,0 +1,32 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/la64/assembler-la64-inl.h" ++#include "src/execution/frame-constants.h" ++#include "src/execution/frames.h" ++ ++#include "src/execution/la64/frame-constants-la64.h" ++ ++namespace v8 { ++namespace internal { ++ ++Register JavaScriptFrame::fp_register() { return v8::internal::fp; } ++Register JavaScriptFrame::context_register() { return cp; } ++Register JavaScriptFrame::constant_pool_pointer_register() { UNREACHABLE(); } ++ ++int InterpreterFrameConstants::RegisterStackSlotCount(int register_count) { ++ return register_count; ++} ++ ++int BuiltinContinuationFrameConstants::PaddingSlotCount(int register_count) { ++ USE(register_count); ++ return 0; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.h b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.h +new file mode 100644 +index 00000000000..e6069a60e97 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.h +@@ -0,0 +1,75 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_EXECUTION_LA64_FRAME_CONSTANTS_LA64_H_ ++#define V8_EXECUTION_LA64_FRAME_CONSTANTS_LA64_H_ ++ ++#include "src/base/bits.h" ++#include "src/base/macros.h" ++#include "src/execution/frame-constants.h" ++ ++namespace v8 { ++namespace internal { ++ ++class EntryFrameConstants : public AllStatic { ++ public: ++ // This is the offset to where JSEntry pushes the current value of ++ // Isolate::c_entry_fp onto the stack. ++ static constexpr int kCallerFPOffset = ++ -(StandardFrameConstants::kFixedFrameSizeFromFp + kPointerSize); ++}; ++ ++class WasmCompileLazyFrameConstants : public TypedFrameConstants { ++ public: ++ static constexpr int kNumberOfSavedGpParamRegs = 7; ++ static constexpr int kNumberOfSavedFpParamRegs = 7; ++ ++ // FP-relative. ++ static constexpr int kWasmInstanceOffset = TYPED_FRAME_PUSHED_VALUE_OFFSET(7); ++ static constexpr int kFixedFrameSizeFromFp = ++ TypedFrameConstants::kFixedFrameSizeFromFp + ++ kNumberOfSavedGpParamRegs * kPointerSize + ++ kNumberOfSavedFpParamRegs * kDoubleSize; ++}; ++ ++// Frame constructed by the {WasmDebugBreak} builtin. ++// After pushing the frame type marker, the builtin pushes all Liftoff cache ++// registers (see liftoff-assembler-defs.h). 
++class WasmDebugBreakFrameConstants : public TypedFrameConstants { ++ public: ++ // {a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, t3, t4, t5, t6, t7, t8} ++ static constexpr uint32_t kPushedGpRegs = 0b111111111111111110000; ++ // {f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26} ++ static constexpr uint32_t kPushedFpRegs = 0b101010101010101010101010101; ++ ++ static constexpr int kNumPushedGpRegisters = ++ base::bits::CountPopulation(kPushedGpRegs); ++ static constexpr int kNumPushedFpRegisters = ++ base::bits::CountPopulation(kPushedFpRegs); ++ ++ static constexpr int kLastPushedGpRegisterOffset = ++ -kFixedFrameSizeFromFp - kNumPushedGpRegisters * kSystemPointerSize; ++ static constexpr int kLastPushedFpRegisterOffset = ++ kLastPushedGpRegisterOffset - kNumPushedFpRegisters * kDoubleSize; ++ ++ // Offsets are fp-relative. ++ static int GetPushedGpRegisterOffset(int reg_code) { ++ DCHECK_NE(0, kPushedGpRegs & (1 << reg_code)); ++ uint32_t lower_regs = kPushedGpRegs & ((uint32_t{1} << reg_code) - 1); ++ return kLastPushedGpRegisterOffset + ++ base::bits::CountPopulation(lower_regs) * kSystemPointerSize; ++ } ++ ++ static int GetPushedFpRegisterOffset(int reg_code) { ++ DCHECK_NE(0, kPushedFpRegs & (1 << reg_code)); ++ uint32_t lower_regs = kPushedFpRegs & ((uint32_t{1} << reg_code) - 1); ++ return kLastPushedFpRegisterOffset + ++ base::bits::CountPopulation(lower_regs) * kDoubleSize; ++ } ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_EXECUTION_LA64_FRAME_CONSTANTS_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.cc b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.cc +new file mode 100644 +index 00000000000..f4bafa1d8dd +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.cc +@@ -0,0 +1,5804 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "src/execution/la64/simulator-la64.h" ++ ++// Only build the simulator if not compiling for real LA64 hardware. ++#if defined(USE_SIMULATOR) ++ ++#include ++#include ++#include ++#include ++ ++#include "src/base/bits.h" ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/diagnostics/disasm.h" ++#include "src/heap/combined-heap.h" ++#include "src/runtime/runtime-utils.h" ++#include "src/utils/ostreams.h" ++#include "src/utils/vector.h" ++ ++namespace v8 { ++namespace internal { ++ ++DEFINE_LAZY_LEAKY_OBJECT_GETTER(Simulator::GlobalMonitor, ++ Simulator::GlobalMonitor::Get) ++ ++// #define PRINT_SIM_LOG ++ ++// Util functions. 
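The pushed-register offset helpers in frame-constants-la64.h above map a register code to an fp-relative stack slot by counting how many pushed registers have a smaller code. A minimal standalone sketch of that scheme follows; the mask and offset values are made up for illustration, and std::bitset stands in for base::bits::CountPopulation, so this is not part of the patch itself.

#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical constants for illustration only; the real values live in
// frame-constants-la64.h above.
constexpr uint32_t kPushedGpRegs = 0b10110;  // registers 1, 2 and 4 are pushed
constexpr int kSystemPointerSize = 8;
constexpr int kLastPushedGpRegisterOffset = -3 * kSystemPointerSize;

// fp-relative offset of a pushed register: start at the lowest pushed slot and
// skip one slot for every pushed register with a smaller code.
int GetPushedGpRegisterOffset(int reg_code) {
  assert(kPushedGpRegs & (uint32_t{1} << reg_code));
  uint32_t lower_regs = kPushedGpRegs & ((uint32_t{1} << reg_code) - 1);
  int num_lower = static_cast<int>(std::bitset<32>(lower_regs).count());
  return kLastPushedGpRegisterOffset + num_lower * kSystemPointerSize;
}

int main() {
  // Register 1 is the lowest pushed register, so it sits at the last offset;
  // register 4 has two pushed registers (1 and 2) below it.
  std::printf("r1 -> %d, r2 -> %d, r4 -> %d\n", GetPushedGpRegisterOffset(1),
              GetPushedGpRegisterOffset(2), GetPushedGpRegisterOffset(4));
  return 0;
}

Compiled on its own, the sketch prints r1 -> -24, r2 -> -16, r4 -> -8, mirroring how WasmDebugBreakFrameConstants assigns one slot per set bit in kPushedGpRegs.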
++inline bool HaveSameSign(int64_t a, int64_t b) { return ((a ^ b) >= 0); } ++ ++uint32_t get_fcsr_condition_bit(uint32_t cc) { ++ if (cc == 0) { ++ return 23; ++ } else { ++ return 24 + cc; ++ } ++} ++ ++static int64_t MultiplyHighSigned(int64_t u, int64_t v) { ++ uint64_t u0, v0, w0; ++ int64_t u1, v1, w1, w2, t; ++ ++ u0 = u & 0xFFFFFFFFL; ++ u1 = u >> 32; ++ v0 = v & 0xFFFFFFFFL; ++ v1 = v >> 32; ++ ++ w0 = u0 * v0; ++ t = u1 * v0 + (w0 >> 32); ++ w1 = t & 0xFFFFFFFFL; ++ w2 = t >> 32; ++ w1 = u0 * v1 + w1; ++ ++ return u1 * v1 + w2 + (w1 >> 32); ++} ++ ++static uint64_t MultiplyHighUnsigned(uint64_t u, uint64_t v) { ++ uint64_t u0, v0, w0; ++ uint64_t u1, v1, w1, w2, t; ++ ++ u0 = u & 0xFFFFFFFFL; ++ u1 = u >> 32; ++ v0 = v & 0xFFFFFFFFL; ++ v1 = v >> 32; ++ ++ w0 = u0 * v0; ++ t = u1 * v0 + (w0 >> 32); ++ w1 = t & 0xFFFFFFFFL; ++ w2 = t >> 32; ++ w1 = u0 * v1 + w1; ++ ++ return u1 * v1 + w2 + (w1 >> 32); ++} ++ ++#ifdef PRINT_SIM_LOG ++inline void printf_instr(const char* _Format, ...) { ++ va_list varList; ++ va_start(varList, _Format); ++ vprintf(_Format, varList); ++ va_end(varList); ++} ++#else ++#define printf_instr(...) ++#endif ++ ++// This macro provides a platform independent use of sscanf. The reason for ++// SScanF not being implemented in a platform independent was through ++// ::v8::internal::OS in the same way as SNPrintF is that the Windows C Run-Time ++// Library does not provide vsscanf. ++#define SScanF sscanf // NOLINT ++ ++// The La64Debugger class is used by the simulator while debugging simulated ++// code. ++class La64Debugger { ++ public: ++ explicit La64Debugger(Simulator* sim) : sim_(sim) {} ++ ++ void Stop(Instruction* instr); ++ void Debug(); ++ // Print all registers with a nice formatting. ++ void PrintAllRegs(); ++ void PrintAllRegsIncludingFPU(); ++ ++ private: ++ // We set the breakpoint code to 0xFFFF to easily recognize it. ++ static const Instr kBreakpointInstr = BREAK | 0xFFFF; ++ static const Instr kNopInstr = 0x0; ++ ++ Simulator* sim_; ++ ++ int64_t GetRegisterValue(int regnum); ++ int64_t GetFPURegisterValue(int regnum); ++ float GetFPURegisterValueFloat(int regnum); ++ double GetFPURegisterValueDouble(int regnum); ++ bool GetValue(const char* desc, int64_t* value); ++ ++ // Set or delete a breakpoint. Returns true if successful. ++ bool SetBreakpoint(Instruction* breakpc); ++ bool DeleteBreakpoint(Instruction* breakpc); ++ ++ // Undo and redo all breakpoints. This is needed to bracket disassembly and ++ // execution to skip past breakpoints when run from the debugger. ++ void UndoBreakpoints(); ++ void RedoBreakpoints(); ++}; ++ ++inline void UNSUPPORTED() { printf("Sim: Unsupported instruction.\n"); } ++ ++void La64Debugger::Stop(Instruction* instr) { ++ // Get the stop code. 
++ uint32_t code = instr->Bits(25, 6); ++ PrintF("Simulator hit (%u)\n", code); ++ Debug(); ++} ++ ++int64_t La64Debugger::GetRegisterValue(int regnum) { ++ if (regnum == kNumSimuRegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_register(regnum); ++ } ++} ++ ++int64_t La64Debugger::GetFPURegisterValue(int regnum) { ++ if (regnum == kNumFPURegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_fpu_register(regnum); ++ } ++} ++ ++float La64Debugger::GetFPURegisterValueFloat(int regnum) { ++ if (regnum == kNumFPURegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_fpu_register_float(regnum); ++ } ++} ++ ++double La64Debugger::GetFPURegisterValueDouble(int regnum) { ++ if (regnum == kNumFPURegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_fpu_register_double(regnum); ++ } ++} ++ ++bool La64Debugger::GetValue(const char* desc, int64_t* value) { ++ int regnum = Registers::Number(desc); ++ int fpuregnum = FPURegisters::Number(desc); ++ ++ if (regnum != kInvalidRegister) { ++ *value = GetRegisterValue(regnum); ++ return true; ++ } else if (fpuregnum != kInvalidFPURegister) { ++ *value = GetFPURegisterValue(fpuregnum); ++ return true; ++ } else if (strncmp(desc, "0x", 2) == 0) { ++ return SScanF(desc + 2, "%" SCNx64, reinterpret_cast(value)) == ++ 1; ++ } else { ++ return SScanF(desc, "%" SCNu64, reinterpret_cast(value)) == 1; ++ } ++ return false; ++} ++ ++bool La64Debugger::SetBreakpoint(Instruction* breakpc) { ++ // Check if a breakpoint can be set. If not return without any side-effects. ++ if (sim_->break_pc_ != nullptr) { ++ return false; ++ } ++ ++ // Set the breakpoint. ++ sim_->break_pc_ = breakpc; ++ sim_->break_instr_ = breakpc->InstructionBits(); ++ // Not setting the breakpoint instruction in the code itself. It will be set ++ // when the debugger shell continues. ++ return true; ++} ++ ++bool La64Debugger::DeleteBreakpoint(Instruction* breakpc) { ++ if (sim_->break_pc_ != nullptr) { ++ sim_->break_pc_->SetInstructionBits(sim_->break_instr_); ++ } ++ ++ sim_->break_pc_ = nullptr; ++ sim_->break_instr_ = 0; ++ return true; ++} ++ ++void La64Debugger::UndoBreakpoints() { ++ if (sim_->break_pc_ != nullptr) { ++ sim_->break_pc_->SetInstructionBits(sim_->break_instr_); ++ } ++} ++ ++void La64Debugger::RedoBreakpoints() { ++ if (sim_->break_pc_ != nullptr) { ++ sim_->break_pc_->SetInstructionBits(kBreakpointInstr); ++ } ++} ++ ++void La64Debugger::PrintAllRegs() { ++#define REG_INFO(n) Registers::Name(n), GetRegisterValue(n), GetRegisterValue(n) ++ ++ PrintF("\n"); ++ // at, v0, a0. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 "\t%3s: 0x%016" PRIx64 " %14" PRId64 ++ "\t%3s: 0x%016" PRIx64 " %14" PRId64 "\n", ++ REG_INFO(1), REG_INFO(2), REG_INFO(4)); ++ // v1, a1. ++ PrintF("%34s\t%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \n", ++ "", REG_INFO(3), REG_INFO(5)); ++ // a2. ++ PrintF("%34s\t%34s\t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", "", "", ++ REG_INFO(6)); ++ // a3. ++ PrintF("%34s\t%34s\t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", "", "", ++ REG_INFO(7)); ++ PrintF("\n"); ++ // a4-t3, s0-s7 ++ for (int i = 0; i < 8; i++) { ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \n", ++ REG_INFO(8 + i), REG_INFO(16 + i)); ++ } ++ PrintF("\n"); ++ // t8, k0, LO. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", ++ REG_INFO(24), REG_INFO(26), REG_INFO(32)); ++ // t9, k1, HI. 
++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", ++ REG_INFO(25), REG_INFO(27), REG_INFO(33)); ++ // sp, fp, gp. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", ++ REG_INFO(29), REG_INFO(30), REG_INFO(28)); ++ // pc. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \n", ++ REG_INFO(31), REG_INFO(34)); ++ ++#undef REG_INFO ++} ++ ++void La64Debugger::PrintAllRegsIncludingFPU() { ++#define FPU_REG_INFO(n) \ ++ FPURegisters::Name(n), GetFPURegisterValue(n), GetFPURegisterValueDouble(n) ++ ++ PrintAllRegs(); ++ ++ PrintF("\n\n"); ++ // f0, f1, f2, ... f31. ++ // TODO(plind): consider printing 2 columns for space efficiency. ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(0)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(1)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(2)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(3)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(4)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(5)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(6)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(7)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(8)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(9)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(10)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(11)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(12)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(13)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(14)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(15)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(16)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(17)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(18)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(19)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(20)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(21)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(22)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(23)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(24)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(25)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(26)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(27)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(28)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(29)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(30)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(31)); ++ ++#undef FPU_REG_INFO ++} ++ ++void La64Debugger::Debug() { ++ intptr_t last_pc = -1; ++ bool done = false; ++ ++#define COMMAND_SIZE 63 ++#define ARG_SIZE 255 ++ ++#define STR(a) #a ++#define XSTR(a) STR(a) ++ ++ char cmd[COMMAND_SIZE + 1]; ++ char arg1[ARG_SIZE + 1]; ++ char arg2[ARG_SIZE + 1]; ++ char* argv[3] = {cmd, arg1, arg2}; ++ ++ // Make sure to have a proper terminating character if reaching the limit. ++ cmd[COMMAND_SIZE] = 0; ++ arg1[ARG_SIZE] = 0; ++ arg2[ARG_SIZE] = 0; ++ ++ // Undo all set breakpoints while running in the debugger shell. This will ++ // make them invisible to all commands. 
++ UndoBreakpoints(); ++ ++ while (!done && (sim_->get_pc() != Simulator::end_sim_pc)) { ++ if (last_pc != sim_->get_pc()) { ++ disasm::NameConverter converter; ++ disasm::Disassembler dasm(converter); ++ // Use a reasonably large buffer. ++ v8::internal::EmbeddedVector buffer; ++ dasm.InstructionDecode(buffer, reinterpret_cast(sim_->get_pc())); ++ PrintF(" 0x%016" PRIx64 " %s\n", sim_->get_pc(), buffer.begin()); ++ last_pc = sim_->get_pc(); ++ } ++ char* line = ReadLine("sim> "); ++ if (line == nullptr) { ++ break; ++ } else { ++ char* last_input = sim_->last_debugger_input(); ++ if (strcmp(line, "\n") == 0 && last_input != nullptr) { ++ line = last_input; ++ } else { ++ // Ownership is transferred to sim_; ++ sim_->set_last_debugger_input(line); ++ } ++ // Use sscanf to parse the individual parts of the command line. At the ++ // moment no command expects more than two parameters. ++ int argc = SScanF(line, ++ "%" XSTR(COMMAND_SIZE) "s " ++ "%" XSTR(ARG_SIZE) "s " ++ "%" XSTR(ARG_SIZE) "s", ++ cmd, arg1, arg2); ++ if ((strcmp(cmd, "si") == 0) || (strcmp(cmd, "stepi") == 0)) { ++ Instruction* instr = reinterpret_cast(sim_->get_pc()); ++ if (!(instr->IsTrap()) || ++ instr->InstructionBits() == rtCallRedirInstr) { ++ sim_->InstructionDecode( ++ reinterpret_cast(sim_->get_pc())); ++ } else { ++ // Allow si to jump over generated breakpoints. ++ PrintF("/!\\ Jumping over generated breakpoint.\n"); ++ sim_->set_pc(sim_->get_pc() + kInstrSize); ++ } ++ } else if ((strcmp(cmd, "c") == 0) || (strcmp(cmd, "cont") == 0)) { ++ // Execute the one instruction we broke at with breakpoints disabled. ++ sim_->InstructionDecode(reinterpret_cast(sim_->get_pc())); ++ // Leave the debugger shell. ++ done = true; ++ } else if ((strcmp(cmd, "p") == 0) || (strcmp(cmd, "print") == 0)) { ++ if (argc == 2) { ++ int64_t value; ++ double dvalue; ++ if (strcmp(arg1, "all") == 0) { ++ PrintAllRegs(); ++ } else if (strcmp(arg1, "allf") == 0) { ++ PrintAllRegsIncludingFPU(); ++ } else { ++ int regnum = Registers::Number(arg1); ++ int fpuregnum = FPURegisters::Number(arg1); ++ ++ if (regnum != kInvalidRegister) { ++ value = GetRegisterValue(regnum); ++ PrintF("%s: 0x%08" PRIx64 " %" PRId64 " \n", arg1, value, ++ value); ++ } else if (fpuregnum != kInvalidFPURegister) { ++ value = GetFPURegisterValue(fpuregnum); ++ dvalue = GetFPURegisterValueDouble(fpuregnum); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", ++ FPURegisters::Name(fpuregnum), value, dvalue); ++ } else { ++ PrintF("%s unrecognized\n", arg1); ++ } ++ } ++ } else { ++ if (argc == 3) { ++ if (strcmp(arg2, "single") == 0) { ++ int64_t value; ++ float fvalue; ++ int fpuregnum = FPURegisters::Number(arg1); ++ ++ if (fpuregnum != kInvalidFPURegister) { ++ value = GetFPURegisterValue(fpuregnum); ++ value &= 0xFFFFFFFFUL; ++ fvalue = GetFPURegisterValueFloat(fpuregnum); ++ PrintF("%s: 0x%08" PRIx64 " %11.4e\n", arg1, value, fvalue); ++ } else { ++ PrintF("%s unrecognized\n", arg1); ++ } ++ } else { ++ PrintF("print single\n"); ++ } ++ } else { ++ PrintF("print or print single\n"); ++ } ++ } ++ } else if ((strcmp(cmd, "po") == 0) || ++ (strcmp(cmd, "printobject") == 0)) { ++ if (argc == 2) { ++ int64_t value; ++ StdoutStream os; ++ if (GetValue(arg1, &value)) { ++ Object obj(value); ++ os << arg1 << ": \n"; ++#ifdef DEBUG ++ obj.Print(os); ++ os << "\n"; ++#else ++ os << Brief(obj) << "\n"; ++#endif ++ } else { ++ os << arg1 << " unrecognized\n"; ++ } ++ } else { ++ PrintF("printobject \n"); ++ } ++ } else if (strcmp(cmd, "stack") == 0 || strcmp(cmd, "mem") == 0 || ++ 
strcmp(cmd, "dump") == 0) { ++ int64_t* cur = nullptr; ++ int64_t* end = nullptr; ++ int next_arg = 1; ++ ++ if (strcmp(cmd, "stack") == 0) { ++ cur = reinterpret_cast(sim_->get_register(Simulator::sp)); ++ } else { // Command "mem". ++ int64_t value; ++ if (!GetValue(arg1, &value)) { ++ PrintF("%s unrecognized\n", arg1); ++ continue; ++ } ++ cur = reinterpret_cast(value); ++ next_arg++; ++ } ++ ++ int64_t words; ++ if (argc == next_arg) { ++ words = 10; ++ } else { ++ if (!GetValue(argv[next_arg], &words)) { ++ words = 10; ++ } ++ } ++ end = cur + words; ++ ++ bool skip_obj_print = (strcmp(cmd, "dump") == 0); ++ while (cur < end) { ++ PrintF(" 0x%012" PRIxPTR " : 0x%016" PRIx64 " %14" PRId64 " ", ++ reinterpret_cast(cur), *cur, *cur); ++ Object obj(*cur); ++ Heap* current_heap = sim_->isolate_->heap(); ++ if (!skip_obj_print) { ++ if (obj.IsSmi() || ++ IsValidHeapObject(current_heap, HeapObject::cast(obj))) { ++ PrintF(" ("); ++ if (obj.IsSmi()) { ++ PrintF("smi %d", Smi::ToInt(obj)); ++ } else { ++ obj.ShortPrint(); ++ } ++ PrintF(")"); ++ } ++ } ++ PrintF("\n"); ++ cur++; ++ } ++ ++ } else if ((strcmp(cmd, "disasm") == 0) || (strcmp(cmd, "dpc") == 0) || ++ (strcmp(cmd, "di") == 0)) { ++ disasm::NameConverter converter; ++ disasm::Disassembler dasm(converter); ++ // Use a reasonably large buffer. ++ v8::internal::EmbeddedVector buffer; ++ ++ byte* cur = nullptr; ++ byte* end = nullptr; ++ ++ if (argc == 1) { ++ cur = reinterpret_cast(sim_->get_pc()); ++ end = cur + (10 * kInstrSize); ++ } else if (argc == 2) { ++ int regnum = Registers::Number(arg1); ++ if (regnum != kInvalidRegister || strncmp(arg1, "0x", 2) == 0) { ++ // The argument is an address or a register name. ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ cur = reinterpret_cast(value); ++ // Disassemble 10 instructions at . ++ end = cur + (10 * kInstrSize); ++ } ++ } else { ++ // The argument is the number of instructions. ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ cur = reinterpret_cast(sim_->get_pc()); ++ // Disassemble instructions. ++ end = cur + (value * kInstrSize); ++ } ++ } ++ } else { ++ int64_t value1; ++ int64_t value2; ++ if (GetValue(arg1, &value1) && GetValue(arg2, &value2)) { ++ cur = reinterpret_cast(value1); ++ end = cur + (value2 * kInstrSize); ++ } ++ } ++ ++ while (cur < end) { ++ dasm.InstructionDecode(buffer, cur); ++ PrintF(" 0x%08" PRIxPTR " %s\n", reinterpret_cast(cur), ++ buffer.begin()); ++ cur += kInstrSize; ++ } ++ } else if (strcmp(cmd, "gdb") == 0) { ++ PrintF("relinquishing control to gdb\n"); ++ v8::base::OS::DebugBreak(); ++ PrintF("regaining control from gdb\n"); ++ } else if (strcmp(cmd, "break") == 0) { ++ if (argc == 2) { ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ if (!SetBreakpoint(reinterpret_cast(value))) { ++ PrintF("setting breakpoint failed\n"); ++ } ++ } else { ++ PrintF("%s unrecognized\n", arg1); ++ } ++ } else { ++ PrintF("break
\n"); ++ } ++ } else if (strcmp(cmd, "del") == 0) { ++ if (!DeleteBreakpoint(nullptr)) { ++ PrintF("deleting breakpoint failed\n"); ++ } ++ } else if (strcmp(cmd, "flags") == 0) { ++ PrintF("No flags on LA64 !\n"); ++ } else if (strcmp(cmd, "stop") == 0) { ++ int64_t value; ++ intptr_t stop_pc = sim_->get_pc() - 2 * kInstrSize; ++ Instruction* stop_instr = reinterpret_cast(stop_pc); ++ Instruction* msg_address = ++ reinterpret_cast(stop_pc + kInstrSize); ++ if ((argc == 2) && (strcmp(arg1, "unstop") == 0)) { ++ // Remove the current stop. ++ if (sim_->IsStopInstruction(stop_instr)) { ++ stop_instr->SetInstructionBits(kNopInstr); ++ msg_address->SetInstructionBits(kNopInstr); ++ } else { ++ PrintF("Not at debugger stop.\n"); ++ } ++ } else if (argc == 3) { ++ // Print information about all/the specified breakpoint(s). ++ if (strcmp(arg1, "info") == 0) { ++ if (strcmp(arg2, "all") == 0) { ++ PrintF("Stop information:\n"); ++ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode; ++ i++) { ++ sim_->PrintStopInfo(i); ++ } ++ } else if (GetValue(arg2, &value)) { ++ sim_->PrintStopInfo(value); ++ } else { ++ PrintF("Unrecognized argument.\n"); ++ } ++ } else if (strcmp(arg1, "enable") == 0) { ++ // Enable all/the specified breakpoint(s). ++ if (strcmp(arg2, "all") == 0) { ++ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode; ++ i++) { ++ sim_->EnableStop(i); ++ } ++ } else if (GetValue(arg2, &value)) { ++ sim_->EnableStop(value); ++ } else { ++ PrintF("Unrecognized argument.\n"); ++ } ++ } else if (strcmp(arg1, "disable") == 0) { ++ // Disable all/the specified breakpoint(s). ++ if (strcmp(arg2, "all") == 0) { ++ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode; ++ i++) { ++ sim_->DisableStop(i); ++ } ++ } else if (GetValue(arg2, &value)) { ++ sim_->DisableStop(value); ++ } else { ++ PrintF("Unrecognized argument.\n"); ++ } ++ } ++ } else { ++ PrintF("Wrong usage. Use help command for more information.\n"); ++ } ++ } else if ((strcmp(cmd, "stat") == 0) || (strcmp(cmd, "st") == 0)) { ++ // Print registers and disassemble. ++ PrintAllRegs(); ++ PrintF("\n"); ++ ++ disasm::NameConverter converter; ++ disasm::Disassembler dasm(converter); ++ // Use a reasonably large buffer. ++ v8::internal::EmbeddedVector buffer; ++ ++ byte* cur = nullptr; ++ byte* end = nullptr; ++ ++ if (argc == 1) { ++ cur = reinterpret_cast(sim_->get_pc()); ++ end = cur + (10 * kInstrSize); ++ } else if (argc == 2) { ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ cur = reinterpret_cast(value); ++ // no length parameter passed, assume 10 instructions ++ end = cur + (10 * kInstrSize); ++ } ++ } else { ++ int64_t value1; ++ int64_t value2; ++ if (GetValue(arg1, &value1) && GetValue(arg2, &value2)) { ++ cur = reinterpret_cast(value1); ++ end = cur + (value2 * kInstrSize); ++ } ++ } ++ ++ while (cur < end) { ++ dasm.InstructionDecode(buffer, cur); ++ PrintF(" 0x%08" PRIxPTR " %s\n", reinterpret_cast(cur), ++ buffer.begin()); ++ cur += kInstrSize; ++ } ++ } else if ((strcmp(cmd, "h") == 0) || (strcmp(cmd, "help") == 0)) { ++ PrintF("cont\n"); ++ PrintF(" continue execution (alias 'c')\n"); ++ PrintF("stepi\n"); ++ PrintF(" step one instruction (alias 'si')\n"); ++ PrintF("print \n"); ++ PrintF(" print register content (alias 'p')\n"); ++ PrintF(" use register name 'all' to print all registers\n"); ++ PrintF("printobject \n"); ++ PrintF(" print an object from a register (alias 'po')\n"); ++ PrintF("stack []\n"); ++ PrintF(" dump stack content, default dump 10 words)\n"); ++ PrintF("mem
[]\n"); ++ PrintF(" dump memory content, default dump 10 words)\n"); ++ PrintF("dump []\n"); ++ PrintF( ++ " dump memory content without pretty printing JS objects, default " ++ "dump 10 words)\n"); ++ PrintF("flags\n"); ++ PrintF(" print flags\n"); ++ PrintF("disasm []\n"); ++ PrintF("disasm [
]\n"); ++ PrintF("disasm [[
] ]\n"); ++ PrintF(" disassemble code, default is 10 instructions\n"); ++ PrintF(" from pc (alias 'di')\n"); ++ PrintF("gdb\n"); ++ PrintF(" enter gdb\n"); ++ PrintF("break
\n"); ++ PrintF(" set a break point on the address\n"); ++ PrintF("del\n"); ++ PrintF(" delete the breakpoint\n"); ++ PrintF("stop feature:\n"); ++ PrintF(" Description:\n"); ++ PrintF(" Stops are debug instructions inserted by\n"); ++ PrintF(" the Assembler::stop() function.\n"); ++ PrintF(" When hitting a stop, the Simulator will\n"); ++ PrintF(" stop and give control to the Debugger.\n"); ++ PrintF(" All stop codes are watched:\n"); ++ PrintF(" - They can be enabled / disabled: the Simulator\n"); ++ PrintF(" will / won't stop when hitting them.\n"); ++ PrintF(" - The Simulator keeps track of how many times they \n"); ++ PrintF(" are met. (See the info command.) Going over a\n"); ++ PrintF(" disabled stop still increases its counter. \n"); ++ PrintF(" Commands:\n"); ++ PrintF(" stop info all/ : print infos about number \n"); ++ PrintF(" or all stop(s).\n"); ++ PrintF(" stop enable/disable all/ : enables / disables\n"); ++ PrintF(" all or number stop(s)\n"); ++ PrintF(" stop unstop\n"); ++ PrintF(" ignore the stop instruction at the current location\n"); ++ PrintF(" from now on\n"); ++ } else { ++ PrintF("Unknown command: %s\n", cmd); ++ } ++ } ++ } ++ ++ // Add all the breakpoints back to stop execution and enter the debugger ++ // shell when hit. ++ RedoBreakpoints(); ++ ++#undef COMMAND_SIZE ++#undef ARG_SIZE ++ ++#undef STR ++#undef XSTR ++} ++ ++bool Simulator::ICacheMatch(void* one, void* two) { ++ DCHECK_EQ(reinterpret_cast(one) & CachePage::kPageMask, 0); ++ DCHECK_EQ(reinterpret_cast(two) & CachePage::kPageMask, 0); ++ return one == two; ++} ++ ++static uint32_t ICacheHash(void* key) { ++ return static_cast(reinterpret_cast(key)) >> 2; ++} ++ ++static bool AllOnOnePage(uintptr_t start, size_t size) { ++ intptr_t start_page = (start & ~CachePage::kPageMask); ++ intptr_t end_page = ((start + size) & ~CachePage::kPageMask); ++ return start_page == end_page; ++} ++ ++void Simulator::set_last_debugger_input(char* input) { ++ DeleteArray(last_debugger_input_); ++ last_debugger_input_ = input; ++} ++ ++void Simulator::SetRedirectInstruction(Instruction* instruction) { ++ instruction->SetInstructionBits(rtCallRedirInstr); ++} ++ ++void Simulator::FlushICache(base::CustomMatcherHashMap* i_cache, ++ void* start_addr, size_t size) { ++ int64_t start = reinterpret_cast(start_addr); ++ int64_t intra_line = (start & CachePage::kLineMask); ++ start -= intra_line; ++ size += intra_line; ++ size = ((size - 1) | CachePage::kLineMask) + 1; ++ int offset = (start & CachePage::kPageMask); ++ while (!AllOnOnePage(start, size - 1)) { ++ int bytes_to_flush = CachePage::kPageSize - offset; ++ FlushOnePage(i_cache, start, bytes_to_flush); ++ start += bytes_to_flush; ++ size -= bytes_to_flush; ++ DCHECK_EQ((int64_t)0, start & CachePage::kPageMask); ++ offset = 0; ++ } ++ if (size != 0) { ++ FlushOnePage(i_cache, start, size); ++ } ++} ++ ++CachePage* Simulator::GetCachePage(base::CustomMatcherHashMap* i_cache, ++ void* page) { ++ base::HashMap::Entry* entry = i_cache->LookupOrInsert(page, ICacheHash(page)); ++ if (entry->value == nullptr) { ++ CachePage* new_page = new CachePage(); ++ entry->value = new_page; ++ } ++ return reinterpret_cast(entry->value); ++} ++ ++// Flush from start up to and not including start + size. 
++void Simulator::FlushOnePage(base::CustomMatcherHashMap* i_cache, ++ intptr_t start, size_t size) { ++ DCHECK_LE(size, CachePage::kPageSize); ++ DCHECK(AllOnOnePage(start, size - 1)); ++ DCHECK_EQ(start & CachePage::kLineMask, 0); ++ DCHECK_EQ(size & CachePage::kLineMask, 0); ++ void* page = reinterpret_cast(start & (~CachePage::kPageMask)); ++ int offset = (start & CachePage::kPageMask); ++ CachePage* cache_page = GetCachePage(i_cache, page); ++ char* valid_bytemap = cache_page->ValidityByte(offset); ++ memset(valid_bytemap, CachePage::LINE_INVALID, size >> CachePage::kLineShift); ++} ++ ++void Simulator::CheckICache(base::CustomMatcherHashMap* i_cache, ++ Instruction* instr) { ++ int64_t address = reinterpret_cast(instr); ++ void* page = reinterpret_cast(address & (~CachePage::kPageMask)); ++ void* line = reinterpret_cast(address & (~CachePage::kLineMask)); ++ int offset = (address & CachePage::kPageMask); ++ CachePage* cache_page = GetCachePage(i_cache, page); ++ char* cache_valid_byte = cache_page->ValidityByte(offset); ++ bool cache_hit = (*cache_valid_byte == CachePage::LINE_VALID); ++ char* cached_line = cache_page->CachedData(offset & ~CachePage::kLineMask); ++ if (cache_hit) { ++ // Check that the data in memory matches the contents of the I-cache. ++ CHECK_EQ(0, memcmp(reinterpret_cast(instr), ++ cache_page->CachedData(offset), kInstrSize)); ++ } else { ++ // Cache miss. Load memory into the cache. ++ memcpy(cached_line, line, CachePage::kLineLength); ++ *cache_valid_byte = CachePage::LINE_VALID; ++ } ++} ++ ++Simulator::Simulator(Isolate* isolate) : isolate_(isolate) { ++ // Set up simulator support first. Some of this information is needed to ++ // setup the architecture state. ++ stack_size_ = FLAG_sim_stack_size * KB; ++ stack_ = reinterpret_cast(malloc(stack_size_)); ++ pc_modified_ = false; ++ icount_ = 0; ++ break_count_ = 0; ++ break_pc_ = nullptr; ++ break_instr_ = 0; ++ ++ // Set up architecture state. ++ // All registers are initialized to zero to start with. ++ for (int i = 0; i < kNumSimuRegisters; i++) { ++ registers_[i] = 0; ++ } ++ for (int i = 0; i < kNumFPURegisters; i++) { ++ FPUregisters_[i] = 0; ++ } ++ for (int i = 0; i < kNumCFRegisters; i++) { ++ CFregisters_[i] = 0; ++ } ++ ++ FCSR_ = 0; ++ ++ // The sp is initialized to point to the bottom (high address) of the ++ // allocated stack area. To be safe in potential stack underflows we leave ++ // some buffer below. ++ registers_[sp] = reinterpret_cast(stack_) + stack_size_ - 64; ++ // The ra and pc are initialized to a known bad value that will cause an ++ // access violation if the simulator ever tries to execute it. ++ registers_[pc] = bad_ra; ++ registers_[ra] = bad_ra; ++ ++ last_debugger_input_ = nullptr; ++} ++ ++Simulator::~Simulator() { ++ GlobalMonitor::Get()->RemoveLinkedAddress(&global_monitor_thread_); ++ free(stack_); ++} ++ ++// Get the active Simulator for the current thread. ++Simulator* Simulator::current(Isolate* isolate) { ++ v8::internal::Isolate::PerIsolateThreadData* isolate_data = ++ isolate->FindOrAllocatePerThreadDataForThisThread(); ++ DCHECK_NOT_NULL(isolate_data); ++ ++ Simulator* sim = isolate_data->simulator(); ++ if (sim == nullptr) { ++ // TODO(146): delete the simulator object when a thread/isolate goes away. ++ sim = new Simulator(isolate); ++ isolate_data->set_simulator(sim); ++ } ++ return sim; ++} ++ ++// Sets the register in the architecture state. It will also deal with updating ++// Simulator internal state for special registers such as PC. 
++void Simulator::set_register(int reg, int64_t value) { ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ if (reg == pc) { ++ pc_modified_ = true; ++ } ++ ++ // Zero register always holds 0. ++ registers_[reg] = (reg == 0) ? 0 : value; ++} ++ ++void Simulator::set_dw_register(int reg, const int* dbl) { ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ registers_[reg] = dbl[1]; ++ registers_[reg] = registers_[reg] << 32; ++ registers_[reg] += dbl[0]; ++} ++ ++void Simulator::set_fpu_register(int fpureg, int64_t value) { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ FPUregisters_[fpureg] = value; ++} ++ ++void Simulator::set_fpu_register_word(int fpureg, int32_t value) { ++ // Set ONLY lower 32-bits, leaving upper bits untouched. ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ int32_t* pword; ++ pword = reinterpret_cast(&FPUregisters_[fpureg]); ++ ++ *pword = value; ++} ++ ++void Simulator::set_fpu_register_hi_word(int fpureg, int32_t value) { ++ // Set ONLY upper 32-bits, leaving lower bits untouched. ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ int32_t* phiword; ++ phiword = (reinterpret_cast(&FPUregisters_[fpureg])) + 1; ++ ++ *phiword = value; ++} ++ ++void Simulator::set_fpu_register_float(int fpureg, float value) { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ *bit_cast(&FPUregisters_[fpureg]) = value; ++} ++ ++void Simulator::set_fpu_register_double(int fpureg, double value) { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ *bit_cast(&FPUregisters_[fpureg]) = value; ++} ++ ++void Simulator::set_cf_register(int cfreg, bool value) { ++ DCHECK((cfreg >= 0) && (cfreg < kNumCFRegisters)); ++ CFregisters_[cfreg] = value; ++} ++ ++// Get the register from the architecture state. This function does handle ++// the special case of accessing the PC register. ++int64_t Simulator::get_register(int reg) const { ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ if (reg == 0) ++ return 0; ++ else ++ return registers_[reg] + ((reg == pc) ? Instruction::kPCReadOffset : 0); ++} ++ ++double Simulator::get_double_from_register_pair(int reg) { ++ // TODO(plind): bad ABI stuff, refactor or remove. ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ ++ double dm_val = 0.0; ++ // Read the bits from the unsigned integer register_[] array ++ // into the double precision floating point value and return it. 
++ char buffer[sizeof(registers_[0])]; ++ memcpy(buffer, ®isters_[reg], sizeof(registers_[0])); ++ memcpy(&dm_val, buffer, sizeof(registers_[0])); ++ return (dm_val); ++} ++ ++int64_t Simulator::get_fpu_register(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return FPUregisters_[fpureg]; ++} ++ ++int32_t Simulator::get_fpu_register_word(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return static_cast(FPUregisters_[fpureg] & 0xFFFFFFFF); ++} ++ ++int32_t Simulator::get_fpu_register_signed_word(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return static_cast(FPUregisters_[fpureg] & 0xFFFFFFFF); ++} ++ ++int32_t Simulator::get_fpu_register_hi_word(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return static_cast((FPUregisters_[fpureg] >> 32) & 0xFFFFFFFF); ++} ++ ++float Simulator::get_fpu_register_float(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return *bit_cast(const_cast(&FPUregisters_[fpureg])); ++} ++ ++double Simulator::get_fpu_register_double(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return *bit_cast(&FPUregisters_[fpureg]); ++} ++ ++bool Simulator::get_cf_register(int cfreg) const { ++ DCHECK((cfreg >= 0) && (cfreg < kNumCFRegisters)); ++ return CFregisters_[cfreg]; ++} ++ ++// Runtime FP routines take up to two double arguments and zero ++// or one integer arguments. All are constructed here, ++// from a0-a3 or fa0 and fa1 (n64). ++void Simulator::GetFpArgs(double* x, double* y, int32_t* z) { ++ const int fparg2 = f1; ++ *x = get_fpu_register_double(f0); ++ *y = get_fpu_register_double(fparg2); ++ *z = static_cast(get_register(a2)); ++} ++ ++// The return value is either in v0/v1 or f0. ++void Simulator::SetFpResult(const double& result) { ++ set_fpu_register_double(0, result); ++} ++ ++// Helper functions for setting and testing the FCSR register's bits. ++void Simulator::set_fcsr_bit(uint32_t cc, bool value) { ++ if (value) { ++ FCSR_ |= (1 << cc); ++ } else { ++ FCSR_ &= ~(1 << cc); ++ } ++} ++ ++bool Simulator::test_fcsr_bit(uint32_t cc) { return FCSR_ & (1 << cc); } ++ ++void Simulator::set_fcsr_rounding_mode(FPURoundingMode mode) { ++ FCSR_ |= mode & kFPURoundingModeMask; ++} ++ ++unsigned int Simulator::get_fcsr_rounding_mode() { ++ return FCSR_ & kFPURoundingModeMask; ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. ++bool Simulator::set_fcsr_round_error(double original, double rounded) { ++ bool ret = false; ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < DBL_MIN && rounded > -DBL_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded > max_int32 || rounded < min_int32) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. 
++bool Simulator::set_fcsr_round64_error(double original, double rounded) { ++ bool ret = false; ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. ++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < DBL_MIN && rounded > -DBL_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded >= max_int64 || rounded < min_int64) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. ++bool Simulator::set_fcsr_round_error(float original, float rounded) { ++ bool ret = false; ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < FLT_MIN && rounded > -FLT_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded > max_int32 || rounded < min_int32) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++void Simulator::set_fpu_register_word_invalid_result(float original, ++ float rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register_word(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result(float original, float rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result64(float original, ++ float rounded) { ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. 
++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded >= max_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResult); ++ } else if (rounded < min_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_word_invalid_result(double original, ++ double rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register_word(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result(double original, ++ double rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result64(double original, ++ double rounded) { ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. ++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded >= max_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResult); ++ } else if (rounded < min_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. ++bool Simulator::set_fcsr_round64_error(float original, float rounded) { ++ bool ret = false; ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. ++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < FLT_MIN && rounded > -FLT_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded >= max_int64 || rounded < min_int64) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++// For ftint instructions only ++void Simulator::round_according_to_fcsr(double toRound, double* rounded, ++ int32_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. 
++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ // switch ((FCSR_ >> 8) & 3) { ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ *rounded_int -= 1; ++ *rounded -= 1.; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++void Simulator::round64_according_to_fcsr(double toRound, double* rounded, ++ int64_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. ++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or. ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ *rounded_int -= 1; ++ *rounded -= 1.; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = std::trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++void Simulator::round_according_to_fcsr(float toRound, float* rounded, ++ int32_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. ++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. 
++ *rounded_int -= 1; ++ *rounded -= 1.f; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = std::trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++void Simulator::round64_according_to_fcsr(float toRound, float* rounded, ++ int64_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. ++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or. ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ *rounded_int -= 1; ++ *rounded -= 1.f; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++// Raw access to the PC register. ++void Simulator::set_pc(int64_t value) { ++ pc_modified_ = true; ++ registers_[pc] = value; ++} ++ ++bool Simulator::has_bad_pc() const { ++ return ((registers_[pc] == bad_ra) || (registers_[pc] == end_sim_pc)); ++} ++ ++// Raw access to the PC register without the special adjustment when reading. ++int64_t Simulator::get_pc() const { return registers_[pc]; } ++ ++// TODO(plind): refactor this messy debug code when we do unaligned access. ++void Simulator::DieOrDebug() { ++ if ((1)) { // Flag for this was removed. 
++ La64Debugger dbg(this); ++ dbg.Debug(); ++ } else { ++ base::OS::Abort(); ++ } ++} ++ ++void Simulator::TraceRegWr(int64_t value, TraceType t) { ++ if (::v8::internal::FLAG_trace_sim) { ++ union { ++ int64_t fmt_int64; ++ int32_t fmt_int32[2]; ++ float fmt_float[2]; ++ double fmt_double; ++ } v; ++ v.fmt_int64 = value; ++ ++ switch (t) { ++ case WORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " (%" PRId64 ") int32:%" PRId32 ++ " uint32:%" PRIu32, ++ v.fmt_int64, icount_, v.fmt_int32[0], v.fmt_int32[0]); ++ break; ++ case DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " (%" PRId64 ") int64:%" PRId64 ++ " uint64:%" PRIu64, ++ value, icount_, value, value); ++ break; ++ case FLOAT: ++ SNPrintF(trace_buf_, "%016" PRIx64 " (%" PRId64 ") flt:%e", ++ v.fmt_int64, icount_, v.fmt_float[0]); ++ break; ++ case DOUBLE: ++ SNPrintF(trace_buf_, "%016" PRIx64 " (%" PRId64 ") dbl:%e", ++ v.fmt_int64, icount_, v.fmt_double); ++ break; ++ case FLOAT_DOUBLE: ++ SNPrintF(trace_buf_, "%016" PRIx64 " (%" PRId64 ") flt:%e dbl:%e", ++ v.fmt_int64, icount_, v.fmt_float[0], v.fmt_double); ++ break; ++ case WORD_DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " (%" PRId64 ") int32:%" PRId32 ++ " uint32:%" PRIu32 " int64:%" PRId64 " uint64:%" PRIu64, ++ v.fmt_int64, icount_, v.fmt_int32[0], v.fmt_int32[0], ++ v.fmt_int64, v.fmt_int64); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++// TODO(plind): consider making icount_ printing a flag option. ++void Simulator::TraceMemRd(int64_t addr, int64_t value, TraceType t) { ++ if (::v8::internal::FLAG_trace_sim) { ++ union { ++ int64_t fmt_int64; ++ int32_t fmt_int32[2]; ++ float fmt_float[2]; ++ double fmt_double; ++ } v; ++ v.fmt_int64 = value; ++ ++ switch (t) { ++ case WORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") int32:%" PRId32 " uint32:%" PRIu32, ++ v.fmt_int64, addr, icount_, v.fmt_int32[0], v.fmt_int32[0]); ++ break; ++ case DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") int64:%" PRId64 " uint64:%" PRIu64, ++ value, addr, icount_, value, value); ++ break; ++ case FLOAT: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") flt:%e", ++ v.fmt_int64, addr, icount_, v.fmt_float[0]); ++ break; ++ case DOUBLE: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") dbl:%e", ++ v.fmt_int64, addr, icount_, v.fmt_double); ++ break; ++ case FLOAT_DOUBLE: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") flt:%e dbl:%e", ++ v.fmt_int64, addr, icount_, v.fmt_float[0], v.fmt_double); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++void Simulator::TraceMemWr(int64_t addr, int64_t value, TraceType t) { ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (t) { ++ case BYTE: ++ SNPrintF(trace_buf_, ++ " %02" PRIx8 " --> [%016" PRIx64 "] (%" PRId64 ++ ")", ++ static_cast(value), addr, icount_); ++ break; ++ case HALF: ++ SNPrintF(trace_buf_, ++ " %04" PRIx16 " --> [%016" PRIx64 "] (%" PRId64 ++ ")", ++ static_cast(value), addr, icount_); ++ break; ++ case WORD: ++ SNPrintF(trace_buf_, ++ " %08" PRIx32 " --> [%016" PRIx64 "] (%" PRId64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " --> [%016" PRIx64 "] (%" PRId64 " )", ++ value, addr, icount_); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++template ++void Simulator::TraceMemRd(int64_t addr, T value) { ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (sizeof(T)) { ++ case 
1: ++ SNPrintF(trace_buf_, ++ "%08" PRIx8 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int8:%" PRId8 " uint8:%" PRIu8, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ case 2: ++ SNPrintF(trace_buf_, ++ "%08" PRIx16 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int16:%" PRId16 " uint16:%" PRIu16, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ case 4: ++ SNPrintF(trace_buf_, ++ "%08" PRIx32 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int32:%" PRId32 " uint32:%" PRIu32, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ case 8: ++ SNPrintF(trace_buf_, ++ "%08" PRIx64 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int64:%" PRId64 " uint64:%" PRIu64, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++template ++void Simulator::TraceMemWr(int64_t addr, T value) { ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (sizeof(T)) { ++ case 1: ++ SNPrintF(trace_buf_, ++ " %02" PRIx8 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case 2: ++ SNPrintF(trace_buf_, ++ " %04" PRIx16 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case 4: ++ SNPrintF(trace_buf_, ++ "%08" PRIx32 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case 8: ++ SNPrintF(trace_buf_, ++ "%16" PRIx64 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++// TODO(plind): sign-extend and zero-extend not implmented properly ++// on all the ReadXX functions, I don't think re-interpret cast does it. ++int32_t Simulator::ReadW(int64_t addr, Instruction* instr, TraceType t) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory read from bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /* if ((addr & 0x3) == 0)*/ { ++ local_monitor_.NotifyLoad(); ++ int32_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr), t); ++ return *ptr; ++ } ++ // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++uint32_t Simulator::ReadWU(int64_t addr, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory read from bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ // if ((addr & 0x3) == 0) { ++ local_monitor_.NotifyLoad(); ++ uint32_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr), WORD); ++ return *ptr; ++ // } ++ // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++void Simulator::WriteW(int64_t addr, int32_t value, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. 
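++ // A store this close to address zero is not expected from valid generated
++ // code: it almost certainly means a nullptr (plus a small field offset)
++ // was dereferenced, so report it and drop into the debugger.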
++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /*if ((addr & 0x3) == 0)*/ { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, WORD); ++ int* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ } ++ // PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++void Simulator::WriteConditionalW(int64_t addr, int32_t value, ++ Instruction* instr, int32_t rk_reg) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ if ((addr & 0x3) == 0) { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ if (local_monitor_.NotifyStoreConditional(addr, TransactionSize::Word) && ++ GlobalMonitor::Get()->NotifyStoreConditional_Locked( ++ addr, &global_monitor_thread_)) { ++ local_monitor_.NotifyStore(); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, WORD); ++ int* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ set_register(rk_reg, 1); ++ } else { ++ set_register(rk_reg, 0); ++ } ++ return; ++ } ++ PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, ++ reinterpret_cast(instr)); ++ DieOrDebug(); ++} ++ ++int64_t Simulator::Read2W(int64_t addr, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory read from bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /* if ((addr & kPointerAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyLoad(); ++ int64_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, *ptr); ++ return *ptr; ++ } ++ // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++void Simulator::Write2W(int64_t addr, int64_t value, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ "\n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /*if ((addr & kPointerAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, DWORD); ++ int64_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ } ++ // PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++void Simulator::WriteConditional2W(int64_t addr, int64_t value, ++ Instruction* instr, int32_t rk_reg) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. 
++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ "\n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ if ((addr & kPointerAlignmentMask) == 0) { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ if (local_monitor_.NotifyStoreConditional(addr, ++ TransactionSize::DoubleWord) && ++ GlobalMonitor::Get()->NotifyStoreConditional_Locked( ++ addr, &global_monitor_thread_)) { ++ local_monitor_.NotifyStore(); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, DWORD); ++ int64_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ set_register(rk_reg, 1); ++ } else { ++ set_register(rk_reg, 0); ++ } ++ return; ++ } ++ PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, ++ reinterpret_cast(instr)); ++ DieOrDebug(); ++} ++ ++double Simulator::ReadD(int64_t addr, Instruction* instr) { ++ /*if ((addr & kDoubleAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyLoad(); ++ double* ptr = reinterpret_cast(addr); ++ return *ptr; ++ } ++ // PrintF("Unaligned (double) read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR ++ // "\n", ++ // addr, reinterpret_cast(instr)); ++ // base::OS::Abort(); ++ // return 0; ++} ++ ++void Simulator::WriteD(int64_t addr, double value, Instruction* instr) { ++ /*if ((addr & kDoubleAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ double* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ } ++ // PrintF("Unaligned (double) write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR ++ // "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++uint16_t Simulator::ReadHU(int64_t addr, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyLoad(); ++ uint16_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr; ++ // } ++ // PrintF("Unaligned unsigned halfword read at 0x%08" PRIx64 ++ // " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++int16_t Simulator::ReadH(int64_t addr, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyLoad(); ++ int16_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr; ++ // } ++ // PrintF("Unaligned signed halfword read at 0x%08" PRIx64 ++ // " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++void Simulator::WriteH(int64_t addr, uint16_t value, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, HALF); ++ uint16_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ // } ++ // PrintF("Unaligned unsigned halfword write at 0x%08" PRIx64 ++ // " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++void Simulator::WriteH(int64_t addr, int16_t value, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, HALF); ++ int16_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ // } ++ // PrintF("Unaligned halfword write at 0x%08" 
PRIx64 " , pc=0x%08" V8PRIxPTR ++ // "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++uint32_t Simulator::ReadBU(int64_t addr) { ++ local_monitor_.NotifyLoad(); ++ uint8_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr & 0xFF; ++} ++ ++int32_t Simulator::ReadB(int64_t addr) { ++ local_monitor_.NotifyLoad(); ++ int8_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr; ++} ++ ++void Simulator::WriteB(int64_t addr, uint8_t value) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, BYTE); ++ uint8_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++} ++ ++void Simulator::WriteB(int64_t addr, int8_t value) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, BYTE); ++ int8_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++} ++ ++template ++T Simulator::ReadMem(int64_t addr, Instruction* instr) { ++ int alignment_mask = (1 << sizeof(T)) - 1; ++ if ((addr & alignment_mask) == 0) { ++ local_monitor_.NotifyLoad(); ++ T* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, *ptr); ++ return *ptr; ++ } ++ PrintF("Unaligned read of type sizeof(%ld) at 0x%08lx, pc=0x%08" V8PRIxPTR ++ "\n", ++ sizeof(T), addr, reinterpret_cast(instr)); ++ base::OS::Abort(); ++ return 0; ++} ++ ++template ++void Simulator::WriteMem(int64_t addr, T value, Instruction* instr) { ++ int alignment_mask = (1 << sizeof(T)) - 1; ++ if ((addr & alignment_mask) == 0) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ T* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ TraceMemWr(addr, value); ++ return; ++ } ++ PrintF("Unaligned write of type sizeof(%ld) at 0x%08lx, pc=0x%08" V8PRIxPTR ++ "\n", ++ sizeof(T), addr, reinterpret_cast(instr)); ++ base::OS::Abort(); ++} ++ ++// Returns the limit of the stack area to enable checking for stack overflows. ++uintptr_t Simulator::StackLimit(uintptr_t c_limit) const { ++ // The simulator uses a separate JS stack. If we have exhausted the C stack, ++ // we also drop down the JS limit to reflect the exhaustion on the JS stack. ++ if (GetCurrentStackPosition() < c_limit) { ++ return reinterpret_cast(get_sp()); ++ } ++ ++ // Otherwise the limit is the JS stack. Leave a safety margin of 1024 bytes ++ // to prevent overrunning the stack when pushing values. ++ return reinterpret_cast(stack_) + 1024; ++} ++ ++// Unsupported instructions use Format to print an error and stop execution. ++void Simulator::Format(Instruction* instr, const char* format) { ++ PrintF("Simulator found unsupported instruction:\n 0x%08" PRIxPTR " : %s\n", ++ reinterpret_cast(instr), format); ++ UNIMPLEMENTED(); ++} ++ ++// Calls into the V8 runtime are based on this very simple interface. ++// Note: To be able to return two values from some calls the code in runtime.cc ++// uses the ObjectPair which is essentially two 32-bit values stuffed into a ++// 64-bit value. With the code below we assume that all runtime calls return ++// 64 bits of result. If they don't, the v1 result register contains a bogus ++// value, which is fine because it is caller-saved. 
++ ++using SimulatorRuntimeCall = ObjectPair (*)(int64_t arg0, int64_t arg1, ++ int64_t arg2, int64_t arg3, ++ int64_t arg4, int64_t arg5, ++ int64_t arg6, int64_t arg7, ++ int64_t arg8, int64_t arg9); ++ ++// These prototypes handle the four types of FP calls. ++using SimulatorRuntimeCompareCall = int64_t (*)(double darg0, double darg1); ++using SimulatorRuntimeFPFPCall = double (*)(double darg0, double darg1); ++using SimulatorRuntimeFPCall = double (*)(double darg0); ++using SimulatorRuntimeFPIntCall = double (*)(double darg0, int32_t arg0); ++ ++// This signature supports direct call in to API function native callback ++// (refer to InvocationCallback in v8.h). ++using SimulatorRuntimeDirectApiCall = void (*)(int64_t arg0); ++using SimulatorRuntimeProfilingApiCall = void (*)(int64_t arg0, void* arg1); ++ ++// This signature supports direct call to accessor getter callback. ++using SimulatorRuntimeDirectGetterCall = void (*)(int64_t arg0, int64_t arg1); ++using SimulatorRuntimeProfilingGetterCall = void (*)(int64_t arg0, int64_t arg1, ++ void* arg2); ++ ++// Software interrupt instructions are used by the simulator to call into the ++// C-based V8 runtime. They are also used for debugging with simulator. ++void Simulator::SoftwareInterrupt() { ++ // There are several instructions that could get us here, ++ // the break_, dbgcall_, syscall_ and hypcall instructions. ++ int32_t opcode_hi15 = instr_.Bits(31, 17); ++ CHECK_EQ(opcode_hi15, 0x15); ++ uint32_t code = instr_.Bits(14, 0); ++ // We first check if we met a call_rt_redirected. ++ if (instr_.InstructionBits() == rtCallRedirInstr) { ++ Redirection* redirection = Redirection::FromInstruction(instr_.instr()); ++ ++ int64_t* stack_pointer = reinterpret_cast(get_register(sp)); ++ ++ int64_t arg0 = get_register(a0); ++ int64_t arg1 = get_register(a1); ++ int64_t arg2 = get_register(a2); ++ int64_t arg3 = get_register(a3); ++ int64_t arg4 = get_register(a4); ++ int64_t arg5 = get_register(a5); ++ int64_t arg6 = get_register(a6); ++ int64_t arg7 = get_register(a7); ++ int64_t arg8 = stack_pointer[0]; ++ int64_t arg9 = stack_pointer[1]; ++ STATIC_ASSERT(kMaxCParameters == 10); ++ ++ bool fp_call = ++ (redirection->type() == ExternalReference::BUILTIN_FP_FP_CALL) || ++ (redirection->type() == ExternalReference::BUILTIN_COMPARE_CALL) || ++ (redirection->type() == ExternalReference::BUILTIN_FP_CALL) || ++ (redirection->type() == ExternalReference::BUILTIN_FP_INT_CALL); ++ ++ { ++ // With the hard floating point calling convention, double ++ // arguments are passed in FPU registers. Fetch the arguments ++ // from there and call the builtin using soft floating point ++ // convention. ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_FP_FP_CALL: ++ case ExternalReference::BUILTIN_COMPARE_CALL: ++ arg0 = get_fpu_register(f0); ++ arg1 = get_fpu_register(f1); ++ arg2 = get_fpu_register(f2); ++ arg3 = get_fpu_register(f3); ++ break; ++ case ExternalReference::BUILTIN_FP_CALL: ++ arg0 = get_fpu_register(f0); ++ arg1 = get_fpu_register(f1); ++ break; ++ case ExternalReference::BUILTIN_FP_INT_CALL: ++ arg0 = get_fpu_register(f0); ++ arg1 = get_fpu_register(f1); ++ arg2 = get_register(a2); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ // This is dodgy but it works because the C entry stubs are never moved. ++ // See comment in codegen-arm.cc and bug 1242173. 
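++ // Save the simulated return address across the host call; it is restored
++ // below and used as the pc to resume at once the call completes.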
++ int64_t saved_ra = get_register(ra); ++ ++ intptr_t external = ++ reinterpret_cast(redirection->external_function()); ++ ++ // Based on CpuFeatures::IsSupported(FPU), La64 will use either hardware ++ // FPU, or gcc soft-float routines. Hardware FPU is simulated in this ++ // simulator. Soft-float has additional abstraction of ExternalReference, ++ // to support serialization. ++ if (fp_call) { ++ double dval0, dval1; // one or two double parameters ++ int32_t ival; // zero or one integer parameters ++ int64_t iresult = 0; // integer return value ++ double dresult = 0; // double return value ++ GetFpArgs(&dval0, &dval1, &ival); ++ SimulatorRuntimeCall generic_target = ++ reinterpret_cast(external); ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_FP_FP_CALL: ++ case ExternalReference::BUILTIN_COMPARE_CALL: ++ PrintF("Call to host function at %p with args %f, %f", ++ reinterpret_cast(FUNCTION_ADDR(generic_target)), ++ dval0, dval1); ++ break; ++ case ExternalReference::BUILTIN_FP_CALL: ++ PrintF("Call to host function at %p with arg %f", ++ reinterpret_cast(FUNCTION_ADDR(generic_target)), ++ dval0); ++ break; ++ case ExternalReference::BUILTIN_FP_INT_CALL: ++ PrintF("Call to host function at %p with args %f, %d", ++ reinterpret_cast(FUNCTION_ADDR(generic_target)), ++ dval0, ival); ++ break; ++ default: ++ UNREACHABLE(); ++ break; ++ } ++ } ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_COMPARE_CALL: { ++ SimulatorRuntimeCompareCall target = ++ reinterpret_cast(external); ++ iresult = target(dval0, dval1); ++ set_register(v0, static_cast(iresult)); ++ // set_register(v1, static_cast(iresult >> 32)); ++ break; ++ } ++ case ExternalReference::BUILTIN_FP_FP_CALL: { ++ SimulatorRuntimeFPFPCall target = ++ reinterpret_cast(external); ++ dresult = target(dval0, dval1); ++ SetFpResult(dresult); ++ break; ++ } ++ case ExternalReference::BUILTIN_FP_CALL: { ++ SimulatorRuntimeFPCall target = ++ reinterpret_cast(external); ++ dresult = target(dval0); ++ SetFpResult(dresult); ++ break; ++ } ++ case ExternalReference::BUILTIN_FP_INT_CALL: { ++ SimulatorRuntimeFPIntCall target = ++ reinterpret_cast(external); ++ dresult = target(dval0, ival); ++ SetFpResult(dresult); ++ break; ++ } ++ default: ++ UNREACHABLE(); ++ break; ++ } ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_COMPARE_CALL: ++ PrintF("Returned %08x\n", static_cast(iresult)); ++ break; ++ case ExternalReference::BUILTIN_FP_FP_CALL: ++ case ExternalReference::BUILTIN_FP_CALL: ++ case ExternalReference::BUILTIN_FP_INT_CALL: ++ PrintF("Returned %f\n", dresult); ++ break; ++ default: ++ UNREACHABLE(); ++ break; ++ } ++ } ++ } else if (redirection->type() == ExternalReference::DIRECT_API_CALL) { ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " \n", ++ reinterpret_cast(external), arg0); ++ } ++ SimulatorRuntimeDirectApiCall target = ++ reinterpret_cast(external); ++ target(arg0); ++ } else if (redirection->type() == ExternalReference::PROFILING_API_CALL) { ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " %08" PRIx64 ++ " \n", ++ reinterpret_cast(external), arg0, arg1); ++ } ++ SimulatorRuntimeProfilingApiCall target = ++ reinterpret_cast(external); ++ target(arg0, Redirection::ReverseRedirection(arg1)); ++ } else if (redirection->type() == ExternalReference::DIRECT_GETTER_CALL) { ++ if 
(::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " %08" PRIx64 ++ " \n", ++ reinterpret_cast(external), arg0, arg1); ++ } ++ SimulatorRuntimeDirectGetterCall target = ++ reinterpret_cast(external); ++ target(arg0, arg1); ++ } else if (redirection->type() == ++ ExternalReference::PROFILING_GETTER_CALL) { ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " %08" PRIx64 ++ " %08" PRIx64 " \n", ++ reinterpret_cast(external), arg0, arg1, arg2); ++ } ++ SimulatorRuntimeProfilingGetterCall target = ++ reinterpret_cast(external); ++ target(arg0, arg1, Redirection::ReverseRedirection(arg2)); ++ } else { ++ DCHECK(redirection->type() == ExternalReference::BUILTIN_CALL || ++ redirection->type() == ExternalReference::BUILTIN_CALL_PAIR); ++ SimulatorRuntimeCall target = ++ reinterpret_cast(external); ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF( ++ "Call to host function at %p " ++ "args %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 ++ " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 ++ " , %08" PRIx64 " , %08" PRIx64 " \n", ++ reinterpret_cast(FUNCTION_ADDR(target)), arg0, arg1, arg2, ++ arg3, arg4, arg5, arg6, arg7, arg8, arg9); ++ } ++ ObjectPair result = ++ target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); ++ set_register(v0, (int64_t)(result.x)); ++ set_register(v1, (int64_t)(result.y)); ++ } ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Returned %08" PRIx64 " : %08" PRIx64 " \n", get_register(v1), ++ get_register(v0)); ++ } ++ set_register(ra, saved_ra); ++ set_pc(get_register(ra)); ++ ++ } else if (code <= kMaxStopCode) { ++ if (IsWatchpoint(code)) { ++ PrintWatchpoint(code); ++ } else { ++ IncreaseStopCounter(code); ++ HandleStop(code, instr_.instr()); ++ } ++ } else { ++ // All remaining break_ codes, and all traps are handled here. ++ La64Debugger dbg(this); ++ dbg.Debug(); ++ } ++} ++ ++// Stop helper functions. ++bool Simulator::IsWatchpoint(uint64_t code) { ++ return (code <= kMaxWatchpointCode); ++} ++ ++void Simulator::PrintWatchpoint(uint64_t code) { ++ La64Debugger dbg(this); ++ ++break_count_; ++ PrintF("\n---- break %" PRId64 " marker: %3d (instr count: %8" PRId64 ++ " ) ----------" ++ "----------------------------------", ++ code, break_count_, icount_); ++ dbg.PrintAllRegs(); // Print registers and continue running. ++} ++ ++void Simulator::HandleStop(uint64_t code, Instruction* instr) { ++ // Stop if it is enabled, otherwise go on jumping over the stop ++ // and the message address. 
++ if (IsEnabledStop(code)) { ++ La64Debugger dbg(this); ++ dbg.Stop(instr); ++ } ++} ++ ++bool Simulator::IsStopInstruction(Instruction* instr) { ++ int32_t opcode_hi15 = instr->Bits(31, 17); ++ uint32_t code = static_cast(instr->Bits(14, 0)); ++ return (opcode_hi15 == 0x15) && code > kMaxWatchpointCode && ++ code <= kMaxStopCode; ++} ++ ++bool Simulator::IsEnabledStop(uint64_t code) { ++ DCHECK_LE(code, kMaxStopCode); ++ DCHECK_GT(code, kMaxWatchpointCode); ++ return !(watched_stops_[code].count & kStopDisabledBit); ++} ++ ++void Simulator::EnableStop(uint64_t code) { ++ if (!IsEnabledStop(code)) { ++ watched_stops_[code].count &= ~kStopDisabledBit; ++ } ++} ++ ++void Simulator::DisableStop(uint64_t code) { ++ if (IsEnabledStop(code)) { ++ watched_stops_[code].count |= kStopDisabledBit; ++ } ++} ++ ++void Simulator::IncreaseStopCounter(uint64_t code) { ++ DCHECK_LE(code, kMaxStopCode); ++ if ((watched_stops_[code].count & ~(1 << 31)) == 0x7FFFFFFF) { ++ PrintF("Stop counter for code %" PRId64 ++ " has overflowed.\n" ++ "Enabling this code and reseting the counter to 0.\n", ++ code); ++ watched_stops_[code].count = 0; ++ EnableStop(code); ++ } else { ++ watched_stops_[code].count++; ++ } ++} ++ ++// Print a stop status. ++void Simulator::PrintStopInfo(uint64_t code) { ++ if (code <= kMaxWatchpointCode) { ++ PrintF("That is a watchpoint, not a stop.\n"); ++ return; ++ } else if (code > kMaxStopCode) { ++ PrintF("Code too large, only %u stops can be used\n", kMaxStopCode + 1); ++ return; ++ } ++ const char* state = IsEnabledStop(code) ? "Enabled" : "Disabled"; ++ int32_t count = watched_stops_[code].count & ~kStopDisabledBit; ++ // Don't print the state of unused breakpoints. ++ if (count != 0) { ++ if (watched_stops_[code].desc) { ++ PrintF("stop %" PRId64 " - 0x%" PRIx64 " : \t%s, \tcounter = %i, \t%s\n", ++ code, code, state, count, watched_stops_[code].desc); ++ } else { ++ PrintF("stop %" PRId64 " - 0x%" PRIx64 " : \t%s, \tcounter = %i\n", code, ++ code, state, count); ++ } ++ } ++} ++ ++void Simulator::SignalException(Exception e) { ++ FATAL("Error: Exception %i raised.", static_cast(e)); ++} ++ ++template ++static T FPAbs(T a); ++ ++template <> ++double FPAbs(double a) { ++ return fabs(a); ++} ++ ++template <> ++float FPAbs(float a) { ++ return fabsf(a); ++} ++ ++template ++static bool FPUProcessNaNsAndZeros(T a, T b, MaxMinKind kind, T* result) { ++ if (std::isnan(a) && std::isnan(b)) { ++ *result = a; ++ } else if (std::isnan(a)) { ++ *result = b; ++ } else if (std::isnan(b)) { ++ *result = a; ++ } else if (b == a) { ++ // Handle -0.0 == 0.0 case. ++ // std::signbit() returns int 0 or 1 so subtracting MaxMinKind::kMax ++ // negates the result. ++ *result = std::signbit(b) - static_cast(kind) ? b : a; ++ } else { ++ return false; ++ } ++ return true; ++} ++ ++template ++static T FPUMin(T a, T b) { ++ T result; ++ if (FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMin, &result)) { ++ return result; ++ } else { ++ return b < a ? b : a; ++ } ++} ++ ++template ++static T FPUMax(T a, T b) { ++ T result; ++ if (FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMax, &result)) { ++ return result; ++ } else { ++ return b > a ? b : a; ++ } ++} ++ ++template ++static T FPUMinA(T a, T b) { ++ T result; ++ if (!FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMin, &result)) { ++ if (FPAbs(a) < FPAbs(b)) { ++ result = a; ++ } else if (FPAbs(b) < FPAbs(a)) { ++ result = b; ++ } else { ++ result = a < b ? 
a : b; ++ } ++ } ++ return result; ++} ++ ++template ++static T FPUMaxA(T a, T b) { ++ T result; ++ if (!FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMin, &result)) { ++ if (FPAbs(a) > FPAbs(b)) { ++ result = a; ++ } else if (FPAbs(b) > FPAbs(a)) { ++ result = b; ++ } else { ++ result = a > b ? a : b; ++ } ++ } ++ return result; ++} ++ ++enum class KeepSign : bool { no = false, yes }; ++ ++template ::value, ++ int>::type = 0> ++T FPUCanonalizeNaNArg(T result, T arg, KeepSign keepSign = KeepSign::no) { ++ DCHECK(std::isnan(arg)); ++ T qNaN = std::numeric_limits::quiet_NaN(); ++ if (keepSign == KeepSign::yes) { ++ return std::copysign(qNaN, result); ++ } ++ return qNaN; ++} ++ ++template ++T FPUCanonalizeNaNArgs(T result, KeepSign keepSign, T first) { ++ if (std::isnan(first)) { ++ return FPUCanonalizeNaNArg(result, first, keepSign); ++ } ++ return result; ++} ++ ++template ++T FPUCanonalizeNaNArgs(T result, KeepSign keepSign, T first, Args... args) { ++ if (std::isnan(first)) { ++ return FPUCanonalizeNaNArg(result, first, keepSign); ++ } ++ return FPUCanonalizeNaNArgs(result, keepSign, args...); ++} ++ ++template ++T FPUCanonalizeOperation(Func f, T first, Args... args) { ++ return FPUCanonalizeOperation(f, KeepSign::no, first, args...); ++} ++ ++template ++T FPUCanonalizeOperation(Func f, KeepSign keepSign, T first, Args... args) { ++ T result = f(first, args...); ++ if (std::isnan(result)) { ++ result = FPUCanonalizeNaNArgs(result, keepSign, first, args...); ++ } ++ return result; ++} ++ ++// Handle execution based on instruction types. ++void Simulator::DecodeTypeOp6() { ++ int64_t alu_out; ++ // Next pc. ++ int64_t next_pc = bad_ra; ++ ++ // Branch instructions common part. ++ auto BranchAndLinkHelper = [this, &next_pc]() { ++ int64_t current_pc = get_pc(); ++ set_register(ra, current_pc + kInstrSize); ++ int32_t offs26_low16 = ++ static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ int32_t offs26_high10 = static_cast(instr_.Bits(9, 0) << 22) >> 6; ++ int32_t offs26 = offs26_low16 | offs26_high10; ++ next_pc = current_pc + (offs26 << 2); ++ printf_instr("Offs26: %08x\n", offs26); ++ set_pc(next_pc); ++ }; ++ ++ auto BranchOff16Helper = [this, &next_pc](bool do_branch) { ++ int64_t current_pc = get_pc(); ++ int32_t offs16 = static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ printf_instr("Offs16: %08x\n", offs16); ++ int32_t offs = do_branch ? (offs16 << 2) : kInstrSize; ++ next_pc = current_pc + offs; ++ set_pc(next_pc); ++ }; ++ ++ auto BranchOff21Helper = [this, &next_pc](bool do_branch) { ++ int64_t current_pc = get_pc(); ++ int32_t offs21_low16 = ++ static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ int32_t offs21_high5 = static_cast(instr_.Bits(4, 0) << 27) >> 11; ++ int32_t offs = offs21_low16 | offs21_high5; ++ printf_instr("Offs21: %08x\n", offs); ++ offs = do_branch ? 
(offs << 2) : kInstrSize; ++ next_pc = current_pc + offs; ++ set_pc(next_pc); ++ }; ++ ++ auto BranchOff26Helper = [this, &next_pc]() { ++ int64_t current_pc = get_pc(); ++ int32_t offs26_low16 = ++ static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ int32_t offs26_high10 = static_cast(instr_.Bits(9, 0) << 22) >> 6; ++ int32_t offs26 = offs26_low16 | offs26_high10; ++ next_pc = current_pc + (offs26 << 2); ++ printf_instr("Offs26: %08x\n", offs26); ++ set_pc(next_pc); ++ }; ++ ++ auto JumpOff16Helper = [this, &next_pc]() { ++ int32_t offs16 = static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ printf_instr("JIRL\t %s: %016lx, %s: %016lx, offs16: %x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), offs16); ++ set_register(rd_reg(), get_pc() + kInstrSize); ++ next_pc = rj() + (offs16 << 2); ++ set_pc(next_pc); ++ }; ++ ++ switch (instr_.Bits(31, 26) << 26) { ++ case ADDU16I_D: { ++ printf_instr("ADDU16I_D\t %s: %016lx, %s: %016lx, si16: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si16()); ++ int32_t si16_upper = static_cast(si16()) << 16; ++ alu_out = static_cast(si16_upper) + rj(); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BEQZ: ++ printf_instr("BEQZ\t %s: %016lx, ", Registers::Name(rj_reg()), rj()); ++ BranchOff21Helper(rj() == 0); ++ break; ++ case BNEZ: ++ printf_instr("BNEZ\t %s: %016lx, ", Registers::Name(rj_reg()), rj()); ++ BranchOff21Helper(rj() != 0); ++ break; ++ case BCZ: { ++ if (instr_.Bits(9, 8) == 0b00) { ++ // BCEQZ ++ printf_instr("BCEQZ\t fcc%d: %s, ", cj_reg(), cj() ? "True" : "False"); ++ BranchOff21Helper(cj() == false); ++ } else if (instr_.Bits(9, 8) == 0b01) { ++ // BCNEZ ++ printf_instr("BCNEZ\t fcc%d: %s, ", cj_reg(), cj() ? "True" : "False"); ++ BranchOff21Helper(cj() == true); ++ } else { ++ UNREACHABLE(); ++ } ++ break; ++ } ++ case JIRL: ++ JumpOff16Helper(); ++ break; ++ case B: ++ printf_instr("B\t "); ++ BranchOff26Helper(); ++ break; ++ case BL: ++ printf_instr("BL\t "); ++ BranchAndLinkHelper(); ++ break; ++ case BEQ: ++ printf_instr("BEQ\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() == rd()); ++ break; ++ case BNE: ++ printf_instr("BNE\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() != rd()); ++ break; ++ case BLT: ++ printf_instr("BLT\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() < rd()); ++ break; ++ case BGE: ++ printf_instr("BGE\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() >= rd()); ++ break; ++ case BLTU: ++ printf_instr("BLTU\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj_u() < rd_u()); ++ break; ++ case BGEU: ++ printf_instr("BGEU\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj_u() >= rd_u()); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp7() { ++ int64_t alu_out; ++ ++ switch (instr_.Bits(31, 25) << 25) { ++ case LU12I_W: { ++ printf_instr("LU12I_W\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_upper = static_cast(si20() << 12); ++ SetResult(rd_reg(), static_cast(si20_upper)); ++ break; ++ } ++ case LU32I_D: { ++ printf_instr("LU32I_D\t %s: 
%016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12) >> 12; ++ int64_t lower_32bit_mask = 0xFFFFFFFF; ++ alu_out = (static_cast(si20_signExtend) << 32) | ++ (rd() & lower_32bit_mask); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case PCADDI: { ++ printf_instr("PCADDI\t %s: %016lx, si20: %d\n", Registers::Name(rd_reg()), ++ rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12) >> 10; ++ int64_t current_pc = get_pc(); ++ alu_out = static_cast(si20_signExtend) + current_pc; ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case PCALAU12I: { ++ printf_instr("PCALAU12I\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12); ++ int64_t current_pc = get_pc(); ++ int64_t clear_lower12bit_mask = 0xFFFFFFFFFFFFF000; ++ alu_out = static_cast(si20_signExtend) + current_pc; ++ SetResult(rd_reg(), alu_out & clear_lower12bit_mask); ++ break; ++ } ++ case PCADDU12I: { ++ printf_instr("PCADDU12I\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12); ++ int64_t current_pc = get_pc(); ++ alu_out = static_cast(si20_signExtend) + current_pc; ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case PCADDU18I: { ++ printf_instr("PCADDU18I\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int64_t si20_signExtend = (static_cast(si20()) << 44) >> 26; ++ int64_t current_pc = get_pc(); ++ alu_out = si20_signExtend + current_pc; ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp8() { ++ int64_t addr = 0x0; ++ int64_t si14_se = (static_cast(si14()) << 50) >> 48; ++ ++ switch (instr_.Bits(31, 24) << 24) { ++ case LDPTR_W: ++ printf_instr("LDPTR_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ set_register(rd_reg(), ReadW(rj() + si14_se, instr_.instr())); ++ break; ++ case STPTR_W: ++ printf_instr("STPTR_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ WriteW(rj() + si14_se, static_cast(rd()), instr_.instr()); ++ break; ++ case LDPTR_D: ++ printf_instr("LDPTR_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ set_register(rd_reg(), Read2W(rj() + si14_se, instr_.instr())); ++ break; ++ case STPTR_D: ++ printf_instr("STPTR_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ Write2W(rj() + si14_se, rd(), instr_.instr()); ++ break; ++ case LL_W: { ++ printf_instr("LL_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ addr = si14_se + rj(); ++ set_register(rd_reg(), ReadW(addr, instr_.instr())); ++ local_monitor_.NotifyLoadLinked(addr, TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked(addr, ++ &global_monitor_thread_); ++ break; ++ } ++ case SC_W: { ++ printf_instr("SC_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ addr = si14_se + rj(); ++ WriteConditionalW(addr, static_cast(rd()), instr_.instr(), ++ rd_reg()); ++ break; ++ } ++ case LL_D: { ++ 
printf_instr("LL_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ addr = si14_se + rj(); ++ set_register(rd_reg(), Read2W(addr, instr_.instr())); ++ local_monitor_.NotifyLoadLinked(addr, TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked(addr, ++ &global_monitor_thread_); ++ break; ++ } ++ case SC_D: { ++ printf_instr("SC_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ addr = si14_se + rj(); ++ WriteConditional2W(addr, rd(), instr_.instr(), rd_reg()); ++ break; ++ } ++ case CSR: ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp10() { ++ int64_t alu_out = 0x0; ++ int64_t si12_se = (static_cast(si12()) << 52) >> 52; ++ uint64_t si12_ze = (static_cast(ui12()) << 52) >> 52; ++ ++ switch (instr_.Bits(31, 22) << 22) { ++ case BSTR_W: { ++ CHECK_EQ(instr_.Bit(21), 1); ++ uint8_t lsbw_ = lsbw(); ++ uint8_t msbw_ = msbw(); ++ CHECK_LE(lsbw_, msbw_); ++ uint8_t size = msbw_ - lsbw_ + 1; ++ uint64_t mask = (1ULL << size) - 1; ++ if (instr_.Bit(15) == 0) { ++ // BSTRINS_W ++ printf_instr( ++ "BSTRINS_W\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbw_, lsbw_); ++ alu_out = static_cast((rd_u() & ~(mask << lsbw_)) | ++ ((rj_u() & mask) << lsbw_)); ++ } else { ++ // BSTRPICK_W ++ printf_instr( ++ "BSTRPICK_W\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbw_, lsbw_); ++ alu_out = static_cast((rj_u() & (mask << lsbw_)) >> lsbw_); ++ } ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BSTRINS_D: { ++ uint8_t lsbd_ = lsbd(); ++ uint8_t msbd_ = msbd(); ++ CHECK_LE(lsbd_, msbd_); ++ printf_instr( ++ "BSTRINS_D\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbd_, lsbd_); ++ uint8_t size = msbd_ - lsbd_ + 1; ++ if (size < 64) { ++ uint64_t mask = (1ULL << size) - 1; ++ alu_out = (rd_u() & ~(mask << lsbd_)) | ((rj_u() & mask) << lsbd_); ++ SetResult(rd_reg(), alu_out); ++ } else if (size == 64) { ++ SetResult(rd_reg(), rj()); ++ } ++ break; ++ } ++ case BSTRPICK_D: { ++ uint8_t lsbd_ = lsbd(); ++ uint8_t msbd_ = msbd(); ++ CHECK_LE(lsbd_, msbd_); ++ printf_instr( ++ "BSTRPICK_D\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbd_, lsbd_); ++ uint8_t size = msbd_ - lsbd_ + 1; ++ if (size < 64) { ++ uint64_t mask = (1ULL << size) - 1; ++ alu_out = (rj_u() & (mask << lsbd_)) >> lsbd_; ++ SetResult(rd_reg(), alu_out); ++ } else if (size == 64) { ++ SetResult(rd_reg(), rj()); ++ } ++ break; ++ } ++ case SLTI: ++ printf_instr("SLTI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ SetResult(rd_reg(), rj() < si12_se ? 1 : 0); ++ break; ++ case SLTUI: ++ printf_instr("SLTUI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ SetResult(rd_reg(), rj_u() < static_cast(si12_se) ? 
1 : 0); ++ break; ++ case ADDI_W: { ++ printf_instr("ADDI_W\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ int32_t alu32_out = ++ static_cast(rj()) + static_cast(si12_se); ++ SetResult(rd_reg(), alu32_out); ++ break; ++ } ++ case ADDI_D: ++ printf_instr("ADDI_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ SetResult(rd_reg(), rj() + si12_se); ++ break; ++ case LU52I_D: { ++ printf_instr("LU52I_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ int64_t si12_se = static_cast(si12()) << 52; ++ uint64_t mask = (1ULL << 52) - 1; ++ alu_out = si12_se + (rj() & mask); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case ANDI: ++ printf_instr("ANDI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ SetResult(rd_reg(), rj() & si12_ze); ++ break; ++ case ORI: ++ printf_instr("ORI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ SetResult(rd_reg(), rj_u() | si12_ze); ++ break; ++ case XORI: ++ printf_instr("XORI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ SetResult(rd_reg(), rj_u() ^ si12_ze); ++ break; ++ case LD_B: ++ printf_instr("LD_B\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadB(rj() + si12_se)); ++ break; ++ case LD_H: ++ printf_instr("LD_H\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadH(rj() + si12_se, instr_.instr())); ++ break; ++ case LD_W: ++ printf_instr("LD_W\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadW(rj() + si12_se, instr_.instr())); ++ break; ++ case LD_D: ++ printf_instr("LD_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), Read2W(rj() + si12_se, instr_.instr())); ++ break; ++ case ST_B: ++ printf_instr("ST_B\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ WriteB(rj() + si12_se, static_cast(rd())); ++ break; ++ case ST_H: ++ printf_instr("ST_H\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ WriteH(rj() + si12_se, static_cast(rd()), instr_.instr()); ++ break; ++ case ST_W: ++ printf_instr("ST_W\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ WriteW(rj() + si12_se, static_cast(rd()), instr_.instr()); ++ break; ++ case ST_D: ++ printf_instr("ST_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ Write2W(rj() + si12_se, rd(), instr_.instr()); ++ break; ++ case LD_BU: ++ printf_instr("LD_BU\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadBU(rj() + si12_se)); ++ break; 
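++ // LD_HU and LD_WU below zero-extend: ReadHU/ReadWU return unsigned values,
++ // so converting them into the 64-bit register leaves the upper bits clear.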
++ case LD_HU: ++ printf_instr("LD_HU\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadHU(rj() + si12_se, instr_.instr())); ++ break; ++ case LD_WU: ++ printf_instr("LD_WU\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadWU(rj() + si12_se, instr_.instr())); ++ break; ++ case FLD_S: { ++ printf_instr("FLD_S\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ set_fpu_register(fd_reg(), kFPUInvalidResult); // Trash upper 32 bits. ++ set_fpu_register_word( ++ fd_reg(), ReadW(rj() + si12_se, instr_.instr(), FLOAT_DOUBLE)); ++ break; ++ } ++ case FST_S: { ++ printf_instr("FST_S\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ int32_t alu_out_32 = static_cast(get_fpu_register(fd_reg())); ++ WriteW(rj() + si12_se, alu_out_32, instr_.instr()); ++ break; ++ } ++ case FLD_D: { ++ printf_instr("FLD_D\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ set_fpu_register_double(fd_reg(), ReadD(rj() + si12_se, instr_.instr())); ++ TraceMemRd(rj() + si12_se, get_fpu_register(fd_reg()), DOUBLE); ++ break; ++ } ++ case FST_D: { ++ printf_instr("FST_D\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ WriteD(rj() + si12_se, get_fpu_register_double(fd_reg()), instr_.instr()); ++ TraceMemWr(rj() + si12_se, get_fpu_register(fd_reg()), DWORD); ++ break; ++ } ++ case PRELD: ++ case CACHE: ++ UNIMPLEMENTED(); ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp12() { ++ switch (instr_.Bits(31, 20) << 20) { ++ case FMADD_S: ++ printf_instr("FMADD_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), std::fma(fj_float(), fk_float(), fa_float())); ++ break; ++ case FMADD_D: ++ printf_instr("FMADD_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(fj_double(), fk_double(), fa_double())); ++ break; ++ case FMSUB_S: ++ printf_instr("FMSUB_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), ++ std::fma(fj_float(), fk_float(), -fa_float())); ++ break; ++ case FMSUB_D: ++ printf_instr("FMSUB_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(fj_double(), fk_double(), -fa_double())); ++ break; ++ case FNMADD_S: ++ printf_instr("FNMADD_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ 
FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), ++ std::fma(-fj_float(), fk_float(), -fa_float())); ++ break; ++ case FNMADD_D: ++ printf_instr("FNMADD_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(-fj_double(), fk_double(), -fa_double())); ++ break; ++ case FNMSUB_S: ++ printf_instr("FNMSUB_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), ++ std::fma(-fj_float(), fk_float(), fa_float())); ++ break; ++ case FNMSUB_D: ++ printf_instr("FNMSUB_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(-fj_double(), fk_double(), fa_double())); ++ break; ++ case FCMP_COND_S: { ++ CHECK_EQ(instr_.Bits(4, 3), 0); ++ float fj = fj_float(); ++ float fk = fk_float(); ++ switch (cond()) { ++ case CAF: { ++ printf_instr("FCMP_CAF_S fcc%d\n", cd_reg()); ++ set_cf_register(cd_reg(), false); ++ break; ++ } ++ case CUN: { ++ printf_instr("FCMP_CUN_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CEQ: { ++ printf_instr("FCMP_CEQ_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj == fk); ++ break; ++ } ++ case CUEQ: { ++ printf_instr("FCMP_CUEQ_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj == fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLT: { ++ printf_instr("FCMP_CLT_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj < fk); ++ break; ++ } ++ case CULT: { ++ printf_instr("FCMP_CULT_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj < fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLE: { ++ printf_instr("FCMP_CLE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj <= fk); ++ break; ++ } ++ case CULE: { ++ printf_instr("FCMP_CULE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj <= fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CNE: { ++ printf_instr("FCMP_CNE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), (fj < fk) || (fj > fk)); ++ break; ++ } ++ case COR: { ++ 
printf_instr("FCMP_COR_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), !std::isnan(fj) && !std::isnan(fk)); ++ break; ++ } ++ case CUNE: { ++ printf_instr("FCMP_CUNE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj != fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case SAF: ++ case SUN: ++ case SEQ: ++ case SUEQ: ++ case SLT: ++ case SULT: ++ case SLE: ++ case SULE: ++ case SNE: ++ case SOR: ++ case SUNE: ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ break; ++ } ++ case FCMP_COND_D: { ++ CHECK_EQ(instr_.Bits(4, 3), 0); ++ double fj = fj_double(); ++ double fk = fk_double(); ++ switch (cond()) { ++ case CAF: { ++ printf_instr("FCMP_CAF_D fcc%d\n", cd_reg()); ++ set_cf_register(cd_reg(), false); ++ break; ++ } ++ case CUN: { ++ printf_instr("FCMP_CUN_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CEQ: { ++ printf_instr("FCMP_CEQ_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj == fk); ++ break; ++ } ++ case CUEQ: { ++ printf_instr("FCMP_CUEQ_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj == fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLT: { ++ printf_instr("FCMP_CLT_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj < fk); ++ break; ++ } ++ case CULT: { ++ printf_instr("FCMP_CULT_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj < fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLE: { ++ printf_instr("FCMP_CLE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj <= fk); ++ break; ++ } ++ case CULE: { ++ printf_instr("FCMP_CULE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj <= fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CNE: { ++ printf_instr("FCMP_CNE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), (fj < fk) || (fj > fk)); ++ break; ++ } ++ case COR: { ++ printf_instr("FCMP_COR_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), !std::isnan(fj) && !std::isnan(fk)); ++ break; ++ } ++ case CUNE: { ++ printf_instr("FCMP_CUNE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj != fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case SAF: ++ case SUN: ++ case SEQ: ++ case SUEQ: ++ case SLT: ++ case SULT: ++ case SLE: ++ case SULE: ++ case SNE: ++ case SOR: ++ case SUNE: ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } 
++ break; ++ } ++ case FSEL: { ++ CHECK_EQ(instr_.Bits(19, 18), 0); ++ printf_instr("FSEL fcc%d, %s: %016f, %s: %016f, %s: %016f\n", ca_reg(), ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ if (ca() == 0) { ++ SetFPUDoubleResult(fd_reg(), fj_double()); ++ } else { ++ SetFPUDoubleResult(fd_reg(), fk_double()); ++ } ++ break; ++ } ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp14() { ++ int64_t alu_out = 0x0; ++ int32_t alu32_out = 0x0; ++ ++ switch (instr_.Bits(31, 18) << 18) { ++ case ALSL: { ++ uint8_t sa = sa2() + 1; ++ alu32_out = ++ (static_cast(rj()) << sa) + static_cast(rk()); ++ if (instr_.Bit(17) == 0) { ++ // ALSL_W ++ printf_instr("ALSL_W\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ SetResult(rd_reg(), alu32_out); ++ } else { ++ // ALSL_WU ++ printf_instr("ALSL_WU\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } ++ break; ++ } ++ case BYTEPICK_W: { ++ CHECK_EQ(instr_.Bit(17), 0); ++ printf_instr("BYTEPICK_W\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ uint8_t sa = sa2() * 8; ++ if (sa == 0) { ++ alu32_out = static_cast(rk()); ++ } else { ++ int32_t mask = (1 << 31) >> (sa - 1); ++ int32_t rk_hi = (static_cast(rk()) & (~mask)) << sa; ++ int32_t rj_lo = (static_cast(rj()) & mask) >> (32 - sa); ++ alu32_out = rk_hi | rj_lo; ++ } ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ break; ++ } ++ case BYTEPICK_D: { ++ printf_instr("BYTEPICK_D\t %s: %016lx, %s: %016lx, %s: %016lx, sa3: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa3()); ++ uint8_t sa = sa3() * 8; ++ if (sa == 0) { ++ alu_out = rk(); ++ } else { ++ int64_t mask = (1ULL << 63) >> (sa - 1); ++ int64_t rk_hi = (rk() & (~mask)) << sa; ++ int64_t rj_lo = (rj() & mask) >> (64 - sa); ++ alu_out = rk_hi | rj_lo; ++ } ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case ALSL_D: { ++ printf_instr("ALSL_D\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ CHECK_EQ(instr_.Bit(17), 0); ++ uint8_t sa = sa2() + 1; ++ alu_out = (rj() << sa) + rk(); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case SLLI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // SLLI_W ++ printf_instr("SLLI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = static_cast(rj()) << ui5(); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if ((instr_.Bits(17, 16) == 0b01)) { ++ // SLLI_D ++ printf_instr("SLLI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ SetResult(rd_reg(), rj() << ui6()); ++ } ++ break; ++ } ++ case SRLI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // SRLI_W ++ printf_instr("SRLI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = 
static_cast(rj()) >> ui5(); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if (instr_.Bits(17, 16) == 0b01) { ++ // SRLI_D ++ printf_instr("SRLI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ SetResult(rd_reg(), rj_u() >> ui6()); ++ } ++ break; ++ } ++ case SRAI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // SRAI_W ++ printf_instr("SRAI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = static_cast(rj()) >> ui5(); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if (instr_.Bits(17, 16) == 0b01) { ++ // SRAI_D ++ printf_instr("SRAI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ SetResult(rd_reg(), rj() >> ui6()); ++ } ++ break; ++ } ++ case ROTRI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // ROTRI_W ++ printf_instr("ROTRI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = static_cast( ++ base::bits::RotateRight32(static_cast(rj_u()), ++ static_cast(ui5()))); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if (instr_.Bits(17, 16) == 0b01) { ++ // ROTRI_D ++ printf_instr("ROTRI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ alu_out = ++ static_cast(base::bits::RotateRight64(rj_u(), ui6())); ++ SetResult(rd_reg(), alu_out); ++ printf_instr("ROTRI, %s, %s, %d\n", Registers::Name(rd_reg()), ++ Registers::Name(rj_reg()), ui6()); ++ } ++ break; ++ } ++ case LDDIR: ++ case LDPTE: ++ UNIMPLEMENTED(); ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp17() { ++ int64_t alu_out; ++ ++ switch (instr_.Bits(31, 15) << 15) { ++ case ADD_W: { ++ printf_instr("ADD_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t alu32_out = static_cast(rj() + rk()); ++ // Sign-extend result of 32bit operation into 64bit register. ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ break; ++ } ++ case ADD_D: ++ printf_instr("ADD_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() + rk()); ++ break; ++ case SUB_W: { ++ printf_instr("SUB_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t alu32_out = static_cast(rj() - rk()); ++ // Sign-extend result of 32bit operation into 64bit register. ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ break; ++ } ++ case SUB_D: ++ printf_instr("SUB_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() - rk()); ++ break; ++ case SLT: ++ printf_instr("SLT\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() < rk() ? 
1 : 0); ++ break; ++ case SLTU: ++ printf_instr("SLTU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj_u() < rk_u() ? 1 : 0); ++ break; ++ case MASKEQZ: ++ printf_instr("MASKEQZ\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rk() == 0 ? rj() : 0); ++ break; ++ case MASKNEZ: ++ printf_instr("MASKNEZ\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rk() != 0 ? rj() : 0); ++ break; ++ case NOR: ++ printf_instr("NOR\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), ~(rj() | rk())); ++ break; ++ case AND: ++ printf_instr("AND\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() & rk()); ++ break; ++ case OR: ++ printf_instr("OR\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() | rk()); ++ break; ++ case XOR: ++ printf_instr("XOR\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() ^ rk()); ++ break; ++ case ORN: ++ printf_instr("ORN\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() | (~rk())); ++ break; ++ case ANDN: ++ printf_instr("ANDN\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() & (~rk())); ++ break; ++ case SLL_W: ++ printf_instr("SLL_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), (int32_t)rj() << (rk_u() % 32)); ++ break; ++ case SRL_W: { ++ printf_instr("SRL_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast((uint32_t)rj_u() >> (rk_u() % 32)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case SRA_W: ++ printf_instr("SRA_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), (int32_t)rj() >> (rk_u() % 32)); ++ break; ++ case SLL_D: ++ printf_instr("SLL_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() << (rk_u() % 64)); ++ break; ++ case SRL_D: { ++ printf_instr("SRL_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast(rj_u() >> (rk_u() % 64)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case SRA_D: ++ printf_instr("SRA_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ 
Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() >> (rk_u() % 64)); ++ break; ++ case ROTR_W: { ++ printf_instr("ROTR_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast( ++ base::bits::RotateRight32(static_cast(rj_u()), ++ static_cast(rk_u() % 32))); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case ROTR_D: { ++ printf_instr("ROTR_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast( ++ base::bits::RotateRight64((rj_u()), (rk_u() % 64))); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case MUL_W: { ++ printf_instr("MUL_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast(rj()) * static_cast(rk()); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case MULH_W: { ++ printf_instr("MULH_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t rj_lo = static_cast(rj()); ++ int32_t rk_lo = static_cast(rk()); ++ alu_out = static_cast(rj_lo) * static_cast(rk_lo); ++ SetResult(rd_reg(), alu_out >> 32); ++ break; ++ } ++ case MULH_WU: { ++ printf_instr("MULH_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint32_t rj_lo = static_cast(rj_u()); ++ uint32_t rk_lo = static_cast(rk_u()); ++ alu_out = static_cast(rj_lo) * static_cast(rk_lo); ++ SetResult(rd_reg(), alu_out >> 32); ++ break; ++ } ++ case MUL_D: ++ printf_instr("MUL_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() * rk()); ++ break; ++ case MULH_D: ++ printf_instr("MULH_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), MultiplyHighSigned(rj(), rk())); ++ break; ++ case MULH_DU: ++ printf_instr("MULH_DU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), MultiplyHighUnsigned(rj_u(), rk_u())); ++ break; ++ case MULW_D_W: { ++ printf_instr("MULW_D_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int64_t rj_i32 = static_cast(rj()); ++ int64_t rk_i32 = static_cast(rk()); ++ SetResult(rd_reg(), rj_i32 * rk_i32); ++ break; ++ } ++ case MULW_D_WU: { ++ printf_instr("MULW_D_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint64_t rj_u32 = static_cast(rj_u()); ++ uint64_t rk_u32 = static_cast(rk_u()); ++ SetResult(rd_reg(), rj_u32 * rk_u32); ++ break; ++ } ++ case DIV_W: { ++ printf_instr("DIV_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t rj_i32 = static_cast(rj()); ++ int32_t rk_i32 = static_cast(rk()); ++ if (rj_i32 == 
INT_MIN && rk_i32 == -1) { ++ SetResult(rd_reg(), INT_MIN); ++ } else if (rk_i32 != 0) { ++ SetResult(rd_reg(), rj_i32 / rk_i32); ++ } ++ break; ++ } ++ case MOD_W: { ++ printf_instr("MOD_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t rj_i32 = static_cast(rj()); ++ int32_t rk_i32 = static_cast(rk()); ++ if (rj_i32 == INT_MIN && rk_i32 == -1) { ++ SetResult(rd_reg(), 0); ++ } else if (rk_i32 != 0) { ++ SetResult(rd_reg(), rj_i32 % rk_i32); ++ } ++ break; ++ } ++ case DIV_WU: { ++ printf_instr("DIV_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint32_t rj_u32 = static_cast(rj()); ++ uint32_t rk_u32 = static_cast(rk()); ++ if (rk_u32 != 0) { ++ SetResult(rd_reg(), static_cast(rj_u32 / rk_u32)); ++ } ++ break; ++ } ++ case MOD_WU: { ++ printf_instr("MOD_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint32_t rj_u32 = static_cast(rj()); ++ uint32_t rk_u32 = static_cast(rk()); ++ if (rk_u32 != 0) { ++ SetResult(rd_reg(), static_cast(rj_u32 % rk_u32)); ++ } ++ break; ++ } ++ case DIV_D: { ++ printf_instr("DIV_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rj() == LONG_MIN && rk() == -1) { ++ SetResult(rd_reg(), LONG_MIN); ++ } else if (rk() != 0) { ++ SetResult(rd_reg(), rj() / rk()); ++ } ++ break; ++ } ++ case MOD_D: { ++ printf_instr("MOD_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rj() == LONG_MIN && rk() == -1) { ++ SetResult(rd_reg(), 0); ++ } else if (rk() != 0) { ++ SetResult(rd_reg(), rj() % rk()); ++ } ++ break; ++ } ++ case DIV_DU: { ++ printf_instr("DIV_DU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rk_u() != 0) { ++ SetResult(rd_reg(), static_cast(rj_u() / rk_u())); ++ } ++ break; ++ } ++ case MOD_DU: { ++ printf_instr("MOD_DU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rk_u() != 0) { ++ SetResult(rd_reg(), static_cast(rj_u() % rk_u())); ++ } ++ break; ++ } ++ case BREAK: ++ printf_instr("BREAK\t code: %x\n", instr_.Bits(14, 0)); ++ SoftwareInterrupt(); ++ break; ++ case FADD_S: { ++ printf_instr("FADD_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs + rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FADD_D: { ++ printf_instr("FADD_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs + rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FSUB_S: { ++ printf_instr("FSUB_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ 
FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs - rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FSUB_D: { ++ printf_instr("FSUB_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs - rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FMUL_S: { ++ printf_instr("FMUL_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs * rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FMUL_D: { ++ printf_instr("FMUL_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs * rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FDIV_S: { ++ printf_instr("FDIV_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs / rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FDIV_D: { ++ printf_instr("FDIV_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs / rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FMAX_S: ++ printf_instr("FMAX_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMax(fk_float(), fj_float())); ++ break; ++ case FMAX_D: ++ printf_instr("FMAX_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMax(fk_double(), fj_double())); ++ break; ++ case FMIN_S: ++ printf_instr("FMIN_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMin(fk_float(), fj_float())); ++ break; ++ case FMIN_D: ++ printf_instr("FMIN_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMin(fk_double(), fj_double())); ++ break; ++ case FMAXA_S: ++ printf_instr("FMAXA_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMaxA(fk_float(), fj_float())); 
++ break; ++ case FMAXA_D: ++ printf_instr("FMAXA_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMaxA(fk_double(), fj_double())); ++ break; ++ case FMINA_S: ++ printf_instr("FMINA_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMinA(fk_float(), fj_float())); ++ break; ++ case FMINA_D: ++ printf_instr("FMINA_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMinA(fk_double(), fj_double())); ++ break; ++ case LDX_B: ++ printf_instr("LDX_B\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadB(rj() + rk())); ++ break; ++ case LDX_H: ++ printf_instr("LDX_H\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadH(rj() + rk(), instr_.instr())); ++ break; ++ case LDX_W: ++ printf_instr("LDX_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadW(rj() + rk(), instr_.instr())); ++ break; ++ case LDX_D: ++ printf_instr("LDX_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), Read2W(rj() + rk(), instr_.instr())); ++ break; ++ case STX_B: ++ printf_instr("STX_B\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ WriteB(rj() + rk(), static_cast(rd())); ++ break; ++ case STX_H: ++ printf_instr("STX_H\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ WriteH(rj() + rk(), static_cast(rd()), instr_.instr()); ++ break; ++ case STX_W: ++ printf_instr("STX_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ WriteW(rj() + rk(), static_cast(rd()), instr_.instr()); ++ break; ++ case STX_D: ++ printf_instr("STX_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ Write2W(rj() + rk(), rd(), instr_.instr()); ++ break; ++ case LDX_BU: ++ printf_instr("LDX_BU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadBU(rj() + rk())); ++ break; ++ case LDX_HU: ++ printf_instr("LDX_HU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadHU(rj() + rk(), instr_.instr())); ++ break; ++ case LDX_WU: ++ printf_instr("LDX_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ 
rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadWU(rj() + rk(), instr_.instr())); ++ break; ++ case PRELDX: ++ printf("Sim UNIMPLEMENTED: PRELDX\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDX_S: ++ printf_instr("FLDX_S\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ set_fpu_register(fd_reg(), kFPUInvalidResult); // Trash upper 32 bits. ++ set_fpu_register_word(fd_reg(), ++ ReadW(rj() + rk(), instr_.instr(), FLOAT_DOUBLE)); ++ break; ++ case FLDX_D: ++ printf_instr("FLDX_D\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ set_fpu_register_double(fd_reg(), ReadD(rj() + rk(), instr_.instr())); ++ break; ++ case FSTX_S: ++ printf_instr("FSTX_S\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ WriteW(rj() + rk(), static_cast(get_fpu_register(fd_reg())), ++ instr_.instr()); ++ break; ++ case FSTX_D: ++ printf_instr("FSTX_D\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ WriteD(rj() + rk(), get_fpu_register_double(fd_reg()), instr_.instr()); ++ break; ++ case ASRTLE_D: ++ printf("Sim UNIMPLEMENTED: ASRTLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case ASRTGT_D: ++ printf("Sim UNIMPLEMENTED: ASRTGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case DBGCALL: ++ printf("Sim UNIMPLEMENTED: DBGCALL\n"); ++ UNIMPLEMENTED(); ++ break; ++ case SYSCALL: ++ printf("Sim UNIMPLEMENTED: SYSCALL\n"); ++ UNIMPLEMENTED(); ++ break; ++ case HYPCALL: ++ printf("Sim UNIMPLEMENTED: HYPCALL\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMSWAP_W: ++ printf("Sim UNIMPLEMENTED: AMSWAP_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMSWAP_D: ++ printf("Sim UNIMPLEMENTED: AMSWAP_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMADD_W: ++ printf("Sim UNIMPLEMENTED: AMADD_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMADD_D: ++ printf("Sim UNIMPLEMENTED: AMADD_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMAND_W: ++ printf("Sim UNIMPLEMENTED: AMAND_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMAND_D: ++ printf("Sim UNIMPLEMENTED: AMAND_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMOR_W: ++ printf("Sim UNIMPLEMENTED: AMOR_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMOR_D: ++ printf("Sim UNIMPLEMENTED: AMOR_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMXOR_W: ++ printf("Sim UNIMPLEMENTED: AMXOR_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMXOR_D: ++ printf("Sim UNIMPLEMENTED: AMXOR_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_W: ++ printf("Sim UNIMPLEMENTED: AMMAX_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_D: ++ printf("Sim UNIMPLEMENTED: AMMAX_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_W: ++ printf("Sim UNIMPLEMENTED: AMMIN_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_D: ++ printf("Sim UNIMPLEMENTED: AMMIN_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_WU: ++ printf("Sim UNIMPLEMENTED: AMMAX_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DU: ++ printf("Sim UNIMPLEMENTED: AMMAX_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_WU: ++ printf("Sim UNIMPLEMENTED: AMMIN_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DU: ++ printf("Sim UNIMPLEMENTED: AMMIN_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMSWAP_DB_W: { ++ 
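++        // The AM*_DB_* cases below emulate the atomic read-modify-write
++        // instructions with a load-linked/store-conditional retry loop:
++        // read the old value under the global monitor, attempt a
++        // conditional write of the new value, and retry until it succeeds.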
printf_instr("AMSWAP_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), static_cast(rk()), instr_.instr(), ++ rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMSWAP_DB_D: { ++ printf_instr("AMSWAP_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMADD_DB_W: { ++ printf_instr("AMADD_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) + ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMADD_DB_D: { ++ printf_instr("AMADD_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() + rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMAND_DB_W: { ++ printf_instr("AMAND_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) & ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } 
break; ++ case AMAND_DB_D: { ++ printf_instr("AMAND_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() & rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMOR_DB_W: { ++ printf_instr("AMOR_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) | ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMOR_DB_D: { ++ printf_instr("AMOR_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() | rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMXOR_DB_W: { ++ printf_instr("AMXOR_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) ^ ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMXOR_DB_D: { ++ printf_instr("AMXOR_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() ^ rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ 
set_register(rd_reg(), rdvalue); ++ } break; ++ case AMMAX_DB_W: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DB_D: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_W: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_D: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DB_WU: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DB_DU: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_WU: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_DU: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case DBAR: ++ printf_instr("DBAR\n"); ++ break; ++ case IBAR: ++ printf("Sim UNIMPLEMENTED: IBAR\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDGT_S: ++ printf("Sim UNIMPLEMENTED: FLDGT_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDGT_D: ++ printf("Sim UNIMPLEMENTED: FLDGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDLE_S: ++ printf("Sim UNIMPLEMENTED: FLDLE_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDLE_D: ++ printf("Sim UNIMPLEMENTED: FLDLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTGT_S: ++ printf("Sim UNIMPLEMENTED: FSTGT_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTGT_D: ++ printf("Sim UNIMPLEMENTED: FSTGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTLE_S: ++ printf("Sim UNIMPLEMENTED: FSTLE_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTLE_D: ++ printf("Sim UNIMPLEMENTED: FSTLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_B: ++ printf("Sim UNIMPLEMENTED: LDGT_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_H: ++ printf("Sim UNIMPLEMENTED: LDGT_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_W: ++ printf("Sim UNIMPLEMENTED: LDGT_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_D: ++ printf("Sim UNIMPLEMENTED: LDGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_B: ++ printf("Sim UNIMPLEMENTED: LDLT_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_H: ++ printf("Sim UNIMPLEMENTED: LDLE_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_W: ++ printf("Sim UNIMPLEMENTED: LDLE_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_D: ++ printf("Sim UNIMPLEMENTED: LDLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_B: ++ printf("Sim UNIMPLEMENTED: STGT_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_H: ++ printf("Sim UNIMPLEMENTED: STGT_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_W: ++ printf("Sim UNIMPLEMENTED: STGT_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_D: ++ printf("Sim UNIMPLEMENTED: STGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_B: ++ printf("Sim UNIMPLEMENTED: STLE_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_H: ++ printf("Sim UNIMPLEMENTED: STLE_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_W: ++ printf("Sim UNIMPLEMENTED: STLE_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_D: ++ printf("Sim UNIMPLEMENTED: STLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case WAIT_INVTLB: ++ printf("Sim UNIMPLEMENTED: WAIT_INVTLB\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSCALEB_S: ++ printf("Sim UNIMPLEMENTED: FSCALEB_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSCALEB_D: ++ printf("Sim UNIMPLEMENTED: FSCALEB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCOPYSIGN_S: ++ printf("Sim UNIMPLEMENTED: FCOPYSIGN_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCOPYSIGN_D: ++ printf("Sim UNIMPLEMENTED: FCOPYSIGN_D\n"); ++ 
UNIMPLEMENTED(); ++ break; ++ case CRC_W_B_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_B_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRC_W_H_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_H_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRC_W_W_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_W_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRC_W_D_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_D_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_B_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_B_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_H_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_H_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_W_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_W_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_D_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_D_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp22() { ++ int64_t alu_out; ++ ++ switch (instr_.Bits(31, 10) << 10) { ++ case CLZ_W: { ++ printf_instr("CLZ_W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountLeadingZeros32(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case CTZ_W: { ++ printf_instr("CTZ_W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountTrailingZeros32(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case CLZ_D: { ++ printf_instr("CLZ_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountLeadingZeros64(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case CTZ_D: { ++ printf_instr("CTZ_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountTrailingZeros64(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVB_2H: { ++ printf_instr("REVB_2H\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint32_t input = static_cast(rj()); ++ uint64_t output = 0; ++ ++ uint32_t mask = 0xFF000000; ++ for (int i = 0; i < 4; i++) { ++ uint32_t tmp = mask & input; ++ if (i % 2 == 0) { ++ tmp = tmp >> 8; ++ } else { ++ tmp = tmp << 8; ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(static_cast(output)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVB_4H: { ++ printf_instr("REVB_4H\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFF00000000000000; ++ for (int i = 0; i < 8; i++) { ++ uint64_t tmp = mask & input; ++ if (i % 2 == 0) { ++ tmp = tmp >> 8; ++ } else { ++ tmp = tmp << 8; ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVB_2W: { ++ printf_instr("REVB_2W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFF000000FF000000; ++ for (int i = 0; i < 4; i++) { ++ uint64_t tmp = mask & input; ++ if (i <= 1) { ++ tmp = tmp >> (24 - i * 16); ++ } else { ++ tmp = tmp << (i * 16 - 24); ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ 
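++      // REVB_D reverses the order of all eight bytes of rj: byte i of the
++      // input ends up as byte (7 - i) of the output.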
case REVB_D: { ++ printf_instr("REVB_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFF00000000000000; ++ for (int i = 0; i < 8; i++) { ++ uint64_t tmp = mask & input; ++ if (i <= 3) { ++ tmp = tmp >> (56 - i * 16); ++ } else { ++ tmp = tmp << (i * 16 - 56); ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVH_2W: { ++ printf_instr("REVH_2W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFFFF000000000000; ++ for (int i = 0; i < 4; i++) { ++ uint64_t tmp = mask & input; ++ if (i % 2 == 0) { ++ tmp = tmp >> 16; ++ } else { ++ tmp = tmp << 16; ++ } ++ output = output | tmp; ++ mask = mask >> 16; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVH_D: { ++ printf_instr("REVH_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFFFF000000000000; ++ for (int i = 0; i < 4; i++) { ++ uint64_t tmp = mask & input; ++ if (i <= 1) { ++ tmp = tmp >> (48 - i * 32); ++ } else { ++ tmp = tmp << (i * 32 - 48); ++ } ++ output = output | tmp; ++ mask = mask >> 16; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_4B: { ++ printf_instr("BITREV_4B\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint32_t input = static_cast(rj()); ++ uint32_t output = 0; ++ uint8_t i_byte, o_byte; ++ ++ // Reverse the bit in byte for each individual byte ++ for (int i = 0; i < 4; i++) { ++ output = output >> 8; ++ i_byte = input & 0xFF; ++ ++ // Fast way to reverse bits in byte ++ // Devised by Sean Anderson, July 13, 2001 ++ o_byte = static_cast(((i_byte * 0x0802LU & 0x22110LU) | ++ (i_byte * 0x8020LU & 0x88440LU)) * ++ 0x10101LU >> ++ 16); ++ ++ output = output | (static_cast(o_byte << 24)); ++ input = input >> 8; ++ } ++ ++ alu_out = static_cast(static_cast(output)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_8B: { ++ printf_instr("BITREV_8B\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ uint8_t i_byte, o_byte; ++ ++ // Reverse the bit in byte for each individual byte ++ for (int i = 0; i < 8; i++) { ++ output = output >> 8; ++ i_byte = input & 0xFF; ++ ++ // Fast way to reverse bits in byte ++ // Devised by Sean Anderson, July 13, 2001 ++ o_byte = static_cast(((i_byte * 0x0802LU & 0x22110LU) | ++ (i_byte * 0x8020LU & 0x88440LU)) * ++ 0x10101LU >> ++ 16); ++ ++ output = output | (static_cast(o_byte) << 56); ++ input = input >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_W: { ++ printf_instr("BITREV_W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint32_t input = static_cast(rj()); ++ uint32_t output = 0; ++ output = base::bits::ReverseBits(input); ++ alu_out = static_cast(static_cast(output)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_D: { ++ printf_instr("BITREV_D\t %s: %016lx, %s, %016lx\n", ++ 
Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = static_cast(base::bits::ReverseBits(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case EXT_W_B: { ++ printf_instr("EXT_W_B\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint8_t input = static_cast(rj()); ++ alu_out = static_cast(static_cast(input)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case EXT_W_H: { ++ printf_instr("EXT_W_H\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint16_t input = static_cast(rj()); ++ alu_out = static_cast(static_cast(input)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case FABS_S: ++ printf_instr("FABS_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), std::abs(fj_float())); ++ break; ++ case FABS_D: ++ printf_instr("FABS_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), std::abs(fj_double())); ++ break; ++ case FNEG_S: ++ printf_instr("FNEG_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), -fj_float()); ++ break; ++ case FNEG_D: ++ printf_instr("FNEG_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), -fj_double()); ++ break; ++ case FSQRT_S: { ++ printf_instr("FSQRT_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ if (fj_float() >= 0) { ++ SetFPUFloatResult(fd_reg(), std::sqrt(fj_float())); ++ } else { ++ SetFPUFloatResult(fd_reg(), std::sqrt(-1)); // qnan ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ } ++ break; ++ } ++ case FSQRT_D: { ++ printf_instr("FSQRT_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ if (fj_double() >= 0) { ++ SetFPUDoubleResult(fd_reg(), std::sqrt(fj_double())); ++ } else { ++ SetFPUDoubleResult(fd_reg(), std::sqrt(-1)); // qnan ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ } ++ break; ++ } ++ case FMOV_S: ++ printf_instr("FMOV_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), fj_float()); ++ break; ++ case FMOV_D: ++ printf_instr("FMOV_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUDoubleResult(fd_reg(), fj_double()); ++ break; ++ case MOVGR2FR_W: { ++ printf_instr("MOVGR2FR_W\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj()); ++ set_fpu_register_word(fd_reg(), static_cast(rj())); ++ TraceRegWr(get_fpu_register(fd_reg()), FLOAT_DOUBLE); ++ break; ++ } ++ case MOVGR2FR_D: ++ printf_instr("MOVGR2FR_D\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj()); ++ SetFPUResult2(fd_reg(), rj()); ++ break; ++ case MOVGR2FRH_W: { ++ printf_instr("MOVGR2FRH_W\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj()); ++ set_fpu_register_hi_word(fd_reg(), static_cast(rj())); ++ TraceRegWr(get_fpu_register(fd_reg()), 
DOUBLE); ++ break; ++ } ++ case MOVFR2GR_S: { ++ printf_instr("MOVFR2GR_S\t %s: %016lx, %s, %016f\n", ++ Registers::Name(rd_reg()), rd(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ set_register(rd_reg(), ++ static_cast(get_fpu_register_word(fj_reg()))); ++ TraceRegWr(get_register(rd_reg()), WORD_DWORD); ++ break; ++ } ++ case MOVFR2GR_D: ++ printf_instr("MOVFR2GR_D\t %s: %016lx, %s, %016f\n", ++ Registers::Name(rd_reg()), rd(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetResult(rd_reg(), get_fpu_register(fj_reg())); ++ break; ++ case MOVFRH2GR_S: ++ printf_instr("MOVFRH2GR_S\t %s: %016lx, %s, %016f\n", ++ Registers::Name(rd_reg()), rd(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetResult(rd_reg(), get_fpu_register_hi_word(fj_reg())); ++ break; ++ case MOVGR2FCSR: { ++ printf_instr("MOVGR2FCSR\t fcsr: %016x, %s, %016lx\n", FCSR_, ++ Registers::Name(rj_reg()), rj()); ++ // fcsr could be 0-3 ++ CHECK_LT(rd_reg(), 4); ++ FCSR_ = static_cast(rj()); ++ TraceRegWr(FCSR_); ++ break; ++ } ++ case MOVFCSR2GR: { ++ printf_instr("MOVFCSR2GR\t %s, %016lx, FCSR: %016x\n", ++ Registers::Name(rd_reg()), rd(), FCSR_); ++ // fcsr could be 0-3 ++ CHECK_LT(rj_reg(), 4); ++ SetResult(rd_reg(), FCSR_); ++ break; ++ } ++ case FCVT_S_D: ++ printf_instr("FCVT_S_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUFloatResult(fd_reg(), static_cast(fj_double())); ++ break; ++ case FCVT_D_S: ++ printf_instr("FCVT_D_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUDoubleResult(fd_reg(), static_cast(fj_float())); ++ break; ++ case FTINTRM_W_S: { ++ printf_instr("FTINTRM_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRM_W_D: { ++ printf_instr("FTINTRM_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRM_L_S: { ++ printf_instr("FTINTRM_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRM_L_D: { ++ printf_instr("FTINTRM_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_W_S: { ++ printf_instr("FTINTRP_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ 
FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::ceil(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_W_D: { ++ printf_instr("FTINTRP_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::ceil(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_L_S: { ++ printf_instr("FTINTRP_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::ceil(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_L_D: { ++ printf_instr("FTINTRP_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::ceil(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_W_S: { ++ printf_instr("FTINTRZ_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::trunc(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_W_D: { ++ printf_instr("FTINTRZ_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::trunc(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_L_S: { ++ printf_instr("FTINTRZ_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::trunc(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_L_D: { ++ printf_instr("FTINTRZ_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::trunc(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_W_S: { ++ printf_instr("FTINTRNE_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj + 0.5); ++ int32_t 
result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ result--; ++ } ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_W_D: { ++ printf_instr("FTINTRNE_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj + 0.5); ++ int32_t result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ result--; ++ } ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_L_S: { ++ printf_instr("FTINTRNE_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj + 0.5); ++ int64_t result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ result--; ++ } ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_L_D: { ++ printf_instr("FTINTRNE_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj + 0.5); ++ int64_t result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. 
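++          // (result currently holds the odd upper neighbour, so the
++          // decrement yields the even value.)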
++ result--; ++ } ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_W_S: { ++ printf_instr("FTINT_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded; ++ int32_t result; ++ round_according_to_fcsr(fj, &rounded, &result); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_W_D: { ++ printf_instr("FTINT_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded; ++ int32_t result; ++ round_according_to_fcsr(fj, &rounded, &result); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_L_S: { ++ printf_instr("FTINT_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded; ++ int64_t result; ++ round64_according_to_fcsr(fj, &rounded, &result); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_L_D: { ++ printf_instr("FTINT_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded; ++ int64_t result; ++ round64_according_to_fcsr(fj, &rounded, &result); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FFINT_S_W: { ++ alu_out = get_fpu_register_signed_word(fj_reg()); ++ printf_instr("FFINT_S_W\t %s: %016f, %s, %016x\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), (int)alu_out); ++ SetFPUFloatResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FFINT_S_L: { ++ alu_out = get_fpu_register(fj_reg()); ++ printf_instr("FFINT_S_L\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), alu_out); ++ SetFPUFloatResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FFINT_D_W: { ++ alu_out = get_fpu_register_signed_word(fj_reg()); ++ printf_instr("FFINT_D_W\t %s: %016f, %s, %016x\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), (int)alu_out); ++ SetFPUDoubleResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FFINT_D_L: { ++ alu_out = get_fpu_register(fj_reg()); ++ printf_instr("FFINT_D_L\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), alu_out); ++ SetFPUDoubleResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FRINT_S: { ++ printf_instr("FRINT_S\t %s: %016f, %s, %016f mode : ", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float result, temp_result; ++ double temp; ++ float upper = std::ceil(fj); ++ float lower = std::floor(fj); ++ switch (get_fcsr_rounding_mode()) { ++ case kRoundToNearest: ++ printf_instr(" kRoundToNearest\n"); ++ if (upper - fj < fj - lower) { ++ result = upper; ++ } else if (upper - fj > fj - 
lower) { ++ result = lower; ++ } else { ++ temp_result = upper / 2; ++ float reminder = std::modf(temp_result, &temp); ++ if (reminder == 0) { ++ result = upper; ++ } else { ++ result = lower; ++ } ++ } ++ break; ++ case kRoundToZero: ++ printf_instr(" kRoundToZero\n"); ++ result = (fj > 0 ? lower : upper); ++ break; ++ case kRoundToPlusInf: ++ printf_instr(" kRoundToPlusInf\n"); ++ result = upper; ++ break; ++ case kRoundToMinusInf: ++ printf_instr(" kRoundToMinusInf\n"); ++ result = lower; ++ break; ++ } ++ SetFPUFloatResult(fd_reg(), result); ++ if (result != fj) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ break; ++ } ++ case FRINT_D: { ++ printf_instr("FRINT_D\t %s: %016f, %s, %016f mode : ", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double result, temp, temp_result; ++ double upper = std::ceil(fj); ++ double lower = std::floor(fj); ++ switch (get_fcsr_rounding_mode()) { ++ case kRoundToNearest: ++ printf_instr(" kRoundToNearest\n"); ++ if (upper - fj < fj - lower) { ++ result = upper; ++ } else if (upper - fj > fj - lower) { ++ result = lower; ++ } else { ++ temp_result = upper / 2; ++ double reminder = std::modf(temp_result, &temp); ++ if (reminder == 0) { ++ result = upper; ++ } else { ++ result = lower; ++ } ++ } ++ break; ++ case kRoundToZero: ++ printf_instr(" kRoundToZero\n"); ++ result = (fj > 0 ? lower : upper); ++ break; ++ case kRoundToPlusInf: ++ printf_instr(" kRoundToPlusInf\n"); ++ result = upper; ++ break; ++ case kRoundToMinusInf: ++ printf_instr(" kRoundToMinusInf\n"); ++ result = lower; ++ break; ++ } ++ SetFPUDoubleResult(fd_reg(), result); ++ if (result != fj) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ break; ++ } ++ case MOVFR2CF: ++ printf("Sim UNIMPLEMENTED: MOVFR2CF\n"); ++ UNIMPLEMENTED(); ++ break; ++ case MOVCF2FR: ++ printf("Sim UNIMPLEMENTED: MOVCF2FR\n"); ++ UNIMPLEMENTED(); ++ break; ++ case MOVGR2CF: ++ printf_instr("MOVGR2CF\t FCC%d, %s: %016lx\n", cd_reg(), ++ Registers::Name(rj_reg()), rj()); ++ set_cf_register(cd_reg(), rj() & 1); ++ break; ++ case MOVCF2GR: ++ printf_instr("MOVCF2GR\t %s: %016lx, FCC%d\n", Registers::Name(rd_reg()), ++ rd(), cj_reg()); ++ SetResult(rd_reg(), cj()); ++ break; ++ case FRECIP_S: ++ printf("Sim UNIMPLEMENTED: FRECIP_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FRECIP_D: ++ printf("Sim UNIMPLEMENTED: FRECIP_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FRSQRT_S: ++ printf("Sim UNIMPLEMENTED: FRSQRT_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FRSQRT_D: ++ printf("Sim UNIMPLEMENTED: FRSQRT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCLASS_S: ++ printf("Sim UNIMPLEMENTED: FCLASS_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCLASS_D: ++ printf("Sim UNIMPLEMENTED: FCLASS_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLOGB_S: ++ printf("Sim UNIMPLEMENTED: FLOGB_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLOGB_D: ++ printf("Sim UNIMPLEMENTED: FLOGB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CLO_W: ++ printf("Sim UNIMPLEMENTED: CLO_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CTO_W: ++ printf("Sim UNIMPLEMENTED: CTO_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CLO_D: ++ printf("Sim UNIMPLEMENTED: CLO_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CTO_D: ++ printf("Sim UNIMPLEMENTED: CTO_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case IOCSRRD_B: ++ printf("Sim UNIMPLEMENTED: IOCSRRD_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case IOCSRRD_H: ++ printf("Sim UNIMPLEMENTED: IOCSRRD_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case 
IOCSRRD_W:
++      printf("Sim UNIMPLEMENTED: IOCSRRD_W\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRRD_D:
++      printf("Sim UNIMPLEMENTED: IOCSRRD_D\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_B:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_B\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_H:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_H\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_W:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_W\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_D:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_D\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBINV:
++      printf("Sim UNIMPLEMENTED: TLBINV\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBFLUSH:
++      printf("Sim UNIMPLEMENTED: TLBFLUSH\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBP:
++      printf("Sim UNIMPLEMENTED: TLBP\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBR:
++      printf("Sim UNIMPLEMENTED: TLBR\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBWI:
++      printf("Sim UNIMPLEMENTED: TLBWI\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBWR:
++      printf("Sim UNIMPLEMENTED: TLBWR\n");
++      UNIMPLEMENTED();
++      break;
++    case ERET:
++      printf("Sim UNIMPLEMENTED: ERET\n");
++      UNIMPLEMENTED();
++      break;
++    case RDTIMEL_W:
++      printf("Sim UNIMPLEMENTED: RDTIMEL_W\n");
++      UNIMPLEMENTED();
++      break;
++    case RDTIMEH_W:
++      printf("Sim UNIMPLEMENTED: RDTIMEH_W\n");
++      UNIMPLEMENTED();
++      break;
++    case RDTIME_D:
++      printf("Sim UNIMPLEMENTED: RDTIME_D\n");
++      // case CPUCFG:
++      // TODO
++      UNIMPLEMENTED();
++      break;
++    // Unimplemented opcodes raised an error in the configuration step before,
++    // so we can use the default here to set the destination register in common
++    // cases.
++    default:
++      UNREACHABLE();
++  }
++}
++
++// Executes the current instruction.
++void Simulator::InstructionDecode(Instruction* instr) {
++  if (v8::internal::FLAG_check_icache) {
++    CheckICache(i_cache(), instr);
++  }
++  pc_modified_ = false;
++
++  v8::internal::EmbeddedVector<char, 256> buffer;
++
++  if (::v8::internal::FLAG_trace_sim) {
++    SNPrintF(trace_buf_, " ");
++    disasm::NameConverter converter;
++    disasm::Disassembler dasm(converter);
++    // Use a reasonably large buffer.
++    dasm.InstructionDecode(buffer, reinterpret_cast<byte*>(instr));
++  }
++
++  static int instr_count = 0;
++  USE(instr_count);
++  instr_ = instr;
++  printf_instr("\nInstr%3d: %08x, PC: %016lx\t", instr_count++,
++               instr_.Bits(31, 0), get_pc());
++  switch (instr_.InstructionType()) {
++    case Instruction::kOp6Type:
++      DecodeTypeOp6();
++      break;
++    case Instruction::kOp7Type:
++      DecodeTypeOp7();
++      break;
++    case Instruction::kOp8Type:
++      DecodeTypeOp8();
++      break;
++    case Instruction::kOp10Type:
++      DecodeTypeOp10();
++      break;
++    case Instruction::kOp12Type:
++      DecodeTypeOp12();
++      break;
++    case Instruction::kOp14Type:
++      DecodeTypeOp14();
++      break;
++    case Instruction::kOp17Type:
++      DecodeTypeOp17();
++      break;
++    case Instruction::kOp22Type:
++      DecodeTypeOp22();
++      break;
++    default: {
++      printf("instr_: %x\n", instr_.Bits(31, 0));
++      UNREACHABLE();
++    }
++  }
++
++  if (::v8::internal::FLAG_trace_sim) {
++    PrintF(" 0x%08" PRIxPTR " %-44s %s\n",
++           reinterpret_cast<intptr_t>(instr), buffer.begin(),
++           trace_buf_.begin());
++  }
++
++  if (!pc_modified_) {
++    set_register(pc, reinterpret_cast<int64_t>(instr) + kInstrSize);
++  }
++}
++
++void Simulator::Execute() {
++  // Get the PC to simulate. Cannot use the accessor here as we need the
++  // raw PC value and not the one used as input to arithmetic instructions.
++ int64_t program_counter = get_pc(); ++ if (::v8::internal::FLAG_stop_sim_at == 0) { ++ // Fast version of the dispatch loop without checking whether the simulator ++ // should be stopping at a particular executed instruction. ++ while (program_counter != end_sim_pc) { ++ Instruction* instr = reinterpret_cast(program_counter); ++ icount_++; ++ InstructionDecode(instr); ++ program_counter = get_pc(); ++ } ++ } else { ++ // FLAG_stop_sim_at is at the non-default value. Stop in the debugger when ++ // we reach the particular instruction count. ++ while (program_counter != end_sim_pc) { ++ Instruction* instr = reinterpret_cast(program_counter); ++ icount_++; ++ if (icount_ == static_cast(::v8::internal::FLAG_stop_sim_at)) { ++ La64Debugger dbg(this); ++ dbg.Debug(); ++ } else { ++ InstructionDecode(instr); ++ } ++ program_counter = get_pc(); ++ } ++ } ++} ++ ++void Simulator::CallInternal(Address entry) { ++ // Adjust JS-based stack limit to C-based stack limit. ++ isolate_->stack_guard()->AdjustStackLimitForSimulator(); ++ ++ // Prepare to execute the code at entry. ++ set_register(pc, static_cast(entry)); ++ // Put down marker for end of simulation. The simulator will stop simulation ++ // when the PC reaches this value. By saving the "end simulation" value into ++ // the LR the simulation stops when returning to this call point. ++ set_register(ra, end_sim_pc); ++ ++ // Remember the values of callee-saved registers. ++ int64_t s0_val = get_register(s0); ++ int64_t s1_val = get_register(s1); ++ int64_t s2_val = get_register(s2); ++ int64_t s3_val = get_register(s3); ++ int64_t s4_val = get_register(s4); ++ int64_t s5_val = get_register(s5); ++ int64_t s6_val = get_register(s6); ++ int64_t s7_val = get_register(s7); ++ int64_t s8_val = get_register(s8); ++ int64_t gp_val = get_register(gp); ++ int64_t sp_val = get_register(sp); ++ int64_t tp_val = get_register(tp); ++ int64_t fp_val = get_register(fp); ++ ++ // Set up the callee-saved registers with a known value. To be able to check ++ // that they are preserved properly across JS execution. ++ int64_t callee_saved_value = icount_; ++ set_register(s0, callee_saved_value); ++ set_register(s1, callee_saved_value); ++ set_register(s2, callee_saved_value); ++ set_register(s3, callee_saved_value); ++ set_register(s4, callee_saved_value); ++ set_register(s5, callee_saved_value); ++ set_register(s6, callee_saved_value); ++ set_register(s7, callee_saved_value); ++ set_register(s8, callee_saved_value); ++ set_register(gp, callee_saved_value); ++ set_register(tp, callee_saved_value); ++ set_register(fp, callee_saved_value); ++ ++ // Start the simulation. ++ Execute(); ++ ++ // Check that the callee-saved registers have been preserved. ++ CHECK_EQ(callee_saved_value, get_register(s0)); ++ CHECK_EQ(callee_saved_value, get_register(s1)); ++ CHECK_EQ(callee_saved_value, get_register(s2)); ++ CHECK_EQ(callee_saved_value, get_register(s3)); ++ CHECK_EQ(callee_saved_value, get_register(s4)); ++ CHECK_EQ(callee_saved_value, get_register(s5)); ++ CHECK_EQ(callee_saved_value, get_register(s6)); ++ CHECK_EQ(callee_saved_value, get_register(s7)); ++ CHECK_EQ(callee_saved_value, get_register(s8)); ++ CHECK_EQ(callee_saved_value, get_register(gp)); ++ CHECK_EQ(callee_saved_value, get_register(tp)); ++ CHECK_EQ(callee_saved_value, get_register(fp)); ++ ++ // Restore callee-saved registers with the original value. 
++ set_register(s0, s0_val); ++ set_register(s1, s1_val); ++ set_register(s2, s2_val); ++ set_register(s3, s3_val); ++ set_register(s4, s4_val); ++ set_register(s5, s5_val); ++ set_register(s6, s6_val); ++ set_register(s7, s7_val); ++ set_register(s8, s8_val); ++ set_register(gp, gp_val); ++ set_register(sp, sp_val); ++ set_register(tp, tp_val); ++ set_register(fp, fp_val); ++} ++ ++intptr_t Simulator::CallImpl(Address entry, int argument_count, ++ const intptr_t* arguments) { ++ constexpr int kRegisterPassedArguments = 8; ++ // Set up arguments. ++ ++ int reg_arg_count = std::min(kRegisterPassedArguments, argument_count); ++ if (reg_arg_count > 0) set_register(a0, arguments[0]); ++ if (reg_arg_count > 1) set_register(a1, arguments[1]); ++ if (reg_arg_count > 2) set_register(a2, arguments[2]); ++ if (reg_arg_count > 3) set_register(a3, arguments[3]); ++ if (reg_arg_count > 4) set_register(a4, arguments[4]); ++ if (reg_arg_count > 5) set_register(a5, arguments[5]); ++ if (reg_arg_count > 6) set_register(a6, arguments[6]); ++ if (reg_arg_count > 7) set_register(a7, arguments[7]); ++ ++ // Remaining arguments passed on stack. ++ int64_t original_stack = get_register(sp); ++ // Compute position of stack on entry to generated code. ++ int stack_args_count = argument_count - reg_arg_count; ++ int stack_args_size = stack_args_count * sizeof(*arguments) + kCArgsSlotsSize; ++ int64_t entry_stack = original_stack - stack_args_size; ++ ++ if (base::OS::ActivationFrameAlignment() != 0) { ++ entry_stack &= -base::OS::ActivationFrameAlignment(); ++ } ++ // Store remaining arguments on stack, from low to high memory. ++ intptr_t* stack_argument = reinterpret_cast(entry_stack); ++ memcpy(stack_argument + kCArgSlotCount, arguments + reg_arg_count, ++ stack_args_count * sizeof(*arguments)); ++ set_register(sp, entry_stack); ++ ++ CallInternal(entry); ++ ++ // Pop stack passed arguments. ++ CHECK_EQ(entry_stack, get_register(sp)); ++ set_register(sp, original_stack); ++ ++ return get_register(v0); ++} ++ ++double Simulator::CallFP(Address entry, double d0, double d1) { ++ const FPURegister fparg2 = f1; ++ set_fpu_register_double(f0, d0); ++ set_fpu_register_double(fparg2, d1); ++ CallInternal(entry); ++ return get_fpu_register_double(f0); ++} ++ ++uintptr_t Simulator::PushAddress(uintptr_t address) { ++ int64_t new_sp = get_register(sp) - sizeof(uintptr_t); ++ uintptr_t* stack_slot = reinterpret_cast(new_sp); ++ *stack_slot = address; ++ set_register(sp, new_sp); ++ return new_sp; ++} ++ ++uintptr_t Simulator::PopAddress() { ++ int64_t current_sp = get_register(sp); ++ uintptr_t* stack_slot = reinterpret_cast(current_sp); ++ uintptr_t address = *stack_slot; ++ set_register(sp, current_sp + sizeof(uintptr_t)); ++ return address; ++} ++ ++Simulator::LocalMonitor::LocalMonitor() ++ : access_state_(MonitorAccess::Open), ++ tagged_addr_(0), ++ size_(TransactionSize::None) {} ++ ++void Simulator::LocalMonitor::Clear() { ++ access_state_ = MonitorAccess::Open; ++ tagged_addr_ = 0; ++ size_ = TransactionSize::None; ++} ++ ++void Simulator::LocalMonitor::NotifyLoad() { ++ if (access_state_ == MonitorAccess::RMW) { ++ // A non linked load could clear the local monitor. As a result, it's ++ // most strict to unconditionally clear the local monitor on load. 
++ Clear(); ++ } ++} ++ ++void Simulator::LocalMonitor::NotifyLoadLinked(uintptr_t addr, ++ TransactionSize size) { ++ access_state_ = MonitorAccess::RMW; ++ tagged_addr_ = addr; ++ size_ = size; ++} ++ ++void Simulator::LocalMonitor::NotifyStore() { ++ if (access_state_ == MonitorAccess::RMW) { ++ // A non exclusive store could clear the local monitor. As a result, it's ++ // most strict to unconditionally clear the local monitor on store. ++ Clear(); ++ } ++} ++ ++bool Simulator::LocalMonitor::NotifyStoreConditional(uintptr_t addr, ++ TransactionSize size) { ++ if (access_state_ == MonitorAccess::RMW) { ++ if (addr == tagged_addr_ && size_ == size) { ++ Clear(); ++ return true; ++ } else { ++ return false; ++ } ++ } else { ++ DCHECK(access_state_ == MonitorAccess::Open); ++ return false; ++ } ++} ++ ++Simulator::GlobalMonitor::LinkedAddress::LinkedAddress() ++ : access_state_(MonitorAccess::Open), ++ tagged_addr_(0), ++ next_(nullptr), ++ prev_(nullptr), ++ failure_counter_(0) {} ++ ++void Simulator::GlobalMonitor::LinkedAddress::Clear_Locked() { ++ access_state_ = MonitorAccess::Open; ++ tagged_addr_ = 0; ++} ++ ++void Simulator::GlobalMonitor::LinkedAddress::NotifyLoadLinked_Locked( ++ uintptr_t addr) { ++ access_state_ = MonitorAccess::RMW; ++ tagged_addr_ = addr; ++} ++ ++void Simulator::GlobalMonitor::LinkedAddress::NotifyStore_Locked() { ++ if (access_state_ == MonitorAccess::RMW) { ++ // A non exclusive store could clear the global monitor. As a result, it's ++ // most strict to unconditionally clear global monitors on store. ++ Clear_Locked(); ++ } ++} ++ ++bool Simulator::GlobalMonitor::LinkedAddress::NotifyStoreConditional_Locked( ++ uintptr_t addr, bool is_requesting_thread) { ++ if (access_state_ == MonitorAccess::RMW) { ++ if (is_requesting_thread) { ++ if (addr == tagged_addr_) { ++ Clear_Locked(); ++ // Introduce occasional sc/scd failures. This is to simulate the ++ // behavior of hardware, which can randomly fail due to background ++ // cache evictions. ++ if (failure_counter_++ >= kMaxFailureCounter) { ++ failure_counter_ = 0; ++ return false; ++ } else { ++ return true; ++ } ++ } ++ } else if ((addr & kExclusiveTaggedAddrMask) == ++ (tagged_addr_ & kExclusiveTaggedAddrMask)) { ++ // Check the masked addresses when responding to a successful lock by ++ // another thread so the implementation is more conservative (i.e. the ++ // granularity of locking is as large as possible.) ++ Clear_Locked(); ++ return false; ++ } ++ } ++ return false; ++} ++ ++void Simulator::GlobalMonitor::NotifyLoadLinked_Locked( ++ uintptr_t addr, LinkedAddress* linked_address) { ++ linked_address->NotifyLoadLinked_Locked(addr); ++ PrependProcessor_Locked(linked_address); ++} ++ ++void Simulator::GlobalMonitor::NotifyStore_Locked( ++ LinkedAddress* linked_address) { ++ // Notify each thread of the store operation. ++ for (LinkedAddress* iter = head_; iter; iter = iter->next_) { ++ iter->NotifyStore_Locked(); ++ } ++} ++ ++bool Simulator::GlobalMonitor::NotifyStoreConditional_Locked( ++ uintptr_t addr, LinkedAddress* linked_address) { ++ DCHECK(IsProcessorInLinkedList_Locked(linked_address)); ++ if (linked_address->NotifyStoreConditional_Locked(addr, true)) { ++ // Notify the other processors that this StoreConditional succeeded. 
++    for (LinkedAddress* iter = head_; iter; iter = iter->next_) {
++      if (iter != linked_address) {
++        iter->NotifyStoreConditional_Locked(addr, false);
++      }
++    }
++    return true;
++  } else {
++    return false;
++  }
++}
++
++bool Simulator::GlobalMonitor::IsProcessorInLinkedList_Locked(
++    LinkedAddress* linked_address) const {
++  return head_ == linked_address || linked_address->next_ ||
++         linked_address->prev_;
++}
++
++void Simulator::GlobalMonitor::PrependProcessor_Locked(
++    LinkedAddress* linked_address) {
++  if (IsProcessorInLinkedList_Locked(linked_address)) {
++    return;
++  }
++
++  if (head_) {
++    head_->prev_ = linked_address;
++  }
++  linked_address->prev_ = nullptr;
++  linked_address->next_ = head_;
++  head_ = linked_address;
++}
++
++void Simulator::GlobalMonitor::RemoveLinkedAddress(
++    LinkedAddress* linked_address) {
++  base::MutexGuard lock_guard(&mutex);
++  if (!IsProcessorInLinkedList_Locked(linked_address)) {
++    return;
++  }
++
++  if (linked_address->prev_) {
++    linked_address->prev_->next_ = linked_address->next_;
++  } else {
++    head_ = linked_address->next_;
++  }
++  if (linked_address->next_) {
++    linked_address->next_->prev_ = linked_address->prev_;
++  }
++  linked_address->prev_ = nullptr;
++  linked_address->next_ = nullptr;
++}
++
++#undef SScanF
++
++} // namespace internal
++} // namespace v8
++
++#endif // USE_SIMULATOR
+diff --git a/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.h b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.h
+new file mode 100644
+index 00000000000..de2d1b0d89a
+--- /dev/null
++++ b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.h
+@@ -0,0 +1,646 @@
++// Copyright 2020 the V8 project authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++// Declares a Simulator for loongisa instructions if we are not generating a
++// native loongisa binary. This Simulator allows us to run and debug loongisa
++// code generation on regular desktop machines. V8 calls into generated code via
++// the GeneratedCode wrapper, which will start execution in the Simulator or
++// forwards to the real entry on a loongisa HW platform.
++
++#ifndef V8_EXECUTION_LA64_SIMULATOR_LA64_H_
++#define V8_EXECUTION_LA64_SIMULATOR_LA64_H_
++
++// globals.h defines USE_SIMULATOR.
++#include "src/common/globals.h"
++
++template <typename T>
++int Compare(const T& a, const T& b) {
++  if (a == b)
++    return 0;
++  else if (a < b)
++    return -1;
++  else
++    return 1;
++}
++
++// Returns the negative absolute value of its argument.
++template <typename T,
++          typename = typename std::enable_if<std::is_signed<T>::value>::type>
++T Nabs(T a) {
++  return a < 0 ? a : -a;
++}
++
++#if defined(USE_SIMULATOR)
++// Running with a simulator.
++
++#include "src/base/hashmap.h"
++#include "src/codegen/assembler.h"
++#include "src/codegen/la64/constants-la64.h"
++#include "src/execution/simulator-base.h"
++#include "src/utils/allocation.h"
++
++namespace v8 {
++namespace internal {
++
++// -----------------------------------------------------------------------------
++// Utility functions
++
++class CachePage {
++ public:
++  static const int LINE_VALID = 0;
++  static const int LINE_INVALID = 1;
++
++  static const int kPageShift = 12;
++  static const int kPageSize = 1 << kPageShift;
++  static const int kPageMask = kPageSize - 1;
++  static const int kLineShift = 2;  // The cache line is only 4 bytes right now.
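++  // The constants above are consumed by the static ICache helpers declared
++  // later in this header (CheckICache / FlushICache / FlushOnePage). A rough
++  // sketch of the intended flow, assuming the page has already been looked up
++  // (cache_page, offset and line_start are illustrative local names):
++  //   char* valid_byte = cache_page->ValidityByte(offset);
++  //   if (*valid_byte != CachePage::LINE_VALID) {
++  //     // Flushing marked this line invalid; re-copy the instructions and
++  //     // mark the line valid before executing from the cached copy.
++  //     memcpy(cache_page->CachedData(offset & ~CachePage::kLineMask),
++  //            line_start, CachePage::kLineLength);
++  //     *valid_byte = CachePage::LINE_VALID;
++  //   }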
++ static const int kLineLength = 1 << kLineShift; ++ static const int kLineMask = kLineLength - 1; ++ ++ CachePage() { memset(&validity_map_, LINE_INVALID, sizeof(validity_map_)); } ++ ++ char* ValidityByte(int offset) { ++ return &validity_map_[offset >> kLineShift]; ++ } ++ ++ char* CachedData(int offset) { return &data_[offset]; } ++ ++ private: ++ char data_[kPageSize]; // The cached data. ++ static const int kValidityMapSize = kPageSize >> kLineShift; ++ char validity_map_[kValidityMapSize]; // One byte per line. ++}; ++ ++class SimInstructionBase : public InstructionBase { ++ public: ++ Type InstructionType() const { return type_; } ++ inline Instruction* instr() const { return instr_; } ++ inline int32_t operand() const { return operand_; } ++ ++ protected: ++ SimInstructionBase() : operand_(-1), instr_(nullptr), type_(kUnsupported) {} ++ explicit SimInstructionBase(Instruction* instr) {} ++ ++ int32_t operand_; ++ Instruction* instr_; ++ Type type_; ++ ++ private: ++ DISALLOW_ASSIGN(SimInstructionBase); ++}; ++ ++class SimInstruction : public InstructionGetters { ++ public: ++ SimInstruction() {} ++ ++ explicit SimInstruction(Instruction* instr) { *this = instr; } ++ ++ SimInstruction& operator=(Instruction* instr) { ++ operand_ = *reinterpret_cast(instr); ++ instr_ = instr; ++ type_ = InstructionBase::InstructionType(); ++ DCHECK(reinterpret_cast(&operand_) == this); ++ return *this; ++ } ++}; ++ ++class Simulator : public SimulatorBase { ++ public: ++ friend class La64Debugger; ++ ++ // Registers are declared in order. ++ enum Register { ++ no_reg = -1, ++ zero_reg = 0, ++ ra, ++ gp, ++ sp, ++ a0, ++ a1, ++ a2, ++ a3, ++ a4, ++ a5, ++ a6, ++ a7, ++ t0, ++ t1, ++ t2, ++ t3, ++ t4, ++ t5, ++ t6, ++ t7, ++ t8, ++ tp, ++ fp, ++ s0, ++ s1, ++ s2, ++ s3, ++ s4, ++ s5, ++ s6, ++ s7, ++ s8, ++ pc, // pc must be the last register. ++ kNumSimuRegisters, ++ // aliases ++ v0 = a0, ++ v1 = a1 ++ }; ++ ++ // Condition flag registers. ++ enum CFRegister { ++ fcc0, ++ fcc1, ++ fcc2, ++ fcc3, ++ fcc4, ++ fcc5, ++ fcc6, ++ fcc7, ++ kNumCFRegisters ++ }; ++ ++ // Floating point registers. ++ enum FPURegister { ++ f0, ++ f1, ++ f2, ++ f3, ++ f4, ++ f5, ++ f6, ++ f7, ++ f8, ++ f9, ++ f10, ++ f11, ++ f12, ++ f13, ++ f14, ++ f15, ++ f16, ++ f17, ++ f18, ++ f19, ++ f20, ++ f21, ++ f22, ++ f23, ++ f24, ++ f25, ++ f26, ++ f27, ++ f28, ++ f29, ++ f30, ++ f31, ++ kNumFPURegisters ++ }; ++ ++ explicit Simulator(Isolate* isolate); ++ ~Simulator(); ++ ++ // The currently executing Simulator instance. Potentially there can be one ++ // for each native thread. ++ V8_EXPORT_PRIVATE static Simulator* current(v8::internal::Isolate* isolate); ++ ++ // Accessors for register state. Reading the pc value adheres to the LA64 ++ // architecture specification and is off by a 8 from the currently executing ++ // instruction. ++ void set_register(int reg, int64_t value); ++ void set_register_word(int reg, int32_t value); ++ void set_dw_register(int dreg, const int* dbl); ++ int64_t get_register(int reg) const; ++ double get_double_from_register_pair(int reg); ++ // Same for FPURegisters. 
++ void set_fpu_register(int fpureg, int64_t value); ++ void set_fpu_register_word(int fpureg, int32_t value); ++ void set_fpu_register_hi_word(int fpureg, int32_t value); ++ void set_fpu_register_float(int fpureg, float value); ++ void set_fpu_register_double(int fpureg, double value); ++ void set_fpu_register_invalid_result64(float original, float rounded); ++ void set_fpu_register_invalid_result(float original, float rounded); ++ void set_fpu_register_word_invalid_result(float original, float rounded); ++ void set_fpu_register_invalid_result64(double original, double rounded); ++ void set_fpu_register_invalid_result(double original, double rounded); ++ void set_fpu_register_word_invalid_result(double original, double rounded); ++ int64_t get_fpu_register(int fpureg) const; ++ int32_t get_fpu_register_word(int fpureg) const; ++ int32_t get_fpu_register_signed_word(int fpureg) const; ++ int32_t get_fpu_register_hi_word(int fpureg) const; ++ float get_fpu_register_float(int fpureg) const; ++ double get_fpu_register_double(int fpureg) const; ++ void set_cf_register(int cfreg, bool value); ++ bool get_cf_register(int cfreg) const; ++ void set_fcsr_rounding_mode(FPURoundingMode mode); ++ unsigned int get_fcsr_rounding_mode(); ++ void set_fcsr_bit(uint32_t cc, bool value); ++ bool test_fcsr_bit(uint32_t cc); ++ bool set_fcsr_round_error(double original, double rounded); ++ bool set_fcsr_round64_error(double original, double rounded); ++ bool set_fcsr_round_error(float original, float rounded); ++ bool set_fcsr_round64_error(float original, float rounded); ++ void round_according_to_fcsr(double toRound, double* rounded, ++ int32_t* rounded_int); ++ void round64_according_to_fcsr(double toRound, double* rounded, ++ int64_t* rounded_int); ++ void round_according_to_fcsr(float toRound, float* rounded, ++ int32_t* rounded_int); ++ void round64_according_to_fcsr(float toRound, float* rounded, ++ int64_t* rounded_int); ++ // Special case of set_register and get_register to access the raw PC value. ++ void set_pc(int64_t value); ++ int64_t get_pc() const; ++ ++ Address get_sp() const { return static_cast
(get_register(sp)); } ++ ++ // Accessor to the internal simulator stack area. ++ uintptr_t StackLimit(uintptr_t c_limit) const; ++ ++ // Executes LA64 instructions until the PC reaches end_sim_pc. ++ void Execute(); ++ ++ template ++ Return Call(Address entry, Args... args) { ++ return VariadicCall(this, &Simulator::CallImpl, entry, args...); ++ } ++ ++ // Alternative: call a 2-argument double function. ++ double CallFP(Address entry, double d0, double d1); ++ ++ // Push an address onto the JS stack. ++ uintptr_t PushAddress(uintptr_t address); ++ ++ // Pop an address from the JS stack. ++ uintptr_t PopAddress(); ++ ++ // Debugger input. ++ void set_last_debugger_input(char* input); ++ char* last_debugger_input() { return last_debugger_input_; } ++ ++ // Redirection support. ++ static void SetRedirectInstruction(Instruction* instruction); ++ ++ // ICache checking. ++ static bool ICacheMatch(void* one, void* two); ++ static void FlushICache(base::CustomMatcherHashMap* i_cache, void* start, ++ size_t size); ++ ++ // Returns true if pc register contains one of the 'special_values' defined ++ // below (bad_ra, end_sim_pc). ++ bool has_bad_pc() const; ++ ++ private: ++ enum special_values { ++ // Known bad pc value to ensure that the simulator does not execute ++ // without being properly setup. ++ bad_ra = -1, ++ // A pc value used to signal the simulator to stop execution. Generally ++ // the ra is set to this value on transition from native C code to ++ // simulated execution, so that the simulator can "return" to the native ++ // C code. ++ end_sim_pc = -2, ++ // Unpredictable value. ++ Unpredictable = 0xbadbeaf ++ }; ++ ++ V8_EXPORT_PRIVATE intptr_t CallImpl(Address entry, int argument_count, ++ const intptr_t* arguments); ++ ++ // Unsupported instructions use Format to print an error and stop execution. ++ void Format(Instruction* instr, const char* format); ++ ++ // Helpers for data value tracing. ++ enum TraceType { ++ BYTE, ++ HALF, ++ WORD, ++ DWORD, ++ FLOAT, ++ DOUBLE, ++ FLOAT_DOUBLE, ++ WORD_DWORD ++ }; ++ ++ // Read and write memory. ++ inline uint32_t ReadBU(int64_t addr); ++ inline int32_t ReadB(int64_t addr); ++ inline void WriteB(int64_t addr, uint8_t value); ++ inline void WriteB(int64_t addr, int8_t value); ++ ++ inline uint16_t ReadHU(int64_t addr, Instruction* instr); ++ inline int16_t ReadH(int64_t addr, Instruction* instr); ++ // Note: Overloaded on the sign of the value. ++ inline void WriteH(int64_t addr, uint16_t value, Instruction* instr); ++ inline void WriteH(int64_t addr, int16_t value, Instruction* instr); ++ ++ inline uint32_t ReadWU(int64_t addr, Instruction* instr); ++ inline int32_t ReadW(int64_t addr, Instruction* instr, TraceType t = WORD); ++ inline void WriteW(int64_t addr, int32_t value, Instruction* instr); ++ void WriteConditionalW(int64_t addr, int32_t value, Instruction* instr, ++ int32_t rt_reg); ++ inline int64_t Read2W(int64_t addr, Instruction* instr); ++ inline void Write2W(int64_t addr, int64_t value, Instruction* instr); ++ inline void WriteConditional2W(int64_t addr, int64_t value, ++ Instruction* instr, int32_t rt_reg); ++ ++ inline double ReadD(int64_t addr, Instruction* instr); ++ inline void WriteD(int64_t addr, double value, Instruction* instr); ++ ++ template ++ T ReadMem(int64_t addr, Instruction* instr); ++ template ++ void WriteMem(int64_t addr, T value, Instruction* instr); ++ ++ // Helper for debugging memory access. 
++  inline void DieOrDebug();
++
++  void TraceRegWr(int64_t value, TraceType t = DWORD);
++  void TraceMemWr(int64_t addr, int64_t value, TraceType t);
++  void TraceMemRd(int64_t addr, int64_t value, TraceType t = DWORD);
++  template <typename T>
++  void TraceMemRd(int64_t addr, T value);
++  template <typename T>
++  void TraceMemWr(int64_t addr, T value);
++
++  SimInstruction instr_;
++
++  // Executing is handled based on the instruction type.
++  void DecodeTypeOp6();
++  void DecodeTypeOp7();
++  void DecodeTypeOp8();
++  void DecodeTypeOp10();
++  void DecodeTypeOp12();
++  void DecodeTypeOp14();
++  void DecodeTypeOp17();
++  void DecodeTypeOp22();
++
++  inline int32_t rj_reg() const { return instr_.RjValue(); }
++  inline int64_t rj() const { return get_register(rj_reg()); }
++  inline uint64_t rj_u() const {
++    return static_cast<uint64_t>(get_register(rj_reg()));
++  }
++  inline int32_t rk_reg() const { return instr_.RkValue(); }
++  inline int64_t rk() const { return get_register(rk_reg()); }
++  inline uint64_t rk_u() const {
++    return static_cast<uint64_t>(get_register(rk_reg()));
++  }
++  inline int32_t rd_reg() const { return instr_.RdValue(); }
++  inline int64_t rd() const { return get_register(rd_reg()); }
++  inline uint64_t rd_u() const {
++    return static_cast<uint64_t>(get_register(rd_reg()));
++  }
++  inline int32_t fa_reg() const { return instr_.FaValue(); }
++  inline float fa_float() const { return get_fpu_register_float(fa_reg()); }
++  inline double fa_double() const { return get_fpu_register_double(fa_reg()); }
++  inline int32_t fj_reg() const { return instr_.FjValue(); }
++  inline float fj_float() const { return get_fpu_register_float(fj_reg()); }
++  inline double fj_double() const { return get_fpu_register_double(fj_reg()); }
++  inline int32_t fk_reg() const { return instr_.FkValue(); }
++  inline float fk_float() const { return get_fpu_register_float(fk_reg()); }
++  inline double fk_double() const { return get_fpu_register_double(fk_reg()); }
++  inline int32_t fd_reg() const { return instr_.FdValue(); }
++  inline float fd_float() const { return get_fpu_register_float(fd_reg()); }
++  inline double fd_double() const { return get_fpu_register_double(fd_reg()); }
++  inline int32_t cj_reg() const { return instr_.CjValue(); }
++  inline bool cj() const { return get_cf_register(cj_reg()); }
++  inline int32_t cd_reg() const { return instr_.CdValue(); }
++  inline bool cd() const { return get_cf_register(cd_reg()); }
++  inline int32_t ca_reg() const { return instr_.CaValue(); }
++  inline bool ca() const { return get_cf_register(ca_reg()); }
++  inline uint32_t sa2() const { return instr_.Sa2Value(); }
++  inline uint32_t sa3() const { return instr_.Sa3Value(); }
++  inline uint32_t ui5() const { return instr_.Ui5Value(); }
++  inline uint32_t ui6() const { return instr_.Ui6Value(); }
++  inline uint32_t lsbw() const { return instr_.LsbwValue(); }
++  inline uint32_t msbw() const { return instr_.MsbwValue(); }
++  inline uint32_t lsbd() const { return instr_.LsbdValue(); }
++  inline uint32_t msbd() const { return instr_.MsbdValue(); }
++  inline uint32_t cond() const { return instr_.CondValue(); }
++  inline int32_t si12() const { return (instr_.Si12Value() << 20) >> 20; }
++  inline uint32_t ui12() const { return instr_.Ui12Value(); }
++  inline int32_t si14() const { return (instr_.Si14Value() << 18) >> 18; }
++  inline int32_t si16() const { return (instr_.Si16Value() << 16) >> 16; }
++  inline int32_t si20() const { return (instr_.Si20Value() << 12) >> 12; }
++
++  inline void SetResult(const int32_t rd_reg, const int64_t alu_out) {
++    set_register(rd_reg,
alu_out); ++ TraceRegWr(alu_out); ++ } ++ ++ inline void SetFPUWordResult(int32_t fd_reg, int32_t alu_out) { ++ set_fpu_register_word(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), WORD); ++ } ++ ++ inline void SetFPUWordResult2(int32_t fd_reg, int32_t alu_out) { ++ set_fpu_register_word(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg)); ++ } ++ ++ inline void SetFPUResult(int32_t fd_reg, int64_t alu_out) { ++ set_fpu_register(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg)); ++ } ++ ++ inline void SetFPUResult2(int32_t fd_reg, int64_t alu_out) { ++ set_fpu_register(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), DOUBLE); ++ } ++ ++ inline void SetFPUFloatResult(int32_t fd_reg, float alu_out) { ++ set_fpu_register_float(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), FLOAT); ++ } ++ ++ inline void SetFPUDoubleResult(int32_t fd_reg, double alu_out) { ++ set_fpu_register_double(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), DOUBLE); ++ } ++ ++ // Used for breakpoints. ++ void SoftwareInterrupt(); ++ ++ // Stop helper functions. ++ bool IsWatchpoint(uint64_t code); ++ void PrintWatchpoint(uint64_t code); ++ void HandleStop(uint64_t code, Instruction* instr); ++ bool IsStopInstruction(Instruction* instr); ++ bool IsEnabledStop(uint64_t code); ++ void EnableStop(uint64_t code); ++ void DisableStop(uint64_t code); ++ void IncreaseStopCounter(uint64_t code); ++ void PrintStopInfo(uint64_t code); ++ ++ // Executes one instruction. ++ void InstructionDecode(Instruction* instr); ++ // Execute one instruction placed in a branch delay slot. ++ ++ // ICache. ++ static void CheckICache(base::CustomMatcherHashMap* i_cache, ++ Instruction* instr); ++ static void FlushOnePage(base::CustomMatcherHashMap* i_cache, intptr_t start, ++ size_t size); ++ static CachePage* GetCachePage(base::CustomMatcherHashMap* i_cache, ++ void* page); ++ ++ enum Exception { ++ none, ++ kIntegerOverflow, ++ kIntegerUnderflow, ++ kDivideByZero, ++ kNumExceptions ++ }; ++ ++ // Exceptions. ++ void SignalException(Exception e); ++ ++ // Handle arguments and return value for runtime FP functions. ++ void GetFpArgs(double* x, double* y, int32_t* z); ++ void SetFpResult(const double& result); ++ ++ void CallInternal(Address entry); ++ ++ // Architecture state. ++ // Registers. ++ int64_t registers_[kNumSimuRegisters]; ++ // Floating point Registers. ++ int64_t FPUregisters_[kNumFPURegisters]; ++ // Condition flags Registers. ++ bool CFregisters_[kNumCFRegisters]; ++ // FPU control register. ++ uint32_t FCSR_; ++ ++ // Simulator support. ++ // Allocate 1MB for stack. ++ size_t stack_size_; ++ char* stack_; ++ bool pc_modified_; ++ int64_t icount_; ++ int break_count_; ++ EmbeddedVector trace_buf_; ++ ++ // Debugger input. ++ char* last_debugger_input_; ++ ++ v8::internal::Isolate* isolate_; ++ ++ // Registered breakpoints. ++ Instruction* break_pc_; ++ Instr break_instr_; ++ ++ // Stop is disabled if bit 31 is set. ++ static const uint32_t kStopDisabledBit = 1 << 31; ++ ++ // A stop is enabled, meaning the simulator will stop when meeting the ++ // instruction, if bit 31 of watched_stops_[code].count is unset. ++ // The value watched_stops_[code].count & ~(1 << 31) indicates how many times ++ // the breakpoint was hit or gone through. ++ struct StopCountAndDesc { ++ uint32_t count; ++ char* desc; ++ }; ++ StopCountAndDesc watched_stops_[kMaxStopCode + 1]; ++ ++ // Synchronization primitives. 
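++  // Rough sketch of how the monitors declared below are driven by the
++  // LL/SC-style helpers above (WriteConditionalW / WriteConditional2W),
++  // simplified and without the mutex handling:
++  //   local_monitor_.NotifyLoadLinked(addr, TransactionSize::Word);
++  //   GlobalMonitor::Get()->NotifyLoadLinked_Locked(addr, &global_monitor_thread_);
++  //   ...
++  //   bool ok =
++  //       local_monitor_.NotifyStoreConditional(addr, TransactionSize::Word) &&
++  //       GlobalMonitor::Get()->NotifyStoreConditional_Locked(
++  //           addr, &global_monitor_thread_);
++  //   // The store (and an SC success result) only happens when ok is true.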
++ enum class MonitorAccess { ++ Open, ++ RMW, ++ }; ++ ++ enum class TransactionSize { ++ None = 0, ++ Word = 4, ++ DoubleWord = 8, ++ }; ++ ++ // The least-significant bits of the address are ignored. The number of bits ++ // is implementation-defined, between 3 and minimum page size. ++ static const uintptr_t kExclusiveTaggedAddrMask = ~((1 << 3) - 1); ++ ++ class LocalMonitor { ++ public: ++ LocalMonitor(); ++ ++ // These functions manage the state machine for the local monitor, but do ++ // not actually perform loads and stores. NotifyStoreConditional only ++ // returns true if the store conditional is allowed; the global monitor will ++ // still have to be checked to see whether the memory should be updated. ++ void NotifyLoad(); ++ void NotifyLoadLinked(uintptr_t addr, TransactionSize size); ++ void NotifyStore(); ++ bool NotifyStoreConditional(uintptr_t addr, TransactionSize size); ++ ++ private: ++ void Clear(); ++ ++ MonitorAccess access_state_; ++ uintptr_t tagged_addr_; ++ TransactionSize size_; ++ }; ++ ++ class GlobalMonitor { ++ public: ++ class LinkedAddress { ++ public: ++ LinkedAddress(); ++ ++ private: ++ friend class GlobalMonitor; ++ // These functions manage the state machine for the global monitor, but do ++ // not actually perform loads and stores. ++ void Clear_Locked(); ++ void NotifyLoadLinked_Locked(uintptr_t addr); ++ void NotifyStore_Locked(); ++ bool NotifyStoreConditional_Locked(uintptr_t addr, ++ bool is_requesting_thread); ++ ++ MonitorAccess access_state_; ++ uintptr_t tagged_addr_; ++ LinkedAddress* next_; ++ LinkedAddress* prev_; ++ // A scd can fail due to background cache evictions. Rather than ++ // simulating this, we'll just occasionally introduce cases where an ++ // store conditional fails. This will happen once after every ++ // kMaxFailureCounter exclusive stores. ++ static const int kMaxFailureCounter = 5; ++ int failure_counter_; ++ }; ++ ++ // Exposed so it can be accessed by Simulator::{Read,Write}Ex*. ++ base::Mutex mutex; ++ ++ void NotifyLoadLinked_Locked(uintptr_t addr, LinkedAddress* linked_address); ++ void NotifyStore_Locked(LinkedAddress* linked_address); ++ bool NotifyStoreConditional_Locked(uintptr_t addr, ++ LinkedAddress* linked_address); ++ ++ // Called when the simulator is destroyed. ++ void RemoveLinkedAddress(LinkedAddress* linked_address); ++ ++ static GlobalMonitor* Get(); ++ ++ private: ++ // Private constructor. Call {GlobalMonitor::Get()} to get the singleton. ++ GlobalMonitor() = default; ++ friend class base::LeakyObject; ++ ++ bool IsProcessorInLinkedList_Locked(LinkedAddress* linked_address) const; ++ void PrependProcessor_Locked(LinkedAddress* linked_address); ++ ++ LinkedAddress* head_ = nullptr; ++ }; ++ ++ LocalMonitor local_monitor_; ++ GlobalMonitor::LinkedAddress global_monitor_thread_; ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // defined(USE_SIMULATOR) ++#endif // V8_EXECUTION_LA64_SIMULATOR_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc b/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc +index 72f28363292..98c50263a02 100644 +--- a/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc ++++ b/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc +@@ -28,6 +28,8 @@ namespace internal { + DEFINE_LAZY_LEAKY_OBJECT_GETTER(Simulator::GlobalMonitor, + Simulator::GlobalMonitor::Get) + ++// #define PRINT_SIM_LOG ++ + // Util functions. 
+ inline bool HaveSameSign(int64_t a, int64_t b) { return ((a ^ b) >= 0); } + +@@ -57,6 +59,17 @@ static int64_t MultiplyHighSigned(int64_t u, int64_t v) { + return u1 * v1 + w2 + (w1 >> 32); + } + ++#ifdef PRINT_SIM_LOG ++inline void printf_instr(const char* _Format, ...) { ++ va_list varList; ++ va_start(varList, _Format); ++ vprintf(_Format, varList); ++ va_end(varList); ++} ++#else ++#define printf_instr(...) ++#endif ++ + // This macro provides a platform independent use of sscanf. The reason for + // SScanF not being implemented in a platform independent was through + // ::v8::internal::OS in the same way as SNPrintF is that the Windows C Run-Time +@@ -2195,6 +2208,7 @@ void Simulator::SoftwareInterrupt() { + uint32_t code = (func == BREAK) ? instr_.Bits(25, 6) : -1; + // We first check if we met a call_rt_redirected. + if (instr_.InstructionBits() == rtCallRedirInstr) { ++ printf_instr("Simulator::SoftwareInterrupt: BREAK 0xFFFFF\n"); + Redirection* redirection = Redirection::FromInstruction(instr_.instr()); + + int64_t* stack_pointer = reinterpret_cast(get_register(sp)); +@@ -2723,6 +2737,9 @@ void Simulator::DecodeTypeRegisterSRsType() { + KeepSign::yes, fs)); + break; + case SQRT_S: ++ printf_instr("sqrt_s\t %s: %016f, %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd, ++ FPURegisters::Name(fs_reg()), fs); + SetFPUFloatResult( + fd_reg(), + FPUCanonalizeOperation([](float src) { return std::sqrt(src); }, fs)); +@@ -3115,6 +3132,10 @@ void Simulator::DecodeTypeRegisterDRsType() { + [](double lhs, double rhs) { return lhs + rhs; }, fs, ft)); + break; + case SUB_D: ++ printf_instr("sub_d\t %s: %016f, %s: %016f, %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd, ++ FPURegisters::Name(fs_reg()), fs, ++ FPURegisters::Name(ft_reg()), ft); + SetFPUDoubleResult( + fd_reg(), + FPUCanonalizeOperation( +@@ -3381,6 +3402,10 @@ void Simulator::DecodeTypeRegisterWRsType() { + int64_t alu_out = 0x12345678; + switch (instr_.FunctionFieldRaw()) { + case CVT_S_W: // Convert word to float (single). 
++ printf_instr( ++ "CVT_S_W \t %s: %016f, %s: %016x\n", FPURegisters::Name(fd_reg()), ++ get_fpu_register_float(fd_reg()), FPURegisters::Name(fs_reg()), ++ get_fpu_register_signed_word(fs_reg())); + alu_out = get_fpu_register_signed_word(fs_reg()); + SetFPUFloatResult(fd_reg(), static_cast(alu_out)); + break; +@@ -3476,6 +3501,10 @@ void Simulator::DecodeTypeRegisterLRsType() { + SetFPUDoubleResult(fd_reg(), static_cast(i64)); + break; + case CVT_S_L: ++ printf_instr("CVT_S_L \t %s: %016f, %s: %016x\n", ++ FPURegisters::Name(fd_reg()), ++ get_fpu_register_float(fd_reg()), ++ FPURegisters::Name(fs_reg()), get_fpu_register(fs_reg())); + i64 = get_fpu_register(fs_reg()); + SetFPUFloatResult(fd_reg(), static_cast(i64)); + break; +@@ -3569,11 +3598,17 @@ void Simulator::DecodeTypeRegisterCOP1() { + SetResult(rt_reg(), FCSR_); + break; + case MFC1: ++ printf_instr("MFC1 \t %s: %016lx, %s: %016f\n", Registers::Name(rt_reg()), ++ rt(), FPURegisters::Name(fs_reg()), ++ get_fpu_register_float(fs_reg())); + set_register(rt_reg(), + static_cast(get_fpu_register_word(fs_reg()))); + TraceRegWr(get_register(rt_reg()), WORD_DWORD); + break; + case DMFC1: ++ printf_instr( ++ "DMFC1 \t %s: %016lx, %s: %016f\n", Registers::Name(rt_reg()), rt(), ++ FPURegisters::Name(fs_reg()), get_fpu_register_double(fs_reg())); + SetResult(rt_reg(), get_fpu_register(fs_reg())); + break; + case MFHC1: +@@ -3593,12 +3628,18 @@ void Simulator::DecodeTypeRegisterCOP1() { + break; + } + case MTC1: ++ printf_instr( ++ "MTC1 \t %s: %016f, %s: %016lx\n", FPURegisters::Name(fs_reg()), ++ get_fpu_register_float(fs_reg()), Registers::Name(rt_reg()), rt()); + // Hardware writes upper 32-bits to zero on mtc1. + set_fpu_register_hi_word(fs_reg(), 0); + set_fpu_register_word(fs_reg(), static_cast(rt())); + TraceRegWr(get_fpu_register(fs_reg()), FLOAT_DOUBLE); + break; + case DMTC1: ++ printf_instr( ++ "DMTC1 \t %s: %016f, %s: %016lx\n", FPURegisters::Name(fs_reg()), ++ get_fpu_register_float(fs_reg()), Registers::Name(rt_reg()), rt()); + SetFPUResult2(fs_reg(), rt()); + break; + case MTHC1: +@@ -3683,6 +3724,7 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + case JR: { + int64_t next_pc = rs(); + int64_t current_pc = get_pc(); ++ printf_instr("JALR\t %s: %016lx\n", Registers::Name(rs_reg()), rs()); + Instruction* branch_delay_instr = + reinterpret_cast(current_pc + kInstrSize); + BranchDelayInstructionDecode(branch_delay_instr); +@@ -3694,6 +3736,8 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + int64_t next_pc = rs(); + int64_t current_pc = get_pc(); + int32_t return_addr_reg = rd_reg(); ++ printf_instr("JALR\t %s: %016lx, %s: %016lx\n", Registers::Name(rd_reg()), ++ get_register(rd_reg()), Registers::Name(rs_reg()), rs()); + Instruction* branch_delay_instr = + reinterpret_cast(current_pc + kInstrSize); + BranchDelayInstructionDecode(branch_delay_instr); +@@ -3703,21 +3747,36 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + } + case SLL: ++ printf_instr("SLL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), static_cast(rt()) << sa()); + break; + case DSLL: ++ printf_instr("DSLL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), rt() << sa()); + break; + case DSLL32: ++ printf_instr("DSLL32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), 
rt(), sa()); + SetResult(rd_reg(), rt() << sa() << 32); + break; + case SRL: + if (rs_reg() == 0) { ++ printf_instr("SRL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Regular logical right shift of a word by a fixed number of + // bits instruction. RS field is always equal to 0. + // Sign-extend the 32-bit result. + alu_out = static_cast(static_cast(rt_u()) >> sa()); + } else if (rs_reg() == 1) { ++ printf_instr("ROTR\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Logical right-rotate of a word by a fixed number of bits. This + // is special case of SRL instruction, added in MIPS32 Release 2. + // RS field is equal to 00001. +@@ -3731,11 +3790,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case DSRL: + if (rs_reg() == 0) { ++ printf_instr("DSRL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Regular logical right shift of a word by a fixed number of + // bits instruction. RS field is always equal to 0. + // Sign-extend the 64-bit result. + alu_out = static_cast(rt_u() >> sa()); + } else if (rs_reg() == 1) { ++ printf_instr("DROTR\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Logical right-rotate of a word by a fixed number of bits. This + // is special case of SRL instruction, added in MIPS32 Release 2. + // RS field is equal to 00001. +@@ -3747,11 +3812,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case DSRL32: + if (rs_reg() == 0) { ++ printf_instr("DSRL32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Regular logical right shift of a word by a fixed number of + // bits instruction. RS field is always equal to 0. + // Sign-extend the 64-bit result. + alu_out = static_cast(rt_u() >> sa() >> 32); + } else if (rs_reg() == 1) { ++ printf_instr("DROTR32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Logical right-rotate of a word by a fixed number of bits. This + // is special case of SRL instruction, added in MIPS32 Release 2. + // RS field is equal to 00001. 
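The tracing added throughout DecodeTypeRegisterSPECIAL follows one pattern: the mnemonic, then each register operand as "name: value" (the destination register still holds its previous value at this point), then any shift amount or immediate field. With PRINT_SIM_LOG enabled, a single shift would be logged roughly as:

    SLL	 v0: 0000000000000000, a0: 0000000000001234, sa: 04

The register names and values above only illustrate the format strings used; the actual output depends on the simulated instruction stream.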
+@@ -3763,26 +3834,51 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), alu_out); + break; + case SRA: ++ printf_instr("SRA\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), (int32_t)rt() >> sa()); + break; + case DSRA: ++ printf_instr("DSRA\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), rt() >> sa()); + break; + case DSRA32: ++ printf_instr("DSRA32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), rt() >> sa() >> 32); + break; + case SLLV: ++ printf_instr("SLLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), (int32_t)rt() << rs()); + break; + case DSLLV: ++ printf_instr("DSLLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), rt() << rs()); + break; + case SRLV: + if (sa() == 0) { ++ printf_instr("SRLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Regular logical right-shift of a word by a variable number of + // bits instruction. SA field is always equal to 0. + alu_out = static_cast((uint32_t)rt_u() >> rs()); + } else { ++ printf_instr("ROTRV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Logical right-rotate of a word by a variable number of bits. + // This is special case od SRLV instruction, added in MIPS32 + // Release 2. SA field is equal to 00001. +@@ -3794,10 +3890,18 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case DSRLV: + if (sa() == 0) { ++ printf_instr("SRLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Regular logical right-shift of a word by a variable number of + // bits instruction. SA field is always equal to 0. + alu_out = static_cast(rt_u() >> rs()); + } else { ++ printf_instr("DROTRV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Logical right-rotate of a word by a variable number of bits. + // This is special case od SRLV instruction, added in MIPS32 + // Release 2. SA field is equal to 00001. 
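Note that none of these printf_instr calls produce output by default: the hunk near the top of this file defines printf_instr as a real vprintf wrapper only when PRINT_SIM_LOG is defined, and as an empty macro otherwise, so the tracing compiles away. To see the trace in a local debug build, the commented-out define added by this patch would be flipped on:

    #define PRINT_SIM_LOG  // instead of the patch's default "// #define PRINT_SIM_LOG"

This is a debugging aid only and is not meant to be enabled in packaged builds.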
+@@ -3807,9 +3911,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), alu_out); + break; + case SRAV: ++ printf_instr("SRAV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), (int32_t)rt() >> rs()); + break; + case DSRAV: ++ printf_instr("DSRAV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), rt() >> rs()); + break; + case LSA: { +@@ -4018,6 +4130,10 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case ADD: + case DADD: ++ printf_instr("DADD\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + if (HaveSameSign(rs(), rt())) { + if (rs() > 0) { + if (rs() > (Registers::kMaxValue - rt())) { +@@ -4032,16 +4148,28 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), rs() + rt()); + break; + case ADDU: { ++ printf_instr("ADDU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + int32_t alu32_out = static_cast(rs() + rt()); + // Sign-extend result of 32bit operation into 64bit register. + SetResult(rd_reg(), static_cast(alu32_out)); + break; + } + case DADDU: ++ printf_instr("DADDU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() + rt()); + break; + case SUB: + case DSUB: ++ printf_instr("DSUB\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + if (!HaveSameSign(rs(), rt())) { + if (rs() > 0) { + if (rs() > (Registers::kMaxValue + rt())) { +@@ -4056,30 +4184,62 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), rs() - rt()); + break; + case SUBU: { ++ printf_instr("SUBU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + int32_t alu32_out = static_cast(rs() - rt()); + // Sign-extend result of 32bit operation into 64bit register. 
+ SetResult(rd_reg(), static_cast(alu32_out)); + break; + } + case DSUBU: ++ printf_instr("DSUBU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() - rt()); + break; + case AND: ++ printf_instr("AND\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() & rt()); + break; + case OR: ++ printf_instr("OR\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() | rt()); + break; + case XOR: ++ printf_instr("XOR\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() ^ rt()); + break; + case NOR: ++ printf_instr("NOR\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), ~(rs() | rt())); + break; + case SLT: ++ printf_instr("SLT\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() < rt() ? 1 : 0); + break; + case SLTU: ++ printf_instr("SLTU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs_u() < rt_u() ? 1 : 0); + break; + // Break and trap instructions. +@@ -4106,9 +4266,14 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case SYNC: + // TODO(palfia): Ignore sync instruction for now. ++ printf_instr("sync\n"); + break; + // Conditional moves. + case MOVN: ++ printf_instr("MOVN\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + if (rt()) { + SetResult(rd_reg(), rs()); + } +@@ -4173,6 +4338,9 @@ void Simulator::DecodeTypeRegisterSPECIAL3() { + // Interpret sa field as 5-bit lsb of extract. + uint16_t lsb = sa(); + uint16_t size = msbd + 1; ++ printf_instr("EXT\t %s: %016lx, %s: %016lx, pos: %d, size: %d\n", ++ Registers::Name(rt_reg()), get_register(rt_reg()), ++ Registers::Name(rs_reg()), rs(), lsb, size); + uint64_t mask = (1ULL << size) - 1; + alu_out = static_cast((rs_u() & (mask << lsb)) >> lsb); + SetResult(rt_reg(), alu_out); +@@ -4184,6 +4352,9 @@ void Simulator::DecodeTypeRegisterSPECIAL3() { + // Interpret sa field as 5-bit lsb of extract. + uint16_t lsb = sa(); + uint16_t size = msbd + 1; ++ printf_instr("DEXT\t %s: %016lx, %s: %016lx, pos: %d, size: %d\n", ++ Registers::Name(rt_reg()), get_register(rt_reg()), ++ Registers::Name(rs_reg()), rs(), lsb, size); + uint64_t mask = (size == 64) ? 
UINT64_MAX : (1ULL << size) - 1; + alu_out = static_cast((rs_u() & (mask << lsb)) >> lsb); + SetResult(rt_reg(), alu_out); +@@ -6553,6 +6724,7 @@ void Simulator::DecodeTypeImmediate() { + [this, &next_pc, &execute_branch_delay_instruction](bool do_branch) { + execute_branch_delay_instruction = true; + int64_t current_pc = get_pc(); ++ printf_instr("Offs16: %04x\n", instr_.Imm16Value()); + set_register(31, current_pc + 2 * kInstrSize); + if (do_branch) { + int16_t imm16 = instr_.Imm16Value(); +@@ -6565,6 +6737,7 @@ void Simulator::DecodeTypeImmediate() { + auto BranchHelper = [this, &next_pc, + &execute_branch_delay_instruction](bool do_branch) { + execute_branch_delay_instruction = true; ++ printf_instr("Offs16: %04x\n", instr_.Imm16Value()); + int64_t current_pc = get_pc(); + if (do_branch) { + int16_t imm16 = instr_.Imm16Value(); +@@ -6601,6 +6774,7 @@ void Simulator::DecodeTypeImmediate() { + auto BranchAndLinkCompactHelper = [this, &next_pc](bool do_branch, int bits) { + int64_t current_pc = get_pc(); + CheckForbiddenSlot(current_pc); ++ printf_instr("Offs: %08x\n", instr_.ImmValue(bits)); + if (do_branch) { + int32_t imm = instr_.ImmValue(bits); + imm <<= 32 - bits; +@@ -6613,6 +6787,7 @@ void Simulator::DecodeTypeImmediate() { + auto BranchCompactHelper = [this, &next_pc](bool do_branch, int bits) { + int64_t current_pc = get_pc(); + CheckForbiddenSlot(current_pc); ++ printf_instr("Offs: %08x\n", instr_.ImmValue(bits)); + if (do_branch) { + int32_t imm = instr_.ImmValue(bits); + imm <<= 32 - bits; +@@ -6707,15 +6882,19 @@ void Simulator::DecodeTypeImmediate() { + case REGIMM: + switch (instr_.RtFieldRaw()) { + case BLTZ: ++ printf_instr("BLTZ\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchHelper(rs < 0); + break; + case BGEZ: ++ printf_instr("BGEZ\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchHelper(rs >= 0); + break; + case BLTZAL: ++ printf_instr("BLTZAL\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs < 0); + break; + case BGEZAL: ++ printf_instr("BGEZAL\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs >= 0); + break; + case DAHI: +@@ -6732,9 +6911,13 @@ void Simulator::DecodeTypeImmediate() { + // When comparing to zero, the encoding of rt field is always 0, so we don't + // need to replace rt with zero. 
+ case BEQ: ++ printf_instr("BEQ\t %s: %016lx, %s: %016lx, ", Registers::Name(rs_reg), ++ rs, Registers::Name(rt_reg), rt); + BranchHelper(rs == rt); + break; + case BNE: ++ printf_instr("BNE\t %s: %016lx, %s: %016lx, ", Registers::Name(rs_reg), ++ rs, Registers::Name(rt_reg), rt); + BranchHelper(rs != rt); + break; + case POP06: // BLEZALC, BGEZALC, BGEUC, BLEZ (pre-r6) +@@ -6754,6 +6937,7 @@ void Simulator::DecodeTypeImmediate() { + BranchHelper(rs <= 0); + } + } else { // BLEZ ++ printf_instr("BLEZ\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchHelper(rs <= 0); + } + break; +@@ -6774,6 +6958,7 @@ void Simulator::DecodeTypeImmediate() { + BranchHelper(rs > 0); + } + } else { // BGTZ ++ printf_instr("BGTZ\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchHelper(rs > 0); + } + break; +@@ -6791,6 +6976,7 @@ void Simulator::DecodeTypeImmediate() { + } + } + } else { // BLEZL ++ printf_instr("BLEZL\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs <= 0); + } + break; +@@ -6808,6 +6994,7 @@ void Simulator::DecodeTypeImmediate() { + } + } + } else { // BGTZL ++ printf_instr("BGTZL\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs > 0); + } + break; +@@ -6846,6 +7033,9 @@ void Simulator::DecodeTypeImmediate() { + } + } + } else { // ADDI ++ printf_instr("ADDI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + if (HaveSameSign(rs, se_imm16)) { + if (rs > 0) { + if (rs <= Registers::kMaxValue - se_imm16) { +@@ -6876,27 +7066,48 @@ void Simulator::DecodeTypeImmediate() { + break; + // ------------- Arithmetic instructions. + case ADDIU: { ++ printf_instr("ADDIU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + int32_t alu32_out = static_cast(rs + se_imm16); + // Sign-extend result of 32bit operation into 64bit register. + SetResult(rt_reg, static_cast(alu32_out)); + break; + } + case DADDIU: ++ printf_instr("DADDIU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + SetResult(rt_reg, rs + se_imm16); + break; + case SLTI: ++ printf_instr("SLTI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + SetResult(rt_reg, rs < se_imm16 ? 1 : 0); + break; + case SLTIU: ++ printf_instr("SLTIU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + SetResult(rt_reg, rs_u < static_cast(se_imm16) ? 1 : 0); + break; + case ANDI: ++ printf_instr("ANDI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ oe_imm16); + SetResult(rt_reg, rs & oe_imm16); + break; + case ORI: ++ printf_instr("ORI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ oe_imm16); + SetResult(rt_reg, rs | oe_imm16); + break; + case XORI: ++ printf_instr("XORI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ oe_imm16); + SetResult(rt_reg, rs ^ oe_imm16); + break; + case LUI: +@@ -6907,6 +7118,8 @@ void Simulator::DecodeTypeImmediate() { + SetResult(rt_reg, static_cast(alu32_out)); + } else { + // LUI instruction. 
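++            // LUI places the zero-extended immediate in bits 31..16 of a
++            // 32-bit value, which is then sign-extended into the 64-bit
++            // register below.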
++ printf_instr("LUI\t %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, se_imm16); + int32_t alu32_out = static_cast(oe_imm16 << 16); + // Sign-extend result of 32bit operation into 64bit register. + SetResult(rt_reg, static_cast(alu32_out)); +@@ -6919,12 +7132,21 @@ void Simulator::DecodeTypeImmediate() { + break; + // ------------- Memory instructions. + case LB: ++ printf_instr("LB\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadB(rs + se_imm16)); + break; + case LH: ++ printf_instr("LH\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadH(rs + se_imm16, instr_.instr())); + break; + case LWL: { ++ printf_instr("LWL\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + local_monitor_.NotifyLoad(); + // al_offset is offset of the effective address within an aligned word. + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; +@@ -6938,21 +7160,39 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LW: ++ printf_instr("LW\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadW(rs + se_imm16, instr_.instr())); + break; + case LWU: ++ printf_instr("LWU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadWU(rs + se_imm16, instr_.instr())); + break; + case LD: ++ printf_instr("LD\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, Read2W(rs + se_imm16, instr_.instr())); + break; + case LBU: ++ printf_instr("LBU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadBU(rs + se_imm16)); + break; + case LHU: ++ printf_instr("LHU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadHU(rs + se_imm16, instr_.instr())); + break; + case LWR: { ++ printf_instr("LWR\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + // al_offset is offset of the effective address within an aligned word. + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; + uint8_t byte_shift = kInt32AlignmentMask - al_offset; +@@ -6965,6 +7205,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LDL: { ++ printf_instr("LDL\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + // al_offset is offset of the effective address within an aligned word. + uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint8_t byte_shift = kInt64AlignmentMask - al_offset; +@@ -6977,6 +7220,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LDR: { ++ printf_instr("LDR\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + // al_offset is offset of the effective address within an aligned word. 
+ uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint8_t byte_shift = kInt64AlignmentMask - al_offset; +@@ -6989,12 +7235,21 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SB: ++ printf_instr("SB\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + WriteB(rs + se_imm16, static_cast(rt)); + break; + case SH: ++ printf_instr("SH\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + WriteH(rs + se_imm16, static_cast(rt), instr_.instr()); + break; + case SWL: { ++ printf_instr("SWL\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; + uint8_t byte_shift = kInt32AlignmentMask - al_offset; + uint32_t mask = byte_shift ? (~0 << (al_offset + 1) * 8) : 0; +@@ -7005,12 +7260,21 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SW: ++ printf_instr("SW\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + WriteW(rs + se_imm16, static_cast(rt), instr_.instr()); + break; + case SD: ++ printf_instr("SD\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + Write2W(rs + se_imm16, rt, instr_.instr()); + break; + case SWR: { ++ printf_instr("SWR\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; + uint32_t mask = (1 << al_offset * 8) - 1; + addr = rs + se_imm16 - al_offset; +@@ -7020,6 +7284,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SDL: { ++ printf_instr("SDL\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint8_t byte_shift = kInt64AlignmentMask - al_offset; + uint64_t mask = byte_shift ? 
(~0UL << (al_offset + 1) * 8) : 0; +@@ -7030,6 +7297,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SDR: { ++ printf_instr("SDR\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint64_t mask = (1UL << al_offset * 8) - 1; + addr = rs + se_imm16 - al_offset; +@@ -7055,6 +7325,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LLD: { ++ printf_instr("LLD\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + DCHECK(kArchVariant != kMips64r6); + base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); + addr = rs + se_imm16; +@@ -7065,6 +7338,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SCD: { ++ printf_instr("SCD\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + DCHECK(kArchVariant != kMips64r6); + addr = rs + se_imm16; + WriteConditional2W(addr, rt, instr_.instr(), rt_reg); +@@ -7080,11 +7356,17 @@ void Simulator::DecodeTypeImmediate() { + TraceMemRd(addr, get_fpu_register(ft_reg), DOUBLE); + break; + case SWC1: { ++ printf_instr("SWC1\t %s: %016f, %s: %016lx, imm16: %04lx\n", ++ FPURegisters::Name(ft_reg), get_fpu_register_float(ft_reg), ++ Registers::Name(rs_reg), rs, se_imm16); + int32_t alu_out_32 = static_cast(get_fpu_register(ft_reg)); + WriteW(rs + se_imm16, alu_out_32, instr_.instr()); + break; + } + case SDC1: ++ printf_instr("SDC1\t %s: %016f, %s: %016lx, imm16: %04lx\n", ++ FPURegisters::Name(ft_reg), get_fpu_register_double(ft_reg), ++ Registers::Name(rs_reg), rs, se_imm16); + WriteD(rs + se_imm16, get_fpu_register_double(ft_reg), instr_.instr()); + TraceMemWr(rs + se_imm16, get_fpu_register(ft_reg), DWORD); + break; +@@ -7257,6 +7539,8 @@ void Simulator::DecodeTypeJump() { + int64_t pc_high_bits = current_pc & 0xFFFFFFFFF0000000; + // Next pc. + int64_t next_pc = pc_high_bits | (simInstr.Imm26Value() << 2); ++ printf_instr("%s\t", simInstr.IsLinkingInstruction() ? "JAL" : "J"); ++ printf_instr("offs26: %x\n", instr_.Bits(25, 0)); + + // Execute branch delay slot. + // We don't check for end_sim_pc. First it should not be met as the current pc +@@ -7291,7 +7575,11 @@ void Simulator::InstructionDecode(Instruction* instr) { + dasm.InstructionDecode(buffer, reinterpret_cast(instr)); + } + ++ static int instr_count = 0; ++ USE(instr_count); + instr_ = instr; ++ printf_instr("\nInstr%3d: %08x, PC: %lx\t", instr_count++, instr_.Bits(31, 0), ++ get_pc()); + switch (instr_.InstructionType()) { + case Instruction::kRegisterType: + DecodeTypeRegister(); +diff --git a/src/3rdparty/chromium/v8/src/execution/simulator-base.h b/src/3rdparty/chromium/v8/src/execution/simulator-base.h +index 0fa98cb4054..12e0cad3e3d 100644 +--- a/src/3rdparty/chromium/v8/src/execution/simulator-base.h ++++ b/src/3rdparty/chromium/v8/src/execution/simulator-base.h +@@ -88,7 +88,7 @@ class SimulatorBase { + static typename std::enable_if::value, intptr_t>::type + ConvertArg(T arg) { + static_assert(sizeof(T) <= sizeof(intptr_t), "type bigger than ptrsize"); +-#if V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + // The MIPS64 calling convention is to sign extend all values, even unsigned + // ones. 
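++  // The LoongArch64 LP64 convention behaves the same way, so LA64 goes
++  // through this sign-extending path as well.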
+ using signed_t = typename std::make_signed::type; +diff --git a/src/3rdparty/chromium/v8/src/execution/simulator.h b/src/3rdparty/chromium/v8/src/execution/simulator.h +index a4e07b235b4..1bc39ac7e70 100644 +--- a/src/3rdparty/chromium/v8/src/execution/simulator.h ++++ b/src/3rdparty/chromium/v8/src/execution/simulator.h +@@ -24,6 +24,8 @@ + #include "src/execution/mips/simulator-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/execution/mips64/simulator-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/execution/la64/simulator-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/execution/s390/simulator-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/flags/flag-definitions.h b/src/3rdparty/chromium/v8/src/flags/flag-definitions.h +index c3f360cdf0d..f14b6a1e5e6 100644 +--- a/src/3rdparty/chromium/v8/src/flags/flag-definitions.h ++++ b/src/3rdparty/chromium/v8/src/flags/flag-definitions.h +@@ -1246,7 +1246,7 @@ DEFINE_BOOL(check_icache, false, + "Check icache flushes in ARM and MIPS simulator") + DEFINE_INT(stop_sim_at, 0, "Simulator stop after x number of instructions") + #if defined(V8_TARGET_ARCH_ARM64) || defined(V8_TARGET_ARCH_MIPS64) || \ +- defined(V8_TARGET_ARCH_PPC64) ++ defined(V8_TARGET_ARCH_PPC64) || defined(V8_TARGET_ARCH_LA64) + DEFINE_INT(sim_stack_alignment, 16, + "Stack alignment in bytes in simulator. This must be a power of two " + "and it must be at least 16. 16 is default.") +diff --git a/src/3rdparty/chromium/v8/src/heap/base/asm/la64/push_registers_asm.cc b/src/3rdparty/chromium/v8/src/heap/base/asm/la64/push_registers_asm.cc +new file mode 100644 +index 00000000000..c9e6f5d2cc8 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/heap/base/asm/la64/push_registers_asm.cc +@@ -0,0 +1,48 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++// Push all callee-saved registers to get them on the stack for conservative ++// stack scanning. ++// ++// See asm/x64/push_registers_clang.cc for why the function is not generated ++// using clang. ++// ++// Do not depend on V8_TARGET_OS_* defines as some embedders may override the ++// GN toolchain (e.g. ChromeOS) and not provide them. ++asm(".text \n" ++ ".global PushAllRegistersAndIterateStack \n" ++ ".type PushAllRegistersAndIterateStack, %function \n" ++ ".hidden PushAllRegistersAndIterateStack \n" ++ "PushAllRegistersAndIterateStack: \n" ++ // Push all callee-saved registers and save return address. ++ " addi.d $sp, $sp, -96 \n" ++ " st.d $ra, $sp, 88 \n" ++ " st.d $s8, $sp, 80 \n" ++ " st.d $sp, $sp, 72 \n" ++ " st.d $fp, $sp, 64 \n" ++ " st.d $s7, $sp, 56 \n" ++ " st.d $s6, $sp, 48 \n" ++ " st.d $s5, $sp, 40 \n" ++ " st.d $s4, $sp, 32 \n" ++ " st.d $s3, $sp, 24 \n" ++ " st.d $s2, $sp, 16 \n" ++ " st.d $s1, $sp, 8 \n" ++ " st.d $s0, $sp, 0 \n" ++ // Maintain frame pointer. ++ " addi.d $s8, $sp, 0 \n" ++ // Pass 1st parameter (a0) unchanged (Stack*). ++ // Pass 2nd parameter (a1) unchanged (StackVisitor*). ++ // Save 3rd parameter (a2; IterateStackCallback). ++ " addi.d $a3, $a2, 0 \n" ++ // Call the callback. ++ // Pass 3rd parameter as sp (stack pointer). ++ " addi.d $a2, $sp, 0 \n" ++ " jirl $ra, $a3, 0 \n" ++ // Load return address. ++ " ld.d $ra, $sp, 88 \n" ++ // Restore frame pointer. ++ " ld.d $s8, $sp, 80 \n" ++ // Discard all callee-saved registers. 
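++    // (The remaining callee-saved registers were only pushed so the stack
++    // scan can see them; the C++ callback preserves them, so only $ra and
++    // $s8 need to be reloaded.)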
++ " addi.d $sp, $sp, 96 \n" ++ " jirl $zero, $ra, 0 \n"); +diff --git a/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc b/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc +index eaea1c91dd8..66775d6dfee 100644 +--- a/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc ++++ b/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc +@@ -1484,7 +1484,7 @@ void InterpreterAssembler::TraceBytecodeDispatch(TNode target_bytecode) { + + // static + bool InterpreterAssembler::TargetSupportsUnalignedAccess() { +-#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + return false; + #elif V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_S390 || \ + V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_PPC || \ +diff --git a/src/3rdparty/chromium/v8/src/libsampler/sampler.cc b/src/3rdparty/chromium/v8/src/libsampler/sampler.cc +index e2091ceb32a..6ebb1b8305b 100644 +--- a/src/3rdparty/chromium/v8/src/libsampler/sampler.cc ++++ b/src/3rdparty/chromium/v8/src/libsampler/sampler.cc +@@ -415,6 +415,10 @@ void SignalHandler::FillRegisterState(void* context, RegisterState* state) { + state->pc = reinterpret_cast(mcontext.pc); + state->sp = reinterpret_cast(mcontext.gregs[29]); + state->fp = reinterpret_cast(mcontext.gregs[30]); ++#elif V8_HOST_ARCH_LA64 ++ state->pc = reinterpret_cast(mcontext.__pc); ++ state->sp = reinterpret_cast(mcontext.__gregs[3]); ++ state->fp = reinterpret_cast(mcontext.__gregs[22]); + #elif V8_HOST_ARCH_PPC || V8_HOST_ARCH_PPC64 + #if V8_LIBC_GLIBC + state->pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); +diff --git a/src/3rdparty/chromium/v8/src/logging/log.cc b/src/3rdparty/chromium/v8/src/logging/log.cc +index dc79ffda5e5..6c745cea8c0 100644 +--- a/src/3rdparty/chromium/v8/src/logging/log.cc ++++ b/src/3rdparty/chromium/v8/src/logging/log.cc +@@ -588,6 +588,8 @@ void LowLevelLogger::LogCodeInfo() { + const char arch[] = "ppc64"; + #elif V8_TARGET_ARCH_MIPS + const char arch[] = "mips"; ++#elif V8_TARGET_ARCH_LA64 ++ const char arch[] = "la64"; + #elif V8_TARGET_ARCH_ARM64 + const char arch[] = "arm64"; + #elif V8_TARGET_ARCH_S390 +diff --git a/src/3rdparty/chromium/v8/src/objects/backing-store.cc b/src/3rdparty/chromium/v8/src/objects/backing-store.cc +index 52ab0085f7c..c96faf197bf 100644 +--- a/src/3rdparty/chromium/v8/src/objects/backing-store.cc ++++ b/src/3rdparty/chromium/v8/src/objects/backing-store.cc +@@ -29,7 +29,7 @@ constexpr bool kUseGuardRegions = true; + constexpr bool kUseGuardRegions = false; + #endif + +-#if V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + // MIPS64 has a user space of 2^40 bytes on most processors, + // address space limits needs to be smaller. + constexpr size_t kAddressSpaceLimit = 0x8000000000L; // 512 GiB +diff --git a/src/3rdparty/chromium/v8/src/objects/code.h b/src/3rdparty/chromium/v8/src/objects/code.h +index d80e72fa038..7da4c617461 100644 +--- a/src/3rdparty/chromium/v8/src/objects/code.h ++++ b/src/3rdparty/chromium/v8/src/objects/code.h +@@ -412,6 +412,8 @@ class Code : public HeapObject { + static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0; + #elif V8_TARGET_ARCH_MIPS64 + static constexpr int kHeaderPaddingSize = 0; ++#elif V8_TARGET_ARCH_LA64 ++ static constexpr int kHeaderPaddingSize = 0; + #elif V8_TARGET_ARCH_X64 + static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 
20 : 0; + #elif V8_TARGET_ARCH_ARM +diff --git a/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc b/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc +index 00bff91cd0a..56654b6288d 100644 +--- a/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc ++++ b/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc +@@ -104,7 +104,7 @@ bool SimulatorHelper::FillRegisters(Isolate* isolate, + state->sp = reinterpret_cast(simulator->sp()); + state->fp = reinterpret_cast(simulator->fp()); + state->lr = reinterpret_cast(simulator->lr()); +-#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + if (!simulator->has_bad_pc()) { + state->pc = reinterpret_cast(simulator->get_pc()); + } +diff --git a/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.cc b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.cc +new file mode 100644 +index 00000000000..8a5e9c30c6d +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.cc +@@ -0,0 +1,1286 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/regexp/la64/regexp-macro-assembler-la64.h" ++ ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/logging/log.h" ++#include "src/objects/objects-inl.h" ++#include "src/regexp/regexp-macro-assembler.h" ++#include "src/regexp/regexp-stack.h" ++#include "src/snapshot/embedded/embedded-data.h" ++#include "src/strings/unicode.h" ++ ++namespace v8 { ++namespace internal { ++ ++/* clang-format off ++ * ++ * This assembler uses the following register assignment convention ++ * - t3 : Temporarily stores the index of capture start after a matching pass ++ * for a global regexp. ++ * - a5 : Pointer to current Code object including heap object tag. ++ * - a6 : Current position in input, as negative offset from end of string. ++ * Please notice that this is the byte offset, not the character offset! ++ * - a7 : Currently loaded character. Must be loaded using ++ * LoadCurrentCharacter before using any of the dispatch methods. ++ * - t0 : Points to tip of backtrack stack ++ * - t1 : Unused. ++ * - t2 : End of input (points to byte after last character in input). ++ * - fp : Frame pointer. Used to access arguments, local variables and ++ * RegExp registers. ++ * - sp : Points to tip of C stack. ++ * ++ * The remaining registers are free for computations. ++ * Each call to a public method should retain this convention. ++ * ++ * TODO(plind): O32 documented here with intent of having single 32/64 codebase ++ * in the future. ++ * ++ * The O32 stack will have the following structure: ++ * ++ * - fp[72] Isolate* isolate (address of the current isolate) ++ * - fp[68] direct_call (if 1, direct call from JavaScript code, ++ * if 0, call through the runtime system). ++ * - fp[64] stack_area_base (High end of the memory area to use as ++ * backtracking stack). ++ * - fp[60] capture array size (may fit multiple sets of matches) ++ * - fp[44..59] MIPS O32 four argument slots ++ * - fp[40] int* capture_array (int[num_saved_registers_], for output). ++ * --- sp when called --- ++ * - fp[36] return address (lr). ++ * - fp[32] old frame pointer (r11). ++ * - fp[0..31] backup of registers s0..s7. ++ * --- frame pointer ---- ++ * - fp[-4] end of input (address of end of string). 
++ * - fp[-8] start of input (address of first character in string). ++ * - fp[-12] start index (character index of start). ++ * - fp[-16] void* input_string (location of a handle containing the string). ++ * - fp[-20] success counter (only for global regexps to count matches). ++ * - fp[-24] Offset of location before start of input (effectively character ++ * string start - 1). Used to initialize capture registers to a ++ * non-position. ++ * - fp[-28] At start (if 1, we are starting at the start of the ++ * string, otherwise 0) ++ * - fp[-32] register 0 (Only positions must be stored in the first ++ * - register 1 num_saved_registers_ registers) ++ * - ... ++ * - register num_registers-1 ++ * --- sp --- ++ * ++ * ++ * The N64 stack will have the following structure: ++ * ++ * - fp[80] Isolate* isolate (address of the current isolate) kIsolate ++ * kStackFrameHeader ++ * --- sp when called --- ++ * - fp[72] ra Return from RegExp code (ra). kReturnAddress ++ * - fp[64] s9, old-fp Old fp, callee saved(s9). ++ * - fp[0..63] s0..s7 Callee-saved registers s0..s7. ++ * --- frame pointer ---- ++ * - fp[-8] direct_call (1 = direct call from JS, 0 = from runtime) kDirectCall ++ * - fp[-16] stack_base (Top of backtracking stack). kStackHighEnd ++ * - fp[-24] capture array size (may fit multiple sets of matches) kNumOutputRegisters ++ * - fp[-32] int* capture_array (int[num_saved_registers_], for output). kRegisterOutput ++ * - fp[-40] end of input (address of end of string). kInputEnd ++ * - fp[-48] start of input (address of first character in string). kInputStart ++ * - fp[-56] start index (character index of start). kStartIndex ++ * - fp[-64] void* input_string (location of a handle containing the string). kInputString ++ * - fp[-72] success counter (only for global regexps to count matches). kSuccessfulCaptures ++ * - fp[-80] Offset of location before start of input (effectively character kStringStartMinusOne ++ * position -1). Used to initialize capture registers to a ++ * non-position. ++ * --------- The following output registers are 32-bit values. --------- ++ * - fp[-88] register 0 (Only positions must be stored in the first kRegisterZero ++ * - register 1 num_saved_registers_ registers) ++ * - ... ++ * - register num_registers-1 ++ * --- sp --- ++ * ++ * The first num_saved_registers_ registers are initialized to point to ++ * "character -1" in the string (i.e., char_size() bytes before the first ++ * character of the string). The remaining registers start out as garbage. ++ * ++ * The data up to the return address must be placed there by the calling ++ * code and the remaining arguments are passed in registers, e.g. by calling the ++ * code entry as cast to a function with the signature: ++ * int (*match)(String input_string, ++ * int start_index, ++ * Address start, ++ * Address end, ++ * int* capture_output_array, ++ * int num_capture_registers, ++ * byte* stack_area_base, ++ * bool direct_call = false, ++ * Isolate* isolate); ++ * The call is performed by NativeRegExpMacroAssembler::Execute() ++ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. 
++ * ++ * clang-format on ++ */ ++ ++#define __ ACCESS_MASM(masm_) ++ ++const int RegExpMacroAssemblerLA64::kRegExpCodeSize; ++ ++RegExpMacroAssemblerLA64::RegExpMacroAssemblerLA64(Isolate* isolate, Zone* zone, ++ Mode mode, ++ int registers_to_save) ++ : NativeRegExpMacroAssembler(isolate, zone), ++ masm_(new MacroAssembler(isolate, CodeObjectRequired::kYes, ++ NewAssemblerBuffer(kRegExpCodeSize))), ++ mode_(mode), ++ num_registers_(registers_to_save), ++ num_saved_registers_(registers_to_save), ++ entry_label_(), ++ start_label_(), ++ success_label_(), ++ backtrack_label_(), ++ exit_label_(), ++ internal_failure_label_() { ++ masm_->set_root_array_available(false); ++ ++ DCHECK_EQ(0, registers_to_save % 2); ++ __ jmp(&entry_label_); // We'll write the entry code later. ++ // If the code gets too big or corrupted, an internal exception will be ++ // raised, and we will exit right away. ++ __ bind(&internal_failure_label_); ++ __ li(a0, Operand(FAILURE)); ++ __ Ret(); ++ __ bind(&start_label_); // And then continue from here. ++} ++ ++RegExpMacroAssemblerLA64::~RegExpMacroAssemblerLA64() { ++ delete masm_; ++ // Unuse labels in case we throw away the assembler without calling GetCode. ++ entry_label_.Unuse(); ++ start_label_.Unuse(); ++ success_label_.Unuse(); ++ backtrack_label_.Unuse(); ++ exit_label_.Unuse(); ++ check_preempt_label_.Unuse(); ++ stack_overflow_label_.Unuse(); ++ internal_failure_label_.Unuse(); ++} ++ ++int RegExpMacroAssemblerLA64::stack_limit_slack() { ++ return RegExpStack::kStackLimitSlack; ++} ++ ++void RegExpMacroAssemblerLA64::AdvanceCurrentPosition(int by) { ++ if (by != 0) { ++ __ Add_d(current_input_offset(), current_input_offset(), ++ Operand(by * char_size())); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::AdvanceRegister(int reg, int by) { ++ DCHECK_LE(0, reg); ++ DCHECK_GT(num_registers_, reg); ++ if (by != 0) { ++ __ Ld_d(a0, register_location(reg)); ++ __ Add_d(a0, a0, Operand(by)); ++ __ St_d(a0, register_location(reg)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::Backtrack() { ++ CheckPreemption(); ++ if (has_backtrack_limit()) { ++ Label next; ++ __ Ld_d(a0, MemOperand(frame_pointer(), kBacktrackCount)); ++ __ Add_d(a0, a0, Operand(1)); ++ __ St_d(a0, MemOperand(frame_pointer(), kBacktrackCount)); ++ __ Branch(&next, ne, a0, Operand(backtrack_limit())); ++ ++ // Exceeded limits are treated as a failed match. ++ Fail(); ++ ++ __ bind(&next); ++ } ++ // Pop Code offset from backtrack stack, add Code and jump to location. 
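++  // Offsets into the Code object (rather than absolute addresses) are kept
++  // on the backtrack stack, so the entries stay valid if the Code object
++  // moves.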
++ Pop(a0); ++ __ Add_d(a0, a0, code_pointer()); ++ __ Jump(a0); ++} ++ ++void RegExpMacroAssemblerLA64::Bind(Label* label) { __ bind(label); } ++ ++void RegExpMacroAssemblerLA64::CheckCharacter(uint32_t c, Label* on_equal) { ++ BranchOrBacktrack(on_equal, eq, current_character(), Operand(c)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterGT(uc16 limit, Label* on_greater) { ++ BranchOrBacktrack(on_greater, gt, current_character(), Operand(limit)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckAtStart(int cp_offset, Label* on_at_start) { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(a0, current_input_offset(), ++ Operand(-char_size() + cp_offset * char_size())); ++ BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotAtStart(int cp_offset, ++ Label* on_not_at_start) { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(a0, current_input_offset(), ++ Operand(-char_size() + cp_offset * char_size())); ++ BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterLT(uc16 limit, Label* on_less) { ++ BranchOrBacktrack(on_less, lt, current_character(), Operand(limit)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckGreedyLoop(Label* on_equal) { ++ Label backtrack_non_equal; ++ __ Ld_w(a0, MemOperand(backtrack_stackpointer(), 0)); ++ __ Branch(&backtrack_non_equal, ne, current_input_offset(), Operand(a0)); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), ++ Operand(kIntSize)); ++ __ bind(&backtrack_non_equal); ++ BranchOrBacktrack(on_equal, eq, current_input_offset(), Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotBackReferenceIgnoreCase( ++ int start_reg, bool read_backward, Label* on_no_match) { ++ Label fallthrough; ++ __ Ld_d(a0, register_location(start_reg)); // Index of start of capture. ++ __ Ld_d(a1, register_location(start_reg + 1)); // Index of end of capture. ++ __ Sub_d(a1, a1, a0); // Length of capture. ++ ++ // At this point, the capture registers are either both set or both cleared. ++ // If the capture length is zero, then the capture is either empty or cleared. ++ // Fall through in both cases. ++ __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); ++ ++ if (read_backward) { ++ __ Ld_d(t1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(t1, t1, a1); ++ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1)); ++ } else { ++ __ Add_d(t1, a1, current_input_offset()); ++ // Check that there are enough characters left in the input. ++ BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); ++ } ++ ++ if (mode_ == LATIN1) { ++ Label success; ++ Label fail; ++ Label loop_check; ++ ++ // a0 - offset of start of capture. ++ // a1 - length of capture. ++ __ Add_d(a0, a0, Operand(end_of_input_address())); ++ __ Add_d(a2, end_of_input_address(), Operand(current_input_offset())); ++ if (read_backward) { ++ __ Sub_d(a2, a2, Operand(a1)); ++ } ++ __ Add_d(a1, a0, Operand(a1)); ++ ++ // a0 - Address of start of capture. ++ // a1 - Address of end of capture. ++ // a2 - Address of current input position. ++ ++ Label loop; ++ __ bind(&loop); ++ __ Ld_bu(a3, MemOperand(a0, 0)); ++ __ addi_d(a0, a0, char_size()); ++ __ Ld_bu(a4, MemOperand(a2, 0)); ++ __ addi_d(a2, a2, char_size()); ++ ++ __ Branch(&loop_check, eq, a4, Operand(a3)); ++ ++ // Mismatch, try case-insensitive match (converting letters to lower-case). 
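++    // An ASCII letter differs from its lower-case form only in bit 0x20
++    // (e.g. 'A' 0x41 | 0x20 == 'a' 0x61), so OR-ing both characters with
++    // 0x20 folds case; the checks below then verify the folded value really
++    // is a letter.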
++ __ Or(a3, a3, Operand(0x20)); // Convert capture character to lower-case. ++ __ Or(a4, a4, Operand(0x20)); // Also convert input character. ++ __ Branch(&fail, ne, a4, Operand(a3)); ++ __ Sub_d(a3, a3, Operand('a')); ++ __ Branch(&loop_check, ls, a3, Operand('z' - 'a')); ++ // Latin-1: Check for values in range [224,254] but not 247. ++ __ Sub_d(a3, a3, Operand(224 - 'a')); ++ // Weren't Latin-1 letters. ++ __ Branch(&fail, hi, a3, Operand(254 - 224)); ++ // Check for 247. ++ __ Branch(&fail, eq, a3, Operand(247 - 224)); ++ ++ __ bind(&loop_check); ++ __ Branch(&loop, lt, a0, Operand(a1)); ++ __ jmp(&success); ++ ++ __ bind(&fail); ++ GoTo(on_no_match); ++ ++ __ bind(&success); ++ // Compute new value of character position after the matched part. ++ __ Sub_d(current_input_offset(), a2, end_of_input_address()); ++ if (read_backward) { ++ __ Ld_d(t1, register_location(start_reg)); // Index of start of capture. ++ __ Ld_d(a2, ++ register_location(start_reg + 1)); // Index of end of capture. ++ __ Add_d(current_input_offset(), current_input_offset(), Operand(t1)); ++ __ Sub_d(current_input_offset(), current_input_offset(), Operand(a2)); ++ } ++ } else { ++ DCHECK(mode_ == UC16); ++ // Put regexp engine registers on stack. ++ RegList regexp_registers_to_retain = current_input_offset().bit() | ++ current_character().bit() | ++ backtrack_stackpointer().bit(); ++ __ MultiPush(regexp_registers_to_retain); ++ ++ int argument_count = 4; ++ __ PrepareCallCFunction(argument_count, a2); ++ ++ // a0 - offset of start of capture. ++ // a1 - length of capture. ++ ++ // Put arguments into arguments registers. ++ // Parameters are ++ // a0: Address byte_offset1 - Address captured substring's start. ++ // a1: Address byte_offset2 - Address of current character position. ++ // a2: size_t byte_length - length of capture in bytes(!). ++ // a3: Isolate* isolate. ++ ++ // Address of start of capture. ++ __ Add_d(a0, a0, Operand(end_of_input_address())); ++ // Length of capture. ++ __ mov(a2, a1); ++ // Save length in callee-save register for use on return. ++ __ mov(s3, a1); ++ // Address of current input position. ++ __ Add_d(a1, current_input_offset(), Operand(end_of_input_address())); ++ if (read_backward) { ++ __ Sub_d(a1, a1, Operand(s3)); ++ } ++ // Isolate. ++ __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate()))); ++ ++ { ++ AllowExternalCallThatCantCauseGC scope(masm_); ++ ExternalReference function = ++ ExternalReference::re_case_insensitive_compare_uc16(masm_->isolate()); ++ __ CallCFunction(function, argument_count); ++ } ++ ++ // Restore regexp engine registers. ++ __ MultiPop(regexp_registers_to_retain); ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ ++ // Check if function returned non-zero for success or zero for failure. ++ BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg)); ++ // On success, increment position by length of capture. ++ if (read_backward) { ++ __ Sub_d(current_input_offset(), current_input_offset(), Operand(s3)); ++ } else { ++ __ Add_d(current_input_offset(), current_input_offset(), Operand(s3)); ++ } ++ } ++ ++ __ bind(&fallthrough); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotBackReference(int start_reg, ++ bool read_backward, ++ Label* on_no_match) { ++ Label fallthrough; ++ ++ // Find length of back-referenced capture. 
++ __ Ld_d(a0, register_location(start_reg)); ++ __ Ld_d(a1, register_location(start_reg + 1)); ++ __ Sub_d(a1, a1, a0); // Length to check. ++ ++ // At this point, the capture registers are either both set or both cleared. ++ // If the capture length is zero, then the capture is either empty or cleared. ++ // Fall through in both cases. ++ __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); ++ ++ if (read_backward) { ++ __ Ld_d(t1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(t1, t1, a1); ++ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1)); ++ } else { ++ __ Add_d(t1, a1, current_input_offset()); ++ // Check that there are enough characters left in the input. ++ BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); ++ } ++ ++ // Compute pointers to match string and capture string. ++ __ Add_d(a0, a0, Operand(end_of_input_address())); ++ __ Add_d(a2, end_of_input_address(), Operand(current_input_offset())); ++ if (read_backward) { ++ __ Sub_d(a2, a2, Operand(a1)); ++ } ++ __ Add_d(a1, a1, Operand(a0)); ++ ++ Label loop; ++ __ bind(&loop); ++ if (mode_ == LATIN1) { ++ __ Ld_bu(a3, MemOperand(a0, 0)); ++ __ addi_d(a0, a0, char_size()); ++ __ Ld_bu(a4, MemOperand(a2, 0)); ++ __ addi_d(a2, a2, char_size()); ++ } else { ++ DCHECK(mode_ == UC16); ++ __ Ld_hu(a3, MemOperand(a0, 0)); ++ __ addi_d(a0, a0, char_size()); ++ __ Ld_hu(a4, MemOperand(a2, 0)); ++ __ addi_d(a2, a2, char_size()); ++ } ++ BranchOrBacktrack(on_no_match, ne, a3, Operand(a4)); ++ __ Branch(&loop, lt, a0, Operand(a1)); ++ ++ // Move current character position to position after match. ++ __ Sub_d(current_input_offset(), a2, end_of_input_address()); ++ if (read_backward) { ++ __ Ld_d(t1, register_location(start_reg)); // Index of start of capture. ++ __ Ld_d(a2, register_location(start_reg + 1)); // Index of end of capture. ++ __ Add_d(current_input_offset(), current_input_offset(), Operand(t1)); ++ __ Sub_d(current_input_offset(), current_input_offset(), Operand(a2)); ++ } ++ __ bind(&fallthrough); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotCharacter(uint32_t c, ++ Label* on_not_equal) { ++ BranchOrBacktrack(on_not_equal, ne, current_character(), Operand(c)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterAfterAnd(uint32_t c, uint32_t mask, ++ Label* on_equal) { ++ __ And(a0, current_character(), Operand(mask)); ++ Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c); ++ BranchOrBacktrack(on_equal, eq, a0, rhs); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotCharacterAfterAnd(uint32_t c, ++ uint32_t mask, ++ Label* on_not_equal) { ++ __ And(a0, current_character(), Operand(mask)); ++ Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c); ++ BranchOrBacktrack(on_not_equal, ne, a0, rhs); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotCharacterAfterMinusAnd( ++ uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) { ++ DCHECK_GT(String::kMaxUtf16CodeUnit, minus); ++ __ Sub_d(a0, current_character(), Operand(minus)); ++ __ And(a0, a0, Operand(mask)); ++ BranchOrBacktrack(on_not_equal, ne, a0, Operand(c)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterInRange(uc16 from, uc16 to, ++ Label* on_in_range) { ++ __ Sub_d(a0, current_character(), Operand(from)); ++ // Unsigned lower-or-same condition. ++ BranchOrBacktrack(on_in_range, ls, a0, Operand(to - from)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterNotInRange( ++ uc16 from, uc16 to, Label* on_not_in_range) { ++ __ Sub_d(a0, current_character(), Operand(from)); ++ // Unsigned higher condition. 
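++  // (current_character() - from) is compared as an unsigned value, so
++  // characters below 'from' wrap around to large values and also take the
++  // not-in-range branch (e.g. from='a', c='A': 0x41 - 0x61 wraps past 'z'-'a').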
++ BranchOrBacktrack(on_not_in_range, hi, a0, Operand(to - from)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckBitInTable(Handle table, ++ Label* on_bit_set) { ++ __ li(a0, Operand(table)); ++ if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { ++ __ And(a1, current_character(), Operand(kTableSize - 1)); ++ __ Add_d(a0, a0, a1); ++ } else { ++ __ Add_d(a0, a0, current_character()); ++ } ++ ++ __ Ld_bu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize)); ++ BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg)); ++} ++ ++bool RegExpMacroAssemblerLA64::CheckSpecialCharacterClass(uc16 type, ++ Label* on_no_match) { ++ // Range checks (c in min..max) are generally implemented by an unsigned ++ // (c - min) <= (max - min) check. ++ switch (type) { ++ case 's': ++ // Match space-characters. ++ if (mode_ == LATIN1) { ++ // One byte space characters are '\t'..'\r', ' ' and \u00a0. ++ Label success; ++ __ Branch(&success, eq, current_character(), Operand(' ')); ++ // Check range 0x09..0x0D. ++ __ Sub_d(a0, current_character(), Operand('\t')); ++ __ Branch(&success, ls, a0, Operand('\r' - '\t')); ++ // \u00a0 (NBSP). ++ BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t')); ++ __ bind(&success); ++ return true; ++ } ++ return false; ++ case 'S': ++ // The emitted code for generic character classes is good enough. ++ return false; ++ case 'd': ++ // Match Latin1 digits ('0'..'9'). ++ __ Sub_d(a0, current_character(), Operand('0')); ++ BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0')); ++ return true; ++ case 'D': ++ // Match non Latin1-digits. ++ __ Sub_d(a0, current_character(), Operand('0')); ++ BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0')); ++ return true; ++ case '.': { ++ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). ++ __ Xor(a0, current_character(), Operand(0x01)); ++ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. ++ __ Sub_d(a0, a0, Operand(0x0B)); ++ BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B)); ++ if (mode_ == UC16) { ++ // Compare original value to 0x2028 and 0x2029, using the already ++ // computed (current_char ^ 0x01 - 0x0B). I.e., check for ++ // 0x201D (0x2028 - 0x0B) or 0x201E. ++ __ Sub_d(a0, a0, Operand(0x2028 - 0x0B)); ++ BranchOrBacktrack(on_no_match, ls, a0, Operand(1)); ++ } ++ return true; ++ } ++ case 'n': { ++ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). ++ __ Xor(a0, current_character(), Operand(0x01)); ++ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. ++ __ Sub_d(a0, a0, Operand(0x0B)); ++ if (mode_ == LATIN1) { ++ BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B)); ++ } else { ++ Label done; ++ BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B)); ++ // Compare original value to 0x2028 and 0x2029, using the already ++ // computed (current_char ^ 0x01 - 0x0B). I.e., check for ++ // 0x201D (0x2028 - 0x0B) or 0x201E. ++ __ Sub_d(a0, a0, Operand(0x2028 - 0x0B)); ++ BranchOrBacktrack(on_no_match, hi, a0, Operand(1)); ++ __ bind(&done); ++ } ++ return true; ++ } ++ case 'w': { ++ if (mode_ != LATIN1) { ++ // Table is 256 entries, so all Latin1 characters can be tested. 
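++      // Word characters are all <= 'z', so anything above it can fail early;
++      // this also keeps the 256-entry map lookup below in range for two-byte
++      // strings.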
++ BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z')); ++ } ++ ExternalReference map = ++ ExternalReference::re_word_character_map(isolate()); ++ __ li(a0, Operand(map)); ++ __ Add_d(a0, a0, current_character()); ++ __ Ld_bu(a0, MemOperand(a0, 0)); ++ BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg)); ++ return true; ++ } ++ case 'W': { ++ Label done; ++ if (mode_ != LATIN1) { ++ // Table is 256 entries, so all Latin1 characters can be tested. ++ __ Branch(&done, hi, current_character(), Operand('z')); ++ } ++ ExternalReference map = ++ ExternalReference::re_word_character_map(isolate()); ++ __ li(a0, Operand(map)); ++ __ Add_d(a0, a0, current_character()); ++ __ Ld_bu(a0, MemOperand(a0, 0)); ++ BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg)); ++ if (mode_ != LATIN1) { ++ __ bind(&done); ++ } ++ return true; ++ } ++ case '*': ++ // Match any character. ++ return true; ++ // No custom implementation (yet): s(UC16), S(UC16). ++ default: ++ return false; ++ } ++} ++ ++void RegExpMacroAssemblerLA64::Fail() { ++ __ li(a0, Operand(FAILURE)); ++ __ jmp(&exit_label_); ++} ++ ++Handle RegExpMacroAssemblerLA64::GetCode(Handle source) { ++ Label return_v0; ++ if (0 /* todo masm_->has_exception()*/) { ++ // If the code gets corrupted due to long regular expressions and lack of ++ // space on trampolines, an internal exception flag is set. If this case ++ // is detected, we will jump into exit sequence right away. ++ //__ bind_to(&entry_label_, internal_failure_label_.pos()); ++ } else { ++ // Finalize code - write the entry point code now we know how many ++ // registers we need. ++ ++ // Entry code: ++ __ bind(&entry_label_); ++ ++ // Tell the system that we have a stack frame. Because the type is MANUAL, ++ // no is generated. ++ FrameScope scope(masm_, StackFrame::MANUAL); ++ ++ // Actually emit code to start a new stack frame. ++ // Push arguments ++ // Save callee-save registers. ++ // Start new stack frame. ++ // Store link register in existing stack-cell. ++ // Order here should correspond to order of offset constants in header file. ++ // TODO(plind): we save s0..s7, but ONLY use s3 here - use the regs ++ // or dont save. ++ RegList registers_to_retain = s0.bit() | s1.bit() | s2.bit() | s3.bit() | ++ s4.bit() | s5.bit() | s6.bit() | s7.bit(); ++ RegList argument_registers = a0.bit() | a1.bit() | a2.bit() | a3.bit(); ++ ++ argument_registers |= a4.bit() | a5.bit() | a6.bit() | a7.bit(); ++ ++ __ MultiPush(ra.bit(), fp.bit(), argument_registers | registers_to_retain); ++ // Set frame pointer in space for it if this is not a direct call ++ // from generated code. ++ // TODO(plind): this 8 is the # of argument regs, should have definition. ++ __ Add_d(frame_pointer(), sp, Operand(8 * kPointerSize)); ++ STATIC_ASSERT(kSuccessfulCaptures == kInputString - kSystemPointerSize); ++ __ mov(a0, zero_reg); ++ __ push(a0); // Make room for success counter and initialize it to 0. ++ STATIC_ASSERT(kStringStartMinusOne == ++ kSuccessfulCaptures - kSystemPointerSize); ++ __ push(a0); // Make room for "string start - 1" constant. ++ STATIC_ASSERT(kBacktrackCount == kStringStartMinusOne - kSystemPointerSize); ++ __ push(a0); // The backtrack counter ++ ++ // Check if we have space on the stack for registers. 
++ Label stack_limit_hit; ++ Label stack_ok; ++ ++ ExternalReference stack_limit = ++ ExternalReference::address_of_jslimit(masm_->isolate()); ++ __ li(a0, Operand(stack_limit)); ++ __ Ld_d(a0, MemOperand(a0, 0)); ++ __ Sub_d(a0, sp, a0); ++ // Handle it if the stack pointer is already below the stack limit. ++ __ Branch(&stack_limit_hit, le, a0, Operand(zero_reg)); ++ // Check if there is room for the variable number of registers above ++ // the stack limit. ++ __ Branch(&stack_ok, hs, a0, Operand(num_registers_ * kPointerSize)); ++ // Exit with OutOfMemory exception. There is not enough space on the stack ++ // for our working registers. ++ __ li(a0, Operand(EXCEPTION)); ++ __ jmp(&return_v0); ++ ++ __ bind(&stack_limit_hit); ++ CallCheckStackGuardState(a0); ++ // If returned value is non-zero, we exit with the returned value as result. ++ __ Branch(&return_v0, ne, a0, Operand(zero_reg)); ++ ++ __ bind(&stack_ok); ++ // Allocate space on stack for registers. ++ __ Sub_d(sp, sp, Operand(num_registers_ * kPointerSize)); ++ // Load string end. ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ // Load input start. ++ __ Ld_d(a0, MemOperand(frame_pointer(), kInputStart)); ++ // Find negative length (offset of start relative to end). ++ __ Sub_d(current_input_offset(), a0, end_of_input_address()); ++ // Set a0 to address of char before start of the input string ++ // (effectively string position -1). ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStartIndex)); ++ __ Sub_d(a0, current_input_offset(), Operand(char_size())); ++ __ slli_d(t1, a1, (mode_ == UC16) ? 1 : 0); ++ __ Sub_d(a0, a0, t1); ++ // Store this value in a local variable, for use when clearing ++ // position registers. ++ __ St_d(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ ++ // Initialize code pointer register ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ ++ Label load_char_start_regexp, start_regexp; ++ // Load newline if index is at start, previous character otherwise. ++ __ Branch(&load_char_start_regexp, ne, a1, Operand(zero_reg)); ++ __ li(current_character(), Operand('\n')); ++ __ jmp(&start_regexp); ++ ++ // Global regexp restarts matching here. ++ __ bind(&load_char_start_regexp); ++ // Load previous char as initial value of current character register. ++ LoadCurrentCharacterUnchecked(-1, 1); ++ __ bind(&start_regexp); ++ ++ // Initialize on-stack registers. ++ if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. ++ // Fill saved registers with initial value = start offset - 1. ++ if (num_saved_registers_ > 8) { ++ // Address of register 0. ++ __ Add_d(a1, frame_pointer(), Operand(kRegisterZero)); ++ __ li(a2, Operand(num_saved_registers_)); ++ Label init_loop; ++ __ bind(&init_loop); ++ __ St_d(a0, MemOperand(a1, 0)); ++ __ Add_d(a1, a1, Operand(-kPointerSize)); ++ __ Sub_d(a2, a2, Operand(1)); ++ __ Branch(&init_loop, ne, a2, Operand(zero_reg)); ++ } else { ++ for (int i = 0; i < num_saved_registers_; i++) { ++ __ St_d(a0, register_location(i)); ++ } ++ } ++ } ++ ++ // Initialize backtrack stack pointer. ++ __ Ld_d(backtrack_stackpointer(), ++ MemOperand(frame_pointer(), kStackHighEnd)); ++ ++ __ jmp(&start_label_); ++ ++ // Exit code: ++ if (success_label_.is_linked()) { ++ // Save captures when successful. ++ __ bind(&success_label_); ++ if (num_saved_registers_ > 0) { ++ // Copy captures to output. 
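++        // Capture registers hold byte offsets relative to the end of the
++        // input; a1 is computed below so that adding it (after halving the
++        // offsets for UC16) yields character indices from the string start.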
++ __ Ld_d(a1, MemOperand(frame_pointer(), kInputStart)); ++ __ Ld_d(a0, MemOperand(frame_pointer(), kRegisterOutput)); ++ __ Ld_d(a2, MemOperand(frame_pointer(), kStartIndex)); ++ __ Sub_d(a1, end_of_input_address(), a1); ++ // a1 is length of input in bytes. ++ if (mode_ == UC16) { ++ __ srli_d(a1, a1, 1); ++ } ++ // a1 is length of input in characters. ++ __ Add_d(a1, a1, Operand(a2)); ++ // a1 is length of string in characters. ++ ++ DCHECK_EQ(0, num_saved_registers_ % 2); ++ // Always an even number of capture registers. This allows us to ++ // unroll the loop once to add an operation between a load of a register ++ // and the following use of that register. ++ for (int i = 0; i < num_saved_registers_; i += 2) { ++ __ Ld_d(a2, register_location(i)); ++ __ Ld_d(a3, register_location(i + 1)); ++ if (i == 0 && global_with_zero_length_check()) { ++ // Keep capture start in a4 for the zero-length check later. ++ __ mov(t3, a2); ++ } ++ if (mode_ == UC16) { ++ __ srai_d(a2, a2, 1); ++ __ Add_d(a2, a2, a1); ++ __ srai_d(a3, a3, 1); ++ __ Add_d(a3, a3, a1); ++ } else { ++ __ Add_d(a2, a1, Operand(a2)); ++ __ Add_d(a3, a1, Operand(a3)); ++ } ++ // V8 expects the output to be an int32_t array. ++ __ St_w(a2, MemOperand(a0, 0)); ++ __ Add_d(a0, a0, kIntSize); ++ __ St_w(a3, MemOperand(a0, 0)); ++ __ Add_d(a0, a0, kIntSize); ++ } ++ } ++ ++ if (global()) { ++ // Restart matching if the regular expression is flagged as global. ++ __ Ld_d(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); ++ __ Ld_d(a1, MemOperand(frame_pointer(), kNumOutputRegisters)); ++ __ Ld_d(a2, MemOperand(frame_pointer(), kRegisterOutput)); ++ // Increment success counter. ++ __ Add_d(a0, a0, 1); ++ __ St_d(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); ++ // Capture results have been stored, so the number of remaining global ++ // output registers is reduced by the number of stored captures. ++ __ Sub_d(a1, a1, num_saved_registers_); ++ // Check whether we have enough room for another set of capture results. ++ //__ mov(v0, a0); ++ __ Branch(&return_v0, lt, a1, Operand(num_saved_registers_)); ++ ++ __ St_d(a1, MemOperand(frame_pointer(), kNumOutputRegisters)); ++ // Advance the location for output. ++ __ Add_d(a2, a2, num_saved_registers_ * kIntSize); ++ __ St_d(a2, MemOperand(frame_pointer(), kRegisterOutput)); ++ ++ // Prepare a0 to initialize registers with its value in the next run. ++ __ Ld_d(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ ++ if (global_with_zero_length_check()) { ++ // Special case for zero-length matches. ++ // t3: capture start index ++ // Not a zero-length match, restart. ++ __ Branch(&load_char_start_regexp, ne, current_input_offset(), ++ Operand(t3)); ++ // Offset from the end is zero if we already reached the end. ++ __ Branch(&exit_label_, eq, current_input_offset(), ++ Operand(zero_reg)); ++ // Advance current position after a zero-length match. ++ Label advance; ++ __ bind(&advance); ++ __ Add_d(current_input_offset(), current_input_offset(), ++ Operand((mode_ == UC16) ? 2 : 1)); ++ if (global_unicode()) CheckNotInSurrogatePair(0, &advance); ++ } ++ ++ __ Branch(&load_char_start_regexp); ++ } else { ++ __ li(a0, Operand(SUCCESS)); ++ } ++ } ++ // Exit and return v0. ++ __ bind(&exit_label_); ++ if (global()) { ++ __ Ld_d(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); ++ } ++ ++ __ bind(&return_v0); ++ // Skip sp past regexp registers and local variables.. ++ __ mov(sp, frame_pointer()); ++ // Restore registers s0..s7 and return (restoring ra to pc). 
++ __ MultiPop(ra.bit(), fp.bit(), registers_to_retain); ++ __ Ret(); ++ ++ // Backtrack code (branch target for conditional backtracks). ++ if (backtrack_label_.is_linked()) { ++ __ bind(&backtrack_label_); ++ Backtrack(); ++ } ++ ++ Label exit_with_exception; ++ ++ // Preempt-code. ++ if (check_preempt_label_.is_linked()) { ++ SafeCallTarget(&check_preempt_label_); ++ // Put regexp engine registers on stack. ++ RegList regexp_registers_to_retain = current_input_offset().bit() | ++ current_character().bit() | ++ backtrack_stackpointer().bit(); ++ __ MultiPush(regexp_registers_to_retain); ++ CallCheckStackGuardState(a0); ++ __ MultiPop(regexp_registers_to_retain); ++ // If returning non-zero, we should end execution with the given ++ // result as return value. ++ __ Branch(&return_v0, ne, a0, Operand(zero_reg)); ++ ++ // String might have moved: Reload end of string from frame. ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ SafeReturn(); ++ } ++ ++ // Backtrack stack overflow code. ++ if (stack_overflow_label_.is_linked()) { ++ SafeCallTarget(&stack_overflow_label_); ++ // Reached if the backtrack-stack limit has been hit. ++ // Put regexp engine registers on stack first. ++ RegList regexp_registers = ++ current_input_offset().bit() | current_character().bit(); ++ __ MultiPush(regexp_registers); ++ ++ // Call GrowStack(backtrack_stackpointer(), &stack_base) ++ static const int num_arguments = 3; ++ __ PrepareCallCFunction(num_arguments, a0); ++ __ mov(a0, backtrack_stackpointer()); ++ __ Add_d(a1, frame_pointer(), Operand(kStackHighEnd)); ++ __ li(a2, Operand(ExternalReference::isolate_address(masm_->isolate()))); ++ ExternalReference grow_stack = ++ ExternalReference::re_grow_stack(masm_->isolate()); ++ __ CallCFunction(grow_stack, num_arguments); ++ // Restore regexp registers. ++ __ MultiPop(regexp_registers); ++ // If return nullptr, we have failed to grow the stack, and ++ // must exit with a stack-overflow exception. ++ __ Branch(&exit_with_exception, eq, a0, Operand(zero_reg)); ++ // Otherwise use return value as new stack pointer. ++ __ mov(backtrack_stackpointer(), a0); ++ // Restore saved registers and continue. ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ SafeReturn(); ++ } ++ ++ if (exit_with_exception.is_linked()) { ++ // If any of the code above needed to exit with an exception. ++ __ bind(&exit_with_exception); ++ // Exit with Result EXCEPTION(-1) to signal thrown exception. 
++ __ li(a0, Operand(EXCEPTION)); ++ __ jmp(&return_v0); ++ } ++ } ++ ++ CodeDesc code_desc; ++ masm_->GetCode(isolate(), &code_desc); ++ Handle code = Factory::CodeBuilder(isolate(), code_desc, Code::REGEXP) ++ .set_self_reference(masm_->CodeObject()) ++ .Build(); ++ LOG(masm_->isolate(), ++ RegExpCodeCreateEvent(Handle::cast(code), source)); ++ return Handle::cast(code); ++} ++ ++void RegExpMacroAssemblerLA64::GoTo(Label* to) { ++ if (to == nullptr) { ++ Backtrack(); ++ return; ++ } ++ __ jmp(to); ++ return; ++} ++ ++void RegExpMacroAssemblerLA64::IfRegisterGE(int reg, int comparand, ++ Label* if_ge) { ++ __ Ld_d(a0, register_location(reg)); ++ BranchOrBacktrack(if_ge, ge, a0, Operand(comparand)); ++} ++ ++void RegExpMacroAssemblerLA64::IfRegisterLT(int reg, int comparand, ++ Label* if_lt) { ++ __ Ld_d(a0, register_location(reg)); ++ BranchOrBacktrack(if_lt, lt, a0, Operand(comparand)); ++} ++ ++void RegExpMacroAssemblerLA64::IfRegisterEqPos(int reg, Label* if_eq) { ++ __ Ld_d(a0, register_location(reg)); ++ BranchOrBacktrack(if_eq, eq, a0, Operand(current_input_offset())); ++} ++ ++RegExpMacroAssembler::IrregexpImplementation ++RegExpMacroAssemblerLA64::Implementation() { ++ return kLA64Implementation; ++} ++ ++void RegExpMacroAssemblerLA64::LoadCurrentCharacterImpl(int cp_offset, ++ Label* on_end_of_input, ++ bool check_bounds, ++ int characters, ++ int eats_at_least) { ++ // It's possible to preload a small number of characters when each success ++ // path requires a large number of characters, but not the reverse. ++ DCHECK_GE(eats_at_least, characters); ++ ++ DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works). ++ if (check_bounds) { ++ if (cp_offset >= 0) { ++ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); ++ } else { ++ CheckPosition(cp_offset, on_end_of_input); ++ } ++ } ++ LoadCurrentCharacterUnchecked(cp_offset, characters); ++} ++ ++void RegExpMacroAssemblerLA64::PopCurrentPosition() { ++ Pop(current_input_offset()); ++} ++ ++void RegExpMacroAssemblerLA64::PopRegister(int register_index) { ++ Pop(a0); ++ __ St_d(a0, register_location(register_index)); ++} ++ ++void RegExpMacroAssemblerLA64::PushBacktrack(Label* label) { ++ if (label->is_bound()) { ++ int target = label->pos(); ++ __ li(a0, Operand(target + Code::kHeaderSize - kHeapObjectTag)); ++ } else { ++ // TODO: Optimize like arm64 without ld_wu? 
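++    // The label is not bound yet: branch over a 4-byte slot (the nop below),
++    // let label_at_put() record the slot so the assembler can patch in the
++    // target offset once the label is bound, then load it at run time.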
++ Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm_); ++ Label after_constant; ++ __ Branch(&after_constant); ++ int offset = masm_->pc_offset(); ++ int cp_offset = offset + Code::kHeaderSize - kHeapObjectTag; ++ //__ emit(0); ++ __ nop(); ++ masm_->label_at_put(label, offset); ++ __ bind(&after_constant); ++ if (is_int12(cp_offset)) { ++ __ Ld_wu(a0, MemOperand(code_pointer(), cp_offset)); ++ } else { ++ __ Add_d(a0, code_pointer(), cp_offset); ++ __ Ld_wu(a0, MemOperand(a0, 0)); ++ } ++ } ++ Push(a0); ++ CheckStackLimit(); ++} ++ ++void RegExpMacroAssemblerLA64::PushCurrentPosition() { ++ Push(current_input_offset()); ++} ++ ++void RegExpMacroAssemblerLA64::PushRegister(int register_index, ++ StackCheckFlag check_stack_limit) { ++ __ Ld_d(a0, register_location(register_index)); ++ Push(a0); ++ if (check_stack_limit) CheckStackLimit(); ++} ++ ++void RegExpMacroAssemblerLA64::ReadCurrentPositionFromRegister(int reg) { ++ __ Ld_d(current_input_offset(), register_location(reg)); ++} ++ ++void RegExpMacroAssemblerLA64::ReadStackPointerFromRegister(int reg) { ++ __ Ld_d(backtrack_stackpointer(), register_location(reg)); ++ __ Ld_d(a0, MemOperand(frame_pointer(), kStackHighEnd)); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::SetCurrentPositionFromEnd(int by) { ++ Label after_position; ++ __ Branch(&after_position, ge, current_input_offset(), ++ Operand(-by * char_size())); ++ __ li(current_input_offset(), -by * char_size()); ++ // On RegExp code entry (where this operation is used), the character before ++ // the current position is expected to be already loaded. ++ // We have advanced the position, so it's safe to read backwards. ++ LoadCurrentCharacterUnchecked(-1, 1); ++ __ bind(&after_position); ++} ++ ++void RegExpMacroAssemblerLA64::SetRegister(int register_index, int to) { ++ DCHECK(register_index >= num_saved_registers_); // Reserved for positions! ++ __ li(a0, Operand(to)); ++ __ St_d(a0, register_location(register_index)); ++} ++ ++bool RegExpMacroAssemblerLA64::Succeed() { ++ __ jmp(&success_label_); ++ return global(); ++} ++ ++void RegExpMacroAssemblerLA64::WriteCurrentPositionToRegister(int reg, ++ int cp_offset) { ++ if (cp_offset == 0) { ++ __ St_d(current_input_offset(), register_location(reg)); ++ } else { ++ __ Add_d(a0, current_input_offset(), Operand(cp_offset * char_size())); ++ __ St_d(a0, register_location(reg)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::ClearRegisters(int reg_from, int reg_to) { ++ DCHECK(reg_from <= reg_to); ++ __ Ld_d(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ for (int reg = reg_from; reg <= reg_to; reg++) { ++ __ St_d(a0, register_location(reg)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::WriteStackPointerToRegister(int reg) { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStackHighEnd)); ++ __ Sub_d(a0, backtrack_stackpointer(), a1); ++ __ St_d(a0, register_location(reg)); ++} ++ ++bool RegExpMacroAssemblerLA64::CanReadUnaligned() { return false; } ++ ++// Private methods: ++ ++void RegExpMacroAssemblerLA64::CallCheckStackGuardState(Register scratch) { ++ DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins()); ++ DCHECK(!masm_->options().isolate_independent_code); ++ ++ int stack_alignment = base::OS::ActivationFrameAlignment(); ++ ++ // Align the stack pointer and save the original sp value on the stack. 
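// Illustration only, not part of the patch: the next few instructions round sp
// down to the C activation-frame alignment and stash the original value so it
// can be reloaded after the call. A plain-C++ sketch of the same rounding,
// assuming the alignment is a power of two (as the DCHECK below asserts):
auto sketch_align_stack_down = [](unsigned long long sp, unsigned long long alignment) {
  return sp & ~(alignment - 1);  // equivalent to the And(sp, sp, -alignment) below
};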
++ __ mov(scratch, sp); ++ __ Sub_d(sp, sp, Operand(kPointerSize)); ++ DCHECK(base::bits::IsPowerOfTwo(stack_alignment)); ++ __ And(sp, sp, Operand(-stack_alignment)); ++ __ St_d(scratch, MemOperand(sp, 0)); ++ ++ __ mov(a2, frame_pointer()); ++ // Code of self. ++ __ li(a1, Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ ++ // We need to make room for the return address on the stack. ++ DCHECK(IsAligned(stack_alignment, kPointerSize)); ++ __ Sub_d(sp, sp, Operand(stack_alignment)); ++ ++ // The stack pointer now points to cell where the return address will be ++ // written. Arguments are in registers, meaning we treat the return address as ++ // argument 5. Since DirectCEntry will handle allocating space for the C ++ // argument slots, we don't need to care about that here. This is how the ++ // stack will look (sp meaning the value of sp at this moment): ++ // [sp + 3] - empty slot if needed for alignment. ++ // [sp + 2] - saved sp. ++ // [sp + 1] - second word reserved for return value. ++ // [sp + 0] - first word reserved for return value. ++ ++ // a0 will point to the return address, placed by DirectCEntry. ++ __ mov(a0, sp); ++ ++ ExternalReference stack_guard_check = ++ ExternalReference::re_check_stack_guard_state(masm_->isolate()); ++ __ li(t7, Operand(stack_guard_check)); ++ ++ EmbeddedData d = EmbeddedData::FromBlob(); ++ CHECK(Builtins::IsIsolateIndependent(Builtins::kDirectCEntry)); ++ Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry); ++ __ li(kScratchReg, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ __ Call(kScratchReg); ++ ++ // DirectCEntry allocated space for the C argument slots so we have to ++ // drop them with the return address from the stack with loading saved sp. ++ // At this point stack must look: ++ // [sp + 7] - empty slot if needed for alignment. ++ // [sp + 6] - saved sp. ++ // [sp + 5] - second word reserved for return value. ++ // [sp + 4] - first word reserved for return value. ++ // [sp + 3] - C argument slot. ++ // [sp + 2] - C argument slot. ++ // [sp + 1] - C argument slot. ++ // [sp + 0] - C argument slot. ++ __ Ld_d(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize)); ++ ++ __ li(code_pointer(), Operand(masm_->CodeObject())); ++} ++ ++// Helper function for reading a value out of a stack frame. ++template ++static T& frame_entry(Address re_frame, int frame_offset) { ++ return reinterpret_cast(Memory(re_frame + frame_offset)); ++} ++ ++template ++static T* frame_entry_address(Address re_frame, int frame_offset) { ++ return reinterpret_cast(re_frame + frame_offset); ++} ++ ++int64_t RegExpMacroAssemblerLA64::CheckStackGuardState(Address* return_address, ++ Address raw_code, ++ Address re_frame) { ++ Code re_code = Code::cast(Object(raw_code)); ++ return NativeRegExpMacroAssembler::CheckStackGuardState( ++ frame_entry(re_frame, kIsolate), ++ static_cast(frame_entry(re_frame, kStartIndex)), ++ static_cast( ++ frame_entry(re_frame, kDirectCall)), ++ return_address, re_code, ++ frame_entry_address
(re_frame, kInputString), ++ frame_entry_address(re_frame, kInputStart), ++ frame_entry_address(re_frame, kInputEnd)); ++} ++ ++MemOperand RegExpMacroAssemblerLA64::register_location(int register_index) { ++ DCHECK(register_index < (1 << 30)); ++ if (num_registers_ <= register_index) { ++ num_registers_ = register_index + 1; ++ } ++ return MemOperand(frame_pointer(), ++ kRegisterZero - register_index * kPointerSize); ++} ++ ++void RegExpMacroAssemblerLA64::CheckPosition(int cp_offset, ++ Label* on_outside_input) { ++ if (cp_offset >= 0) { ++ BranchOrBacktrack(on_outside_input, ge, current_input_offset(), ++ Operand(-cp_offset * char_size())); ++ } else { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(a0, current_input_offset(), Operand(cp_offset * char_size())); ++ BranchOrBacktrack(on_outside_input, le, a0, Operand(a1)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::BranchOrBacktrack(Label* to, Condition condition, ++ Register rs, ++ const Operand& rt) { ++ if (condition == al) { // Unconditional. ++ if (to == nullptr) { ++ Backtrack(); ++ return; ++ } ++ __ jmp(to); ++ return; ++ } ++ if (to == nullptr) { ++ __ Branch(&backtrack_label_, condition, rs, rt); ++ return; ++ } ++ __ Branch(to, condition, rs, rt); ++} ++ ++void RegExpMacroAssemblerLA64::SafeCall(Label* to, Condition cond, Register rs, ++ const Operand& rt) { ++ __ Branch(to, cond, rs, rt, true); ++} ++ ++void RegExpMacroAssemblerLA64::SafeReturn() { ++ __ pop(ra); ++ __ Add_d(t1, ra, Operand(masm_->CodeObject())); ++ __ Jump(t1); ++} ++ ++void RegExpMacroAssemblerLA64::SafeCallTarget(Label* name) { ++ __ bind(name); ++ __ Sub_d(ra, ra, Operand(masm_->CodeObject())); ++ __ push(ra); ++} ++ ++void RegExpMacroAssemblerLA64::Push(Register source) { ++ DCHECK(source != backtrack_stackpointer()); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), ++ Operand(-kIntSize)); ++ __ St_w(source, MemOperand(backtrack_stackpointer(), 0)); ++} ++ ++void RegExpMacroAssemblerLA64::Pop(Register target) { ++ DCHECK(target != backtrack_stackpointer()); ++ __ Ld_w(target, MemOperand(backtrack_stackpointer(), 0)); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), kIntSize); ++} ++ ++void RegExpMacroAssemblerLA64::CheckPreemption() { ++ // Check for preemption. ++ ExternalReference stack_limit = ++ ExternalReference::address_of_jslimit(masm_->isolate()); ++ __ li(a0, Operand(stack_limit)); ++ __ Ld_d(a0, MemOperand(a0, 0)); ++ SafeCall(&check_preempt_label_, ls, sp, Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckStackLimit() { ++ ExternalReference stack_limit = ++ ExternalReference::address_of_regexp_stack_limit_address( ++ masm_->isolate()); ++ ++ __ li(a0, Operand(stack_limit)); ++ __ Ld_d(a0, MemOperand(a0, 0)); ++ SafeCall(&stack_overflow_label_, ls, backtrack_stackpointer(), Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::LoadCurrentCharacterUnchecked(int cp_offset, ++ int characters) { ++ Register offset = current_input_offset(); ++ if (cp_offset != 0) { ++ // t3 is not being used to store the capture start index at this point. ++ __ Add_d(t3, current_input_offset(), Operand(cp_offset * char_size())); ++ offset = t3; ++ } ++ // We assume that we cannot do unaligned loads on LA64, so this function ++ // must only be used to load a single character at a time. 
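// Illustration only, not part of the patch: the current position is kept as a
// negative byte offset from the end of the subject string, so the character
// address is simply end_of_input + offset. A minimal sketch for the LATIN1
// case (one byte per character; UC16 would scale cp_offset by two):
auto sketch_load_char_latin1 = [](const unsigned char* end_of_input,
                                  long long current_input_offset,  // <= 0
                                  int cp_offset) {
  return end_of_input[current_input_offset + cp_offset];
};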
++ DCHECK_EQ(1, characters); ++ __ Add_d(t1, end_of_input_address(), Operand(offset)); ++ if (mode_ == LATIN1) { ++ __ Ld_bu(current_character(), MemOperand(t1, 0)); ++ } else { ++ DCHECK(mode_ == UC16); ++ __ Ld_hu(current_character(), MemOperand(t1, 0)); ++ } ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.h b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.h +new file mode 100644 +index 00000000000..5ebf37807cb +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.h +@@ -0,0 +1,216 @@ ++// Copyright 2011 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_REGEXP_LA64_REGEXP_MACRO_ASSEMBLER_LA64_H_ ++#define V8_REGEXP_LA64_REGEXP_MACRO_ASSEMBLER_LA64_H_ ++ ++#include "src/codegen/la64/assembler-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/regexp/regexp-macro-assembler.h" ++ ++namespace v8 { ++namespace internal { ++ ++class V8_EXPORT_PRIVATE RegExpMacroAssemblerLA64 ++ : public NativeRegExpMacroAssembler { ++ public: ++ RegExpMacroAssemblerLA64(Isolate* isolate, Zone* zone, Mode mode, ++ int registers_to_save); ++ virtual ~RegExpMacroAssemblerLA64(); ++ virtual int stack_limit_slack(); ++ virtual void AdvanceCurrentPosition(int by); ++ virtual void AdvanceRegister(int reg, int by); ++ virtual void Backtrack(); ++ virtual void Bind(Label* label); ++ virtual void CheckAtStart(int cp_offset, Label* on_at_start); ++ virtual void CheckCharacter(uint32_t c, Label* on_equal); ++ virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, ++ Label* on_equal); ++ virtual void CheckCharacterGT(uc16 limit, Label* on_greater); ++ virtual void CheckCharacterLT(uc16 limit, Label* on_less); ++ // A "greedy loop" is a loop that is both greedy and with a simple ++ // body. It has a particularly simple implementation. ++ virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); ++ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); ++ virtual void CheckNotBackReference(int start_reg, bool read_backward, ++ Label* on_no_match); ++ virtual void CheckNotBackReferenceIgnoreCase(int start_reg, ++ bool read_backward, ++ Label* on_no_match); ++ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); ++ virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask, ++ Label* on_not_equal); ++ virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask, ++ Label* on_not_equal); ++ virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range); ++ virtual void CheckCharacterNotInRange(uc16 from, uc16 to, ++ Label* on_not_in_range); ++ virtual void CheckBitInTable(Handle table, Label* on_bit_set); ++ ++ // Checks whether the given offset from the current position is before ++ // the end of the string. 
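// Illustration only, not part of the patch: because the position is a negative
// offset from the string end, a forward peek of cp_offset characters runs past
// the end exactly when the sum reaches zero. A minimal sketch of that test for
// the one-byte (LATIN1) case, mirroring the cp_offset >= 0 path of CheckPosition:
static constexpr bool SketchPositionOutsideInput(long long current_input_offset,
                                                 int cp_offset) {
  return current_input_offset + cp_offset >= 0;
}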
++ virtual void CheckPosition(int cp_offset, Label* on_outside_input); ++ virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match); ++ virtual void Fail(); ++ virtual Handle GetCode(Handle source); ++ virtual void GoTo(Label* label); ++ virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); ++ virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); ++ virtual void IfRegisterEqPos(int reg, Label* if_eq); ++ virtual IrregexpImplementation Implementation(); ++ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, ++ bool check_bounds, int characters, ++ int eats_at_least); ++ virtual void PopCurrentPosition(); ++ virtual void PopRegister(int register_index); ++ virtual void PushBacktrack(Label* label); ++ virtual void PushCurrentPosition(); ++ virtual void PushRegister(int register_index, ++ StackCheckFlag check_stack_limit); ++ virtual void ReadCurrentPositionFromRegister(int reg); ++ virtual void ReadStackPointerFromRegister(int reg); ++ virtual void SetCurrentPositionFromEnd(int by); ++ virtual void SetRegister(int register_index, int to); ++ virtual bool Succeed(); ++ virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); ++ virtual void ClearRegisters(int reg_from, int reg_to); ++ virtual void WriteStackPointerToRegister(int reg); ++ virtual bool CanReadUnaligned(); ++ ++ // Called from RegExp if the stack-guard is triggered. ++ // If the code object is relocated, the return address is fixed before ++ // returning. ++ // {raw_code} is an Address because this is called via ExternalReference. ++ static int64_t CheckStackGuardState(Address* return_address, Address raw_code, ++ Address re_frame); ++ ++ void print_regexp_frame_constants(); ++ ++ private: ++ // Offsets from frame_pointer() of function parameters and stored registers. ++ static const int kFramePointer = 0; ++ ++ // Above the frame pointer - Stored registers and stack passed parameters. ++ // Registers s0 to s7, fp, and ra. ++ static const int kStoredRegisters = kFramePointer; ++ // Return address (stored from link register, read into pc on return). ++ ++ // TODO(plind): This 9 - is 8 s-regs (s0..s7) plus fp. ++ ++ static const int kReturnAddress = kStoredRegisters + 9 * kPointerSize; ++ // Stack frame header. ++ static const int kStackFrameHeader = kReturnAddress; ++ // Stack parameters placed by caller. ++ static const int kIsolate = kStackFrameHeader + kPointerSize; ++ ++ // Below the frame pointer. ++ // Register parameters stored by setup code. ++ static const int kDirectCall = kFramePointer - kPointerSize; ++ static const int kStackHighEnd = kDirectCall - kPointerSize; ++ static const int kNumOutputRegisters = kStackHighEnd - kPointerSize; ++ static const int kRegisterOutput = kNumOutputRegisters - kPointerSize; ++ static const int kInputEnd = kRegisterOutput - kPointerSize; ++ static const int kInputStart = kInputEnd - kPointerSize; ++ static const int kStartIndex = kInputStart - kPointerSize; ++ static const int kInputString = kStartIndex - kPointerSize; ++ // When adding local variables remember to push space for them in ++ // the frame in GetCode. ++ static const int kSuccessfulCaptures = kInputString - kPointerSize; ++ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; ++ static const int kBacktrackCount = kStringStartMinusOne - kSystemPointerSize; ++ // First register address. Following registers are below it on the stack. 
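// Illustration only, not part of the patch: regexp registers are laid out as
// pointer-sized slots growing downwards from kRegisterZero, so register i lives
// at fp + (kRegisterZero - i * kPointerSize). A minimal sketch of that offset
// computation, assuming 8-byte pointers as on LA64:
static constexpr int SketchRegisterSlotOffset(int register_zero_offset, int index) {
  return register_zero_offset - index * 8;
}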
++ static const int kRegisterZero = kBacktrackCount - kSystemPointerSize; ++ ++ // Initial size of code buffer. ++ static const int kRegExpCodeSize = 1024; ++ ++ // Load a number of characters at the given offset from the ++ // current position, into the current-character register. ++ void LoadCurrentCharacterUnchecked(int cp_offset, int character_count); ++ ++ // Check whether preemption has been requested. ++ void CheckPreemption(); ++ ++ // Check whether we are exceeding the stack limit on the backtrack stack. ++ void CheckStackLimit(); ++ ++ // Generate a call to CheckStackGuardState. ++ void CallCheckStackGuardState(Register scratch); ++ ++ // The ebp-relative location of a regexp register. ++ MemOperand register_location(int register_index); ++ ++ // Register holding the current input position as negative offset from ++ // the end of the string. ++ inline Register current_input_offset() { return a6; } ++ ++ // The register containing the current character after LoadCurrentCharacter. ++ inline Register current_character() { return a7; } ++ ++ // Register holding address of the end of the input string. ++ inline Register end_of_input_address() { return t2; } ++ ++ // Register holding the frame address. Local variables, parameters and ++ // regexp registers are addressed relative to this. ++ inline Register frame_pointer() { return fp; } ++ ++ // The register containing the backtrack stack top. Provides a meaningful ++ // name to the register. ++ inline Register backtrack_stackpointer() { return t0; } ++ ++ // Register holding pointer to the current code object. ++ inline Register code_pointer() { return a5; } ++ ++ // Byte size of chars in the string to match (decided by the Mode argument). ++ inline int char_size() { return static_cast(mode_); } ++ ++ // Equivalent to a conditional branch to the label, unless the label ++ // is nullptr, in which case it is a conditional Backtrack. ++ void BranchOrBacktrack(Label* to, Condition condition, Register rs, ++ const Operand& rt); ++ ++ // Call and return internally in the generated code in a way that ++ // is GC-safe (i.e., doesn't leave absolute code addresses on the stack) ++ inline void SafeCall(Label* to, Condition cond, Register rs, ++ const Operand& rt); ++ inline void SafeReturn(); ++ inline void SafeCallTarget(Label* name); ++ ++ // Pushes the value of a register on the backtrack stack. Decrements the ++ // stack pointer by a word size and stores the register's value there. ++ inline void Push(Register source); ++ ++ // Pops a value from the backtrack stack. Reads the word at the stack pointer ++ // and increments it by a word size. ++ inline void Pop(Register target); ++ ++ Isolate* isolate() const { return masm_->isolate(); } ++ ++ MacroAssembler* masm_; ++ ++ // Which mode to generate code for (Latin1 or UC16). ++ Mode mode_; ++ ++ // One greater than maximal register index actually used. ++ int num_registers_; ++ ++ // Number of registers to output at the end (the saved registers ++ // are always 0..num_saved_registers_-1). ++ int num_saved_registers_; ++ ++ // Labels used internally. 
++ Label entry_label_; ++ Label start_label_; ++ Label success_label_; ++ Label backtrack_label_; ++ Label exit_label_; ++ Label check_preempt_label_; ++ Label stack_overflow_label_; ++ Label internal_failure_label_; ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_REGEXP_LA64_REGEXP_MACRO_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h +index 8ec12a0ae62..cdc95655184 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h +@@ -21,6 +21,8 @@ + #include "src/regexp/mips/regexp-macro-assembler-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/regexp/mips64/regexp-macro-assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/regexp/la64/regexp-macro-assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/regexp/s390/regexp-macro-assembler-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc +index 0a122017437..b357ec85e8d 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc +@@ -15,8 +15,8 @@ RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer( + : RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) { + IrregexpImplementation type = assembler->Implementation(); + DCHECK_LT(type, 9); +- const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", "S390", +- "PPC", "X64", "X87", "Bytecode"}; ++ const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", "LA64", ++ "S390", "PPC", "X64", "X87", "Bytecode"}; + PrintF("RegExpMacroAssembler%s();\n", impl_names[type]); + } + +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h +index e83446cdc9b..6047a71e6cc 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h +@@ -43,6 +43,7 @@ class RegExpMacroAssembler { + kARMImplementation, + kARM64Implementation, + kMIPSImplementation, ++ kLA64Implementation, + kS390Implementation, + kPPCImplementation, + kX64Implementation, +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp.cc b/src/3rdparty/chromium/v8/src/regexp/regexp.cc +index 4319990a398..641a2af9ccd 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp.cc ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp.cc +@@ -854,6 +854,9 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, + #elif V8_TARGET_ARCH_MIPS64 + macro_assembler.reset(new RegExpMacroAssemblerMIPS( + isolate, zone, mode, (data->capture_count + 1) * 2)); ++#elif V8_TARGET_ARCH_LA64 ++ macro_assembler.reset(new RegExpMacroAssemblerLA64( ++ isolate, zone, mode, (data->capture_count + 1) * 2)); + #else + #error "Unsupported architecture" + #endif +diff --git a/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc b/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc +index 34259c6e67b..a0a5825f8f2 100644 +--- a/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc ++++ b/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc +@@ -20,7 +20,8 @@ namespace internal { + + // Other platforms have CSA support, see builtins-sharedarraybuffer-gen.h. 
+ #if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_PPC64 || \ +- V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_S390 || V8_TARGET_ARCH_S390X ++ V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_S390 || V8_TARGET_ARCH_S390X || \ ++ V8_TARGET_ARCH_LA64 + + namespace { + +diff --git a/src/3rdparty/chromium/v8/src/snapshot/deserializer.h b/src/3rdparty/chromium/v8/src/snapshot/deserializer.h +index 62814a881ae..dfc04f19b14 100644 +--- a/src/3rdparty/chromium/v8/src/snapshot/deserializer.h ++++ b/src/3rdparty/chromium/v8/src/snapshot/deserializer.h +@@ -28,8 +28,9 @@ class Object; + // Used for platforms with embedded constant pools to trigger deserialization + // of objects found in code. + #if defined(V8_TARGET_ARCH_MIPS) || defined(V8_TARGET_ARCH_MIPS64) || \ +- defined(V8_TARGET_ARCH_PPC) || defined(V8_TARGET_ARCH_S390) || \ +- defined(V8_TARGET_ARCH_PPC64) || V8_EMBEDDED_CONSTANT_POOL ++ defined(V8_TARGET_ARCH_LA64) || defined(V8_TARGET_ARCH_PPC) || \ ++ defined(V8_TARGET_ARCH_S390) || defined(V8_TARGET_ARCH_PPC64) || \ ++ V8_EMBEDDED_CONSTANT_POOL + #define V8_CODE_EMBEDS_OBJECT_POINTER 1 + #else + #define V8_CODE_EMBEDS_OBJECT_POINTER 0 +diff --git a/src/3rdparty/chromium/v8/src/wasm/baseline/la64/liftoff-assembler-la64.h b/src/3rdparty/chromium/v8/src/wasm/baseline/la64/liftoff-assembler-la64.h +new file mode 100644 +index 00000000000..7c82427a4fe +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/wasm/baseline/la64/liftoff-assembler-la64.h +@@ -0,0 +1,1503 @@ ++// Copyright 2017 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_WASM_BASELINE_LA64_LIFTOFF_ASSEMBLER_LA64_H_ ++#define V8_WASM_BASELINE_LA64_LIFTOFF_ASSEMBLER_LA64_H_ ++ ++#include "src/wasm/baseline/liftoff-assembler.h" ++ ++namespace v8 { ++namespace internal { ++namespace wasm { ++ ++namespace liftoff { ++ ++// Liftoff Frames. ++// ++// slot Frame ++// +--------------------+--------------------------- ++// n+4 | optional padding slot to keep the stack 16 byte aligned. ++// n+3 | parameter n | ++// ... | ... | ++// 4 | parameter 1 | or parameter 2 ++// 3 | parameter 0 | or parameter 1 ++// 2 | (result address) | or parameter 0 ++// -----+--------------------+--------------------------- ++// 1 | return addr (ra) | ++// 0 | previous frame (fp)| ++// -----+--------------------+ <-- frame ptr (fp) ++// -1 | 0xa: WASM_COMPILED | ++// -2 | instance | ++// -----+--------------------+--------------------------- ++// -3 | slot 0 | ^ ++// -4 | slot 1 | | ++// | | Frame slots ++// | | | ++// | | v ++// | optional padding slot to keep the stack 16 byte aligned. ++// -----+--------------------+ <-- stack ptr (sp) ++// ++ ++// fp-8 holds the stack marker, fp-16 is the instance parameter. 
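// Illustration only, not part of the patch: Liftoff addresses the instance slot
// and all spill slots as negative offsets from fp, per the frame diagram above.
// A minimal sketch of that addressing with an assumed 16-byte instance offset
// (mirroring the kInstanceOffset constant defined just below):
constexpr long long kSketchInstanceOffset = 16;
constexpr long long SketchStackSlotAddress(long long fp, long long offset) {
  return fp - offset;  // what GetStackSlot(offset) expresses as MemOperand(fp, -offset)
}
constexpr long long SketchInstanceSlotAddress(long long fp) {
  return SketchStackSlotAddress(fp, kSketchInstanceOffset);
}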
++constexpr int kInstanceOffset = 16; ++ ++inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); } ++ ++inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); } ++ ++inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src, ++ ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kI32: ++ assm->Ld_w(dst.gp(), src); ++ break; ++ case ValueType::kI64: ++ assm->Ld_d(dst.gp(), src); ++ break; ++ case ValueType::kF32: ++ assm->Fld_s(dst.fp(), src); ++ break; ++ case ValueType::kF64: ++ assm->Fld_d(dst.fp(), src); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++inline void Store(LiftoffAssembler* assm, Register base, int32_t offset, ++ LiftoffRegister src, ValueType type) { ++ MemOperand dst(base, offset); ++ switch (type.kind()) { ++ case ValueType::kI32: ++ assm->St_w(src.gp(), dst); ++ break; ++ case ValueType::kI64: ++ assm->St_d(src.gp(), dst); ++ break; ++ case ValueType::kF32: ++ assm->Fst_s(src.fp(), dst); ++ break; ++ case ValueType::kF64: ++ assm->Fst_d(src.fp(), dst); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kI32: ++ assm->addi_d(sp, sp, -kSystemPointerSize); ++ assm->St_w(reg.gp(), MemOperand(sp, 0)); ++ break; ++ case ValueType::kI64: ++ assm->push(reg.gp()); ++ break; ++ case ValueType::kF32: ++ assm->addi_d(sp, sp, -kSystemPointerSize); ++ assm->Fst_s(reg.fp(), MemOperand(sp, 0)); ++ break; ++ case ValueType::kF64: ++ assm->addi_d(sp, sp, -kSystemPointerSize); ++ assm->Fst_d(reg.fp(), MemOperand(sp, 0)); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++} // namespace liftoff ++ ++int LiftoffAssembler::PrepareStackFrame() { ++ int offset = pc_offset(); ++ // When constant that represents size of stack frame can't be represented ++ // as 16bit we need three instructions to add it to sp, so we reserve space ++ // for this case. ++ addi_d(sp, sp, 0); ++ nop(); ++ nop(); ++ return offset; ++} ++ ++void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) { ++ // We can't run out of space, just pass anything big enough to not cause the ++ // assembler to try to grow the buffer. ++ constexpr int kAvailableSpace = 256; ++ TurboAssembler patching_assembler( ++ nullptr, AssemblerOptions{}, CodeObjectRequired::kNo, ++ ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace)); ++ // If bytes can be represented as 16bit, daddiu will be generated and two ++ // nops will stay untouched. Otherwise, lui-ori sequence will load it to ++ // register and, as third instruction, daddu will be generated. ++ patching_assembler.Add_d(sp, sp, Operand(-frame_size)); ++} ++ ++void LiftoffAssembler::FinishCode() {} ++ ++void LiftoffAssembler::AbortCompilation() {} ++ ++// static ++constexpr int LiftoffAssembler::StaticStackFrameSize() { ++ return liftoff::kInstanceOffset; ++} ++ ++int LiftoffAssembler::SlotSizeForType(ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kS128: ++ return type.element_size_bytes(); ++ default: ++ return kStackSlotSize; ++ } ++} ++ ++bool LiftoffAssembler::NeedsAlignment(ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kS128: ++ return true; ++ default: ++ // No alignment because all other types are kStackSlotSize. 
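// Illustration only, not part of the patch: PrepareStackFrame above reserves an
// addi_d plus two nops because the later PatchPrepareStackFrame needs a single
// instruction when -frame_size fits the addi_d immediate and up to three when
// the constant has to be materialised first. A minimal sketch of that decision,
// assuming addi_d takes a signed 12-bit immediate:
auto sketch_frame_patch_instruction_count = [](int frame_size) {
  bool fits_si12 = frame_size <= 2048;  // i.e. -frame_size >= -2048
  return fits_si12 ? 1 : 3;
};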
++ return false; ++ } ++} ++ ++void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value, ++ RelocInfo::Mode rmode) { ++ switch (value.type().kind()) { ++ case ValueType::kI32: ++ TurboAssembler::li(reg.gp(), Operand(value.to_i32(), rmode)); ++ break; ++ case ValueType::kI64: ++ TurboAssembler::li(reg.gp(), Operand(value.to_i64(), rmode)); ++ break; ++ case ValueType::kF32: ++ TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits()); ++ break; ++ case ValueType::kF64: ++ TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits()); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::LoadFromInstance(Register dst, uint32_t offset, ++ int size) { ++ DCHECK_LE(offset, kMaxInt); ++ Ld_d(dst, liftoff::GetInstanceOperand()); ++ DCHECK(size == 4 || size == 8); ++ if (size == 4) { ++ Ld_w(dst, MemOperand(dst, offset)); ++ } else { ++ Ld_d(dst, MemOperand(dst, offset)); ++ } ++} ++ ++void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, ++ uint32_t offset) { ++ LoadFromInstance(dst, offset, kTaggedSize); ++} ++ ++void LiftoffAssembler::SpillInstance(Register instance) { ++ St_d(instance, liftoff::GetInstanceOperand()); ++} ++ ++void LiftoffAssembler::FillInstanceInto(Register dst) { ++ Ld_d(dst, liftoff::GetInstanceOperand()); ++} ++ ++void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr, ++ Register offset_reg, ++ uint32_t offset_imm, ++ LiftoffRegList pinned) { ++ STATIC_ASSERT(kTaggedSize == kInt64Size); ++ Load(LiftoffRegister(dst), src_addr, offset_reg, offset_imm, ++ LoadType::kI64Load, pinned); ++} ++ ++void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, ++ Register offset_reg, uint32_t offset_imm, ++ LoadType type, LiftoffRegList pinned, ++ uint32_t* protected_load_pc, bool is_load_mem) { ++ Register src = no_reg; ++ if (offset_reg != no_reg) { ++ src = GetUnusedRegister(kGpReg, pinned).gp(); ++ emit_ptrsize_add(src, src_addr, offset_reg); ++ } ++ MemOperand src_op = (offset_reg != no_reg) ? 
MemOperand(src, offset_imm) ++ : MemOperand(src_addr, offset_imm); ++ ++ if (protected_load_pc) *protected_load_pc = pc_offset(); ++ switch (type.value()) { ++ case LoadType::kI32Load8U: ++ case LoadType::kI64Load8U: ++ Ld_bu(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load8S: ++ case LoadType::kI64Load8S: ++ Ld_b(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load16U: ++ case LoadType::kI64Load16U: ++ TurboAssembler::Ld_hu(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load16S: ++ case LoadType::kI64Load16S: ++ TurboAssembler::Ld_h(dst.gp(), src_op); ++ break; ++ case LoadType::kI64Load32U: ++ TurboAssembler::Ld_wu(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load: ++ case LoadType::kI64Load32S: ++ TurboAssembler::Ld_w(dst.gp(), src_op); ++ break; ++ case LoadType::kI64Load: ++ TurboAssembler::Ld_d(dst.gp(), src_op); ++ break; ++ case LoadType::kF32Load: ++ TurboAssembler::Fld_s(dst.fp(), src_op); ++ break; ++ case LoadType::kF64Load: ++ TurboAssembler::Fld_d(dst.fp(), src_op); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister src, ++ StoreType type, LiftoffRegList pinned, ++ uint32_t* protected_store_pc, bool is_store_mem) { ++ Register dst = no_reg; ++ MemOperand dst_op = MemOperand(dst_addr, offset_imm); ++ if (offset_reg != no_reg) { ++ if (is_store_mem) { ++ pinned.set(src); ++ } ++ dst = GetUnusedRegister(kGpReg, pinned).gp(); ++ emit_ptrsize_add(dst, dst_addr, offset_reg); ++ dst_op = MemOperand(dst, offset_imm); ++ } ++ ++ if (protected_store_pc) *protected_store_pc = pc_offset(); ++ switch (type.value()) { ++ case StoreType::kI32Store8: ++ case StoreType::kI64Store8: ++ St_b(src.gp(), dst_op); ++ break; ++ case StoreType::kI32Store16: ++ case StoreType::kI64Store16: ++ TurboAssembler::St_h(src.gp(), dst_op); ++ break; ++ case StoreType::kI32Store: ++ case StoreType::kI64Store32: ++ TurboAssembler::St_w(src.gp(), dst_op); ++ break; ++ case StoreType::kI64Store: ++ TurboAssembler::St_d(src.gp(), dst_op); ++ break; ++ case StoreType::kF32Store: ++ TurboAssembler::Fst_s(src.fp(), dst_op); ++ break; ++ case StoreType::kF64Store: ++ TurboAssembler::Fst_d(src.fp(), dst_op); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr, ++ Register offset_reg, uint32_t offset_imm, ++ LoadType type, LiftoffRegList pinned) { ++ bailout(kAtomics, "AtomicLoad"); ++} ++ ++void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister src, ++ StoreType type, LiftoffRegList pinned) { ++ bailout(kAtomics, "AtomicStore"); ++} ++ ++void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicAdd"); ++} ++ ++void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicSub"); ++} ++ ++void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicAnd"); ++} ++ ++void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicOr"); ++} ++ ++void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, 
LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicXor"); ++} ++ ++void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, ++ LiftoffRegister value, StoreType type) { ++ bailout(kAtomics, "AtomicExchange"); ++} ++ ++void LiftoffAssembler::AtomicCompareExchange( ++ Register dst_addr, Register offset_reg, uint32_t offset_imm, ++ LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result, ++ StoreType type) { ++ bailout(kAtomics, "AtomicCompareExchange"); ++} ++ ++void LiftoffAssembler::AtomicFence() { dbar(0); } ++ ++void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst, ++ uint32_t caller_slot_idx, ++ ValueType type) { ++ MemOperand src(fp, kSystemPointerSize * (caller_slot_idx + 1)); ++ liftoff::Load(this, dst, src, type); ++} ++ ++void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, ++ ValueType type) { ++ DCHECK_NE(dst_offset, src_offset); ++ LiftoffRegister reg = GetUnusedRegister(reg_class_for(type)); ++ Fill(reg, src_offset, type); ++ Spill(dst_offset, reg, type); ++} ++ ++void LiftoffAssembler::Move(Register dst, Register src, ValueType type) { ++ DCHECK_NE(dst, src); ++ // TODO(ksreten): Handle different sizes here. ++ TurboAssembler::Move(dst, src); ++} ++ ++void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src, ++ ValueType type) { ++ DCHECK_NE(dst, src); ++ TurboAssembler::Move(dst, src); ++} ++ ++void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) { ++ RecordUsedSpillOffset(offset); ++ MemOperand dst = liftoff::GetStackSlot(offset); ++ switch (type.kind()) { ++ case ValueType::kI32: ++ St_w(reg.gp(), dst); ++ break; ++ case ValueType::kI64: ++ St_d(reg.gp(), dst); ++ break; ++ case ValueType::kF32: ++ Fst_s(reg.fp(), dst); ++ break; ++ case ValueType::kF64: ++ TurboAssembler::Fst_d(reg.fp(), dst); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::Spill(int offset, WasmValue value) { ++ RecordUsedSpillOffset(offset); ++ MemOperand dst = liftoff::GetStackSlot(offset); ++ switch (value.type().kind()) { ++ case ValueType::kI32: { ++ LiftoffRegister tmp = GetUnusedRegister(kGpReg); ++ TurboAssembler::li(tmp.gp(), Operand(value.to_i32())); ++ St_w(tmp.gp(), dst); ++ break; ++ } ++ case ValueType::kI64: { ++ LiftoffRegister tmp = GetUnusedRegister(kGpReg); ++ TurboAssembler::li(tmp.gp(), value.to_i64()); ++ St_d(tmp.gp(), dst); ++ break; ++ } ++ default: ++ // kWasmF32 and kWasmF64 are unreachable, since those ++ // constants are not tracked. ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) { ++ MemOperand src = liftoff::GetStackSlot(offset); ++ switch (type.kind()) { ++ case ValueType::kI32: ++ Ld_w(reg.gp(), src); ++ break; ++ case ValueType::kI64: ++ Ld_d(reg.gp(), src); ++ break; ++ case ValueType::kF32: ++ Fld_s(reg.fp(), src); ++ break; ++ case ValueType::kF64: ++ TurboAssembler::Fld_d(reg.fp(), src); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) { ++ UNREACHABLE(); ++} ++ ++void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) { ++ DCHECK_LT(0, size); ++ RecordUsedSpillOffset(start + size); ++ ++ if (size <= 12 * kStackSlotSize) { ++ // Special straight-line code for up to 12 slots. Generates one ++ // instruction per slot (<= 12 instructions total). 
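// Illustration only, not part of the patch: the zero-fill picks straight-line
// stores for small spill areas and falls back to the two-register loop further
// down for anything larger. A minimal sketch of the cutoff, assuming 8-byte
// stack slots:
auto sketch_use_straight_line_fill = [](int size_in_bytes) {
  constexpr int kAssumedStackSlotSize = 8;  // stand-in for kStackSlotSize
  return size_in_bytes <= 12 * kAssumedStackSlotSize;
};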
++ uint32_t remainder = size; ++ for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) { ++ St_d(zero_reg, liftoff::GetStackSlot(start + remainder)); ++ } ++ DCHECK(remainder == 4 || remainder == 0); ++ if (remainder) { ++ St_w(zero_reg, liftoff::GetStackSlot(start + remainder)); ++ } ++ } else { ++ // General case for bigger counts (12 instructions). ++ // Use a0 for start address (inclusive), a1 for end address (exclusive). ++ Push(a1, a0); ++ Add_d(a0, fp, Operand(-start - size)); ++ Add_d(a1, fp, Operand(-start)); ++ ++ Label loop; ++ bind(&loop); ++ St_d(zero_reg, MemOperand(a0, kSystemPointerSize)); ++ addi_d(a0, a0, kSystemPointerSize); ++ BranchShort(&loop, ne, a0, Operand(a1)); ++ ++ Pop(a1, a0); ++ } ++} ++ ++void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { ++ TurboAssembler::Clz_d(dst.gp(), src.gp()); ++} ++ ++void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) { ++ TurboAssembler::Ctz_d(dst.gp(), src.gp()); ++} ++ ++bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst, ++ LiftoffRegister src) { ++ TurboAssembler::Popcnt_d(dst.gp(), src.gp()); ++ return true; ++} ++ ++void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) { ++ TurboAssembler::Mul_w(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero, ++ Label* trap_div_unrepresentable) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ ++ // Check if lhs == kMinInt and rhs == -1, since this case is unrepresentable. ++ TurboAssembler::li(kScratchReg, 1); ++ TurboAssembler::li(kScratchReg2, 1); ++ TurboAssembler::LoadZeroOnCondition(kScratchReg, lhs, Operand(kMinInt), eq); ++ TurboAssembler::LoadZeroOnCondition(kScratchReg2, rhs, Operand(-1), eq); ++ add_d(kScratchReg, kScratchReg, kScratchReg2); ++ TurboAssembler::Branch(trap_div_unrepresentable, eq, kScratchReg, ++ Operand(zero_reg)); ++ ++ TurboAssembler::Div_w(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ TurboAssembler::Div_wu(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ TurboAssembler::Mod_w(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ TurboAssembler::Mod_wu(dst, lhs, rhs); ++} ++ ++#define I32_BINOP(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register lhs, \ ++ Register rhs) { \ ++ instruction(dst, lhs, rhs); \ ++ } ++ ++// clang-format off ++I32_BINOP(add, add_w) ++I32_BINOP(sub, sub_w) ++I32_BINOP(and, and_) ++I32_BINOP(or, or_) ++I32_BINOP(xor, xor_) ++// clang-format on ++ ++#undef I32_BINOP ++ ++#define I32_BINOP_I(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register lhs, \ ++ int32_t imm) { \ ++ instruction(dst, lhs, Operand(imm)); \ ++ } ++ ++// clang-format off ++I32_BINOP_I(add, Add_w) ++I32_BINOP_I(and, And) ++I32_BINOP_I(or, Or) ++I32_BINOP_I(xor, Xor) ++// clang-format on ++ ++#undef I32_BINOP_I ++ ++void LiftoffAssembler::emit_i32_clz(Register dst, Register src) { ++ TurboAssembler::Clz_w(dst, src); ++} ++ ++void 
LiftoffAssembler::emit_i32_ctz(Register dst, Register src) { ++ TurboAssembler::Ctz_w(dst, src); ++} ++ ++bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) { ++ TurboAssembler::Popcnt_w(dst, src); ++ return true; ++} ++ ++#define I32_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register src, \ ++ Register amount) { \ ++ instruction(dst, src, amount); \ ++ } ++#define I32_SHIFTOP_I(name, instruction, instruction1) \ ++ I32_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register src, \ ++ int amount) { \ ++ instruction1(dst, src, amount & 0x1f); \ ++ } ++ ++I32_SHIFTOP_I(shl, sll_w, slli_w) ++I32_SHIFTOP_I(sar, sra_w, srai_w) ++I32_SHIFTOP_I(shr, srl_w, srli_w) ++ ++#undef I32_SHIFTOP ++#undef I32_SHIFTOP_I ++ ++void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs) { ++ TurboAssembler::Mul_d(dst.gp(), lhs.gp(), rhs.gp()); ++} ++ ++bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero, ++ Label* trap_div_unrepresentable) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ ++ // Check if lhs == MinInt64 and rhs == -1, since this case is unrepresentable. ++ TurboAssembler::li(kScratchReg, 1); ++ TurboAssembler::li(kScratchReg2, 1); ++ TurboAssembler::LoadZeroOnCondition( ++ kScratchReg, lhs.gp(), Operand(std::numeric_limits::min()), eq); ++ TurboAssembler::LoadZeroOnCondition(kScratchReg2, rhs.gp(), Operand(-1), eq); ++ add_d(kScratchReg, kScratchReg, kScratchReg2); ++ TurboAssembler::Branch(trap_div_unrepresentable, eq, kScratchReg, ++ Operand(zero_reg)); ++ ++ TurboAssembler::Div_d(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ TurboAssembler::Div_du(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ TurboAssembler::Mod_d(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ TurboAssembler::Mod_du(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++#define I64_BINOP(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name( \ ++ LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { \ ++ instruction(dst.gp(), lhs.gp(), rhs.gp()); \ ++ } ++ ++// clang-format off ++I64_BINOP(add, Add_d) ++I64_BINOP(sub, Sub_d) ++I64_BINOP(and, and_) ++I64_BINOP(or, or_) ++I64_BINOP(xor, xor_) ++// clang-format on ++ ++#undef I64_BINOP ++ ++#define I64_BINOP_I(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name(LiftoffRegister dst, \ ++ LiftoffRegister lhs, int32_t imm) { \ ++ instruction(dst.gp(), lhs.gp(), Operand(imm)); \ ++ } ++ ++// clang-format off ++I64_BINOP_I(add, Add_d) ++I64_BINOP_I(and, And) ++I64_BINOP_I(or, Or) ++I64_BINOP_I(xor, Xor) ++// clang-format on ++ ++#undef I64_BINOP_I ++ ++#define I64_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name( \ ++ 
LiftoffRegister dst, LiftoffRegister src, Register amount) { \ ++ instruction(dst.gp(), src.gp(), amount); \ ++ } ++#define I64_SHIFTOP_I(name, instruction, instructioni) \ ++ I64_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name(LiftoffRegister dst, \ ++ LiftoffRegister src, int amount) { \ ++ DCHECK(is_uint6(amount)); \ ++ instructioni(dst.gp(), src.gp(), amount); \ ++ } ++ ++I64_SHIFTOP_I(shl, sll_d, slli_d) ++I64_SHIFTOP_I(sar, sra_d, srai_d) ++I64_SHIFTOP_I(shr, srl_d, srli_d) ++ ++#undef I64_SHIFTOP ++#undef I64_SHIFTOP_I ++ ++void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) { ++ add_w(dst, src, zero_reg); ++} ++ ++void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) { ++ TurboAssembler::Neg_s(dst, src); ++} ++ ++void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) { ++ TurboAssembler::Neg_d(dst, src); ++} ++ ++void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float32Min(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float32MinOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float32Max(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float32MaxOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ bailout(kComplexOperation, "f32_copysign"); ++} ++ ++void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float64Min(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float64MinOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float64Max(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float64MaxOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ bailout(kComplexOperation, "f64_copysign"); ++} ++ ++#define FP_BINOP(name, instruction) \ ++ void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \ ++ DoubleRegister rhs) { \ ++ instruction(dst, lhs, rhs); \ ++ } ++#define FP_UNOP(name, instruction) \ ++ void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ ++ instruction(dst, src); \ ++ } ++#define FP_UNOP_RETURN_TRUE(name, instruction) \ ++ bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ ++ instruction(dst, src); \ ++ return true; \ ++ } ++ ++FP_BINOP(f32_add, fadd_s) ++FP_BINOP(f32_sub, fsub_s) ++FP_BINOP(f32_mul, fmul_s) ++FP_BINOP(f32_div, fdiv_s) ++FP_UNOP(f32_abs, fabs_s) ++FP_UNOP_RETURN_TRUE(f32_ceil, Ceil_s) ++FP_UNOP_RETURN_TRUE(f32_floor, Floor_s) ++FP_UNOP_RETURN_TRUE(f32_trunc, Trunc_s) ++FP_UNOP_RETURN_TRUE(f32_nearest_int, Round_s) ++FP_UNOP(f32_sqrt, fsqrt_s) ++FP_BINOP(f64_add, fadd_d) ++FP_BINOP(f64_sub, fsub_d) ++FP_BINOP(f64_mul, fmul_d) ++FP_BINOP(f64_div, fdiv_d) ++FP_UNOP(f64_abs, fabs_d) ++FP_UNOP_RETURN_TRUE(f64_ceil, Ceil_d) ++FP_UNOP_RETURN_TRUE(f64_floor, Floor_d) ++FP_UNOP_RETURN_TRUE(f64_trunc, Trunc_d) 
++FP_UNOP_RETURN_TRUE(f64_nearest_int, Round_d) ++FP_UNOP(f64_sqrt, fsqrt_d) ++ ++#undef FP_BINOP ++#undef FP_UNOP ++#undef FP_UNOP_RETURN_TRUE ++ ++bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, ++ LiftoffRegister dst, ++ LiftoffRegister src, Label* trap) { ++ switch (opcode) { ++ case kExprI32ConvertI64: ++ TurboAssembler::bstrpick_w(dst.gp(), src.gp(), 31, 0); ++ return true; ++ case kExprI32SConvertF32: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_s(rounded.fp(), src.fp()); ++ ftintrz_w_s(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_s(dst.gp(), kScratchDoubleReg); ++ // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, ++ // because INT32_MIN allows easier out-of-bounds detection. ++ TurboAssembler::Add_w(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Slt(kScratchReg2, kScratchReg, dst.gp()); ++ TurboAssembler::Movn(dst.gp(), kScratchReg, kScratchReg2); ++ ++ // Checking if trap. ++ movgr2fr_w(kScratchDoubleReg, dst.gp()); ++ ffint_s_w(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF32(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32UConvertF32: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_s(rounded.fp(), src.fp()); ++ TurboAssembler::Ftintrz_uw_s(dst.gp(), rounded.fp(), kScratchDoubleReg); ++ // Avoid UINT32_MAX as an overflow indicator and use 0 instead, ++ // because 0 allows easier out-of-bounds detection. ++ TurboAssembler::Add_w(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Movz(dst.gp(), zero_reg, kScratchReg); ++ ++ // Checking if trap. ++ TurboAssembler::Ffint_d_uw(converted_back.fp(), dst.gp()); ++ fcvt_s_d(converted_back.fp(), converted_back.fp()); ++ TurboAssembler::CompareF32(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32SConvertF64: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_d(rounded.fp(), src.fp()); ++ ftintrz_w_d(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_s(dst.gp(), kScratchDoubleReg); ++ ++ // Checking if trap. ++ ffint_d_w(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF64(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32UConvertF64: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_d(rounded.fp(), src.fp()); ++ TurboAssembler::Ftintrz_uw_d(dst.gp(), rounded.fp(), kScratchDoubleReg); ++ ++ // Checking if trap. 
++ TurboAssembler::Ffint_d_uw(converted_back.fp(), dst.gp()); ++ TurboAssembler::CompareF64(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32ReinterpretF32: ++ TurboAssembler::FmoveLow(dst.gp(), src.fp()); ++ return true; ++ case kExprI64SConvertI32: ++ slli_w(dst.gp(), src.gp(), 0); ++ return true; ++ case kExprI64UConvertI32: ++ TurboAssembler::bstrpick_d(dst.gp(), src.gp(), 31, 0); ++ return true; ++ case kExprI64SConvertF32: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_s(rounded.fp(), src.fp()); ++ ftintrz_l_s(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_d(dst.gp(), kScratchDoubleReg); ++ // Avoid INT64_MAX as an overflow indicator and use INT64_MIN instead, ++ // because INT64_MIN allows easier out-of-bounds detection. ++ TurboAssembler::Add_d(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Slt(kScratchReg2, kScratchReg, dst.gp()); ++ TurboAssembler::Movn(dst.gp(), kScratchReg, kScratchReg2); ++ ++ // Checking if trap. ++ movgr2fr_d(kScratchDoubleReg, dst.gp()); ++ ffint_s_l(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF32(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI64UConvertF32: { ++ // Real conversion. ++ TurboAssembler::Ftintrz_ul_s(dst.gp(), src.fp(), kScratchDoubleReg, ++ kScratchReg); ++ ++ // Checking if trap. ++ TurboAssembler::Branch(trap, eq, kScratchReg, Operand(zero_reg)); ++ return true; ++ } ++ case kExprI64SConvertF64: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_d(rounded.fp(), src.fp()); ++ ftintrz_l_d(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_d(dst.gp(), kScratchDoubleReg); ++ // Avoid INT64_MAX as an overflow indicator and use INT64_MIN instead, ++ // because INT64_MIN allows easier out-of-bounds detection. ++ TurboAssembler::Add_d(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Slt(kScratchReg2, kScratchReg, dst.gp()); ++ TurboAssembler::Movn(dst.gp(), kScratchReg, kScratchReg2); ++ ++ // Checking if trap. ++ movgr2fr_d(kScratchDoubleReg, dst.gp()); ++ ffint_d_l(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF64(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI64UConvertF64: { ++ // Real conversion. ++ TurboAssembler::Ftintrz_ul_d(dst.gp(), src.fp(), kScratchDoubleReg, ++ kScratchReg); ++ ++ // Checking if trap. 
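// Illustration only, not part of the patch: the signed float-to-int cases above
// remap the hardware's saturation value INT32_MAX / INT64_MAX to INT32_MIN /
// INT64_MIN (the Add/Slt/Movn triple), because the MIN sentinel makes the
// out-of-bounds case easier to detect in the convert-back-and-compare trap
// check. A plain-C++ sketch of the 32-bit remap:
auto sketch_remap_overflow_indicator = [](int truncated) {
  bool hardware_overflow = truncated == 2147483647;          // INT32_MAX
  return hardware_overflow ? (-2147483647 - 1) : truncated;  // INT32_MIN
};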
++ TurboAssembler::Branch(trap, eq, kScratchReg, Operand(zero_reg)); ++ return true; ++ } ++ case kExprI64ReinterpretF64: ++ movfr2gr_d(dst.gp(), src.fp()); ++ return true; ++ case kExprF32SConvertI32: { ++ LiftoffRegister scratch = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst)); ++ movgr2fr_w(scratch.fp(), src.gp()); ++ ffint_s_w(dst.fp(), scratch.fp()); ++ return true; ++ } ++ case kExprF32UConvertI32: ++ TurboAssembler::Ffint_s_uw(dst.fp(), src.gp()); ++ return true; ++ case kExprF32ConvertF64: ++ fcvt_s_d(dst.fp(), src.fp()); ++ return true; ++ case kExprF32ReinterpretI32: ++ TurboAssembler::FmoveLow(dst.fp(), src.gp()); ++ return true; ++ case kExprF64SConvertI32: { ++ LiftoffRegister scratch = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst)); ++ movgr2fr_w(scratch.fp(), src.gp()); ++ ffint_d_w(dst.fp(), scratch.fp()); ++ return true; ++ } ++ case kExprF64UConvertI32: ++ TurboAssembler::Ffint_d_uw(dst.fp(), src.gp()); ++ return true; ++ case kExprF64ConvertF32: ++ fcvt_d_s(dst.fp(), src.fp()); ++ return true; ++ case kExprF64ReinterpretI64: ++ movgr2fr_d(dst.fp(), src.gp()); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) { ++ bailout(kComplexOperation, "i32_signextend_i8"); ++} ++ ++void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) { ++ bailout(kComplexOperation, "i32_signextend_i16"); ++} ++ ++void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst, ++ LiftoffRegister src) { ++ bailout(kComplexOperation, "i64_signextend_i8"); ++} ++ ++void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst, ++ LiftoffRegister src) { ++ bailout(kComplexOperation, "i64_signextend_i16"); ++} ++ ++void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst, ++ LiftoffRegister src) { ++ bailout(kComplexOperation, "i64_signextend_i32"); ++} ++ ++void LiftoffAssembler::emit_jump(Label* label) { ++ TurboAssembler::Branch(label); ++} ++ ++void LiftoffAssembler::emit_jump(Register target) { ++ TurboAssembler::Jump(target); ++} ++ ++void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label, ++ ValueType type, Register lhs, ++ Register rhs) { ++ if (rhs != no_reg) { ++ TurboAssembler::Branch(label, cond, lhs, Operand(rhs)); ++ } else { ++ TurboAssembler::Branch(label, cond, lhs, Operand(zero_reg)); ++ } ++} ++ ++void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) { ++ sltui(dst, src, 1); ++} ++ ++void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst, ++ Register lhs, Register rhs) { ++ Register tmp = dst; ++ if (dst == lhs || dst == rhs) { ++ tmp = GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(lhs, rhs)).gp(); ++ } ++ // Write 1 as result. ++ TurboAssembler::li(tmp, 1); ++ ++ // If negative condition is true, write 0 as result. ++ Condition neg_cond = NegateCondition(cond); ++ TurboAssembler::LoadZeroOnCondition(tmp, lhs, Operand(rhs), neg_cond); ++ ++ // If tmp != dst, result will be moved. ++ TurboAssembler::Move(dst, tmp); ++} ++ ++void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) { ++ sltui(dst, src.gp(), 1); ++} ++ ++void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst, ++ LiftoffRegister lhs, ++ LiftoffRegister rhs) { ++ Register tmp = dst; ++ if (dst == lhs.gp() || dst == rhs.gp()) { ++ tmp = GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(lhs, rhs)).gp(); ++ } ++ // Write 1 as result. 
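// Illustration only, not part of the patch: emit_i32_set_cond / emit_i64_set_cond
// materialise the boolean without a branch by writing 1 and then clearing it to
// 0 when the negated condition holds. A minimal sketch of the idea for a signed
// less-than comparison:
auto sketch_set_cond_slt = [](long long lhs, long long rhs) {
  int result = 1;                // "Write 1 as result."
  if (!(lhs < rhs)) result = 0;  // LoadZeroOnCondition with NegateCondition(cond)
  return result;
};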
++ TurboAssembler::li(tmp, 1);
++
++ // If negative condition is true, write 0 as result.
++ Condition neg_cond = NegateCondition(cond);
++ TurboAssembler::LoadZeroOnCondition(tmp, lhs.gp(), Operand(rhs.gp()),
++ neg_cond);
++
++ // If tmp != dst, result will be moved.
++ TurboAssembler::Move(dst, tmp);
++}
++
++namespace liftoff {
++
++inline FPUCondition ConditionToConditionCmpFPU(Condition condition,
++ bool* predicate) {
++ switch (condition) {
++ case kEqual:
++ *predicate = true;
++ return CEQ;
++ case kUnequal:
++ *predicate = false;
++ return CEQ;
++ case kUnsignedLessThan:
++ *predicate = true;
++ return CLT;
++ case kUnsignedGreaterEqual:
++ *predicate = false;
++ return CLT;
++ case kUnsignedLessEqual:
++ *predicate = true;
++ return CLE;
++ case kUnsignedGreaterThan:
++ *predicate = false;
++ return CLE;
++ default:
++ *predicate = true;
++ break;
++ }
++ UNREACHABLE();
++}
++
++} // namespace liftoff
++
++void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
++ DoubleRegister lhs,
++ DoubleRegister rhs) {
++ Label not_nan, cont;
++ TurboAssembler::CompareIsNanF32(lhs, rhs);
++ TurboAssembler::BranchFalseF(&not_nan);
++ // If one of the operands is NaN, return 1 for f32.ne, else 0.
++ if (cond == ne) {
++ TurboAssembler::li(dst, 1);
++ } else {
++ TurboAssembler::Move(dst, zero_reg);
++ }
++ TurboAssembler::Branch(&cont);
++
++ bind(&not_nan);
++
++ TurboAssembler::li(dst, 1);
++ bool predicate;
++ FPUCondition fcond = liftoff::ConditionToConditionCmpFPU(cond, &predicate);
++ TurboAssembler::CompareF32(lhs, rhs, fcond);
++ if (predicate) {
++ TurboAssembler::LoadZeroIfNotFPUCondition(dst);
++ } else {
++ TurboAssembler::LoadZeroIfFPUCondition(dst);
++ }
++
++ bind(&cont);
++}
++
++void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
++ DoubleRegister lhs,
++ DoubleRegister rhs) {
++ Label not_nan, cont;
++ TurboAssembler::CompareIsNanF64(lhs, rhs);
++ TurboAssembler::BranchFalseF(&not_nan);
++ // If one of the operands is NaN, return 1 for f64.ne, else 0.
++ if (cond == ne) {
++ TurboAssembler::li(dst, 1);
++ } else {
++ TurboAssembler::Move(dst, zero_reg);
++ }
++ TurboAssembler::Branch(&cont);
++
++ bind(&not_nan);
++
++ TurboAssembler::li(dst, 1);
++ bool predicate;
++ FPUCondition fcond = liftoff::ConditionToConditionCmpFPU(cond, &predicate);
++ TurboAssembler::CompareF64(lhs, rhs, fcond);
++ if (predicate) {
++ TurboAssembler::LoadZeroIfNotFPUCondition(dst);
++ } else {
++ TurboAssembler::LoadZeroIfFPUCondition(dst);
++ }
++
++ bind(&cont);
++}
++
++void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i8x16_splat");
++}
++
++void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i16x8_splat");
++}
++
++void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i32x4_splat");
++}
++
++void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i64x2_splat");
++}
++
++void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_f32x4_splat");
++}
++
++void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
++ LiftoffRegister src) {}
++
++void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
++ LiftoffRegister lhs,
++ uint8_t imm_lane_idx) {}
++
++void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
++ LiftoffRegister lhs,
++ uint8_t imm_lane_idx) {}
++
++void
LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { ++ TurboAssembler::Ld_d(limit_address, MemOperand(limit_address, 0)); ++ TurboAssembler::Branch(ool_code, ule, sp, Operand(limit_address)); ++} ++ ++void LiftoffAssembler::CallTrapCallbackForTesting() { ++ PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp()); ++ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); ++} ++ ++void LiftoffAssembler::AssertUnreachable(AbortReason reason) { ++ if (emit_debug_code()) Abort(reason); ++} ++ ++void LiftoffAssembler::PushRegisters(LiftoffRegList regs) { ++ LiftoffRegList gp_regs = regs & kGpCacheRegList; ++ unsigned num_gp_regs = gp_regs.GetNumRegsSet(); ++ if (num_gp_regs) { ++ unsigned offset = num_gp_regs * kSystemPointerSize; ++ addi_d(sp, sp, -offset); ++ while (!gp_regs.is_empty()) { ++ LiftoffRegister reg = gp_regs.GetFirstRegSet(); ++ offset -= kSystemPointerSize; ++ St_d(reg.gp(), MemOperand(sp, offset)); ++ gp_regs.clear(reg); ++ } ++ DCHECK_EQ(offset, 0); ++ } ++ LiftoffRegList fp_regs = regs & kFpCacheRegList; ++ unsigned num_fp_regs = fp_regs.GetNumRegsSet(); ++ if (num_fp_regs) { ++ addi_d(sp, sp, -(num_fp_regs * kStackSlotSize)); ++ unsigned offset = 0; ++ while (!fp_regs.is_empty()) { ++ LiftoffRegister reg = fp_regs.GetFirstRegSet(); ++ TurboAssembler::Fst_d(reg.fp(), MemOperand(sp, offset)); ++ fp_regs.clear(reg); ++ offset += sizeof(double); ++ } ++ DCHECK_EQ(offset, num_fp_regs * sizeof(double)); ++ } ++} ++ ++void LiftoffAssembler::PopRegisters(LiftoffRegList regs) { ++ LiftoffRegList fp_regs = regs & kFpCacheRegList; ++ unsigned fp_offset = 0; ++ while (!fp_regs.is_empty()) { ++ LiftoffRegister reg = fp_regs.GetFirstRegSet(); ++ TurboAssembler::Fld_d(reg.fp(), MemOperand(sp, fp_offset)); ++ fp_regs.clear(reg); ++ fp_offset += sizeof(double); ++ } ++ if (fp_offset) addi_d(sp, sp, fp_offset); ++ 
LiftoffRegList gp_regs = regs & kGpCacheRegList; ++ unsigned gp_offset = 0; ++ while (!gp_regs.is_empty()) { ++ LiftoffRegister reg = gp_regs.GetLastRegSet(); ++ Ld_d(reg.gp(), MemOperand(sp, gp_offset)); ++ gp_regs.clear(reg); ++ gp_offset += kSystemPointerSize; ++ } ++ addi_d(sp, sp, gp_offset); ++} ++ ++void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) { ++ DCHECK_LT(num_stack_slots, ++ (1 << 16) / kSystemPointerSize); // 16 bit immediate ++ TurboAssembler::DropAndRet(static_cast(num_stack_slots)); ++} ++ ++void LiftoffAssembler::CallC(const wasm::FunctionSig* sig, ++ const LiftoffRegister* args, ++ const LiftoffRegister* rets, ++ ValueType out_argument_type, int stack_bytes, ++ ExternalReference ext_ref) { ++ addi_d(sp, sp, -stack_bytes); ++ ++ int arg_bytes = 0; ++ for (ValueType param_type : sig->parameters()) { ++ liftoff::Store(this, sp, arg_bytes, *args++, param_type); ++ arg_bytes += param_type.element_size_bytes(); ++ } ++ DCHECK_LE(arg_bytes, stack_bytes); ++ ++ // Pass a pointer to the buffer with the arguments to the C function. ++ // On mips, the first argument is passed in {a0}. ++ constexpr Register kFirstArgReg = a0; ++ mov(kFirstArgReg, sp); ++ ++ // Now call the C function. ++ constexpr int kNumCCallArgs = 1; ++ PrepareCallCFunction(kNumCCallArgs, kScratchReg); ++ CallCFunction(ext_ref, kNumCCallArgs); ++ ++ // Move return value to the right register. ++ const LiftoffRegister* next_result_reg = rets; ++ if (sig->return_count() > 0) { ++ DCHECK_EQ(1, sig->return_count()); ++ constexpr Register kReturnReg = a0; ++ if (kReturnReg != next_result_reg->gp()) { ++ Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0)); ++ } ++ ++next_result_reg; ++ } ++ ++ // Load potential output value from the buffer on the stack. ++ if (out_argument_type != kWasmStmt) { ++ liftoff::Load(this, *next_result_reg, MemOperand(sp, 0), out_argument_type); ++ } ++ ++ addi_d(sp, sp, stack_bytes); ++} ++ ++void LiftoffAssembler::CallNativeWasmCode(Address addr) { ++ Call(addr, RelocInfo::WASM_CALL); ++} ++ ++void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig, ++ compiler::CallDescriptor* call_descriptor, ++ Register target) { ++ if (target == no_reg) { ++ pop(kScratchReg); ++ Call(kScratchReg); ++ } else { ++ Call(target); ++ } ++} ++ ++void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { ++ // A direct call to a wasm runtime stub defined in this module. ++ // Just encode the stub index. This will be patched at relocation. ++ Call(static_cast
(sid), RelocInfo::WASM_STUB_CALL); ++} ++ ++void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { ++ addi_d(sp, sp, -size); ++ TurboAssembler::Move(addr, sp); ++} ++ ++void LiftoffAssembler::DeallocateStackSlot(uint32_t size) { ++ addi_d(sp, sp, size); ++} ++ ++void LiftoffStackSlots::Construct() { ++ for (auto& slot : slots_) { ++ const LiftoffAssembler::VarState& src = slot.src_; ++ switch (src.loc()) { ++ case LiftoffAssembler::VarState::kStack: ++ asm_->Ld_d(kScratchReg, liftoff::GetStackSlot(slot.src_offset_)); ++ asm_->push(kScratchReg); ++ break; ++ case LiftoffAssembler::VarState::kRegister: ++ liftoff::push(asm_, src.reg(), src.type()); ++ break; ++ case LiftoffAssembler::VarState::kIntConst: { ++ asm_->li(kScratchReg, Operand(src.i32_const())); ++ asm_->push(kScratchReg); ++ break; ++ } ++ } ++ } ++} ++ ++} // namespace wasm ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_WASM_BASELINE_LA64_LIFTOFF_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h +index 781fb87dbcf..286fe8bdea2 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h ++++ b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h +@@ -46,6 +46,14 @@ constexpr RegList kLiftoffAssemblerGpCacheRegs = + constexpr RegList kLiftoffAssemblerFpCacheRegs = DoubleRegister::ListOf( + f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26); + ++#elif V8_TARGET_ARCH_LA64 ++/*todo*/ ++constexpr RegList kLiftoffAssemblerGpCacheRegs = ++ Register::ListOf(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, s7); ++ ++constexpr RegList kLiftoffAssemblerFpCacheRegs = DoubleRegister::ListOf( ++ f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26); ++ + #elif V8_TARGET_ARCH_ARM + + // r7: cp, r10: root, r11: fp, r12: ip, r13: sp, r14: lr, r15: pc. 
+@@ -90,7 +98,7 @@ constexpr Condition kUnsignedLessEqual = below_equal; + constexpr Condition kUnsignedGreaterThan = above; + constexpr Condition kUnsignedGreaterEqual = above_equal; + +-#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + + constexpr Condition kEqual = eq; + constexpr Condition kUnequal = ne; +diff --git a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h +index 6573ff4aa4d..4e26ea95d21 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h ++++ b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h +@@ -1045,6 +1045,8 @@ class LiftoffStackSlots { + #include "src/wasm/baseline/mips/liftoff-assembler-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/wasm/baseline/mips64/liftoff-assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/wasm/baseline/la64/liftoff-assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/wasm/baseline/s390/liftoff-assembler-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc +index 90cdad4672b..33f8b9e6e99 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc ++++ b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc +@@ -268,6 +268,37 @@ void JumpTableAssembler::NopBytes(int bytes) { + } + } + ++#elif V8_TARGET_ARCH_LA64 ++void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index, ++ Address lazy_compile_target) { ++ DCHECK(is_int32(func_index)); ++ int start = pc_offset(); ++ li(kWasmCompileLazyFuncIndexRegister, (int32_t)func_index); // max. 2 instr ++ // Jump produces max. 3 instructions for 32-bit platform ++ // and max. 4 instructions for 64-bit platform. 
++ Jump(lazy_compile_target, RelocInfo::NONE); ++ int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset(); ++ DCHECK_EQ(nop_bytes % kInstrSize, 0); ++ for (int i = 0; i < nop_bytes; i += kInstrSize) nop(); ++} ++bool JumpTableAssembler::EmitJumpSlot(Address target) { ++ PatchAndJump(target); ++ return true; ++} ++void JumpTableAssembler::EmitFarJumpSlot(Address target) { ++ JumpToInstructionStream(target); ++} ++void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) { ++ UNREACHABLE(); ++} ++void JumpTableAssembler::NopBytes(int bytes) { ++ DCHECK_LE(0, bytes); ++ DCHECK_EQ(0, bytes % kInstrSize); ++ for (; bytes > 0; bytes -= kInstrSize) { ++ nop(); ++ } ++} ++ + #elif V8_TARGET_ARCH_PPC64 + void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index, + Address lazy_compile_target) { +diff --git a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h +index 253f0bc0182..71c1c7eeb3c 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h ++++ b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h +@@ -215,6 +215,12 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler { + static constexpr int kJumpTableSlotSize = 8 * kInstrSize; + static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize; + static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize; ++#elif V8_TARGET_ARCH_LA64 ++ // TODO ++ static constexpr int kJumpTableLineSize = 8 * kInstrSize; ++ static constexpr int kJumpTableSlotSize = 8 * kInstrSize; ++ static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize; ++ static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize; + #else + #error Unknown architecture. + #endif +diff --git a/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h b/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h +index 7e56ea6eae2..b8efe962a7c 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h ++++ b/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h +@@ -75,6 +75,15 @@ constexpr Register kGpReturnRegisters[] = {v0, v1}; + constexpr DoubleRegister kFpParamRegisters[] = {f2, f4, f6, f8, f10, f12, f14}; + constexpr DoubleRegister kFpReturnRegisters[] = {f2, f4}; + ++#elif V8_TARGET_ARCH_LA64 ++// =========================================================================== ++// == LA64 TODO ============================================================= ++// =========================================================================== ++constexpr Register kGpParamRegisters[] = {a0, a2, a3, a4, a5, a6, a7}; ++constexpr Register kGpReturnRegisters[] = {a0, a1}; ++constexpr DoubleRegister kFpParamRegisters[] = {f2, f4, f6, f8, f10, f12, f14}; ++constexpr DoubleRegister kFpReturnRegisters[] = {f2, f4}; ++ + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 + // =========================================================================== + // == ppc & ppc64 ============================================================ +diff --git a/src/3rdparty/chromium/v8/test/cctest/BUILD.gn b/src/3rdparty/chromium/v8/test/cctest/BUILD.gn +index fd9de1bacb5..eacb2f41862 100644 +--- a/src/3rdparty/chromium/v8/test/cctest/BUILD.gn ++++ b/src/3rdparty/chromium/v8/test/cctest/BUILD.gn +@@ -363,6 +363,12 @@ v8_source_set("cctest_sources") { + "test-disasm-mips64.cc", + "test-macro-assembler-mips64.cc", + ] ++ } else if (v8_current_cpu == "la64") { ++ sources += [ ### loongson(arch:la64) ### ++ "test-assembler-la64.cc", ++ "test-disasm-la64.cc", ++ "test-macro-assembler-la64.cc", ++ ] + } else if 
(v8_current_cpu == "x64") { + sources += [ ### gcmole(arch:x64) ### + "test-assembler-x64.cc", +@@ -417,7 +423,8 @@ v8_source_set("cctest_sources") { + v8_current_cpu == "arm" || v8_current_cpu == "arm64" || + v8_current_cpu == "s390" || v8_current_cpu == "s390x" || + v8_current_cpu == "mips" || v8_current_cpu == "mips64" || +- v8_current_cpu == "mipsel" || v8_current_cpu == "mipsel64") { ++ v8_current_cpu == "mipsel" || v8_current_cpu == "mipsel64" || ++ v8_current_cpu == "la64") { + # Disable fmadd/fmsub so that expected results match generated code in + # RunFloat64MulAndFloat64Add1 and friends. + if (!is_win) { +diff --git a/src/3rdparty/chromium/v8/test/cctest/test-assembler-la64.cc b/src/3rdparty/chromium/v8/test/cctest/test-assembler-la64.cc +new file mode 100644 +index 00000000000..366bcb7cd25 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/test/cctest/test-assembler-la64.cc +@@ -0,0 +1,5127 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Google Inc. nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++#include // NOLINT(readability/streams) ++ ++#include "src/base/utils/random-number-generator.h" ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/diagnostics/disassembler.h" ++#include "src/execution/simulator.h" ++#include "src/heap/factory.h" ++#include "src/init/v8.h" ++#include "test/cctest/cctest.h" ++ ++namespace v8 { ++namespace internal { ++ ++// Define these function prototypes to match JSEntryFunction in execution.cc. ++// TODO(mips64): Refine these signatures per test case. ++using F1 = void*(int x, int p1, int p2, int p3, int p4); ++using F2 = void*(int x, int y, int p2, int p3, int p4); ++using F3 = void*(void* p, int p1, int p2, int p3, int p4); ++using F4 = void*(int64_t x, int64_t y, int64_t p2, int64_t p3, int64_t p4); ++using F5 = void*(void* p0, void* p1, int p2, int p3, int p4); ++ ++#define __ assm. 
++// v0->a2, v1->a3 ++TEST(LA0) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ // Addition. ++ __ addi_d(a2, a0, 0xC); ++ ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0xAB0, 0, 0, 0, 0)); ++ CHECK_EQ(0xABCL, res); ++} ++ ++TEST(LA1) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ Label L, C; ++ ++ __ ori(a1, a0, 0); ++ __ ori(a2, zero_reg, 0); ++ __ b(&C); ++ ++ __ bind(&L); ++ __ add_d(a2, a2, a1); ++ __ addi_d(a1, a1, -1); ++ ++ __ bind(&C); ++ __ ori(a3, a1, 0); ++ ++ __ Branch(&L, ne, a3, Operand((int64_t)0)); ++ ++ __ or_(a0, a2, zero_reg); ++ __ or_(a1, a3, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(50, 0, 0, 0, 0)); ++ CHECK_EQ(1275L, res); ++} ++ ++TEST(LA2) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ ++ __ ori(a4, zero_reg, 0); // 00000000 ++ __ lu12i_w(a4, 0x12345); // 12345000 ++ __ ori(a4, a4, 0); // 12345000 ++ __ ori(a2, a4, 0xF0F); // 12345F0F ++ __ Branch(&error, ne, a2, Operand(0x12345F0F)); ++ ++ __ ori(a4, zero_reg, 0); ++ __ lu32i_d(a4, 0x12345); // 1 2345 0000 0000 ++ __ ori(a4, a4, 0xFFF); // 1 2345 0000 0FFF ++ __ addi_d(a2, a4, 1); ++ __ Branch(&error, ne, a2, Operand(0x1234500001000)); ++ ++ __ ori(a4, zero_reg, 0); ++ __ lu52i_d(a4, zero_reg, 0x123); // 1230 0000 0000 0000 ++ __ ori(a4, a4, 0xFFF); // 123F 0000 0000 0FFF ++ __ addi_d(a2, a4, 1); // 1230 0000 0000 1000 ++ __ Branch(&error, ne, a2, Operand(0x1230000000001000)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA3) { ++ // Test 32bit calculate instructions. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ ++ __ li(a4, 0x00000004); ++ __ li(a5, 0x00001234); ++ __ li(a6, 0x12345678); ++ __ li(a7, 0x7FFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFC)); ++ __ li(t1, static_cast(0xFFFFEDCC)); ++ __ li(t2, static_cast(0xEDCBA988)); ++ __ li(t3, static_cast(0x80000000)); ++ ++ __ ori(a2, zero_reg, 0); // 0x00000000 ++ __ add_w(a2, a4, a5); // 0x00001238 ++ __ sub_w(a2, a2, a4); // 0x00001234 ++ __ Branch(&error, ne, a2, Operand(0x00001234)); ++ __ ori(a3, zero_reg, 0); // 0x00000000 ++ __ add_w(a3, a7, a4); // 32bit addu result is sign-extended into 64bit reg. 
++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFF80000003)); ++ ++ __ sub_w(a3, t3, a4); // 0x7FFFFFFC ++ __ Branch(&error, ne, a3, Operand(0x7FFFFFFC)); ++ ++ __ ori(a2, zero_reg, 0); // 0x00000000 ++ __ ori(a3, zero_reg, 0); // 0x00000000 ++ __ addi_w(a2, zero_reg, 0x421); // 0x00007421 ++ __ addi_w(a2, a2, -0x1); // 0x00007420 ++ __ addi_w(a2, a2, -0x20); // 0x00007400 ++ __ Branch(&error, ne, a2, Operand(0x0000400)); ++ __ addi_w(a3, a7, 0x1); // 0x80000000 - result is sign-extended. ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFF80000000)); ++ ++ __ ori(a2, zero_reg, 0); // 0x00000000 ++ __ ori(a3, zero_reg, 0); // 0x00000000 ++ __ alsl_w(a2, a6, a4, 3); // 0xFFFFFFFF91A2B3C4 ++ __ alsl_w(a2, a2, a4, 2); // 0x468ACF14 ++ __ Branch(&error, ne, a2, Operand(0x468acf14)); ++ __ ori(a0, zero_reg, 31); ++ __ alsl_wu(a3, a6, a4, 3); // 0x91A2B3C4 ++ __ alsl_wu(a3, a3, a7, 1); // 0xFFFFFFFFA3456787 ++ __ Branch(&error, ne, a3, Operand(0xA3456787)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ ori(a3, zero_reg, 0); ++ __ mul_w(a2, a5, a7); ++ __ div_w(a2, a2, a4); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFFB73)); ++ __ mul_w(a3, a4, t1); ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFFFFFFB730)); ++ __ div_w(a3, t3, a4); ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFFE0000000)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulh_w(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFFFFF)); ++ __ mulh_w(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulh_wu(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0x3)); ++ __ mulh_wu(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulw_d_w(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFB730)); ++ __ mulw_d_w(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(0x48D159E0)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulw_d_wu(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0x3FFFFB730)); //========0xFFFFB730 ++ __ ori(a2, zero_reg, 81); ++ __ mulw_d_wu(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(0x48D159E0)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ div_wu(a2, a7, a5); ++ __ Branch(&error, ne, a2, Operand(0x70821)); ++ __ div_wu(a2, t0, a5); ++ __ Branch(&error, ne, a2, Operand(0xE1042)); ++ __ div_wu(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mod_w(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ ori(a2, zero_reg, 0); ++ __ mod_w(a2, t2, a5); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFF258)); ++ __ ori(a2, zero_reg, 0); ++ __ mod_w(a2, t2, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFF258)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mod_wu(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ mod_wu(a2, t2, a5); ++ __ Branch(&error, ne, a2, Operand(0xF0)); ++ __ mod_wu(a2, t2, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFEDCBA988)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA4) { ++ // Test 64bit calculate instructions. 
++ CcTest::InitializeVM();
++ Isolate* isolate = CcTest::i_isolate();
++ HandleScope scope(isolate);
++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes);
++
++ Label exit, error;
++
++ __ li(a4, 0x17312);
++ __ li(a5, 0x1012131415161718);
++ __ li(a6, 0x51F4B764A26E7412);
++ __ li(a7, 0x7FFFFFFFFFFFFFFF);
++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547));
++ __ li(t1, static_cast(0xDF6B8F35A10E205C));
++ __ li(t2, static_cast(0x81F25A87C4236841));
++ __ li(t3, static_cast(0x8000000000000000));
++
++ __ ori(a2, zero_reg, 0);
++ __ add_d(a2, a4, a5);
++ __ sub_d(a2, a2, a4);
++ __ Branch(&error, ne, a2, Operand(0x1012131415161718));
++ __ ori(a3, zero_reg, 0);
++ __ add_d(a3, a6, a7); // overflow
++ __ Branch(&error, ne, a3, Operand(0xd1f4b764a26e7411));
++ __ sub_d(a3, t3, a4); // overflow
++ __ Branch(&error, ne, a3, Operand(0x7ffffffffffe8cee));
++
++ __ ori(a2, zero_reg, 0);
++ __ addi_d(a2, a5, 0x412); // positive value
++ __ Branch(&error, ne, a2, Operand(0x1012131415161b2a));
++ __ addi_d(a2, a7, 0x547); // negative value
++ __ Branch(&error, ne, a2, Operand(0x8000000000000546));
++
++ __ ori(t4, zero_reg, 0);
++ __ addu16i_d(a2, t4, 0x1234);
++ __ Branch(&error, ne, a2, Operand(0x12340000));
++ __ addu16i_d(a2, a2, 0x9876);
++ __ Branch(&error, ne, a2, Operand(0xffffffffaaaa0000));
++
++ __ ori(a2, zero_reg, 0);
++ __ alsl_d(a2, t2, t0, 3);
++ __ Branch(&error, ne, a2, Operand(0xf92d43e211b374f));
++
++ __ ori(a2, zero_reg, 0);
++ __ mul_d(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(0xdbe6a8729a547fb0));
++ __ mul_d(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0x57ad69f40f870584));
++ __ mul_d(a2, a4, t0);
++ __ Branch(&error, ne, a2, Operand(0xfffffffff07523fe));
++
++ __ ori(a2, zero_reg, 0);
++ __ mulh_d(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(0x52514c6c6b54467));
++ __ mulh_d(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0x15d));
++
++ __ ori(a2, zero_reg, 0);
++ __ mulh_du(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(0x52514c6c6b54467));
++ __ mulh_du(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0xdf6b8f35a10e1700));
++ __ mulh_du(a2, a4, t0);
++ __ Branch(&error, ne, a2, Operand(0x17311));
++
++ __ ori(a2, zero_reg, 0);
++ __ div_d(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(static_cast(0)));
++ __ div_d(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(static_cast(0)));
++ __ div_d(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0xffffe985f631e6d9));
++
++ __ ori(a2, zero_reg, 0);
++ __ div_du(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(static_cast(0)));
++ __ div_du(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0x1));
++ __ div_du(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0x9a22ffd3973d));
++
++ __ ori(a2, zero_reg, 0);
++ __ mod_d(a2, a6, a4);
++ __ Branch(&error, ne, a2, Operand(0x13558));
++ __ mod_d(a2, t2, t0);
++ __ Branch(&error, ne, a2, Operand(0xfffffffffffffb0a));
++ __ mod_d(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0xffffffffffff6a1a));
++
++ __ ori(a2, zero_reg, 0);
++ __ mod_du(a2, a6, a4);
++ __ Branch(&error, ne, a2, Operand(0x13558));
++ __ mod_du(a2, t2, t0);
++ __ Branch(&error, ne, a2, Operand(0x81f25a87c4236841));
++ __ mod_du(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0x1712));
++
++ // Everything was correctly executed. Load the expected result.
++ __ li(a2, 0x31415926);
++ __ b(&exit);
++
++ __ bind(&error);
++ __ li(a2, 0x666);
++ // Got an error. Return a wrong result.
++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA5) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ ++ __ li(a4, 0x17312); ++ __ li(a5, 0x1012131415161718); ++ __ li(a6, 0x51F4B764A26E7412); ++ __ li(a7, 0x7FFFFFFFFFFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547)); ++ __ li(t1, static_cast(0xDF6B8F35A10E205C)); ++ __ li(t2, static_cast(0x81F25A87C4236841)); ++ __ li(t3, static_cast(0x8000000000000000)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ slt(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ slt(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ slt(a2, t1, t1); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ sltu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ sltu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ sltu(a2, t1, t1); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ slti(a2, a5, 0x123); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ slti(a2, t0, 0x123); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ sltui(a2, a5, 0x123); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ sltui(a2, t0, 0x123); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ and_(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x1310)); ++ __ and_(a2, a6, a7); ++ __ Branch(&error, ne, a2, Operand(0x51F4B764A26E7412)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ or_(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xfffffffffffff55f)); ++ __ or_(a2, t2, t3); ++ __ Branch(&error, ne, a2, Operand(0x81f25a87c4236841)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ nor(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0xefedecebeae888e5)); ++ __ nor(a2, a6, a7); ++ __ Branch(&error, ne, a2, Operand(0x8000000000000000)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ xor_(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x209470ca5ef1d51b)); ++ __ xor_(a2, t2, t3); ++ __ Branch(&error, ne, a2, Operand(0x1f25a87c4236841)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ andn(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x16002)); ++ __ andn(a2, a6, a7); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ orn(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffe7)); ++ __ orn(a2, t2, t3); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffff)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ andi(a2, a4, 0x123); ++ __ Branch(&error, ne, a2, Operand(0x102)); ++ __ andi(a2, a6, 0xDCB); ++ __ Branch(&error, ne, a2, Operand(0x402)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ xori(a2, t0, 0x123); ++ __ Branch(&error, ne, a2, Operand(0xfffffffffffff464)); ++ __ xori(a2, t2, 0xDCB); ++ __ Branch(&error, ne, a2, Operand(0x81f25a87c423658a)); ++ ++ // Everything was correctly executed. Load the expected result. ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ // Got an error. Return a wrong result. 
++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA6) { ++ // Test loads and stores instruction. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int64_t si3; ++ int64_t result_ld_b_si1; ++ int64_t result_ld_b_si2; ++ int64_t result_ld_h_si1; ++ int64_t result_ld_h_si2; ++ int64_t result_ld_w_si1; ++ int64_t result_ld_w_si2; ++ int64_t result_ld_d_si1; ++ int64_t result_ld_d_si3; ++ int64_t result_ld_bu_si2; ++ int64_t result_ld_hu_si2; ++ int64_t result_ld_wu_si2; ++ int64_t result_st_b; ++ int64_t result_st_h; ++ int64_t result_st_w; ++ }; ++ T t; ++ ++ // Ld_b ++ __ Ld_b(a4, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ld_b_si1))); ++ ++ __ Ld_b(a4, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ld_b_si2))); ++ ++ // Ld_h ++ __ Ld_h(a5, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ld_h_si1))); ++ ++ __ Ld_h(a5, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ld_h_si2))); ++ ++ // Ld_w ++ __ Ld_w(a6, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ld_w_si1))); ++ ++ __ Ld_w(a6, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ld_w_si2))); ++ ++ // Ld_d ++ __ Ld_d(a7, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ld_d_si1))); ++ ++ __ Ld_d(a7, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ld_d_si3))); ++ ++ // Ld_bu ++ __ Ld_bu(t0, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_ld_bu_si2))); ++ ++ // Ld_hu ++ __ Ld_hu(t1, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_ld_hu_si2))); ++ ++ // Ld_wu ++ __ Ld_wu(t2, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_ld_wu_si2))); ++ ++ // St ++ __ li(t4, 0x11111111); ++ ++ // St_b ++ __ Ld_d(t5, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_st_b))); ++ __ St_b(t4, MemOperand(a0, offsetof(T, result_st_b))); ++ ++ // St_h ++ __ Ld_d(t6, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t6, MemOperand(a0, offsetof(T, result_st_h))); ++ __ St_h(t4, MemOperand(a0, offsetof(T, result_st_h))); ++ ++ // St_w ++ __ Ld_d(t7, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t7, MemOperand(a0, offsetof(T, result_st_w))); ++ __ St_w(t4, MemOperand(a0, offsetof(T, result_st_w))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x11223344; ++ t.si2 = 0x99AABBCC; ++ t.si3 = 0x1122334455667788; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x44), t.result_ld_b_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFFFCC), t.result_ld_b_si2); ++ ++ CHECK_EQ(static_cast(0x3344), t.result_ld_h_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFBBCC), t.result_ld_h_si2); ++ ++ 
CHECK_EQ(static_cast(0x11223344), t.result_ld_w_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFF99AABBCC), t.result_ld_w_si2); ++ ++ CHECK_EQ(static_cast(0x11223344), t.result_ld_d_si1); ++ CHECK_EQ(static_cast(0x1122334455667788), t.result_ld_d_si3); ++ ++ CHECK_EQ(static_cast(0xCC), t.result_ld_bu_si2); ++ CHECK_EQ(static_cast(0xBBCC), t.result_ld_hu_si2); ++ CHECK_EQ(static_cast(0x99AABBCC), t.result_ld_wu_si2); ++ ++ CHECK_EQ(static_cast(0x1122334455667711), t.result_st_b); ++ CHECK_EQ(static_cast(0x1122334455661111), t.result_st_h); ++ CHECK_EQ(static_cast(0x1122334411111111), t.result_st_w); ++} ++ ++TEST(LA7) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int64_t si3; ++ int64_t result_ldx_b_si1; ++ int64_t result_ldx_b_si2; ++ int64_t result_ldx_h_si1; ++ int64_t result_ldx_h_si2; ++ int64_t result_ldx_w_si1; ++ int64_t result_ldx_w_si2; ++ int64_t result_ldx_d_si1; ++ int64_t result_ldx_d_si3; ++ int64_t result_ldx_bu_si2; ++ int64_t result_ldx_hu_si2; ++ int64_t result_ldx_wu_si2; ++ int64_t result_stx_b; ++ int64_t result_stx_h; ++ int64_t result_stx_w; ++ }; ++ T t; ++ ++ // ldx_b ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_b(a4, MemOperand(a0, a2)); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ldx_b_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_b(a4, MemOperand(a0, a2)); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ldx_b_si2))); ++ ++ // ldx_h ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_h(a5, MemOperand(a0, a2)); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ldx_h_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_h(a5, MemOperand(a0, a2)); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ldx_h_si2))); ++ ++ // ldx_w ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_w(a6, MemOperand(a0, a2)); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ldx_w_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_w(a6, MemOperand(a0, a2)); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ldx_w_si2))); ++ ++ // Ld_d ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_d(a7, MemOperand(a0, a2)); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ldx_d_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si3))); ++ __ Ld_d(a7, MemOperand(a0, a2)); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ldx_d_si3))); ++ ++ // Ld_bu ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_bu(t0, MemOperand(a0, a2)); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_ldx_bu_si2))); ++ ++ // Ld_hu ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_hu(t1, MemOperand(a0, a2)); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_ldx_hu_si2))); ++ ++ // Ld_wu ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_wu(t2, MemOperand(a0, a2)); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_ldx_wu_si2))); ++ ++ // St ++ __ li(t4, 0x11111111); ++ ++ // St_b ++ __ Ld_d(t5, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_stx_b))); ++ __ li(a2, static_cast(offsetof(T, result_stx_b))); ++ __ St_b(t4, MemOperand(a0, a2)); ++ ++ // St_h ++ __ Ld_d(t6, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t6, MemOperand(a0, offsetof(T, result_stx_h))); ++ __ li(a2, static_cast(offsetof(T, result_stx_h))); ++ __ St_h(t4, MemOperand(a0, a2)); ++ ++ // St_w ++ __ Ld_d(t7, MemOperand(a0, offsetof(T, si3))); ++ __ li(a2, static_cast(offsetof(T, 
result_stx_w))); ++ __ St_d(t7, MemOperand(a0, a2)); ++ __ li(a3, static_cast(offsetof(T, result_stx_w))); ++ __ St_w(t4, MemOperand(a0, a3)); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x11223344; ++ t.si2 = 0x99AABBCC; ++ t.si3 = 0x1122334455667788; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x44), t.result_ldx_b_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFFFCC), t.result_ldx_b_si2); ++ ++ CHECK_EQ(static_cast(0x3344), t.result_ldx_h_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFBBCC), t.result_ldx_h_si2); ++ ++ CHECK_EQ(static_cast(0x11223344), t.result_ldx_w_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFF99AABBCC), t.result_ldx_w_si2); ++ ++ CHECK_EQ(static_cast(0x11223344), t.result_ldx_d_si1); ++ CHECK_EQ(static_cast(0x1122334455667788), t.result_ldx_d_si3); ++ ++ CHECK_EQ(static_cast(0xCC), t.result_ldx_bu_si2); ++ CHECK_EQ(static_cast(0xBBCC), t.result_ldx_hu_si2); ++ CHECK_EQ(static_cast(0x99AABBCC), t.result_ldx_wu_si2); ++ ++ CHECK_EQ(static_cast(0x1122334455667711), t.result_stx_b); ++ CHECK_EQ(static_cast(0x1122334455661111), t.result_stx_h); ++ CHECK_EQ(static_cast(0x1122334411111111), t.result_stx_w); ++} ++ ++TEST(LDPTR_STPTR) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ int64_t test[10]; ++ ++ __ ldptr_w(a4, a0, 0); ++ __ stptr_d(a4, a0, 24); // test[3] ++ ++ __ ldptr_w(a5, a0, 8); // test[1] ++ __ stptr_d(a5, a0, 32); // test[4] ++ ++ __ ldptr_d(a6, a0, 16); // test[2] ++ __ stptr_d(a6, a0, 40); // test[5] ++ ++ __ li(t0, 0x11111111); ++ ++ __ stptr_d(a6, a0, 48); // test[6] ++ __ stptr_w(t0, a0, 48); // test[6] ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test[0] = 0x11223344; ++ test[1] = 0x99AABBCC; ++ test[2] = 0x1122334455667788; ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x11223344), test[3]); ++ CHECK_EQ(static_cast(0xFFFFFFFF99AABBCC), test[4]); ++ CHECK_EQ(static_cast(0x1122334455667788), test[5]); ++ CHECK_EQ(static_cast(0x1122334411111111), test[6]); ++} ++ ++TEST(LA8) { ++ // Test 32bit shift instructions. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ int32_t input; ++ int32_t result_sll_w_0; ++ int32_t result_sll_w_8; ++ int32_t result_sll_w_10; ++ int32_t result_sll_w_31; ++ int32_t result_srl_w_0; ++ int32_t result_srl_w_8; ++ int32_t result_srl_w_10; ++ int32_t result_srl_w_31; ++ int32_t result_sra_w_0; ++ int32_t result_sra_w_8; ++ int32_t result_sra_w_10; ++ int32_t result_sra_w_31; ++ int32_t result_rotr_w_0; ++ int32_t result_rotr_w_8; ++ int32_t result_slli_w_0; ++ int32_t result_slli_w_8; ++ int32_t result_slli_w_10; ++ int32_t result_slli_w_31; ++ int32_t result_srli_w_0; ++ int32_t result_srli_w_8; ++ int32_t result_srli_w_10; ++ int32_t result_srli_w_31; ++ int32_t result_srai_w_0; ++ int32_t result_srai_w_8; ++ int32_t result_srai_w_10; ++ int32_t result_srai_w_31; ++ int32_t result_rotri_w_0; ++ int32_t result_rotri_w_8; ++ int32_t result_rotri_w_10; ++ int32_t result_rotri_w_31; ++ }; ++ T t; ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ __ Ld_w(a4, MemOperand(a0, offsetof(T, input))); ++ ++ // sll_w ++ __ li(a5, 0); ++ __ sll_w(t0, a4, a5); ++ __ li(a5, 0x8); ++ __ sll_w(t1, a4, a5); ++ __ li(a5, 0xA); ++ __ sll_w(t2, a4, a5); ++ __ li(a5, 0x1F); ++ __ sll_w(t3, a4, a5); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_sll_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_sll_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_sll_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_sll_w_31))); ++ ++ // srl_w ++ __ li(a5, 0x0); ++ __ srl_w(t0, a4, a5); ++ __ li(a5, 0x8); ++ __ srl_w(t1, a4, a5); ++ __ li(a5, 0xA); ++ __ srl_w(t2, a4, a5); ++ __ li(a5, 0x1F); ++ __ srl_w(t3, a4, a5); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_srl_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_srl_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_srl_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_srl_w_31))); ++ ++ // sra_w ++ __ li(a5, 0x0); ++ __ sra_w(t0, a4, a5); ++ __ li(a5, 0x8); ++ __ sra_w(t1, a4, a5); ++ ++ __ li(a6, static_cast(0x80000000)); ++ __ add_w(a6, a6, a4); ++ __ li(a5, 0xA); ++ __ sra_w(t2, a6, a5); ++ __ li(a5, 0x1F); ++ __ sra_w(t3, a6, a5); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_sra_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_sra_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_sra_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_sra_w_31))); ++ ++ // rotr ++ __ li(a5, 0x0); ++ __ rotr_w(t0, a4, a5); ++ __ li(a6, 0x8); ++ __ rotr_w(t1, a4, a6); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotr_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotr_w_8))); ++ ++ // slli_w ++ __ slli_w(t0, a4, 0); ++ __ slli_w(t1, a4, 0x8); ++ __ slli_w(t2, a4, 0xA); ++ __ slli_w(t3, a4, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_slli_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_slli_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_slli_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_slli_w_31))); ++ ++ // srli_w ++ __ srli_w(t0, a4, 0); ++ __ srli_w(t1, a4, 0x8); ++ __ srli_w(t2, a4, 0xA); ++ __ srli_w(t3, a4, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_srli_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_srli_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_srli_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_srli_w_31))); ++ ++ // srai_w ++ __ srai_w(t0, a4, 0); ++ __ srai_w(t1, 
a4, 0x8); ++ ++ __ li(a6, static_cast(0x80000000)); ++ __ add_w(a6, a6, a4); ++ __ srai_w(t2, a6, 0xA); ++ __ srai_w(t3, a6, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_srai_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_srai_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_srai_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_srai_w_31))); ++ ++ // rotri_w ++ __ rotri_w(t0, a4, 0); ++ __ rotri_w(t1, a4, 0x8); ++ __ rotri_w(t2, a4, 0xA); ++ __ rotri_w(t3, a4, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotri_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotri_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_rotri_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_rotri_w_31))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x12345678; ++ f.Call(&t, 0x0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_sll_w_0); ++ CHECK_EQ(static_cast(0x34567800), t.result_sll_w_8); ++ CHECK_EQ(static_cast(0xD159E000), t.result_sll_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_sll_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_srl_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_srl_w_8); ++ CHECK_EQ(static_cast(0x48D15), t.result_srl_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_srl_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_sra_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_sra_w_8); ++ CHECK_EQ(static_cast(0xFFE48D15), t.result_sra_w_10); ++ CHECK_EQ(static_cast(0xFFFFFFFF), t.result_sra_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotr_w_0); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotr_w_8); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_slli_w_0); ++ CHECK_EQ(static_cast(0x34567800), t.result_slli_w_8); ++ CHECK_EQ(static_cast(0xD159E000), t.result_slli_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_slli_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_srli_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_srli_w_8); ++ CHECK_EQ(static_cast(0x48D15), t.result_srli_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_srli_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_srai_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_srai_w_8); ++ CHECK_EQ(static_cast(0xFFE48D15), t.result_srai_w_10); ++ CHECK_EQ(static_cast(0xFFFFFFFF), t.result_srai_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotri_w_0); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotri_w_8); ++ CHECK_EQ(static_cast(0x9E048D15), t.result_rotri_w_10); ++ CHECK_EQ(static_cast(0x2468ACF0), t.result_rotri_w_31); ++} ++ ++TEST(LA9) { ++ // Test 64bit shift instructions. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ int64_t input; ++ int64_t result_sll_d_0; ++ int64_t result_sll_d_13; ++ int64_t result_sll_d_30; ++ int64_t result_sll_d_63; ++ int64_t result_srl_d_0; ++ int64_t result_srl_d_13; ++ int64_t result_srl_d_30; ++ int64_t result_srl_d_63; ++ int64_t result_sra_d_0; ++ int64_t result_sra_d_13; ++ int64_t result_sra_d_30; ++ int64_t result_sra_d_63; ++ int64_t result_rotr_d_0; ++ int64_t result_rotr_d_13; ++ int64_t result_slli_d_0; ++ int64_t result_slli_d_13; ++ int64_t result_slli_d_30; ++ int64_t result_slli_d_63; ++ int64_t result_srli_d_0; ++ int64_t result_srli_d_13; ++ int64_t result_srli_d_30; ++ int64_t result_srli_d_63; ++ int64_t result_srai_d_0; ++ int64_t result_srai_d_13; ++ int64_t result_srai_d_30; ++ int64_t result_srai_d_63; ++ int64_t result_rotri_d_0; ++ int64_t result_rotri_d_13; ++ int64_t result_rotri_d_30; ++ int64_t result_rotri_d_63; ++ }; ++ ++ T t; ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, input))); ++ ++ // sll_d ++ __ li(a5, 0); ++ __ sll_d(t0, a4, a5); ++ __ li(a5, 0xD); ++ __ sll_d(t1, a4, a5); ++ __ li(a5, 0x1E); ++ __ sll_d(t2, a4, a5); ++ __ li(a5, 0x3F); ++ __ sll_d(t3, a4, a5); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_sll_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_sll_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_sll_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_sll_d_63))); ++ ++ // srl_d ++ __ li(a5, 0x0); ++ __ srl_d(t0, a4, a5); ++ __ li(a5, 0xD); ++ __ srl_d(t1, a4, a5); ++ __ li(a5, 0x1E); ++ __ srl_d(t2, a4, a5); ++ __ li(a5, 0x3F); ++ __ srl_d(t3, a4, a5); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_srl_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_srl_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_srl_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_srl_d_63))); ++ ++ // sra_d ++ __ li(a5, 0x0); ++ __ sra_d(t0, a4, a5); ++ __ li(a5, 0xD); ++ __ sra_d(t1, a4, a5); ++ ++ __ li(a6, static_cast(0x8000000000000000)); ++ __ add_d(a6, a6, a4); ++ __ li(a5, 0x1E); ++ __ sra_d(t2, a6, a5); ++ __ li(a5, 0x3F); ++ __ sra_d(t3, a6, a5); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_sra_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_sra_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_sra_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_sra_d_63))); ++ ++ // rotr ++ __ li(a5, 0x0); ++ __ rotr_d(t0, a4, a5); ++ __ li(a6, 0xD); ++ __ rotr_d(t1, a4, a6); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotr_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotr_d_13))); ++ ++ // slli_d ++ __ slli_d(t0, a4, 0); ++ __ slli_d(t1, a4, 0xD); ++ __ slli_d(t2, a4, 0x1E); ++ __ slli_d(t3, a4, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_slli_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_slli_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_slli_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_slli_d_63))); ++ ++ // srli_d ++ __ srli_d(t0, a4, 0); ++ __ srli_d(t1, a4, 0xD); ++ __ srli_d(t2, a4, 0x1E); ++ __ srli_d(t3, a4, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_srli_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_srli_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_srli_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_srli_d_63))); ++ ++ // srai_d ++ __ 
srai_d(t0, a4, 0); ++ __ srai_d(t1, a4, 0xD); ++ ++ __ li(a6, static_cast(0x8000000000000000)); ++ __ add_d(a6, a6, a4); ++ __ srai_d(t2, a6, 0x1E); ++ __ srai_d(t3, a6, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_srai_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_srai_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_srai_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_srai_d_63))); ++ ++ // rotri_d ++ __ rotri_d(t0, a4, 0); ++ __ rotri_d(t1, a4, 0xD); ++ __ rotri_d(t2, a4, 0x1E); ++ __ rotri_d(t3, a4, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotri_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotri_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_rotri_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_rotri_d_63))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x51F4B764A26E7412; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_sll_d_0); ++ CHECK_EQ(static_cast(0x96ec944dce824000), t.result_sll_d_13); ++ CHECK_EQ(static_cast(0x289b9d0480000000), t.result_sll_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_sll_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_srl_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_srl_d_13); ++ CHECK_EQ(static_cast(0x147d2dd92), t.result_srl_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_srl_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_sra_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_sra_d_13); ++ CHECK_EQ(static_cast(0xffffffff47d2dd92), t.result_sra_d_30); ++ CHECK_EQ(static_cast(0xffffffffffffffff), t.result_sra_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_rotr_d_0); ++ CHECK_EQ(static_cast(0xa0928fa5bb251373), t.result_rotr_d_13); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_slli_d_0); ++ CHECK_EQ(static_cast(0x96ec944dce824000), t.result_slli_d_13); ++ CHECK_EQ(static_cast(0x289b9d0480000000), t.result_slli_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_slli_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_srli_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_srli_d_13); ++ CHECK_EQ(static_cast(0x147d2dd92), t.result_srli_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_srli_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_srai_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_srai_d_13); ++ CHECK_EQ(static_cast(0xffffffff47d2dd92), t.result_srai_d_30); ++ CHECK_EQ(static_cast(0xffffffffffffffff), t.result_srai_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_rotri_d_0); ++ CHECK_EQ(static_cast(0xa0928fa5bb251373), t.result_rotri_d_13); ++ CHECK_EQ(static_cast(0x89b9d04947d2dd92), t.result_rotri_d_30); ++ CHECK_EQ(static_cast(0xa3e96ec944dce824), t.result_rotri_d_63); ++} ++ ++TEST(LA10) { ++ // Test 32bit bit operation instructions. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int32_t result_ext_w_b_si1; ++ int32_t result_ext_w_b_si2; ++ int32_t result_ext_w_h_si1; ++ int32_t result_ext_w_h_si2; ++ int32_t result_clo_w_si1; ++ int32_t result_clo_w_si2; ++ int32_t result_clz_w_si1; ++ int32_t result_clz_w_si2; ++ int32_t result_cto_w_si1; ++ int32_t result_cto_w_si2; ++ int32_t result_ctz_w_si1; ++ int32_t result_ctz_w_si2; ++ int32_t result_bytepick_w_si1; ++ int32_t result_bytepick_w_si2; ++ int32_t result_revb_2h_si1; ++ int32_t result_revb_2h_si2; ++ int32_t result_bitrev_4b_si1; ++ int32_t result_bitrev_4b_si2; ++ int32_t result_bitrev_w_si1; ++ int32_t result_bitrev_w_si2; ++ int32_t result_bstrins_w_si1; ++ int32_t result_bstrins_w_si2; ++ int32_t result_bstrpick_w_si1; ++ int32_t result_bstrpick_w_si2; ++ }; ++ T t; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, si1))); ++ __ Ld_d(a5, MemOperand(a0, offsetof(T, si2))); ++ ++ // ext_w_b ++ __ ext_w_b(t0, a4); ++ __ ext_w_b(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_ext_w_b_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_ext_w_b_si2))); ++ ++ // ext_w_h ++ __ ext_w_h(t0, a4); ++ __ ext_w_h(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_ext_w_h_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_ext_w_h_si2))); ++ ++ /* //clo_w ++ __ clo_w(t0, a4); ++ __ clo_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_clo_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_clo_w_si2)));*/ ++ ++ // clz_w ++ __ clz_w(t0, a4); ++ __ clz_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_clz_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_clz_w_si2))); ++ ++ /* //cto_w ++ __ cto_w(t0, a4); ++ __ cto_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_cto_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_cto_w_si2)));*/ ++ ++ // ctz_w ++ __ ctz_w(t0, a4); ++ __ ctz_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_ctz_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_ctz_w_si2))); ++ ++ // bytepick_w ++ __ bytepick_w(t0, a4, a5, 0); ++ __ bytepick_w(t1, a5, a4, 2); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bytepick_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bytepick_w_si2))); ++ ++ // revb_2h ++ __ revb_2h(t0, a4); ++ __ revb_2h(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_revb_2h_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_revb_2h_si2))); ++ ++ // bitrev ++ __ bitrev_4b(t0, a4); ++ __ bitrev_4b(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bitrev_4b_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bitrev_4b_si2))); ++ ++ // bitrev_w ++ __ bitrev_w(t0, a4); ++ __ bitrev_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bitrev_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bitrev_w_si2))); ++ ++ // bstrins ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrins_w(t0, a4, 0xD, 0x4); ++ __ bstrins_w(t1, a5, 0x16, 0x5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bstrins_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bstrins_w_si2))); ++ ++ // bstrpick ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrpick_w(t0, a4, 0xD, 0x4); ++ __ bstrpick_w(t1, a5, 0x16, 0x5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bstrpick_w_si1))); ++ 
__ St_w(t1, MemOperand(a0, offsetof(T, result_bstrpick_w_si2))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle<Code> code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x51F4B764A26E7412; ++ t.si2 = 0x81F25A87C423B891; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast<int32_t>(0x12), t.result_ext_w_b_si1); ++ CHECK_EQ(static_cast<int32_t>(0xffffff91), t.result_ext_w_b_si2); ++ CHECK_EQ(static_cast<int32_t>(0x7412), t.result_ext_w_h_si1); ++ CHECK_EQ(static_cast<int32_t>(0xffffb891), t.result_ext_w_h_si2); ++ // CHECK_EQ(static_cast<int32_t>(0x1), t.result_clo_w_si1); ++ // CHECK_EQ(static_cast<int32_t>(0x2), t.result_clo_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x0), t.result_clz_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x0), t.result_clz_w_si2); ++ // CHECK_EQ(static_cast<int32_t>(0x0), t.result_cto_w_si1); ++ // CHECK_EQ(static_cast<int32_t>(0x1), t.result_cto_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x1), t.result_ctz_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x0), t.result_ctz_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0xc423b891), t.result_bytepick_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x7412c423), ++ t.result_bytepick_w_si2); // 0xffffc423 ++ CHECK_EQ(static_cast<int32_t>(0x6ea21274), t.result_revb_2h_si1); ++ CHECK_EQ(static_cast<int32_t>(0x23c491b8), t.result_revb_2h_si2); ++ CHECK_EQ(static_cast<int32_t>(0x45762e48), t.result_bitrev_4b_si1); ++ CHECK_EQ(static_cast<int32_t>(0x23c41d89), t.result_bitrev_4b_si2); ++ CHECK_EQ(static_cast<int32_t>(0x482e7645), t.result_bitrev_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x891dc423), t.result_bitrev_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x120), t.result_bstrins_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x771220), t.result_bstrins_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x341), t.result_bstrpick_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x11dc4), t.result_bstrpick_w_si2); ++} ++ ++TEST(LA11) { ++ // Test 64bit bit operation instructions.
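// Note: LA11 is the 64-bit counterpart of LA10: count-leading/trailing-zero
// (clz_d/ctz_d), bytepick_d, the byte/halfword reversals (revb_*/revh_*), full bit
// reversal (bitrev_8b/bitrev_d), bit-field insert/extract (bstrins_d/bstrpick_d),
// and the conditional masks maskeqz/masknez. For example, bstrpick_d(t1, a5, 63, 48)
// extracts bits [63:48] of si2 = 0xFB8017FF781A15C3, giving the expected 0xfb80.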
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int64_t result_clo_d_si1; ++ int64_t result_clo_d_si2; ++ int64_t result_clz_d_si1; ++ int64_t result_clz_d_si2; ++ int64_t result_cto_d_si1; ++ int64_t result_cto_d_si2; ++ int64_t result_ctz_d_si1; ++ int64_t result_ctz_d_si2; ++ int64_t result_bytepick_d_si1; ++ int64_t result_bytepick_d_si2; ++ int64_t result_revb_4h_si1; ++ int64_t result_revb_4h_si2; ++ int64_t result_revb_2w_si1; ++ int64_t result_revb_2w_si2; ++ int64_t result_revb_d_si1; ++ int64_t result_revb_d_si2; ++ int64_t result_revh_2w_si1; ++ int64_t result_revh_2w_si2; ++ int64_t result_revh_d_si1; ++ int64_t result_revh_d_si2; ++ int64_t result_bitrev_8b_si1; ++ int64_t result_bitrev_8b_si2; ++ int64_t result_bitrev_d_si1; ++ int64_t result_bitrev_d_si2; ++ int64_t result_bstrins_d_si1; ++ int64_t result_bstrins_d_si2; ++ int64_t result_bstrpick_d_si1; ++ int64_t result_bstrpick_d_si2; ++ int64_t result_maskeqz_si1; ++ int64_t result_maskeqz_si2; ++ int64_t result_masknez_si1; ++ int64_t result_masknez_si2; ++ }; ++ ++ T t; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, si1))); ++ __ Ld_d(a5, MemOperand(a0, offsetof(T, si2))); ++ ++ /* //clo_d ++ __ clo_d(t0, a4); ++ __ clo_d(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_clo_d_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_clo_d_si2)));*/ ++ ++ // clz_d ++ __ or_(t0, zero_reg, zero_reg); ++ __ clz_d(t0, a4); ++ __ clz_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_clz_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_clz_d_si2))); ++ ++ /* //cto_d ++ __ cto_d(t0, a4); ++ __ cto_d(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_cto_d_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_cto_d_si2)));*/ ++ ++ // ctz_d ++ __ ctz_d(t0, a4); ++ __ ctz_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_ctz_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_ctz_d_si2))); ++ ++ // bytepick_d ++ __ bytepick_d(t0, a4, a5, 0); ++ __ bytepick_d(t1, a5, a4, 5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bytepick_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bytepick_d_si2))); ++ ++ // revb_4h ++ __ revb_4h(t0, a4); ++ __ revb_4h(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revb_4h_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revb_4h_si2))); ++ ++ // revb_2w ++ __ revb_2w(t0, a4); ++ __ revb_2w(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revb_2w_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revb_2w_si2))); ++ ++ // revb_d ++ __ revb_d(t0, a4); ++ __ revb_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revb_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revb_d_si2))); ++ ++ // revh_2w ++ __ revh_2w(t0, a4); ++ __ revh_2w(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revh_2w_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revh_2w_si2))); ++ ++ // revh_d ++ __ revh_d(t0, a4); ++ __ revh_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revh_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revh_d_si2))); ++ ++ // bitrev_8b ++ __ bitrev_8b(t0, a4); ++ __ bitrev_8b(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bitrev_8b_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bitrev_8b_si2))); ++ ++ // bitrev_d ++ __ bitrev_d(t0, a4); ++ __ bitrev_d(t1, 
a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bitrev_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bitrev_d_si2))); ++ ++ // bstrins_d ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrins_d(t0, a4, 5, 0); ++ __ bstrins_d(t1, a5, 39, 12); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bstrins_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bstrins_d_si2))); ++ ++ // bstrpick_d ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrpick_d(t0, a4, 5, 0); ++ __ bstrpick_d(t1, a5, 63, 48); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bstrpick_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bstrpick_d_si2))); ++ ++ // maskeqz ++ __ maskeqz(t0, a4, a4); ++ __ maskeqz(t1, a5, zero_reg); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_maskeqz_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_maskeqz_si2))); ++ ++ // masknez ++ __ masknez(t0, a4, a4); ++ __ masknez(t1, a5, zero_reg); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_masknez_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_masknez_si2))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x10C021098B710CDE; ++ t.si2 = 0xFB8017FF781A15C3; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ // CHECK_EQ(static_cast(0x0), t.result_clo_d_si1); ++ // CHECK_EQ(static_cast(0x5), t.result_clo_d_si2); ++ CHECK_EQ(static_cast(0x3), t.result_clz_d_si1); ++ CHECK_EQ(static_cast(0x0), t.result_clz_d_si2); ++ // CHECK_EQ(static_cast(0x0), t.result_cto_d_si1); ++ // CHECK_EQ(static_cast(0x2), t.result_cto_d_si2); ++ CHECK_EQ(static_cast(0x1), t.result_ctz_d_si1); ++ CHECK_EQ(static_cast(0x0), t.result_ctz_d_si2); ++ CHECK_EQ(static_cast(0xfb8017ff781a15c3), t.result_bytepick_d_si1); ++ CHECK_EQ(static_cast(0x710cde0000000000), t.result_bytepick_d_si2); ++ CHECK_EQ(static_cast(0xc0100921718bde0c), t.result_revb_4h_si1); ++ CHECK_EQ(static_cast(0x80fbff171a78c315), t.result_revb_4h_si2); ++ CHECK_EQ(static_cast(0x921c010de0c718b), t.result_revb_2w_si1); ++ CHECK_EQ(static_cast(0xff1780fbc3151a78), t.result_revb_2w_si2); ++ CHECK_EQ(static_cast(0xde0c718b0921c010), t.result_revb_d_si1); ++ CHECK_EQ(static_cast(0xc3151a78ff1780fb), t.result_revb_d_si2); ++ CHECK_EQ(static_cast(0x210910c00cde8b71), t.result_revh_2w_si1); ++ CHECK_EQ(static_cast(0x17fffb8015c3781a), t.result_revh_2w_si2); ++ CHECK_EQ(static_cast(0xcde8b71210910c0), t.result_revh_d_si1); ++ CHECK_EQ(static_cast(0x15c3781a17fffb80), t.result_revh_d_si2); ++ CHECK_EQ(static_cast(0x8038490d18e307b), t.result_bitrev_8b_si1); ++ CHECK_EQ(static_cast(0xdf01e8ff1e58a8c3), t.result_bitrev_8b_si2); ++ CHECK_EQ(static_cast(0x7b308ed190840308), t.result_bitrev_d_si1); ++ CHECK_EQ(static_cast(0xc3a8581effe801df), t.result_bitrev_d_si2); ++ CHECK_EQ(static_cast(0x1e), t.result_bstrins_d_si1); ++ CHECK_EQ(static_cast(0x81a15c3000), t.result_bstrins_d_si2); ++ CHECK_EQ(static_cast(0x1e), t.result_bstrpick_d_si1); ++ CHECK_EQ(static_cast(0xfb80), t.result_bstrpick_d_si2); ++ CHECK_EQ(static_cast(0), t.result_maskeqz_si1); ++ CHECK_EQ(static_cast(0xFB8017FF781A15C3), t.result_maskeqz_si2); ++ CHECK_EQ(static_cast(0x10C021098B710CDE), t.result_masknez_si1); ++ CHECK_EQ(static_cast(0), t.result_masknez_si2); ++} ++ ++uint64_t run_beq(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope 
scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ beq(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BEQ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBeq { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBeq tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 0, -6, 0x3 }, ++ { 1, 1, -3, 0x30 }, ++ { -2, -2, 3, 0x300 }, ++ { 3, -3, 6, 0 }, ++ { 4, 4, 6, 0x700 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBeq); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_beq(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bne(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bne(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BNE) { ++ CcTest::InitializeVM(); ++ struct TestCaseBne { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBne tc[] = { ++ // value1, value2, offset, expected_res ++ { 1, -1, -6, 0x3 }, ++ { 2, -2, -3, 0x30 }, ++ { 3, -3, 3, 0x300 }, ++ { 4, -4, 6, 0x700 }, ++ { 0, 0, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBne); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bne(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_blt(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ 
addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ blt(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BLT) { ++ CcTest::InitializeVM(); ++ struct TestCaseBlt { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBlt tc[] = { ++ // value1, value2, offset, expected_res ++ { -1, 1, -6, 0x3 }, ++ { -2, 2, -3, 0x30 }, ++ { -3, 3, 3, 0x300 }, ++ { -4, 4, 6, 0x700 }, ++ { 5, -5, 6, 0 }, ++ { 0, 0, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBlt); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_blt(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bge(uint64_t value1, uint64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bge(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BGE) { ++ CcTest::InitializeVM(); ++ struct TestCaseBge { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBge tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 0, -6, 0x3 }, ++ { 1, 1, -3, 0x30 }, ++ { 2, -2, 3, 0x300 }, ++ { 3, -3, 6, 0x700 }, ++ { -4, 4, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBge); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bge(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bltu(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bltu(a0, 
a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BLTU) { ++ CcTest::InitializeVM(); ++ struct TestCaseBltu { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBltu tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 1, -6, 0x3 }, ++ { 1, -1, -3, 0x30 }, ++ { 2, -2, 3, 0x300 }, ++ { 3, -3, 6, 0x700 }, ++ { 4, 4, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBltu); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bltu(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bgeu(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bgeu(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BGEU) { ++ CcTest::InitializeVM(); ++ struct TestCaseBgeu { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBgeu tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 0, -6, 0x3 }, ++ { -1, 1, -3, 0x30 }, ++ { -2, 2, 3, 0x300 }, ++ { -3, 3, 6, 0x700 }, ++ { 4, -4, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBgeu); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bgeu(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_beqz(int64_t value, int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ beqz(a0, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 
0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BEQZ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBeqz { ++ int64_t value; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBeqz tc[] = { ++ // value, offset, expected_res ++ { 0, -6, 0x3 }, ++ { 0, -3, 0x30 }, ++ { 0, 3, 0x300 }, ++ { 0, 6, 0x700 }, ++ { 1, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBeqz); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_beqz(tc[i].value, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bnez_b(int64_t value, int32_t offset) { ++ // bnez, b. ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(5); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(2); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bnez(a0, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(-4); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(-7); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BNEZ_B) { ++ CcTest::InitializeVM(); ++ struct TestCaseBnez { ++ int64_t value; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBnez tc[] = { ++ // value, offset, expected_res ++ { 1, -6, 0x3 }, ++ { -2, -3, 0x30 }, ++ { 3, 3, 0x300 }, ++ { -4, 6, 0x700 }, ++ { 0, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBnez); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bnez_b(tc[i].value, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bl(int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block; ++ __ li(a2, 0l); ++ __ push(ra); // push is implemented by two instructions, addi_d and st_d ++ __ b(&main_block); ++ ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bl(offset); ++ __ or_(a0, a2, zero_reg); ++ __ pop(ra); // pop is implemented by two instructions, ld_d and addi_d. 
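// Note: as in the other run_* branch helpers above, the code around the branch is
// laid out as small two-addi blocks, each accumulating a distinct value into a2
// (0x3, 0x30, 0x300 or 0x700). The raw offset handed to bl() is an instruction
// count relative to the bl itself, so the value copied into a0 below identifies
// exactly which block the branch reached (0 means the branch was not taken).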
++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BL) { ++ CcTest::InitializeVM(); ++ struct TestCaseBl { ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBl tc[] = { ++ // offset, expected_res ++ { -6, 0x3 }, ++ { -3, 0x30 }, ++ { 5, 0x300 }, ++ { 8, 0x700 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bl(tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++TEST(PCADD) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ __ push(ra); ++ ++ // pcaddi ++ __ li(a4, 0x1FFFFC); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0xFFE00000)); ++ ++ __ bl(1); ++ __ pcaddi(a3, 0x7FFFF); ++ __ add_d(a2, ra, a4); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddi(a3, 0); ++ __ add_d(a2, ra, a5); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddi(a3, 0x80000); ++ __ add_d(a2, ra, a6); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ // pcaddu12i ++ __ li(a4, 0x7FFFF000); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0x80000000)); ++ ++ __ bl(1); ++ __ pcaddu12i(a2, 0x7FFFF); ++ __ add_d(a3, ra, a4); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ bl(1); ++ __ pcaddu12i(a2, 0); ++ __ add_d(a3, ra, a5); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ bl(1); ++ __ pcaddu12i(a2, 0x80000); ++ __ add_d(a3, ra, a6); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ // pcaddu18i ++ __ li(a4, 0x1FFFFC0000); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0xFFFFFFE000000000)); ++ ++ __ bl(1); ++ __ pcaddu18i(a2, 0x7FFFF); ++ __ add_d(a3, ra, a4); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddu18i(a2, 0); ++ __ add_d(a3, ra, a5); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddu18i(a2, 0x80000); ++ __ add_d(a3, ra, a6); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ // pcalau12i ++ __ li(a4, 0x7FFFF000); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0x80000000)); ++ __ li(a7, static_cast(0xFFFFFFFFFFFFF000)); ++ ++ __ bl(1); ++ __ pcalau12i(a3, 0x7FFFF); ++ __ add_d(a2, ra, a4); ++ __ and_(t0, a2, a7); ++ __ and_(t1, a3, a7); ++ __ Branch(&error, ne, t0, Operand(t1)); ++ ++ __ bl(1); ++ __ pcalau12i(a3, 0); ++ __ add_d(a2, ra, a5); ++ __ and_(t0, a2, a7); ++ __ and_(t1, a3, a7); ++ __ Branch(&error, ne, t0, Operand(t1)); ++ ++ __ bl(1); ++ __ pcalau12i(a2, 0x80000); ++ __ add_d(a3, ra, a6); ++ __ and_(t0, a2, a7); ++ __ and_(t1, a3, a7); ++ __ Branch(&error, ne, t0, Operand(t1)); ++ ++ __ li(a0, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a0, 0x666); ++ ++ __ bind(&exit); ++ __ pop(ra); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ 
CHECK_EQ(0x31415926L, res); ++} ++ ++uint64_t run_jirl(int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block; ++ __ li(a2, 0l); ++ __ push(ra); ++ __ b(&main_block); ++ ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ pcaddi(a3, 1); ++ __ jirl(ra, a3, offset); ++ __ or_(a0, a2, zero_reg); ++ __ pop(ra); // pop is implemented by two instructions, ld_d and addi_d. ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(JIRL) { ++ CcTest::InitializeVM(); ++ struct TestCaseJirl { ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseJirl tc[] = { ++ // offset, expected_res ++ { -7, 0x3 }, ++ { -4, 0x30 }, ++ { 5, 0x300 }, ++ { 8, 0x700 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseJirl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_jirl(tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++TEST(LA12) { ++ // Test floating point calculate instructions. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ double a; ++ double b; ++ double c; ++ double d; ++ double e; ++ double f; ++ double result_fadd_d; ++ double result_fsub_d; ++ double result_fmul_d; ++ double result_fdiv_d; ++ double result_fmadd_d; ++ double result_fmsub_d; ++ double result_fnmadd_d; ++ double result_fnmsub_d; ++ double result_fsqrt_d; ++ double result_frecip_d; ++ double result_frsqrt_d; ++ double result_fscaleb_d; ++ double result_flogb_d; ++ double result_fcopysign_d; ++ double result_fclass_d; ++ }; ++ T t; ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ // Double precision floating point instructions. 
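// Note: LA12 chains the double-precision arithmetic instructions (fneg_d, fadd_d,
// fsub_d, fmin_d/fmax_d/fmina_d/fmaxa_d, fmul_d, fdiv_d, the fused
// fmadd_d/fmsub_d/fnmadd_d/fnmsub_d, and fsqrt_d) and checks the results against
// precomputed constants; frecip_d/frsqrt_d and the fscaleb/flogb/fcopysign/fclass
// group are left commented out here and not checked.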
++ __ Fld_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(T, b))); ++ ++ __ fneg_d(f10, f8); ++ __ fadd_d(f11, f9, f10); ++ __ Fst_d(f11, MemOperand(a0, offsetof(T, result_fadd_d))); ++ __ fabs_d(f11, f11); ++ __ fsub_d(f12, f11, f9); ++ __ Fst_d(f12, MemOperand(a0, offsetof(T, result_fsub_d))); ++ ++ __ Fld_d(f13, MemOperand(a0, offsetof(T, c))); ++ __ Fld_d(f14, MemOperand(a0, offsetof(T, d))); ++ __ Fld_d(f15, MemOperand(a0, offsetof(T, e))); ++ ++ __ fmin_d(f16, f13, f14); ++ __ fmul_d(f17, f15, f16); ++ __ Fst_d(f17, MemOperand(a0, offsetof(T, result_fmul_d))); ++ __ fmax_d(f18, f13, f14); ++ __ fdiv_d(f19, f15, f18); ++ __ Fst_d(f19, MemOperand(a0, offsetof(T, result_fdiv_d))); ++ ++ __ fmina_d(f16, f13, f14); ++ __ fmadd_d(f18, f17, f15, f16); ++ __ Fst_d(f18, MemOperand(a0, offsetof(T, result_fmadd_d))); ++ __ fnmadd_d(f19, f17, f15, f16); ++ __ Fst_d(f19, MemOperand(a0, offsetof(T, result_fnmadd_d))); ++ __ fmaxa_d(f16, f13, f14); ++ __ fmsub_d(f20, f17, f15, f16); ++ __ Fst_d(f20, MemOperand(a0, offsetof(T, result_fmsub_d))); ++ __ fnmsub_d(f21, f17, f15, f16); ++ __ Fst_d(f21, MemOperand(a0, offsetof(T, result_fnmsub_d))); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, f))); ++ __ fsqrt_d(f10, f8); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_fsqrt_d))); ++ //__ frecip_d(f11, f10); ++ //__ frsqrt_d(f12, f8); ++ //__ Fst_d(f11, MemOperand(a0, offsetof(T, result_frecip_d))); ++ //__ Fst_d(f12, MemOperand(a0, offsetof(T, result_frsqrt_d))); ++ ++ /*__ fscaleb_d(f16, f13, f15); ++ __ flogb_d(f17, f15); ++ __ fcopysign_d(f18, f8, f9); ++ __ fclass_d(f19, f9); ++ __ Fst_d(f16, MemOperand(a0, offsetof(T, result_fscaleb_d))); ++ __ Fst_d(f17, MemOperand(a0, offsetof(T, result_flogb_d))); ++ __ Fst_d(f18, MemOperand(a0, offsetof(T, result_fcopysign_d))); ++ __ Fst_d(f19, MemOperand(a0, offsetof(T, result_fclass_d)));*/ ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ // Double test values. 
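// Note: with the inputs below, f10 = -a = -1.5e14, so result_fadd_d = b + f10 =
// -2.75e11 - 1.5e14 = -1.50275e14, and result_fsub_d = |result_fadd_d| - b =
// 1.50275e14 + 2.75e11 = 1.5055e14. Similarly, fmin_d(c, d) = -2.75 gives
// result_fmul_d = 120.0 * -2.75 = -330.0, and fmadd_d then computes
// f17 * f15 + fmina_d(c, d) = -330.0 * 120.0 + 1.5 = -39598.5, matching the
// expected constants checked after the call.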
++ t.a = 1.5e14; ++ t.b = -2.75e11; ++ t.c = 1.5; ++ t.d = -2.75; ++ t.e = 120.0; ++ t.f = 120.44; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(-1.502750e14), t.result_fadd_d); ++ CHECK_EQ(static_cast(1.505500e14), t.result_fsub_d); ++ CHECK_EQ(static_cast(-3.300000e02), t.result_fmul_d); ++ CHECK_EQ(static_cast(8.000000e01), t.result_fdiv_d); ++ CHECK_EQ(static_cast(-3.959850e04), t.result_fmadd_d); ++ CHECK_EQ(static_cast(-3.959725e04), t.result_fmsub_d); ++ CHECK_EQ(static_cast(3.959850e04), t.result_fnmadd_d); ++ CHECK_EQ(static_cast(3.959725e04), t.result_fnmsub_d); ++ CHECK_EQ(static_cast(10.97451593465515908537), t.result_fsqrt_d); ++ // CHECK_EQ(static_cast( 8.164965e-08), t.result_frecip_d); ++ // CHECK_EQ(static_cast( 8.164966e-08), t.result_frsqrt_d); ++ // CHECK_EQ(static_cast(), t.result_fscaleb_d); ++ // CHECK_EQ(static_cast( 6.906891), t.result_flogb_d); ++ // CHECK_EQ(static_cast( 2.75e11), t.result_fcopysign_d); ++ // CHECK_EQ(static_cast(), t.result_fclass_d); ++} ++ ++TEST(LA13) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ float a; ++ float b; ++ float c; ++ float d; ++ float e; ++ float result_fadd_s; ++ float result_fsub_s; ++ float result_fmul_s; ++ float result_fdiv_s; ++ float result_fmadd_s; ++ float result_fmsub_s; ++ float result_fnmadd_s; ++ float result_fnmsub_s; ++ float result_fsqrt_s; ++ float result_frecip_s; ++ float result_frsqrt_s; ++ float result_fscaleb_s; ++ float result_flogb_s; ++ float result_fcopysign_s; ++ float result_fclass_s; ++ }; ++ T t; ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ // Float precision floating point instructions. ++ __ Fld_s(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(T, b))); ++ ++ __ fneg_s(f10, f8); ++ __ fadd_s(f11, f9, f10); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_fadd_s))); ++ __ fabs_s(f11, f11); ++ __ fsub_s(f12, f11, f9); ++ __ Fst_s(f12, MemOperand(a0, offsetof(T, result_fsub_s))); ++ ++ __ Fld_s(f13, MemOperand(a0, offsetof(T, c))); ++ __ Fld_s(f14, MemOperand(a0, offsetof(T, d))); ++ __ Fld_s(f15, MemOperand(a0, offsetof(T, e))); ++ ++ __ fmin_s(f16, f13, f14); ++ __ fmul_s(f17, f15, f16); ++ __ Fst_s(f17, MemOperand(a0, offsetof(T, result_fmul_s))); ++ __ fmax_s(f18, f13, f14); ++ __ fdiv_s(f19, f15, f18); ++ __ Fst_s(f19, MemOperand(a0, offsetof(T, result_fdiv_s))); ++ ++ __ fmina_s(f16, f13, f14); ++ __ fmadd_s(f18, f17, f15, f16); ++ __ Fst_s(f18, MemOperand(a0, offsetof(T, result_fmadd_s))); ++ __ fnmadd_s(f19, f17, f15, f16); ++ __ Fst_s(f19, MemOperand(a0, offsetof(T, result_fnmadd_s))); ++ __ fmaxa_s(f16, f13, f14); ++ __ fmsub_s(f20, f17, f15, f16); ++ __ Fst_s(f20, MemOperand(a0, offsetof(T, result_fmsub_s))); ++ __ fnmsub_s(f21, f17, f15, f16); ++ __ Fst_s(f21, MemOperand(a0, offsetof(T, result_fnmsub_s))); ++ ++ __ fsqrt_s(f10, f8); ++ //__ frecip_s(f11, f10); ++ //__ frsqrt_s(f12, f8); ++ __ Fst_s(f10, MemOperand(a0, offsetof(T, result_fsqrt_s))); ++ //__ Fst_s(f11, MemOperand(a0, offsetof(T, result_frecip_s))); ++ //__ Fst_s(f12, MemOperand(a0, offsetof(T, result_frsqrt_s))); ++ ++ /*__ fscaleb_s(f16, f13, f15); ++ __ flogb_s(f17, f15); ++ __ fcopysign_s(f18, f8, f9); ++ __ fclass_s(f19, f9); ++ __ Fst_s(f16, MemOperand(a0, offsetof(T, result_fscaleb_s))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(T, result_flogb_s))); ++ __ Fst_s(f18, MemOperand(a0, offsetof(T, result_fcopysign_s))); ++ __ Fst_s(f19, MemOperand(a0, offsetof(T, 
result_fclass_s)));*/ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ // Float test values. ++ t.a = 1.5e6; ++ t.b = -2.75e4; ++ t.c = 1.5; ++ t.d = -2.75; ++ t.e = 120.0; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(-1.527500e06), t.result_fadd_s); ++ CHECK_EQ(static_cast(1.555000e06), t.result_fsub_s); ++ CHECK_EQ(static_cast(-3.300000e02), t.result_fmul_s); ++ CHECK_EQ(static_cast(8.000000e01), t.result_fdiv_s); ++ CHECK_EQ(static_cast(-3.959850e04), t.result_fmadd_s); ++ CHECK_EQ(static_cast(-3.959725e04), t.result_fmsub_s); ++ CHECK_EQ(static_cast(3.959850e04), t.result_fnmadd_s); ++ CHECK_EQ(static_cast(3.959725e04), t.result_fnmsub_s); ++ CHECK_EQ(static_cast(1224.744873), t.result_fsqrt_s); ++ // CHECK_EQ(static_cast( 8.164966e-04), t.result_frecip_s); ++ // CHECK_EQ(static_cast( 8.164966e-04), t.result_frsqrt_s); ++ // CHECK_EQ(static_cast(), t.result_fscaleb_s); ++ // CHECK_EQ(static_cast( 6.906890), t.result_flogb_s); ++ // CHECK_EQ(static_cast( 2.75e4), t.result_fcopysign_s); ++ // CHECK_EQ(static_cast(), t.result_fclass_s); ++} ++ ++TEST(FCMP_COND) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double dTrue; ++ double dFalse; ++ double dOp1; ++ double dOp2; ++ double dCaf; ++ double dCun; ++ double dCeq; ++ double dCueq; ++ double dClt; ++ double dCult; ++ double dCle; ++ double dCule; ++ double dCne; ++ double dCor; ++ double dCune; ++ double dSaf; ++ double dSun; ++ double dSeq; ++ double dSueq; ++ double dSlt; ++ double dSult; ++ double dSle; ++ double dSule; ++ double dSne; ++ double dSor; ++ double dSune; ++ float fTrue; ++ float fFalse; ++ float fOp1; ++ float fOp2; ++ float fCaf; ++ float fCun; ++ float fCeq; ++ float fCueq; ++ float fClt; ++ float fCult; ++ float fCle; ++ float fCule; ++ float fCne; ++ float fCor; ++ float fCune; ++ float fSaf; ++ float fSun; ++ float fSeq; ++ float fSueq; ++ float fSlt; ++ float fSult; ++ float fSle; ++ float fSule; ++ float fSne; ++ float fSor; ++ float fSune; ++ }; ++ ++ TestFloat test; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, dOp1))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, dOp2))); ++ ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, fOp1))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, fOp2))); ++ ++ __ Fld_d(f12, MemOperand(a0, offsetof(TestFloat, dFalse))); ++ __ Fld_d(f13, MemOperand(a0, offsetof(TestFloat, dTrue))); ++ ++ __ Fld_s(f14, MemOperand(a0, offsetof(TestFloat, fFalse))); ++ __ Fld_s(f15, MemOperand(a0, offsetof(TestFloat, fTrue))); ++ ++ __ fcmp_cond_d(CAF, f8, f9, FCC0); ++ __ fcmp_cond_s(CAF, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCaf))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCaf))); ++ ++ __ fcmp_cond_d(CUN, f8, f9, FCC0); ++ __ fcmp_cond_s(CUN, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCun))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCun))); ++ ++ __ fcmp_cond_d(CEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(CEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCeq))); ++ 
__ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCeq))); ++ ++ __ fcmp_cond_d(CUEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(CUEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCueq))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCueq))); ++ ++ __ fcmp_cond_d(CLT, f8, f9, FCC0); ++ __ fcmp_cond_s(CLT, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dClt))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fClt))); ++ ++ __ fcmp_cond_d(CULT, f8, f9, FCC0); ++ __ fcmp_cond_s(CULT, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCult))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCult))); ++ ++ __ fcmp_cond_d(CLE, f8, f9, FCC0); ++ __ fcmp_cond_s(CLE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCle))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCle))); ++ ++ __ fcmp_cond_d(CULE, f8, f9, FCC0); ++ __ fcmp_cond_s(CULE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCule))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCule))); ++ ++ __ fcmp_cond_d(CNE, f8, f9, FCC0); ++ __ fcmp_cond_s(CNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCne))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCne))); ++ ++ __ fcmp_cond_d(COR, f8, f9, FCC0); ++ __ fcmp_cond_s(COR, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCor))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCor))); ++ ++ __ fcmp_cond_d(CUNE, f8, f9, FCC0); ++ __ fcmp_cond_s(CUNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCune))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCune))); ++ ++ /* __ fcmp_cond_d(SAF, f8, f9, FCC0); ++ __ fcmp_cond_s(SAF, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSaf))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSaf))); ++ ++ __ fcmp_cond_d(SUN, f8, f9, FCC0); ++ __ fcmp_cond_s(SUN, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSun))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSun))); ++ ++ __ fcmp_cond_d(SEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(SEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSeq))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSeq))); ++ ++ __ fcmp_cond_d(SUEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(SUEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSueq))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSueq))); ++ ++ __ fcmp_cond_d(SLT, f8, f9, FCC0); ++ __ fcmp_cond_s(SLT, f10, f11, FCC1); ++ __ fsel(f16, f12, f13, FCC0); ++ __ fsel(f17, f14, f15, FCC1); ++ __ Fld_d(f16, MemOperand(a0, offsetof(TestFloat, dSlt))); ++ __ Fst_d(f17, MemOperand(a0, 
offsetof(TestFloat, fSlt))); ++ ++ __ fcmp_cond_d(SULT, f8, f9, FCC0); ++ __ fcmp_cond_s(SULT, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSult))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSult))); ++ ++ __ fcmp_cond_d(SLE, f8, f9, FCC0); ++ __ fcmp_cond_s(SLE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSle))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSle))); ++ ++ __ fcmp_cond_d(SULE, f8, f9, FCC0); ++ __ fcmp_cond_s(SULE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSule))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSule))); ++ ++ __ fcmp_cond_d(SNE, f8, f9, FCC0); ++ __ fcmp_cond_s(SNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSne))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSne))); ++ ++ __ fcmp_cond_d(SOR, f8, f9, FCC0); ++ __ fcmp_cond_s(SOR, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSor))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSor))); ++ ++ __ fcmp_cond_d(SUNE, f8, f9, FCC0); ++ __ fcmp_cond_s(SUNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSune))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSune)));*/ ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.dTrue = 1234.0; ++ test.dFalse = 0.0; ++ test.fTrue = 12.0; ++ test.fFalse = 0.0; ++ ++ test.dOp1 = 2.0; ++ test.dOp2 = 3.0; ++ test.fOp1 = 2.0; ++ test.fOp2 = 3.0; ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.dCaf, test.dFalse); ++ CHECK_EQ(test.fCaf, test.fFalse); ++ CHECK_EQ(test.dCun, test.dFalse); ++ CHECK_EQ(test.fCun, test.fFalse); ++ CHECK_EQ(test.dCeq, test.dFalse); ++ CHECK_EQ(test.fCeq, test.fFalse); ++ CHECK_EQ(test.dCueq, test.dFalse); ++ CHECK_EQ(test.fCueq, test.fFalse); ++ CHECK_EQ(test.dClt, test.dTrue); ++ CHECK_EQ(test.fClt, test.fTrue); ++ CHECK_EQ(test.dCult, test.dTrue); ++ CHECK_EQ(test.fCult, test.fTrue); ++ CHECK_EQ(test.dCle, test.dTrue); ++ CHECK_EQ(test.fCle, test.fTrue); ++ CHECK_EQ(test.dCule, test.dTrue); ++ CHECK_EQ(test.fCule, test.fTrue); ++ CHECK_EQ(test.dCne, test.dTrue); ++ CHECK_EQ(test.fCne, test.fTrue); ++ CHECK_EQ(test.dCor, test.dTrue); ++ CHECK_EQ(test.fCor, test.fTrue); ++ CHECK_EQ(test.dCune, test.dTrue); ++ CHECK_EQ(test.fCune, test.fTrue); ++ /* CHECK_EQ(test.dSaf, test.dFalse); ++ CHECK_EQ(test.fSaf, test.fFalse); ++ CHECK_EQ(test.dSun, test.dFalse); ++ CHECK_EQ(test.fSun, test.fFalse); ++ CHECK_EQ(test.dSeq, test.dFalse); ++ CHECK_EQ(test.fSeq, test.fFalse); ++ CHECK_EQ(test.dSueq, test.dFalse); ++ CHECK_EQ(test.fSueq, test.fFalse); ++ CHECK_EQ(test.dClt, test.dTrue); ++ CHECK_EQ(test.fClt, test.fTrue); ++ CHECK_EQ(test.dCult, test.dTrue); ++ CHECK_EQ(test.fCult, test.fTrue); ++ CHECK_EQ(test.dSle, test.dTrue); ++ CHECK_EQ(test.fSle, test.fTrue); ++ CHECK_EQ(test.dSule, test.dTrue); ++ CHECK_EQ(test.fSule, test.fTrue); ++ CHECK_EQ(test.dSne, test.dTrue); ++ CHECK_EQ(test.fSne, test.fTrue); ++ 
CHECK_EQ(test.dSor, test.dTrue); ++ CHECK_EQ(test.fSor, test.fTrue); ++ CHECK_EQ(test.dSune, test.dTrue); ++ CHECK_EQ(test.fSune, test.fTrue);*/ ++ ++ test.dOp1 = std::numeric_limits::max(); ++ test.dOp2 = std::numeric_limits::min(); ++ test.fOp1 = std::numeric_limits::min(); ++ test.fOp2 = -std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.dCaf, test.dFalse); ++ CHECK_EQ(test.fCaf, test.fFalse); ++ CHECK_EQ(test.dCun, test.dFalse); ++ CHECK_EQ(test.fCun, test.fFalse); ++ CHECK_EQ(test.dCeq, test.dFalse); ++ CHECK_EQ(test.fCeq, test.fFalse); ++ CHECK_EQ(test.dCueq, test.dFalse); ++ CHECK_EQ(test.fCueq, test.fFalse); ++ CHECK_EQ(test.dClt, test.dFalse); ++ CHECK_EQ(test.fClt, test.fFalse); ++ CHECK_EQ(test.dCult, test.dFalse); ++ CHECK_EQ(test.fCult, test.fFalse); ++ CHECK_EQ(test.dCle, test.dFalse); ++ CHECK_EQ(test.fCle, test.fFalse); ++ CHECK_EQ(test.dCule, test.dFalse); ++ CHECK_EQ(test.fCule, test.fFalse); ++ CHECK_EQ(test.dCne, test.dTrue); ++ CHECK_EQ(test.fCne, test.fTrue); ++ CHECK_EQ(test.dCor, test.dTrue); ++ CHECK_EQ(test.fCor, test.fTrue); ++ CHECK_EQ(test.dCune, test.dTrue); ++ CHECK_EQ(test.fCune, test.fTrue); ++ /* CHECK_EQ(test.dSaf, test.dFalse); ++ CHECK_EQ(test.fSaf, test.fFalse); ++ CHECK_EQ(test.dSun, test.dFalse); ++ CHECK_EQ(test.fSun, test.fFalse); ++ CHECK_EQ(test.dSeq, test.dFalse); ++ CHECK_EQ(test.fSeq, test.fFalse); ++ CHECK_EQ(test.dSueq, test.dFalse); ++ CHECK_EQ(test.fSueq, test.fFalse); ++ CHECK_EQ(test.dSlt, test.dFalse); ++ CHECK_EQ(test.fSlt, test.fFalse); ++ CHECK_EQ(test.dSult, test.dFalse); ++ CHECK_EQ(test.fSult, test.fFalse); ++ CHECK_EQ(test.dSle, test.dFalse); ++ CHECK_EQ(test.fSle, test.fFalse); ++ CHECK_EQ(test.dSule, test.dFalse); ++ CHECK_EQ(test.fSule, test.fFalse); ++ CHECK_EQ(test.dSne, test.dTrue); ++ CHECK_EQ(test.fSne, test.fTrue); ++ CHECK_EQ(test.dSor, test.dTrue); ++ CHECK_EQ(test.fSor, test.fTrue); ++ CHECK_EQ(test.dSune, test.dTrue); ++ CHECK_EQ(test.fSune, test.fTrue);*/ ++ ++ test.dOp1 = std::numeric_limits::quiet_NaN(); ++ test.dOp2 = 0.0; ++ test.fOp1 = std::numeric_limits::quiet_NaN(); ++ test.fOp2 = 0.0; ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.dCaf, test.dFalse); ++ CHECK_EQ(test.fCaf, test.fFalse); ++ CHECK_EQ(test.dCun, test.dTrue); ++ CHECK_EQ(test.fCun, test.fTrue); ++ CHECK_EQ(test.dCeq, test.dFalse); ++ CHECK_EQ(test.fCeq, test.fFalse); ++ CHECK_EQ(test.dCueq, test.dTrue); ++ CHECK_EQ(test.fCueq, test.fTrue); ++ CHECK_EQ(test.dClt, test.dFalse); ++ CHECK_EQ(test.fClt, test.fFalse); ++ CHECK_EQ(test.dCult, test.dTrue); ++ CHECK_EQ(test.fCult, test.fTrue); ++ CHECK_EQ(test.dCle, test.dFalse); ++ CHECK_EQ(test.fCle, test.fFalse); ++ CHECK_EQ(test.dCule, test.dTrue); ++ CHECK_EQ(test.fCule, test.fTrue); ++ CHECK_EQ(test.dCne, test.dFalse); ++ CHECK_EQ(test.fCne, test.fFalse); ++ CHECK_EQ(test.dCor, test.dFalse); ++ CHECK_EQ(test.fCor, test.fFalse); ++ CHECK_EQ(test.dCune, test.dTrue); ++ CHECK_EQ(test.fCune, test.fTrue); ++ /* CHECK_EQ(test.dSaf, test.dTrue); ++ CHECK_EQ(test.fSaf, test.fTrue); ++ CHECK_EQ(test.dSun, test.dTrue); ++ CHECK_EQ(test.fSun, test.fTrue); ++ CHECK_EQ(test.dSeq, test.dFalse); ++ CHECK_EQ(test.fSeq, test.fFalse); ++ CHECK_EQ(test.dSueq, test.dTrue); ++ CHECK_EQ(test.fSueq, test.fTrue); ++ CHECK_EQ(test.dSlt, test.dFalse); ++ CHECK_EQ(test.fSlt, test.fFalse); ++ CHECK_EQ(test.dSult, test.dTrue); ++ CHECK_EQ(test.fSult, test.fTrue); ++ CHECK_EQ(test.dSle, test.dFalse); ++ CHECK_EQ(test.fSle, test.fFalse); ++ CHECK_EQ(test.dSule, test.dTrue); ++ CHECK_EQ(test.fSule, 
test.fTrue); ++ CHECK_EQ(test.dSne, test.dFalse); ++ CHECK_EQ(test.fSne, test.fFalse); ++ CHECK_EQ(test.dSor, test.dFalse); ++ CHECK_EQ(test.fSor, test.fFalse); ++ CHECK_EQ(test.dSune, test.dTrue); ++ CHECK_EQ(test.fSune, test.fTrue);*/ ++} ++ ++TEST(FCVT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float fcvt_d_s_in; ++ double fcvt_s_d_in; ++ double fcvt_d_s_out; ++ float fcvt_s_d_out; ++ int fcsr; ++ }; ++ TestFloat test; ++ __ xor_(a4, a4, a4); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a4, MemOperand(a0, offsetof(TestFloat, fcsr))); ++ __ movfcsr2gr(a5); ++ __ movgr2fcsr(a4); ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, fcvt_d_s_in))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, fcvt_s_d_in))); ++ __ fcvt_d_s(f10, f8); ++ __ fcvt_s_d(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, fcvt_d_s_out))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestFloat, fcvt_s_d_out))); ++ __ movgr2fcsr(a5); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.fcsr = kRoundToZero; ++ ++ test.fcvt_d_s_in = -0.51; ++ test.fcvt_s_d_in = -0.51; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = 0.49; ++ test.fcvt_s_d_in = 0.49; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = std::numeric_limits::max(); ++ test.fcvt_s_d_in = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = -std::numeric_limits::max(); ++ test.fcvt_s_d_in = -std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = std::numeric_limits::min(); ++ test.fcvt_s_d_in = std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++} ++ ++TEST(FFINT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ int32_t ffint_s_w_in; ++ int64_t ffint_s_l_in; ++ int32_t ffint_d_w_in; ++ int64_t ffint_d_l_in; ++ float ffint_s_w_out; ++ float ffint_s_l_out; ++ double ffint_d_w_out; ++ double ffint_d_l_out; ++ int fcsr; ++ }; ++ TestFloat test; ++ __ xor_(a4, a4, a4); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a4, MemOperand(a0, offsetof(TestFloat, fcsr))); ++ __ movfcsr2gr(a5); ++ __ movgr2fcsr(a4); ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, ffint_s_w_in))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, ffint_s_l_in))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, ffint_d_w_in))); ++ __ Fld_d(f11, MemOperand(a0, offsetof(TestFloat, ffint_d_l_in))); ++ __ ffint_s_w(f12, f8); ++ __ ffint_s_l(f13, f9); ++ __ ffint_d_w(f14, f10); ++ __ ffint_d_l(f15, f11); ++ __ Fst_s(f12, MemOperand(a0, 
offsetof(TestFloat, ffint_s_w_out))); ++ __ Fst_s(f13, MemOperand(a0, offsetof(TestFloat, ffint_s_l_out))); ++ __ Fst_d(f14, MemOperand(a0, offsetof(TestFloat, ffint_d_w_out))); ++ __ Fst_d(f15, MemOperand(a0, offsetof(TestFloat, ffint_d_l_out))); ++ __ movgr2fcsr(a5); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.fcsr = kRoundToZero; ++ ++ test.ffint_s_w_in = -1; ++ test.ffint_s_l_in = -1; ++ test.ffint_d_w_in = -1; ++ test.ffint_d_l_in = -1; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++ ++ test.ffint_s_w_in = 1; ++ test.ffint_s_l_in = 1; ++ test.ffint_d_w_in = 1; ++ test.ffint_d_l_in = 1; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++ ++ test.ffint_s_w_in = std::numeric_limits::max(); ++ test.ffint_s_l_in = std::numeric_limits::max(); ++ test.ffint_d_w_in = std::numeric_limits::max(); ++ test.ffint_d_l_in = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++ ++ test.ffint_s_w_in = std::numeric_limits::min(); ++ test.ffint_s_l_in = std::numeric_limits::min(); ++ test.ffint_d_w_in = std::numeric_limits::min(); ++ test.ffint_d_l_in = std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++} ++ ++TEST(FTINT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ int fcsr; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RN_W[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RN_L[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ double outputs_RZ_W[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RZ_L[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ double outputs_RP_W[kTableLength] = { ++ 
4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RP_L[kTableLength] = { ++ 4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ double outputs_RM_W[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RM_L[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ int fcsr_inputs[4] = {kRoundToNearest, kRoundToZero, kRoundToPlusInf, ++ kRoundToMinusInf}; ++ double* outputs[8] = { ++ outputs_RN_W, outputs_RN_L, outputs_RZ_W, outputs_RZ_L, ++ outputs_RP_W, outputs_RP_L, outputs_RM_W, outputs_RM_L, ++ }; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a5, MemOperand(a0, offsetof(Test, fcsr))); ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(a5); ++ __ ftint_w_d(f10, f8); ++ __ ftint_w_s(f11, f9); ++ __ ftint_l_d(f12, f8); ++ __ ftint_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int j = 0; j < 4; j++) { ++ test.fcsr = fcsr_inputs[j]; ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs[2 * j][i]); ++ CHECK_EQ(test.d, outputs[2 * j][i]); ++ CHECK_EQ(test.e, outputs[2 * j + 1][i]); ++ CHECK_EQ(test.f, outputs[2 * j + 1][i]); ++ } ++ } ++} ++ ++TEST(FTINTRM) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrm_w_d(f10, f8); ++ __ ftintrm_w_s(f11, f9); ++ __ ftintrm_l_d(f12, f8); ++ __ ftintrm_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = 
inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FTINTRP) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrp_w_d(f10, f8); ++ __ ftintrp_w_s(f11, f9); ++ __ ftintrp_l_d(f12, f8); ++ __ ftintrp_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FTINTRZ) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrz_w_d(f10, f8); ++ __ ftintrz_w_s(f11, f9); ++ __ ftintrz_l_d(f12, f8); ++ __ ftintrz_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, 
&desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FTINTRNE) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrne_w_d(f10, f8); ++ __ ftintrne_w_s(f11, f9); ++ __ ftintrne_l_d(f12, f8); ++ __ ftintrne_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FRINT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ double c; ++ float d; ++ int fcsr; ++ }; ++ Test test; ++ ++ const int kTableLength = 32; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E+308, 6.27463370218383111104242366943E-307, ++ 309485009821345068724781056.89, ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ -2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max() - 0.1, ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 
1.7976931348623157E+38, 6.27463370218383111104242366943E-37, ++ 309485009821345068724781056.89, ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ -2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 0.6, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RN_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 0, ++ 309485009821345068724781057.0, ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RN_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 0, ++ 309485009821345068724781057.0, ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RZ_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RZ_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max() - 1, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RP_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 1, ++ 309485009821345068724781057.0, ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, 
-2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RP_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 1, ++ 309485009821345068724781057.0, ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RM_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RM_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ // clang-format on ++ ++ int fcsr_inputs[4] = {kRoundToNearest, kRoundToZero, kRoundToPlusInf, ++ kRoundToMinusInf}; ++ double* outputs_d[4] = {outputs_RN_D, outputs_RZ_D, outputs_RP_D, ++ outputs_RM_D}; ++ float* outputs_s[4] = {outputs_RN_S, outputs_RZ_S, outputs_RP_S, ++ outputs_RM_S}; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a5, MemOperand(a0, offsetof(Test, fcsr))); ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(a5); ++ __ frint_d(f10, f8); ++ __ frint_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int j = 0; j < 4; j++) { ++ test.fcsr = fcsr_inputs[j]; ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_d[j][i]); ++ 
CHECK_EQ(test.d, outputs_s[j][i]); ++ } ++ } ++} ++ ++TEST(FMOV) { ++ const int kTableLength = 7; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ float b; ++ double c; ++ float d; ++ }; ++ ++ TestFloat test; ++ ++ // clang-format off ++ double inputs_D[kTableLength] = { ++ 5.3, -5.3, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ float inputs_S[kTableLength] = { ++ 4.8, -4.8, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ ++ double outputs_D[kTableLength] = { ++ 5.3, -5.3, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ ++ float outputs_S[kTableLength] = { ++ 4.8, -4.8, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fmov_d(f10, f8); ++ __ fmov_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_D[i]; ++ test.b = inputs_S[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_D[i]); ++ CHECK_EQ(test.d, outputs_S[i]); ++ } ++} ++ ++TEST(LA14) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ double a; ++ double b; ++ double c; ++ double d; ++ int64_t high; ++ int64_t low; ++ }; ++ T t; ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(T, b))); ++ ++ __ movfr2gr_s(a4, f8); ++ __ movfrh2gr_s(a5, f8); ++ __ movfr2gr_d(a6, f9); ++ ++ __ movgr2fr_w(f9, a4); ++ __ movgr2frh_w(f9, a5); ++ __ movgr2fr_d(f8, a6); ++ ++ __ Fst_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fst_d(f9, MemOperand(a0, offsetof(T, c))); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, d))); ++ __ movfrh2gr_s(a4, f8); ++ __ movfr2gr_s(a5, f8); ++ ++ __ St_d(a4, MemOperand(a0, offsetof(T, high))); ++ __ St_d(a5, MemOperand(a0, offsetof(T, low))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ t.a = 1.5e22; ++ t.b = 2.75e11; ++ t.c = 17.17; ++ t.d = -2.75e11; ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(2.75e11, t.a); ++ CHECK_EQ(2.75e11, t.b); ++ CHECK_EQ(1.5e22, t.c); ++ CHECK_EQ(static_cast(0xFFFFFFFFC25001D1L), t.high); ++ CHECK_EQ(static_cast(0xFFFFFFFFBF800000L), t.low); ++ ++ t.a = -1.5e22; ++ t.b = -2.75e11; ++ t.c = 17.17; ++ t.d = 274999868928.0; ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(-2.75e11, t.a); ++ CHECK_EQ(-2.75e11, t.b); ++ CHECK_EQ(-1.5e22, t.c); ++ CHECK_EQ(static_cast(0x425001D1L), t.high); ++ CHECK_EQ(static_cast(0x3F800000L), t.low); ++} ++ ++uint64_t run_bceqz(int fcc_value, int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; 
++ __ li(a2, 0); ++ __ li(t0, fcc_value); ++ __ b(&main_block); ++ // Block 1 ++ for (int32_t i = -104; i <= -55; ++i) { ++ __ addi_d(a2, a2, 0x1); ++ } ++ __ b(&L); ++ ++ // Block 2 ++ for (int32_t i = -53; i <= -4; ++i) { ++ __ addi_d(a2, a2, 0x10); ++ } ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ movcf2gr(t1, FCC0); ++ __ movgr2cf(FCC0, t0); ++ __ bceqz(FCC0, offset); ++ __ bind(&L); ++ __ movgr2cf(FCC0, t1); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ for (int32_t i = 4; i <= 53; ++i) { ++ __ addi_d(a2, a2, 0x100); ++ } ++ __ b(&L); ++ ++ // Block 5 ++ for (int32_t i = 55; i <= 104; ++i) { ++ __ addi_d(a2, a2, 0x300); ++ } ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BCEQZ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBceqz { ++ int fcc; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBceqz tc[] = { ++ // fcc, offset, expected_res ++ { 0, -90, 0x24 }, ++ { 0, -27, 0x180 }, ++ { 0, 47, 0x700 }, ++ { 0, 70, 0x6900 }, ++ { 1, -27, 0 }, ++ { 1, 47, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBceqz); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bceqz(tc[i].fcc, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bcnez(int fcc_value, int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0); ++ __ li(t0, fcc_value); ++ __ b(&main_block); ++ // Block 1 ++ for (int32_t i = -104; i <= -55; ++i) { ++ __ addi_d(a2, a2, 0x1); ++ } ++ __ b(&L); ++ ++ // Block 2 ++ for (int32_t i = -53; i <= -4; ++i) { ++ __ addi_d(a2, a2, 0x10); ++ } ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ movcf2gr(t1, FCC0); ++ __ movgr2cf(FCC0, t0); ++ __ bcnez(FCC0, offset); ++ __ bind(&L); ++ __ movgr2cf(FCC0, t1); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ for (int32_t i = 4; i <= 53; ++i) { ++ __ addi_d(a2, a2, 0x100); ++ } ++ __ b(&L); ++ ++ // Block 5 ++ for (int32_t i = 55; i <= 104; ++i) { ++ __ addi_d(a2, a2, 0x300); ++ } ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BCNEZ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBcnez { ++ int fcc; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBcnez tc[] = { ++ // fcc, offset, expected_res ++ { 1, -90, 0x24 }, ++ { 1, -27, 0x180 }, ++ { 1, 47, 0x700 }, ++ { 1, 70, 0x6900 }, ++ { 0, -27, 0 }, ++ { 0, 47, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBcnez); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bcnez(tc[i].fcc, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++TEST(jump_tables1) { ++ // Test jump tables with forward jumps. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ const int kNumCases = 512; ++ int values[kNumCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kNumCases]; ++ ++ __ addi_d(sp, sp, -8); ++ __ St_d(ra, MemOperand(sp, 0)); ++ __ Align(8); ++ ++ Label done; ++ { ++ __ BlockTrampolinePoolFor(kNumCases * 2 + 6); ++ __ pcaddi(ra, 2); ++ __ slli_d(t7, a0, 3); ++ __ add_d(t7, t7, ra); ++ __ Ld_d(t7, MemOperand(t7, 4 * kInstrSize)); ++ __ jirl(zero_reg, t7, 0); ++ __ nop(); ++ for (int i = 0; i < kNumCases; ++i) { ++ __ dd(&labels[i]); ++ } ++ } ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ __ lu12i_w(a2, (values[i] >> 12) & 0xFFFFF); ++ __ ori(a2, a2, values[i] & 0xFFF); ++ __ b(&done); ++ __ nop(); ++ } ++ ++ __ bind(&done); ++ __ Ld_d(ra, MemOperand(sp, 0)); ++ __ addi_d(sp, sp, 8); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CHECK_EQ(0, assm.UnboundLabelsCount()); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ((values[i]), static_cast(res)); ++ } ++} ++ ++TEST(jump_tables2) { ++ // Test jump tables with backward jumps. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ const int kNumCases = 512; ++ int values[kNumCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kNumCases]; ++ ++ __ addi_d(sp, sp, -8); ++ __ St_d(ra, MemOperand(sp, 0)); ++ ++ Label done, dispatch; ++ __ b(&dispatch); ++ __ nop(); ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ __ lu12i_w(a2, (values[i] >> 12) & 0xFFFFF); ++ __ ori(a2, a2, values[i] & 0xFFF); ++ __ b(&done); ++ __ nop(); ++ } ++ ++ __ Align(8); ++ __ bind(&dispatch); ++ { ++ __ BlockTrampolinePoolFor(kNumCases * 2 + 6); ++ __ pcaddi(ra, 2); ++ __ slli_d(t7, a0, 3); ++ __ add_d(t7, t7, ra); ++ __ Ld_d(t7, MemOperand(t7, 4 * kInstrSize)); ++ __ jirl(zero_reg, t7, 0); ++ __ nop(); ++ for (int i = 0; i < kNumCases; ++i) { ++ __ dd(&labels[i]); ++ } ++ } ++ ++ __ bind(&done); ++ __ Ld_d(ra, MemOperand(sp, 0)); ++ __ addi_d(sp, sp, 8); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ(values[i], res); ++ } ++} ++ ++TEST(jump_tables3) { ++ // Test jump tables with backward jumps and embedded heap objects. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ const int kNumCases = 512; ++ Handle values[kNumCases]; ++ for (int i = 0; i < kNumCases; ++i) { ++ double value = isolate->random_number_generator()->NextDouble(); ++ values[i] = isolate->factory()->NewHeapNumber(value); ++ } ++ Label labels[kNumCases]; ++ Object obj; ++ int64_t imm64; ++ ++ __ addi_d(sp, sp, -8); ++ __ St_d(ra, MemOperand(sp, 0)); ++ ++ Label done, dispatch; ++ __ b(&dispatch); ++ __ nop(); ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ obj = *values[i]; ++ imm64 = obj.ptr(); ++ __ lu12i_w(a2, (imm64 >> 12) & 0xFFFFF); ++ __ ori(a2, a2, imm64 & 0xFFF); ++ __ lu32i_d(a2, (imm64 >> 32) & 0xFFFFF); ++ __ lu52i_d(a2, a2, (imm64 >> 52) & 0xFFF); ++ __ b(&done); ++ } ++ ++ __ Align(8); ++ __ bind(&dispatch); ++ { ++ __ BlockTrampolinePoolFor(kNumCases * 2 + 6); ++ __ pcaddi(ra, 2); ++ __ slli_d(t7, a0, 3); // In delay slot. ++ __ add_d(t7, t7, ra); ++ __ Ld_d(t7, MemOperand(t7, 4 * kInstrSize)); ++ __ jirl(zero_reg, t7, 0); ++ __ nop(); ++ for (int i = 0; i < kNumCases; ++i) { ++ __ dd(&labels[i]); ++ } ++ } ++ __ bind(&done); ++ __ Ld_d(ra, MemOperand(sp, 0)); ++ __ addi_d(sp, sp, 8); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ Handle result( ++ Object(reinterpret_cast
(f.Call(i, 0, 0, 0, 0))), isolate); ++#ifdef OBJECT_PRINT ++ ::printf("f(%d) = ", i); ++ result->Print(std::cout); ++ ::printf("\n"); ++#endif ++ CHECK(values[i].is_identical_to(result)); ++ } ++} ++ ++uint64_t run_li_macro(int64_t imm, LiFlags mode, int32_t num_instr = 0) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label code_start; ++ __ bind(&code_start); ++ __ li(a2, imm, mode); ++ if (num_instr > 0) { ++ CHECK_EQ(assm.InstructionsGeneratedSince(&code_start), num_instr); ++ CHECK_EQ(__ InstrCountForLi64Bit(imm), num_instr); ++ } ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(li_macro) { ++ CcTest::InitializeVM(); ++ ++ // Test li macro-instruction for border cases. ++ ++ struct TestCase_li { ++ uint64_t imm; ++ int32_t num_instr; ++ }; ++ // clang-format off ++ struct TestCase_li tc[] = { ++ // imm, num_instr ++ {0xFFFFFFFFFFFFF800, 1}, // min_int12 ++ // The test case above generates addi_d instruction. ++ // This is int12 value and we can load it using just addi_d. ++ { 0x800, 1}, // max_int12 + 1 ++ // Generates ori ++ // max_int12 + 1 is not int12 but is uint12, just use ori. ++ {0xFFFFFFFFFFFFF7FF, 2}, // min_int12 - 1 ++ // Generates lu12i + ori ++ // We load int32 value using lu12i_w + ori. ++ { 0x801, 1}, // max_int12 + 2 ++ // Generates ori ++ // Also an uint12 value, use ori. ++ { 0x00001000, 1}, // max_uint12 + 1 ++ // Generates lu12i_w ++ // Low 12 bits are 0, load value using lu12i_w. ++ { 0x00001001, 2}, // max_uint12 + 2 ++ // Generates lu12i_w + ori ++ // We have to generate two instructions in this case.
++ {0x00000000FFFFFFFF, 2}, // max_uint32 ++ // addi_w + lu32i_d ++ {0x00000000FFFFFFFE, 2}, // max_uint32 - 1 ++ // addi_w + lu32i_d ++ {0xFFFFFFFF80000000, 1}, // min_int32 ++ // lu12i_w ++ {0x0000000080000000, 2}, // max_int32 + 1 ++ // lu12i_w + lu32i_d ++ {0xFFFF0000FFFF8765, 3}, ++ // lu12i_w + ori + lu32i_d ++ {0x1234ABCD87654321, 4}, ++ // lu12i_w + ori + lu32i_d + lu52i_d ++ {0xFFFF789100000000, 2}, ++ // xor + lu32i_d ++ {0xF12F789100000000, 3}, ++ // xor + lu32i_d + lu52i_d ++ {0xF120000000000800, 2}, ++ // ori + lu52i_d ++ {0xFFF0000000000000, 1}, ++ // lu52i_d ++ {0xF100000000000000, 1}, ++ {0x0122000000000000, 2}, ++ {0x1234FFFF77654321, 4}, ++ {0x1230000077654321, 3}, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase_li); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ CHECK_EQ(tc[i].imm, ++ run_li_macro(tc[i].imm, OPTIMIZE_SIZE, tc[i].num_instr)); ++ CHECK_EQ(tc[i].imm, run_li_macro(tc[i].imm, CONSTANT_SIZE)); ++ if (is_int48(tc[i].imm)) { ++ CHECK_EQ(tc[i].imm, run_li_macro(tc[i].imm, ADDRESS_LOAD)); ++ } ++ } ++} ++ ++TEST(FMIN_FMAX) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ double b; ++ float c; ++ float d; ++ double e; ++ double f; ++ float g; ++ float h; ++ }; ++ ++ TestFloat test; ++ const double dnan = std::numeric_limits::quiet_NaN(); ++ const double dinf = std::numeric_limits::infinity(); ++ const double dminf = -std::numeric_limits::infinity(); ++ const float fnan = std::numeric_limits::quiet_NaN(); ++ const float finf = std::numeric_limits::infinity(); ++ const float fminf = -std::numeric_limits::infinity(); ++ const int kTableLength = 13; ++ ++ // clang-format off ++ double inputsa[kTableLength] = {2.0, 3.0, dnan, 3.0, -0.0, 0.0, dinf, ++ dnan, 42.0, dinf, dminf, dinf, dnan}; ++ double inputsb[kTableLength] = {3.0, 2.0, 3.0, dnan, 0.0, -0.0, dnan, ++ dinf, dinf, 42.0, dinf, dminf, dnan}; ++ double outputsdmin[kTableLength] = {2.0, 2.0, 3.0, 3.0, -0.0, ++ -0.0, dinf, dinf, 42.0, 42.0, ++ dminf, dminf, dnan}; ++ double outputsdmax[kTableLength] = {3.0, 3.0, 3.0, 3.0, 0.0, 0.0, dinf, ++ dinf, dinf, dinf, dinf, dinf, dnan}; ++ ++ float inputsc[kTableLength] = {2.0, 3.0, fnan, 3.0, -0.0, 0.0, finf, ++ fnan, 42.0, finf, fminf, finf, fnan}; ++ float inputsd[kTableLength] = {3.0, 2.0, 3.0, fnan, 0.0, -0.0, fnan, ++ finf, finf, 42.0, finf, fminf, fnan}; ++ float outputsfmin[kTableLength] = {2.0, 2.0, 3.0, 3.0, -0.0, ++ -0.0, finf, finf, 42.0, 42.0, ++ fminf, fminf, fnan}; ++ float outputsfmax[kTableLength] = {3.0, 3.0, 3.0, 3.0, 0.0, 0.0, finf, ++ finf, finf, finf, finf, finf, fnan}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ fmin_d(f12, f8, f9); ++ __ fmax_d(f13, f8, f9); ++ __ fmin_s(f14, f10, f11); ++ __ fmax_s(f15, f10, f11); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestFloat, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, f))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, g))); ++ __ Fst_s(f15, MemOperand(a0, offsetof(TestFloat, h))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = 
GeneratedCode::FromCode(*code); ++ for (int i = 4; i < kTableLength; i++) { ++ test.a = inputsa[i]; ++ test.b = inputsb[i]; ++ test.c = inputsc[i]; ++ test.d = inputsd[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(0, memcmp(&test.e, &outputsdmin[i], sizeof(test.e))); ++ CHECK_EQ(0, memcmp(&test.f, &outputsdmax[i], sizeof(test.f))); ++ CHECK_EQ(0, memcmp(&test.g, &outputsfmin[i], sizeof(test.g))); ++ CHECK_EQ(0, memcmp(&test.h, &outputsfmax[i], sizeof(test.h))); ++ } ++} ++ ++TEST(FMINA_FMAXA) { ++ const int kTableLength = 23; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ const double dnan = std::numeric_limits::quiet_NaN(); ++ const double dinf = std::numeric_limits::infinity(); ++ const double dminf = -std::numeric_limits::infinity(); ++ const float fnan = std::numeric_limits::quiet_NaN(); ++ const float finf = std::numeric_limits::infinity(); ++ const float fminf = -std::numeric_limits::infinity(); ++ ++ struct TestFloat { ++ double a; ++ double b; ++ double resd1; ++ double resd2; ++ float c; ++ float d; ++ float resf1; ++ float resf2; ++ }; ++ ++ TestFloat test; ++ // clang-format off ++ double inputsa[kTableLength] = { ++ 5.3, 4.8, 6.1, 9.8, 9.8, 9.8, -10.0, -8.9, -9.8, -10.0, -8.9, -9.8, ++ dnan, 3.0, -0.0, 0.0, dinf, dnan, 42.0, dinf, dminf, dinf, dnan}; ++ double inputsb[kTableLength] = { ++ 4.8, 5.3, 6.1, -10.0, -8.9, -9.8, 9.8, 9.8, 9.8, -9.8, -11.2, -9.8, ++ 3.0, dnan, 0.0, -0.0, dnan, dinf, dinf, 42.0, dinf, dminf, dnan}; ++ double resd1[kTableLength] = { ++ 4.8, 4.8, 6.1, 9.8, -8.9, -9.8, 9.8, -8.9, -9.8, -9.8, -8.9, -9.8, ++ 3.0, 3.0, -0.0, -0.0, dinf, dinf, 42.0, 42.0, dminf, dminf, dnan}; ++ double resd2[kTableLength] = { ++ 5.3, 5.3, 6.1, -10.0, 9.8, 9.8, -10.0, 9.8, 9.8, -10.0, -11.2, -9.8, ++ 3.0, 3.0, 0.0, 0.0, dinf, dinf, dinf, dinf, dinf, dinf, dnan}; ++ float inputsc[kTableLength] = { ++ 5.3, 4.8, 6.1, 9.8, 9.8, 9.8, -10.0, -8.9, -9.8, -10.0, -8.9, -9.8, ++ fnan, 3.0, -0.0, 0.0, finf, fnan, 42.0, finf, fminf, finf, fnan}; ++ float inputsd[kTableLength] = { ++ 4.8, 5.3, 6.1, -10.0, -8.9, -9.8, 9.8, 9.8, 9.8, -9.8, -11.2, -9.8, ++ 3.0, fnan, -0.0, 0.0, fnan, finf, finf, 42.0, finf, fminf, fnan}; ++ float resf1[kTableLength] = { ++ 4.8, 4.8, 6.1, 9.8, -8.9, -9.8, 9.8, -8.9, -9.8, -9.8, -8.9, -9.8, ++ 3.0, 3.0, -0.0, -0.0, finf, finf, 42.0, 42.0, fminf, fminf, fnan}; ++ float resf2[kTableLength] = { ++ 5.3, 5.3, 6.1, -10.0, 9.8, 9.8, -10.0, 9.8, 9.8, -10.0, -11.2, -9.8, ++ 3.0, 3.0, 0.0, 0.0, finf, finf, finf, finf, finf, finf, fnan}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ fmina_d(f12, f8, f9); ++ __ fmaxa_d(f13, f8, f9); ++ __ fmina_s(f14, f10, f11); ++ __ fmaxa_s(f15, f10, f11); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestFloat, resd1))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resd2))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, resf1))); ++ __ Fst_s(f15, MemOperand(a0, offsetof(TestFloat, resf2))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputsa[i]; ++ test.b = inputsb[i]; ++ 
test.c = inputsc[i]; ++ test.d = inputsd[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ if (i < kTableLength - 1) { ++ CHECK_EQ(test.resd1, resd1[i]); ++ CHECK_EQ(test.resd2, resd2[i]); ++ CHECK_EQ(test.resf1, resf1[i]); ++ CHECK_EQ(test.resf2, resf2[i]); ++ } else { ++ CHECK(std::isnan(test.resd1)); ++ CHECK(std::isnan(test.resd2)); ++ CHECK(std::isnan(test.resf1)); ++ CHECK(std::isnan(test.resf2)); ++ } ++ } ++} ++ ++TEST(FADD) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ double b; ++ double c; ++ float d; ++ float e; ++ float f; ++ }; ++ ++ TestFloat test; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fadd_d(f10, f8, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ Fld_s(f12, MemOperand(a0, offsetof(TestFloat, e))); ++ __ fadd_s(f13, f11, f12); ++ __ Fst_s(f13, MemOperand(a0, offsetof(TestFloat, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.a = 2.0; ++ test.b = 3.0; ++ test.d = 2.0; ++ test.e = 3.0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, 5.0); ++ CHECK_EQ(test.f, 5.0); ++ ++ test.a = std::numeric_limits::max(); ++ test.b = -std::numeric_limits::max(); // lowest() ++ test.d = std::numeric_limits::max(); ++ test.e = -std::numeric_limits::max(); // lowest() ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, 0.0); ++ CHECK_EQ(test.f, 0.0); ++ ++ test.a = std::numeric_limits::max(); ++ test.b = std::numeric_limits::max(); ++ test.d = std::numeric_limits::max(); ++ test.e = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(!std::isfinite(test.c)); ++ CHECK(!std::isfinite(test.f)); ++ ++ test.a = 5.0; ++ test.b = std::numeric_limits::signaling_NaN(); ++ test.d = 5.0; ++ test.e = std::numeric_limits::signaling_NaN(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.c)); ++ CHECK(std::isnan(test.f)); ++} ++ ++TEST(FSUB) { ++ const int kTableLength = 12; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float a; ++ float b; ++ float resultS; ++ double c; ++ double d; ++ double resultD; ++ }; ++ ++ TestFloat test; ++ ++ // clang-format off ++ double inputfs_D[kTableLength] = { ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9, ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9 ++ }; ++ double inputft_D[kTableLength] = { ++ 4.8, 5.3, 2.9, 4.8, 5.3, 2.9, ++ -4.8, -5.3, -2.9, -4.8, -5.3, -2.9 ++ }; ++ double outputs_D[kTableLength] = { ++ 0.5, -0.5, 0.0, -10.1, -10.1, -5.8, ++ 10.1, 10.1, 5.8, -0.5, 0.5, 0.0 ++ }; ++ float inputfs_S[kTableLength] = { ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9, ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9 ++ }; ++ float inputft_S[kTableLength] = { ++ 4.8, 5.3, 2.9, 4.8, 5.3, 2.9, ++ -4.8, -5.3, -2.9, -4.8, -5.3, -2.9 ++ }; ++ float outputs_S[kTableLength] = { ++ 0.5, -0.5, 0.0, -10.1, -10.1, -5.8, ++ 10.1, 10.1, 5.8, -0.5, 0.5, 0.0 ++ }; ++ // clang-format on ++ ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_d(f11, 
MemOperand(a0, offsetof(TestFloat, d))); ++ __ fsub_s(f12, f8, f9); ++ __ fsub_d(f13, f10, f11); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestFloat, resultS))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resultD))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputfs_S[i]; ++ test.b = inputft_S[i]; ++ test.c = inputfs_D[i]; ++ test.d = inputft_D[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.resultS, outputs_S[i]); ++ CHECK_EQ(test.resultD, outputs_D[i]); ++ } ++} ++ ++TEST(FMUL) { ++ const int kTableLength = 4; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float a; ++ float b; ++ float resultS; ++ double c; ++ double d; ++ double resultD; ++ }; ++ ++ TestFloat test; ++ // clang-format off ++ double inputfs_D[kTableLength] = { ++ 5.3, -5.3, 5.3, -2.9 ++ }; ++ double inputft_D[kTableLength] = { ++ 4.8, 4.8, -4.8, -0.29 ++ }; ++ ++ float inputfs_S[kTableLength] = { ++ 5.3, -5.3, 5.3, -2.9 ++ }; ++ float inputft_S[kTableLength] = { ++ 4.8, 4.8, -4.8, -0.29 ++ }; ++ // clang-format on ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_d(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ fmul_s(f12, f8, f9); ++ __ fmul_d(f13, f10, f11); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestFloat, resultS))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resultD))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputfs_S[i]; ++ test.b = inputft_S[i]; ++ test.c = inputfs_D[i]; ++ test.d = inputft_D[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.resultS, inputfs_S[i] * inputft_S[i]); ++ CHECK_EQ(test.resultD, inputfs_D[i] * inputft_D[i]); ++ } ++} ++ ++TEST(FDIV) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double dOp1; ++ double dOp2; ++ double dRes; ++ float fOp1; ++ float fOp2; ++ float fRes; ++ }; ++ ++ Test test; ++ ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(zero_reg); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, dOp1))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(Test, dOp2))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(Test, fOp1))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(Test, fOp2))); ++ __ fdiv_d(f12, f8, f9); ++ __ fdiv_s(f13, f10, f11); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, dRes))); ++ __ Fst_s(f13, MemOperand(a0, offsetof(Test, fRes))); ++ ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ f.Call(&test, 0, 0, 0, 0); ++ const int test_size = 3; ++ // clang-format off ++ double dOp1[test_size] = { ++ 5.0, DBL_MAX, DBL_MAX}; ++ ++ double dOp2[test_size] = { ++ 2.0, 2.0, -DBL_MAX}; ++ ++ double dRes[test_size] = { ++ 2.5, DBL_MAX / 2.0, -1.0}; ++ 
++ float fOp1[test_size] = { ++ 5.0, FLT_MAX, FLT_MAX}; ++ ++ float fOp2[test_size] = { ++ 2.0, 2.0, -FLT_MAX}; ++ ++ float fRes[test_size] = { ++ 2.5, FLT_MAX / 2.0, -1.0}; ++ // clang-format on ++ ++ for (int i = 0; i < test_size; i++) { ++ test.dOp1 = dOp1[i]; ++ test.dOp2 = dOp2[i]; ++ test.fOp1 = fOp1[i]; ++ test.fOp2 = fOp2[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.dRes, dRes[i]); ++ CHECK_EQ(test.fRes, fRes[i]); ++ } ++ ++ test.dOp1 = DBL_MAX; ++ test.dOp2 = -0.0; ++ test.fOp1 = FLT_MAX; ++ test.fOp2 = -0.0; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(!std::isfinite(test.dRes)); ++ CHECK(!std::isfinite(test.fRes)); ++ ++ test.dOp1 = 0.0; ++ test.dOp2 = -0.0; ++ test.fOp1 = 0.0; ++ test.fOp2 = -0.0; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.dRes)); ++ CHECK(std::isnan(test.fRes)); ++ ++ test.dOp1 = std::numeric_limits::quiet_NaN(); ++ test.dOp2 = -5.0; ++ test.fOp1 = std::numeric_limits::quiet_NaN(); ++ test.fOp2 = -5.0; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.dRes)); ++ CHECK(std::isnan(test.fRes)); ++} ++ ++TEST(FABS) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ float b; ++ }; ++ ++ TestFloat test; ++ ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(zero_reg); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fabs_d(f10, f8); ++ __ fabs_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestFloat, b))); ++ ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.a = -2.0; ++ test.b = -2.0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, 2.0); ++ CHECK_EQ(test.b, 2.0); ++ ++ test.a = 2.0; ++ test.b = 2.0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, 2.0); ++ CHECK_EQ(test.b, 2.0); ++ ++ // Testing biggest positive number ++ test.a = std::numeric_limits::max(); ++ test.b = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::max()); ++ CHECK_EQ(test.b, std::numeric_limits::max()); ++ ++ // Testing smallest negative number ++ test.a = -std::numeric_limits::max(); // lowest() ++ test.b = -std::numeric_limits::max(); // lowest() ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::max()); ++ CHECK_EQ(test.b, std::numeric_limits::max()); ++ ++ // Testing smallest positive number ++ test.a = -std::numeric_limits::min(); ++ test.b = -std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::min()); ++ CHECK_EQ(test.b, std::numeric_limits::min()); ++ ++ // Testing infinity ++ test.a = ++ -std::numeric_limits::max() / std::numeric_limits::min(); ++ test.b = ++ -std::numeric_limits::max() / std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::max() / ++ std::numeric_limits::min()); ++ CHECK_EQ(test.b, std::numeric_limits::max() / ++ std::numeric_limits::min()); ++ ++ test.a = std::numeric_limits::quiet_NaN(); ++ test.b = std::numeric_limits::quiet_NaN(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.a)); ++ CHECK(std::isnan(test.b)); ++ ++ test.a = std::numeric_limits::signaling_NaN(); ++ test.b = 
std::numeric_limits::signaling_NaN(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.a)); ++ CHECK(std::isnan(test.b)); ++} ++ ++template ++struct TestCaseMaddMsub { ++ T fj, fk, fa, fd_fmadd, fd_fmsub, fd_fnmadd, fd_fnmsub; ++}; ++ ++template ++void helper_fmadd_fmsub_fnmadd_fnmsub(F func) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ T x = std::sqrt(static_cast(2.0)); ++ T y = std::sqrt(static_cast(3.0)); ++ T z = std::sqrt(static_cast(5.0)); ++ T x2 = 11.11, y2 = 22.22, z2 = 33.33; ++ // clang-format off ++ TestCaseMaddMsub test_cases[] = { ++ {x, y, z, 0.0, 0.0, 0.0, 0.0}, ++ {x, y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {x, -y, z, 0.0, 0.0, 0.0, 0.0}, ++ {x, -y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, y, z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, -y, z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, -y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {-3.14, 0.2345, -123.000056, 0.0, 0.0, 0.0, 0.0}, ++ {7.3, -23.257, -357.1357, 0.0, 0.0, 0.0, 0.0}, ++ {x2, y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {x2, y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ {x2, -y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {x2, -y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, -y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, -y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ }; ++ // clang-format on ++ if (std::is_same::value) { ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestCaseMaddMsub, fj))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestCaseMaddMsub, fk))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestCaseMaddMsub, fa))); ++ } else if (std::is_same::value) { ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestCaseMaddMsub, fj))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestCaseMaddMsub, fk))); ++ __ Fld_d(f10, MemOperand(a0, offsetof(TestCaseMaddMsub, fa))); ++ } else { ++ UNREACHABLE(); ++ } ++ ++ func(assm); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ const size_t kTableLength = sizeof(test_cases) / sizeof(TestCaseMaddMsub); ++ TestCaseMaddMsub tc; ++ for (size_t i = 0; i < kTableLength; i++) { ++ tc.fj = test_cases[i].fj; ++ tc.fk = test_cases[i].fk; ++ tc.fa = test_cases[i].fa; ++ ++ f.Call(&tc, 0, 0, 0, 0); ++ ++ T res_fmadd; ++ T res_fmsub; ++ T res_fnmadd; ++ T res_fnmsub; ++ res_fmadd = std::fma(tc.fj, tc.fk, tc.fa); ++ res_fmsub = std::fma(tc.fj, tc.fk, -tc.fa); ++ res_fnmadd = -std::fma(tc.fj, tc.fk, tc.fa); ++ res_fnmsub = -std::fma(tc.fj, tc.fk, -tc.fa); ++ ++ CHECK_EQ(tc.fd_fmadd, res_fmadd); ++ CHECK_EQ(tc.fd_fmsub, res_fmsub); ++ CHECK_EQ(tc.fd_fnmadd, res_fnmadd); ++ CHECK_EQ(tc.fd_fnmsub, res_fnmsub); ++ } ++} ++ ++TEST(FMADD_FMSUB_FNMADD_FNMSUB_S) { ++ helper_fmadd_fmsub_fnmadd_fnmsub([](MacroAssembler& assm) { ++ __ fmadd_s(f11, f8, f9, f10); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fmadd))); ++ __ fmsub_s(f12, f8, f9, f10); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fmsub))); ++ __ fnmadd_s(f13, f8, f9, f10); ++ __ Fst_s(f13, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmadd))); ++ __ fnmsub_s(f14, f8, f9, f10); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmsub))); ++ }); ++} ++ ++TEST(FMADD_FMSUB_FNMADD_FNMSUB_D) { ++ helper_fmadd_fmsub_fnmadd_fnmsub([](MacroAssembler& assm) { ++ __ fmadd_d(f11, f8, f9, f10); ++ __ Fst_d(f11, MemOperand(a0, 
offsetof(TestCaseMaddMsub, fd_fmadd))); ++ __ fmsub_d(f12, f8, f9, f10); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fmsub))); ++ __ fnmadd_d(f13, f8, f9, f10); ++ __ Fst_d(f13, ++ MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmadd))); ++ __ fnmsub_d(f14, f8, f9, f10); ++ __ Fst_d(f14, ++ MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmsub))); ++ }); ++} ++ ++/* ++TEST(FSQRT_FRSQRT_FRECIP) { ++ const int kTableLength = 4; ++ const double deltaDouble = 2E-15; ++ const float deltaFloat = 2E-7; ++ const float sqrt2_s = sqrt(2); ++ const double sqrt2_d = sqrt(2); ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float a; ++ float resultS1; ++ float resultS2; ++ float resultS3; ++ double b; ++ double resultD1; ++ double resultD2; ++ double resultD3; ++ }; ++ TestFloat test; ++ // clang-format off ++ double inputs_D[kTableLength] = { ++ 0.0L, 4.0L, 2.0L, 4e-28L ++ }; ++ ++ double outputs_D[kTableLength] = { ++ 0.0L, 2.0L, sqrt2_d, 2e-14L ++ }; ++ float inputs_S[kTableLength] = { ++ 0.0, 4.0, 2.0, 4e-28 ++ }; ++ ++ float outputs_S[kTableLength] = { ++ 0.0, 2.0, sqrt2_s, 2e-14 ++ }; ++ // clang-format on ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fsqrt_s(f10, f8); ++ __ fsqrt_d(f11, f9); ++ __ frsqrt_s(f12, f8); ++ __ frsqrt_d(f13, f9); ++ __ frecip_s(f14, f8); ++ __ frecip_d(f15, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(TestFloat, resultS1))); ++ __ Fst_d(f11, MemOperand(a0, offsetof(TestFloat, resultD1))); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestFloat, resultS2))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resultD2))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, resultS3))); ++ __ Fst_d(f15, MemOperand(a0, offsetof(TestFloat, resultD3))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ for (int i = 0; i < kTableLength; i++) { ++ float f1; ++ double d1; ++ test.a = inputs_S[i]; ++ test.b = inputs_D[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.resultS1, outputs_S[i]); ++ CHECK_EQ(test.resultD1, outputs_D[i]); ++ ++ if (i != 0) { ++ f1 = test.resultS2 - 1.0F/outputs_S[i]; ++ f1 = (f1 < 0) ? f1 : -f1; ++ CHECK(f1 <= deltaFloat); ++ d1 = test.resultD2 - 1.0L/outputs_D[i]; ++ d1 = (d1 < 0) ? d1 : -d1; ++ CHECK(d1 <= deltaDouble); ++ f1 = test.resultS3 - 1.0F/inputs_S[i]; ++ f1 = (f1 < 0) ? f1 : -f1; ++ CHECK(f1 <= deltaFloat); ++ d1 = test.resultD3 - 1.0L/inputs_D[i]; ++ d1 = (d1 < 0) ? d1 : -d1; ++ CHECK(d1 <= deltaDouble); ++ } else { ++ CHECK_EQ(test.resultS2, 1.0F/outputs_S[i]); ++ CHECK_EQ(test.resultD2, 1.0L/outputs_D[i]); ++ CHECK_EQ(test.resultS3, 1.0F/inputs_S[i]); ++ CHECK_EQ(test.resultD3, 1.0L/inputs_D[i]); ++ } ++ } ++}*/ ++ ++TEST(LA15) { ++ // Test chaining of label usages within instructions (issue 1644). 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ Assembler assm(AssemblerOptions{}); ++ ++ Label target; ++ __ beq(a0, a1, &target); ++ __ nop(); ++ __ bne(a0, a1, &target); ++ __ nop(); ++ __ bind(&target); ++ __ nop(); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ f.Call(1, 1, 0, 0, 0); ++} ++ ++TEST(Trampoline) { ++ static const int kMaxBranchOffset = (1 << (18 - 1)) - 1; ++ ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ Label done; ++ size_t nr_calls = kMaxBranchOffset / kInstrSize + 5; ++ ++ __ xor_(a2, a2, a2); ++ __ BranchShort(&done, eq, a0, Operand(a1)); ++ for (size_t i = 0; i < nr_calls; ++i) { ++ __ addi_d(a2, a2, 1); ++ } ++ __ bind(&done); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ int64_t res = reinterpret_cast(f.Call(42, 42, 0, 0, 0)); ++ CHECK_EQ(0, res); ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/test/cctest/test-disasm-la64.cc b/src/3rdparty/chromium/v8/test/cctest/test-disasm-la64.cc +new file mode 100644 +index 00000000000..36e46dc2131 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/test/cctest/test-disasm-la64.cc +@@ -0,0 +1,966 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Google Inc. nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++// ++ ++#include ++#include ++ ++#include "src/init/v8.h" ++ ++#include "src/codegen/macro-assembler.h" ++#include "src/debug/debug.h" ++#include "src/diagnostics/disasm.h" ++#include "src/diagnostics/disassembler.h" ++#include "src/execution/frames-inl.h" ++#include "test/cctest/cctest.h" ++ ++namespace v8 { ++namespace internal { ++ ++bool DisassembleAndCompare(byte* pc, const char* compare_string) { ++ disasm::NameConverter converter; ++ disasm::Disassembler disasm(converter); ++ EmbeddedVector<char, 128> disasm_buffer; ++ ++ /* if (prev_instr_compact_branch) { ++ disasm.InstructionDecode(disasm_buffer, pc); ++ pc += 4; ++ }*/ ++ ++ disasm.InstructionDecode(disasm_buffer, pc); ++ ++ if (strcmp(compare_string, disasm_buffer.begin()) != 0) { ++ fprintf(stderr, ++ "expected: \n" ++ "%s\n" ++ "disassembled: \n" ++ "%s\n\n", ++ compare_string, disasm_buffer.begin()); ++ return false; ++ } ++ return true; ++} ++ ++// Set up V8 to a state where we can at least run the assembler and ++// disassembler. Declare the variables and allocate the data structures used ++// in the rest of the macros. ++#define SET_UP() \ ++ CcTest::InitializeVM(); \ ++ Isolate* isolate = CcTest::i_isolate(); \ ++ HandleScope scope(isolate); \ ++ byte* buffer = reinterpret_cast<byte*>(malloc(4 * 1024)); \ ++ Assembler assm(AssemblerOptions{}, \ ++ ExternalAssemblerBuffer(buffer, 4 * 1024)); \ ++ bool failure = false; ++ ++// This macro assembles one instruction using the preallocated assembler and ++// disassembles the generated instruction, comparing the output to the expected ++// value. If the comparison fails an error message is printed, but the test ++// continues to run until the end. ++#define COMPARE(asm_, compare_string) \ ++ { \ ++ int pc_offset = assm.pc_offset(); \ ++ byte* progcounter = &buffer[pc_offset]; \ ++ assm.asm_; \ ++ if (!DisassembleAndCompare(progcounter, compare_string)) failure = true; \ ++ } ++ ++// Verify that all invocations of the COMPARE macro passed successfully. ++// Exit with a failure if at least one of the tests failed.
++#define VERIFY_RUN() \ ++ if (failure) { \ ++ FATAL("LA64 Disassembler tests failed.\n"); \ ++ } ++ ++#define COMPARE_PC_REL(asm_, compare_string, offset) \ ++ { \ ++ int pc_offset = assm.pc_offset(); \ ++ byte* progcounter = &buffer[pc_offset]; \ ++ char str_with_address[100]; \ ++ printf("%p\n", static_cast(progcounter)); \ ++ snprintf(str_with_address, sizeof(str_with_address), "%s -> %p", \ ++ compare_string, static_cast(progcounter + (offset * 4))); \ ++ assm.asm_; \ ++ if (!DisassembleAndCompare(progcounter, str_with_address)) failure = true; \ ++ } ++ ++TEST(TypeOp6) { ++ SET_UP(); ++ ++ COMPARE(jirl(ra, t7, 0), "4c000261 jirl ra, t7, 0"); ++ COMPARE(jirl(ra, t7, 32767), "4dfffe61 jirl ra, t7, 32767"); ++ COMPARE(jirl(ra, t7, -32768), "4e000261 jirl ra, t7, -32768"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp6PC) { ++ SET_UP(); ++ ++ COMPARE_PC_REL(beqz(t7, 1048575), "43fffe6f beqz t7, 1048575", ++ 1048575); ++ COMPARE_PC_REL(beqz(t0, -1048576), "40000190 beqz t0, -1048576", ++ -1048576); ++ COMPARE_PC_REL(beqz(t1, 0), "400001a0 beqz t1, 0", 0); ++ ++ COMPARE_PC_REL(bnez(a2, 1048575), "47fffccf bnez a2, 1048575", ++ 1048575); ++ COMPARE_PC_REL(bnez(s3, -1048576), "44000350 bnez s3, -1048576", ++ -1048576); ++ COMPARE_PC_REL(bnez(t8, 0), "44000280 bnez t8, 0", 0); ++ ++ COMPARE_PC_REL(bceqz(FCC0, 1048575), "4bfffc0f bceqz fcc0, 1048575", ++ 1048575); ++ COMPARE_PC_REL(bceqz(FCC0, -1048576), ++ "48000010 bceqz fcc0, -1048576", -1048576); ++ COMPARE_PC_REL(bceqz(FCC0, 0), "48000000 bceqz fcc0, 0", 0); ++ ++ COMPARE_PC_REL(bcnez(FCC0, 1048575), "4bfffd0f bcnez fcc0, 1048575", ++ 1048575); ++ COMPARE_PC_REL(bcnez(FCC0, -1048576), ++ "48000110 bcnez fcc0, -1048576", -1048576); ++ COMPARE_PC_REL(bcnez(FCC0, 0), "48000100 bcnez fcc0, 0", 0); ++ ++ COMPARE_PC_REL(b(33554431), "53fffdff b 33554431", 33554431); ++ COMPARE_PC_REL(b(-33554432), "50000200 b -33554432", -33554432); ++ COMPARE_PC_REL(b(0), "50000000 b 0", 0); ++ ++ COMPARE_PC_REL(beq(t0, a6, 32767), "59fffd8a beq t0, a6, 32767", ++ 32767); ++ COMPARE_PC_REL(beq(t1, a0, -32768), "5a0001a4 beq t1, a0, -32768", ++ -32768); ++ COMPARE_PC_REL(beq(a4, t1, 0), "5800010d beq a4, t1, 0", 0); ++ ++ COMPARE_PC_REL(bne(a3, a4, 32767), "5dfffce8 bne a3, a4, 32767", ++ 32767); ++ COMPARE_PC_REL(bne(a6, a5, -32768), "5e000149 bne a6, a5, -32768", ++ -32768); ++ COMPARE_PC_REL(bne(a4, a5, 0), "5c000109 bne a4, a5, 0", 0); ++ ++ COMPARE_PC_REL(blt(a4, a6, 32767), "61fffd0a blt a4, a6, 32767", ++ 32767); ++ COMPARE_PC_REL(blt(a4, a5, -32768), "62000109 blt a4, a5, -32768", ++ -32768); ++ COMPARE_PC_REL(blt(a4, a6, 0), "6000010a blt a4, a6, 0", 0); ++ ++ COMPARE_PC_REL(bge(s7, a5, 32767), "65ffffc9 bge s7, a5, 32767", ++ 32767); ++ COMPARE_PC_REL(bge(a1, a3, -32768), "660000a7 bge a1, a3, -32768", ++ -32768); ++ COMPARE_PC_REL(bge(a5, s3, 0), "6400013a bge a5, s3, 0", 0); ++ ++ COMPARE_PC_REL(bltu(a5, s7, 32767), "69fffd3e bltu a5, s7, 32767", ++ 32767); ++ COMPARE_PC_REL(bltu(a4, a5, -32768), "6a000109 bltu a4, a5, -32768", ++ -32768); ++ COMPARE_PC_REL(bltu(a4, t6, 0), "68000112 bltu a4, t6, 0", 0); ++ ++ COMPARE_PC_REL(bgeu(a7, a6, 32767), "6dfffd6a bgeu a7, a6, 32767", ++ 32767); ++ COMPARE_PC_REL(bgeu(a5, a3, -32768), "6e000127 bgeu a5, a3, -32768", ++ -32768); ++ COMPARE_PC_REL(bgeu(t2, t1, 0), "6c0001cd bgeu t2, t1, 0", 0); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp7) { ++ SET_UP(); ++ ++ COMPARE(lu12i_w(a4, 524287), "14ffffe8 lu12i.w a4, 524287"); ++ COMPARE(lu12i_w(a5, -524288), "15000009 lu12i.w a5, -524288"); ++ COMPARE(lu12i_w(a6, 0), 
"1400000a lu12i.w a6, 0"); ++ ++ COMPARE(lu32i_d(a7, 524287), "16ffffeb lu32i.d a7, 524287"); ++ COMPARE(lu32i_d(t0, 524288), "1700000c lu32i.d t0, -524288"); ++ COMPARE(lu32i_d(t1, 0), "1600000d lu32i.d t1, 0"); ++ ++ COMPARE(pcaddi(t1, 1), "1800002d pcaddi t1, 1"); ++ COMPARE(pcaddi(t2, 524287), "18ffffee pcaddi t2, 524287"); ++ COMPARE(pcaddi(t3, -524288), "1900000f pcaddi t3, -524288"); ++ COMPARE(pcaddi(t4, 0), "18000010 pcaddi t4, 0"); ++ ++ COMPARE(pcalau12i(t5, 524287), "1afffff1 pcalau12i t5, 524287"); ++ COMPARE(pcalau12i(t6, -524288), "1b000012 pcalau12i t6, -524288"); ++ COMPARE(pcalau12i(a4, 0), "1a000008 pcalau12i a4, 0"); ++ ++ COMPARE(pcaddu12i(a5, 524287), "1cffffe9 pcaddu12i a5, 524287"); ++ COMPARE(pcaddu12i(a6, -524288), "1d00000a pcaddu12i a6, -524288"); ++ COMPARE(pcaddu12i(a7, 0), "1c00000b pcaddu12i a7, 0"); ++ ++ COMPARE(pcaddu18i(t0, 524287), "1effffec pcaddu18i t0, 524287"); ++ COMPARE(pcaddu18i(t1, -524288), "1f00000d pcaddu18i t1, -524288"); ++ COMPARE(pcaddu18i(t2, 0), "1e00000e pcaddu18i t2, 0"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp8) { ++ SET_UP(); ++ ++ COMPARE(ll_w(t2, t3, 32764), "207ffdee ll.w t2, t3, 32764"); ++ COMPARE(ll_w(t3, t4, -32768), "2080020f ll.w t3, t4, -32768"); ++ COMPARE(ll_w(t5, t6, 0), "20000251 ll.w t5, t6, 0"); ++ ++ COMPARE(sc_w(a6, a7, 32764), "217ffd6a sc.w a6, a7, 32764"); ++ COMPARE(sc_w(t0, t1, -32768), "218001ac sc.w t0, t1, -32768"); ++ COMPARE(sc_w(t2, t3, 0), "210001ee sc.w t2, t3, 0"); ++ ++ COMPARE(ll_d(a0, a1, 32764), "227ffca4 ll.d a0, a1, 32764"); ++ COMPARE(ll_d(a2, a3, -32768), "228000e6 ll.d a2, a3, -32768"); ++ COMPARE(ll_d(a4, a5, 0), "22000128 ll.d a4, a5, 0"); ++ ++ COMPARE(sc_d(t4, t5, 32764), "237ffe30 sc.d t4, t5, 32764"); ++ COMPARE(sc_d(t6, a0, -32768), "23800092 sc.d t6, a0, -32768"); ++ COMPARE(sc_d(a1, a2, 0), "230000c5 sc.d a1, a2, 0"); ++ ++ COMPARE(ldptr_w(a4, a5, 32764), "247ffd28 ldptr.w a4, a5, 32764"); ++ COMPARE(ldptr_w(a6, a7, -32768), "2480016a ldptr.w a6, a7, -32768"); ++ COMPARE(ldptr_w(t0, t1, 0), "240001ac ldptr.w t0, t1, 0"); ++ ++ COMPARE(stptr_w(a4, a5, 32764), "257ffd28 stptr.w a4, a5, 32764"); ++ COMPARE(stptr_w(a6, a7, -32768), "2580016a stptr.w a6, a7, -32768"); ++ COMPARE(stptr_w(t0, t1, 0), "250001ac stptr.w t0, t1, 0"); ++ ++ COMPARE(ldptr_d(t2, t3, 32764), "267ffdee ldptr.d t2, t3, 32764"); ++ COMPARE(ldptr_d(t4, t5, -32768), "26800230 ldptr.d t4, t5, -32768"); ++ COMPARE(ldptr_d(t6, a4, 0), "26000112 ldptr.d t6, a4, 0"); ++ ++ COMPARE(stptr_d(a5, a6, 32764), "277ffd49 stptr.d a5, a6, 32764"); ++ COMPARE(stptr_d(a7, t0, -32768), "2780018b stptr.d a7, t0, -32768"); ++ COMPARE(stptr_d(t1, t2, 0), "270001cd stptr.d t1, t2, 0"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp10) { ++ SET_UP(); ++ ++ COMPARE(bstrins_w(a4, a5, 31, 16), ++ "007f4128 bstrins.w a4, a5, 31, 16"); ++ COMPARE(bstrins_w(a6, a7, 5, 0), "0065016a bstrins.w a6, a7, 5, 0"); ++ ++ COMPARE(bstrins_d(a3, zero_reg, 17, 0), ++ "00910007 bstrins.d a3, zero_reg, 17, 0"); ++ COMPARE(bstrins_d(t1, zero_reg, 17, 0), ++ "0091000d bstrins.d t1, zero_reg, 17, 0"); ++ ++ COMPARE(bstrpick_w(t0, t1, 31, 29), ++ "007ff5ac bstrpick.w t0, t1, 31, 29"); ++ COMPARE(bstrpick_w(a4, a5, 16, 0), ++ "00708128 bstrpick.w a4, a5, 16, 0"); ++ ++ COMPARE(bstrpick_d(a5, a5, 31, 0), ++ "00df0129 bstrpick.d a5, a5, 31, 0"); ++ COMPARE(bstrpick_d(a4, a4, 25, 2), ++ "00d90908 bstrpick.d a4, a4, 25, 2"); ++ ++ COMPARE(slti(t2, a5, 2047), "021ffd2e slti t2, a5, 2047"); ++ COMPARE(slti(a7, a1, -2048), "022000ab slti a7, a1, -2048"); ++ ++ 
COMPARE(sltui(a7, a7, 2047), "025ffd6b sltui a7, a7, 2047"); ++ COMPARE(sltui(t1, t1, -2048), "026001ad sltui t1, t1, -2048"); ++ ++ COMPARE(addi_w(t0, t2, 2047), "029ffdcc addi.w t0, t2, 2047"); ++ COMPARE(addi_w(a0, a0, -2048), "02a00084 addi.w a0, a0, -2048"); ++ ++ COMPARE(addi_d(a0, zero_reg, 2047), ++ "02dffc04 addi.d a0, zero_reg, 2047"); ++ COMPARE(addi_d(t7, t7, -2048), "02e00273 addi.d t7, t7, -2048"); ++ ++ COMPARE(lu52i_d(a0, a0, 2047), "031ffc84 lu52i.d a0, a0, 2047"); ++ COMPARE(lu52i_d(a1, a1, -2048), "032000a5 lu52i.d a1, a1, -2048"); ++ ++ COMPARE(andi(s3, a3, 0xfff), "037ffcfa andi s3, a3, 0xfff"); ++ COMPARE(andi(a4, a4, 0), "03400108 andi a4, a4, 0x0"); ++ ++ COMPARE(ori(t6, t6, 0xfff), "03bffe52 ori t6, t6, 0xfff"); ++ COMPARE(ori(t6, t6, 0), "03800252 ori t6, t6, 0x0"); ++ ++ COMPARE(xori(t1, t1, 0xfff), "03fffdad xori t1, t1, 0xfff"); ++ COMPARE(xori(a3, a3, 0x0), "03c000e7 xori a3, a3, 0x0"); ++ ++ COMPARE(ld_b(a1, a1, 2047), "281ffca5 ld.b a1, a1, 2047"); ++ COMPARE(ld_b(a4, a4, -2048), "28200108 ld.b a4, a4, -2048"); ++ ++ COMPARE(ld_h(a4, a0, 2047), "285ffc88 ld.h a4, a0, 2047"); ++ COMPARE(ld_h(a4, a3, -2048), "286000e8 ld.h a4, a3, -2048"); ++ ++ COMPARE(ld_w(a6, a6, 2047), "289ffd4a ld.w a6, a6, 2047"); ++ COMPARE(ld_w(a5, a4, -2048), "28a00109 ld.w a5, a4, -2048"); ++ ++ COMPARE(ld_d(a0, a3, 2047), "28dffce4 ld.d a0, a3, 2047"); ++ COMPARE(ld_d(a6, fp, -2048), "28e002ca ld.d a6, fp, -2048"); ++ COMPARE(ld_d(a0, a6, 0), "28c00144 ld.d a0, a6, 0"); ++ ++ COMPARE(st_b(a4, a0, 2047), "291ffc88 st.b a4, a0, 2047"); ++ COMPARE(st_b(a6, a5, -2048), "2920012a st.b a6, a5, -2048"); ++ ++ COMPARE(st_h(a4, a0, 2047), "295ffc88 st.h a4, a0, 2047"); ++ COMPARE(st_h(t1, t2, -2048), "296001cd st.h t1, t2, -2048"); ++ ++ COMPARE(st_w(t3, a4, 2047), "299ffd0f st.w t3, a4, 2047"); ++ COMPARE(st_w(a3, t2, -2048), "29a001c7 st.w a3, t2, -2048"); ++ ++ COMPARE(st_d(s3, sp, 2047), "29dffc7a st.d s3, sp, 2047"); ++ COMPARE(st_d(fp, s6, -2048), "29e003b6 st.d fp, s6, -2048"); ++ ++ COMPARE(ld_bu(a6, a0, 2047), "2a1ffc8a ld.bu a6, a0, 2047"); ++ COMPARE(ld_bu(a7, a7, -2048), "2a20016b ld.bu a7, a7, -2048"); ++ ++ COMPARE(ld_hu(a7, a7, 2047), "2a5ffd6b ld.hu a7, a7, 2047"); ++ COMPARE(ld_hu(a3, a3, -2048), "2a6000e7 ld.hu a3, a3, -2048"); ++ ++ COMPARE(ld_wu(a3, a0, 2047), "2a9ffc87 ld.wu a3, a0, 2047"); ++ COMPARE(ld_wu(a3, a5, -2048), "2aa00127 ld.wu a3, a5, -2048"); ++ ++ COMPARE(preld(31, a7, 2047), "2adffd7f preld 0x1f(31), a7, 2047"); ++ COMPARE(preld(0, t0, -2048), "2ae00180 preld 0x0(0), t0, -2048"); ++ ++ COMPARE(fld_s(f0, a3, 2047), "2b1ffce0 fld.s f0, a3, 2047"); ++ COMPARE(fld_s(f0, a1, -2048), "2b2000a0 fld.s f0, a1, -2048"); ++ ++ COMPARE(fld_d(f0, a0, 2047), "2b9ffc80 fld.d f0, a0, 2047"); ++ COMPARE(fld_d(f0, fp, -2048), "2ba002c0 fld.d f0, fp, -2048"); ++ ++ COMPARE(fst_d(f0, fp, 2047), "2bdffec0 fst.d f0, fp, 2047"); ++ COMPARE(fst_d(f0, a0, -2048), "2be00080 fst.d f0, a0, -2048"); ++ ++ COMPARE(fst_s(f0, a5, 2047), "2b5ffd20 fst.s f0, a5, 2047"); ++ COMPARE(fst_s(f0, a3, -2048), "2b6000e0 fst.s f0, a3, -2048"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp12) { ++ SET_UP(); ++ ++ COMPARE(fmadd_s(f0, f1, f2, f3), "08118820 fmadd.s f0, f1, f2, f3"); ++ COMPARE(fmadd_s(f4, f5, f6, f7), "081398a4 fmadd.s f4, f5, f6, f7"); ++ ++ COMPARE(fmadd_d(f8, f9, f10, f11), ++ "0825a928 fmadd.d f8, f9, f10, f11"); ++ COMPARE(fmadd_d(f12, f13, f14, f15), ++ "0827b9ac fmadd.d f12, f13, f14, f15"); ++ ++ COMPARE(fmsub_s(f0, f1, f2, f3), "08518820 fmsub.s f0, f1, f2, f3"); ++ 
COMPARE(fmsub_s(f4, f5, f6, f7), "085398a4 fmsub.s f4, f5, f6, f7"); ++ ++ COMPARE(fmsub_d(f8, f9, f10, f11), ++ "0865a928 fmsub.d f8, f9, f10, f11"); ++ COMPARE(fmsub_d(f12, f13, f14, f15), ++ "0867b9ac fmsub.d f12, f13, f14, f15"); ++ ++ COMPARE(fnmadd_s(f0, f1, f2, f3), ++ "08918820 fnmadd.s f0, f1, f2, f3"); ++ COMPARE(fnmadd_s(f4, f5, f6, f7), ++ "089398a4 fnmadd.s f4, f5, f6, f7"); ++ ++ COMPARE(fnmadd_d(f8, f9, f10, f11), ++ "08a5a928 fnmadd.d f8, f9, f10, f11"); ++ COMPARE(fnmadd_d(f12, f13, f14, f15), ++ "08a7b9ac fnmadd.d f12, f13, f14, f15"); ++ ++ COMPARE(fnmsub_s(f0, f1, f2, f3), ++ "08d18820 fnmsub.s f0, f1, f2, f3"); ++ COMPARE(fnmsub_s(f4, f5, f6, f7), ++ "08d398a4 fnmsub.s f4, f5, f6, f7"); ++ ++ COMPARE(fnmsub_d(f8, f9, f10, f11), ++ "08e5a928 fnmsub.d f8, f9, f10, f11"); ++ COMPARE(fnmsub_d(f12, f13, f14, f15), ++ "08e7b9ac fnmsub.d f12, f13, f14, f15"); ++ ++ COMPARE(fcmp_cond_s(CAF, f1, f2, FCC0), ++ "0c100820 fcmp.caf.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(CUN, f5, f6, FCC0), ++ "0c1418a0 fcmp.cun.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(CEQ, f9, f10, FCC0), ++ "0c122920 fcmp.ceq.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(CUEQ, f13, f14, FCC0), ++ "0c1639a0 fcmp.cueq.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(CLT, f1, f2, FCC0), ++ "0c110820 fcmp.clt.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(CULT, f5, f6, FCC0), ++ "0c1518a0 fcmp.cult.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(CLE, f9, f10, FCC0), ++ "0c132920 fcmp.cle.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(CULE, f13, f14, FCC0), ++ "0c1739a0 fcmp.cule.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(CNE, f1, f2, FCC0), ++ "0c180820 fcmp.cne.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(COR, f5, f6, FCC0), ++ "0c1a18a0 fcmp.cor.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(CUNE, f9, f10, FCC0), ++ "0c1c2920 fcmp.cune.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(SAF, f13, f14, FCC0), ++ "0c10b9a0 fcmp.saf.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(SUN, f1, f2, FCC0), ++ "0c148820 fcmp.sun.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(SEQ, f5, f6, FCC0), ++ "0c1298a0 fcmp.seq.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(SUEQ, f9, f10, FCC0), ++ "0c16a920 fcmp.sueq.s fcc0, f9, f10"); ++ // COMPARE(fcmp_cond_s(SLT, f13, f14, FCC0), ++ // "0c11b9a0 fcmp.slt.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(SULT, f1, f2, FCC0), ++ "0c158820 fcmp.sult.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(SLE, f5, f6, FCC0), ++ "0c1398a0 fcmp.sle.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(SULE, f9, f10, FCC0), ++ "0c17a920 fcmp.sule.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(SNE, f13, f14, FCC0), ++ "0c18b9a0 fcmp.sne.s fcc0, f13, f14"); ++ COMPARE(fcmp_cond_s(SOR, f13, f14, FCC0), ++ "0c1ab9a0 fcmp.sor.s fcc0, f13, f14"); ++ COMPARE(fcmp_cond_s(SUNE, f1, f2, FCC0), ++ "0c1c8820 fcmp.sune.s fcc0, f1, f2"); ++ ++ COMPARE(fcmp_cond_d(CAF, f1, f2, FCC0), ++ "0c200820 fcmp.caf.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(CUN, f5, f6, FCC0), ++ "0c2418a0 fcmp.cun.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(CEQ, f9, f10, FCC0), ++ "0c222920 fcmp.ceq.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(CUEQ, f13, f14, FCC0), ++ "0c2639a0 fcmp.cueq.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(CLT, f1, f2, FCC0), ++ "0c210820 fcmp.clt.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(CULT, f5, f6, FCC0), ++ "0c2518a0 fcmp.cult.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(CLE, f9, f10, FCC0), ++ "0c232920 fcmp.cle.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(CULE, f13, f14, FCC0), ++ "0c2739a0 fcmp.cule.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(CNE, f1, f2, FCC0), ++ "0c280820 fcmp.cne.d fcc0, f1, 
f2"); ++ COMPARE(fcmp_cond_d(COR, f5, f6, FCC0), ++ "0c2a18a0 fcmp.cor.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(CUNE, f9, f10, FCC0), ++ "0c2c2920 fcmp.cune.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(SAF, f13, f14, FCC0), ++ "0c20b9a0 fcmp.saf.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(SUN, f1, f2, FCC0), ++ "0c248820 fcmp.sun.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(SEQ, f5, f6, FCC0), ++ "0c2298a0 fcmp.seq.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(SUEQ, f9, f10, FCC0), ++ "0c26a920 fcmp.sueq.d fcc0, f9, f10"); ++ // COMPARE(fcmp_cond_d(SLT, f13, f14, FCC0), ++ // "0c21b9a0 fcmp.slt.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(SULT, f1, f2, FCC0), ++ "0c258820 fcmp.sult.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(SLE, f5, f6, FCC0), ++ "0c2398a0 fcmp.sle.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(SULE, f9, f10, FCC0), ++ "0c27a920 fcmp.sule.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(SNE, f13, f14, FCC0), ++ "0c28b9a0 fcmp.sne.d fcc0, f13, f14"); ++ COMPARE(fcmp_cond_d(SOR, f13, f14, FCC0), ++ "0c2ab9a0 fcmp.sor.d fcc0, f13, f14"); ++ COMPARE(fcmp_cond_d(SUNE, f1, f2, FCC0), ++ "0c2c8820 fcmp.sune.d fcc0, f1, f2"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp14) { ++ SET_UP(); ++ ++ COMPARE(alsl_w(a0, a1, a2, 1), "000418a4 alsl.w a0, a1, a2, 1"); ++ COMPARE(alsl_w(a3, a4, a5, 3), "00052507 alsl.w a3, a4, a5, 3"); ++ COMPARE(alsl_w(a6, a7, t0, 4), "0005b16a alsl.w a6, a7, t0, 4"); ++ ++ COMPARE(alsl_wu(t1, t2, t3, 1), "00063dcd alsl.wu t1, t2, t3, 1"); ++ COMPARE(alsl_wu(t4, t5, t6, 3), "00074a30 alsl.wu t4, t5, t6, 3"); ++ COMPARE(alsl_wu(a0, a1, a2, 4), "000798a4 alsl.wu a0, a1, a2, 4"); ++ ++ COMPARE(alsl_d(a3, a4, a5, 1), "002c2507 alsl.d a3, a4, a5, 1"); ++ COMPARE(alsl_d(a6, a7, t0, 3), "002d316a alsl.d a6, a7, t0, 3"); ++ COMPARE(alsl_d(t1, t2, t3, 4), "002dbdcd alsl.d t1, t2, t3, 4"); ++ ++ COMPARE(bytepick_w(t4, t5, t6, 0), ++ "00084a30 bytepick.w t4, t5, t6, 0"); ++ COMPARE(bytepick_w(a0, a1, a2, 3), ++ "000998a4 bytepick.w a0, a1, a2, 3"); ++ ++ COMPARE(bytepick_d(a6, a7, t0, 0), ++ "000c316a bytepick.d a6, a7, t0, 0"); ++ COMPARE(bytepick_d(t4, t5, t6, 7), ++ "000fca30 bytepick.d t4, t5, t6, 7"); ++ ++ COMPARE(slli_w(a3, a3, 31), "0040fce7 slli.w a3, a3, 31"); ++ COMPARE(slli_w(a6, a6, 1), "0040854a slli.w a6, a6, 1"); ++ ++ COMPARE(slli_d(t3, t2, 63), "0041fdcf slli.d t3, t2, 63"); ++ COMPARE(slli_d(t4, a6, 1), "00410550 slli.d t4, a6, 1"); ++ ++ COMPARE(srli_w(a7, a7, 31), "0044fd6b srli.w a7, a7, 31"); ++ COMPARE(srli_w(a4, a4, 1), "00448508 srli.w a4, a4, 1"); ++ ++ COMPARE(srli_d(a4, a3, 63), "0045fce8 srli.d a4, a3, 63"); ++ COMPARE(srli_d(a4, a4, 1), "00450508 srli.d a4, a4, 1"); ++ ++ COMPARE(srai_d(a0, a0, 63), "0049fc84 srai.d a0, a0, 63"); ++ COMPARE(srai_d(a4, a1, 1), "004904a8 srai.d a4, a1, 1"); ++ ++ COMPARE(srai_w(s4, a3, 31), "0048fcfb srai.w s4, a3, 31"); ++ COMPARE(srai_w(s4, a5, 1), "0048853b srai.w s4, a5, 1"); ++ ++ COMPARE(rotri_d(t7, t6, 1), "004d0653 rotri.d t7, t6, 1"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp17) { ++ SET_UP(); ++ ++ COMPARE(sltu(t5, t4, a4), "0012a211 sltu t5, t4, a4"); ++ COMPARE(sltu(t4, zero_reg, t4), "0012c010 sltu t4, zero_reg, t4"); ++ ++ COMPARE(add_w(a4, a4, a6), "00102908 add.w a4, a4, a6"); ++ COMPARE(add_w(a5, a6, t3), "00103d49 add.w a5, a6, t3"); ++ ++ COMPARE(add_d(a4, t0, t1), "0010b588 add.d a4, t0, t1"); ++ COMPARE(add_d(a6, a3, t1), "0010b4ea add.d a6, a3, t1"); ++ ++ COMPARE(sub_w(a7, a7, a2), "0011196b sub.w a7, a7, a2"); ++ COMPARE(sub_w(a2, a2, s3), "001168c6 sub.w a2, a2, s3"); ++ ++ COMPARE(sub_d(s3, ra, s3), "0011e83a 
sub.d s3, ra, s3"); ++ COMPARE(sub_d(a0, a1, a2), "001198a4 sub.d a0, a1, a2"); ++ ++ COMPARE(slt(a5, a5, a6), "00122929 slt a5, a5, a6"); ++ COMPARE(slt(a6, t3, t4), "001241ea slt a6, t3, t4"); ++ ++ COMPARE(masknez(a5, a5, a3), "00131d29 masknez a5, a5, a3"); ++ COMPARE(masknez(a3, a4, a5), "00132507 masknez a3, a4, a5"); ++ ++ COMPARE(maskeqz(a6, a7, t0), "0013b16a maskeqz a6, a7, t0"); ++ COMPARE(maskeqz(t1, t2, t3), "0013bdcd maskeqz t1, t2, t3"); ++ ++ COMPARE(or_(s3, sp, zero_reg), "0015007a or s3, sp, zero_reg"); ++ COMPARE(or_(a4, a0, zero_reg), "00150088 or a4, a0, zero_reg"); ++ ++ COMPARE(and_(sp, sp, t6), "0014c863 and sp, sp, t6"); ++ COMPARE(and_(a3, a3, a7), "0014ace7 and a3, a3, a7"); ++ ++ COMPARE(nor(a7, a7, a7), "00142d6b nor a7, a7, a7"); ++ COMPARE(nor(t4, t5, t6), "00144a30 nor t4, t5, t6"); ++ ++ COMPARE(xor_(a0, a1, a2), "001598a4 xor a0, a1, a2"); ++ COMPARE(xor_(a3, a4, a5), "0015a507 xor a3, a4, a5"); ++ ++ COMPARE(orn(a6, a7, t0), "0016316a orn a6, a7, t0"); ++ COMPARE(orn(t1, t2, t3), "00163dcd orn t1, t2, t3"); ++ ++ COMPARE(andn(t4, t5, t6), "0016ca30 andn t4, t5, t6"); ++ COMPARE(andn(a0, a1, a2), "001698a4 andn a0, a1, a2"); ++ ++ COMPARE(sll_w(a3, t0, a7), "00172d87 sll.w a3, t0, a7"); ++ COMPARE(sll_w(a3, a4, a3), "00171d07 sll.w a3, a4, a3"); ++ ++ COMPARE(srl_w(a3, a4, a3), "00179d07 srl.w a3, a4, a3"); ++ COMPARE(srl_w(a3, t1, t4), "0017c1a7 srl.w a3, t1, t4"); ++ ++ COMPARE(sra_w(a4, t4, a4), "00182208 sra.w a4, t4, a4"); ++ COMPARE(sra_w(a3, t1, a6), "001829a7 sra.w a3, t1, a6"); ++ ++ COMPARE(sll_d(a3, a1, a3), "00189ca7 sll.d a3, a1, a3"); ++ COMPARE(sll_d(a7, a4, t0), "0018b10b sll.d a7, a4, t0"); ++ ++ COMPARE(srl_d(a7, a7, t0), "0019316b srl.d a7, a7, t0"); ++ COMPARE(srl_d(t0, a6, t0), "0019314c srl.d t0, a6, t0"); ++ ++ COMPARE(sra_d(a3, a4, a5), "0019a507 sra.d a3, a4, a5"); ++ COMPARE(sra_d(a6, a7, t0), "0019b16a sra.d a6, a7, t0"); ++ ++ COMPARE(rotr_d(t1, t2, t3), "001bbdcd rotr.d t1, t2, t3"); ++ COMPARE(rotr_d(t4, t5, t6), "001bca30 rotr.d t4, t5, t6"); ++ ++ COMPARE(rotr_w(a0, a1, a2), "001b18a4 rotr.w a0, a1, a2"); ++ COMPARE(rotr_w(a3, a4, a5), "001b2507 rotr.w a3, a4, a5"); ++ ++ COMPARE(mul_w(t8, a5, t7), "001c4d34 mul.w t8, a5, t7"); ++ COMPARE(mul_w(t4, t5, t6), "001c4a30 mul.w t4, t5, t6"); ++ ++ COMPARE(mulh_w(s3, a3, t7), "001cccfa mulh.w s3, a3, t7"); ++ COMPARE(mulh_w(a0, a1, a2), "001c98a4 mulh.w a0, a1, a2"); ++ ++ COMPARE(mulh_wu(a6, a7, t0), "001d316a mulh.wu a6, a7, t0"); ++ COMPARE(mulh_wu(t1, t2, t3), "001d3dcd mulh.wu t1, t2, t3"); ++ ++ COMPARE(mul_d(t2, a5, t1), "001db52e mul.d t2, a5, t1"); ++ COMPARE(mul_d(a4, a4, a5), "001da508 mul.d a4, a4, a5"); ++ ++ COMPARE(mulh_d(a3, a4, a5), "001e2507 mulh.d a3, a4, a5"); ++ COMPARE(mulh_d(a6, a7, t0), "001e316a mulh.d a6, a7, t0"); ++ ++ COMPARE(mulh_du(t1, t2, t3), "001ebdcd mulh.du t1, t2, t3"); ++ COMPARE(mulh_du(t4, t5, t6), "001eca30 mulh.du t4, t5, t6"); ++ ++ COMPARE(mulw_d_w(a0, a1, a2), "001f18a4 mulw.d.w a0, a1, a2"); ++ COMPARE(mulw_d_w(a3, a4, a5), "001f2507 mulw.d.w a3, a4, a5"); ++ ++ COMPARE(mulw_d_wu(a6, a7, t0), "001fb16a mulw.d.wu a6, a7, t0"); ++ COMPARE(mulw_d_wu(t1, t2, t3), "001fbdcd mulw.d.wu t1, t2, t3"); ++ ++ COMPARE(div_w(a5, a5, a3), "00201d29 div.w a5, a5, a3"); ++ COMPARE(div_w(t4, t5, t6), "00204a30 div.w t4, t5, t6"); ++ ++ COMPARE(mod_w(a6, t3, a6), "0020a9ea mod.w a6, t3, a6"); ++ COMPARE(mod_w(a3, a4, a3), "00209d07 mod.w a3, a4, a3"); ++ ++ COMPARE(div_wu(t1, t2, t3), "00213dcd div.wu t1, t2, t3"); ++ COMPARE(div_wu(t4, t5, t6), 
"00214a30 div.wu t4, t5, t6"); ++ ++ COMPARE(mod_wu(a0, a1, a2), "002198a4 mod.wu a0, a1, a2"); ++ COMPARE(mod_wu(a3, a4, a5), "0021a507 mod.wu a3, a4, a5"); ++ ++ COMPARE(div_d(t0, t0, a6), "0022298c div.d t0, t0, a6"); ++ COMPARE(div_d(a7, a7, a5), "0022256b div.d a7, a7, a5"); ++ ++ COMPARE(mod_d(a6, a7, t0), "0022b16a mod.d a6, a7, t0"); ++ COMPARE(mod_d(t1, t2, t3), "0022bdcd mod.d t1, t2, t3"); ++ ++ COMPARE(div_du(t4, t5, t6), "00234a30 div.du t4, t5, t6"); ++ COMPARE(div_du(a0, a1, a2), "002318a4 div.du a0, a1, a2"); ++ ++ COMPARE(mod_du(a3, a4, a5), "0023a507 mod.du a3, a4, a5"); ++ COMPARE(mod_du(a6, a7, t0), "0023b16a mod.du a6, a7, t0"); ++ ++ COMPARE(fadd_s(f3, f4, f5), "01009483 fadd.s f3, f4, f5"); ++ COMPARE(fadd_s(f6, f7, f8), "0100a0e6 fadd.s f6, f7, f8"); ++ ++ COMPARE(fadd_d(f0, f1, f0), "01010020 fadd.d f0, f1, f0"); ++ COMPARE(fadd_d(f0, f1, f2), "01010820 fadd.d f0, f1, f2"); ++ ++ COMPARE(fsub_s(f9, f10, f11), "0102ad49 fsub.s f9, f10, f11"); ++ COMPARE(fsub_s(f12, f13, f14), "0102b9ac fsub.s f12, f13, f14"); ++ ++ COMPARE(fsub_d(f30, f0, f30), "0103781e fsub.d f30, f0, f30"); ++ COMPARE(fsub_d(f0, f0, f1), "01030400 fsub.d f0, f0, f1"); ++ ++ COMPARE(fmul_s(f15, f16, f17), "0104c60f fmul.s f15, f16, f17"); ++ COMPARE(fmul_s(f18, f19, f20), "0104d272 fmul.s f18, f19, f20"); ++ ++ COMPARE(fmul_d(f0, f0, f1), "01050400 fmul.d f0, f0, f1"); ++ COMPARE(fmul_d(f0, f0, f0), "01050000 fmul.d f0, f0, f0"); ++ ++ COMPARE(fdiv_s(f0, f1, f2), "01068820 fdiv.s f0, f1, f2"); ++ COMPARE(fdiv_s(f3, f4, f5), "01069483 fdiv.s f3, f4, f5"); ++ ++ COMPARE(fdiv_d(f0, f0, f1), "01070400 fdiv.d f0, f0, f1"); ++ COMPARE(fdiv_d(f0, f1, f0), "01070020 fdiv.d f0, f1, f0"); ++ ++ COMPARE(fmax_s(f9, f10, f11), "0108ad49 fmax.s f9, f10, f11"); ++ COMPARE(fmin_s(f6, f7, f8), "010aa0e6 fmin.s f6, f7, f8"); ++ ++ COMPARE(fmax_d(f0, f1, f0), "01090020 fmax.d f0, f1, f0"); ++ COMPARE(fmin_d(f0, f1, f0), "010b0020 fmin.d f0, f1, f0"); ++ ++ COMPARE(fmaxa_s(f12, f13, f14), "010cb9ac fmaxa.s f12, f13, f14"); ++ COMPARE(fmina_s(f15, f16, f17), "010ec60f fmina.s f15, f16, f17"); ++ ++ COMPARE(fmaxa_d(f18, f19, f20), "010d5272 fmaxa.d f18, f19, f20"); ++ COMPARE(fmina_d(f0, f1, f2), "010f0820 fmina.d f0, f1, f2"); ++ ++ COMPARE(ldx_b(a0, a1, a2), "380018a4 ldx.b a0, a1, a2"); ++ COMPARE(ldx_h(a3, a4, a5), "38042507 ldx.h a3, a4, a5"); ++ COMPARE(ldx_w(a6, a7, t0), "3808316a ldx.w a6, a7, t0"); ++ ++ COMPARE(stx_b(t1, t2, t3), "38103dcd stx.b t1, t2, t3"); ++ COMPARE(stx_h(t4, t5, t6), "38144a30 stx.h t4, t5, t6"); ++ COMPARE(stx_w(a0, a1, a2), "381818a4 stx.w a0, a1, a2"); ++ ++ COMPARE(ldx_bu(a3, a4, a5), "38202507 ldx.bu a3, a4, a5"); ++ COMPARE(ldx_hu(a6, a7, t0), "3824316a ldx.hu a6, a7, t0"); ++ COMPARE(ldx_wu(t1, t2, t3), "38283dcd ldx.wu t1, t2, t3"); ++ ++ COMPARE(ldx_d(a2, s6, t6), "380c4ba6 ldx.d a2, s6, t6"); ++ COMPARE(ldx_d(t7, s6, t6), "380c4bb3 ldx.d t7, s6, t6"); ++ ++ COMPARE(stx_d(a4, a3, t6), "381c48e8 stx.d a4, a3, t6"); ++ COMPARE(stx_d(a0, a3, t6), "381c48e4 stx.d a0, a3, t6"); ++ ++ COMPARE(preldx(0, t5, t6), "382c4a20 preldx 0x0(0), t5, t6"); ++ COMPARE(preldx(31, a1, a2), "382c18bf preldx 0x1f(31), a1, a2"); ++ ++ COMPARE(amswap_db_w(a0, a3, t6), "38691e44 amswap_db.w a0, a3, t6"); ++ COMPARE(amswap_db_d(a0, a3, t6), "38699e44 amswap_db.d a0, a3, t6"); ++ COMPARE(amadd_db_w(a0, a3, t6), "386a1e44 amadd_db.w a0, a3, t6"); ++ COMPARE(amadd_db_d(a0, a3, t6), "386a9e44 amadd_db.d a0, a3, t6"); ++ COMPARE(amand_db_w(a0, a3, t6), "386b1e44 amand_db.w a0, a3, t6"); ++ 
COMPARE(amand_db_d(a0, a3, t6), "386b9e44 amand_db.d a0, a3, t6"); ++ COMPARE(amor_db_w(a0, a3, t6), "386c1e44 amor_db.w a0, a3, t6"); ++ COMPARE(amor_db_d(a0, a3, t6), "386c9e44 amor_db.d a0, a3, t6"); ++ COMPARE(amxor_db_w(a0, a3, t6), "386d1e44 amxor_db.w a0, a3, t6"); ++ COMPARE(amxor_db_d(a0, a3, t6), "386d9e44 amxor_db.d a0, a3, t6"); ++ ++ COMPARE(dbar(0), "38720000 dbar 0x0(0)"); ++ COMPARE(ibar(5555), "387295b3 ibar 0x15b3(5555)"); ++ ++ COMPARE(break_(0), "002a0000 break code: 0x0(0)"); ++ COMPARE(break_(0x3fc0), "002a3fc0 break code: 0x3fc0(16320)"); ++ ++ COMPARE(fldx_s(f3, a4, a5), "38302503 fldx.s f3, a4, a5"); ++ COMPARE(fldx_d(f6, a7, t0), "38343166 fldx.d f6, a7, t0"); ++ ++ COMPARE(fstx_s(f1, t2, t3), "38383dc1 fstx.s f1, t2, t3"); ++ COMPARE(fstx_d(f4, t5, t6), "383c4a24 fstx.d f4, t5, t6"); ++ ++ COMPARE(asrtle_d(a0, a1), "00011480 asrtle.d a0, a1"); ++ COMPARE(asrtgt_d(a2, a3), "00019cc0 asrtgt.d a2, a3"); ++ ++ COMPARE(syscall(2), "002b0002 syscall code 0x2(2)"); ++ // COMPARE(hypcall(2), ++ // "002b8002 hypcall 0x2(2)"); ++ ++ COMPARE(amswap_w(a4, a5, a6), "38602548 amswap.w a4, a5, a6"); ++ COMPARE(amswap_d(a7, t0, t1), "3860b1ab amswap.d a7, t0, t1"); ++ ++ COMPARE(amadd_w(t2, t3, t4), "38613e0e amadd.w t2, t3, t4"); ++ COMPARE(amadd_d(t5, t6, a0), "3861c891 amadd.d t5, t6, a0"); ++ ++ COMPARE(amand_w(a1, a2, a3), "386218e5 amand.w a1, a2, a3"); ++ COMPARE(amand_d(a4, a5, a6), "3862a548 amand.d a4, a5, a6"); ++ ++ COMPARE(amor_w(a7, t0, t1), "386331ab amor.w a7, t0, t1"); ++ COMPARE(amor_d(t2, t3, t4), "3863be0e amor.d t2, t3, t4"); ++ ++ COMPARE(amxor_w(t5, t6, a0), "38644891 amxor.w t5, t6, a0"); ++ COMPARE(amxor_d(a1, a2, a3), "386498e5 amxor.d a1, a2, a3"); ++ ++ COMPARE(ammax_w(a4, a5, a6), "38652548 ammax.w a4, a5, a6"); ++ COMPARE(ammax_d(a7, t0, t1), "3865b1ab ammax.d a7, t0, t1"); ++ ++ COMPARE(ammin_w(t2, t3, t4), "38663e0e ammin.w t2, t3, t4"); ++ COMPARE(ammin_d(t5, t6, a0), "3866c891 ammin.d t5, t6, a0"); ++ ++ COMPARE(ammax_wu(a1, a2, a3), "386718e5 ammax.wu a1, a2, a3"); ++ COMPARE(ammax_du(a4, a5, a6), "3867a548 ammax.du a4, a5, a6"); ++ ++ COMPARE(ammin_wu(a7, t0, t1), "386831ab ammin.wu a7, t0, t1"); ++ COMPARE(ammin_du(t2, t3, t4), "3868be0e ammin.du t2, t3, t4"); ++ ++ COMPARE(ammax_db_d(a0, a1, a2), "386e94c4 ammax_db.d a0, a1, a2"); ++ COMPARE(ammax_db_du(a3, a4, a5), "3870a127 ammax_db.du a3, a4, a5"); ++ ++ COMPARE(ammax_db_w(a6, a7, t0), "386e2d8a ammax_db.w a6, a7, t0"); ++ COMPARE(ammax_db_wu(t1, t2, t3), "387039ed ammax_db.wu t1, t2, t3"); ++ ++ COMPARE(ammin_db_d(t4, t5, t6), "386fc650 ammin_db.d t4, t5, t6"); ++ COMPARE(ammin_db_du(a0, a1, a2), "387194c4 ammin_db.du a0, a1, a2"); ++ ++ COMPARE(ammin_db_wu(a3, a4, a5), "38712127 ammin_db.wu a3, a4, a5"); ++ COMPARE(ammin_db_w(a6, a7, t0), "386f2d8a ammin_db.w a6, a7, t0"); ++ ++ COMPARE(fldgt_s(f0, a1, a2), "387418a0 fldgt.s f0, a1, a2"); ++ COMPARE(fldgt_d(f2, a3, a4), "3874a0e2 fldgt.d f2, a3, a4"); ++ ++ COMPARE(fldle_s(f5, a6, a7), "38752d45 fldle.s f5, a6, a7"); ++ COMPARE(fldle_d(f8, t0, t1), "3875b588 fldle.d f8, t0, t1"); ++ ++ COMPARE(fstgt_s(f11, t2, t3), "38763dcb fstgt.s f11, t2, t3"); ++ COMPARE(fstgt_d(f14, t4, t5), "3876c60e fstgt.d f14, t4, t5"); ++ ++ COMPARE(fstle_s(f17, t6, a0), "38771251 fstle.s f17, t6, a0"); ++ COMPARE(fstle_d(f20, a1, a2), "387798b4 fstle.d f20, a1, a2"); ++ ++ COMPARE(ldgt_b(a1, a2, a3), "38781cc5 ldgt.b a1, a2, a3"); ++ COMPARE(ldgt_h(a4, a5, a6), "3878a928 ldgt.h a4, a5, a6"); ++ COMPARE(ldgt_w(a7, t0, t1), "3879358b ldgt.w a7, t0, t1"); ++ 
COMPARE(ldgt_d(t2, t3, t4), "3879c1ee ldgt.d t2, t3, t4"); ++ ++ COMPARE(ldle_b(t5, t6, a0), "387a1251 ldle.b t5, t6, a0"); ++ COMPARE(ldle_h(a1, a2, a3), "387a9cc5 ldle.h a1, a2, a3"); ++ COMPARE(ldle_w(a4, a5, a6), "387b2928 ldle.w a4, a5, a6"); ++ COMPARE(ldle_d(a7, t0, t1), "387bb58b ldle.d a7, t0, t1"); ++ ++ COMPARE(stgt_b(t2, t3, t4), "387c41ee stgt.b t2, t3, t4"); ++ COMPARE(stgt_h(t5, t6, a0), "387c9251 stgt.h t5, t6, a0"); ++ COMPARE(stgt_w(a1, a2, a3), "387d1cc5 stgt.w a1, a2, a3"); ++ COMPARE(stgt_d(a4, a5, a6), "387da928 stgt.d a4, a5, a6"); ++ ++ COMPARE(stle_b(a7, t0, t1), "387e358b stle.b a7, t0, t1"); ++ COMPARE(stle_h(t2, t3, t4), "387ec1ee stle.h t2, t3, t4"); ++ COMPARE(stle_w(t5, t6, a0), "387f1251 stle.w t5, t6, a0"); ++ COMPARE(stle_d(a1, a2, a3), "387f9cc5 stle.d a1, a2, a3"); ++ ++ COMPARE(fscaleb_s(f0, f1, f2), "01108820 fscaleb.s f0, f1, f2"); ++ COMPARE(fscaleb_d(f3, f4, f5), "01111483 fscaleb.d f3, f4, f5"); ++ ++ COMPARE(fcopysign_s(f6, f7, f8), "0112a0e6 fcopysign.s f6, f7, f8"); ++ COMPARE(fcopysign_d(f9, f10, f12), ++ "01133149 fcopysign.d f9, f10, f12"); ++ ++ COMPARE(crc_w_b_w(a4, a5, a6), "00242928 crc.w.b.w a4, a5, a6"); ++ COMPARE(crc_w_h_w(a7, t0, t1), "0024b58b crc.w.h.w a7, t0, t1"); ++ COMPARE(crc_w_w_w(t2, t3, t4), "002541ee crc.w.w.w t2, t3, t4"); ++ COMPARE(crc_w_d_w(t5, t6, a0), "00259251 crc.w.d.w t5, t6, a0"); ++ ++ COMPARE(crcc_w_b_w(a1, a2, a3), "00261cc5 crcc.w.b.w a1, a2, a3"); ++ COMPARE(crcc_w_h_w(a4, a5, a6), "0026a928 crcc.w.h.w a4, a5, a6"); ++ COMPARE(crcc_w_w_w(a7, t0, t1), "0027358b crcc.w.w.w a7, t0, t1"); ++ COMPARE(crcc_w_d_w(t2, t3, t4), "0027c1ee crcc.w.d.w t2, t3, t4"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp22) { ++ SET_UP(); ++ ++ COMPARE(clz_w(a3, a0), "00001487 clz.w a3, a0"); ++ COMPARE(ctz_w(a0, a1), "00001ca4 ctz.w a0, a1"); ++ COMPARE(clz_d(a2, a3), "000024e6 clz.d a2, a3"); ++ COMPARE(ctz_d(a4, a5), "00002d28 ctz.d a4, a5"); ++ ++ COMPARE(clo_w(a0, a1), "000010a4 clo.w a0, a1"); ++ COMPARE(cto_w(a2, a3), "000018e6 cto.w a2, a3"); ++ COMPARE(clo_d(a4, a5), "00002128 clo.d a4, a5"); ++ COMPARE(cto_d(a6, a7), "0000296a cto.d a6, a7"); ++ ++ COMPARE(revb_2h(a6, a7), "0000316a revb.2h a6, a7"); ++ COMPARE(revb_4h(t0, t1), "000035ac revb.4h t0, t1"); ++ COMPARE(revb_2w(t2, t3), "000039ee revb.2w t2, t3"); ++ COMPARE(revb_d(t4, t5), "00003e30 revb.d t4, t5"); ++ ++ COMPARE(revh_2w(a0, a1), "000040a4 revh.2w a0, a1"); ++ COMPARE(revh_d(a2, a3), "000044e6 revh.d a2, a3"); ++ ++ COMPARE(bitrev_4b(a4, a5), "00004928 bitrev.4b a4, a5"); ++ COMPARE(bitrev_8b(a6, a7), "00004d6a bitrev.8b a6, a7"); ++ COMPARE(bitrev_w(t0, t1), "000051ac bitrev.w t0, t1"); ++ COMPARE(bitrev_d(t2, t3), "000055ee bitrev.d t2, t3"); ++ ++ COMPARE(ext_w_b(t4, t5), "00005e30 ext.w.b t4, t5"); ++ COMPARE(ext_w_h(a0, a1), "000058a4 ext.w.h a0, a1"); ++ ++ COMPARE(fabs_s(f2, f3), "01140462 fabs.s f2, f3"); ++ COMPARE(fabs_d(f0, f0), "01140800 fabs.d f0, f0"); ++ ++ COMPARE(fneg_s(f0, f1), "01141420 fneg.s f0, f1"); ++ COMPARE(fneg_d(f0, f0), "01141800 fneg.d f0, f0"); ++ ++ COMPARE(fsqrt_s(f4, f5), "011444a4 fsqrt.s f4, f5"); ++ COMPARE(fsqrt_d(f0, f0), "01144800 fsqrt.d f0, f0"); ++ ++ COMPARE(fmov_s(f6, f7), "011494e6 fmov.s f6, f7"); ++ COMPARE(fmov_d(f0, f1), "01149820 fmov.d f0, f1"); ++ COMPARE(fmov_d(f1, f0), "01149801 fmov.d f1, f0"); ++ ++ COMPARE(movgr2fr_d(f0, t6), "0114aa40 movgr2fr.d f0, t6"); ++ COMPARE(movgr2fr_d(f1, t6), "0114aa41 movgr2fr.d f1, t6"); ++ ++ COMPARE(movgr2fr_w(f30, a3), "0114a4fe movgr2fr.w f30, a3"); ++ 
COMPARE(movgr2fr_w(f30, a0), "0114a49e movgr2fr.w f30, a0"); ++ ++ COMPARE(movgr2frh_w(f30, t6), "0114ae5e movgr2frh.w f30, t6"); ++ COMPARE(movgr2frh_w(f0, a3), "0114ace0 movgr2frh.w f0, a3"); ++ ++ COMPARE(movfr2gr_s(a3, f30), "0114b7c7 movfr2gr.s a3, f30"); ++ ++ COMPARE(movfr2gr_d(a6, f30), "0114bbca movfr2gr.d a6, f30"); ++ COMPARE(movfr2gr_d(t7, f30), "0114bbd3 movfr2gr.d t7, f30"); ++ ++ COMPARE(movfrh2gr_s(a5, f0), "0114bc09 movfrh2gr.s a5, f0"); ++ COMPARE(movfrh2gr_s(a4, f0), "0114bc08 movfrh2gr.s a4, f0"); ++ ++ COMPARE(movgr2fcsr(a2), "0114c0c0 movgr2fcsr fcsr, a2"); ++ COMPARE(movfcsr2gr(a4), "0114c808 movfcsr2gr a4, fcsr"); ++ ++ COMPARE(movfr2cf(FCC0, f0), "0114d000 movfr2cf fcc0, f0"); ++ COMPARE(movcf2fr(f1, FCC1), "0114d421 movcf2fr f1, fcc1"); ++ ++ COMPARE(movgr2cf(FCC2, a0), "0114d882 movgr2cf fcc2, a0"); ++ COMPARE(movcf2gr(a1, FCC3), "0114dc65 movcf2gr a1, fcc3"); ++ ++ COMPARE(fcvt_s_d(f0, f0), "01191800 fcvt.s.d f0, f0"); ++ COMPARE(fcvt_d_s(f0, f0), "01192400 fcvt.d.s f0, f0"); ++ ++ COMPARE(ftintrm_w_s(f8, f9), "011a0528 ftintrm.w.s f8, f9"); ++ COMPARE(ftintrm_w_d(f10, f11), "011a096a ftintrm.w.d f10, f11"); ++ COMPARE(ftintrm_l_s(f12, f13), "011a25ac ftintrm.l.s f12, f13"); ++ COMPARE(ftintrm_l_d(f14, f15), "011a29ee ftintrm.l.d f14, f15"); ++ ++ COMPARE(ftintrp_w_s(f16, f17), "011a4630 ftintrp.w.s f16, f17"); ++ COMPARE(ftintrp_w_d(f18, f19), "011a4a72 ftintrp.w.d f18, f19"); ++ COMPARE(ftintrp_l_s(f20, f21), "011a66b4 ftintrp.l.s f20, f21"); ++ COMPARE(ftintrp_l_d(f0, f1), "011a6820 ftintrp.l.d f0, f1"); ++ ++ COMPARE(ftintrz_w_s(f30, f4), "011a849e ftintrz.w.s f30, f4"); ++ COMPARE(ftintrz_w_d(f30, f4), "011a889e ftintrz.w.d f30, f4"); ++ COMPARE(ftintrz_l_s(f30, f0), "011aa41e ftintrz.l.s f30, f0"); ++ COMPARE(ftintrz_l_d(f30, f30), "011aabde ftintrz.l.d f30, f30"); ++ ++ COMPARE(ftintrne_w_s(f2, f3), "011ac462 ftintrne.w.s f2, f3"); ++ COMPARE(ftintrne_w_d(f4, f5), "011ac8a4 ftintrne.w.d f4, f5"); ++ COMPARE(ftintrne_l_s(f6, f7), "011ae4e6 ftintrne.l.s f6, f7"); ++ COMPARE(ftintrne_l_d(f8, f9), "011ae928 ftintrne.l.d f8, f9"); ++ ++ COMPARE(ftint_w_s(f10, f11), "011b056a ftint.w.s f10, f11"); ++ COMPARE(ftint_w_d(f12, f13), "011b09ac ftint.w.d f12, f13"); ++ COMPARE(ftint_l_s(f14, f15), "011b25ee ftint.l.s f14, f15"); ++ COMPARE(ftint_l_d(f16, f17), "011b2a30 ftint.l.d f16, f17"); ++ ++ COMPARE(ffint_s_w(f18, f19), "011d1272 ffint.s.w f18, f19"); ++ COMPARE(ffint_s_l(f20, f21), "011d1ab4 ffint.s.l f20, f21"); ++ COMPARE(ffint_d_w(f0, f1), "011d2020 ffint.d.w f0, f1"); ++ COMPARE(ffint_d_l(f2, f3), "011d2862 ffint.d.l f2, f3"); ++ ++ COMPARE(frint_s(f4, f5), "011e44a4 frint.s f4, f5"); ++ COMPARE(frint_d(f6, f7), "011e48e6 frint.d f6, f7"); ++ ++ COMPARE(frecip_s(f8, f9), "01145528 frecip.s f8, f9"); ++ COMPARE(frecip_d(f10, f11), "0114596a frecip.d f10, f11"); ++ ++ COMPARE(frsqrt_s(f12, f13), "011465ac frsqrt.s f12, f13"); ++ COMPARE(frsqrt_d(f14, f15), "011469ee frsqrt.d f14, f15"); ++ ++ COMPARE(fclass_s(f16, f17), "01143630 fclass.s f16, f17"); ++ COMPARE(fclass_d(f18, f19), "01143a72 fclass.d f18, f19"); ++ ++ COMPARE(flogb_s(f20, f21), "011426b4 flogb.s f20, f21"); ++ COMPARE(flogb_d(f0, f1), "01142820 flogb.d f0, f1"); ++ ++ COMPARE(rdtimel_w(t0, t1), "000061ac rdtimel.w t0, t1"); ++ COMPARE(rdtimeh_w(t2, t3), "000065ee rdtimeh.w t2, t3"); ++ COMPARE(rdtime_d(t4, t5), "00006a30 rdtime.d t4, t5"); ++ ++ VERIFY_RUN(); ++} ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/test/cctest/test-macro-assembler-la64.cc 
b/src/3rdparty/chromium/v8/test/cctest/test-macro-assembler-la64.cc +new file mode 100644 +index 00000000000..ef536b862ba +--- /dev/null ++++ b/src/3rdparty/chromium/v8/test/cctest/test-macro-assembler-la64.cc +@@ -0,0 +1,2894 @@ ++// Copyright 2013 the V8 project authors. All rights reserved. ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Google Inc. nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++#include <stdlib.h> ++ ++#include <iostream> // NOLINT(readability/streams) ++ ++#include "src/base/utils/random-number-generator.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/execution/simulator.h" ++#include "src/init/v8.h" ++#include "src/objects/heap-number.h" ++#include "src/objects/objects-inl.h" ++#include "src/utils/ostreams.h" ++#include "test/cctest/cctest.h" ++ ++namespace v8 { ++namespace internal { ++ ++// TODO(mips64): Refine these signatures per test case.
++using FV = void*(int64_t x, int64_t y, int p2, int p3, int p4); ++using F1 = void*(int x, int p1, int p2, int p3, int p4); ++using F2 = void*(int x, int y, int p2, int p3, int p4); ++using F3 = void*(void* p, int p1, int p2, int p3, int p4); ++using F4 = void*(void* p0, void* p1, int p2, int p3, int p4); ++ ++#define __ masm-> ++ ++TEST(BYTESWAP) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ uint64_t s8; ++ uint64_t s4; ++ uint64_t s2; ++ uint64_t u4; ++ uint64_t u2; ++ }; ++ ++ T t; ++ // clang-format off ++ uint64_t test_values[] = {0x5612FFCD9D327ACC, ++ 0x781A15C3, ++ 0xFCDE, ++ 0x9F, ++ 0xC81A15C3, ++ 0x8000000000000000, ++ 0xFFFFFFFFFFFFFFFF, ++ 0x0000000080000000, ++ 0x0000000000008000}; ++ // clang-format on ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ MacroAssembler* masm = &assembler; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, s8))); ++ __ ByteSwapSigned(a4, a4, 8); ++ __ St_d(a4, MemOperand(a0, offsetof(T, s8))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, s4))); ++ __ ByteSwapSigned(a4, a4, 4); ++ __ St_d(a4, MemOperand(a0, offsetof(T, s4))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, s2))); ++ __ ByteSwapSigned(a4, a4, 2); ++ __ St_d(a4, MemOperand(a0, offsetof(T, s2))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, u4))); ++ __ ByteSwapSigned(a4, a4, 4); ++ __ St_d(a4, MemOperand(a0, offsetof(T, u4))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, u2))); ++ __ ByteSwapSigned(a4, a4, 2); ++ __ St_d(a4, MemOperand(a0, offsetof(T, u2))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ for (size_t i = 0; i < arraysize(test_values); i++) { ++ int32_t in_s4 = static_cast(test_values[i]); ++ int16_t in_s2 = static_cast(test_values[i]); ++ uint32_t in_u4 = static_cast(test_values[i]); ++ uint16_t in_u2 = static_cast(test_values[i]); ++ ++ t.s8 = test_values[i]; ++ t.s4 = static_cast(in_s4); ++ t.s2 = static_cast(in_s2); ++ t.u4 = static_cast(in_u4); ++ t.u2 = static_cast(in_u2); ++ ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(ByteReverse(test_values[i]), t.s8); ++ CHECK_EQ(ByteReverse(in_s4), static_cast(t.s4)); ++ CHECK_EQ(ByteReverse(in_s2), static_cast(t.s2)); ++ CHECK_EQ(ByteReverse(in_u4), static_cast(t.u4)); ++ CHECK_EQ(ByteReverse(in_u2), static_cast(t.u2)); ++ } ++} ++ ++TEST(LoadConstants) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope handles(isolate); ++ ++ int64_t refConstants[64]; ++ int64_t result[64]; ++ ++ int64_t mask = 1; ++ for (int i = 0; i < 64; i++) { ++ refConstants[i] = ~(mask << i); ++ } ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ or_(a4, a0, zero_reg); ++ for (int i = 0; i < 64; i++) { ++ // Load constant. ++ __ li(a5, Operand(refConstants[i])); ++ __ St_d(a5, MemOperand(a4, zero_reg)); ++ __ Add_d(a4, a4, Operand(kPointerSize)); ++ } ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ (void)f.Call(reinterpret_cast(result), 0, 0, 0, 0); ++ // Check results. 
++ for (int i = 0; i < 64; i++) { ++ CHECK(refConstants[i] == result[i]); ++ } ++} ++ ++TEST(LoadAddress) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope handles(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ Label to_jump, skip; ++ __ mov(a4, a0); ++ ++ __ Branch(&skip); ++ __ bind(&to_jump); ++ __ nop(); ++ __ nop(); ++ __ jirl(zero_reg, ra, 0); ++ __ bind(&skip); ++ __ li(a4, Operand(masm->jump_address(&to_jump)), ADDRESS_LOAD); ++ int check_size = masm->InstructionsGeneratedSince(&skip); ++ CHECK_EQ(3, check_size); ++ __ jirl(zero_reg, a4, 0); ++ __ stop(); ++ __ stop(); ++ __ stop(); ++ __ stop(); ++ __ stop(); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ (void)f.Call(0, 0, 0, 0, 0); ++ // Check results. ++} ++ ++TEST(jump_tables4) { ++ // Similar to test-assembler-mips jump_tables1, with extra test for branch ++ // trampoline required before emission of the dd table (where trampolines are ++ // blocked), and proper transition to long-branch mode. ++ // Regression test for v8:4294. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ const int kNumCases = 512; ++ int values[kNumCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kNumCases]; ++ Label near_start, end, done; ++ ++ __ Push(ra); ++ __ xor_(a2, a2, a2); ++ ++ __ Branch(&end); ++ __ bind(&near_start); ++ ++ for (int i = 0; i < 32768 - 256; ++i) { ++ __ Add_d(a2, a2, 1); ++ } ++ ++ __ GenerateSwitchTable(a0, kNumCases, ++ [&labels](size_t i) { return labels + i; }); ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ __ li(a2, values[i]); ++ __ Branch(&done); ++ } ++ ++ __ bind(&done); ++ __ Pop(ra); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ __ bind(&end); ++ __ Branch(&near_start); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ(values[i], res); ++ } ++} ++ ++TEST(jump_tables6) { ++ // Similar to test-assembler-mips jump_tables1, with extra test for branch ++ // trampoline required after emission of the dd table (where trampolines are ++ // blocked). 
This test checks if number of really generated instructions is ++ // greater than number of counted instructions from code, as we are expecting ++ // generation of trampoline in this case ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ const int kSwitchTableCases = 40; ++ ++ const int kMaxBranchOffset = (1 << (18 - 1)) - 1; ++ const int kTrampolineSlotsSize = Assembler::kTrampolineSlotsSize; ++ const int kSwitchTablePrologueSize = MacroAssembler::kSwitchTablePrologueSize; ++ ++ const int kMaxOffsetForTrampolineStart = ++ kMaxBranchOffset - 16 * kTrampolineSlotsSize; ++ const int kFillInstr = (kMaxOffsetForTrampolineStart / kInstrSize) - ++ (kSwitchTablePrologueSize + 2 * kSwitchTableCases) - ++ 20; ++ ++ int values[kSwitchTableCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kSwitchTableCases]; ++ Label near_start, end, done; ++ ++ __ Push(ra); ++ __ xor_(a2, a2, a2); ++ ++ int offs1 = masm->pc_offset(); ++ int gen_insn = 0; ++ ++ __ Branch(&end); ++ gen_insn += 1; ++ __ bind(&near_start); ++ ++ for (int i = 0; i < kFillInstr; ++i) { ++ __ Add_d(a2, a2, 1); ++ } ++ gen_insn += kFillInstr; ++ ++ __ GenerateSwitchTable(a0, kSwitchTableCases, ++ [&labels](size_t i) { return labels + i; }); ++ gen_insn += (kSwitchTablePrologueSize + 2 * kSwitchTableCases); ++ ++ for (int i = 0; i < kSwitchTableCases; ++i) { ++ __ bind(&labels[i]); ++ __ li(a2, values[i]); ++ __ Branch(&done); ++ } ++ gen_insn += 3 * kSwitchTableCases; ++ ++ // If offset from here to first branch instr is greater than max allowed ++ // offset for trampoline ... ++ CHECK_LT(kMaxOffsetForTrampolineStart, masm->pc_offset() - offs1); ++ // ... 
number of generated instructions must be greater then "gen_insn", ++ // as we are expecting trampoline generation ++ CHECK_LT(gen_insn, (masm->pc_offset() - offs1) / kInstrSize); ++ ++ __ bind(&done); ++ __ Pop(ra); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ __ bind(&end); ++ __ Branch(&near_start); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kSwitchTableCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ(values[i], res); ++ } ++} ++ ++static uint64_t run_alsl_w(uint32_t rj, uint32_t rk, int8_t sa) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ Alsl_w(a2, a0, a1, sa); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assembler.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(rj, rk, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(ALSL_W) { ++ CcTest::InitializeVM(); ++ struct TestCaseAlsl { ++ int32_t rj; ++ int32_t rk; ++ uint8_t sa; ++ uint64_t expected_res; ++ }; ++ // clang-format off ++ struct TestCaseAlsl tc[] = {// rj, rk, sa, expected_res ++ {0x1, 0x4, 1, 0x6}, ++ {0x1, 0x4, 2, 0x8}, ++ {0x1, 0x4, 3, 0xC}, ++ {0x1, 0x4, 4, 0x14}, ++ {0x1, 0x4, 5, 0x24}, ++ {0x1, 0x0, 1, 0x2}, ++ {0x1, 0x0, 2, 0x4}, ++ {0x1, 0x0, 3, 0x8}, ++ {0x1, 0x0, 4, 0x10}, ++ {0x1, 0x0, 5, 0x20}, ++ {0x0, 0x4, 1, 0x4}, ++ {0x0, 0x4, 2, 0x4}, ++ {0x0, 0x4, 3, 0x4}, ++ {0x0, 0x4, 4, 0x4}, ++ {0x0, 0x4, 5, 0x4}, ++ ++ // Shift overflow. ++ {INT32_MAX, 0x4, 1, 0x2}, ++ {INT32_MAX >> 1, 0x4, 2, 0x0}, ++ {INT32_MAX >> 2, 0x4, 3, 0xFFFFFFFFFFFFFFFC}, ++ {INT32_MAX >> 3, 0x4, 4, 0xFFFFFFFFFFFFFFF4}, ++ {INT32_MAX >> 4, 0x4, 5, 0xFFFFFFFFFFFFFFE4}, ++ ++ // Signed addition overflow. ++ {0x1, INT32_MAX - 1, 1, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 3, 2, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 7, 3, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 15, 4, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 31, 5, 0xFFFFFFFF80000000}, ++ ++ // Addition overflow. ++ {0x1, -2, 1, 0x0}, ++ {0x1, -4, 2, 0x0}, ++ {0x1, -8, 3, 0x0}, ++ {0x1, -16, 4, 0x0}, ++ {0x1, -32, 5, 0x0}}; ++ // clang-format on ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseAlsl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_alsl_w(tc[i].rj, tc[i].rk, tc[i].sa); ++ PrintF("0x%" PRIx64 " =? 
0x%" PRIx64 " == Alsl_w(a0, %x, %x, %hhu)\n", ++ tc[i].expected_res, res, tc[i].rj, tc[i].rk, tc[i].sa); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++static uint64_t run_alsl_d(uint64_t rj, uint64_t rk, int8_t sa) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ Alsl_d(a2, a0, a1, sa); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assembler.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(rj, rk, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(ALSL_D) { ++ CcTest::InitializeVM(); ++ struct TestCaseAlsl { ++ int64_t rj; ++ int64_t rk; ++ uint8_t sa; ++ uint64_t expected_res; ++ }; ++ // clang-format off ++ struct TestCaseAlsl tc[] = {// rj, rk, sa, expected_res ++ {0x1, 0x4, 1, 0x6}, ++ {0x1, 0x4, 2, 0x8}, ++ {0x1, 0x4, 3, 0xC}, ++ {0x1, 0x4, 4, 0x14}, ++ {0x1, 0x4, 5, 0x24}, ++ {0x1, 0x0, 1, 0x2}, ++ {0x1, 0x0, 2, 0x4}, ++ {0x1, 0x0, 3, 0x8}, ++ {0x1, 0x0, 4, 0x10}, ++ {0x1, 0x0, 5, 0x20}, ++ {0x0, 0x4, 1, 0x4}, ++ {0x0, 0x4, 2, 0x4}, ++ {0x0, 0x4, 3, 0x4}, ++ {0x0, 0x4, 4, 0x4}, ++ {0x0, 0x4, 5, 0x4}, ++ ++ // Shift overflow. ++ {INT64_MAX, 0x4, 1, 0x2}, ++ {INT64_MAX >> 1, 0x4, 2, 0x0}, ++ {INT64_MAX >> 2, 0x4, 3, 0xFFFFFFFFFFFFFFFC}, ++ {INT64_MAX >> 3, 0x4, 4, 0xFFFFFFFFFFFFFFF4}, ++ {INT64_MAX >> 4, 0x4, 5, 0xFFFFFFFFFFFFFFE4}, ++ ++ // Signed addition overflow. ++ {0x1, INT64_MAX - 1, 1, 0x8000000000000000}, ++ {0x1, INT64_MAX - 3, 2, 0x8000000000000000}, ++ {0x1, INT64_MAX - 7, 3, 0x8000000000000000}, ++ {0x1, INT64_MAX - 15, 4, 0x8000000000000000}, ++ {0x1, INT64_MAX - 31, 5, 0x8000000000000000}, ++ ++ // Addition overflow. ++ {0x1, -2, 1, 0x0}, ++ {0x1, -4, 2, 0x0}, ++ {0x1, -8, 3, 0x0}, ++ {0x1, -16, 4, 0x0}, ++ {0x1, -32, 5, 0x0}}; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseAlsl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_alsl_d(tc[i].rj, tc[i].rk, tc[i].sa); ++ PrintF("0x%" PRIx64 " =? 
0x%" PRIx64 " == Dlsa(v0, %" PRIx64 ", %" PRIx64 ++ ", %hhu)\n", ++ tc[i].expected_res, res, tc[i].rj, tc[i].rk, tc[i].sa); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++// clang-format off ++static const std::vector ffint_ftintrz_uint32_test_values() { ++ static const uint32_t kValues[] = {0x00000000, 0x00000001, 0x00FFFF00, ++ 0x7FFFFFFF, 0x80000000, 0x80000001, ++ 0x80FFFF00, 0x8FFFFFFF, 0xFFFFFFFF}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector ffint_ftintrz_int32_test_values() { ++ static const int32_t kValues[] = { ++ static_cast(0x00000000), static_cast(0x00000001), ++ static_cast(0x00FFFF00), static_cast(0x7FFFFFFF), ++ static_cast(0x80000000), static_cast(0x80000001), ++ static_cast(0x80FFFF00), static_cast(0x8FFFFFFF), ++ static_cast(0xFFFFFFFF)}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector ffint_ftintrz_uint64_test_values() { ++ static const uint64_t kValues[] = { ++ 0x0000000000000000, 0x0000000000000001, 0x0000FFFFFFFF0000, ++ 0x7FFFFFFFFFFFFFFF, 0x8000000000000000, 0x8000000000000001, ++ 0x8000FFFFFFFF0000, 0x8FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector ffint_ftintrz_int64_test_values() { ++ static const int64_t kValues[] = {static_cast(0x0000000000000000), ++ static_cast(0x0000000000000001), ++ static_cast(0x0000FFFFFFFF0000), ++ static_cast(0x7FFFFFFFFFFFFFFF), ++ static_cast(0x8000000000000000), ++ static_cast(0x8000000000000001), ++ static_cast(0x8000FFFFFFFF0000), ++ static_cast(0x8FFFFFFFFFFFFFFF), ++ static_cast(0xFFFFFFFFFFFFFFFF)}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ // clang-off on ++ ++// Helper macros that can be used in FOR_INT32_INPUTS(i) { ... *i ... 
} ++#define FOR_INPUTS(ctype, itype, var, test_vector) \ ++ std::vector var##_vec = test_vector(); \ ++ for (std::vector::iterator var = var##_vec.begin(); \ ++ var != var##_vec.end(); ++var) ++ ++#define FOR_INPUTS2(ctype, itype, var, var2, test_vector) \ ++ std::vector var##_vec = test_vector(); \ ++ std::vector::iterator var; \ ++ std::vector::reverse_iterator var2; \ ++ for (var = var##_vec.begin(), var2 = var##_vec.rbegin(); \ ++ var != var##_vec.end(); ++var, ++var2) ++ ++#define FOR_ENUM_INPUTS(var, type, test_vector) \ ++ FOR_INPUTS(enum type, type, var, test_vector) ++#define FOR_STRUCT_INPUTS(var, type, test_vector) \ ++ FOR_INPUTS(struct type, type, var, test_vector) ++#define FOR_INT32_INPUTS(var, test_vector) \ ++ FOR_INPUTS(int32_t, int32, var, test_vector) ++#define FOR_INT32_INPUTS2(var, var2, test_vector) \ ++ FOR_INPUTS2(int32_t, int32, var, var2, test_vector) ++#define FOR_INT64_INPUTS(var, test_vector) \ ++ FOR_INPUTS(int64_t, int64, var, test_vector) ++#define FOR_UINT32_INPUTS(var, test_vector) \ ++ FOR_INPUTS(uint32_t, uint32, var, test_vector) ++#define FOR_UINT64_INPUTS(var, test_vector) \ ++ FOR_INPUTS(uint64_t, uint64, var, test_vector) ++ ++template ++RET_TYPE run_CVT(IN_TYPE x, Func GenerateConvertInstructionFunc) { ++ using F_CVT = RET_TYPE(IN_TYPE x0, int x1, int x2, int x3, int x4); ++ ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assm; ++ ++ GenerateConvertInstructionFunc(masm); ++ __ movfr2gr_d(a2, f9); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ return reinterpret_cast(f.Call(x, 0, 0, 0, 0)); ++} ++ ++TEST(Ffint_s_uw_Ftintrz_uw_s) { ++ CcTest::InitializeVM(); ++ FOR_UINT32_INPUTS(i, ffint_ftintrz_uint32_test_values) { ++ ++ uint32_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_s_uw(f8, a0); ++ __ movgr2frh_w(f9, zero_reg); ++ __ Ftintrz_uw_s(f9, f8, f10); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_s_ul_Ftintrz_ul_s) { ++ CcTest::InitializeVM(); ++ FOR_UINT64_INPUTS(i, ffint_ftintrz_uint64_test_values) { ++ uint64_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_s_ul(f8, a0); ++ __ Ftintrz_ul_s(f9, f8, f10, a2); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_d_uw_Ftintrz_uw_d) { ++ CcTest::InitializeVM(); ++ FOR_UINT64_INPUTS(i, ffint_ftintrz_uint64_test_values) { ++ uint32_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_d_uw(f8, a0); ++ __ movgr2frh_w(f9, zero_reg); ++ __ Ftintrz_uw_d(f9, f8, f10); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_d_ul_Ftintrz_ul_d) { ++ CcTest::InitializeVM(); ++ FOR_UINT64_INPUTS(i, ffint_ftintrz_uint64_test_values) { ++ uint64_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_d_ul(f8, a0); ++ __ Ftintrz_ul_d(f9, f8, f10, a2); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_d_l_Ftintrz_l_ud) { ++ CcTest::InitializeVM(); ++ FOR_INT64_INPUTS(i, ffint_ftintrz_int64_test_values) { ++ int64_t input = *i; ++ uint64_t abs_input = (input < 0) ? 
-input : input; ++ auto fn = [](MacroAssembler* masm) { ++ __ movgr2fr_d(f8, a0); ++ __ ffint_d_l(f10, f8); ++ __ Ftintrz_l_ud(f9, f10, f11); ++ }; ++ CHECK_EQ(static_cast(abs_input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(ffint_d_l_Ftint_l_d) { ++ CcTest::InitializeVM(); ++ FOR_INT64_INPUTS(i, ffint_ftintrz_int64_test_values) { ++ int64_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ movgr2fr_d(f8, a0); ++ __ ffint_d_l(f10, f8); ++ __ Ftintrz_l_d(f9, f10); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(ffint_d_w_Ftint_w_d) { ++ CcTest::InitializeVM(); ++ FOR_INT32_INPUTS(i, ffint_ftintrz_int32_test_values) { ++ int32_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ movgr2fr_w(f8, a0); ++ __ ffint_d_w(f10, f8); ++ __ Ftintrz_w_d(f9, f10); ++ __ movfr2gr_s(a4, f9); ++ __ movgr2fr_d(f9, a4); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++ ++static const std::vector overflow_int64_test_values() { ++ // clang-format off ++ static const int64_t kValues[] = {static_cast(0xF000000000000000), ++ static_cast(0x0000000000000001), ++ static_cast(0xFF00000000000000), ++ static_cast(0x0000F00111111110), ++ static_cast(0x0F00001000000000), ++ static_cast(0x991234AB12A96731), ++ static_cast(0xB0FFFF0F0F0F0F01), ++ static_cast(0x00006FFFFFFFFFFF), ++ static_cast(0xFFFFFFFFFFFFFFFF)}; ++ // clang-format on ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++TEST(OverflowInstructions) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope handles(isolate); ++ ++ struct T { ++ int64_t lhs; ++ int64_t rhs; ++ int64_t output_add1; ++ int64_t output_add2; ++ int64_t output_sub1; ++ int64_t output_sub2; ++ int64_t output_mul1; ++ int64_t output_mul2; ++ int64_t overflow_add1; ++ int64_t overflow_add2; ++ int64_t overflow_sub1; ++ int64_t overflow_sub2; ++ int64_t overflow_mul1; ++ int64_t overflow_mul2; ++ }; ++ T t; ++ ++ FOR_INT64_INPUTS(i, overflow_int64_test_values) { ++ FOR_INT64_INPUTS(j, overflow_int64_test_values) { ++ int64_t ii = *i; ++ int64_t jj = *j; ++ int64_t expected_add, expected_sub; ++ int32_t ii32 = static_cast(ii); ++ int32_t jj32 = static_cast(jj); ++ int32_t expected_mul; ++ int64_t expected_add_ovf, expected_sub_ovf, expected_mul_ovf; ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ ld_d(t0, a0, offsetof(T, lhs)); ++ __ ld_d(t1, a0, offsetof(T, rhs)); ++ ++ __ AdddOverflow(t2, t0, Operand(t1), t3); ++ __ st_d(t2, a0, offsetof(T, output_add1)); ++ __ st_d(t3, a0, offsetof(T, overflow_add1)); ++ __ or_(t3, zero_reg, zero_reg); ++ __ AdddOverflow(t0, t0, Operand(t1), t3); ++ __ st_d(t0, a0, offsetof(T, output_add2)); ++ __ st_d(t3, a0, offsetof(T, overflow_add2)); ++ ++ __ ld_d(t0, a0, offsetof(T, lhs)); ++ __ ld_d(t1, a0, offsetof(T, rhs)); ++ ++ __ SubdOverflow(t2, t0, Operand(t1), t3); ++ __ st_d(t2, a0, offsetof(T, output_sub1)); ++ __ st_d(t3, a0, offsetof(T, overflow_sub1)); ++ __ or_(t3, zero_reg, zero_reg); ++ __ SubdOverflow(t0, t0, Operand(t1), t3); ++ __ st_d(t0, a0, offsetof(T, output_sub2)); ++ __ st_d(t3, a0, offsetof(T, overflow_sub2)); ++ ++ __ ld_d(t0, a0, offsetof(T, lhs)); ++ __ ld_d(t1, a0, offsetof(T, rhs)); ++ __ slli_w(t0, t0, 0); ++ __ slli_w(t1, t1, 0); ++ ++ __ MulOverflow(t2, t0, Operand(t1), t3); ++ __ st_d(t2, a0, offsetof(T, output_mul1)); ++ __ st_d(t3, a0, offsetof(T, overflow_mul1)); ++ __ or_(t3, zero_reg, zero_reg); ++ __ MulOverflow(t0, t0, 
Operand(t1), t3); ++ __ st_d(t0, a0, offsetof(T, output_mul2)); ++ __ st_d(t3, a0, offsetof(T, overflow_mul2)); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = ++ Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.lhs = ii; ++ t.rhs = jj; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ expected_add_ovf = base::bits::SignedAddOverflow64(ii, jj, &expected_add); ++ expected_sub_ovf = base::bits::SignedSubOverflow64(ii, jj, &expected_sub); ++ expected_mul_ovf = ++ base::bits::SignedMulOverflow32(ii32, jj32, &expected_mul); ++ ++ CHECK_EQ(expected_add_ovf, t.overflow_add1 < 0); ++ CHECK_EQ(expected_sub_ovf, t.overflow_sub1 < 0); ++ CHECK_EQ(expected_mul_ovf, t.overflow_mul1 != 0); ++ ++ CHECK_EQ(t.overflow_add1, t.overflow_add2); ++ CHECK_EQ(t.overflow_sub1, t.overflow_sub2); ++ CHECK_EQ(t.overflow_mul1, t.overflow_mul2); ++ ++ CHECK_EQ(expected_add, t.output_add1); ++ CHECK_EQ(expected_add, t.output_add2); ++ CHECK_EQ(expected_sub, t.output_sub1); ++ CHECK_EQ(expected_sub, t.output_sub2); ++ if (!expected_mul_ovf) { ++ CHECK_EQ(expected_mul, t.output_mul1); ++ CHECK_EQ(expected_mul, t.output_mul2); ++ } ++ } ++ } ++} ++ ++TEST(min_max_nan) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct TestFloat { ++ double a; ++ double b; ++ double c; ++ double d; ++ float e; ++ float f; ++ float g; ++ float h; ++ }; ++ ++ TestFloat test; ++ const double dnan = std::numeric_limits::quiet_NaN(); ++ const double dinf = std::numeric_limits::infinity(); ++ const double dminf = -std::numeric_limits::infinity(); ++ const float fnan = std::numeric_limits::quiet_NaN(); ++ const float finf = std::numeric_limits::infinity(); ++ const float fminf = -std::numeric_limits::infinity(); ++ const int kTableLength = 13; ++ ++ // clang-format off ++ double inputsa[kTableLength] = {dnan, 3.0, -0.0, 0.0, 42.0, dinf, dminf, ++ dinf, dnan, 3.0, dinf, dnan, dnan}; ++ double inputsb[kTableLength] = {dnan, 2.0, 0.0, -0.0, dinf, 42.0, dinf, ++ dminf, 3.0, dnan, dnan, dinf, dnan}; ++ double outputsdmin[kTableLength] = {dnan, 2.0, -0.0, -0.0, 42.0, ++ 42.0, dminf, dminf, dnan, dnan, ++ dnan, dnan, dnan}; ++ double outputsdmax[kTableLength] = {dnan, 3.0, 0.0, 0.0, dinf, dinf, dinf, ++ dinf, dnan, dnan, dnan, dnan, dnan}; ++ ++ float inputse[kTableLength] = {2.0, 3.0, -0.0, 0.0, 42.0, finf, fminf, ++ finf, fnan, 3.0, finf, fnan, fnan}; ++ float inputsf[kTableLength] = {3.0, 2.0, 0.0, -0.0, finf, 42.0, finf, ++ fminf, 3.0, fnan, fnan, finf, fnan}; ++ float outputsfmin[kTableLength] = {2.0, 2.0, -0.0, -0.0, 42.0, 42.0, fminf, ++ fminf, fnan, fnan, fnan, fnan, fnan}; ++ float outputsfmax[kTableLength] = {3.0, 3.0, 0.0, 0.0, finf, finf, finf, ++ finf, fnan, fnan, fnan, fnan, fnan}; ++ ++ // clang-format on ++ auto handle_dnan = [masm](FPURegister dst, Label* nan, Label* back) { ++ __ bind(nan); ++ __ LoadRoot(t8, RootIndex::kNanValue); ++ __ Fld_d(dst, FieldMemOperand(t8, HeapNumber::kValueOffset)); ++ __ Branch(back); ++ }; ++ ++ auto handle_snan = [masm, fnan](FPURegister dst, Label* nan, Label* back) { ++ __ bind(nan); ++ __ Move(dst, fnan); ++ __ Branch(back); ++ }; ++ ++ Label handle_mind_nan, handle_maxd_nan, handle_mins_nan, handle_maxs_nan; ++ Label back_mind_nan, back_maxd_nan, back_mins_nan, back_maxs_nan; ++ ++ __ push(s6); ++ __ InitializeRootRegister(); 
++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, e))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, f))); ++ __ Float64Min(f12, f8, f9, &handle_mind_nan); ++ __ bind(&back_mind_nan); ++ __ Float64Max(f13, f8, f9, &handle_maxd_nan); ++ __ bind(&back_maxd_nan); ++ __ Float32Min(f14, f10, f11, &handle_mins_nan); ++ __ bind(&back_mins_nan); ++ __ Float32Max(f15, f10, f11, &handle_maxs_nan); ++ __ bind(&back_maxs_nan); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, d))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, g))); ++ __ Fst_s(f15, MemOperand(a0, offsetof(TestFloat, h))); ++ __ pop(s6); ++ __ jirl(zero_reg, ra, 0); ++ ++ handle_dnan(f12, &handle_mind_nan, &back_mind_nan); ++ handle_dnan(f13, &handle_maxd_nan, &back_maxd_nan); ++ handle_snan(f14, &handle_mins_nan, &back_mins_nan); ++ handle_snan(f15, &handle_maxs_nan, &back_maxs_nan); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputsa[i]; ++ test.b = inputsb[i]; ++ test.e = inputse[i]; ++ test.f = inputsf[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(0, memcmp(&test.c, &outputsdmin[i], sizeof(test.c))); ++ CHECK_EQ(0, memcmp(&test.d, &outputsdmax[i], sizeof(test.d))); ++ CHECK_EQ(0, memcmp(&test.g, &outputsfmin[i], sizeof(test.g))); ++ CHECK_EQ(0, memcmp(&test.h, &outputsfmax[i], sizeof(test.h))); ++ } ++} ++ ++template ++bool run_Unaligned(char* memory_buffer, int32_t in_offset, int32_t out_offset, ++ IN_TYPE value, Func GenerateUnalignedInstructionFunc) { ++ using F_CVT = int32_t(char* x0, int x1, int x2, int x3, int x4); ++ ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assm; ++ IN_TYPE res; ++ ++ GenerateUnalignedInstructionFunc(masm, in_offset, out_offset); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ MemCopy(memory_buffer + in_offset, &value, sizeof(IN_TYPE)); ++ f.Call(memory_buffer, 0, 0, 0, 0); ++ MemCopy(&res, memory_buffer + out_offset, sizeof(IN_TYPE)); ++ ++ return res == value; ++} ++ ++static const std::vector unsigned_test_values() { ++ // clang-format off ++ static const uint64_t kValues[] = { ++ 0x2180F18A06384414, 0x000A714532102277, 0xBC1ACCCF180649F0, ++ 0x8000000080008000, 0x0000000000000001, 0xFFFFFFFFFFFFFFFF, ++ }; ++ // clang-format on ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector unsigned_test_offset() { ++ static const int32_t kValues[] = {// value, offset ++ -132 * KB, -21 * KB, 0, 19 * KB, 135 * KB}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector unsigned_test_offset_increment() { ++ static const int32_t kValues[] = {-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++TEST(Ld_b) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, 
unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_b(a2, MemOperand(a0, in_offset)); ++ __ St_b(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_b(a0, MemOperand(a0, in_offset)); ++ __ St_b(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_2)); ++ ++ auto fn_3 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_bu(a0, MemOperand(a0, in_offset)); ++ __ St_b(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_3)); ++ ++ auto fn_4 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_bu(a2, MemOperand(a0, in_offset)); ++ __ St_b(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_4)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_b_bitextension) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ Label success, fail, end, different; ++ __ Ld_b(t0, MemOperand(a0, in_offset)); ++ __ Ld_bu(t1, MemOperand(a0, in_offset)); ++ __ Branch(&different, ne, t0, Operand(t1)); ++ ++ // If signed and unsigned values are same, check ++ // the upper bits to see if they are zero ++ __ srai_w(t0, t0, 7); ++ __ Branch(&success, eq, t0, Operand(zero_reg)); ++ __ Branch(&fail); ++ ++ // If signed and unsigned values are different, ++ // check that the upper bits are complementary ++ __ bind(&different); ++ __ srai_w(t1, t1, 7); ++ __ Branch(&fail, ne, t1, Operand(1)); ++ __ srai_w(t0, t0, 7); ++ __ addi_d(t0, t0, 1); ++ __ Branch(&fail, ne, t0, Operand(zero_reg)); ++ // Fall through to success ++ ++ __ bind(&success); ++ __ Ld_b(t0, MemOperand(a0, in_offset)); ++ __ St_b(t0, MemOperand(a0, out_offset)); ++ __ Branch(&end); ++ __ bind(&fail); ++ __ St_b(zero_reg, MemOperand(a0, out_offset)); ++ __ bind(&end); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_h) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + 
*k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_h(a2, MemOperand(a0, in_offset)); ++ __ St_h(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_h(a0, MemOperand(a0, in_offset)); ++ __ St_h(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_2)); ++ ++ auto fn_3 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_hu(a0, MemOperand(a0, in_offset)); ++ __ St_h(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_3)); ++ ++ auto fn_4 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_hu(a2, MemOperand(a0, in_offset)); ++ __ St_h(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_4)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_h_bitextension) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ Label success, fail, end, different; ++ __ Ld_h(t0, MemOperand(a0, in_offset)); ++ __ Ld_hu(t1, MemOperand(a0, in_offset)); ++ __ Branch(&different, ne, t0, Operand(t1)); ++ ++ // If signed and unsigned values are same, check ++ // the upper bits to see if they are zero ++ __ srai_w(t0, t0, 15); ++ __ Branch(&success, eq, t0, Operand(zero_reg)); ++ __ Branch(&fail); ++ ++ // If signed and unsigned values are different, ++ // check that the upper bits are complementary ++ __ bind(&different); ++ __ srai_w(t1, t1, 15); ++ __ Branch(&fail, ne, t1, Operand(1)); ++ __ srai_w(t0, t0, 15); ++ __ addi_d(t0, t0, 1); ++ __ Branch(&fail, ne, t0, Operand(zero_reg)); ++ // Fall through to success ++ ++ __ bind(&success); ++ __ Ld_h(t0, MemOperand(a0, in_offset)); ++ __ St_h(t0, MemOperand(a0, out_offset)); ++ __ Branch(&end); ++ __ bind(&fail); ++ __ St_h(zero_reg, MemOperand(a0, out_offset)); ++ __ bind(&end); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_w) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint32_t value = static_cast(*i & 0xFFFFFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_w(a2, MemOperand(a0, in_offset)); ++ __ St_w(a2, MemOperand(a0, 
out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_w(a0, MemOperand(a0, in_offset)); ++ __ St_w(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, ++ run_Unaligned(buffer_middle, in_offset, out_offset, ++ (uint32_t)value, fn_2)); ++ ++ auto fn_3 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_wu(a2, MemOperand(a0, in_offset)); ++ __ St_w(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_3)); ++ ++ auto fn_4 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_wu(a0, MemOperand(a0, in_offset)); ++ __ St_w(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, ++ run_Unaligned(buffer_middle, in_offset, out_offset, ++ (uint32_t)value, fn_4)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_w_extension) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint32_t value = static_cast(*i & 0xFFFFFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ Label success, fail, end, different; ++ __ Ld_w(t0, MemOperand(a0, in_offset)); ++ __ Ld_wu(t1, MemOperand(a0, in_offset)); ++ __ Branch(&different, ne, t0, Operand(t1)); ++ ++ // If signed and unsigned values are same, check ++ // the upper bits to see if they are zero ++ __ srai_d(t0, t0, 31); ++ __ Branch(&success, eq, t0, Operand(zero_reg)); ++ __ Branch(&fail); ++ ++ // If signed and unsigned values are different, ++ // check that the upper bits are complementary ++ __ bind(&different); ++ __ srai_d(t1, t1, 31); ++ __ Branch(&fail, ne, t1, Operand(1)); ++ __ srai_d(t0, t0, 31); ++ __ addi_d(t0, t0, 1); ++ __ Branch(&fail, ne, t0, Operand(zero_reg)); ++ // Fall through to success ++ ++ __ bind(&success); ++ __ Ld_w(t0, MemOperand(a0, in_offset)); ++ __ St_w(t0, MemOperand(a0, out_offset)); ++ __ Branch(&end); ++ __ bind(&fail); ++ __ St_w(zero_reg, MemOperand(a0, out_offset)); ++ __ bind(&end); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_d) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint64_t value = *i; ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_d(a2, MemOperand(a0, in_offset)); ++ __ St_d(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t 
out_offset) { ++ __ mov(t0, a0); ++ __ Ld_d(a0, MemOperand(a0, in_offset)); ++ __ St_d(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, ++ run_Unaligned(buffer_middle, in_offset, out_offset, ++ (uint32_t)value, fn_2)); ++ } ++ } ++ } ++} ++ ++TEST(Fld_s) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ float value = static_cast(*i & 0xFFFFFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Fld_s(f0, MemOperand(a0, in_offset)); ++ __ Fst_s(f0, MemOperand(a0, out_offset)); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Fld_d) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ double value = static_cast(*i); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Fld_d(f0, MemOperand(a0, in_offset)); ++ __ Fst_d(f0, MemOperand(a0, out_offset)); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++static const std::vector sltu_test_values() { ++ // clang-format off ++ static const uint64_t kValues[] = { ++ 0, ++ 1, ++ 0x7FE, ++ 0x7FF, ++ 0x800, ++ 0x801, ++ 0xFFE, ++ 0xFFF, ++ 0xFFFFFFFFFFFFF7FE, ++ 0xFFFFFFFFFFFFF7FF, ++ 0xFFFFFFFFFFFFF800, ++ 0xFFFFFFFFFFFFF801, ++ 0xFFFFFFFFFFFFFFFE, ++ 0xFFFFFFFFFFFFFFFF, ++ }; ++ // clang-format on ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++template ++bool run_Sltu(uint64_t rj, uint64_t rk, Func GenerateSltuInstructionFunc) { ++ using F_CVT = int64_t(uint64_t x0, uint64_t x1, int x2, int x3, int x4); ++ ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assm; ++ ++ GenerateSltuInstructionFunc(masm, rk); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(rj, rk, 0, 0, 0)); ++ return res == 1; ++} ++ ++TEST(Sltu) { ++ CcTest::InitializeVM(); ++ ++ FOR_UINT64_INPUTS(i, sltu_test_values) { ++ FOR_UINT64_INPUTS(j, sltu_test_values) { ++ uint64_t rj = *i; ++ uint64_t rk = *j; ++ ++ auto fn_1 = [](MacroAssembler* masm, uint64_t imm) { ++ __ Sltu(a2, a0, Operand(imm)); ++ }; ++ CHECK_EQ(rj < rk, run_Sltu(rj, rk, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, uint64_t imm) { ++ __ Sltu(a2, a0, a1); ++ }; ++ CHECK_EQ(rj < rk, run_Sltu(rj, rk, fn_2)); ++ } ++ } ++} ++ ++template ++static GeneratedCode GenerateMacroFloat32MinMax(MacroAssembler* masm) { ++ T a = T::from_code(8); // f8 ++ T b = T::from_code(9); // f9 ++ T c = 
T::from_code(10); // f10 ++ ++ Label ool_min_abc, ool_min_aab, ool_min_aba; ++ Label ool_max_abc, ool_max_aab, ool_max_aba; ++ ++ Label done_min_abc, done_min_aab, done_min_aba; ++ Label done_max_abc, done_max_aab, done_max_aba; ++ ++#define FLOAT_MIN_MAX(fminmax, res, x, y, done, ool, res_field) \ ++ __ Fld_s(x, MemOperand(a0, offsetof(Inputs, src1_))); \ ++ __ Fld_s(y, MemOperand(a0, offsetof(Inputs, src2_))); \ ++ __ fminmax(res, x, y, &ool); \ ++ __ bind(&done); \ ++ __ Fst_s(a, MemOperand(a1, offsetof(Results, res_field))) ++ ++ // a = min(b, c); ++ FLOAT_MIN_MAX(Float32Min, a, b, c, done_min_abc, ool_min_abc, min_abc_); ++ // a = min(a, b); ++ FLOAT_MIN_MAX(Float32Min, a, a, b, done_min_aab, ool_min_aab, min_aab_); ++ // a = min(b, a); ++ FLOAT_MIN_MAX(Float32Min, a, b, a, done_min_aba, ool_min_aba, min_aba_); ++ ++ // a = max(b, c); ++ FLOAT_MIN_MAX(Float32Max, a, b, c, done_max_abc, ool_max_abc, max_abc_); ++ // a = max(a, b); ++ FLOAT_MIN_MAX(Float32Max, a, a, b, done_max_aab, ool_max_aab, max_aab_); ++ // a = max(b, a); ++ FLOAT_MIN_MAX(Float32Max, a, b, a, done_max_aba, ool_max_aba, max_aba_); ++ ++#undef FLOAT_MIN_MAX ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ // Generate out-of-line cases. ++ __ bind(&ool_min_abc); ++ __ Float32MinOutOfLine(a, b, c); ++ __ Branch(&done_min_abc); ++ ++ __ bind(&ool_min_aab); ++ __ Float32MinOutOfLine(a, a, b); ++ __ Branch(&done_min_aab); ++ ++ __ bind(&ool_min_aba); ++ __ Float32MinOutOfLine(a, b, a); ++ __ Branch(&done_min_aba); ++ ++ __ bind(&ool_max_abc); ++ __ Float32MaxOutOfLine(a, b, c); ++ __ Branch(&done_max_abc); ++ ++ __ bind(&ool_max_aab); ++ __ Float32MaxOutOfLine(a, a, b); ++ __ Branch(&done_max_aab); ++ ++ __ bind(&ool_max_aba); ++ __ Float32MaxOutOfLine(a, b, a); ++ __ Branch(&done_max_aba); ++ ++ CodeDesc desc; ++ masm->GetCode(masm->isolate(), &desc); ++ Handle code = ++ Factory::CodeBuilder(masm->isolate(), desc, Code::STUB).Build(); ++#ifdef DEBUG ++ StdoutStream os; ++ code->Print(os); ++#endif ++ return GeneratedCode::FromCode(*code); ++} ++ ++TEST(macro_float_minmax_f32) { ++ // Test the Float32Min and Float32Max macros. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct Inputs { ++ float src1_; ++ float src2_; ++ }; ++ ++ struct Results { ++ // Check all register aliasing possibilities in order to exercise all ++ // code-paths in the macro assembler. ++ float min_abc_; ++ float min_aab_; ++ float min_aba_; ++ float max_abc_; ++ float max_aab_; ++ float max_aba_; ++ }; ++ ++ GeneratedCode f = ++ GenerateMacroFloat32MinMax(masm); ++ ++#define CHECK_MINMAX(src1, src2, min, max) \ ++ do { \ ++ Inputs inputs = {src1, src2}; \ ++ Results results; \ ++ f.Call(&inputs, &results, 0, 0, 0); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_abc_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aab_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aba_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_abc_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aab_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aba_)); \ ++ /* Use a bit_cast to correctly identify -0.0 and NaNs. 
*/ \ ++ } while (0) ++ ++ float nan_a = std::numeric_limits::quiet_NaN(); ++ float nan_b = std::numeric_limits::quiet_NaN(); ++ ++ CHECK_MINMAX(1.0f, -1.0f, -1.0f, 1.0f); ++ CHECK_MINMAX(-1.0f, 1.0f, -1.0f, 1.0f); ++ CHECK_MINMAX(0.0f, -1.0f, -1.0f, 0.0f); ++ CHECK_MINMAX(-1.0f, 0.0f, -1.0f, 0.0f); ++ CHECK_MINMAX(-0.0f, -1.0f, -1.0f, -0.0f); ++ CHECK_MINMAX(-1.0f, -0.0f, -1.0f, -0.0f); ++ CHECK_MINMAX(0.0f, 1.0f, 0.0f, 1.0f); ++ CHECK_MINMAX(1.0f, 0.0f, 0.0f, 1.0f); ++ ++ CHECK_MINMAX(0.0f, 0.0f, 0.0f, 0.0f); ++ CHECK_MINMAX(-0.0f, -0.0f, -0.0f, -0.0f); ++ CHECK_MINMAX(-0.0f, 0.0f, -0.0f, 0.0f); ++ CHECK_MINMAX(0.0f, -0.0f, -0.0f, 0.0f); ++ ++ CHECK_MINMAX(0.0f, nan_a, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, 0.0f, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, nan_b, nan_a, nan_a); ++ CHECK_MINMAX(nan_b, nan_a, nan_b, nan_b); ++ ++#undef CHECK_MINMAX ++} ++ ++template ++static GeneratedCode GenerateMacroFloat64MinMax(MacroAssembler* masm) { ++ T a = T::from_code(8); // f8 ++ T b = T::from_code(9); // f9 ++ T c = T::from_code(10); // f10 ++ ++ Label ool_min_abc, ool_min_aab, ool_min_aba; ++ Label ool_max_abc, ool_max_aab, ool_max_aba; ++ ++ Label done_min_abc, done_min_aab, done_min_aba; ++ Label done_max_abc, done_max_aab, done_max_aba; ++ ++#define FLOAT_MIN_MAX(fminmax, res, x, y, done, ool, res_field) \ ++ __ Fld_d(x, MemOperand(a0, offsetof(Inputs, src1_))); \ ++ __ Fld_d(y, MemOperand(a0, offsetof(Inputs, src2_))); \ ++ __ fminmax(res, x, y, &ool); \ ++ __ bind(&done); \ ++ __ Fst_d(a, MemOperand(a1, offsetof(Results, res_field))) ++ ++ // a = min(b, c); ++ FLOAT_MIN_MAX(Float64Min, a, b, c, done_min_abc, ool_min_abc, min_abc_); ++ // a = min(a, b); ++ FLOAT_MIN_MAX(Float64Min, a, a, b, done_min_aab, ool_min_aab, min_aab_); ++ // a = min(b, a); ++ FLOAT_MIN_MAX(Float64Min, a, b, a, done_min_aba, ool_min_aba, min_aba_); ++ ++ // a = max(b, c); ++ FLOAT_MIN_MAX(Float64Max, a, b, c, done_max_abc, ool_max_abc, max_abc_); ++ // a = max(a, b); ++ FLOAT_MIN_MAX(Float64Max, a, a, b, done_max_aab, ool_max_aab, max_aab_); ++ // a = max(b, a); ++ FLOAT_MIN_MAX(Float64Max, a, b, a, done_max_aba, ool_max_aba, max_aba_); ++ ++#undef FLOAT_MIN_MAX ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ // Generate out-of-line cases. ++ __ bind(&ool_min_abc); ++ __ Float64MinOutOfLine(a, b, c); ++ __ Branch(&done_min_abc); ++ ++ __ bind(&ool_min_aab); ++ __ Float64MinOutOfLine(a, a, b); ++ __ Branch(&done_min_aab); ++ ++ __ bind(&ool_min_aba); ++ __ Float64MinOutOfLine(a, b, a); ++ __ Branch(&done_min_aba); ++ ++ __ bind(&ool_max_abc); ++ __ Float64MaxOutOfLine(a, b, c); ++ __ Branch(&done_max_abc); ++ ++ __ bind(&ool_max_aab); ++ __ Float64MaxOutOfLine(a, a, b); ++ __ Branch(&done_max_aab); ++ ++ __ bind(&ool_max_aba); ++ __ Float64MaxOutOfLine(a, b, a); ++ __ Branch(&done_max_aba); ++ ++ CodeDesc desc; ++ masm->GetCode(masm->isolate(), &desc); ++ Handle code = ++ Factory::CodeBuilder(masm->isolate(), desc, Code::STUB).Build(); ++#ifdef DEBUG ++ StdoutStream os; ++ code->Print(os); ++#endif ++ return GeneratedCode::FromCode(*code); ++} ++ ++TEST(macro_float_minmax_f64) { ++ // Test the Float64Min and Float64Max macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct Inputs { ++ double src1_; ++ double src2_; ++ }; ++ ++ struct Results { ++ // Check all register aliasing possibilities in order to exercise all ++ // code-paths in the macro assembler. ++ double min_abc_; ++ double min_aab_; ++ double min_aba_; ++ double max_abc_; ++ double max_aab_; ++ double max_aba_; ++ }; ++ ++ GeneratedCode f = ++ GenerateMacroFloat64MinMax(masm); ++ ++#define CHECK_MINMAX(src1, src2, min, max) \ ++ do { \ ++ Inputs inputs = {src1, src2}; \ ++ Results results; \ ++ f.Call(&inputs, &results, 0, 0, 0); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_abc_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aab_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aba_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_abc_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aab_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aba_)); \ ++ /* Use a bit_cast to correctly identify -0.0 and NaNs. */ \ ++ } while (0) ++ ++ double nan_a = std::numeric_limits::quiet_NaN(); ++ double nan_b = std::numeric_limits::quiet_NaN(); ++ ++ CHECK_MINMAX(1.0, -1.0, -1.0, 1.0); ++ CHECK_MINMAX(-1.0, 1.0, -1.0, 1.0); ++ CHECK_MINMAX(0.0, -1.0, -1.0, 0.0); ++ CHECK_MINMAX(-1.0, 0.0, -1.0, 0.0); ++ CHECK_MINMAX(-0.0, -1.0, -1.0, -0.0); ++ CHECK_MINMAX(-1.0, -0.0, -1.0, -0.0); ++ CHECK_MINMAX(0.0, 1.0, 0.0, 1.0); ++ CHECK_MINMAX(1.0, 0.0, 0.0, 1.0); ++ ++ CHECK_MINMAX(0.0, 0.0, 0.0, 0.0); ++ CHECK_MINMAX(-0.0, -0.0, -0.0, -0.0); ++ CHECK_MINMAX(-0.0, 0.0, -0.0, 0.0); ++ CHECK_MINMAX(0.0, -0.0, -0.0, 0.0); ++ ++ CHECK_MINMAX(0.0, nan_a, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, 0.0, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, nan_b, nan_a, nan_a); ++ CHECK_MINMAX(nan_b, nan_a, nan_b, nan_b); ++ ++#undef CHECK_MINMAX ++} ++ ++uint64_t run_Sub_w(uint64_t imm, int32_t num_instr) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label code_start; ++ __ bind(&code_start); ++ __ Sub_w(a2, zero_reg, Operand(imm)); ++ CHECK_EQ(masm->InstructionsGeneratedSince(&code_start), num_instr); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(SUB_W) { ++ CcTest::InitializeVM(); ++ ++ // Test Subu macro-instruction for min_int12 and max_int12 border cases. ++ // For subtracting int16 immediate values we use addiu. ++ ++ struct TestCaseSub { ++ uint64_t imm; ++ uint64_t expected_res; ++ int32_t num_instr; ++ }; ++ ++ // We call Sub_w(v0, zero_reg, imm) to test cases listed below. ++ // 0 - imm = expected_res ++ // clang-format off ++ struct TestCaseSub tc[] = { ++ // imm, expected_res, num_instr ++ {0xFFFFFFFFFFFFF800, 0x800, 2}, // min_int12 ++ // The test case above generates ori + add_w instruction sequence. ++ // We can't have just addi_ because -min_int12 > max_int12 so use ++ // register. 
We can load min_int12 to at register with addi_w and then ++ subtract at with sub_w, but now we use ori + add_w because -min_int12 can ++ be loaded using ori. ++ {0x800, 0xFFFFFFFFFFFFF800, 1}, // max_int12 + 1 ++ // Generates addi_w ++ // max_int12 + 1 is not int12 but -(max_int12 + 1) is, just use addi_w. ++ {0xFFFFFFFFFFFFF7FF, 0x801, 2}, // min_int12 - 1 ++ // Generates ori + add_w ++ // To load this value to at we need two instructions and another one to ++ // subtract, lu12i + ori + sub_w. But we can load -value to at using just ++ // ori and then add at register with add_w. ++ {0x801, 0xFFFFFFFFFFFFF7FF, 2}, // max_int12 + 2 ++ // Generates ori + sub_w ++ // Not int12 but is uint12, load value to at with ori and subtract with ++ // sub_w. ++ {0x00010000, 0xFFFFFFFFFFFF0000, 2}, ++ // Generates lu12i_w + sub_w ++ // Load value using lu12i_w to at and subtract with sub_w. ++ {0x00010001, 0xFFFFFFFFFFFEFFFF, 3}, ++ // Generates lu12i + ori + sub_w ++ // We have to generate three instructions in this case. ++ {0x7FFFFFFF, 0xFFFFFFFF80000001, 3}, // max_int32 ++ // Generates lu12i_w + ori + sub_w ++ {0xFFFFFFFF80000000, 0xFFFFFFFF80000000, 2}, // min_int32 ++ // The test case above generates lu12i + sub_w instruction sequence. ++ // The result of 0 - min_int32 equals max_int32 + 1, which wraps around to ++ // min_int32 again. ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseSub); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ CHECK_EQ(tc[i].expected_res, run_Sub_w(tc[i].imm, tc[i].num_instr)); ++ } ++} ++ ++uint64_t run_Sub_d(uint64_t imm, int32_t num_instr) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label code_start; ++ __ bind(&code_start); ++ __ Sub_d(a2, zero_reg, Operand(imm)); ++ CHECK_EQ(masm->InstructionsGeneratedSince(&code_start), num_instr); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(SUB_D) { ++ CcTest::InitializeVM(); ++ ++ // Test Sub_d macro-instruction for min_int12 and max_int12 border cases. ++ // For subtracting int12 immediate values we use addi_d. ++ ++ struct TestCaseSub { ++ uint64_t imm; ++ uint64_t expected_res; ++ int32_t num_instr; ++ }; ++ // We call Sub(v0, zero_reg, imm) to test cases listed below. ++ // 0 - imm = expected_res ++ // clang-format off ++ struct TestCaseSub tc[] = { ++ // imm, expected_res, num_instr ++ {0xFFFFFFFFFFFFF800, 0x800, 2}, // min_int12 ++ // The test case above generates addi_d instruction. ++ // This is int12 value and we can load it using just addi_d. ++ { 0x800, 0xFFFFFFFFFFFFF800, 1}, // max_int12 + 1 ++ // Generates addi_d ++ // max_int12 + 1 is not int12 but is uint12, just use ori.
++ {0xFFFFFFFFFFFFF7FF, 0x801, 2}, // min_int12 - 1 ++ // Generates ori + add_d ++ { 0x801, 0xFFFFFFFFFFFFF7FF, 2}, // max_int12 + 2 ++ // Generates ori + add_d ++ { 0x00001000, 0xFFFFFFFFFFFFF000, 2}, // max_uint12 + 1 ++ // Generates lu12i_w + sub_d ++ { 0x00001001, 0xFFFFFFFFFFFFEFFF, 3}, // max_uint12 + 2 ++ // Generates lu12i_w + ori + sub_d ++ {0x00000000FFFFFFFF, 0xFFFFFFFF00000001, 3}, // max_uint32 ++ // Generates addi_w + li32i_d + sub_d ++ {0x00000000FFFFFFFE, 0xFFFFFFFF00000002, 3}, // max_uint32 - 1 ++ // Generates addi_w + li32i_d + sub_d ++ {0xFFFFFFFF80000000, 0x80000000, 2}, // min_int32 ++ // Generates lu12i_w + sub_d ++ {0x0000000080000000, 0xFFFFFFFF80000000, 2}, // max_int32 + 1 ++ // Generates lu12i_w + add_d ++ {0xFFFF0000FFFF8765, 0x0000FFFF0000789B, 4}, ++ // Generates lu12i_w + ori + lu32i_d + sub ++ {0x1234ABCD87654321, 0xEDCB5432789ABCDF, 5}, ++ // Generates lu12i_w + ori + lu32i_d + lu52i_d + sub ++ {0xFFFF789100000000, 0x876F00000000, 3}, ++ // Generates xor + lu32i_d + sub ++ {0xF12F789100000000, 0xED0876F00000000, 4}, ++ // Generates xor + lu32i_d + lu52i_d + sub ++ {0xF120000000000800, 0xEDFFFFFFFFFF800, 3}, ++ // Generates ori + lu52i_d + sub ++ {0xFFF0000000000000, 0x10000000000000, 2} ++ // Generates lu52i_d + sub ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseSub); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ CHECK_EQ(tc[i].expected_res, run_Sub_d(tc[i].imm, tc[i].num_instr)); ++ } ++} ++ ++TEST(Move) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ float a; ++ float b; ++ float result_a; ++ float result_b; ++ double c; ++ double d; ++ double e; ++ double result_c; ++ double result_d; ++ double result_e; ++ }; ++ T t; ++ __ li(a4, static_cast(0x80000000)); ++ __ St_w(a4, MemOperand(a0, offsetof(T, a))); ++ __ li(a5, static_cast(0x12345678)); ++ __ St_w(a5, MemOperand(a0, offsetof(T, b))); ++ __ li(a6, static_cast(0x8877665544332211)); ++ __ St_d(a6, MemOperand(a0, offsetof(T, c))); ++ __ li(a7, static_cast(0x1122334455667788)); ++ __ St_d(a7, MemOperand(a0, offsetof(T, d))); ++ __ li(t0, static_cast(0)); ++ __ St_d(t0, MemOperand(a0, offsetof(T, e))); ++ ++ __ Move(f8, static_cast(0x80000000)); ++ __ Move(f9, static_cast(0x12345678)); ++ __ Move(f10, static_cast(0x8877665544332211)); ++ __ Move(f11, static_cast(0x1122334455667788)); ++ __ Move(f12, static_cast(0)); ++ __ Fst_s(f8, MemOperand(a0, offsetof(T, result_a))); ++ __ Fst_s(f9, MemOperand(a0, offsetof(T, result_b))); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_c))); ++ __ Fst_d(f11, MemOperand(a0, offsetof(T, result_d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(T, result_e))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(t.a, t.result_a); ++ CHECK_EQ(t.b, t.result_b); ++ CHECK_EQ(t.c, t.result_c); ++ CHECK_EQ(t.d, t.result_d); ++ CHECK_EQ(t.e, t.result_e); ++} ++ ++TEST(Movz_Movn) { ++ const int kTableLength = 4; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct Test { ++ int64_t rt; ++ 
int64_t a; ++ int64_t b; ++ int64_t bold; ++ int64_t b1; ++ int64_t bold1; ++ int32_t c; ++ int32_t d; ++ int32_t dold; ++ int32_t d1; ++ int32_t dold1; ++ }; ++ ++ Test test; ++ // clang-format off ++ int64_t inputs_D[kTableLength] = { ++ 7, 8, -9, -10 ++ }; ++ int32_t inputs_W[kTableLength] = { ++ 3, 4, -5, -6 ++ }; ++ ++ int32_t outputs_W[kTableLength] = { ++ 3, 4, -5, -6 ++ }; ++ int64_t outputs_D[kTableLength] = { ++ 7, 8, -9, -10 ++ }; ++ // clang-format on ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(Test, a))); ++ __ Ld_w(a5, MemOperand(a0, offsetof(Test, c))); ++ __ Ld_d(a6, MemOperand(a0, offsetof(Test, rt))); ++ __ li(t0, 1); ++ __ li(t1, 1); ++ __ li(t2, 1); ++ __ li(t3, 1); ++ __ St_d(t0, MemOperand(a0, offsetof(Test, bold))); ++ __ St_d(t1, MemOperand(a0, offsetof(Test, bold1))); ++ __ St_w(t2, MemOperand(a0, offsetof(Test, dold))); ++ __ St_w(t3, MemOperand(a0, offsetof(Test, dold1))); ++ __ Movz(t0, a4, a6); ++ __ Movn(t1, a4, a6); ++ __ Movz(t2, a5, a6); ++ __ Movn(t3, a5, a6); ++ __ St_d(t0, MemOperand(a0, offsetof(Test, b))); ++ __ St_d(t1, MemOperand(a0, offsetof(Test, b1))); ++ __ St_w(t2, MemOperand(a0, offsetof(Test, d))); ++ __ St_w(t3, MemOperand(a0, offsetof(Test, d1))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_D[i]; ++ test.c = inputs_W[i]; ++ ++ test.rt = 1; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.b, test.bold); ++ CHECK_EQ(test.d, test.dold); ++ CHECK_EQ(test.b1, outputs_D[i]); ++ CHECK_EQ(test.d1, outputs_W[i]); ++ ++ test.rt = 0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.b, outputs_D[i]); ++ CHECK_EQ(test.d, outputs_W[i]); ++ CHECK_EQ(test.b1, test.bold1); ++ CHECK_EQ(test.d1, test.dold1); ++ } ++} ++ ++TEST(macro_instructions1) { ++ // Test 32bit calculate instructions macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label exit, error; ++ ++ __ li(a4, 0x00000004); ++ __ li(a5, 0x00001234); ++ __ li(a6, 0x12345678); ++ __ li(a7, 0x7FFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFC)); ++ __ li(t1, static_cast(0xFFFFEDCC)); ++ __ li(t2, static_cast(0xEDCBA988)); ++ __ li(t3, static_cast(0x80000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ add_w(a2, a7, t1); ++ __ Add_w(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Add_w(t4, t1, static_cast(0x7FFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ __ addi_w(a2, a6, 0x800); ++ __ Add_w(a3, a6, 0xFFFFF800); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ mul_w(a2, t1, a7); ++ __ Mul_w(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Mul_w(t4, t1, static_cast(0x7FFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ mulh_w(a2, t1, a7); ++ __ Mulh_w(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Mulh_w(t4, t1, static_cast(0x7FFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mulh_wu(a2, a4, static_cast(0xFFFFEDCC)); ++ __ Branch(&error, ne, a2, Operand(0x3)); ++ __ Mulh_wu(a3, a4, t1); ++ __ Branch(&error, ne, a3, Operand(0x3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ div_w(a2, a7, t2); ++ __ Div_w(a3, a7, t2); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Div_w(t4, a7, static_cast(0xEDCBA988)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Div_wu(a2, a7, a5); ++ __ Branch(&error, ne, a2, Operand(0x70821)); ++ __ Div_wu(a3, t0, static_cast(0x00001234)); ++ __ Branch(&error, ne, a3, Operand(0xE1042)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_w(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ Mod_w(a3, t2, static_cast(0x00001234)); ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFFFFFFF258)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_wu(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ Mod_wu(a3, t2, static_cast(0x00001234)); ++ __ Branch(&error, ne, a3, Operand(0xF0)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(macro_instructions2) { ++ // Test 64bit calculate instructions macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label exit, error; ++ ++ __ li(a4, 0x17312); ++ __ li(a5, 0x1012131415161718); ++ __ li(a6, 0x51F4B764A26E7412); ++ __ li(a7, 0x7FFFFFFFFFFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547)); ++ __ li(t1, static_cast(0xDF6B8F35A10E205C)); ++ __ li(t2, static_cast(0x81F25A87C4236841)); ++ __ li(t3, static_cast(0x8000000000000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ add_d(a2, a7, t1); ++ __ Add_d(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Add_d(t4, t1, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ __ addi_d(a2, a6, 0x800); ++ __ Add_d(a3, a6, Operand(0xFFFFFFFFFFFFF800)); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mul_d(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0xdbe6a8729a547fb0)); ++ __ Mul_d(a3, t0, Operand(0xDF6B8F35A10E205C)); ++ __ Branch(&error, ne, a3, Operand(0x57ad69f40f870584)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mulh_d(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x52514c6c6b54467)); ++ __ Mulh_d(a3, t0, Operand(0xDF6B8F35A10E205C)); ++ __ Branch(&error, ne, a3, Operand(0x15d)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Div_d(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Div_d(a3, t1, Operand(0x17312)); ++ __ Branch(&error, ne, a3, Operand(0xffffe985f631e6d9)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Div_du(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Div_du(a3, t1, 0x17312); ++ __ Branch(&error, ne, a3, Operand(0x9a22ffd3973d)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_d(a2, a6, a4); ++ __ Branch(&error, ne, a2, Operand(0x13558)); ++ __ Mod_d(a3, t2, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(0xfffffffffffffb0a)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_du(a2, a6, a4); ++ __ Branch(&error, ne, a2, Operand(0x13558)); ++ __ Mod_du(a3, t2, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(0x81f25a87c4236841)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(macro_instructions3) { ++ // Test 64bit calculate instructions macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label exit, error; ++ ++ __ li(a4, 0x17312); ++ __ li(a5, 0x1012131415161718); ++ __ li(a6, 0x51F4B764A26E7412); ++ __ li(a7, 0x7FFFFFFFFFFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547)); ++ __ li(t1, static_cast(0xDF6B8F35A10E205C)); ++ __ li(t2, static_cast(0x81F25A87C4236841)); ++ __ li(t3, static_cast(0x8000000000000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ And(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x1310)); ++ __ And(a3, a6, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a3, Operand(0x51F4B764A26E7412)); ++ __ andi(a2, a6, 0xDCB); ++ __ And(a3, a6, Operand(0xDCB)); ++ __ Branch(&error, ne, a3, Operand(a2)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Or(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xfffffffffffff55f)); ++ __ Or(a3, t2, Operand(0x8000000000000000)); ++ __ Branch(&error, ne, a3, Operand(0x81f25a87c4236841)); ++ __ ori(a2, a5, 0xDCB); ++ __ Or(a3, a5, Operand(0xDCB)); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Orn(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffe7)); ++ __ Orn(a3, t2, Operand(0x81F25A87C4236841)); ++ __ Branch(&error, ne, a3, Operand(0xffffffffffffffff)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Xor(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x209470ca5ef1d51b)); ++ __ Xor(a3, t2, Operand(0x8000000000000000)); ++ __ Branch(&error, ne, a3, Operand(0x1f25a87c4236841)); ++ __ Xor(a2, t2, Operand(0xDCB)); ++ __ Branch(&error, ne, a2, Operand(0x81f25a87c423658a)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Nor(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0xefedecebeae888e5)); ++ __ Nor(a3, a6, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a3, Operand(0x8000000000000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Andn(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x16002)); ++ __ Andn(a3, a6, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Orn(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffe7)); ++ __ Orn(a3, t2, Operand(0x8000000000000000)); ++ __ Branch(&error, ne, a3, Operand(0xffffffffffffffff)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Neg(a2, a7); ++ __ Branch(&error, ne, a2, Operand(0x8000000000000001)); ++ __ Neg(a3, t0); ++ __ Branch(&error, ne, a3, Operand(0xAB9)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Slt(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Slt(a3, a7, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ __ Slt(a3, a4, 0x800); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sle(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Sle(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sle(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ or_(a2, 
zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sleu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Sleu(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sleu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0x1))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sge(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sge(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sge(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0x1))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sgeu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sgeu(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sgeu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sgt(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sgt(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ __ Sgt(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0x1))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sgtu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sgtu(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ __ Sgtu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(Rotr_w) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ int32_t input; ++ int32_t result_rotr_0; ++ int32_t result_rotr_4; ++ int32_t result_rotr_8; ++ int32_t result_rotr_12; ++ int32_t result_rotr_16; ++ int32_t result_rotr_20; ++ int32_t result_rotr_24; ++ int32_t result_rotr_28; ++ int32_t result_rotr_32; ++ int32_t result_rotri_0; ++ int32_t result_rotri_4; ++ int32_t result_rotri_8; ++ int32_t result_rotri_12; ++ int32_t result_rotri_16; ++ int32_t result_rotri_20; ++ int32_t result_rotri_24; ++ int32_t result_rotri_28; ++ int32_t result_rotri_32; ++ }; ++ T t; ++ ++ __ Ld_w(a4, MemOperand(a0, offsetof(T, input))); ++ ++ __ Rotr_w(a5, a4, 0); ++ __ Rotr_w(a6, a4, 0x04); ++ __ Rotr_w(a7, a4, 0x08); ++ __ Rotr_w(t0, a4, 0x0C); ++ __ Rotr_w(t1, a4, 0x10); ++ __ Rotr_w(t2, a4, -0x0C); ++ __ Rotr_w(t3, a4, -0x08); ++ __ Rotr_w(t4, a4, -0x04); ++ __ Rotr_w(t5, a4, 0x20); ++ __ St_w(a5, MemOperand(a0, offsetof(T, result_rotr_0))); ++ __ St_w(a6, MemOperand(a0, offsetof(T, result_rotr_4))); ++ __ St_w(a7, MemOperand(a0, offsetof(T, result_rotr_8))); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotr_12))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotr_16))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_rotr_20))); ++ __ St_w(t3, 
MemOperand(a0, offsetof(T, result_rotr_24))); ++ __ St_w(t4, MemOperand(a0, offsetof(T, result_rotr_28))); ++ __ St_w(t5, MemOperand(a0, offsetof(T, result_rotr_32))); ++ ++ __ li(t5, 0); ++ __ Rotr_w(a5, a4, t5); ++ __ li(t5, 0x04); ++ __ Rotr_w(a6, a4, t5); ++ __ li(t5, 0x08); ++ __ Rotr_w(a7, a4, t5); ++ __ li(t5, 0x0C); ++ __ Rotr_w(t0, a4, t5); ++ __ li(t5, 0x10); ++ __ Rotr_w(t1, a4, t5); ++ __ li(t5, -0x0C); ++ __ Rotr_w(t2, a4, t5); ++ __ li(t5, -0x08); ++ __ Rotr_w(t3, a4, t5); ++ __ li(t5, -0x04); ++ __ Rotr_w(t4, a4, t5); ++ __ li(t5, 0x20); ++ __ Rotr_w(t5, a4, t5); ++ ++ __ St_w(a5, MemOperand(a0, offsetof(T, result_rotri_0))); ++ __ St_w(a6, MemOperand(a0, offsetof(T, result_rotri_4))); ++ __ St_w(a7, MemOperand(a0, offsetof(T, result_rotri_8))); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotri_12))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotri_16))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_rotri_20))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_rotri_24))); ++ __ St_w(t4, MemOperand(a0, offsetof(T, result_rotri_28))); ++ __ St_w(t5, MemOperand(a0, offsetof(T, result_rotri_32))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x12345678; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotr_0); ++ CHECK_EQ(static_cast(0x81234567), t.result_rotr_4); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotr_8); ++ CHECK_EQ(static_cast(0x67812345), t.result_rotr_12); ++ CHECK_EQ(static_cast(0x56781234), t.result_rotr_16); ++ CHECK_EQ(static_cast(0x45678123), t.result_rotr_20); ++ CHECK_EQ(static_cast(0x34567812), t.result_rotr_24); ++ CHECK_EQ(static_cast(0x23456781), t.result_rotr_28); ++ CHECK_EQ(static_cast(0x12345678), t.result_rotr_32); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotri_0); ++ CHECK_EQ(static_cast(0x81234567), t.result_rotri_4); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotri_8); ++ CHECK_EQ(static_cast(0x67812345), t.result_rotri_12); ++ CHECK_EQ(static_cast(0x56781234), t.result_rotri_16); ++ CHECK_EQ(static_cast(0x45678123), t.result_rotri_20); ++ CHECK_EQ(static_cast(0x34567812), t.result_rotri_24); ++ CHECK_EQ(static_cast(0x23456781), t.result_rotri_28); ++ CHECK_EQ(static_cast(0x12345678), t.result_rotri_32); ++} ++ ++TEST(Rotr_d) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ int64_t input; ++ int64_t result_rotr_0; ++ int64_t result_rotr_8; ++ int64_t result_rotr_16; ++ int64_t result_rotr_24; ++ int64_t result_rotr_32; ++ int64_t result_rotr_40; ++ int64_t result_rotr_48; ++ int64_t result_rotr_56; ++ int64_t result_rotr_64; ++ int64_t result_rotri_0; ++ int64_t result_rotri_8; ++ int64_t result_rotri_16; ++ int64_t result_rotri_24; ++ int64_t result_rotri_32; ++ int64_t result_rotri_40; ++ int64_t result_rotri_48; ++ int64_t result_rotri_56; ++ int64_t result_rotri_64; ++ }; ++ T t; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, input))); ++ ++ __ Rotr_d(a5, a4, 0); ++ __ Rotr_d(a6, a4, 0x08); ++ __ Rotr_d(a7, a4, 0x10); ++ __ Rotr_d(t0, a4, 0x18); ++ __ Rotr_d(t1, a4, 0x20); ++ __ Rotr_d(t2, a4, -0x18); ++ __ Rotr_d(t3, a4, -0x10); ++ __ Rotr_d(t4, a4, -0x08); ++ __ Rotr_d(t5, a4, 0x40); ++ __ St_d(a5, MemOperand(a0, offsetof(T, 
result_rotr_0))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_rotr_8))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_rotr_16))); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotr_24))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotr_32))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_rotr_40))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_rotr_48))); ++ __ St_d(t4, MemOperand(a0, offsetof(T, result_rotr_56))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_rotr_64))); ++ ++ __ li(t5, 0); ++ __ Rotr_d(a5, a4, t5); ++ __ li(t5, 0x08); ++ __ Rotr_d(a6, a4, t5); ++ __ li(t5, 0x10); ++ __ Rotr_d(a7, a4, t5); ++ __ li(t5, 0x18); ++ __ Rotr_d(t0, a4, t5); ++ __ li(t5, 0x20); ++ __ Rotr_d(t1, a4, t5); ++ __ li(t5, -0x18); ++ __ Rotr_d(t2, a4, t5); ++ __ li(t5, -0x10); ++ __ Rotr_d(t3, a4, t5); ++ __ li(t5, -0x08); ++ __ Rotr_d(t4, a4, t5); ++ __ li(t5, 0x40); ++ __ Rotr_d(t5, a4, t5); ++ ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_rotri_0))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_rotri_8))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_rotri_16))); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotri_24))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotri_32))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_rotri_40))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_rotri_48))); ++ __ St_d(t4, MemOperand(a0, offsetof(T, result_rotri_56))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_rotri_64))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x0123456789ABCDEF; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotr_0); ++ CHECK_EQ(static_cast(0xEF0123456789ABCD), t.result_rotr_8); ++ CHECK_EQ(static_cast(0xCDEF0123456789AB), t.result_rotr_16); ++ CHECK_EQ(static_cast(0xABCDEF0123456789), t.result_rotr_24); ++ CHECK_EQ(static_cast(0x89ABCDEF01234567), t.result_rotr_32); ++ CHECK_EQ(static_cast(0x6789ABCDEF012345), t.result_rotr_40); ++ CHECK_EQ(static_cast(0x456789ABCDEF0123), t.result_rotr_48); ++ CHECK_EQ(static_cast(0x23456789ABCDEF01), t.result_rotr_56); ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotr_64); ++ ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotri_0); ++ CHECK_EQ(static_cast(0xEF0123456789ABCD), t.result_rotri_8); ++ CHECK_EQ(static_cast(0xCDEF0123456789AB), t.result_rotri_16); ++ CHECK_EQ(static_cast(0xABCDEF0123456789), t.result_rotri_24); ++ CHECK_EQ(static_cast(0x89ABCDEF01234567), t.result_rotri_32); ++ CHECK_EQ(static_cast(0x6789ABCDEF012345), t.result_rotri_40); ++ CHECK_EQ(static_cast(0x456789ABCDEF0123), t.result_rotri_48); ++ CHECK_EQ(static_cast(0x23456789ABCDEF01), t.result_rotri_56); ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotri_64); ++} ++ ++TEST(macro_instructions4) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ double a; ++ float b; ++ double result_floor_a; ++ float result_floor_b; ++ double result_ceil_a; ++ float result_ceil_b; ++ double result_trunc_a; ++ float result_trunc_b; ++ double result_round_a; ++ float result_round_b; ++ }; ++ T t; ++ ++ const int kTableLength = 16; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ 
-2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 1.7976931348623157E+308, 6.27463370218383111104242366943E-307, ++ std::numeric_limits::max() - 0.1, ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ -2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 1.7976931348623157E+38, 6.27463370218383111104242366943E-37, ++ std::numeric_limits::lowest() + 0.6, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_round_s[kTableLength] = { ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 1.7976931348623157E+38, 0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_round_d[kTableLength] = { ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 1.7976931348623157E+308, 0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_trunc_s[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E+38, 0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_trunc_d[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E+308, 0, ++ std::numeric_limits::max() - 1, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_ceil_s[kTableLength] = { ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E38, 1, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_ceil_d[kTableLength] = { ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E308, 1, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_floor_s[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 1.7976931348623157E38, 0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_floor_d[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 1.7976931348623157E308, 0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(T, b))); ++ __ Floor_d(f10, f8); ++ __ Floor_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_floor_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_floor_b))); ++ __ Ceil_d(f10, f8); ++ __ Ceil_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_ceil_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_ceil_b))); ++ __ Trunc_d(f10, f8); ++ __ Trunc_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_trunc_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_trunc_b))); ++ __ Round_d(f10, f8); ++ __ Round_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_round_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_round_b))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ t.a = inputs_d[i]; ++ t.b = inputs_s[i]; ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(t.result_floor_a, outputs_floor_d[i]); ++ CHECK_EQ(t.result_floor_b, outputs_floor_s[i]); ++ CHECK_EQ(t.result_ceil_a, outputs_ceil_d[i]); ++ CHECK_EQ(t.result_ceil_b, outputs_ceil_s[i]); ++ 
CHECK_EQ(t.result_trunc_a, outputs_trunc_d[i]); ++ CHECK_EQ(t.result_trunc_b, outputs_trunc_s[i]); ++ CHECK_EQ(t.result_round_a, outputs_round_d[i]); ++ CHECK_EQ(t.result_round_b, outputs_round_s[i]); ++ } ++} ++ ++uint64_t run_ExtractBits(uint64_t source, int pos, int size, bool sign_extend) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ if (sign_extend) { ++ __ ExtractBits(t0, a0, a1, size, true); ++ } else { ++ __ ExtractBits(t0, a0, a1, size); ++ } ++ __ or_(a0, t0, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(source, pos, 0, 0, 0)); ++ return res; ++} ++ ++TEST(ExtractBits) { ++ CcTest::InitializeVM(); ++ ++ struct TestCase { ++ uint64_t source; ++ int pos; ++ int size; ++ bool sign_extend; ++ uint64_t res; ++ }; ++ ++ // clang-format off ++ struct TestCase tc[] = { ++ //source, pos, size, sign_extend, res; ++ {0x800, 4, 8, false, 0x80}, ++ {0x800, 4, 8, true, 0xFFFFFFFFFFFFFF80}, ++ {0x800, 5, 8, true, 0x40}, ++ {0x40000, 3, 16, false, 0x8000}, ++ {0x40000, 3, 16, true, 0xFFFFFFFFFFFF8000}, ++ {0x40000, 4, 16, true, 0x4000}, ++ {0x200000000, 2, 32, false, 0x80000000}, ++ {0x200000000, 2, 32, true, 0xFFFFFFFF80000000}, ++ {0x200000000, 3, 32, true, 0x40000000}, ++ }; ++ // clang-format on ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t result = ++ run_ExtractBits(tc[i].source, tc[i].pos, tc[i].size, tc[i].sign_extend); ++ CHECK_EQ(tc[i].res, result); ++ } ++} ++ ++uint64_t run_InsertBits(uint64_t dest, uint64_t source, int pos, int size) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ InsertBits(a0, a1, a2, size); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(dest, source, pos, 0, 0)); ++ return res; ++} ++ ++TEST(InsertBits) { ++ CcTest::InitializeVM(); ++ ++ struct TestCase { ++ uint64_t dest; ++ uint64_t source; ++ int pos; ++ int size; ++ uint64_t res; ++ }; ++ ++ // clang-format off ++ struct TestCase tc[] = { ++ //dest source, pos, size, res; ++ {0x11111111, 0x1234, 32, 16, 0x123411111111}, ++ {0x111111111111, 0xFFFFF, 24, 10, 0x1113FF111111}, ++ {0x1111111111111111, 0xFEDCBA, 16, 4, 0x11111111111A1111}, ++ }; ++ // clang-format on ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t result = ++ run_InsertBits(tc[i].dest, tc[i].source, tc[i].pos, tc[i].size); ++ CHECK_EQ(tc[i].res, result); ++ } ++} ++ ++TEST(Popcnt) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct TestCase { ++ uint32_t a; ++ uint64_t b; ++ int expected_a; ++ int expected_b; ++ int result_a; ++ int result_b; ++ }; ++ // clang-format off ++ struct TestCase tc[] = { ++ { 0x12345678, 0x1122334455667788, 
13, 26, 0, 0}, ++ { 0x1234, 0x123456, 5, 9, 0, 0}, ++ { 0xFFF00000, 0xFFFF000000000000, 12, 16, 0, 0}, ++ { 0xFF000012, 0xFFFF000000001234, 10, 21, 0, 0} ++ }; ++ // clang-format on ++ ++ __ Ld_w(t0, MemOperand(a0, offsetof(TestCase, a))); ++ __ Ld_d(t1, MemOperand(a0, offsetof(TestCase, b))); ++ __ Popcnt_w(t2, t0); ++ __ Popcnt_d(t3, t1); ++ __ St_w(t2, MemOperand(a0, offsetof(TestCase, result_a))); ++ __ St_w(t3, MemOperand(a0, offsetof(TestCase, result_b))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ f.Call(&tc[i], 0, 0, 0, 0); ++ CHECK_EQ(tc[i].expected_a, tc[i].result_a); ++ CHECK_EQ(tc[i].expected_b, tc[i].result_b); ++ } ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/tools/dev/gm.py b/src/3rdparty/chromium/v8/tools/dev/gm.py +index 9d5cbf056a2..0363af7148e 100755 +--- a/src/3rdparty/chromium/v8/tools/dev/gm.py ++++ b/src/3rdparty/chromium/v8/tools/dev/gm.py +@@ -39,7 +39,7 @@ BUILD_TARGETS_ALL = ["all"] + + # All arches that this script understands. + ARCHES = ["ia32", "x64", "arm", "arm64", "mipsel", "mips64el", "ppc", "ppc64", +- "s390", "s390x", "android_arm", "android_arm64"] ++ "s390", "s390x", "android_arm", "android_arm64", "la64"] + # Arches that get built/run when you don't specify any. + DEFAULT_ARCHES = ["ia32", "x64", "arm", "arm64"] + # Modes that this script understands. +@@ -246,7 +246,7 @@ class Config(object): + if self.arch == "android_arm": return "\nv8_target_cpu = \"arm\"" + if self.arch == "android_arm64": return "\nv8_target_cpu = \"arm64\"" + if self.arch in ("arm", "arm64", "mipsel", "mips64el", "ppc", "ppc64", +- "s390", "s390x"): ++ "s390", "s390x", "la64"): + return "\nv8_target_cpu = \"%s\"" % self.arch + return "" + +diff --git a/src/3rdparty/gn/tools/gn/args.cc b/src/3rdparty/gn/tools/gn/args.cc +index 802c3731d5a..748f1ff3e29 100644 +--- a/src/3rdparty/gn/tools/gn/args.cc ++++ b/src/3rdparty/gn/tools/gn/args.cc +@@ -327,6 +327,7 @@ void Args::SetSystemVarsLocked(Scope* dest) const { + static const char kArm64[] = "arm64"; + static const char kMips[] = "mipsel"; + static const char kMips64[] = "mips64el"; ++ static const char kLa64[] = "la64"; + static const char kS390X[] = "s390x"; + static const char kPPC64[] = "ppc64"; + const char* arch = nullptr; +@@ -346,6 +347,8 @@ void Args::SetSystemVarsLocked(Scope* dest) const { + arch = kMips; + else if (os_arch == "mips64") + arch = kMips64; ++ else if (os_arch == "loongarch64") ++ arch = kLa64; + else if (os_arch == "s390x") + arch = kS390X; + else if (os_arch == "ppc64" || os_arch == "ppc64le") +diff --git a/src/3rdparty/gn/tools/gn/variables.cc b/src/3rdparty/gn/tools/gn/variables.cc +index ff6d45cb619..771d7b04cdb 100644 +--- a/src/3rdparty/gn/tools/gn/variables.cc ++++ b/src/3rdparty/gn/tools/gn/variables.cc +@@ -111,6 +111,7 @@ Possible values + - "arm" + - "arm64" + - "mipsel" ++ - "la64" + )"; + + const char kTargetName[] = "target_name"; +diff --git a/src/3rdparty/gn/util/build_config.h b/src/3rdparty/gn/util/build_config.h +index addd7cfb081..14c0dab426c 100644 +--- a/src/3rdparty/gn/util/build_config.h ++++ b/src/3rdparty/gn/util/build_config.h +@@ -172,6 +172,18 @@ + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 + #endif ++#elif defined(__loongarch__) ++#if 
defined(__LP64__) ++#define ARCH_CPU_LOONGARCH_FAMILY 1 ++#define ARCH_CPU_LA64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 ++#else ++#define ARCH_CPU_LOONGARCH_FAMILY 1 ++#define ARCH_CPU_LA 1 ++#define ARCH_CPU_32_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 ++#endif + #else + #error Please add support for your architecture in build_config.h + #endif +diff --git a/src/buildtools/config/linux.pri b/src/buildtools/config/linux.pri +index 56c18bdb5..78381e0d5 100644 +--- a/src/buildtools/config/linux.pri ++++ b/src/buildtools/config/linux.pri +@@ -116,6 +116,11 @@ contains(QT_ARCH, "mips") { + else: contains(QMAKE_CFLAGS, "-mdsp"): gn_args += mips_dsp_rev=1 + } + ++contains(QT_ARCH, "loongarch64") { ++ DEFINES += ARCH_CPU_LA64 ++ gn_args += debug_devtools=false ++} ++ + host_build { + gn_args += custom_toolchain=\"$$QTWEBENGINE_OUT_ROOT/src/toolchain:host\" + GN_HOST_CPU = $$gnArch($$QT_ARCH) diff --git a/qt6-quick3dphysics/PKGBUILD b/qt6-quick3dphysics/PKGBUILD index 8b0113ff74..973ed4f360 100644 --- a/qt6-quick3dphysics/PKGBUILD +++ b/qt6-quick3dphysics/PKGBUILD @@ -19,8 +19,14 @@ makedepends=(cmake qt6-shadertools) groups=(qt6) _pkgfn=${pkgname/6-/}-everywhere-src-$_qtver -source=(https://download.qt.io/official_releases/qt/${pkgver%.*}/$_qtver/submodules/$_pkgfn.tar.xz) -sha256sums=('2cc6b5f58d7b1de6de34279657ad2c73a0e82e29c7a56a12f2c00fb62725e15a') +source=(https://download.qt.io/official_releases/qt/${pkgver%.*}/$_qtver/submodules/$_pkgfn.tar.xz + qt3d-la64.patch) +sha256sums=('2cc6b5f58d7b1de6de34279657ad2c73a0e82e29c7a56a12f2c00fb62725e15a' + '3d0f784887aebda0498ad2778ed757069b334db8b8edf1b1e56a82e616972e55') + +prepare() { + patch -d $_pkgfn -p1 -i "$srcdir/qt3d-la64.patch" +} build() { cmake -B build -S $_pkgfn -G Ninja \ diff --git a/qt6-quick3dphysics/qt3d-la64.patch b/qt6-quick3dphysics/qt3d-la64.patch new file mode 100644 index 0000000000..6afbdc534f --- /dev/null +++ b/qt6-quick3dphysics/qt3d-la64.patch @@ -0,0 +1,31 @@ +Index: qtquick3dphysics-everywhere-src-6.5.1/src/3rdparty/PhysX/pxshared/include/foundation/PxPreprocessor.h +=================================================================== +--- qtquick3dphysics-everywhere-src-6.5.1.orig/src/3rdparty/PhysX/pxshared/include/foundation/PxPreprocessor.h ++++ qtquick3dphysics-everywhere-src-6.5.1/src/3rdparty/PhysX/pxshared/include/foundation/PxPreprocessor.h +@@ -127,6 +127,8 @@ Architecture defines, see http://sourcef + #define PX_PPC 1 + #elif defined(__mips__) + #define PX_X64 1 ++#elif defined(__loongarch_lp64) ++#define PX_LA64 1 + #else + #error "Unknown architecture" + #endif +@@ -147,7 +149,7 @@ SIMD defines + #endif + + /** Disable SIMD for webassembly, mips and arm64 */ +-#if defined(__EMSCRIPTEN__) || defined(__mips__) || defined(_M_ARM64) || defined(_M_ARM) ++#if defined(__EMSCRIPTEN__) || defined(__mips__) || defined(_M_ARM64) || defined(_M_ARM) || defined(__loongarch__) + #define PX_SIMD_DISABLED 1 + #endif + +@@ -436,7 +438,7 @@ General defines + + // static assert + #if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_SWITCH) || (PX_CLANG && PX_ARM) +-#define PX_COMPILE_TIME_ASSERT(exp) typedef char PX_CONCAT(PxCompileTimeAssert_Dummy, __COUNTER__)[(exp) ? 1 : -1] __attribute__((unused)) ++#define PX_COMPILE_TIME_ASSERT(exp) typedef char PX_CONCAT(PxCompileTimeAssert_Dummy, __COUNTER__)[(exp) ? 1 : 0] __attribute__((unused)) + #else + #define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 
1 : -1] + #endif diff --git a/qt6-tools/qt6-tools-fix-build.patch b/qt6-tools/qt6-tools-fix-build.patch new file mode 100644 index 0000000000..9facbaef20 --- /dev/null +++ b/qt6-tools/qt6-tools-fix-build.patch @@ -0,0 +1,11 @@ +--- qttools-everywhere-src-6.4.2/src/linguist/lupdate/lupdatepreprocessoraction.h 2023-03-06 23:30:16.999102388 +0800 ++++ qttools-everywhere-src-6.4.2/src/linguist/lupdate/lupdatepreprocessoraction.h 2023-03-06 23:32:56.223626722 +0800 +@@ -59,7 +59,7 @@ + #endif + clang::StringRef /*searchPath*/, clang::StringRef /*relativePath*/, + const clang::Module */*imported*/, +- clang::SrcMgr::CharacteristicKind /*fileType*/) override; ++ clang::SrcMgr::CharacteristicKind /*fileType*/); + + std::string m_inputFile; + clang::Preprocessor &m_preprocessor; diff --git a/qtcreator/qtcreator-la64.patch b/qtcreator/qtcreator-la64.patch new file mode 100644 index 0000000000..f8dff379de --- /dev/null +++ b/qtcreator/qtcreator-la64.patch @@ -0,0 +1,179 @@ +Index: qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.cpp +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/plugins/projectexplorer/abi.cpp ++++ qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.cpp +@@ -152,8 +152,10 @@ static Abi::Architecture architectureFro + return Abi::X86Architecture; + if (arch == "ia64") + return Abi::ItaniumArchitecture; + if (arch.startsWith("mips")) + return Abi::MipsArchitecture; ++ if (arch.startsWith("loongarch")) ++ return Abi::LoongArchitecture; + if (arch.startsWith("power")) + return Abi::PowerPCArchitecture; + if (arch.startsWith("sh")) // Not in Qt documentation! +@@ -363,6 +367,9 @@ static Abis abiOf(const QByteArray &data + } + + switch (machine) { ++ case 2: // EM_LOONGARCH64 ++ result.append(Abi(Abi::LoongArchitecture, os, flavor, Abi::ElfFormat, 64)); ++ break; + case 3: // EM_386 + result.append(Abi(Abi::X86Architecture, os, flavor, Abi::ElfFormat, 32)); + break; +@@ -390,6 +397,8 @@ static Abis abiOf(const QByteArray &data + case 50: // EM_IA_64 + result.append(Abi(Abi::ItaniumArchitecture, os, flavor, Abi::ElfFormat, 64)); + break; ++ case 258: // EM_AARCH64 ++ result.append(Abi(Abi::LoongArchitecture, os, flavor, Abi::ElfFormat, 64)); + default: + ; + } +@@ -575,6 +584,9 @@ Abi Abi::abiFromTargetTriplet(const QStr + } else if (p.startsWith("mips")) { + arch = MipsArchitecture; + width = p.contains("64") ? 64 : 32; ++ } else if (p.startsWith("loongarch")) { ++ arch = LoongArchitecture; ++ width = p.contains("64") ? 
64 : 32; + } else if (p == "x86_64" || p == "amd64") { + arch = X86Architecture; + width = 64; +@@ -772,6 +784,8 @@ QString Abi::toString(const Architecture + return QLatin1String("mcs251"); + case MipsArchitecture: + return QLatin1String("mips"); ++ case LoongArchitecture: ++ return QLatin1String("loongarch"); + case PowerPCArchitecture: + return QLatin1String("ppc"); + case ItaniumArchitecture: +@@ -944,6 +958,8 @@ Abi::Architecture Abi::architectureFromS + return Mcs251Architecture; + if (a == "mips") + return MipsArchitecture; ++ if (a == "loongarch") ++ return LoongArchitecture; + if (a == "ppc") + return PowerPCArchitecture; + if (a == "itanium") +@@ -1509,6 +1525,14 @@ void ProjectExplorer::ProjectExplorerPlu + << int(Abi::LinuxOS) << int(Abi::GenericFlavor) + << int(Abi::ElfFormat) << 64; + ++ QTest::newRow("loongarch-linux-gnu") << int(Abi::LoongArchitecture) ++ << int(Abi::LinuxOS) << int(Abi::GenericFlavor) ++ << int(Abi::ElfFormat) << 32; ++ ++ QTest::newRow("loongarch64-linux-gnu") << int(Abi::LoongArchitecture) ++ << int(Abi::LinuxOS) << int(Abi::GenericFlavor) ++ << int(Abi::ElfFormat) << 64; ++ + QTest::newRow("arm-wrs-vxworks") << int(Abi::ArmArchitecture) + << int(Abi::VxWorks) << int(Abi::VxWorksFlavor) + << int(Abi::ElfFormat) << 32; +Index: qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.h +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/plugins/projectexplorer/abi.h ++++ qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.h +@@ -76,6 +76,7 @@ public: + R32CArchitecture, + CR16Architecture, + RiscVArchitecture, ++ LoongArchitecture, + UnknownArchitecture + }; + +Index: qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/gcctoolchain.cpp +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/plugins/projectexplorer/gcctoolchain.cpp ++++ qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/gcctoolchain.cpp +@@ -2042,6 +2042,14 @@ void ProjectExplorerPlugin::testGccAbiGu + << QString::fromLatin1("mips64el-linux-uclibc") + << QByteArray("#define __SIZEOF_SIZE_T__ 8") + << QStringList({"mips-linux-generic-elf-64bit"}); ++ QTest::newRow("Linux 12 (loongarch)") ++ << QString::fromLatin1("loongarch32-linux-gnu") ++ << QByteArray("#define __SIZEOF_SIZE_T__ 4") ++ << QStringList({"loongarch-linux-generic-elf-32bit"}); ++ QTest::newRow("Linux 13 (64bit loongarch)") ++ << QString::fromLatin1("loongarch64-linux-gnu") ++ << QByteArray("#define __SIZEOF_SIZE_T__ 8") ++ << QStringList({"loongarch64-linux-generic-elf-64bit"}); + + QTest::newRow("Mingw 1 (32bit)") + << QString::fromLatin1("i686-w64-mingw32") +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/ModUtils/utils.js +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/share/qbs/imports/qbs/ModUtils/utils.js ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/ModUtils/utils.js +@@ -550,6 +550,10 @@ function guessArchitecture(m) { + architecture = "mips"; + if (hasAnyOf(m, ["_MIPS_ARCH_MIPS64", "__mips64"])) + architecture += "64"; ++ } else if (hasAnyOf(m, ["__loongarch", "__loongarch__"])) { ++ architecture = "loongarch"; ++ if (hasAnyOf(m, ["__LP64__", "__loongarch64"])) ++ architecture += "64"; + } else if (hasAnyOf(m, ["__ppc__", "__ppc", "__powerpc__", + "_ARCH_COM", "_ARCH_PWR", "_ARCH_PPC", "_M_MPPC", "_M_PPC"])) { + architecture = 
"ppc"; +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/Probes/LibraryProbe.qbs +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/share/qbs/imports/qbs/Probes/LibraryProbe.qbs ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/Probes/LibraryProbe.qbs +@@ -51,6 +51,10 @@ PathProbe { + result = ["/usr/lib/mipsel-linux-gnu"] + else if (qbs.architecture === "mips64") + result = ["/usr/lib/mips64el-linux-gnuabi64"] ++ else if (qbs.architecture === "loongarch") ++ result = ["/usr/lib/loongarch-linux-gnu"] ++ else if (qbs.architecture === "loongarch64") ++ result = ["/usr/lib/loongarch64-linux-gnu"] + else if (qbs.architecture === "ppc") + result = ["/usr/lib/powerpc-linux-gnu"] + else if (qbs.architecture === "ppc64") +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/architectures.cpp +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/src/lib/corelib/tools/architectures.cpp ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/architectures.cpp +@@ -82,6 +82,10 @@ QString canonicalTargetArchitecture(cons + return StringConstants::i386Arch(); + } + ++ if (arch == StringConstants::loongArch() || arch == StringConstants::loongArch64()) { ++ return arch; ++ } ++ + if (arch == StringConstants::mipsArch() || arch == StringConstants::mips64Arch()) { + if (endianness == QStringLiteral("big")) + return arch + QStringLiteral("eb"); +@@ -145,6 +149,12 @@ QString canonicalArchitecture(const QStr + << QStringLiteral("mips64eb") + << QStringLiteral("mips64el")); + ++ archMap.insert(StringConstants::loongArch(), QStringList() ++ << QStringLiteral("loongarch")); ++ ++ archMap.insert(StringConstants::loongArch64(), QStringList() ++ << QStringLiteral("loongarch64")); ++ + QMapIterator i(archMap); + while (i.hasNext()) { + i.next(); +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/stringconstants.h +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/src/lib/corelib/tools/stringconstants.h ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/stringconstants.h +@@ -233,6 +233,8 @@ public: + QBS_STRING_CONSTANT(i586Arch, "i586") + QBS_STRING_CONSTANT(mipsArch, "mips") + QBS_STRING_CONSTANT(mips64Arch, "mips64") ++ QBS_STRING_CONSTANT(loongArch, "loongarch") ++ QBS_STRING_CONSTANT(loongArch64, "loongarch64") + QBS_STRING_CONSTANT(powerPcArch, "powerpc") + QBS_STRING_CONSTANT(ppcArch, "ppc") + QBS_STRING_CONSTANT(ppc64Arch, "ppc64") diff --git a/quazip/PKGBUILD b/quazip/PKGBUILD index 4a4e63c82c..8979595e8a 100644 --- a/quazip/PKGBUILD +++ b/quazip/PKGBUILD @@ -7,7 +7,7 @@ pkgbase=quazip pkgname=(quazip-qt5 quazip-qt6) pkgver=1.4 -pkgrel=1 +pkgrel=2 pkgdesc='C++ wrapper for the ZIP/UNZIP C package' url='https://stachenov.github.io/quazip/' license=(LGPL) @@ -18,7 +18,8 @@ sha256sums=('79633fd3a18e2d11a7d5c40c4c79c1786ba0c74b59ad752e8429746fe1781dd6') build() { cmake -B build5 -S $pkgbase-$pkgver \ - -DCMAKE_INSTALL_PREFIX=/usr + -DCMAKE_INSTALL_PREFIX=/usr \ + -DQUAZIP_QT_MAJOR_VERSION=5 cmake --build build5 cmake -B build6 -S $pkgbase-$pkgver \ diff --git a/rathole/PKGBUILD b/rathole/PKGBUILD index 50912f83f3..3ccf4a7556 100644 --- a/rathole/PKGBUILD +++ b/rathole/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo 
fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rbw/PKGBUILD b/rbw/PKGBUILD index aba528264f..d4f34b457a 100644 --- a/rbw/PKGBUILD +++ b/rbw/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('2b4cd61193fe79e9a095ab4534fcb2982c5a611f54789a97f6fd8aea133a93575fb7977 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/reapack/PKGBUILD b/reapack/PKGBUILD index c379afc3fb..c102d38b5d 100644 --- a/reapack/PKGBUILD +++ b/reapack/PKGBUILD @@ -83,7 +83,7 @@ package() { cd "$pkgname" # plugin - install -vDm755 -t "$pkgdir/usr/lib/$pkgname" "build/reaper_reapack-$CARCH.so" + install -vDm755 -t "$pkgdir/usr/lib/$pkgname" "build/reaper_reapack-`uname -m`.so" # documentation install -vDm644 -t "$pkgdir/usr/share/doc/$pkgname" README.md diff --git a/rebuilderd/PKGBUILD b/rebuilderd/PKGBUILD index e0e2a7f6fc..91b3e2dcf2 100644 --- a/rebuilderd/PKGBUILD +++ b/rebuilderd/PKGBUILD @@ -27,7 +27,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd ${pkgbase}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/refind/PKGBUILD b/refind/PKGBUILD index 5de3cc369d..f59f07e06a 100644 --- a/refind/PKGBUILD +++ b/refind/PKGBUILD @@ -13,13 +13,17 @@ makedepends=( efibootmgr gnu-efi ) -source=(https://sourceforge.net/projects/refind/files/$pkgver/$pkgname-src-$pkgver.tar.gz) -sha512sums=('41c120c1afec37c508aa5c0ec09a6563c3047ef84932308c91701795b950431dfad17d25cf664039b490a302d475add98441b75f90ff71cadce41febedc68a9e') -b2sums=('02019ddb872ce44d2a2119902edebd633f925d49634e3bcc6bfb2c9dedb8ce213166909395a333d3a37e95c67720e31b1f5fcf25083801c17d645372aa54a06a') +source=(https://sourceforge.net/projects/refind/files/$pkgver/$pkgname-src-$pkgver.tar.gz + refind-la64-0.14.0.patch) +sha512sums=('41c120c1afec37c508aa5c0ec09a6563c3047ef84932308c91701795b950431dfad17d25cf664039b490a302d475add98441b75f90ff71cadce41febedc68a9e' + '413d4db728d0942036a8afbd7e0b68473e6175ab13834f79586a2597043803cb505d70081bfbd00f2f74f96ee54c85baecc2efab8c4a80b999193eaa9cfaffdd') +b2sums=('02019ddb872ce44d2a2119902edebd633f925d49634e3bcc6bfb2c9dedb8ce213166909395a333d3a37e95c67720e31b1f5fcf25083801c17d645372aa54a06a' + 'd54284e8da0292bddb79edb230ff36058053b6c5a3452c1b0d4b12a05d535169c83ff1dd9b31569de2d9e97a64eecff9c9f441369bbaccfc219f16aba383679f') _arch='x64' prepare() { cd $pkgbase-$pkgver + patch -p1 -i $srcdir/refind-la64-0.14.0.patch # remove the path prefix from the css reference, so that the css can live # in the same directory sed -e 's|../Styles/||g' -i docs/$pkgbase/*.html @@ -63,11 +67,17 @@ package_refind() { ) cd $pkgbase-$pkgver + if [ "$CARCH" == "loong64" ]; then + _arch='loongarch64' + else + _arch='x64' + fi # NOTE: the install target calls refind-install, therefore we install things # manually # efi binaries install -vDm 644 refind/*.efi -t "$pkgdir/usr/share/$pkgname/" install -vDm 644 drivers_*/*.efi -t "$pkgdir/usr/share/refind/drivers_$_arch/" + rm -f "$pkgdir/usr/share/refind/drivers_loongarch64/ext4_loongarch64.efi" install -vDm 644 gptsync/*.efi -t "$pkgdir/usr/share/$pkgname/tools_$_arch/" # sample config install -vDm 644 $pkgname.conf-sample -t "$pkgdir/usr/share/$pkgname/" diff --git a/refind/refind-la64-0.14.0.patch b/refind/refind-la64-0.14.0.patch new file mode 100644 index 0000000000..053c4b4a2d 
--- /dev/null +++ b/refind/refind-la64-0.14.0.patch @@ -0,0 +1,729 @@ +From 726f40b468f8cb3136d100d94620eb78f0bb27d9 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Sat, 17 Apr 2021 22:09:40 +0800 +Subject: [PATCH] add loongarch64 support + +--- + Make.common | 21 +++++++++++++++++++++ + Makefile | 3 +++ + RefindPkg.dsc | 5 ++++- + filesystems/Make.gnuefi | 5 +++++ + filesystems/btrfs.inf | 9 +++++++++ + filesystems/ext2.inf | 9 +++++++++ + filesystems/ext4.inf | 9 +++++++++ + filesystems/hfs.inf | 9 +++++++++ + filesystems/iso9660.inf | 9 +++++++++ + filesystems/ntfs.inf | 9 +++++++++ + filesystems/reiserfs.inf | 9 +++++++++ + gptsync.inf | 9 +++++++++ + gptsync/Make.gnuefi | 4 ++++ + gptsync/Make.tiano | 4 ++++ + gptsync/gptsync.h | 2 +- + include/refit_call_wrapper.h | 2 +- + libeg/image.c | 2 +- + mvrefind | 3 +++ + refind-install | 36 +++++++++++++++++++++++++++++------- + refind.inf | 11 ++++++++++- + refind/Make.tiano | 4 ++++ + refind/Makefile | 5 +++++ + refind/config.h | 2 ++ + refind/driver_support.c | 2 ++ + refind/global.h | 4 ++++ + refind/install.h | 5 +++++ + refind/launch_efi.c | 4 +++- + refind/lib.c | 2 ++ + refind/main.c | 5 +++++ + refind/scan.c | 8 ++++++++ + 31 files changed, 211 insertions(+), 13 deletions(-) + +diff --git a/Make.common b/Make.common +index 10cde40..9c5fb11 100644 +--- a/Make.common ++++ b/Make.common +@@ -166,6 +166,27 @@ ifeq ($(ARCH), aarch64) + LD_CODE = aarch64elf + endif + ++ifeq ($(ARCH), loongarch64) ++ GNUEFI_CFLAGS += -DEFILOONGARCH64 ++ FORMAT = -O binary ++ FORMAT_DRIVER = -O binary ++ SUBSYSTEM_LDFLAG = -defsym=EFI_SUBSYSTEM=0xa ++ LDFLAGS += --warn-common --no-undefined --fatal-warnings ++ ++ ARCH_CFLAGS = -fno-merge-constants -ffreestanding -DEFILOONGARCH64 ++ ifeq ($(MAKEWITH),TIANO) ++ ARCH_CFLAGS += -mcmodel=large -Wno-address -Wno-missing-braces -Wno-array-bounds -ffunction-sections -fdata-sections ++ endif ++ ifeq ($(MAKEWITH),GNUEFI) ++ ARCH_CFLAGS += -fno-stack-check ++ endif ++ ARCHDIR = LoongArch64 ++ UC_ARCH = LOONGARCH64 ++ FILENAME_CODE = loongarch64 ++ LD_CODE = loongarch64elf ++endif ++ ++ + # GNU-EFI compilation path uses .o files for compiled object code + %.o: %.c + $(CC) $(CFLAGS) $(ARCH_CFLAGS) $(GNUEFI_CFLAGS) $(LOCAL_GNUEFI_CFLAGS) \ +diff --git a/Makefile b/Makefile +index 4d07160..8ebc622 100644 +--- a/Makefile ++++ b/Makefile +@@ -117,6 +117,9 @@ tiano: + ifneq ($(ARCH),aarch64) + +make MAKEWITH=TIANO -C $(GPTSYNC_DIR) -f Make.tiano + endif ++ifneq ($(ARCH),loongarch64) ++ +make MAKEWITH=TIANO -C $(GPTSYNC_DIR) -f Make.tiano ++endif + # +make MAKEWITH=TIANO -C $(FS_DIR) + + all_tiano: tiano fs_tiano +diff --git a/RefindPkg.dsc b/RefindPkg.dsc +index c267f7a..5a7f857 100644 +--- a/RefindPkg.dsc ++++ b/RefindPkg.dsc +@@ -3,7 +3,7 @@ + PLATFORM_GUID = d6365e1c-b895-426d-a012-46769b2d02a3 + PLATFORM_VERSION = 4.5.0 + DSC_SPECIFICATION = 0x00010006 +- SUPPORTED_ARCHITECTURES = IA32|IPF|X64|EBC|ARM|AARCH64 ++ SUPPORTED_ARCHITECTURES = IA32|IPF|X64|EBC|ARM|AARCH64|LOONGARCH64 + BUILD_TARGETS = DEBUG|RELEASE + SKUID_IDENTIFIER = DEFAULT + +@@ -75,6 +75,9 @@ + [LibraryClasses.AARCH64] + CompilerIntrinsicsLib|ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf + ++[LibraryClasses.LOONGARCH64] ++ CompilerIntrinsicsLib|LoongArchPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf ++ + [Components] + RefindPkg/refind.inf + RefindPkg/gptsync.inf +diff --git a/filesystems/Make.gnuefi b/filesystems/Make.gnuefi +index be5b183..6638f9d 100644 +--- a/filesystems/Make.gnuefi ++++ b/filesystems/Make.gnuefi +@@ 
-30,6 +30,11 @@ ifeq ($(HOSTARCH),aarch64) + SUBSYSTEM_LDFLAG = -defsym=EFI_SUBSYSTEM=0xb + endif + ++ifeq ($(HOSTARCH),loongarch64) ++ # Set symbol for driver ++ SUBSYSTEM_LDFLAG = -defsym=EFI_SUBSYSTEM=0xb ++endif ++ + $(SHLIB_TARGET): $(OBJS) + $(LD) $(GNUEFI_LDFLAGS) $(SUBSYSTEM_LDFLAG) $(OBJS) -o $@ $(LOCAL_LIBS) $(GNUEFI_LIBS) + +diff --git a/filesystems/btrfs.inf b/filesystems/btrfs.inf +index 10f284a..00a2c54 100644 +--- a/filesystems/btrfs.inf ++++ b/filesystems/btrfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -74,3 +79,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs +diff --git a/filesystems/ext2.inf b/filesystems/ext2.inf +index ee8eea0..2df2dbd 100644 +--- a/filesystems/ext2.inf ++++ b/filesystems/ext2.inf +@@ -50,6 +50,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -71,3 +76,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 +diff --git a/filesystems/ext4.inf b/filesystems/ext4.inf +index 3586ee5..6e445dd 100644 +--- a/filesystems/ext4.inf ++++ b/filesystems/ext4.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 +diff --git a/filesystems/hfs.inf b/filesystems/hfs.inf +index 14a859c..31bc526 100644 +--- a/filesystems/hfs.inf ++++ b/filesystems/hfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os 
-DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs +diff --git a/filesystems/iso9660.inf b/filesystems/iso9660.inf +index 0e03032..9bcf98b 100644 +--- a/filesystems/iso9660.inf ++++ b/filesystems/iso9660.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 +diff --git a/filesystems/ntfs.inf b/filesystems/ntfs.inf +index 73262e1..b008e10 100644 +--- a/filesystems/ntfs.inf ++++ b/filesystems/ntfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs +diff --git a/filesystems/reiserfs.inf b/filesystems/reiserfs.inf +index 474d97c..b2c6dfa 100644 +--- a/filesystems/reiserfs.inf ++++ b/filesystems/reiserfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs +diff --git a/gptsync.inf b/gptsync.inf +index af3769c..382ac97 100644 +--- a/gptsync.inf ++++ b/gptsync.inf +@@ -50,6 +50,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + gEfiAcpiTableGuid + gEfiAcpi10TableGuid +@@ -135,3 +140,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO +diff --git a/gptsync/Make.gnuefi b/gptsync/Make.gnuefi +index b74d0f3..8c6f530 100644 
+--- a/gptsync/Make.gnuefi ++++ b/gptsync/Make.gnuefi +@@ -23,6 +23,10 @@ ifeq ($(ARCH),aarch64) + TARGET = gptsync_aa64.efi + endif + ++ifeq ($(ARCH),loongarch64) ++ TARGET = gptsync_loongarch64.efi ++endif ++ + all: $(TARGET) + + SHLIB_TARGET = $(subst .efi,.so,$(TARGET)) +diff --git a/gptsync/Make.tiano b/gptsync/Make.tiano +index a0656cd..70a7db3 100644 +--- a/gptsync/Make.tiano ++++ b/gptsync/Make.tiano +@@ -26,6 +26,10 @@ ifeq ($(ARCH),aarch64) + ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib + endif + ++ifeq ($(ARCH),loongarch64) ++ ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib ++endif ++ + TIANO_INCLUDE_DIRS = -I $(TIANOBASE)/MdePkg \ + -I $(TIANOBASE)/MdePkg/Include \ + -I $(TIANOBASE)/MdePkg/Include/$(ARCHDIR) \ +diff --git a/gptsync/gptsync.h b/gptsync/gptsync.h +index f6cf2a5..584d1fb 100644 +--- a/gptsync/gptsync.h ++++ b/gptsync/gptsync.h +@@ -45,7 +45,7 @@ + // + + +-#if defined(EFI32) || defined(EFIX64) || defined(EFIAARCH64) ++#if defined(EFI32) || defined(EFIX64) || defined(EFIAARCH64) || defined(EFILOONGARCH64) + #define CONFIG_EFI + #endif + +diff --git a/include/refit_call_wrapper.h b/include/refit_call_wrapper.h +index 1bd4042..1fa386f 100644 +--- a/include/refit_call_wrapper.h ++++ b/include/refit_call_wrapper.h +@@ -3,7 +3,7 @@ + + #ifdef __MAKEWITH_GNUEFI + +-#if defined (EFIX64) | defined (AARCH64) ++#if defined (EFIX64) | defined (AARCH64) | defined (LOONGARCH64) + # define refit_call1_wrapper(f, a1) \ + uefi_call_wrapper(f, 1, (UINT64)(a1)) + # define refit_call2_wrapper(f, a1, a2) \ +diff --git a/libeg/image.c b/libeg/image.c +index c8ae198..47ddede 100644 +--- a/libeg/image.c ++++ b/libeg/image.c +@@ -71,7 +71,7 @@ + // A value of 4096 should keep us within limits on 32-bit systems, but I've + // seen some minor artifacts at this level, so give it a bit more precision + // on 64-bit systems.... +-#if defined(EFIX64) | defined(EFIAARCH64) ++#if defined(EFIX64) | defined(EFIAARCH64) | defined(EFILOONGARCH64) + #define FP_MULTIPLIER (UINTN) 65536 + #else + #define FP_MULTIPLIER (UINTN) 4096 +diff --git a/mvrefind b/mvrefind +index 6b840c2..154729e 100755 +--- a/mvrefind ++++ b/mvrefind +@@ -95,6 +95,9 @@ DeterminePlatform() { + i?86) + Platform="ia32" + ;; ++ loongarch64) ++ Platform="loongarch64" ++ ;; + *) + echo "Unsupported CPU type; aborting!" + exit 1 +diff --git a/refind-install b/refind-install +index 22dd8e6..3fc7cab 100755 +--- a/refind-install ++++ b/refind-install +@@ -266,7 +266,7 @@ ReadKeyPassphrase() { + + # Determine what CPU type and EFI bit depth we're using. + # Sets Platform global variable to lowercase EFI platform code (currently +-# "x64", "ia32", or "aa64") -- the same code used in filenames. ++# "x64", "ia32", "aa64" or "loongarch64") -- the same code used in filenames. + DeterminePlatform() { + local CpuType + case "$OSTYPE" in +@@ -291,6 +291,9 @@ DeterminePlatform() { + aarch64) + Platform="aa64" + ;; ++ loongarch64) ++ Platform="loongarch64" ++ ;; + x86_64) + Platform="x64" + ;; +@@ -386,6 +389,7 @@ CheckForFiles() { + if [[ $ShimType == "shimx64.efi" || $ShimType == "shim.efi" || $ShimType == "shimx64.efi.signed" ]] ; then + TargetX64="grubx64.efi" + TargetAARCH64="grubaa64.efi" ++ TargetLOONGARCH64="grubloongarch64.efi" + MokManagerSource=$(dirname "$ShimSource")/mm$Platform.efi.signed + if [[ ! 
-f "$MokManagerSource" ]] ; then + MokManagerSource=$(dirname "$ShimSource")/mm$Platform.efi +@@ -455,10 +459,12 @@ SetVarsForBoot() { + TargetX64="bootx64.efi" + TargetIA32="bootia32.efi" + TargetAARCH64="bootaa64.efi" ++ TargetLOONGARCH64="bootloongarch64.efi" + else +- if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimx64.efi.signed" || $ShimType = "shimaa64.efi" ]] ; then ++ if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimx64.efi.signed" || $ShimType = "shimaa64.efi" || $ShimType = "shimloongarch64.efi" ]] ; then + TargetX64="grubx64.efi" + TargetAARCH64="grubaa64.efi" ++ TargetLOONGARCH64="grubloongarch64.efi" + elif [[ $ShimType == "preloader.efi" || $ShimType == "PreLoader.efi" ]] ; then + TargetX64="loader.efi" + else +@@ -482,10 +488,12 @@ SetVarsForMsBoot() { + TargetX64="bootmgfw.efi" + TargetIA32="bootmgfw.efi" + TargetAARCH64="bootmgfw.efi" ++ TargetLOONGARCH64="bootmgfw.efi" + else +- if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimaa64.efi" ]] ; then ++ if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimaa64.efi" || $ShimType == "shimloongarch64.efi" ]] ; then + TargetX64="grubx64.efi" + TargetAARCH64="grubaa64.efi" ++ TargetLOONGARCH64="grubloongarch64.efi" + elif [[ $ShimType == "preloader.efi" || $ShimType == "PreLoader.efi" ]] ; then + TargetX64="loader.efi" + else +@@ -528,6 +536,7 @@ DetermineTargetDir() { + TargetX64="refind_x64.efi" + TargetIA32="refind_ia32.efi" + TargetAARCH64="refind_aa64.efi" ++ TargetLOONGARCH64="refind_loongarch64.efi" + fi + Upgrade=1 + fi +@@ -672,6 +681,10 @@ CopyRefindFiles() { + if [[ $? != 0 && $Platform == "aa64" ]] ; then + Problems=1 + fi ++ cp "$RefindDir/refind_loongarch64.efi" "$InstallDir/$TargetDir/$TargetLOONGARCH64" 2> /dev/null ++ if [[ $? != 0 && $Platform == "loongarch64" ]] ; then ++ Problems=1 ++ fi + if [[ "$ShimSource" != "none" ]] ; then + TargetShim="bootx64.efi" + CopyShimFiles +@@ -709,15 +722,19 @@ CopyRefindFiles() { + if [[ "$TargetDir" == '/System/Library/CoreServices' ]] ; then + SetupMacHfs $TargetX64 + fi +- elif [[ $Platform == 'ia32' || $Platform == 'aa64' ]] ; then ++ elif [[ $Platform == 'ia32' || $Platform == 'aa64' || $Platform == 'loongarch64' ]] ; then + if [[ $Platform == 'ia32' ]] ; then + if ! cp "$RefindDir/refind_ia32.efi" "$InstallDir/$TargetDir/$TargetIA32" ; then + Problems=1 + fi +- else ++ elif [[ $Platform == 'aa64' ]] ; then + if ! cp "$RefindDir/refind_aa64.efi" "$InstallDir/$TargetDir/$TargetAARCH64" ; then + Problems=1 + fi ++ else ++ if ! cp "$RefindDir/refind_loongarch64.efi" "$InstallDir/$TargetDir/$TargetLOONGARCH64" ; then ++ Problems=1 ++ fi + fi + CopyDrivers $Platform + CopyTools $Platform +@@ -816,6 +833,10 @@ CreateBootCsvFile() { + echo "$TargetAARCH64,rEFInd Boot Manager,,This is the boot entry for rEFInd" | \ + $IConv -t UCS-2 > "$InstallDir/$TargetDir/BOOT.CSV" + fi ++ if [[ "$Platform" == "loongarch64" && -d "$InstallDir/$TargetDir" ]] ; then ++ echo "$TargetLOONGARCH64,rEFInd Boot Manager,,This is the boot entry for rEFInd" | \ ++ $IConv -t UCS-2 > "$InstallDir/$TargetDir/BOOT.CSV" ++ fi + fi + } # CreateBootCsvFile() + +@@ -1362,8 +1383,9 @@ AddBootEntry() { + echo + echo "ALERT: There were problems running the efibootmgr program! 
You may need to" + echo "rename the $Refind binary to the default name (EFI/BOOT/bootx64.efi" +- echo "on x86-64 systems, EFI/BOOT/bootia32.efi on x86 systems, or" +- echo "EFI/BOOT/bootaa64.efi on ARM64 systems) to have it run!" ++ echo "on x86-64 systems, EFI/BOOT/bootia32.efi on x86 systems, " ++ echo "EFI/BOOT/bootaa64.efi on ARM64 systems or " ++ echo "EFI/BOOT/bootloongarch64.efi on LoongArch systems.) to have it run!" + echo + else + echo "rEFInd is set as the default boot manager." +diff --git a/refind.inf b/refind.inf +index 9340b1e..7bb6a36 100644 +--- a/refind.inf ++++ b/refind.inf +@@ -22,7 +22,7 @@ + # + # The following information is for reference only and not required by the build tools. + # +-# VALID_ARCHITECTURES = IA32 X64 IPF EBC AARCH64 ++# VALID_ARCHITECTURES = IA32 X64 IPF EBC AARCH64 LOONGARCH64 + # + + [Sources] +@@ -99,6 +99,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + gEfiAcpiTableGuid + gEfiAcpi10TableGuid +@@ -184,3 +189,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO +diff --git a/refind/Make.tiano b/refind/Make.tiano +index 3e9036c..4aff620 100644 +--- a/refind/Make.tiano ++++ b/refind/Make.tiano +@@ -35,6 +35,10 @@ ifeq ($(ARCH),aarch64) + ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib + endif + ++ifeq ($(ARCH),loongarch64) ++ ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib ++endif ++ + SOURCE_NAMES = apple AutoGen config crc32 driver_support gpt icns \ + install launch_efi launch_legacy lib line_edit linux \ + log main menu mystrings pointer scan screen +diff --git a/refind/Makefile b/refind/Makefile +index 8e7048c..99ba59b 100644 +--- a/refind/Makefile ++++ b/refind/Makefile +@@ -30,6 +30,11 @@ ifeq ($(ARCH),aarch64) + TARGET = refind_aa64.efi + endif + ++ifeq ($(ARCH),loongarch64) ++ LIBEG = build ++ TARGET = refind_loongarch64.efi ++endif ++ + LOCAL_GNUEFI_CFLAGS = -I$(SRCDIR) -I$(SRCDIR)/../include \ + -I$(SRCDIR)/../libeg -I$(SRCDIR)/../mok + LOCAL_LDFLAGS = -L$(SRCDIR)/../libeg/ -L$(SRCDIR)/../mok/ \ +diff --git a/refind/config.h b/refind/config.h +index 223af72..1e1bb9b 100644 +--- a/refind/config.h ++++ b/refind/config.h +@@ -76,6 +76,8 @@ typedef struct { + #define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi,fbia32.efi" + #elif defined(EFIAARCH64) + #define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi,fbaa64.efi" ++#elif defined(EFILOONGARCH64) ++#define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi,fbloongarch64.efi" + #else + #define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi" + #endif +diff --git a/refind/driver_support.c b/refind/driver_support.c +index 563b5ad..a527b13 100644 +--- 
a/refind/driver_support.c ++++ b/refind/driver_support.c +@@ -83,6 +83,8 @@ + #define DRIVER_DIRS L"drivers,drivers_ia32" + #elif defined (EFIAARCH64) + #define DRIVER_DIRS L"drivers,drivers_aa64" ++#elif defined (EFILOONGARCH64) ++#define DRIVER_DIRS L"drivers,drivers_loongarch64" + #else + #define DRIVER_DIRS L"drivers" + #endif +diff --git a/refind/global.h b/refind/global.h +index ae958ed..7bbbedb 100644 +--- a/refind/global.h ++++ b/refind/global.h +@@ -172,6 +172,8 @@ + #define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi,mmia32.efi" + #elif defined(EFIAARCH64) + #define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi,mmaa64.efi" ++#elif defined(EFILOONGARCH64) ++#define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi,mmloongarch64.efi" + #else + #define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi" + #endif +@@ -182,6 +184,8 @@ + #define FWUPDATE_NAMES L"fwupia32.efi" + #elif defined(EFIAARCH64) + #define FWUPDATE_NAMES L"fwupaa64.efi" ++#elif defined(EFILOONGARCH64) ++#define FWUPDATE_NAMES L"fwuploongarch64.efi" + #else + #define FWUPDATE_NAMES L"fwup.efi" + #endif +diff --git a/refind/install.h b/refind/install.h +index 1976884..3ecf96d 100644 +--- a/refind/install.h ++++ b/refind/install.h +@@ -28,6 +28,11 @@ + #define INST_DRIVERS_SUBDIR L"drivers_aa64" + #define INST_REFIND_NAME L"refind_aa64.efi" + #define INST_PLATFORM_EXTENSION L"_aa64.efi" ++#elif defined(EFILOONGARCH64) ++#define INST_DIRECTORIES L"\\EFI,\\EFI\\refind,\\EFI\\refind\\icons,\\EFI\\refind\\drivers_loongarch64" ++#define INST_DRIVERS_SUBDIR L"drivers_loongarch64" ++#define INST_REFIND_NAME L"refind_loongarch64.efi" ++#define INST_PLATFORM_EXTENSION L"_loongarch64.efi" + #else + #define INST_DIRECTORIES L"\\EFI,\\EFI\\refind,\\EFI\\refind\\icons,\\EFI\\refind\\drivers" + #define INST_DRIVERS_SUBDIR L"drivers" +diff --git a/refind/launch_efi.c b/refind/launch_efi.c +index 320a207..03da111 100644 +--- a/refind/launch_efi.c ++++ b/refind/launch_efi.c +@@ -81,6 +81,8 @@ + #define EFI_STUB_ARCH 0x014c + #elif defined (EFIAARCH64) + #define EFI_STUB_ARCH 0xaa64 ++#elif defined (EFILOONGARCH64) ++#define EFI_STUB_ARCH 0x6264 + #else + #endif + +@@ -116,7 +118,7 @@ static VOID WarnSecureBootError(CHAR16 *Name, BOOLEAN Verbose) { + // gzip loaders. 
+ UINTN IsValidLoader(EFI_FILE_PROTOCOL *RootDir, CHAR16 *FileName) { + UINTN LoaderType = LOADER_TYPE_EFI; +-#if defined (EFIX64) | defined (EFI32) | defined (EFIAARCH64) ++#if defined (EFIX64) | defined (EFI32) | defined (EFIAARCH64) | defined (EFILOONGARCH64) + BOOLEAN IsValid = TRUE; + EFI_STATUS Status; + EFI_FILE_HANDLE FileHandle; +diff --git a/refind/lib.c b/refind/lib.c +index 0da1fa4..c27ae73 100644 +--- a/refind/lib.c ++++ b/refind/lib.c +@@ -103,6 +103,8 @@ EFI_GUID gFreedesktopRootGuid = { 0x4f68bce3, 0xe8cd, 0x4db1, { 0x96, 0xe7, 0xfb + EFI_GUID gFreedesktopRootGuid = { 0x44479540, 0xf297, 0x41b2, { 0x9a, 0xf7, 0xd1, 0x31, 0xd5, 0xf0, 0x45, 0x8a }}; + #elif defined (EFIAARCH64) + EFI_GUID gFreedesktopRootGuid = { 0xb921b045, 0x1df0, 0x41c3, { 0xaf, 0x44, 0x4c, 0x6f, 0x28, 0x0d, 0x3f, 0xae }}; ++#elif defined (EFILOONGARCH64) ++EFI_GUID gFreedesktopRootGuid = { 0x77055800, 0x792c, 0x4f94, { 0xb3, 0x9a, 0x98, 0xc9, 0x1b, 0x76, 0x2b, 0xb6 }}; + #else + // Below is GUID for ARM32 + EFI_GUID gFreedesktopRootGuid = { 0x69dad710, 0x2ce4, 0x4e3c, { 0xb1, 0x6c, 0x21, 0xa1, 0xd4, 0x9a, 0xbe, 0xd3 }}; +diff --git a/refind/main.c b/refind/main.c +index a6d0dc7..55f5b2f 100644 +--- a/refind/main.c ++++ b/refind/main.c +@@ -192,6 +192,9 @@ VOID AboutrEFInd(VOID) + #elif defined(EFIAARCH64) + AddMenuInfoLine(&AboutMenu, PoolPrint(L" Platform: ARM (64 bit); Secure Boot %s", + secure_mode() ? L"active" : L"inactive")); ++#elif defined(EFILOONGARCH64) ++ AddMenuInfoLine(&AboutMenu, PoolPrint(L" Platform: LoongArch (64 bit); Secure Boot %s", ++ secure_mode() ? L"active" : L"inactive")); + #else + AddMenuInfoLine(&AboutMenu, L" Platform: unknown"); + #endif +@@ -384,6 +387,8 @@ VOID LogBasicInfo(VOID) { + LOG(1, LOG_LINE_NORMAL, L"Platform: x86-64/X64/AMD64 (64-bit)"); + #elif defined(EFIAARCH64) + LOG(1, LOG_LINE_NORMAL, L"Platform: ARM64/AARCH64 (64-bit)"); ++#elif defined(EFILOONGARCH64) ++ LOG(1, LOG_LINE_NORMAL, L"Platform: LoongArch (64-bit)"); + #else + LOG(1, LOG_LINE_NORMAL, L"Platform: unknown"); + #endif +diff --git a/refind/scan.c b/refind/scan.c +index e270e83..535ef42 100644 +--- a/refind/scan.c ++++ b/refind/scan.c +@@ -104,6 +104,14 @@ + #define MEMTEST_NAMES L"memtest86.efi,memtest86_aa64.efi,memtest86aa64.efi,bootaa64.efi" + #define FALLBACK_FULLNAME L"EFI\\BOOT\\bootaa64.efi" + #define FALLBACK_BASENAME L"bootaa64.efi" ++#elif defined (EFILOONGARCH64) ++#define SHELL_NAMES L"\\EFI\\tools\\shell.efi,\\EFI\\tools\\shellloongarch64.efi,\\shell.efi,\\shellloongarch64.efi" ++#define GPTSYNC_NAMES L"\\EFI\\tools\\gptsync.efi,\\EFI\\tools\\gptsync_loongarch64.efi" ++#define GDISK_NAMES L"\\EFI\\tools\\gdisk.efi,\\EFI\\tools\\gdisk_loongarch64.efi" ++#define NETBOOT_NAMES L"\\EFI\\tools\\ipxe.efi" ++#define MEMTEST_NAMES L"memtest86.efi,memtest86_loongarch64.efi,memtest86loongarch64.efi,bootloongarch64.efi" ++#define FALLBACK_FULLNAME L"EFI\\BOOT\\bootloongarch64.efi" ++#define FALLBACK_BASENAME L"bootloongarch64.efi" + #else + #define SHELL_NAMES L"\\EFI\\tools\\shell.efi,\\shell.efi" + #define GPTSYNC_NAMES L"\\EFI\\tools\\gptsync.efi" +-- +2.39.2 + diff --git a/repod/0001-add-loong64-support.patch b/repod/0001-add-loong64-support.patch new file mode 100644 index 0000000000..143b7c1638 --- /dev/null +++ b/repod/0001-add-loong64-support.patch @@ -0,0 +1,33 @@ +From dda3c4bfdf9010e53c6e0259bc11c43922e51398 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Tue, 28 Nov 2023 22:12:58 +0800 +Subject: [PATCH] add loong64 support + +--- + repod/common/enums.py | 3 +++ + 1 file changed, 3 
insertions(+) + +diff --git a/repod/common/enums.py b/repod/common/enums.py +index 64dc74b..773fd81 100644 +--- a/repod/common/enums.py ++++ b/repod/common/enums.py +@@ -23,6 +23,8 @@ class ArchitectureEnum(Enum): + The i486 CPU architecture + I686: "i686" + The i686 CPU architecture ++ LOONG64: "loong64" ++ The loong64 CPU architecture + PENTIUM4: "pentium4" + The pentium4 CPU architecture + RISCV32: "riscv32" +@@ -46,6 +48,7 @@ class ArchitectureEnum(Enum): + ARMV7H = "armv7h" + I486 = "i486" + I686 = "i686" ++ LOONG64 = "loong64" + PENTIUM4 = "pentium4" + RISCV32 = "riscv32" + RISCV64 = "riscv64" +-- +2.42.0 + diff --git a/repod/PKGBUILD b/repod/PKGBUILD index 074d6c0f7e..efea833dfb 100644 --- a/repod/PKGBUILD +++ b/repod/PKGBUILD @@ -2,7 +2,7 @@ pkgname=repod pkgver=0.3.0 -pkgrel=1 +pkgrel=2 pkgdesc="Tooling to maintain binary package repositories" arch=(any) url="https://repod.archlinux.page/" @@ -34,9 +34,17 @@ makedepends=( checkdepends=(python-pytest python-pytest-asyncio python-pytest-lazy-fixture) source=( https://gitlab.archlinux.org/archlinux/$pkgname/-/archive/$pkgver/$pkgname-$pkgver.tar.gz + 0001-add-loong64-support.patch ) -sha256sums=('1d7763d0234aeb64ba21772b9de33e4b05b575ff916c0bcf7055109f06168468') -b2sums=('ba17d42856ba032d564fe8bdc5083f6524bc2b2d47bb353c68fd10f36a5a55795eba479f10853937772f470abe44dd9c582b49c3d905feea349c363e9249baa0') +sha256sums=('1d7763d0234aeb64ba21772b9de33e4b05b575ff916c0bcf7055109f06168468' + 'c440e466ae1fb26a401d08b86611230574e478584af25a26420cce58f660b826') +b2sums=('ba17d42856ba032d564fe8bdc5083f6524bc2b2d47bb353c68fd10f36a5a55795eba479f10853937772f470abe44dd9c582b49c3d905feea349c363e9249baa0' + '509f2db206e22df793e5f7d8dd052799f30e3853a9d41c3d278065e049e9ac6dcaafde1d512077cf91b798338a52f754ef2538b773b5e5da3131aff535130492') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/0001-add-loong64-support.patch +} build() { export PDM_BUILD_SCM_VERSION=$pkgver diff --git a/repro-env/PKGBUILD b/repro-env/PKGBUILD index 0bf6822624..f3be14351e 100644 --- a/repro-env/PKGBUILD +++ b/repro-env/PKGBUILD @@ -20,7 +20,7 @@ b2sums=('3bfc865c6555f809767218706f8f578d22a3891333a1ddf00351e950010f868f74e6d75 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rhit/PKGBUILD b/rhit/PKGBUILD index 292c1b7f18..0c6122f2b1 100644 --- a/rhit/PKGBUILD +++ b/rhit/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('2bc59c7eb24e655eba71f4cc540823c00619eba0673dadd8133f84490642ad13f11 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rhythmbox/PKGBUILD b/rhythmbox/PKGBUILD index bff69d16ba..f253129bcc 100644 --- a/rhythmbox/PKGBUILD +++ b/rhythmbox/PKGBUILD @@ -38,7 +38,7 @@ makedepends=( yelp-tools zeitgeist ) -checkdepends=( +makedepends+=( check xorg-server-xvfb ) diff --git a/riff/PKGBUILD b/riff/PKGBUILD index a7ca1d1b81..bd00fd0796 100644 --- a/riff/PKGBUILD +++ b/riff/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('8e28c456a781f5e9d51d2e84f8823bf01ca85b69337a6d21d31fd435d1699e1b') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/ripgrep-all/PKGBUILD b/ripgrep-all/PKGBUILD index 41cf2bda03..13c851edf7 100644 --- a/ripgrep-all/PKGBUILD +++ b/ripgrep-all/PKGBUILD @@ -28,7 +28,7 @@ 
b2sums=('fc2618369c349fda5a78d3604b17b78788be73ce5925a5b6aa234627ccaa4b70dba8ded prepare() { cd ripgrep-all-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" } build() { diff --git a/riscv64-linux-gnu-glibc/PKGBUILD b/riscv64-linux-gnu-glibc/PKGBUILD index 87e6029aed..893753c8db 100644 --- a/riscv64-linux-gnu-glibc/PKGBUILD +++ b/riscv64-linux-gnu-glibc/PKGBUILD @@ -5,7 +5,7 @@ _target=riscv64-linux-gnu pkgname=$_target-glibc pkgver=2.36 -pkgrel=1 +pkgrel=2 pkgdesc='GNU C Library RISCV target' arch=(any) url='https://www.gnu.org/software/libc/' diff --git a/roc-toolkit/PKGBUILD b/roc-toolkit/PKGBUILD index 3648be9e21..3308caf0e1 100644 --- a/roc-toolkit/PKGBUILD +++ b/roc-toolkit/PKGBUILD @@ -37,12 +37,14 @@ sha512sums=('447532862dc1714054ebd03ce7fd101525c213a87bc7198a55c8e6068c28db318d5 b2sums=('31775d330bdfb3c42278d1ecbbbb5e14f695ca82f22929e435b6c9bf10c2e25c3367d11cf2913a81e6469876c255193be11eb78b775fad3b17f57c5be9c4bb39') build() { + CFLAGS=${CFLAGS/-mno-relax/} + CXXFLAGS=${CXXFLAGS/-mno-relax/} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} local scons_options=( --prefix=/usr --libdir=/usr/lib --disable-openssl # disable as it is not yet used - --enable-tests - --enable-examples ) cd $pkgname-$pkgver @@ -63,6 +65,10 @@ check() { } package() { + CFLAGS=${CFLAGS/-mno-relax/} + CXXFLAGS=${CXXFLAGS/-mno-relax/} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} local scons_options=( --prefix=/usr --libdir=/usr/lib diff --git a/rosenpass/PKGBUILD b/rosenpass/PKGBUILD index ffaeb4256b..c9bc7662fa 100644 --- a/rosenpass/PKGBUILD +++ b/rosenpass/PKGBUILD @@ -19,7 +19,7 @@ _script=rp prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target $CARCH-unknown-linux-gnu + cargo fetch --locked --target `uname -m`-unknown-linux-gnu } build() { diff --git a/rpg-cli/PKGBUILD b/rpg-cli/PKGBUILD index c1c5e0c004..408eb9721a 100644 --- a/rpg-cli/PKGBUILD +++ b/rpg-cli/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('763d5a5c9219f2084d5ec6273911f84213e5424f127117ab0f1c611609663a8b' prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rpm-tools/PKGBUILD b/rpm-tools/PKGBUILD index f0c7916339..98beb4e7bc 100644 --- a/rpm-tools/PKGBUILD +++ b/rpm-tools/PKGBUILD @@ -26,14 +26,17 @@ _base_pkgver=$_pkgver_major.$_pkgver_minor.x source=(http://ftp.rpm.org/releases/rpm-$_base_pkgver/rpm-$pkgver.tar.bz2 rpmextract.sh - rpmlib-filesystem-check.patch) + rpmlib-filesystem-check.patch + rpm-add-loongarch.patch) sha256sums=('ba7eee1bc2c6f83be73c0a40d159c625cbaed976b3ac044233404fb25ae1b979' '3e5bf450d4628366ba35469ec0530a99cd09ab2616a3d261a3f68270f481f777' - 'bd0e6dbd458f990268c60324190c6825b234647ecdde08296d2b453dc4bce27a') + 'bd0e6dbd458f990268c60324190c6825b234647ecdde08296d2b453dc4bce27a' + 'a40afdd567b77480d2e315d3ca297f5238dd3f9f11cccc239a1d33b297b84e04') prepare() { cd rpm-${pkgver} patch -p1 < ../rpmlib-filesystem-check.patch + patch -p1 -i $srcdir/rpm-add-loongarch.patch } build() { diff --git a/rpm-tools/rpm-add-loongarch.patch b/rpm-tools/rpm-add-loongarch.patch new file mode 100644 index 0000000000..99bfb5a756 --- /dev/null +++ b/rpm-tools/rpm-add-loongarch.patch @@ -0,0 +1,63 @@ +Index: rpm-4.16.0/rpmrc.in +=================================================================== +--- rpm-4.16.0.orig/rpmrc.in 
++++ rpm-4.16.0/rpmrc.in +@@ -67,6 +67,8 @@ optflags: mipsr6el -O2 -g + optflags: mips64r6 -O2 -g + optflags: mips64r6el -O2 -g + ++optflags: loongarch64 -O2 -g ++ + optflags: armv3l -O2 -g -march=armv3 + optflags: armv4b -O2 -g -march=armv4 + optflags: armv4l -O2 -g -march=armv4 +@@ -137,6 +139,9 @@ archcolor: mipsr6el 1 + archcolor: mips64r6 2 + archcolor: mips64r6el 2 + ++archcolor: loongarch32 1 ++archcolor: loongarch64 2 ++ + archcolor: m68k 1 + + archcolor: m68kmint 1 +@@ -257,6 +262,9 @@ arch_canon: mips64r6el: mips64r6el 21 + arch_canon: riscv: riscv64 22 + arch_canon: riscv64: riscv64 22 + ++arch_canon: loongarch32: loongarch32 25 ++arch_canon: loongarch64: loongarch64 26 ++ + ############################################################# + # Canonical OS names and numbers + +@@ -360,6 +368,9 @@ buildarchtranslate: mipsr6el: mipsr6el + buildarchtranslate: mips64r6: mips64r6 + buildarchtranslate: mips64r6el: mips64r6el + ++buildarchtranslate: loongarch32: loongarch32 ++buildarchtranslate: loongarch64: loongarch64 ++ + buildarchtranslate: m68k: m68k + + buildarchtranslate: atarist: m68kmint +@@ -449,6 +460,9 @@ arch_compat: mipsr6el: noarch + arch_compat: mips64r6: mipsr6 + arch_compat: mips64r6el: mipsr6el + ++arch_compat: loongarch32: noarch ++arch_compat: loongarch64: loongarch32 ++ + arch_compat: hppa2.0: hppa1.2 + arch_compat: hppa1.2: hppa1.1 + arch_compat: hppa1.1: hppa1.0 +@@ -586,6 +600,9 @@ buildarch_compat: mipsr6el: noarch + buildarch_compat: mips64r6: noarch + buildarch_compat: mips64r6el: noarch + ++buildarch_compat: loongarch32: noarch ++buildarch_compat: loongarch64: noarch ++ + buildarch_compat: armv4b: noarch + buildarch_compat: armv8l: armv7l + buildarch_compat: armv7l: armv6l diff --git a/rq/PKGBUILD b/rq/PKGBUILD index be90634936..eac9d13119 100644 --- a/rq/PKGBUILD +++ b/rq/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('4c3fc4427d02271c93a2cf4a784887982e97f9aba4946900aad1a35b142f9a47') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/ruby-base64/PKGBUILD b/ruby-base64/PKGBUILD index d8c7fecc10..525f1b322b 100644 --- a/ruby-base64/PKGBUILD +++ b/ruby-base64/PKGBUILD @@ -3,7 +3,7 @@ _gemname='base64' pkgname="ruby-${_gemname}" pkgver=0.1.1 -pkgrel=4 +pkgrel=5 pkgdesc='Support for encoding and decoding binary data using a Base64 representation' arch=('any') url="https://github.com/ruby/${_gemname}" diff --git a/ruby-bigdecimal/PKGBUILD b/ruby-bigdecimal/PKGBUILD index 265e8566c8..30922ea8ef 100644 --- a/ruby-bigdecimal/PKGBUILD +++ b/ruby-bigdecimal/PKGBUILD @@ -3,7 +3,7 @@ _gemname='bigdecimal' pkgname="ruby-${_gemname}" pkgver=3.1.2 -pkgrel=4 +pkgrel=5 pkgdesc='This library provides arbitrary-precision decimal floating-point number class' arch=('loong64' 'x86_64') url="https://github.com/ruby/bigdecimal" diff --git a/ruby-cri/PKGBUILD b/ruby-cri/PKGBUILD index f9cb4a27d9..2151f00b75 100644 --- a/ruby-cri/PKGBUILD +++ b/ruby-cri/PKGBUILD @@ -48,7 +48,7 @@ package() { install -Dm 644 LICENSE -t "${pkgdir}/usr/share/licenses/${pkgname}/" install -Dm 644 README.md CODE_OF_CONDUCT.md NEWS.md -t "${pkgdir}/usr/share/doc/${pkgname}/" - mv doc/yardoc "${pkgdir}/usr/share/doc/${pkgname}/" +# mv doc/yardoc "${pkgdir}/usr/share/doc/${pkgname}/" rm -rf "${pkgdir}/${_gemdir}/gems/${_gemname}-${pkgver}/"{README.md,CODE_OF_CONDUCT.md,NEWS.md,.gitignore,.rubocop.yml,.travis.yml,test,LICENSE} rm -rf "${pkgdir}/${_gemdir}/cache" diff --git a/ruby-ffi/PKGBUILD 
b/ruby-ffi/PKGBUILD index 15bcc44fc8..b6aa722028 100644 --- a/ruby-ffi/PKGBUILD +++ b/ruby-ffi/PKGBUILD @@ -37,7 +37,7 @@ package() { rm -vrf cache cd "gems/$_gemname-$pkgver" rm -vrf Gemfile Rakefile "$_gemname.gemspec" ext rakelib - find lib/ffi/platform/* -prune -not -name "$CARCH-linux" -exec rm -rf {} + + find lib/ffi/platform/* -prune -not -name "`uname -m`-linux" -exec rm -rf {} + # move documentation install -vd "$pkgdir/usr/share/doc/$pkgname" diff --git a/ruby-iconv/PKGBUILD b/ruby-iconv/PKGBUILD index bfaef89c01..649819ac60 100755 --- a/ruby-iconv/PKGBUILD +++ b/ruby-iconv/PKGBUILD @@ -30,7 +30,7 @@ package() { # delete unnecessary files & folders cd "$pkgdir/$_gemdir" rm -vrf cache - rm -vrf "extensions/$CARCH-linux/$(basename $_gemdir)/$_gemname-$pkgver/"{gem_make.out,mkmf.log} + rm -vrf "extensions/`uname -m`-linux/$(basename $_gemdir)/$_gemname-$pkgver/"{gem_make.out,mkmf.log} cd "gems/$_gemname-$pkgver" find . -type f -name ".*" -delete rm -vrf Gemfile Rakefile "$_gemname.gemspec" ext test diff --git a/ruby-rake/PKGBUILD b/ruby-rake/PKGBUILD index 3a3370553b..541fe32fc1 100644 --- a/ruby-rake/PKGBUILD +++ b/ruby-rake/PKGBUILD @@ -16,6 +16,7 @@ noextract=($_gemname-$pkgver.gem) sha512sums=('9dbcd1ef4d93f4853b3da40b29890509bb260e13e5500f5a0502645ce762d6e50ee7dd6bd59d08d135868dab579e10344920ba246079cde7048e3510bd473ea2') package() { + set -x local _gemdir="$(ruby -e'puts Gem.default_dir')" gem install --ignore-dependencies --no-user-install --no-document -i "$pkgdir/$_gemdir" -n "$pkgdir/usr/bin" $_gemname-$pkgver.gem rm "$pkgdir/$_gemdir/cache/$_gemname-$pkgver.gem" diff --git a/ruby/PKGBUILD b/ruby/PKGBUILD index fbb939c637..9609bf44da 100644 --- a/ruby/PKGBUILD +++ b/ruby/PKGBUILD @@ -141,7 +141,7 @@ package_ruby() { rm --force --recursive --verbose \ "${pkgdir}"/usr/lib/ruby/${rubyver}/${stdlib_gem} \ "${pkgdir}"/usr/lib/ruby/${rubyver}/${stdlib_gem}.rb \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/${stdlib_gem}.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/${stdlib_gem}.so \ "${pkgdir}"/usr/lib/ruby/gems/${rubyver}/specifications/default/${stdlib_gem}-*.gemspec done @@ -151,14 +151,14 @@ package_ruby() { "${pkgdir}"/usr/lib/ruby/${rubyver}/net/http \ "${pkgdir}"/usr/lib/ruby/${rubyver}/net/http.rb \ "${pkgdir}"/usr/lib/ruby/${rubyver}/net/https.rb \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/cgi \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/date_core.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/digest \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/io/console.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/io/nonblock.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/io/wait.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/json \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/racc + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/cgi \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/date_core.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/digest \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/io/console.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/io/nonblock.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/io/wait.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/json \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/racc } package_ruby-docs() { diff --git a/ruff/PKGBUILD b/ruff/PKGBUILD index bccc76d4a4..243cdaa3e8 100644 --- a/ruff/PKGBUILD +++ b/ruff/PKGBUILD @@ -47,7 +47,7 @@ _package_common() { package_ruff() { cd 
"$_archive" _package_common - local _target="target/$CARCH-unknown-linux-gnu/release/ruff" + local _target="target/`uname -m`-unknown-linux-gnu/release/ruff" install -Dm0755 -t "$pkgdir/usr/bin/" "$_target" $_target --generate-shell-completion bash | install -Dm0644 /dev/stdin "$pkgdir/usr/share/bash-completion/completions/$pkgbase.bash" $_target --generate-shell-completion fish | install -Dm0644 /dev/stdin "$pkgdir/usr/share/fish/vendor_completions.d/$pkgbase.fish" diff --git a/runc/PKGBUILD b/runc/PKGBUILD index 33163d1d37..7d21252e20 100644 --- a/runc/PKGBUILD +++ b/runc/PKGBUILD @@ -16,15 +16,18 @@ optdepends=( 'criu: checkpoint support' ) source=("${pkgname}-${pkgver}.tar.xz::https://github.com/opencontainers/runc/releases/download/v${pkgver}/runc.tar.xz" - "${pkgname}-${pkgver}.tar.xz.sig::https://github.com/opencontainers/runc/releases/download/v${pkgver}/runc.tar.xz.asc") + "${pkgname}-${pkgver}.tar.xz.sig::https://github.com/opencontainers/runc/releases/download/v${pkgver}/runc.tar.xz.asc" + runc-la64.patch) validpgpkeys=("5F36C6C61B5460124A75F5A69E18AA267DDB8DB4" "C9C370B246B09F6DBCFC744C34401015D1D2D386") sha256sums=('47d9e34500e478d860512b3b646724ee4b9e638692122ddaa82af417668ca4d7' - 'SKIP') + 'SKIP' + '6027791a177bbc22751eecd0bca41ffc4287fbdbdd757c27dab6cd29e0d425f4') prepare() { mkdir -p src/github.com/opencontainers cp -r runc-${pkgver} src/github.com/opencontainers/runc + patch -d ${pkgname}-${pkgver} -p1 -i $srcdir/runc-la64.patch } build() { @@ -36,7 +39,15 @@ build() { export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" export GOFLAGS="-trimpath -mod=readonly -modcacherw" - make runc man + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy + make GO_BUILDMODE="" runc man || true +# patch the depends code + cd $srcdir && patch -d src/github.com/opencontainers/runc/ -p1 -i $srcdir/runc-la64.patch + cd src/github.com/opencontainers/runc + make GO_BUILDMODE="" runc man } package() { diff --git a/runc/runc-la64.patch b/runc/runc-la64.patch new file mode 100644 index 0000000000..94734e2b73 --- /dev/null +++ b/runc/runc-la64.patch @@ -0,0 +1,11 @@ +--- runc-1.1.4.orig/libcontainer/system/syscall_linux_64.go 2022-08-24 08:45:13.000000000 +0800 ++++ runc-1.1.4/libcontainer/system/syscall_linux_64.go 2022-09-18 15:02:00.535357829 +0800 +@@ -1,6 +1,6 @@ +-//go:build linux && (arm64 || amd64 || mips || mipsle || mips64 || mips64le || ppc || ppc64 || ppc64le || riscv64 || s390x) ++//go:build linux && (arm64 || amd64 || loong64 || mips || mipsle || mips64 || mips64le || ppc || ppc64 || ppc64le || riscv64 || s390x) + // +build linux +-// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x ++// +build arm64 amd64 loong64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x + + package system + diff --git a/runst/PKGBUILD b/runst/PKGBUILD index 687258beab..08b1cadbc3 100644 --- a/runst/PKGBUILD +++ b/runst/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('341a33c66d6b77dc660686283cdaf816fbbcf75c1a2cb661936d345d90b91e919ae prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rust-bindgen/PKGBUILD b/rust-bindgen/PKGBUILD index 76c914757d..5f75c718fc 100644 --- a/rust-bindgen/PKGBUILD +++ b/rust-bindgen/PKGBUILD @@ -22,7 +22,7 @@ prepare() 
{ build() { cd $pkgname-$pkgver - cargo build --release --frozen + cargo build --release #--frozen local _completion="target/release/$_pkgname --generate-shell-completions" $_completion bash > "completions/$_pkgname" $_completion fish > "completions/$_pkgname.fish" diff --git a/rust-script/PKGBUILD b/rust-script/PKGBUILD index 86fb3b1298..ff3854b340 100644 --- a/rust-script/PKGBUILD +++ b/rust-script/PKGBUILD @@ -18,7 +18,7 @@ prepare() { cd "$pkgname-$pkgver" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rust/PKGBUILD b/rust/PKGBUILD index f1202cadef..7839db8ff6 100644 --- a/rust/PKGBUILD +++ b/rust/PKGBUILD @@ -7,9 +7,9 @@ pkgbase=rust pkgname=( rust - lib32-rust-libs - rust-musl - rust-wasm +# lib32-rust-libs +# rust-musl +# rust-wasm rust-src ) epoch=1 @@ -35,11 +35,11 @@ depends=( ) makedepends=( cmake - lib32-gcc-libs +# lib32-gcc-libs libffi lld llvm - musl +# musl ninja perl python @@ -50,6 +50,7 @@ checkdepends=( gdb procps-ng ) +SKIPCONFIG=1 source=( "https://static.rust-lang.org/dist/rustc-$pkgver-src.tar.gz"{,.asc} 0001-bootstrap-Change-libexec-dir.patch @@ -91,13 +92,7 @@ change-id = 116881 link-shared = true [build] -target = [ - "x86_64-unknown-linux-gnu", - "i686-unknown-linux-gnu", - "x86_64-unknown-linux-musl", - "wasm32-unknown-unknown", - "wasm32-wasi", -] +target = [ "loongarch64-unknown-linux-gnu" ] cargo = "/usr/bin/cargo" rustc = "/usr/bin/rustc" rustfmt = "/usr/bin/rustfmt" @@ -147,31 +142,31 @@ deny-warnings = false [dist] compression-formats = ["gz"] -[target.x86_64-unknown-linux-gnu] +[target.loongarch64-unknown-linux-gnu] cc = "/usr/bin/gcc" cxx = "/usr/bin/g++" ar = "/usr/bin/gcc-ar" ranlib = "/usr/bin/gcc-ranlib" llvm-config = "/usr/bin/llvm-config" -[target.i686-unknown-linux-gnu] -cc = "/usr/bin/gcc" -cxx = "/usr/bin/g++" -ar = "/usr/bin/gcc-ar" -ranlib = "/usr/bin/gcc-ranlib" - -[target.x86_64-unknown-linux-musl] -sanitizers = false -musl-root = "/usr/lib/musl" - -[target.wasm32-unknown-unknown] -sanitizers = false -profiler = false - -[target.wasm32-wasi] -sanitizers = false -profiler = false -wasi-root = "/usr/share/wasi-sysroot" +#[target.i686-unknown-linux-gnu] +#cc = "/usr/bin/gcc" +#cxx = "/usr/bin/g++" +#ar = "/usr/bin/gcc-ar" +#ranlib = "/usr/bin/gcc-ranlib" +# +#[target.x86_64-unknown-linux-musl] +#sanitizers = false +#musl-root = "/usr/lib/musl" +# +#[target.wasm32-unknown-unknown] +#sanitizers = false +#profiler = false +# +#[target.wasm32-wasi] +#sanitizers = false +#profiler = false +#wasi-root = "/usr/share/wasi-sysroot" END } @@ -201,16 +196,16 @@ build() { # rustbuild always installs copies of the shared libraries to /usr/lib, # overwrite them with symlinks to the per-architecture versions - mkdir -p usr/lib32 - ln -srft usr/lib usr/lib/rustlib/x86_64-unknown-linux-gnu/lib/*.so - ln -srft usr/lib32 usr/lib/rustlib/i686-unknown-linux-gnu/lib/*.so +#mkdir -p usr/lib32 + ln -srft usr/lib usr/lib/rustlib/loongarch64-unknown-linux-gnu/lib/*.so +#ln -srft usr/lib32 usr/lib/rustlib/i686-unknown-linux-gnu/lib/*.so mkdir -p usr/share/licenses/rust mv -t usr/share/licenses/rust usr/share/doc/rust/{COPYRIGHT,LICENSE*} - _pick dest-i686 usr/lib/rustlib/i686-unknown-linux-gnu usr/lib32 - _pick dest-musl usr/lib/rustlib/x86_64-unknown-linux-musl - _pick dest-wasm usr/lib/rustlib/wasm32-* +#_pick dest-i686 usr/lib/rustlib/i686-unknown-linux-gnu usr/lib32 +#_pick dest-musl usr/lib/rustlib/loongarch64-unknown-linux-musl +#_pick 
dest-wasm usr/lib/rustlib/wasm32-* _pick dest-src usr/lib/rustlib/src } diff --git a/rustscan/PKGBUILD b/rustscan/PKGBUILD index ebd0ba4b33..05d1408aa8 100644 --- a/rustscan/PKGBUILD +++ b/rustscan/PKGBUILD @@ -35,7 +35,7 @@ prepare() { patch -p1 -i ../update-lockfile.patch # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rustypaste-cli/PKGBUILD b/rustypaste-cli/PKGBUILD index ffd560f3c6..92db719c48 100644 --- a/rustypaste-cli/PKGBUILD +++ b/rustypaste-cli/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('51f72cedfa315848cbfad2da98cf87febc5450a087996f5665311f71a83e6cbb') prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "${CARCH}-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build(){ diff --git a/rustypaste/PKGBUILD b/rustypaste/PKGBUILD index 530bea518f..4850171c65 100644 --- a/rustypaste/PKGBUILD +++ b/rustypaste/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('b02acf82fd38597d62cf1706e99d1789845ef6ab8c7b1b64174836e1edbb0f3906db662 prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sad/PKGBUILD b/sad/PKGBUILD index 7a2e78f3f9..3b8d47e134 100644 --- a/sad/PKGBUILD +++ b/sad/PKGBUILD @@ -40,7 +40,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sbsigntools/PKGBUILD b/sbsigntools/PKGBUILD index 206ca8efc7..23982879c0 100644 --- a/sbsigntools/PKGBUILD +++ b/sbsigntools/PKGBUILD @@ -24,9 +24,11 @@ makedepends=( source=( git+https://git.kernel.org/pub/scm/linux/kernel/git/jejb/sbsigntools.git#tag=$_commit?signed git+https://git.ozlabs.org/ccan + sbsigntools-la64.patch ) sha256sums=('SKIP' - 'SKIP') + 'SKIP' + '98442f63594d22fc463f5e6dc7d2d6e892e32406ee69a6e5c94e2738e5b3d4ae') validpgpkeys=('D5606E73C8B46271BEAD9ADF814AE47C214854D6') # James Bottomley prepare() { @@ -34,6 +36,7 @@ prepare() { git submodule init git config submodule."lib/ccan.git".url "$srcdir/ccan" git -c protocol.file.allow=always submodule update + patch -p1 -i $srcdir/sbsigntools-la64.patch ./autogen.sh } diff --git a/sbsigntools/sbsigntools-la64.patch b/sbsigntools/sbsigntools-la64.patch new file mode 100644 index 0000000000..77ca8707d2 --- /dev/null +++ b/sbsigntools/sbsigntools-la64.patch @@ -0,0 +1,24 @@ +Index: sbsigntools/src/coff/pe.h +=================================================================== +--- sbsigntools.orig/src/coff/pe.h ++++ sbsigntools/src/coff/pe.h +@@ -152,6 +152,7 @@ + #define IMAGE_FILE_MACHINE_TRICORE 0x0520 + #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 + #define IMAGE_FILE_MACHINE_AARCH64 0xaa64 ++#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 + #define IMAGE_FILE_MACHINE_RISCV64 0x5064 + + #define IMAGE_SUBSYSTEM_UNKNOWN 0 +Index: sbsigntools/src/image.c +=================================================================== +--- sbsigntools.orig/src/image.c ++++ sbsigntools/src/image.c +@@ -239,6 +239,7 @@ static int image_pecoff_parse(struct ima + switch (magic) { + case IMAGE_FILE_MACHINE_AMD64: + case IMAGE_FILE_MACHINE_AARCH64: ++ case IMAGE_FILE_MACHINE_LOONGARCH64: + case IMAGE_FILE_MACHINE_RISCV64: + rc = image_pecoff_parse_64(image); + break; diff --git a/scaleway-cli/PKGBUILD b/scaleway-cli/PKGBUILD index 2aea586986..ea61515a89 100644 --- 
a/scaleway-cli/PKGBUILD +++ b/scaleway-cli/PKGBUILD @@ -39,8 +39,10 @@ build() { export CGO_CFLAGS="$CFLAGS" export CGO_CXXFLAGS="$CXXFLAGS" export CGO_LDFLAGS="$LDFLAGS" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@master + go mod tidy go build \ - -buildmode=pie \ -trimpath \ -mod=readonly \ -modcacherw \ diff --git a/sccache/PKGBUILD b/sccache/PKGBUILD index 2bd441eabb..ec892b0831 100644 --- a/sccache/PKGBUILD +++ b/sccache/PKGBUILD @@ -34,17 +34,27 @@ pkgver() { } prepare() { +# find -name Cargo.lock -exec rm -f {} \; +# mkdir -p .cargo +# cat > .cargo/config.toml < .cargo/config.toml <> $pkgbase-rel-$pkgver/.config - make oldnoconfig V=1 -C $pkgbase-rel-$pkgver + make oldnoconfig V=1 -C $pkgbase-rel-$pkgver \ + CROSS_PREFIX=x86_64-linux-gnu- - make -C $pkgbase-rel-$pkgver V=1 EXTRAVERSION=-$pkgrel PYTHON=python3 $build_target + make -C $pkgbase-rel-$pkgver V=1 EXTRAVERSION=-$pkgrel PYTHON=python3 $build_target CROSS_PREFIX=x86_64-linux-gnu- cp $pkgbase-rel-$pkgver/out/$output_name output/$binary_name } diff --git a/selene/PKGBUILD b/selene/PKGBUILD index a532d44d19..51e8c826fb 100644 --- a/selene/PKGBUILD +++ b/selene/PKGBUILD @@ -18,7 +18,7 @@ b2sums=('SKIP') prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sentry-cli/PKGBUILD b/sentry-cli/PKGBUILD index c08b7503c3..c3fd02cd81 100644 --- a/sentry-cli/PKGBUILD +++ b/sentry-cli/PKGBUILD @@ -16,7 +16,13 @@ sha256sums=('bc60cc1a6015c337e7a3598123962dd24564426bd1a537f759fd19b00184643e') build() { cd $pkgname-$pkgver - cargo build --release --locked + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < .cargo/config.toml < + #include + #include +-#include + + using namespace SignOnUi; + +@@ -39,7 +40,6 @@ + int main(int argc, char **argv) + { + QApplication app(argc, argv); +- QtWebEngine::initialize(); + + app.setApplicationName("signon-ui"); + app.setQuitOnLastWindowClosed(false); +diff -ur signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro +--- signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro 2023-04-05 13:00:36.856326177 +0800 ++++ signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro 2017-10-23 23:39:24.000000000 +0800 +@@ -17,7 +17,6 @@ + gui \ + network \ +- quick \ +- webengine ++ quick + + PKGCONFIG += \ + signon-plugins-common \ diff --git a/singularity/PKGBUILD b/singularity/PKGBUILD index 631da26eaa..fcdc096020 100644 --- a/singularity/PKGBUILD +++ b/singularity/PKGBUILD @@ -12,7 +12,7 @@ license=('GPL2' 'custom') depends=('python' 'python-pygame' 'python-numpy') makedepends=('python-setuptools') source=("https://github.com/singularity/singularity/releases/download/v${pkgver}/singularity-${pkgver}.tar.gz"{,.asc} - singularity-336.patch::https://github.com/singularity/singularity/pull/336 + singularity-336.patch::https://github.com/singularity/singularity/pull/336.patch https://github.com/singularity/singularity-music/archive/025e2696638bcc3bf7690679c3a17c0b46823bbe.tar.gz "singularity.desktop") validpgpkeys=('B3131A451DBFDF7CA05B4197054BBB9F7D806442') diff --git a/skim/PKGBUILD b/skim/PKGBUILD index 7773e09076..6e6dfa9623 100644 --- a/skim/PKGBUILD +++ b/skim/PKGBUILD @@ -32,7 +32,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo 
fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sn0int/PKGBUILD b/sn0int/PKGBUILD index 4594b8f358..a4facd3356 100644 --- a/sn0int/PKGBUILD +++ b/sn0int/PKGBUILD @@ -25,7 +25,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sniffglue/PKGBUILD b/sniffglue/PKGBUILD index a69ec9827a..66059ea9b1 100644 --- a/sniffglue/PKGBUILD +++ b/sniffglue/PKGBUILD @@ -28,7 +28,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sniffnet/PKGBUILD b/sniffnet/PKGBUILD index 3b19213b15..08ad0a202e 100644 --- a/sniffnet/PKGBUILD +++ b/sniffnet/PKGBUILD @@ -18,7 +18,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sonic/PKGBUILD b/sonic/PKGBUILD index a81ffa4f21..8995aa2a39 100644 --- a/sonic/PKGBUILD +++ b/sonic/PKGBUILD @@ -38,7 +38,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/spicy-launcher/PKGBUILD b/spicy-launcher/PKGBUILD index 13ed13667f..7ce3b7c858 100644 --- a/spicy-launcher/PKGBUILD +++ b/spicy-launcher/PKGBUILD @@ -15,13 +15,13 @@ options=('!lto') prepare() { cd "SpicyLauncher-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd "SpicyLauncher-$pkgver/gui" yarn install --ignore-engines - yarn tauri build --target "$CARCH-unknown-linux-gnu" --bundles none + yarn tauri build --target "`uname -m`-unknown-linux-gnu" --bundles none cd .. 
cargo build --release --frozen } diff --git a/spirv-tools/PKGBUILD b/spirv-tools/PKGBUILD index e3bdd67cbf..64a20f2b01 100644 --- a/spirv-tools/PKGBUILD +++ b/spirv-tools/PKGBUILD @@ -13,7 +13,7 @@ license=('custom') groups=(vulkan-devel) depends=('gcc-libs' 'sh') makedepends=('cmake' 'python' 'ninja' 'spirv-headers') -source=("${pkgname}-${pkgver}.tar.gz::https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/${_tag}.tar.gz") +source=("${pkgname}-${pkgver}.tar.gz::https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/v${_tag}.tar.gz") sha256sums=('aed90b51ce884ce3ac267acec75e785ee743a1e1fd294c25be33b49c5804d77c') build() { diff --git a/spotify-launcher/PKGBUILD b/spotify-launcher/PKGBUILD index 6e8e5d233a..4beb1a8f62 100644 --- a/spotify-launcher/PKGBUILD +++ b/spotify-launcher/PKGBUILD @@ -35,7 +35,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/spotifyd/PKGBUILD b/spotifyd/PKGBUILD index 3599ccf89a..406cb39606 100644 --- a/spotifyd/PKGBUILD +++ b/spotifyd/PKGBUILD @@ -18,7 +18,7 @@ b2sums=('dd266f499f960b4f0e37e8c89d73286c0efc9f7c782007e6df901a9b432a88f4fb27666 prepare() { cd spotifyd-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/spytrap-adb/PKGBUILD b/spytrap-adb/PKGBUILD index 217e3d3504..296f58b731 100644 --- a/spytrap-adb/PKGBUILD +++ b/spytrap-adb/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('05f70e429530864b7f25e2e4443d70da2d2bd3b435d1c5c95ce2ae82d8d3a665d66f572 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sshx/PKGBUILD b/sshx/PKGBUILD index 33e0a7830b..b4f1e4a781 100644 --- a/sshx/PKGBUILD +++ b/sshx/PKGBUILD @@ -23,7 +23,7 @@ sha256sums=('5773c2c65dea72ec4b98d22b0c0534b745c2c536ea5b1267ced5c668b3a6736f') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/stalonetray/PKGBUILD b/stalonetray/PKGBUILD index e7c9491f33..40e76e596c 100644 --- a/stalonetray/PKGBUILD +++ b/stalonetray/PKGBUILD @@ -2,7 +2,7 @@ pkgname=stalonetray pkgver=0.8.5 -pkgrel=1 +pkgrel=2 pkgdesc="STAnd-aLONE sysTRAY. It has minimal build and run-time dependencies: the Xlib only." 
arch=('loong64' 'x86_64') url="https://github.com/kolbusa/stalonetray" diff --git a/stardict/PKGBUILD b/stardict/PKGBUILD index 0297f0523f..35dad8f017 100644 --- a/stardict/PKGBUILD +++ b/stardict/PKGBUILD @@ -39,6 +39,9 @@ prepare() { build() { cd "${srcdir}/${pkgname}-${pkgver}" + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} + CXXFLAGS+=" -std=gnu++11" ./configure \ --prefix=/usr \ --sysconfdir=/etc \ diff --git a/starship/PKGBUILD b/starship/PKGBUILD index 4f246d30ad..6e895372a5 100644 --- a/starship/PKGBUILD +++ b/starship/PKGBUILD @@ -28,7 +28,7 @@ b2sums=('SKIP') prepare() { cargo fetch \ --locked \ - --target $CARCH-unknown-linux-gnu \ + --target `uname -m`-unknown-linux-gnu \ --manifest-path starship/Cargo.toml } diff --git a/stochas/PKGBUILD b/stochas/PKGBUILD index a1cb8e2287..65fa569cca 100644 --- a/stochas/PKGBUILD +++ b/stochas/PKGBUILD @@ -58,8 +58,8 @@ package() { cd "$pkgname-$pkgver" # project has no install target :( install -vDm 755 build/${pkgname}_artefacts/None/Standalone/${_name} -t "${pkgdir}/usr/bin/" - install -vDm 755 build/${pkgname}_artefacts/None/VST3/${_name}.vst3/Contents/${CARCH}-linux/${_name}.so \ - -t "${pkgdir}/usr/lib/vst3/Stochas.vst3/Contents/${CARCH}-linux/" + install -vDm 755 build/${pkgname}_artefacts/None/VST3/${_name}.vst3/Contents/`uname -m`-linux/${_name}.so \ + -t "${pkgdir}/usr/lib/vst3/Stochas.vst3/Contents/`uname -m`-linux/" install -vDm 644 *.desktop -t "${pkgdir}/usr/share/applications/" install -vDm 644 image/app_logo_512.png "${pkgdir}/usr/share/icons/hicolor/512x512/apps/org.surge-synth-team.${_name}.png" diff --git a/suitesparse/PKGBUILD b/suitesparse/PKGBUILD index a905ce8436..3f58613de0 100644 --- a/suitesparse/PKGBUILD +++ b/suitesparse/PKGBUILD @@ -22,11 +22,17 @@ sha256sums=('19cbeb9964ebe439413dd66d82ace1f904adc5f25d8a823c1b48c34bd0d29ea5') build() { cd SuiteSparse-$pkgver + CXXFLAGS=${CXXFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} + + CFLAGS+=" -ffat-lto-objects -mcmodel=extreme" \ + CXXFLAGS+=" -ffat-lto-objects -mcmodel=extreme" \ CMAKE_OPTIONS="-DBLA_VENDOR=Generic \ -DCMAKE_INSTALL_PREFIX=/usr \ -DCMAKE_BUILD_TYPE=None \ -DNSTATIC=ON" \ make +# -DALLOW_64BIT_BLAS=ON \ } package() { diff --git a/supermin/PKGBUILD b/supermin/PKGBUILD index 6b9e5f80b9..ad82a40ee7 100644 --- a/supermin/PKGBUILD +++ b/supermin/PKGBUILD @@ -12,14 +12,22 @@ license=('GPL') makedepends=('ocaml' 'ocaml-findlib') depends=('e2fsprogs' 'pacman' 'pacman-contrib' 'cpio') conflicts=('febootstrap<=3.21') -source=("https://download.libguestfs.org/${pkgname}/5.3-development/${pkgname}-${pkgver}.tar.gz"{,.sig}) +source=("https://download.libguestfs.org/${pkgname}/5.3-development/${pkgname}-${pkgver}.tar.gz"{,.sig} + 'supermin-disable-doc.patch') sha512sums=('501731e9cce8bf1f4743eeff4af620813d466da10b664df037575a546b3b8e8697ed9e881dde7d3ba737e6a78536717c1823e22cdc1c92409db78d976a6678b5' - 'SKIP') + 'SKIP' + 'da5cfe1cce9695b05e593a732e43ab066fd39d3f72183cf7d19f9ec1c40be40a2574fcf64904ef0a3f044e30830f3c3183c4d53b03c850f59758e87b8be6a572') validpgpkeys=('F7774FB1AD074A7E8C8767EA91738F73E1B768A0') # Richard W.M. 
Jones +prepare() { + cd "${pkgname}-${pkgver}" + patch -p1 -i $srcdir/supermin-disable-doc.patch +} + build() { cd "${pkgname}-${pkgver}" + autoreconf -ifv ./configure --prefix=/usr make diff --git a/sws/PKGBUILD b/sws/PKGBUILD index 466919dfc3..2687b0955f 100644 --- a/sws/PKGBUILD +++ b/sws/PKGBUILD @@ -61,7 +61,7 @@ build() { package() { # plugin - install -vDm755 -t "$pkgdir/usr/lib/sws" "build/reaper_sws-$CARCH.so" + install -vDm755 -t "$pkgdir/usr/lib/sws" "build/reaper_sws-`uname -m`.so" install -vDm644 -t "$pkgdir/usr/lib/sws" build/sws_python64.py cd "$pkgname" diff --git a/syslog-ng/PKGBUILD b/syslog-ng/PKGBUILD index 960f18210c..742a081cf8 100644 --- a/syslog-ng/PKGBUILD +++ b/syslog-ng/PKGBUILD @@ -25,8 +25,8 @@ depends=( 'systemd-libs' ) makedepends=('libxslt' 'mongo-c-driver' 'librabbitmq-c' 'python' 'libesmtp' 'hiredis' - 'libdbi' 'libmaxminddb' 'net-snmp' 'librdkafka' 'systemd') -checkdepends=('python-nose' 'python-ply') + 'libdbi' 'libmaxminddb' 'net-snmp' 'librdkafka' 'systemd') #'libcap' +makedepends+=('python-nose' 'python-ply') optdepends=('logrotate: for rotating log files' 'libdbi: for the SQL plugin' 'librabbitmq-c: for the AMQP plugin' diff --git a/systeroid/PKGBUILD b/systeroid/PKGBUILD index 13b9350658..57b8f81e60 100644 --- a/systeroid/PKGBUILD +++ b/systeroid/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('61d8f8bdd34404f57e237f0843f67c1aaf9d9e552fd7857bc770db1ebf6296ed6f1 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/taplo-cli/PKGBUILD b/taplo-cli/PKGBUILD index 684481881d..d172db899e 100644 --- a/taplo-cli/PKGBUILD +++ b/taplo-cli/PKGBUILD @@ -21,12 +21,12 @@ b2sums=('f5b8a1b1f10b42ddb98c7ea400a062715e3ab9c2023adece88052126847ab992db52258 prepare() { cd $pkgname-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd $pkgname-$pkgver - cargo build --release --locked --offline --all-features + cargo build --release --offline --all-features } package() { diff --git a/taskwarrior-tui/PKGBUILD b/taskwarrior-tui/PKGBUILD index cef1667561..c55152a549 100644 --- a/taskwarrior-tui/PKGBUILD +++ b/taskwarrior-tui/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('86a00c0c33f825824ac432c50e57a9bac150c3ba9e3d06e6d86f65790a99a458' prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tealdeer/PKGBUILD b/tealdeer/PKGBUILD index bde21d488a..f8456537e7 100644 --- a/tealdeer/PKGBUILD +++ b/tealdeer/PKGBUILD @@ -29,7 +29,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tectonic/PKGBUILD b/tectonic/PKGBUILD index faf293a613..020e082578 100644 --- a/tectonic/PKGBUILD +++ b/tectonic/PKGBUILD @@ -18,7 +18,7 @@ b2sums=('94720d6beeef03aae68b1ac70b22995a05be2371176b844dcfbabbadc475e065bd8618c prepare() { cd ${pkgname}-${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tere/PKGBUILD b/tere/PKGBUILD index 377fedc871..be4b8aad85 100644 --- a/tere/PKGBUILD +++ b/tere/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('d7f657371ffbd469c4d8855c2a2734c20b53ae632fe3cbf9bb7cab94bd726326') prepare() { cd 
"$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/texlab/PKGBUILD b/texlab/PKGBUILD index 92a99dc7ab..587e9c69be 100644 --- a/texlab/PKGBUILD +++ b/texlab/PKGBUILD @@ -17,7 +17,7 @@ options=('!lto') prepare() { cd "${srcdir}/${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/texlive-bin/PKGBUILD b/texlive-bin/PKGBUILD index c40ed33bb0..c4515aa9e3 100644 --- a/texlive-bin/PKGBUILD +++ b/texlive-bin/PKGBUILD @@ -47,10 +47,12 @@ url='https://tug.org/texlive/' _commit=871c7a2856d70e1a9703d1f72f0587b9995dba5f # tags/texlive-2023.0 source=(git+https://github.com/Tex-Live/texlive-source.git#commit=$_commit ptex-debug-print.patch - context-luatex-1.17.patch) + context-luatex-1.17.patch + texlive-bin-la64.patch) sha256sums=('SKIP' 'aa838f09003c62c2efb5770a8de66f99b409df049fbd65098d80fd1957d06c50' - 'a56838d19c3bd820781693b5a2e058e1a22378b37ea199bac426d97fcc420920') + 'a56838d19c3bd820781693b5a2e058e1a22378b37ea199bac426d97fcc420920' + '0c09915ccb8bad1792f11a69cc7057d775cdf54a443427d9ca77f5602432f1aa') prepare() { cd texlive-source @@ -64,6 +66,7 @@ prepare() { patch -p1 -i ../ptex-debug-print.patch # update context to work with luatex 1.17 patch -p1 -i ../context-luatex-1.17.patch + patch -p1 -i $srcdir/texlive-bin-la64.patch } build() { diff --git a/texlive-bin/texlive-bin-la64.patch b/texlive-bin/texlive-bin-la64.patch new file mode 100644 index 0000000000..9ffb6e2ba3 --- /dev/null +++ b/texlive-bin/texlive-bin-la64.patch @@ -0,0 +1,11150 @@ +diff --git a/libs/luajit/LuaJIT-src/Makefile b/libs/luajit/LuaJIT-src/Makefile +index 0f93308..45b3b2d 100644 +--- a/libs/luajit/LuaJIT-src/Makefile ++++ b/libs/luajit/LuaJIT-src/Makefile +@@ -88,7 +88,7 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h + FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ +- dis_mips64.lua dis_mips64el.lua vmdef.lua ++ dis_mips64.lua dis_mips64el.lua dis_loongarch64.lua vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) + HOST_SYS:= $(shell uname -s) +diff --git a/libs/luajit/LuaJIT-src/doc/ext_jit.html b/libs/luajit/LuaJIT-src/doc/ext_jit.html +index e4088bc..492f537 100644 +--- a/libs/luajit/LuaJIT-src/doc/ext_jit.html ++++ b/libs/luajit/LuaJIT-src/doc/ext_jit.html +@@ -153,7 +153,7 @@ Contains the target OS name: +
+ <h3 id="jit_arch"><tt>jit.arch</tt></h3>
+ <p>
+ Contains the target architecture name:
+-"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
++"x86", "x64", "arm", "arm64", "ppc", "loongarch64", "mips" or "mips64".
+ </p>
+
+ <h3 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h3>
+diff --git a/libs/luajit/LuaJIT-src/doc/install.html b/libs/luajit/LuaJIT-src/doc/install.html +index c491c60..fc8559d 100644 +--- a/libs/luajit/LuaJIT-src/doc/install.html ++++ b/libs/luajit/LuaJIT-src/doc/install.html +@@ -154,6 +154,13 @@ operating systems, CPUs and compilers: +   +   + ++ ++LoongArch64 ++GCC 4.3+ ++GCC 4.3+ ++  ++  ++ + + +

Configuring LuaJIT

+@@ -426,6 +433,9 @@ make HOST_CC="gcc -m32" CROSS=mipsel-linux- + make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" + # MIPS64 little-endian + make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" ++ ++# LOONGARCH64 ++make CROSS=loongarch64-linux- + +

+ You can cross-compile for Android using the Android NDK. +diff --git a/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.h b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.h +new file mode 100644 +index 0000000..e6c9e3e +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.h +@@ -0,0 +1,451 @@ ++/* ++** DynASM LoongArch encoding engine. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. ++** Copyright (C) 2021 Loongson Technology. All rights reserved. ++** Released under the MIT license. See dynasm.lua for full copyright notice. ++*/ ++ ++#include ++#include ++#include ++#include ++ ++#define DASM_ARCH "loongarch64" ++ ++#ifndef DASM_EXTERN ++#define DASM_EXTERN(a,b,c,d) 0 ++#endif ++ ++/* Action definitions. */ ++enum { ++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, ++ /* The following actions need a buffer position. */ ++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, ++ /* The following actions also have an argument. */ ++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2, ++ DASM__MAX ++}; ++ ++/* Maximum number of section buffer positions for a single dasm_put() call. */ ++#define DASM_MAXSECPOS 25 ++ ++/* DynASM encoder status codes. Action list offset or number are or'ed in. */ ++#define DASM_S_OK 0x00000000 ++#define DASM_S_NOMEM 0x01000000 ++#define DASM_S_PHASE 0x02000000 ++#define DASM_S_MATCH_SEC 0x03000000 ++#define DASM_S_RANGE_I 0x11000000 ++#define DASM_S_RANGE_SEC 0x12000000 ++#define DASM_S_RANGE_LG 0x13000000 ++#define DASM_S_RANGE_PC 0x14000000 ++#define DASM_S_RANGE_REL 0x15000000 ++#define DASM_S_UNDEF_LG 0x21000000 ++#define DASM_S_UNDEF_PC 0x22000000 ++ ++/* Macros to convert positions (8 bit section + 24 bit index). */ ++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) ++#define DASM_POS2BIAS(pos) ((pos)&0xff000000) ++#define DASM_SEC2POS(sec) ((sec)<<24) ++#define DASM_POS2SEC(pos) ((pos)>>24) ++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) ++ ++/* Action list type. */ ++typedef const unsigned int *dasm_ActList; ++ ++/* Per-section structure. */ ++typedef struct dasm_Section { ++ int *rbuf; /* Biased buffer pointer (negative section bias). */ ++ int *buf; /* True buffer pointer. */ ++ size_t bsize; /* Buffer size in bytes. */ ++ int pos; /* Biased buffer position. */ ++ int epos; /* End of biased buffer position - max single put. */ ++ int ofs; /* Byte offset into section. */ ++} dasm_Section; ++ ++/* Core structure holding the DynASM encoding state. */ ++struct dasm_State { ++ size_t psize; /* Allocated size of this structure. */ ++ dasm_ActList actionlist; /* Current actionlist pointer. */ ++ int *lglabels; /* Local/global chain/pos ptrs. */ ++ size_t lgsize; ++ int *pclabels; /* PC label chains/pos ptrs. */ ++ size_t pcsize; ++ void **globals; /* Array of globals (bias -10). */ ++ dasm_Section *section; /* Pointer to active section. */ ++ size_t codesize; /* Total size of all code sections. */ ++ int maxsection; /* 0 <= sectionidx < maxsection. */ ++ int status; /* Status code. */ ++ dasm_Section sections[1]; /* All sections. Alloc-extended. */ ++}; ++ ++/* The size of the core structure depends on the max. number of sections. */ ++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) ++ ++ ++/* Initialize DynASM state. 
*/ ++void dasm_init(Dst_DECL, int maxsection) ++{ ++ dasm_State *D; ++ size_t psz = 0; ++ int i; ++ Dst_REF = NULL; ++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); ++ D = Dst_REF; ++ D->psize = psz; ++ D->lglabels = NULL; ++ D->lgsize = 0; ++ D->pclabels = NULL; ++ D->pcsize = 0; ++ D->globals = NULL; ++ D->maxsection = maxsection; ++ for (i = 0; i < maxsection; i++) { ++ D->sections[i].buf = NULL; /* Need this for pass3. */ ++ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); ++ D->sections[i].bsize = 0; ++ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ ++ } ++} ++ ++/* Free DynASM state. */ ++void dasm_free(Dst_DECL) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ for (i = 0; i < D->maxsection; i++) ++ if (D->sections[i].buf) ++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); ++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); ++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); ++ DASM_M_FREE(Dst, D, D->psize); ++} ++ ++/* Setup global label array. Must be called before dasm_setup(). */ ++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) ++{ ++ dasm_State *D = Dst_REF; ++ D->globals = gl - 10; /* Negative bias to compensate for locals. */ ++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); ++} ++ ++/* Grow PC label array. Can be called after dasm_setup(), too. */ ++void dasm_growpc(Dst_DECL, unsigned int maxpc) ++{ ++ dasm_State *D = Dst_REF; ++ size_t osz = D->pcsize; ++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); ++ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); ++} ++ ++/* Setup encoder. */ ++void dasm_setup(Dst_DECL, const void *actionlist) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ D->actionlist = (dasm_ActList)actionlist; ++ D->status = DASM_S_OK; ++ D->section = &D->sections[0]; ++ memset((void *)D->lglabels, 0, D->lgsize); ++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); ++ for (i = 0; i < D->maxsection; i++) { ++ D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].ofs = 0; ++ } ++} ++ ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) { \ ++ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) ++#define CKPL(kind, st) \ ++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ ++ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) ++#else ++#define CK(x, st) ((void)0) ++#define CKPL(kind, st) ((void)0) ++#endif ++ ++static int dasm_imm2(unsigned int n) ++{ ++ if ((n >> 21) == 0) ++ return n; ++ //return ((n>>16)&0x1f) | ((n&0xffff)>>10); ++ else if ((n >> 26) == 0) ++ return n; ++ //return ((n>>16)&0x3ff) | ((n&0xffff)>>10); ++ else ++ return -1; ++} ++ ++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ ++void dasm_put(Dst_DECL, int start, ...) 
++{ ++ va_list ap; ++ dasm_State *D = Dst_REF; ++ dasm_ActList p = D->actionlist + start; ++ dasm_Section *sec = D->section; ++ int pos = sec->pos, ofs = sec->ofs; ++ int *b; ++ ++ if (pos >= sec->epos) { ++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize, ++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); ++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); ++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); ++ } ++ ++ b = sec->rbuf; ++ b[pos++] = start; ++ ++ va_start(ap, start); ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ if (action >= DASM__MAX) { ++ ofs += 4; ++ } else { ++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; ++ switch (action) { ++ case DASM_STOP: goto stop; ++ case DASM_SECTION: ++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); ++ D->section = &D->sections[n]; goto stop; ++ case DASM_ESC: p++; ofs += 4; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; ++ case DASM_REL_LG: ++ n = (ins & 2047) - 10; pl = D->lglabels + n; ++ /* Bkwd rel or global. */ ++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } ++ pl += 10; n = *pl; ++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ ++ goto linkrel; ++ case DASM_REL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putrel: ++ n = *pl; ++ if (n < 0) { /* Label exists. Get label pos and store it. */ ++ b[pos] = -n; ++ } else { ++ linkrel: ++ b[pos] = n; /* Else link to rel chain, anchored at label. */ ++ *pl = pos; ++ } ++ pos++; ++ break; ++ case DASM_LABEL_LG: ++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; ++ case DASM_LABEL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putlabel: ++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; ++ } ++ *pl = -pos; /* Label exists now. */ ++ b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ break; ++ case DASM_IMM: ++#ifdef DASM_CHECKS ++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); ++#endif ++ n >>= ((ins>>10)&31); ++#ifdef DASM_CHECKS ++ if (ins & 0x8000) ++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); ++ else ++ CK((n>>((ins>>5)&31)) == 0, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ case DASM_IMM2: ++ CK(dasm_imm2((unsigned int)n) != -1, RANGE_I); ++ b[pos++] = n; ++ break; ++ } ++ } ++ } ++stop: ++ va_end(ap); ++ sec->pos = pos; ++ sec->ofs = ofs; ++} ++#undef CK ++ ++/* Pass 2: Link sections, shrink aligns, fix label offsets. */ ++int dasm_link(Dst_DECL, size_t *szp) ++{ ++ dasm_State *D = Dst_REF; ++ int secnum; ++ int ofs = 0; ++ ++#ifdef DASM_CHECKS ++ *szp = 0; ++ if (D->status != DASM_S_OK) return D->status; ++ { ++ int pc; ++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) ++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; ++ } ++#endif ++ ++ { /* Handle globals not defined in this translation unit. */ ++ int idx; ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { ++ int n = D->lglabels[idx]; ++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } ++ } ++ } ++ ++ /* Combine all code sections. No support for data sections (yet). 
*/ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->rbuf; ++ int pos = DASM_SEC2POS(secnum); ++ int lastpos = sec->pos; ++ ++ while (pos != lastpos) { ++ dasm_ActList p = D->actionlist + b[pos++]; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: p++; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; ++ case DASM_REL_LG: case DASM_REL_PC: pos++; break; ++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; ++ case DASM_IMM: case DASM_IMM2: pos++; break; ++ } ++ } ++ stop: (void)0; ++ } ++ ofs += sec->ofs; /* Next section starts right after current section. */ ++ } ++ ++ D->codesize = ofs; /* Total size of all code sections */ ++ *szp = ofs; ++ return DASM_S_OK; ++} ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) ++#else ++#define CK(x, st) ((void)0) ++#endif ++ ++/* Pass 3: Encode sections. */ ++int dasm_encode(Dst_DECL, void *buffer) ++{ ++ dasm_State *D = Dst_REF; ++ char *base = (char *)buffer; ++ unsigned int *cp = (unsigned int *)buffer; ++ int secnum; ++ ++ /* Encode all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->buf; ++ int *endb = sec->rbuf + sec->pos; ++ ++ while (b != endb) { ++ dasm_ActList p = D->actionlist + *b++; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: *cp++ = *p++; break; ++ case DASM_REL_EXT: ++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); ++ goto patchrel; ++ case DASM_ALIGN: ++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; ++ break; ++ case DASM_REL_LG: ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4); ++ goto patchrel; ++ } ++ /* fallthrough */ ++ case DASM_REL_PC: ++ CK(n >= 0, UNDEF_PC); ++ n = *DASM_POS2PTR(D, n); ++ if (ins & 2048) ++ n = (n + (int)(size_t)base) & 0x0fffffff; ++ else ++ n = n - (int)((char *)cp - base) + 4; ++ patchrel: { ++ unsigned int e = 16 + ((ins >> 12) & 15); ++ CK((n & 3) == 0 && ++ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); ++ if (!(ins & 0xf800)) { /* BEQ, BNE, BLT, BGE, BLTU, BGEU */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10); ++ } else if ((ins & 0x5000)) { /* BEQZ, BNEZ, BCEQZ, BCNEZ */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x1f0000) >> 16); ++ } else if ((ins & 0xa000)) { /* B, BL */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x3ff0000) >> 16); ++ } ++ } ++ break; ++ case DASM_LABEL_LG: ++ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); ++ break; ++ case DASM_LABEL_PC: break; ++ case DASM_IMM2: { ++ //cp[-1] |= ((n>>3) & 4); n &= 0x1f; ++ unsigned int imm2n = dasm_imm2((unsigned int)n); ++ cp[-1] |= ((imm2n&0x3ff0000) | ((imm2n&0xffff))>>10); ++ } ++ break; ++ /* fallthrough */ ++ case DASM_IMM: ++ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); ++ break; ++ default: *cp++ = ins; break; ++ } ++ } ++ stop: (void)0; ++ } ++ } ++ ++ if (base + D->codesize != (char *)cp) /* Check for phase errors. 
*/ ++ return DASM_S_PHASE; ++ return DASM_S_OK; ++} ++#undef CK ++ ++/* Get PC label offset. */ ++int dasm_getpclabel(Dst_DECL, unsigned int pc) ++{ ++ dasm_State *D = Dst_REF; ++ if (pc*sizeof(int) < D->pcsize) { ++ int pos = D->pclabels[pc]; ++ if (pos < 0) return *DASM_POS2PTR(D, -pos); ++ if (pos > 0) return -1; /* Undefined. */ ++ } ++ return -2; /* Unused or out of range. */ ++} ++ ++#ifdef DASM_CHECKS ++/* Optional sanity checker to call between isolated encoding steps. */ ++int dasm_checkstep(Dst_DECL, int secmatch) ++{ ++ dasm_State *D = Dst_REF; ++ if (D->status == DASM_S_OK) { ++ int i; ++ for (i = 1; i <= 9; i++) { ++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } ++ D->lglabels[i] = 0; ++ } ++ } ++ if (D->status == DASM_S_OK && secmatch >= 0 && ++ D->section != &D->sections[secmatch]) ++ D->status = DASM_S_MATCH_SEC|(D->section-D->sections); ++ return D->status; ++} ++#endif ++ +diff --git a/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.lua b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.lua +new file mode 100644 +index 0000000..6542763 +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.lua +@@ -0,0 +1,977 @@ ++------------------------------------------------------------------------------ ++-- DynASM LoongArch module. ++-- ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. ++-- Copyright (C) 2021 Loongson Technology. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++ ++-- Module information: ++local _info = { ++ arch = "loongarch64", ++ description = "DynASM LoongArch64 module", ++ version = "1.4.0", ++ vernum = 10400, ++ release = "2021-05-20", ++ author = "Mike Pall", ++ license = "MIT", ++} ++ ++-- Exported glue functions for the arch-specific module. ++local _M = { _info = _info } ++ ++-- Cache library functions. ++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs ++local assert, setmetatable = assert, setmetatable ++local _s = string ++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local match, gmatch = _s.match, _s.gmatch ++local concat, sort = table.concat, table.sort ++local bit = bit or require("bit") ++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift ++local tohex = bit.tohex ++ ++-- Inherited tables and callbacks. ++local g_opt, g_arch ++local wline, werror, wfatal, wwarn ++ ++-- Action name list. ++-- CHECK: Keep this in sync with the C code! ++local action_names = { ++ "STOP", "SECTION", "ESC", "REL_EXT", ++ "ALIGN", "REL_LG", "LABEL_LG", ++ "REL_PC", "LABEL_PC", "IMM", "IMM2", ++} ++ ++-- Maximum number of section buffer positions for dasm_put(). ++-- CHECK: Keep this in sync with the C code! ++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. ++ ++-- Action name -> action number. ++local map_action = {} ++for n,name in ipairs(action_names) do ++ map_action[name] = n-1 ++end ++ ++-- Action list buffer. ++local actlist = {} ++ ++-- Argument list for next dasm_put(). Start with offset 0 into action list. ++local actargs = { 0 } ++ ++-- Current number of section buffer positions for dasm_put(). ++local secpos = 1 ++ ++------------------------------------------------------------------------------ ++ ++-- Dump action names and numbers. 
++local function dumpactions(out) ++ out:write("DynASM encoding engine action codes:\n") ++ for n,name in ipairs(action_names) do ++ local num = map_action[name] ++ out:write(format(" %-10s %02X %d\n", name, num, num)) ++ end ++ out:write("\n") ++end ++ ++-- Write action list buffer as a huge static C array. ++local function writeactions(out, name) ++ local nn = #actlist ++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end ++ out:write("static const unsigned int ", name, "[", nn, "] = {\n") ++ for i = 1,nn-1 do ++ assert(out:write("0x", tohex(actlist[i]), ",\n")) ++ end ++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Add word to action list. ++local function wputxw(n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[#actlist+1] = n ++end ++ ++-- Add action to list with optional arg. Advance buffer pos, too. ++local function waction(action, val, a, num) ++ local w = assert(map_action[action], "bad action name `"..action.."'") ++ wputxw(0xff000000 + w * 0x10000 + (val or 0)) ++ if a then actargs[#actargs+1] = a end ++ if a or num then secpos = secpos + (num or 1) end ++end ++ ++-- Flush action list (intervening C code or buffer pos overflow). ++local function wflush(term) ++ if #actlist == actargs[1] then return end -- Nothing to flush. ++ if not term then waction("STOP") end -- Terminate action list. ++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) ++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). ++ secpos = 1 -- The actionlist offset occupies a buffer position, too. ++end ++ ++-- Put escaped word. ++local function wputw(n) ++ if n >= 0xff000000 then waction("ESC") end ++ wputxw(n) ++end ++ ++-- Reserve position for word. ++local function wpos() ++ local pos = #actlist+1 ++ actlist[pos] = "" ++ return pos ++end ++ ++-- Store word to reserved position. ++local function wputpos(pos, n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[pos] = n ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Global label name -> global label number. With auto assignment on 1st use. ++local next_global = 20 ++local map_global = setmetatable({}, { __index = function(t, name) ++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end ++ local n = next_global ++ if n > 2047 then werror("too many global labels") end ++ next_global = n + 1 ++ t[name] = n ++ return n ++end}) ++ ++-- Dump global labels. ++local function dumpglobals(out, lvl) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("Global labels:\n") ++ for i=20,next_global-1 do ++ out:write(format(" %s\n", t[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write global label enum. ++local function writeglobals(out, prefix) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("enum {\n") ++ for i=20,next_global-1 do ++ out:write(" ", prefix, t[i], ",\n") ++ end ++ out:write(" ", prefix, "_MAX\n};\n") ++end ++ ++-- Write global label names. 
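Not part of the patch, just a minimal standalone C sketch of the action-word convention that waction() above relies on and that the dasm_link()/dasm_encode() loops in dasm_loongarch64.h decode: an action word is 0xff000000 + action*0x10000 + payload, so the C side recovers the action as (ins >> 16) - 0xff00 and the 16-bit payload as ins & 0xffff; ordinary data words that happen to be >= 0xff000000 are protected by a preceding ESC action (see wputw()). The enum below simply mirrors action_names.

/* Standalone sketch (not part of the patch): DynASM action-word packing. */
#include <stdio.h>

enum {
  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2
};

static unsigned int pack_action(unsigned int action, unsigned int payload)
{
  return 0xff000000u + action * 0x10000u + payload;  /* mirrors waction() */
}

int main(void)
{
  unsigned int w = pack_action(DASM_IMM, 0x818a);    /* an example payload */
  unsigned int action = (w >> 16) - 0xff00;          /* mirrors dasm_encode() */
  unsigned int payload = w & 0xffff;
  printf("word=0x%08x action=%u payload=0x%x\n", w, action, payload);
  return 0;                                          /* action=9, payload=0x818a */
}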
++local function writeglobalnames(out, name) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=20,next_global-1 do ++ out:write(" \"", t[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Extern label name -> extern label number. With auto assignment on 1st use. ++local next_extern = 0 ++local map_extern_ = {} ++local map_extern = setmetatable({}, { __index = function(t, name) ++ -- No restrictions on the name for now. ++ local n = next_extern ++ if n > 2047 then werror("too many extern labels") end ++ next_extern = n + 1 ++ t[name] = n ++ map_extern_[n] = name ++ return n ++end}) ++ ++-- Dump extern labels. ++local function dumpexterns(out, lvl) ++ out:write("Extern labels:\n") ++ for i=0,next_extern-1 do ++ out:write(format(" %s\n", map_extern_[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write extern label names. ++local function writeexternnames(out, name) ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=0,next_extern-1 do ++ out:write(" \"", map_extern_[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Arch-specific maps. ++local map_archdef = { sp="r3", ra="r1" } -- Ext. register name -> int. name. ++ ++local map_type = {} -- Type name -> { ctype, reg } ++local ctypenum = 0 -- Type number (for Dt... macros). ++ ++-- Reverse defines for registers. ++function _M.revdef(s) ++ if s == "r3" then return "sp" ++ elseif s == "r1" then return "ra" end ++ return s ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Template strings for LoongArch instructions. 
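A hedged illustration (not in the patch) of how the template strings below are consumed by the .template__ handler further down: the first eight hex digits form the base opcode and each trailing letter names an operand field (D = rd at bit 0, J = rj at bit 5, K = rk at bit 10, and so on). Expanding the add.d_3 entry "00108000DJK" by hand, with register numbers 4/5/6 picked purely for illustration:

/* Sketch only: expanding the "00108000DJK" (add.d) template by hand. */
#include <stdio.h>

int main(void)
{
  unsigned int base = 0x00108000u;              /* add.d opcode bits */
  unsigned int rd = 4, rj = 5, rk = 6;          /* arbitrary example registers */
  unsigned int ins = base | (rd << 0) | (rj << 5) | (rk << 10);
  printf("add.d r4, r5, r6 -> 0x%08x\n", ins);  /* 0x001098a4 */
  return 0;
}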
++local map_op = { ++ ["clo.w_2"] = "00001000DJ", ++ ["clz.w_2"] = "00001400DJ", ++ ["cto.w_2"] = "00001800DJ", ++ ["ctz.w_2"] = "00001c00DJ", ++ ["clo.d_2"] = "00002000DJ", ++ ["clz.d_2"] = "00002400DJ", ++ ["cto.d_2"] = "00002800DJ", ++ ["ctz.d_2"] = "00002c00DJ", ++ ["revb.2h_2"] = "00003000DJ", ++ ["revb.4h_2"] = "00003400DJ", ++ ["revb.2w_2"] = "00003800DJ", ++ ["revb.d_2"] = "00003c00DJ", ++ ["revh.2w_2"] = "00004000DJ", ++ ["revh.d_2"] = "00004400DJ", ++ ["bitrev.4b_2"] = "00004800DJ", ++ ["bitrev.8b_2"] = "00004c00DJ", ++ ["bitrev.w_2"] = "00005000DJ", ++ ["bitrev.d_2"] = "00005400DJ", ++ ["ext.w.h_2"] = "00005800DJ", ++ ["ext.w.b_2"] = "00005c00DJ", ++ ++ ["add.w_3"] = "00100000DJK", ++ ["add.d_3"] = "00108000DJK", ++ ["sub.w_3"] = "00110000DJK", ++ ["sub.d_3"] = "00118000DJK", ++ slt_3 = "00120000DJK", ++ sltu_3 = "00128000DJK", ++ maskeqz_3 = "00130000DJK", ++ masknez_3 = "00138000DJK", ++ ++ nor_3 = "00140000DJK", ++ and_3 = "00148000DJK", ++ or_3 = "00150000DJK", ++ xor_3 = "00158000DJK", ++ orn_3 = "00160000DJK", ++ andn_3 = "00168000DJK", ++ ["sll.w_3"] = "00170000DJK", ++ ["srl.w_3"] = "00178000DJK", ++ ["sra.w_3"] = "00180000DJK", ++ ["sll.d_3"] = "00188000DJK", ++ ["srl.d_3"] = "00190000DJK", ++ ["sra.d_3"] = "00198000DJK", ++ ["rotr.w_3"] = "001b0000DJK", ++ ["rotr.d_3"] = "001b8000DJK", ++ ["mul.w_3"] = "001c0000DJK", ++ ["mulh.w_3"] = "001c8000DJK", ++ ["mulh.wu_3"] = "001d0000DJK", ++ ["mul.d_3"] = "001d8000DJK", ++ ["mulh.d_3"] = "001e0000DJK", ++ ["mulh.du_3"] = "001e8000DJK", ++ ["mulw.d.w_3"] = "001f0000DJK", ++ ["mulw.d.wu_3"] = "001f8000DJK", ++ ++ ["fabs.h_2"] = "01140000FG", ++ ["fabs.s_2"] = "01140400FG", ++ ["fabs.d_2"] = "01140800FG", ++ ["fneg.h_2"] = "01141000FG", ++ ["fneg.s_2"] = "01141400FG", ++ ["fneg.d_2"] = "01141800FG", ++ ["flogb.h_2"] = "01142000FG", ++ ["flogb.s_2"] = "01142400FG", ++ ["flogb.d_2"] = "01142800FG", ++ ["fclass.h_2"] = "01143000FG", ++ ["fclass.s_2"] = "01143400FG", ++ ["fclass.d_2"] = "01143800FG", ++ ["fsqrt.h_2"] = "01144000FG", ++ ["fsqrt.s_2"] = "01144400FG", ++ ["fsqrt.d_2"] = "01144800FG", ++ ["frecip.h_2"] = "01145000FG", ++ ["frecip.s_2"] = "01145400FG", ++ ["frecip.d_2"] = "01145800FG", ++ ["frsqrt.h_2"] = "01146000FG", ++ ["frsqrt.s_2"] = "01146400FG", ++ ["frsqrt.d_2"] = "01146800FG", ++ ["frecipe.h_2"] = "01147000FG", ++ ["frecipe.s_2"] = "01147400FG", ++ ["frecipe.d_2"] = "01147800FG", ++ ["frsqrte.h_2"] = "01148000FG", ++ ["frsqrte.s_2"] = "01148400FG", ++ ["frsqrte.d_2"] = "01148800FG", ++ ++ ["fmov.h_2"] = "01149000FG", ++ ["fmov.s_2"] = "01149400FG", ++ ["fmov.d_2"] = "01149800FG", ++ ["movgr2fr.h_2"] = "0114a000FJ", ++ ["movgr2fr.w_2"] = "0114a400FJ", ++ ["movgr2fr.d_2"] = "0114a800FJ", ++ ["movgr2frh.w_2"] = "0114ac00FJ", ++ ["movfr2gr.h_2"] = "0114b000DG", ++ ["movfr2gr.s_2"] = "0114b400DG", ++ ["movfr2gr.d_2"] = "0114b800DG", ++ ["movfrh2gr.s_2"] = "0114bc00DG", ++ movgr2fcsr_2 = "0114c000SG", ++ movfcsr2gr_2 = "0114c800FR", ++ movfr2cf_2 = "0114d000EG", ++ movcf2fr_2 = "0114d400FA", ++ movgr2cf_2 = "0114d800EG", ++ movcf2gr_2 = "0114dc00DA", ++ ["fcvt.ld.d_2"] = "0114e000FG", ++ ["fcvt.ud.d_2"] = "0114e400FG", ++ ["fcvt.s.d_2"] = "01191800FG", ++ ["fcvt.d.s_2"] = "01192400FG", ++ ["ftintrm.w.s_2"] = "011a0400FG", ++ ["ftintrm.w.d_2"] = "011a0800FG", ++ ["ftintrm.l.s_2"] = "011a2400FG", ++ ["ftintrm.l.d_2"] = "011a2800FG", ++ ["ftintrp.w.s_2"] = "011a4400FG", ++ ["ftintrp.w.d_2"] = "011a4800FG", ++ ["ftintrp.l.s_2"] = "011a6400FG", ++ ["ftintrp.l.d_2"] = "011a6800FG", ++ ["ftintrz.w.s_2"] = "011a8400FG", ++ 
["ftintrz.w.d_2"] = "011a8800FG", ++ ["ftintrz.l.s_2"] = "011aa400FG", ++ ["ftintrz.l.d_2"] = "011aa800FG", ++ ["ftintrne.w.s_2"] = "011ac400FG", ++ ["ftintrne.w.d_2"] = "011ac800FG", ++ ["ftintrne.l.s_2"] = "011ae400FG", ++ ["ftintrne.l.d_2"] = "011ae800FG", ++ ["ftint.w.s_2"] = "011b0400FG", ++ ["ftint.w.d_2"] = "011b0800FG", ++ ["ftint.l.s_2"] = "011b2400FG", ++ ["ftint.l.d_2"] = "011b2800FG", ++ ["ffint.s.w_2"] = "011d1000FG", ++ ["ffint.s.l_2"] = "011d1800FG", ++ ["ffint.d.w_2"] = "011d2000FG", ++ ["ffint.d.l_2"] = "011d2800FG", ++ ["frint.s_2"] = "011e4400FG", ++ ["frint.d_2"] = "011e4800FG", ++ ++ ["fadd.h_3"] = "01000000FGH", ++ ["fadd.s_3"] = "01008000FGH", ++ ["fadd.d_3"] = "01010000FGH", ++ ["fsub.h_3"] = "01020000FGH", ++ ["fsub.s_3"] = "01028000FGH", ++ ["fsub.d_3"] = "01030000FGH", ++ ["fmul.h_3"] = "01040000FGH", ++ ["fmul.s_3"] = "01048000FGH", ++ ["fmul.d_3"] = "01050000FGH", ++ ["fdiv.h_3"] = "01060000FGH", ++ ["fdiv.s_3"] = "01068000FGH", ++ ["fdiv.d_3"] = "01070000FGH", ++ ["fmax.h_3"] = "01080000FGH", ++ ["fmax.s_3"] = "01088000FGH", ++ ["fmax.d_3"] = "01090000FGH", ++ ["fmin.h_3"] = "010a0000FGH", ++ ["fmin.s_3"] = "010a8000FGH", ++ ["fmin.d_3"] = "010b0000FGH", ++ ["fmaxa.h_3"] = "010c0000FGH", ++ ["fmaxa.s_3"] = "010c8000FGH", ++ ["fmaxa.d_3"] = "010d0000FGH", ++ ["fmina.h_3"] = "010e0000FGH", ++ ["fmina.s_3"] = "010e8000FGH", ++ ["fmina.d_3"] = "010f0000FGH", ++ ["fscaleb.h_3"] = "01100000FGH", ++ ["fscaleb.s_3"] = "01108000FGH", ++ ["fscaleb.d_3"] = "01110000FGH", ++ ["fcopysign.h_3"] = "01120000FGH", ++ ["fcopysign.s_3"] = "01128000FGH", ++ ["fcopysign.d_3"] = "01130000FGH", ++ ++ ["alsl.w_4"] = "00040000DJKQ", ++ ["alsl.wu_4"] = "00060000DJKQ", ++ ["alsl.d_4"] = "002c0000DJKQ", ++ ["bytepick.w_4"] = "00080000DJKQ", ++ ["bytepick.d_4"] = "000c0000DJKB", ++ ++ ["div.w_3"] = "00200000DJK", ++ ["mod.w_3"] = "00208000DJK", ++ ["div.wu_3"] = "00210000DJK", ++ ["mod.wu_3"] = "00218000DJK", ++ ["div.d_3"] = "00220000DJK", ++ ["mod.d_3"] = "00228000DJK", ++ ["div.du_3"] = "00230000DJK", ++ ["mod.du_3"] = "00238000DJK", ++ ["crc.w.b.w_3"] = "00240000DJK", ++ ["crc.w.h.w_3"] = "00248000DJK", ++ ["crc.w.w.w_3"] = "00250000DJK", ++ ["crc.w.d.w_3"] = "00258000DJK", ++ ["crcc.w.b.w_3"] = "00260000DJK", ++ ["crcc.w.h.w_3"] = "00268000DJK", ++ ["crcc.w.w.w_3"] = "00270000DJK", ++ ["crcc.w.d.w_3"] = "00278000DJK", ++ ++ break_1 = "002a0000C", ++ syscall_1 = "002b0000C", ++ ++ ["slli.w_3"] = "00408000DJU", ++ ["slli.d_3"] = "00410000DJV", ++ ["srli.w_3"] = "00448000DJU", ++ ["srli.d_3"] = "00450000DJV", ++ ["srai.w_3"] = "00488000DJU", ++ ["srai.d_3"] = "00490000DJV", ++ ["rotri.w_3"] = "004c8000DJU", ++ ["rotri.d_3"] = "004d0000DJV", ++ ++ ["bstrins.w_4"] = "00600000DJMU", ++ ["bstrpick.w_4"] = "00608000DJMU", ++ ["bstrins.d_4"] = "00800000DJNV", ++ ["bstrpick.d_4"] = "00c00000DJNV", ++ slti_3 = "02000000DJX", ++ sltui_3 = "02400000DJX", ++ ["addi.w_3"] = "02800000DJX", ++ ["addi.d_3"] = "02c00000DJX", ++ ["lu52i.d_3"] = "03000000DJX", ++ andi_3 = "03400000DJT", ++ ori_3 = "03800000DJT", ++ xori_3 = "03c00000DJT", ++ ["lu12i.w_2"] = "14000000DZ", ++ ["lu32i.d_2"] = "16000000DZ", ++ pcaddi_2 = "18000000DZ", ++ pcalau12i_2 = "1a000000DZ", ++ pcaddu12i_2 = "1c000000DZ", ++ pcaddu18i_2 = "1e000000DZ", ++ ++ ["ldx.b_3"] = "38000000DJK", ++ ["ldx.h_3"] = "38040000DJK", ++ ["ldx.w_3"] = "38080000DJK", ++ ["ldx.d_3"] = "380c0000DJK", ++ ["stx.b_3"] = "38100000DJK", ++ ["stx.h_3"] = "38140000DJK", ++ ["stx.w_3"] = "38180000DJK", ++ ["stx.d_3"] = "381c0000DJK", ++ ["ldx.bu_3"] = 
"38200000DJK", ++ ["ldx.hu_3"] = "38240000DJK", ++ ["ldx.wu_3"] = "38280000DJK", ++ ["fldx.s_3"] = "38300000FJK", ++ ["fldx.d_3"] = "38340000FJK", ++ ["fstx.s_3"] = "38380000FJK", ++ ["fstx.d_3"] = "383c0000FJK", ++ ["fldgt.s_3"] = "38740000FJK", ++ ["fldgt.d_3"] = "38748000FJK", ++ ["fldle.s_3"] = "38750000FJK", ++ ["fldle.d_3"] = "38758000FJK", ++ ["fstgt.s_3"] = "38760000FJK", ++ ["fstgt.d_3"] = "38768000FJK", ++ ["fstle.s_3"] = "38770000FJK", ++ ["fstle.d_3"] = "38778000FJK", ++ ["ldgt.b_3"] = "38780000DJK", ++ ["ldgt.h_3"] = "38788000DJK", ++ ["ldgt.w_3"] = "38790000DJK", ++ ["ldgt.d_3"] = "38798000DJK", ++ ["ldle.b_3"] = "387a0000DJK", ++ ["ldle.h_3"] = "387a8000DJK", ++ ["ldle.w_3"] = "387b0000DJK", ++ ["ldle.d_3"] = "387b8000DJK", ++ ["stgt.b_3"] = "387c0000DJK", ++ ["stgt.h_3"] = "387c8000DJK", ++ ["stgt.w_3"] = "387d0000DJK", ++ ["stgt.d_3"] = "387d8000DJK", ++ ["stle.b_3"] = "387e0000DJK", ++ ["stle.h_3"] = "387e8000DJK", ++ ["stle.w_3"] = "387f0000DJK", ++ ["stle.d_3"] = "387f8000DJK", ++ ++ ["ll.w_3"] = "20000000DJW", ++ ["sc.w_3"] = "21000000DJW", ++ ["ll.d_3"] = "22000000DJW", ++ ["sc.d_3"] = "23000000DJW", ++ ["ldptr.w_3"] = "24000000DJW", ++ ["stptr.w_3"] = "25000000DJW", ++ ["ldptr.d_3"] = "26000000DJW", ++ ["stptr.d_3"] = "27000000DJW", ++ ++ ["ld.b_3"] = "28000000DJX", ++ ["ld.h_3"] = "28400000DJX", ++ ["ld.w_2"] = "28800000Do", ++ ["ld.d_2"] = "28c00000Do", ++ ["st.b_2"] = "29000000Do", ++ ["st.h_2"] = "29400000Do", ++ ["st.w_2"] = "29800000Do", ++ ["st.d_2"] = "29c00000Do", ++ ["ld.bu_2"] = "2a000000Do", ++ ["ld.hu_2"] = "2a400000Do", ++ ["ld.wu_3"] = "2a800000DJX", ++ ["ldx.d_3"] = "380c0000DJK", ++ ["stx.d_3"] = "381c0000DJK", ++ ["fld.s_2"] = "2b000000Fo", ++ ["fst.s_2"] = "2b400000Fo", ++ ["fld.d_2"] = "2b800000Fo", ++ ["fst.d_2"] = "2bc00000Fo", ++ ++ ["fcmp.caf.s_3"] = "0c100000EGH", ++ ["fcmp.saf.s_3"] = "0c108000EGH", ++ ["fcmp.clt.s_3"] = "0c110000EGH", ++ ["fcmp.slt.s_3"] = "0c118000EGH", ++ ["fcmp.ceq.s_3"] = "0c120000EGH", ++ ["fcmp.seq.s_3"] = "0c128000EGH", ++ ["fcmp.cle.s_3"] = "0c130000EGH", ++ ["fcmp.sle.s_3"] = "0c138000EGH", ++ ["fcmp.cun.s_3"] = "0c140000EGH", ++ ["fcmp.sun.s_3"] = "0c148000EGH", ++ ["fcmp.cult.s_3"] = "0c150000EGH", --TODO ++ ["fcmp.sult.s_3"] = "0c158000EGH", ++ ["fcmp.cueq.s_3"] = "0c160000EGH", ++ ["fcmp.sueq.s_3"] = "0c168000EGH", ++ ["fcmp.cule.s_3"] = "0c170000EGH", ++ ["fcmp.sule.s_3"] = "0c178000EGH", ++ ["fcmp.cne.s_3"] = "0c180000EGH", ++ ["fcmp.sne.s_3"] = "0c188000EGH", ++ ["fcmp.cor.s_3"] = "0c1a0000EGH", ++ ["fcmp.sor.s_3"] = "0c1a8000EGH", ++ ["fcmp.cune.s_3"] = "0c1c0000EGH", ++ ["fcmp.sune.s_3"] = "0c1c8000EGH", ++ ["fcmp.caf.d_3"] = "0c200000EGH", ++ ["fcmp.saf.d_3"] = "0c208000EGH", ++ ["fcmp.clt.d_3"] = "0c210000EGH", ++ ["fcmp.slt.d_3"] = "0c218000EGH", ++ ["fcmp.ceq.d_3"] = "0c220000EGH", ++ ["fcmp.seq.d_3"] = "0c228000EGH", ++ ["fcmp.cle.d_3"] = "0c230000EGH", ++ ["fcmp.sle.d_3"] = "0c238000EGH", ++ ["fcmp.cun.d_3"] = "0c240000EGH", ++ ["fcmp.sun.d_3"] = "0c248000EGH", ++ ["fcmp.cult.d_3"] = "0c250000EGH", --TODO ++ ["fcmp.sult.d_3"] = "0c258000EGH", ++ ["fcmp.cueq.d_3"] = "0c260000EGH", ++ ["fcmp.sueq.d_3"] = "0c268000EGH", ++ ["fcmp.cule.d_3"] = "0c270000EGH", ++ ["fcmp.sule.d_3"] = "0c278000EGH", ++ ["fcmp.cne.d_3"] = "0c280000EGH", ++ ["fcmp.sne.d_3"] = "0c288000EGH", ++ ["fcmp.cor.d_3"] = "0c2a0000EGH", ++ ["fcmp.sor.d_3"] = "0c2a8000EGH", ++ ["fcmp.cune.d_3"] = "0c2c0000EGH", ++ ["fcmp.sune.d_3"] = "0c2c8000EGH", ++ ++ fsel_4 = "0d000000FGHI", ++ ++ ["addu16i.d_3"] = "10000000DJY", ++ beqz_2 = 
"40000000JL", ++ bnez_2 = "44000000JL", ++ bceqz_2 = "48000000AL", ++ bcnez_2 = "48000100AL", ++ jirl_3 = "4c000000DJa", ++ b_1 = "50000000P", ++ bl_1 = "54000000P", ++ beq_3 = "58000000JDO", ++ bne_3 = "5c000000JDO", ++ blt_3 = "60000000JDO", ++ bge_3 = "64000000JDO", ++ bltu_3 = "68000000JDO", ++ bgeu_3 = "6c000000JDO", ++} ++ ++------------------------------------------------------------------------------ ++ ++local function parse_gpr(expr) ++ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") ++ local tp = map_type[tname or expr] ++ if tp then ++ local reg = ovreg or tp.reg ++ if not reg then ++ werror("type `"..(tname or expr).."' needs a register override") ++ end ++ expr = reg ++ end ++ local r = match(expr, "^r([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r, tp end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fpr(expr) ++ local r = match(expr, "^f([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fcsr(expr) ++ local r = match(expr, "^fcsr([0-3])$") ++ if r then ++ r = tonumber(r) ++ return r ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fcc(expr) ++ local r = match(expr, "^fcc([0-7])$") ++ if r then ++ r = tonumber(r) ++ return r ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_imm(imm, bits, shift, scale, signed, action) ++ local n = tonumber(imm) ++ if n then ++ local m = sar(n, scale) ++ if shl(m, scale) == n then ++ if signed then ++ local s = sar(m, bits-1) ++ if s == 0 then return shl(m, shift) ++ elseif s == -1 then return shl(m + shl(1, bits), shift) end ++ else ++ if sar(m, bits) == 0 then return shl(m, shift) end ++ end ++ end ++ werror("out of range immediate1 `"..imm.."'") ++ elseif match(imm, "^[rf]([1-3]?[0-9])$") or ++ match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then ++ werror("expected immediate operand, got register") ++ else ++ waction(action or "IMM", ++ (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) ++ return 0 ++ end ++end ++ ++local function parse_imm21or26(imm, i) ++ local n = tonumber(imm) ++ if n then ++ -- signed ++ local m = sar(n, 0) ++ if shl(m, 0) == n then ++ local s = sar(m, i-1) ++ if s == 0 then ++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0) ++ elseif s == -1 then ++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0) --TODO ++ end ++ end ++ werror("out of range immediate2 `"..imm.."'") ++ else ++ waction("IMM2", 0, imm) --TODO ++ return 0 ++ end ++end ++ ++local function parse_disp(disp) ++ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") ++ if imm then ++ local r = shl(parse_gpr(reg), 5) ++ local extname = match(imm, "^extern%s+(%S+)$") ++ if extname then ++ waction("REL_EXT", map_extern[extname], nil, 1) ++ return r ++ else ++ return r + parse_imm(imm, 12, 10, 0, true) ++ end ++ end ++ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local r, tp = parse_gpr(reg) ++ if tp then ++ waction("IMM", 32768+12*32+10, format(tp.ctypefmt, tailr)) ++ return shl(r, 5) ++ end ++ end ++ werror("bad displacement `"..disp.."'") ++end ++ ++local function parse_label(label, def) ++ local prefix = sub(label, 1, 2) ++ -- =>label (pc label reference) ++ if prefix == "=>" then ++ return "PC", 0, sub(label, 3) ++ end ++ -- ->name (global label reference) ++ if prefix == "->" then ++ return "LG", map_global[sub(label, 3)] ++ end ++ if def then 
++ -- [1-9] (local label definition) ++ if match(label, "^[1-9]$") then ++ return "LG", 10+tonumber(label) ++ end ++ else ++ -- [<>][1-9] (local label reference) ++ local dir, lnum = match(label, "^([<>])([1-9])$") ++ if dir then -- Fwd: 1-9, Bkwd: 11-19. ++ return "LG", lnum + (dir == ">" and 0 or 10) ++ end ++ -- extern label (extern label reference) ++ local extname = match(label, "^extern%s+(%S+)$") ++ if extname then ++ return "EXT", map_extern[extname] ++ end ++ end ++ werror("bad label `"..label.."'") ++end ++ ++local function branch_type(op) ++ if shr(op, 26) == 0x16 or shr(op, 26) == 0x17 or shr(op, 26) == 0x18 or ++ shr(op, 26) == 0x19 or shr(op, 26) == 0x1a or shr(op, 26) == 0x1b then ++ return 0 -- BEQ, BNE, BLT, BGE, BLTU, BGEU ++ elseif shr(op, 26) == 0x10 or shr(op, 26) == 0x11 or shr(op, 26) == 0x12 then ++ return 0x5000 -- BEQZ, BNEZ, BCEQZ, BCNEZ ++ elseif band(op, 0xf8000000) == 0x50000000 then return 0xa000 --B, BL ++ else ++ assert(false, "unknown branch type") ++ end ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Handle opcodes defined with template strings. ++map_op[".template__"] = function(params, template, nparams) ++ if not params then return sub(template, 9) end ++ local op = tonumber(sub(template, 1, 8), 16) ++ local n = 1 ++ ++ -- Limit number of section buffer positions used by a single dasm_put(). ++ -- A single opcode needs a maximum of 2 positions (ins/ext). ++ if secpos+2 > maxsecpos then wflush() end ++ local pos = wpos() ++ ++ -- Process each character. ++ for p in gmatch(sub(template, 9), ".") do ++ if p == "D" then ++ op = op + shl(parse_gpr(params[n]), 0); n = n + 1 ++ elseif p == "J" then ++ op = op + shl(parse_gpr(params[n]), 5); n = n + 1 ++ elseif p == "K" then ++ op = op + shl(parse_gpr(params[n]), 10); n = n + 1 ++ elseif p == "F" then ++ op = op + shl(parse_fpr(params[n]), 0); n = n + 1 ++ elseif p == "G" then ++ op = op + shl(parse_fpr(params[n]), 5); n = n + 1 ++ elseif p == "H" then ++ op = op + shl(parse_fpr(params[n]), 10); n = n + 1 ++ elseif p == "I" then ++ op = op + shl(parse_fcc(params[n]), 15); n = n + 1 ++ elseif p == "A" then ++ op = op + shl(parse_fcc(params[n]), 5); n = n + 1 ++ elseif p == "E" then ++ op = op + shl(parse_fcc(params[n]), 0); n = n + 1 ++ elseif op == "S" then ++ op = op + shl(parse_fcsr(params[n]), 0); n = n + 1 ++ elseif op == "R" then ++ op = op + shl(parse_fcsr(params[n]), 5); n = n + 1 ++ elseif p == "U" then ++ op = op + parse_imm(params[n], 5, 10, 0, false); n = n + 1 ++ elseif p == "V" then ++ op = op + parse_imm(params[n], 6, 10, 0, false); n = n + 1 ++ elseif p == "W" then ++ op = op + parse_imm(params[n], 14, 10, 0, true); n = n + 1 ++ elseif p == "X" then ++ op = op + parse_imm(params[n], 12, 10, 0, true); n = n + 1 ++ elseif p == "o" then ++ op = op + parse_disp(params[n]); n = n + 1 ++ elseif p == "Y" then ++ op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++ elseif p == "Z" then ++ op = op + parse_imm(params[n], 20, 5, 0, true); n = n + 1 ++ elseif p == "T" then ++ op = op + parse_imm(params[n], 12, 10, 0, false); n = n + 1 ++ elseif p == "C" then ++ op = op + parse_imm(params[n], 15, 0, 0, false); n = n + 1 ++ elseif p == "Q" then ++ op = op + parse_imm(params[n], 2, 15, 0, false); n = n + 1 ++ elseif p == "B" then ++ op = op + parse_imm(params[n], 3, 15, 0, false); n = n + 1 ++ elseif p == "M" then ++ op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 ++ elseif p == "N" then ++ op = op + parse_imm(params[n], 6, 16, 0, false); n 
= n + 1 ++-- elseif p == "O" then ++-- op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++-- elseif p == "L" then ++-- op = op + parse_imm21or26(params[n], 21); n = n + 1 ++-- elseif p == "P" then ++-- op = op + parse_imm21or26(params[n], 26); n = n + 1 ++ elseif p == "O" or p == "L" or p == "P" then ++ local mode, m, s = parse_label(params[n], false) ++ local v = branch_type(op) ++ waction("REL_"..mode, m+v, s, 1) ++ n = n + 1 ++ elseif p == "a" then ++ op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++ else ++ assert(false) ++ end ++ end ++ wputpos(pos, op) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode to mark the position where the action list is to be emitted. ++map_op[".actionlist_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeactions(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global enum is to be emitted. ++map_op[".globals_1"] = function(params) ++ if not params then return "prefix" end ++ local prefix = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobals(out, prefix) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global names are to be emitted. ++map_op[".globalnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobalnames(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the extern names are to be emitted. ++map_op[".externnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeexternnames(out, name) end) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Label pseudo-opcode (converted from trailing colon form). ++map_op[".label_1"] = function(params) ++ if not params then return "[1-9] | ->global | =>pcexpr" end ++ if secpos+1 > maxsecpos then wflush() end ++ local mode, n, s = parse_label(params[1], true) ++ if mode == "EXT" then werror("bad label definition") end ++ waction("LABEL_"..mode, n, s, 1) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcodes for data storage. ++map_op[".long_*"] = function(params) ++ if not params then return "imm..." end ++ for _,p in ipairs(params) do ++ local n = tonumber(p) ++ if not n then werror("bad immediate `"..p.."'") end ++ if n < 0 then n = n + 2^32 end ++ wputw(n) ++ if secpos+2 > maxsecpos then wflush() end ++ end ++end ++ ++-- Alignment pseudo-opcode. ++map_op[".align_1"] = function(params) ++ if not params then return "numpow2" end ++ if secpos+1 > maxsecpos then wflush() end ++ local align = tonumber(params[1]) ++ if align then ++ local x = align ++ -- Must be a power of 2 in the range (2 ... 256). ++ for i=1,8 do ++ x = x / 2 ++ if x == 1 then ++ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. ++ return ++ end ++ end ++ end ++ werror("bad alignment") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode for (primitive) type definitions (map to C types). 
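A sketch, not part of the patch, of the 16-bit IMM action payload built by parse_imm() above (bit 15 signedness, bits 14-10 scale, bits 9-5 field width, bits 4-0 shift) and of how the DASM_IMM case in dasm_loongarch64.h later masks and shifts the run-time value into the previously emitted word. The signedness and scale bits are presumably consumed by the range check in the earlier dasm_put() pass, which is not shown in this hunk; the (12, 10) parameters correspond to the signed si12 "X" operand used by addi.d and friends.

/* Sketch: the IMM action payload for a signed si12 field at bit 10 ("X"). */
#include <stdio.h>

int main(void)
{
  /* (signed ? 32768 : 0) + (scale << 10) + (bits << 5) + shift, as in parse_imm() */
  unsigned int payload = 32768u + (0u << 10) + (12u << 5) + 10u;   /* 0x818a */

  /* DASM_IMM in dasm_encode(): insert run-time value n into the previous word. */
  int n = -4;                                   /* e.g. addi.d rd, rj, -4 */
  unsigned int bits  = (payload >> 5) & 31;     /* 12 */
  unsigned int shift = payload & 31;            /* 10 */
  unsigned int field = ((unsigned int)n & ((1u << bits) - 1u)) << shift;
  printf("payload=0x%04x field=0x%08x\n", payload, field);  /* field=0x003ff000 */
  return 0;
}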
++map_op[".type_3"] = function(params, nparams) ++ if not params then ++ return nparams == 2 and "name, ctype" or "name, ctype, reg" ++ end ++ local name, ctype, reg = params[1], params[2], params[3] ++ if not match(name, "^[%a_][%w_]*$") then ++ werror("bad type name `"..name.."'") ++ end ++ local tp = map_type[name] ++ if tp then ++ werror("duplicate type `"..name.."'") ++ end ++ -- Add #type to defines. A bit unclean to put it in map_archdef. ++ map_archdef["#"..name] = "sizeof("..ctype..")" ++ -- Add new type and emit shortcut define. ++ local num = ctypenum + 1 ++ map_type[name] = { ++ ctype = ctype, ++ ctypefmt = format("Dt%X(%%s)", num), ++ reg = reg, ++ } ++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) ++ ctypenum = num ++end ++map_op[".type_2"] = map_op[".type_3"] ++ ++-- Dump type definitions. ++local function dumptypes(out, lvl) ++ local t = {} ++ for name in pairs(map_type) do t[#t+1] = name end ++ sort(t) ++ out:write("Type definitions:\n") ++ for _,name in ipairs(t) do ++ local tp = map_type[name] ++ local reg = tp.reg or "" ++ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) ++ end ++ out:write("\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Set the current section. ++function _M.section(num) ++ waction("SECTION", num) ++ wflush(true) -- SECTION is a terminal action. ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Dump architecture description. ++function _M.dumparch(out) ++ out:write(format("DynASM %s version %s, released %s\n\n", ++ _info.arch, _info.version, _info.release)) ++ dumpactions(out) ++end ++ ++-- Dump all user defined elements. ++function _M.dumpdef(out, lvl) ++ dumptypes(out, lvl) ++ dumpglobals(out, lvl) ++ dumpexterns(out, lvl) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pass callbacks from/to the DynASM core. ++function _M.passcb(wl, we, wf, ww) ++ wline, werror, wfatal, wwarn = wl, we, wf, ww ++ return wflush ++end ++ ++-- Setup the arch-specific module. ++function _M.setup(arch, opt) ++ g_arch, g_opt = arch, opt ++end ++ ++-- Merge the core maps and the arch-specific maps. ++function _M.mergemaps(map_coreop, map_def) ++ setmetatable(map_op, { __index = map_coreop }) ++ setmetatable(map_def, { __index = map_archdef }) ++ return map_op, map_def ++end ++ ++return _M ++ ++------------------------------------------------------------------------------ ++ +diff --git a/libs/luajit/LuaJIT-src/src/Makefile b/libs/luajit/LuaJIT-src/src/Makefile +index 34c5e97..cb3fc00 100644 +--- a/libs/luajit/LuaJIT-src/src/Makefile ++++ b/libs/luajit/LuaJIT-src/src/Makefile +@@ -36,7 +36,7 @@ CC= $(DEFAULT_CC) + # to slow down the C part by not omitting it. Debugging, tracebacks and + # unwinding are not affected -- the assembler part has frame unwind + # information and GCC emits it where needed (x64) or with -g (see CCDEBUG). +-CCOPT= -O2 -fomit-frame-pointer ++CCOPT= -O0 -fomit-frame-pointer + # Use this if you want to generate a smaller binary (but it's slower): + #CCOPT= -Os -fomit-frame-pointer + # Note: it's no longer recommended to use -O3 with GCC 4.x. 
+@@ -53,6 +53,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_loongarch64= + # + CCDEBUG= + # Uncomment the next line to generate debug information: +@@ -241,6 +242,10 @@ else + ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) + TARGET_LJARCH= arm + else ++ifneq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH))) ++ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE ++ TARGET_LJARCH= loongarch64 ++else + ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 +@@ -272,6 +277,7 @@ endif + endif + endif + endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +diff --git a/libs/luajit/LuaJIT-src/src/host/buildvm.c b/libs/luajit/LuaJIT-src/src/host/buildvm.c +index 98a7a57..8e96cb4 100644 +--- a/libs/luajit/LuaJIT-src/src/host/buildvm.c ++++ b/libs/luajit/LuaJIT-src/src/host/buildvm.c +@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #include "../dynasm/dasm_ppc.h" + #elif LJ_TARGET_MIPS + #include "../dynasm/dasm_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "../dynasm/dasm_loongarch64.h" + #else + #error "No support for this architecture (yet)" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c b/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c +index ffd1490..bc9ab7f 100644 +--- a/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c ++++ b/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c +@@ -164,6 +164,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); ++#elif LJ_TARGET_LOONGARCH64 ++ if ((ins >> 26) == 21) { ++ fprintf(ctx->fp, "\tbl %s\n", sym); ++ } else { ++ fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } + #else + #error "missing relocation support for this architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/jit/bcsave.lua b/libs/luajit/LuaJIT-src/src/jit/bcsave.lua +index c17c88e..79bae42 100644 +--- a/libs/luajit/LuaJIT-src/src/jit/bcsave.lua ++++ b/libs/luajit/LuaJIT-src/src/jit/bcsave.lua +@@ -64,7 +64,7 @@ local map_type = { + + local map_arch = { + x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, +- ppc = true, mips = true, mipsel = true, ++ ppc = true, mips = true, mipsel = true, loongarch64 = true, + } + + local map_os = { +diff --git a/libs/luajit/LuaJIT-src/src/jit/dis_loongarch64.lua b/libs/luajit/LuaJIT-src/src/jit/dis_loongarch64.lua +new file mode 100644 +index 0000000..3e67efc +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/jit/dis_loongarch64.lua +@@ -0,0 +1,649 @@ ++---------------------------------------------------------------------------- ++-- LuaJIT LoongArch disassembler module. ++-- ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. ++-- Copyright (C) 2021 Loongson Technology. All rights reserved. ++-- Released under the MIT/X license. See Copyright Notice in luajit.h ++---------------------------------------------------------------------------- ++-- This is a helper module used by the LuaJIT machine code dumper module. ++-- ++-- It disassembles most LoongArch instructions. ++-- NYI: SIMD instructions. 
++------------------------------------------------------------------------------ ++ ++local type = type ++local byte, format = string.byte, string.format ++local match, gmatch = string.match, string.gmatch ++local concat = table.concat ++local bit = require("bit") ++local band, bor, tohex = bit.band, bit.bor, bit.tohex ++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ++ ++------------------------------------------------------------------------------ ++-- Opcode maps ++------------------------------------------------------------------------------ ++ ++local map_18_0 = { -- 18-20:0, 10-17 ++ shift = 10, mask = 255, ++ [4] = "clo.wDJ", ++ [5] = "clz.wDJ", ++ [6] = "cto.wDJ", ++ [7] = "ctz.wDJ", ++ [8] = "clo.dDJ", ++ [9] = "clz.dDJ", ++ [10] = "cto.dDJ", ++ [11] = "ctz.dDJ", ++ [12] = "revb.2hDJ", ++ [13] = "revb.4hDJ", ++ [14] = "revb.2wDJ", ++ [15] = "revb.dDJ", ++ [16] = "revh.2wDJ", ++ [17] = "revh.dDJ", ++ [18] = "bitrev.4bDJ", ++ [19] = "bitrev.8bDJ", ++ [20] = "bitrev.wDJ", ++ [21] = "bitrev.dDJ", ++ [22] = "ext.w.hDJ", ++ [23] = "ext.w.bDJ", ++} ++ ++local map_18_4 = { -- 18-20:4, 15-17 ++ shift = 15, mask = 7, ++ [0] = "add.wDJK", ++ [1] = "add.dDJK", ++ [2] = "sub.wDJK", ++ [3] = "sub.dDJK", ++ [4] = "sltDJK", ++ [5] = "sltuDJK", ++ [6] = "maskeqzDJK", ++ [7] = "masknezDJK", ++} ++ ++local map_18_5 = { -- 18-20:5, 15-17 ++ shift = 15, mask = 7, ++ [0] = "norDJK", ++ [1] = "andDJK", ++ [2] = "orDJK", ++ [3] = "xorDJK", ++ [4] = "ornDJK", ++ [5] = "andnDJK", ++ [6] = "sll.wDJK", ++ [7] = "srl.wDJK", ++} ++ ++local map_18_6 = { -- 18-20:6, 15-17 ++ shift = 15, mask = 7, ++ [0] = "sra.wDJK", ++ [1] = "sll.dDJK", ++ [2] = "srl.dDJK", ++ [3] = "sra.dDJK", ++ [6] = "rotr.wDJK", ++ [7] = "rotr.dDJK", ++} ++ ++local map_18_7 = { -- 18-20:7, 15-17 ++ shift = 15, mask = 7, ++ [0] = "mul.wDJK", ++ [1] = "mulh.wDJK", ++ [2] = "mulh.wuDJK", ++ [3] = "mul.dDJK", ++ [4] = "mulh.dDJK", ++ [5] = "mulh.duDJK", ++ [6] = "mulw.d.wDJK", ++ [7] = "mulw.d.wuDJK", ++} ++ ++local map_farith2 = { ++ shift = 10, mask = 31, ++ [0] = "fabs.hFG", ++ [1] = "fabs.sFG", ++ [2] = "fabs.dFG", ++ [4] = "fneg.hFG", ++ [5] = "fneg.sFG", ++ [6] = "fneg.dFG", ++ [8] = "flogb.hFG", ++ [9] = "flogb.sFG", ++ [10] = "flogb.dFG", ++ [12] = "fclass.hFG", ++ [13] = "fclass.sFG", ++ [14] = "fclass.dFG", ++ [16] = "fsqrt.hFG", ++ [17] = "fsqrt.sFG", ++ [18] = "fsqrt.dFG", ++ [20] = "frecip.hFG", ++ [21] = "frecip.sFG", ++ [22] = "frecip.dFG", ++ [24] = "frsqrt.hFG", ++ [25] = "frsqrt.sFG", ++ [26] = "frsqrt.dFG", ++ [28] = "frecipe.hFG", ++ [29] = "frecipe.sFG", ++ [30] = "frecipe.dFG", ++ [32] = "frsqrte.hFG", ++ [33] = "frsqrte.sFG", ++ [34] = "frsqrte.dFG", ++} ++ ++local map_fmov = { ++ shift = 10, mask = 31, ++ [4] = "fmov.hFG", ++ [5] = "fmov.sFG", ++ [6] = "fmov.dFG", ++ [8] = "movgr2fr.hFJ", ++ [9] = "movgr2fr.wFJ", ++ [10] = "movgr2fr.dFJ", ++ [11] = "movgr2frh.wFJ", ++ [12] = "movfr2gr.hDG", ++ [13] = "movfr2gr.sDG", ++ [14] = "movfr2gr.dDG", ++ [15] = "movfrh2gr.sDG", ++ [16] = "movgr2fcsrSJ", ++ [18] = "movfcsr2grDR", ++ [20] = { shift = 3, mask = 3, [0] = "movfr2cfEG", }, ++ [21] = { shift = 8, mask = 3, [0] = "movcf2frFA", }, ++ [22] = { shift = 3, mask = 3, [0] = "movgr2cfEJ", }, ++ [23] = { shift = 8, mask = 3, [0] = "movcf2grDA", }, ++ [24] = "fcvt.ld.dFG", ++ [25] = "fcvt.ud.dFG", ++} ++ ++local map_fconvert = { -- 15-20: 110010 ++ shift = 10, mask = 31, ++ [6] = "fcvt.s.dFG", [9] = "fcvt.d.sFG", ++} ++ ++local map_fconvert1 = { -- 15-20: 110100 ++ shift = 10, mask = 31, ++ [1] = 
"ftintrm.w.sFG", ++ [2] = "ftintrm.w.dFG", ++ [9] = "ftintrm.l.sFG", ++ [10] = "ftintrm.l.dFG", ++ [17] = "ftintrp.w.sFG", ++ [18] = "ftintrp.w.dFG", ++ [25] = "ftintrp.l.sFG", ++ [26] = "ftintrp.l.dFG", ++} ++ ++local map_fconvert2 = { -- 15-20: 110101 ++ shift = 10, mask = 31, ++ [1] = "ftintrz.w.sFG", ++ [2] = "ftintrz.w.dFG", ++ [9] = "ftintrz.l.sFG", ++ [10] = "ftintrz.l.dFG", ++ [17] = "ftintrne.w.sFG", ++ [18] = "ftintrne.w.dFG", ++ [25] = "ftintrne.l.sFG", ++ [26] = "ftintrne.l.dFG", ++} ++ ++local map_fconvert3 = { -- 15-20: 110110 ++ shift = 10, mask = 31, ++ [1] = "ftint.w.sFG", ++ [2] = "ftint.w.dFG", ++ [9] = "ftint.l.sFG", ++ [10] = "ftint.l.dFG", ++} ++ ++local map_fconvert4 = { -- 15-20: 111010 ++ shift = 10, mask = 31, ++ [4] = "ffint.s.wFG", ++ [6] = "ffint.s.lFG", ++ [8] = "ffint.d.wFG", ++ [10] = "ffint.d.lFG", ++} ++ ++local map_fconvert5 = { -- 15-20: 111100 ++ shift = 10, mask = 31, ++ [17] = "frint.sFG", ++ [18] = "frint.dFG", ++} ++ ++local map_farith = { -- 22-25:4, 15-21 ++ shift = 15, mask = 127, ++ [0] = "fadd.hFGH", ++ [1] = "fadd.sFGH", ++ [2] = "fadd.dFGH", ++ [4] = "fsub.hFGH", ++ [5] = "fsub.sFGH", ++ [6] = "fsub.dFGH", ++ [8] = "fmul.hFGH", ++ [9] = "fmul.sFGH", ++ [10] = "fmul.dFGH", ++ [12] = "fdiv.hFGH", ++ [13] = "fdiv.sFGH", ++ [14] = "fdiv.dFGH", ++ [16] = "fmax.hFGH", ++ [17] = "fmax.sFGH", ++ [18] = "fmax.dFGH", ++ [20] = "fmin.hFGH", ++ [21] = "fmin.sFGH", ++ [22] = "fmin.dFGH", ++ [24] = "fmaxa.hFGH", ++ [25] = "fmaxa.sFGH", ++ [26] = "fmaxa.dFGH", ++ [28] = "fmina.hFGH", ++ [29] = "fmina.sFGH", ++ [30] = "fmina.dFGH", ++ [32] = "fscaleb.hFGH", ++ [33] = "fscaleb.sFGH", ++ [34] = "fscaleb.dFGH", ++ [36] = "fcopysign.hFGH", ++ [37] = "fcopysign.sFGH", ++ [38] = "fcopysign.dFGH", ++ [40] = map_farith2, [41] = map_fmov, ++ [50] = map_fconvert, [52] = map_fconvert1, ++ [53] = map_fconvert2, [54] = map_fconvert3, ++ [58] = map_fconvert4, [60] = map_fconvert5, ++} ++ ++local map_21_0 = { --21st:0, 18-20 ++ shift = 18, mask = 7, ++ [0] = map_18_0, ++ [1] = { shift = 17, mask = 1, [0] = "alsl.wDJKQ", "alsl.wuDJKQ", }, ++ [2] = {shift = 17, mask = 1, [0] = "bytepick.wDJKQ", }, ++ [3] = "bytepick.dDJKB", ++ [4] = map_18_4, ++ [5] = map_18_5, ++ [6] = map_18_6, ++ [7] = map_18_7, ++} ++ ++local map_21_1 = { --21st:1, 22nd:0, 15-20 ++ shift = 21, mask = 1, ++ [1] = { ++ shift = 18, mask = 7, ++ [0] = { ++ shift = 15, mask = 7, ++ [0] = "div.wDJK", ++ [1] = "mod.wDJK", ++ [2] = "div.wuDJK", ++ [3] = "mod.wuDJK", ++ [4] = "div.dDJK", ++ [5] = "mod.dDJK", ++ [6] = "div.duDJK", ++ [7] = "mod.duDJK", ++ }, ++ [1] = { ++ shift = 18, mask = 7, ++ [0] = "crc.w.b.wDJK", ++ [1] = "crc.w.h.wDJK", ++ [2] = "crc.w.w.wDJK", ++ [3] = "crc.w.d.wDJK", ++ [4] = "crcc.w.b.wDJK", ++ [5] = "crcc.w.h.wDJK", ++ [6] = "crcc.w.w.wDJK", ++ [7] = "crcc.w.d.wDJK", ++ }, ++ [2] = { ++ shift = 15, mask = 7, ++ [4] = breakC, [6] = syscallC, ++ }, ++ [3] = { shift = 17, mask = 1, [0] = "alsl.dDJKQ", }, ++ }, ++} ++ ++local map_22_0 = { ++ shift = 21, mask = 1, ++ [0] = map_21_0, ++ [1] = map_21_1, ++} ++ ++local map_shift = { -- 22nd:1, 21st:0 ++ shift = 16, mask = 31, ++ [0] = { shift = 15, mask = 1, [1] = "slli.wDJU", }, ++ [1] = "slli.dDJV", ++ [4] = { shift = 15, mask = 1, [1] = "srli.wDJU", }, ++ [5] = "srli.dDJV", ++ [8] = { shift = 15, mask = 1, [1] = "srai.wDJU", }, ++ [9] = "srai.dDJV", ++ [12] = { shift = 15, mask = 1, [1] = "rotri.wDJU", }, ++ [13] = "rotri.dDJV", ++} ++ ++local map_22_1 = { -- 22nd:1 ++ shift = 21, mask = 1, ++ [0] = map_shift, ++ [1] = { shift = 15, mask = 1, 
[0] = "bstrins.wDJMU", [1] = "bstrpick.wDJMU", }, ++} ++ ++local map_26_0 = { ++ shift = 22, mask = 15, ++ [0] = map_22_0, ++ [1] = map_22_1, ++ [2] = "bstrins.dDJNV", ++ [3] = "bstrpick.dDJNV", ++ [4] = map_farith, ++ [8] = "sltiDJX", ++ [9] = "sltuiDJX", ++ [10] = "addi.wDJX", ++ [11] = "addi.dDJX", ++ [12] = "lu52i.dDJX", ++ [13] = "andiDJT", ++ [14] = "oriDJT", ++ [15] = "xoriDJT", ++} ++ ++local map_long_i_5 = { -- Long immediate fixed-point arithmetic. ++ shift = 25, mask = 1, ++ [0] = "lu12i.wDZ", ++ [1] = "lu32i.dDZ", ++} ++ ++local map_long_i_6 = { ++ shift = 25, mask = 1, ++ [0] = "pcaddiDZ", ++ [1] = "pcalau12iDZ", ++} ++ ++local map_long_i_7 = { ++ shift = 25, mask = 1, ++ [0] = "pcaddu12iDZ", ++ [1] = "pcaddu18iDZ", ++} ++ ++local map_ldst0_14 = { ++ shift = 15, mask = 2047, ++ [0] = "ldx.bDJK", [8] = "ldx.hDJK", [16] = "ldx.wDJK", ++ [24] = "ldx.dDJK", [32] = "stx.bDJK", [40] = "stx.hDJK", ++ [48] = "stx.wDJK", [56] = "stx.dDJK", [64] = "ldx.buDJK", ++ [72] = "ldx.huDJK", [80] = "ldx.wuDJK", [96] = "fldx.sFJK", ++ [104] = "fldx.dFJK", [112] = "fstx.sFJK", [120] = "fstx.dFJK", ++ [232] = "fldgt.sFJK", [233] = "fldgt.dFJK", [234] = "fldle.sFJK", ++ [235] = "fldle.dFJK", [236] = "fstgt.sFJK", [237] = "fstgt.dFJK", ++ [238] = "fstle.sFJK", [239] = "fstle.dFJK", [240] = "ldgt.bDJK", ++ [241] = "ldgt.hDJK", [242] = "ldgt.wDJK", [243] = "ldgt.dDJK", ++ [244] = "ldle.bDJK", [245] = "ldle.hDJK", [246] = "ldle.wDJK", ++ [247] = "ldle.dDJK", [248] = "stgt.bDJK", [249] = "stgt.hDJK", ++ [250] = "stgt.wDJK", [251] = "stgt.dDJK", [252] = "stle.bDJK", ++ [253] = "stle.hDJK", [254] = "stle.wDJK", [255] = "stle.dDJK", ++} ++ ++local map_ldst1_8 = { ++ shift = 24, mask = 3, ++ [0] = "ll.wDJW", ++ [1] = "sc.wDJW", ++ [2] = "ll.dDJW", ++ [3] = "sc.dDJW", ++} ++ ++local map_ldst1_9 = { ++ shift = 24, mask = 3, ++ [0] = "ldptr.wDJW", ++ [1] = "stptr.wDJW", ++ [2] = "ldptr.dDJW", ++ [3] = "stptr.dDJW", ++} ++ ++local map_ldst1_10 = { ++ shift = 22, mask = 15, ++ [0] = "ld.bDJX", ++ [1] = "ld.hDJX", ++ [2] = "ld.wDo", ++ [3] = "ld.dDo", ++ [4] = "st.bDo", ++ [5] = "st.hDo", ++ [6] = "st.wDo", ++ [7] = "st.dDo", ++ [8] = "ld.buDo", ++ [9] = "ld.huDo", ++ [10] = "ld.wuDJX", ++ [12] = "fld.sFo", ++ [13] = "fst.sFo", ++ [14] = "fld.dFo", ++ [15] = "fst.dFo", ++} ++ ++local map_fcmp0 = { ++ shift = 15, mask = 31, ++ [0] = "fcmp.caf.sEGH", ++ [1] = "fcmp.saf.sEGH", ++ [2] = "fcmp.clt.sEGH", ++ [3] = "fcmp.slt.sEGH", ++ [4] = "fcmp.ceq.sEGH", ++ [5] = "fcmp.seq.sEGH", ++ [6] = "fcmp.cle.sEGH", ++ [7] = "fcmp.sle.sEGH", ++ [8] = "fcmp.cun.sEGH", ++ [9] = "fcmp.sun.sEGH", ++ [10] = "fcmp.cult.sEGH", ++ [11] ="fcmp.sult.sEGH", ++ [12] = "fcmp.cueq.sEGH", ++ [13] = "fcmp.sueq.sEGH", ++ [14] = "fcmp.cule.sEGH", ++ [15] = "fcmp.sule.sEGH", ++ [16] = "fcmp.cne.sEGH", ++ [17] = "fcmp.sne.sEGH", ++ [20] = "fcmp.cor.sEGH", ++ [21] = "fcmp.sor.sEGH", ++ [24] = "fcmp.cune.sEGH", ++ [25] = "fcmp.sune.sEGH", ++} ++ ++local map_fcmp1 = { ++ shift = 15, mask = 31, ++ [0] = "fcmp.caf.dEGH", ++ [1] = "fcmp.saf.dEGH", ++ [2] = "fcmp.clt.dEGH", ++ [3] = "fcmp.slt.dEGH", ++ [4] = "fcmp.ceq.dEGH", ++ [5] = "fcmp.seq.dEGH", ++ [6] = "fcmp.cle.dEGH", ++ [7] = "fcmp.sle.dEGH", ++ [8] = "fcmp.cun.dEGH", ++ [9] = "fcmp.sun.dEGH", ++ [10] = "fcmp.cult.dEGH", ++ [11] = "fcmp.sult.dEGH", ++ [12] = "fcmp.cueq.dEGH", ++ [13] = "fcmp.sueq.dEGH", ++ [14] = "fcmp.cule.dEGH", ++ [15] = "fcmp.sule.dEGH", ++ [16] = "fcmp.cne.dEGH", ++ [17] = "fcmp.sne.dEGH", ++ [20] = "fcmp.cor.dEGH", ++ [21] = "fcmp.sor.dEGH", ++ [24] = "fcmp.cune.dEGH", ++ 
[25] = "fcmp.sune.dEGH", ++} ++ ++local map_fcmp = { ++ shift = 20, mask = 63, ++ [1] = { shift = 3, mask = 3, [0] = map_fcmp0, }, ++ [2] = { shift = 3, mask = 3, [0] = map_fcmp1, }, ++ [16] = { shift = 18, mask = 3, [0] = "fselFGHI", }, ++} ++ ++local map_init = { ++ shift = 26, mask = 63, ++ [0] = map_26_0, ++ [3] = map_fcmp, ++ [4] = "addu16i.dDJY", ++ [5] = map_long_i_5, ++ [6] = map_long_i_6, ++ [7] = map_long_i_7, ++ [8] = map_ldst1_8, ++ [9] = map_ldst1_9, ++ [10] = map_ldst1_10, ++ [14] = map_ldst0_14, ++ [16] = "beqzJL", ++ [17] = "bnezJL", ++ [18] = { shift = 8, mask = 3, [0] = "bceqzAL", "bcnezAL", }, ++ [19] = "jirlDJa", ++ [20] = "bP", ++ [21] = "blP", ++ [22] = "beqJDO", ++ [23] = "bneJDO", ++ [24] = "bltJDO", ++ [25] = "bgeJDO", ++ [26] = "bltuJDO", ++ [27] = "bgeuJDO", ++} ++ ++------------------------------------------------------------------------------ ++ ++local map_gpr = { ++ [0] = "r0", "ra", "r2", "sp", "r4", "r5", "r6", "r7", ++ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", ++ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", ++ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Output a nicely formatted line with an opcode and operands. ++local function putop(ctx, text, operands) ++ local pos = ctx.pos ++ local extra = "" ++ if ctx.rel then ++ local sym = ctx.symtab[ctx.rel] ++ if sym then extra = "\t->"..sym end ++ end ++ if ctx.hexdump > 0 then ++ ctx.out(format("%08x %s %-7s %s%s\n", ++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) ++ else ++ ctx.out(format("%08x %-7s %s%s\n", ++ ctx.addr+pos, text, concat(operands, ", "), extra)) ++ end ++ ctx.pos = pos + 4 ++end ++ ++-- Fallback for unknown opcodes. ++local function unknown(ctx) ++ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) ++end ++ ++local function get_le(ctx) ++ local pos = ctx.pos ++ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) ++ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) ++end ++ ++-- Disassemble a single instruction. 
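A standalone sketch (not in the patch) of the table walk that disass_ins() below performs over the maps above, traced by hand for 0x001098a4, the add.d r4, r5, r6 word from the encoder-side example: each nested table selects on (op >> shift) & mask until a template string such as "add.dDJK" is reached, and the trailing letters then pick out the register fields.

/* Sketch: the field extraction disass_ins() ends up doing for 0x001098a4. */
#include <stdio.h>

int main(void)
{
  unsigned int op = 0x001098a4u;
  unsigned int pri = op >> 26;          /* 0 -> map_26_0     */
  unsigned int f22 = (op >> 22) & 15;   /* 0 -> map_22_0     */
  unsigned int f21 = (op >> 21) & 1;    /* 0 -> map_21_0     */
  unsigned int f18 = (op >> 18) & 7;    /* 4 -> map_18_4     */
  unsigned int f15 = (op >> 15) & 7;    /* 1 -> "add.dDJK"   */
  unsigned int rd  = op & 31, rj = (op >> 5) & 31, rk = (op >> 10) & 31;
  printf("%u %u %u %u %u -> add.d r%u, r%u, r%u\n",
         pri, f22, f21, f18, f15, rd, rj, rk);   /* add.d r4, r5, r6 */
  return 0;
}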
++local function disass_ins(ctx) ++ local op = ctx:get() ++ local operands = {} ++ local last = nil ++ ctx.op = op ++ ctx.rel = nil ++ ++ local opat = ctx.map_pri[rshift(op, 26)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] ++ end ++ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") ++ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") ++ if altname then pat = pat2 end ++ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if p == "D" then ++ x = map_gpr[band(rshift(op, 0), 31)] ++ elseif p == "J" then ++ x = map_gpr[band(rshift(op, 5), 31)] ++ elseif p == "K" then ++ x = map_gpr[band(rshift(op, 10), 31)] ++ elseif p == "F" then ++ x = "f"..band(rshift(op, 0), 31) ++ elseif p == "G" then ++ x = "f"..band(rshift(op, 5), 31) ++ elseif p == "H" then ++ x = "f"..band(rshift(op, 10), 31) ++ elseif p == "S" then ++ x = "fcsr"..band(rshift(op, 0), 31) ++ elseif p == "R" then ++ x = "fcsr"..band(rshift(op, 5), 31) ++ elseif p == "E" then ++ x = "fcc"..band(rshift(op, 0), 7) ++ elseif p == "A" then ++ x = "fcc"..band(rshift(op, 5), 7) ++ elseif p == "I" then ++ x = "fcc"..band(rshift(op, 15), 7) ++ elseif p == "Q" then --TODO sa2 ++ x = band(rshift(op, 15), 3) ++ elseif p == "B" then --TODO sa3 ++ x = band(rshift(op, 15), 7) ++ elseif p == "M" then --TODO msbw ++ x = band(rshift(op, 16), 31) ++ elseif p == "N" then --TODO msbd ++ x = band(rshift(op, 16), 63) ++ elseif p == "U" then -- ui5 ++ x = band(rshift(op, 10), 31) ++ elseif p == "V" then -- ui6 ++ x = band(rshift(op, 10), 63) ++ elseif p == "T" then -- ui12 ++ x = band(rshift(op, 10), 4095) ++ elseif p == "W" then -- si14 ++ x = band(rshift(op, 10), 16383) ++ elseif p == "X" then -- si12 ++ x = band(rshift(op, 10), 4095) ++ elseif p == "o" then ++ local disp = band((rshift(op, 10)), 0xfff) ++ operands[#operands] = format("%s, %d", last, disp) ++ elseif p == "Y" then -- si16 ++ x = band(rshift(op, 10), 65535) ++ elseif p == "Z" then -- si20 ++ x = band(rshift(op, 10), 1048575) ++ elseif p == "C" then -- code ++ x = band(rshift(op, 0), 32767) ++ elseif p == "O" then -- offs[15:0] ++ x = band(rshift(op, 10), 65535) ++ elseif p == "L" then -- offs[15:0] + offs[20:16] ++ x = lshift(band(op, 31), 16) + band(rshift(op, 10), 65535) ++ elseif p == "P" then -- offs[15:0] + offs[25:16] ++ x = lshift(band(op, 1023), 16) + band(rshift(op, 10), 65535) ++ elseif p == "a" then ++ x = band(rshift(op, 10), 65535) ++ else ++ assert(false) ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ ++ return putop(ctx, name, operands) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Disassemble a block of code. ++local function disass_block(ctx, ofs, len) ++ if not ofs then ofs = 0 end ++ local stop = len and ofs+len or #ctx.code ++ stop = stop - stop % 4 ++ ctx.pos = ofs - ofs % 4 ++ ctx.rel = nil ++ while ctx.pos < stop do disass_ins(ctx) end ++end ++ ++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). ++local function create(code, addr, out) ++ local ctx = {} ++ ctx.code = code ++ ctx.addr = addr or 0 ++ ctx.out = out or io.write ++ ctx.symtab = {} ++ ctx.disass = disass_block ++ ctx.hexdump = 8 ++ ctx.get = get_le ++ ctx.map_pri = map_init ++ return ctx ++end ++ ++-- Simple API: disassemble code (a string) at address and output via out. ++local function disass(code, addr, out) ++ create(code, addr, out):disass() ++end ++ ++-- Return register name for RID. 
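One more hedged sketch (not part of the patch) tying the L/P operand extraction in disass_ins() above to the DASM_REL_PC patch code in dasm_loongarch64.h: branch displacements are word offsets (the encoder shifts the byte offset right by 2), with offs[15:0] stored in instruction bits 25..10; B/BL keep offs[25:16] in bits 9..0, while BEQZ/BNEZ/BCEQZ/BCNEZ keep offs[20:16] in bits 4..0. The B base opcode 0x50000000 comes from the "50000000P" template.

/* Sketch: packing a byte displacement into a B/BL word, as DASM_REL_PC does. */
#include <stdio.h>

static unsigned int patch_b(unsigned int ins, int n)   /* n = byte offset */
{
  unsigned int off = (unsigned int)n >> 2;             /* word offset     */
  return ins | ((off & 0xffffu) << 10)                 /* offs[15:0]      */
             | ((off >> 16) & 0x3ffu);                 /* offs[25:16]     */
}

int main(void)
{
  unsigned int b = patch_b(0x50000000u, 0x44000);      /* b +0x44000 bytes */
  printf("0x%08x\n", b);                               /* 0x50400001 */
  return 0;
}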
++local function regname(r) ++ if r < 32 then return map_gpr[r] end ++ return "f"..(r-32) ++end ++ ++-- Public module functions. ++return { ++ create = create, ++ disass = disass, ++ regname = regname ++} ++ +diff --git a/libs/luajit/LuaJIT-src/src/lib_jit.c b/libs/luajit/LuaJIT-src/src/lib_jit.c +index 22ca0a1..09be9eb 100644 +--- a/libs/luajit/LuaJIT-src/src/lib_jit.c ++++ b/libs/luajit/LuaJIT-src/src/lib_jit.c +@@ -732,6 +732,10 @@ static uint32_t jit_cpudetect(lua_State *L) + } + #endif + #endif ++ ++#elif LJ_TARGET_LOONGARCH64 ++ flags |= JIT_F_GS464V; ++ + #else + #error "Missing CPU detection for this architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_arch.h b/libs/luajit/LuaJIT-src/src/lj_arch.h +index e8ad844..6bdaaaf 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_arch.h ++++ b/libs/luajit/LuaJIT-src/src/lj_arch.h +@@ -29,6 +29,8 @@ + #define LUAJIT_ARCH_mips32 6 + #define LUAJIT_ARCH_MIPS64 7 + #define LUAJIT_ARCH_mips64 7 ++#define LUAJIT_ARCH_LOONGARCH64 9 ++#define LUAJIT_ARCH_loongarch64 9 + + /* Target OS. */ + #define LUAJIT_OS_OTHER 0 +@@ -55,6 +57,8 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 + #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 ++#elif defined(__loongarch64__) || defined(__loongarch64) || defined(__LOONGARCH64__) || defined(__LOONGARCH64) ++#define LUAJIT_TARGET LUAJIT_ARCH_LOONGARCH64 + #else + #error "No support for this architecture (yet)" + #endif +@@ -358,6 +362,40 @@ + #define LJ_ARCH_VERSION 10 + #endif + ++#elif LUAJIT_TARGET == LUAJIT_ARCH_LOONGARCH64 ++#define LJ_ARCH_NAME "loongarch64" ++#define LJ_ARCH_BITS 64 ++#define LJ_ARCH_ENDIAN LUAJIT_LE ++#define LJ_TARGET_LOONGARCH64 1 ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_EHRETREG 4 ++#define LJ_TARGET_EHRAREG 30 ++#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++ ++#if !defined(LJ_ARCH_HASFPU) ++#ifdef __loongarch_soft_float ++#define LJ_ARCH_HASFPU 0 ++#else ++#define LJ_ARCH_HASFPU 1 ++#endif ++#endif ++ ++#if !defined(LJ_ABI_SOFTFP) ++#ifdef __loongarch_soft_float ++#define LJ_ABI_SOFTFP 1 ++#else ++#define LJ_ABI_SOFTFP 0 ++#endif ++#endif ++ ++#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU ++#define LJ_ARCH_NOJIT 1 ++#endif ++ + #else + #error "No target architecture defined" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_asm.c b/libs/luajit/LuaJIT-src/src/lj_asm.c +index c2cf5a9..72932f8 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_asm.c ++++ b/libs/luajit/LuaJIT-src/src/lj_asm.c +@@ -177,6 +177,8 @@ IRFLDEF(FLOFS) + #include "lj_emit_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_emit_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_emit_loongarch64.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1597,6 +1599,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_asm_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_asm_loongarch64.h" + #else + #error "Missing assembler for target CPU" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_asm_loongarch64.h b/libs/luajit/LuaJIT-src/src/lj_asm_loongarch64.h +new file mode 100644 +index 0000000..28847cb +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/lj_asm_loongarch64.h +@@ -0,0 +1,2272 @@ ++/* ++** LoongArch IR assembler (SSA IR -> machine code). ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h ++** Copyright (C) 2021 Loongson Technology. All rights reserved. ++*/ ++ ++/* -- Register allocator extensions --------------------------------------- */ ++ ++/* Allocate a register with a hint. */ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate two source registers for three-operand instructions. */ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); ++ } else if (ra_hashint(right)) { ++ right = ra_allocref(as, ir->op2, allow); ++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); ++ } else { ++ left = ra_allocref(as, ir->op1, allow); ++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Setup exit stub after the end of each trace. */ ++static void asm_exitstub_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if (as->mcp == mxp) ++ --as->mcp; ++ /* st.w TMP, sp, 0; li TMP, traceno; b ->vm_exit_handler;*/ ++ *--mxp = LAI_JIRL | RID_R0 | LAF_J(RID_R20) | 0<<10; ++// *--mxp = LAI_B | LAF_I((uintptr_t)(void *)lj_vm_exit_handler & 0xffff) | (((uintptr_t)(void *)lj_vm_exit_handler >> 16) & 0x3ff); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->T->traceno); ++ *--mxp = *as->mcp; ++ *--mxp = LAI_LU52I_D | RID_R20 | LAF_J(RID_R20) | ((((uintptr_t)(void *)lj_vm_exit_handler)>>52)&0xfff)<<10; ++ *--mxp = LAI_LU32I_D | RID_R20 | ((((uintptr_t)(void *)lj_vm_exit_handler)>>32)&0xfffff)<<5; ++ *--mxp = LAI_ORI | RID_R20| LAF_J(RID_R20) | (((uintptr_t)(void *)lj_vm_exit_handler)&0xfff) << 10; ++ *--mxp = LAI_LU12I_W | RID_R20 | ((((uintptr_t)(void *)lj_vm_exit_handler)&0xfffff000)>>12)<<5; ++ *--mxp = LAI_ST_W|LAF_D(RID_TMP)|LAF_J(RID_SP)|0; ++ as->mctop = mxp; ++} ++ ++/* Keep this in-sync with exitstub_trace_addr(). */ ++#define asm_exitstub_addr(as) ((as)->mctop) ++ ++/* Emit conditional branch to exit for guard. */ ++static void asm_guard(ASMState *as, LAIns lai, Reg rj, Reg rd) ++{ ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p; ++ lai = lai ^ ((lai>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BEQ BNE BGE BLZ*/ ++ target = p; /* Patch target later in asm_loop_fixup. 
*/ ++ } ++ if (rj == RID_TMP) { ++ emit_branch(as, lai, RID_R20, rd, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ /* move r18, r1*/ ++ emit_djk(as, LAI_OR, RID_R20, rj, RID_ZERO); ++ } else { ++ emit_branch(as, lai, rj, rd, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ } ++} ++ ++static void asm_guard21(ASMState *as, LAIns lai, Reg rj, Reg rd) ++{ ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p; ++ lai = lai ^ ((lai>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BCEQZ BCNEZ*/ ++ target = p; /* Patch target later in asm_loop_fixup. */ ++ } ++ if (rj == RID_TMP) { ++ emit_branch21(as, lai, RID_R20, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ /* move r18, r1*/ ++ emit_djk(as, LAI_OR, RID_R20, rj, RID_ZERO); ++ } else { ++ emit_branch21(as, lai, rj, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ } ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref. */ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); ++ return 0; ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. */ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) //TODO ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; ++ intptr_t jgl = (intptr_t)J2G(as->J); ++ if ((uintptr_t)(ofs-jgl) < 65536) { ++ *ofsp = ofs-jgl-32768; ++ return RID_JGL; ++ } else { ++ *ofsp = (int16_t)ofs; ++ return ra_allock(as, ofs-(int16_t)ofs, allow); ++ } ++ } ++ } ++ } ++ *ofsp = 0; ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ ++static void asm_fusexref(ASMState *as, LAIns lai, Reg rd, IRRef ref, //TODO ++ RegSet allow, int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ if (ir->o == IR_ADD) { ++ intptr_t ofs2; ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), ++ checki16(ofs2))) { ++ ref = ir->op1; ++ ofs = (int32_t)ofs2; ++ } ++ } else if (ir->o == IR_STRREF) { ++ intptr_t ofs2 = 65536; ++ lua_assert(ofs == 0); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs2 = ofs + get_kval(as, ir->op2); ++ ref = ir->op1; ++ } else if (irref_isk(ir->op1)) { ++ ofs2 = ofs + get_kval(as, ir->op1); ++ ref = ir->op2; ++ } ++ if (!checki16(ofs2)) { ++ /* NYI: Fuse ADD with constant. */ ++ Reg right, left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ emit_dji(as, lai, rd, RID_TMP, ofs&0xfff); ++ emit_djk(as, LAI_ADD_D, RID_TMP, left, right); ++ return; ++ } ++ ofs = ofs2; ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++ emit_dji(as, lai, rd, base, ofs&0xfff); ++} ++ ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. */ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++#if LJ_SOFTFP ++ Reg gpr = REGARG_FIRSTGPR; ++#else ++ Reg gpr, fpr = REGARG_FIRSTFPR; ++#endif ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func); //TODO ++#if !LJ_SOFTFP ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++#endif ++ for (n = 0; n < nargs; n++) { /* Setup args. */ ++ IRRef ref = args[n]; ++ if (ref) { ++ IRIns *ir = IR(ref); ++#if !LJ_SOFTFP ++ if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && ++ !(ci->flags & CCI_VARARG)) { ++ lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ++ ra_leftov(as, fpr, ref); ++ fpr += 1; ++ } else ++#endif ++ { ++ if (gpr <= REGARG_LASTGPR) { ++ lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ ++#if !LJ_SOFTFP ++ if (irt_isfp(ir->t)) { ++ RegSet of = as->freeset; ++ Reg r; ++ /* Workaround to protect argument GPRs from being used for remat. */ ++ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); ++ r = ra_alloc1(as, ref, RSET_FPR); ++ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); ++ if (irt_isnum(ir->t)) { ++ emit_dj(as, LAI_MOVFR2GR_D, gpr, r); ++ gpr++; ++ } else if (irt_isfloat(ir->t)) { ++ emit_dj(as, LAI_MOVFR2GR_S, gpr, r); ++ gpr++; ++ } ++ } else ++#endif ++ { ++ ra_leftov(as, gpr, ref); ++ gpr++; ++ } ++ } else { ++ Reg r = ra_alloc1(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } ++ } else { ++#if !LJ_SOFTFP ++ fpr = REGARG_LASTFPR+1; ++#endif ++ if (gpr <= REGARG_LASTGPR) { ++ gpr++; ++ } else { ++ ofs += 8; ++ } ++ } ++ checkmclim(as); ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ RegSet drop = RSET_SCRATCH; ++#if !LJ_SOFTFP ++ if ((ci->flags & CCI_NOFPRCLOBBER)) ++ drop &= ~RSET_FPR; ++#endif ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. 
*/ ++ if (ra_used(ir)) { ++ lua_assert(!irt_ispri(ir->t)); ++ if (!LJ_SOFTFP && irt_isfp(ir->t)) { ++ if ((ci->flags & CCI_CASTU64)) { ++ int32_t ofs = sps_scale(ir->s); ++ Reg dest = ir->r; ++ if (ra_hasreg(dest)) { ++ ra_free(as, dest); ++ ra_modified(as, dest); ++ emit_dj(as, LAI_MOVGR2FR_D, dest, RID_RET); ++ } ++ if (ofs) { ++ //emit_dji(as, LAI_ST_D, RID_RET, RID_SP, ofs); //TODO ofs&0xfff? ++ emit_djk(as, LAI_STX_D, RID_RET, RID_SP, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++ } else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)get_kval(as, func); ++ } else { /* Need specific register for indirect calls. */ ++ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); ++ MCode *p = as->mcp; ++ *--p = LAI_JIRL | LAF_D(RID_RA) | LAF_J(r); ++ *--p = LAI_MOVE | LAF_D(RID_CFUNCADDR) | LAF_J(r); ++ //*--p = LAI_JIRL | LAF_D(RID_RA) | LAF_J(r); ++ as->mcp = p; ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++#if !LJ_SOFTFP ++static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| ++ RID2RSET(RID_F23)|RID2RSET(RID_F10)|RID2RSET(REGARG_FIRSTFPR) ++ |RID2RSET(RID_F19); ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ ra_destreg(as, ir, RID_FPRET); ++ emit_call(as, (void *)lj_ir_callinfo[id].func); ++ ra_leftov(as, REGARG_FIRSTFPR, ir->op1); ++} ++#endif ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ asm_guard(as, LAI_BNE, RID_TMP, ++ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); ++ emit_dji(as, LAI_LD_D, RID_TMP, base, -8&0xfff); ++} ++ ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ if ((as->flags & JIT_F_GS464V)) { ++ emit_djml(as, LJ_64? 
LAI_BSTRINS_D : LAI_BSTRINS_W, RID_TMP, tmp, lj_fls(SBUF_MASK_FLAG), 0); ++ } else { ++ emit_djk(as, LAI_OR, RID_TMP, RID_TMP, tmp); ++ emit_dji(as, LAI_ANDI, tmp, tmp, SBUF_MASK_FLAG); ++ } ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++#if !LJ_SOFTFP ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ //asm_guard21(as, LAI_BCEQZ, tmp&7, (tmp&7)); ++ asm_guard21(as, LAI_BCEQZ, 0, (tmp&7)); ++ //emit_djk(as, LAI_FCMP_CEQ_D, tmp&7, tmp, left); ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, tmp, left); ++ emit_dj(as, LAI_FFINT_D_W, tmp, tmp); ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, LAI_FTINT_W_D, tmp, left); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_djk(as, LAI_FADD_D, tmp, left, right); ++} ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg r) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| ++ RID2RSET(RID_R12); // r1 -> r19, r12 -> r12 ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ ++ ra_destreg(as, ir, RID_RET); ++ asm_guard(as, LAI_BNE, RID_RET+1, RID_ZERO); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func); ++ if (r == RID_NONE) ++ ra_leftov(as, REGARG_FIRSTGPR, ir->op1); ++ else if (r != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, r); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ emit_dju(as, LAI_SLLI_W, dest, dest, 0); ++ asm_callid(as, ir, IRCALL_lj_vm_tobit); ++} ++#endif ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); ++ IRRef lref = ir->op1; ++ lua_assert(irt_type(ir->t) != st); ++#if !LJ_SOFTFP ++ if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_dj(as, st == IRT_NUM ? LAI_FCVT_S_D : LAI_FCVT_D_S, ++ dest, ra_alloc1(as, lref, RSET_FPR)); ++ } else if (st == IRT_U32) { /* U32 to FP conversion. */ ++ /* y = (x ^ 0x8000000) + 2147483648.0 */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ if (irt_isfloat(ir->t)) ++ emit_dj(as, LAI_FCVT_S_D, dest, dest); ++ /* Must perform arithmetic with doubles to keep the precision. */ ++ emit_djk(as, LAI_FADD_D, dest, dest, tmp); ++ emit_dj(as, LAI_FFINT_D_W, dest, dest); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), //TODO emit_lsptr ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++ emit_dj(as, LAI_MOVGR2FR_W, RID_TMP, dest); ++ emit_djk(as, LAI_XOR, RID_TMP, RID_TMP, left); ++ emit_dji(as, LAI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++#if LJ_64 ++ } else if(st == IRT_U64) { /* U64 to FP conversion. 
*/ ++ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ MCLabel l_end = emit_label(as); ++ if (irt_isfloat(ir->t)) { ++ emit_djk(as, LAI_FADD_S, dest, dest, tmp); ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LAI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LAI_FFINT_S_L, dest, dest); ++ } else { ++ emit_djk(as, LAI_FADD_D, dest, dest, tmp); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LAI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LAI_FFINT_D_L, dest, dest); ++ } ++ //emit_branch(as, LAI_BGE, left, RID_ZERO, l_end); //TODO ++ emit_dj(as, LAI_MOVGR2FR_D, RID_TMP, dest); ++ emit_djml(as, LAI_BSTRPICK_D, RID_TMP, left, 62, 0); ++#endif ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ LAIns lai = irt_isfloat(ir->t) ? ++ (st64 ? LAI_FFINT_S_L : LAI_FFINT_S_W) : ++ (st64 ? LAI_FFINT_D_L : LAI_FFINT_D_W); ++ emit_dj(as, lai, dest, dest); ++ emit_dj(as, st64 ? LAI_MOVGR2FR_D : LAI_MOVGR2FR_W, dest, left); ++ } ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ ++ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ ++ emit_djk(as, LAI_XOR, dest, dest, RID_TMP); ++ emit_dji(as, LAI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, st == IRT_FLOAT ? LAI_FTINTRM_W_S : LAI_FTINTRM_W_D, ++ tmp, tmp); ++ emit_djk(as, st == IRT_FLOAT ? LAI_FSUB_S : LAI_FSUB_D, ++ tmp, left, tmp); ++ if (st == IRT_FLOAT) ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), ++ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); ++ else ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++#if LJ_64 ++ } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ ++ MCLabel l_end; ++ emit_dj(as, LAI_MOVFR2GR_D, dest, tmp); ++ l_end = emit_label(as); ++ /* For inputs >= 2^63 add -2^64 and convert again. */ ++ if (st == IRT_NUM) { ++ emit_dj(as, LAI_FTINTRZ_L_D, tmp, tmp); ++ emit_djk(as, LAI_FADD_D, tmp, left, tmp); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), ++ (void *)&as->J->k64[LJ_K64_M2P64], ++ rset_exclude(RSET_GPR, dest)); ++ //emit_dj(as, LAI_FTINTRZ_L_D, tmp, left); /* Delay slot. */ //TODO ++ //emit_branch21(as, LAI_BCNEZ, (left&7), l_end); ++ emit_branch21(as, LAI_BCNEZ, 0, l_end); ++ emit_dj(as, LAI_FTINTRZ_L_D, tmp, left); ++ //emit_djk(as, LAI_FCMP_CLT_D, left&7, left, tmp); // TODO ++ emit_djk(as, LAI_FCMP_CLT_D, 0, left, tmp); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), ++ (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, dest)); ++ } else { ++ emit_dj(as, LAI_FTINTRZ_L_S, tmp, tmp); ++ emit_djk(as, LAI_FADD_S, tmp, left, tmp); ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), ++ (void *)&as->J->k32[LJ_K32_M2P64], ++ rset_exclude(RSET_GPR, dest)); ++ //emit_dj(as, LAI_FTINTRZ_L_S, tmp, left); /* Delay slot. 
*/ //TODO ++ //emit_branch21(as, LAI_BCNEZ, (left&7), l_end); ++ emit_branch21(as, LAI_BCNEZ, 0, l_end); ++ emit_dj(as, LAI_FTINTRZ_L_S, tmp, left); ++ //emit_djk(as, LAI_FCMP_CLT_S, left&7, left, tmp); // TODO ++ emit_djk(as, LAI_FCMP_CLT_S, 0, left, tmp); ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), ++ (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, dest)); ++ } ++#endif ++ } else { ++ LAIns lai = irt_is64(ir->t) ? ++ (st == IRT_NUM ? LAI_FTINTRZ_L_D : LAI_FTINTRZ_L_S) : ++ (st == IRT_NUM ? LAI_FTINTRZ_W_D : LAI_FTINTRZ_W_S); ++ emit_dj(as, irt_is64(ir->t) ? LAI_MOVFR2GR_D : LAI_MOVFR2GR_S, dest, left); ++ emit_dj(as, lai, left, left); ++ } ++ } ++ } else ++#else ++ if (irt_isfp(ir->t)) { ++#if LJ_64 && LJ_HASFFI ++ if (stfp) { /* FP to FP conversion. */ ++ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : ++ IRCALL_softfp_d2f); ++ } else { /* Integer to FP conversion. */ ++ IRCallID cid = ((IRT_IS64 >> st) & 1) ? ++ (irt_isnum(ir->t) ? ++ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : ++ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : ++ (irt_isnum(ir->t) ? ++ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : ++ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); ++ asm_callid(as, ir, cid); ++ } ++#else ++ asm_callid(as, ir, IRCALL_softfp_i2d); ++#endif ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ asm_tointg(as, ir, RID_NONE); ++ } else { ++ IRCallID cid = irt_is64(ir->t) ? ++ ((st == IRT_NUM) ? ++ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : ++ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : ++ ((st == IRT_NUM) ? ++ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : ++ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); ++ asm_callid(as, ir, cid); ++ } ++ } else ++#endif ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ if ((ir->op2 & IRCONV_SEXT)) { ++ emit_dj(as, st == IRT_I8 ? LAI_EXT_W_B : LAI_EXT_W_H, dest, left); ++ } else { ++ if (st == IRT_U8) { ++ emit_dju(as, LAI_ANDI, dest, left, (int32_t)0xff); ++ } else { ++ emit_djk(as, LAI_AND, dest, left, RID_R20); ++ //emit_dj32i(as, RID_R20, RID_R0, 0xffff); ++ emit_djml(as, LAI_BSTRPICK_D, RID_R20, RID_R20, 15, 0); // zero-extend ++ emit_d16i(as, RID_R20, 0xffff); ++ } ++ } ++ } else { /* 32/64 bit integer conversions. */ ++ if (irt_is64(ir->t)) { ++ if (st64) { ++ /* 64/64 bit no-op (cast)*/ ++ ra_leftov(as, dest, lref); ++ } else { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ ++ emit_dju(as, LAI_SLLI_W, dest, left, 0); ++ } else { /* 32 to 64 bit zero extension. */ ++ emit_djml(as, LAI_BSTRPICK_D, dest, left, 31, 0); ++ } ++ } ++ } else { ++ if (st64) { ++ /* This is either a 32 bit reg/reg mov which zeroes the hiword ++ ** or a load of the loword from a 64 bit address. ++ */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_djml(as, LAI_BSTRPICK_D, dest, left, 31, 0); ++ } else { /* 32/32 bit no-op (cast). */ ++ /* Do nothing, but may need to move regs. 
*/ ++ ra_leftov(as, dest, lref); ++ } ++ } ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ int32_t ofs = 0; ++ RegSet drop = RSET_SCRATCH; ++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ++ ra_evictset(as, drop); ++ ofs = sps_scale(ir->s); ++ asm_guard(as, LAI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ /* Store the result to the spill slot or temp slots. */ ++ //emit_dji(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), ++ // RID_SP, ofs&0xfff); ++ emit_djk(as, LAI_ADD_D, ra_releasetmp(as, ASMREF_TMP1), RID_SP, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++#if LJ_64 ++/* Store tagged value for ref at base+ofs. */ ++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) ++{ ++ RegSet allow = rset_exclude(RSET_GPR, base); ++ IRIns *ir = IR(ref); ++ lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ //emit_dji(as, LAI_ST_D, ra_allock(as, (int64_t)k.u64, allow), base, ofs&0xfff); ++ emit_djk(as, LAI_STX_D, ra_allock(as, (int64_t)k.u64, allow), base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, ++ rset_exclude(allow, src)); ++ //emit_dji(as, LAI_ST_D, RID_TMP, base, ofs&0xfff); ++ emit_djk(as, LAI_STX_D, RID_TMP, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ if (irt_isinteger(ir->t)) { ++ emit_djk(as, LAI_ADD_D, RID_TMP, RID_TMP, type); ++ emit_djml(as, LAI_BSTRPICK_D, RID_TMP, src, 31, 0); ++ } else { ++ emit_djk(as, LAI_ADD_D, RID_TMP, src, type); ++ } ++ } ++} ++#endif ++ ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ else { /* Otherwise force a spill and use the spill slot. */ ++ emit_djk(as, LAI_ADD_D, dest, RID_SP, RID_R19); ++ emit_d16i(as, RID_R19, ra_spill(as, ir)); ++ } ++ } else { ++ /* Otherwise use g->tmptv to hold the TValue. */ ++ asm_tvstore64(as, dest, 0, ref); ++ emit_djk(as, LAI_ADD_D, dest, RID_JGL, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)(offsetof(global_State, tmptv)-32768)); ++ } ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ //emit_dj32i(as, dest, base, ofs); //TODO ++ emit_djk(as, LAI_ADD_D, dest, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ // emit_djka(as, LAI_ALSL_D, dest, idx, base, 2); ++ emit_djk(as, LAI_ADD_D, dest, RID_TMP, base); ++ emit_dju(as, LAI_SLLI_D, RID_TMP, idx, 3); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. 
++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; ++ Reg cmp64 = RID_NONE; ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); ++ IRType1 kt = irkey->t; ++ uint32_t khash; ++ MCLabel l_end, l_loop, l_next; ++ ++ rset_clear(allow, tab); ++ if (!LJ_SOFTFP && irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ if (LJ_SOFTFP || !irt_isnum(kt)) { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (LJ_SOFTFP && irt_isnum(kt)) { ++ cmp64 = key; ++ } else if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; ++ } else { ++ lua_assert(irt_ispri(kt) && !irt_isnil(kt)); ++ k = ~((int64_t)~irt_toitype(ir->t) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ } ++ ++ /* Key not found in chain: jump to exit (if merged) or load niltv. */ ++ l_end = emit_label(as); ++ as->invmcp = NULL; ++ if (merge == IR_NE) ++ asm_guard(as, LAI_BEQ, RID_ZERO, RID_ZERO); ++ else if (destused) ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_move(as, dest, tmp1); ++ //emit_dji(as, LAI_LD_D, tmp1, dest, ((int32_t)offsetof(Node, next))&0xfff); //TODO si12 ++ emit_djk(as, LAI_LDX_D, tmp1, dest, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)offsetof(Node, next)); ++ l_next = emit_label(as); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) { /* Must match asm_guard(). */ ++ //emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); //TODO ++ l_end = asm_exitstub_addr(as); ++ } ++ if (!LJ_SOFTFP && irt_isnum(kt)) { ++ //emit_branch21(as, LAI_BCNEZ, (tmpnum&7), l_end); ++ emit_branch21(as, LAI_BCNEZ, 0, l_end); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ //emit_djk(as, LAI_FCMP_CEQ_D, tmpnum&7, tmpnum, key); // TODO ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, tmpnum, key); ++ *--as->mcp = LAI_NOP; /* Avoid NaN comparison overhead. 
*/ ++ emit_branch(as, LAI_BEQ, tmp1, RID_ZERO, l_next); ++ //emit_dji(as, LAI_SLTUI, tmp1, tmp1, ((int32_t)LJ_TISNUM)&0xfff); ++ emit_djk(as, LAI_SLTU, tmp1, tmp1, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)LJ_TISNUM); ++ emit_dju(as, LAI_SRAI_D, tmp1, tmp1, 47); ++ emit_dj(as, LAI_MOVGR2FR_D, tmpnum, tmp1); ++ //emit_dji(as, LAI_LD_D, tmp1, dest, ((int32_t)offsetof(Node, key.u64))&0xfff); ++ emit_djk(as, LAI_LDX_D, tmp1, dest, RID_R19); ++ } else { ++ emit_branch(as, LAI_BEQ, RID_R20, cmp64, l_end); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ emit_djk(as, LAI_OR, RID_R20, RID_R0, tmp1); ++ //emit_dji(as, LAI_LD_D, tmp1, dest, ((int32_t)offsetof(Node, key.u64))&0xfff); ++ emit_djk(as, LAI_LDX_D, tmp1, dest, RID_R19); ++ } ++ emit_d16i(as, RID_R19, (int32_t)offsetof(Node, key.u64)); ++ // *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); ++ *l_loop = LAI_BNE | LAF_J(tmp1) | LAF_D(RID_ZERO) | LAF_I(((as->mcp-l_loop) & 0xffffu)); ++ if (!isk && irt_isaddr(kt)) { ++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); ++ emit_djk(as, LAI_ADD_D, tmp2, key, type); ++ rset_clear(allow, type); ++ } ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = isk ? ir_khash(irkey) : 1; ++ if (khash == 0) { ++ //emit_dji(as, LAI_LD_D, dest, tab, ((int32_t)offsetof(GCtab, node))&0xfff); ++ emit_djk(as, LAI_LDX_D, dest, tab, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)offsetof(GCtab, node)); ++ } else { ++ Reg tmphash = tmp1; ++ if (isk) ++ tmphash = ra_allock(as, khash, allow); ++ emit_djk(as, LAI_ADD_D, dest, dest, tmp1); ++ lua_assert(sizeof(Node) == 24); ++ emit_djk(as, LAI_SUB_W, tmp1, tmp2, tmp1); ++ emit_dju(as, LAI_SLLI_W, tmp1, tmp1, 3); ++ emit_dju(as, LAI_SLLI_W, tmp2, tmp1, 5); ++ emit_djk(as, LAI_AND, tmp1, tmp2, tmphash); ++ emit_dji(as, LAI_LD_D, dest, tab, ((int32_t)offsetof(GCtab, node))&0xfff); ++ emit_dji(as, LAI_LD_W, tmp2, tab, ((int32_t)offsetof(GCtab, hmask))&0xfff); ++ if (isk) { ++ /* Nothing to do. */ ++ } else if (irt_isstr(kt)) { ++ emit_dji(as, LAI_LD_W, tmp1, key, ((int32_t)offsetof(GCstr, hash))&0xfff); ++ } else { /* Must match with hash*() in lj_tab.c. */ ++ emit_djk(as, LAI_SUB_W, tmp1, tmp1, tmp2); ++ emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); //TODO ++ emit_djk(as, LAI_XOR, tmp1, tmp1, tmp2); ++ emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); //TODO ++ emit_djk(as, LAI_SUB_W, tmp2, tmp2, dest); ++ emit_djk(as, LAI_XOR, tmp2, tmp2, tmp1); ++ emit_dju(as, LAI_ROTRI_W, dest, tmp1, (-HASH_ROT1)&31); ++ if (irt_isnum(kt)) { ++ emit_djk(as, LAI_ADD_W, tmp1, tmp1, tmp1); ++ emit_dju(as, LAI_SRAI_D, tmp1, LJ_SOFTFP ? key : tmp1, 32); ++ emit_dju(as, LAI_SLLI_W, tmp2, LJ_SOFTFP ? key : tmp1, 0); ++#if !LJ_SOFTFP ++ emit_dj(as, LAI_MOVFR2GR_D, tmp1, key); ++#endif ++ } else { ++ checkmclim(as); ++ emit_dju(as, LAI_SRAI_D, tmp1, tmp1, 32); ++ emit_dju(as, LAI_SLLI_W, tmp2, key, 0); ++ emit_djk(as, LAI_ADD_D, tmp1, key, type); ++ } ++ } ++ } ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; //TODO ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ Reg idx = node; ++ Reg key = ra_scratch(as, allow); ++ int64_t k; ++ lua_assert(ofs % sizeof(Node) == 0); ++ if (ofs > 32736) { //TODO why 32736 ? 
++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } else if (ra_hasreg(dest)) { ++ // emit_dj32i(as, dest, node, ofs); ++ //emit_add(as, dest, node, ofs); ++ emit_djk(as, LAI_ADD_D, dest, node, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++ if (irt_ispri(irkey->t)) { ++ lua_assert(!irt_isnil(irkey->t)); ++ k = ~((int64_t)~irt_toitype(irkey->t) << 47); ++ } else if (irt_isnum(irkey->t)) { ++ k = (int64_t)ir_knum(irkey)->u64; ++ } else { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); ++ } ++ asm_guard(as, LAI_BNE, key, ra_allock(as, k, allow)); ++ //emit_dji(as, LAI_LD_D, key, idx, kofs&0xfff); //TODO si12 ++ emit_djk(as, LAI_LDX_D, key, idx, RID_R19); ++ emit_d16i(as, RID_R19, kofs); ++ if (ofs > 32736) ++ emit_djk(as, LAI_ADD_D, dest, node, ra_allock(as, ofs, allow)); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, LAI_LD_D, dest, v, RSET_GPR); ++ } else { ++ Reg uv = ra_scratch(as, RSET_GPR); ++ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_UREFC) { ++ asm_guard(as, LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_dji(as, LAI_ADDI_D, dest, uv, ((int32_t)offsetof(GCupval, tv))&0xfff); //TODO si12 ++ emit_dji(as, LAI_LD_BU, RID_TMP, uv, ((int32_t)offsetof(GCupval, closed))&0xfff); ++ } else { ++ emit_dji(as, LAI_LD_D, dest, uv, ((int32_t)offsetof(GCupval, v))&0xfff); ++ } ++ //emit_dji(as, LAI_LD_D, uv, func, ((int32_t)offsetof(GCfuncL, uvptr) + ++ // (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8))&0xfff); ++ emit_djk(as, LAI_LDX_D, uv, func, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lua_assert(!ra_used(ir)); ++} ++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg dest = ra_dest(as, ir, allow); ++ Reg base = ra_alloc1(as, ir->op1, allow); ++ IRIns *irr = IR(ir->op2); ++ int32_t ofs = sizeof(GCstr); ++ rset_clear(allow, base); ++ if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { //TODO checki16 ++ // emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); ++ //emit_dj32i(as, dest, base, ofs + irr->i); ++ emit_djk(as, LAI_ADD_D, dest, base, RID_R19); ++ emit_d16i(as, RID_R19, (ofs + irr->i)); ++ } else { ++ // emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); ++ //emit_dj32i(as, dest, dest, ofs); //TODO ++ emit_djk(as, LAI_ADD_D, dest, dest, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ emit_djk(as, LAI_ADD_D, dest, base, ra_alloc1(as, ir->op2, allow)); ++ } ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++ ++static LAIns asm_fxloadins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: return LAI_LD_B; ++ case IRT_U8: return LAI_LD_BU; ++ case IRT_I16: return LAI_LD_H; ++ case IRT_U16: return LAI_LD_HU; ++ case IRT_NUM: ++ lua_assert(!LJ_SOFTFP32); ++ if (!LJ_SOFTFP) return LAI_FLD_D; ++ /* fallthrough */ ++ case IRT_FLOAT: if (!LJ_SOFTFP) return LAI_FLD_S; ++ /* fallthrough */ ++ default: return (LJ_64 && irt_is64(ir->t)) ? 
LAI_LD_D : LAI_LD_W; ++ } ++} ++ ++static LAIns asm_fxstoreins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return LAI_ST_B; ++ case IRT_I16: case IRT_U16: return LAI_ST_H; ++ case IRT_NUM: ++ lua_assert(!LJ_SOFTFP32); ++ if (!LJ_SOFTFP) return LAI_FST_D; ++ /* fallthrough */ ++ case IRT_FLOAT: if (!LJ_SOFTFP) return LAI_FST_S; ++ /* fallthrough */ ++ default: return (LJ_64 && irt_is64(ir->t)) ? LAI_ST_D : LAI_ST_W; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ LAIns lai = asm_fxloadins(as, ir); ++ Reg idx; ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ ++ idx = RID_JGL; ++ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); //TODO ++ } else { ++ idx = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ++ //emit_dji(as, LAI_ADDI_D, dest, idx, ofs&0xfff); ++ emit_djk(as, LAI_ADD_D, dest, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ } ++ lua_assert(!irt_isfp(ir->t)); ++ // emit_dji(as, lai, dest, idx, ofs&0xfff); ++ /* li r17, ofs; ldx.d/w dest, idx, r17 */ ++ switch (lai) { ++ case LAI_LD_B: ++ lai = LAI_LDX_B; ++ break; ++ case LAI_LD_BU: ++ lai = LAI_LDX_BU; ++ break; ++ case LAI_LD_H: ++ lai = LAI_LDX_H; ++ break; ++ case LAI_LD_HU: ++ lai = LAI_LDX_HU; ++ break; ++ case LAI_LD_D: ++ lai = LAI_LDX_D; ++ break; ++ case LAI_LD_W: ++ lai = LAI_LDX_W; ++ break; ++ case LAI_FLD_D: ++ lai = LAI_FLDX_D; ++ break; ++ case LAI_FLD_S: ++ lai = LAI_FLDX_S; ++ break; ++ default: ++ break; ++ } ++ emit_djk(as, lai, dest, idx, RID_R19); ++ //emit_loadi(as, RID_R19, ofs); ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ LAIns lai = asm_fxstoreins(as, ir); ++ lua_assert(!irt_isfp(ir->t)); ++ emit_dji(as, lai, src, idx, ofs&0xfff); ++ } ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED)); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1(as, ir->op2, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, ++ rset_exclude(RSET_GPR, src), ofs); ++ } ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_TMP, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ IRType1 t = ir->t; ++ ++ if (ra_used(ir)) { ++ lua_assert((irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ if (irt_isaddr(t)) ++ emit_djml(as, LAI_BSTRPICK_D, dest, dest, 46, 0); //TODO 14+1+32? 
++ else if (irt_isint(t)) ++ emit_dju(as, LAI_SLLI_W, dest, dest, 0); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ rset_clear(allow, idx); ++ if (irt_isnum(t)) { ++ asm_guard(as, LAI_BEQ, RID_TMP, RID_ZERO); ++ //emit_dji(as, LAI_SLTUI, RID_TMP, type, ((int32_t)LJ_TISNUM)&0xfff); //TODO ++ emit_djk(as, LAI_SLTU, RID_TMP, type, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)LJ_TISNUM); ++ } else { ++ asm_guard(as, LAI_BNE, type, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ if (ra_hasreg(dest)) { ++ if (!LJ_SOFTFP && irt_isnum(t)) { ++ //emit_dji(as, LAI_FLD_D, dest, idx, ofs&0xfff); ++ emit_djk(as, LAI_FLDX_D, dest, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ dest = type; ++ } ++ } else { ++ dest = type; ++ } ++ emit_dju(as, LAI_SRAI_D, type, dest, (47 & 0x3f)); ++ //emit_dji(as, LAI_LD_D, dest, idx, ofs&0xfff); ++ emit_djk(as, LAI_LDX_D, dest, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++ if (ir->r == RID_SINK) ++ return; ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ //emit_dji(as, LJ_SOFTFP ? LAI_ST_D : LAI_FST_D, src, idx, ofs&0xfff); ++ emit_djk(as, LJ_SOFTFP ? LAI_STX_D : LAI_FSTX_D, src, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } else { ++ Reg tmp = RID_TMP; ++ if (irt_ispri(ir->t)) { ++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); ++ rset_clear(allow, tmp); ++ } else { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ rset_clear(allow, type); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ //emit_dji(as, LAI_ST_D, tmp, idx, ofs&0xfff); ++ emit_djk(as, LAI_STX_D, tmp, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ if (ra_hasreg(src)) { ++ if (irt_isinteger(ir->t)) { ++ emit_djk(as, LAI_ADD_D, tmp, tmp, type); ++ emit_djml(as, LAI_BSTRPICK_D, tmp, src, 31, 0); //TODO ++ } else { ++ emit_djk(as, LAI_ADD_D, tmp, src, type); ++ } ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ IRType1 t = ir->t; ++ int32_t ofs = 8*((int32_t)ir->op1-2); ++ lua_assert(!(ir->op2 & IRSLOAD_PARENT)); ++ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { ++ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t.irt = IRT_NUM; /* Continue with a regular number type check. */ ++ } else ++ if (ra_used(ir)) { ++ lua_assert((irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++ if (ir->op2 & IRSLOAD_CONVERT) { ++ if (irt_isint(t)) { ++ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func); //TODO ++ if (tmp != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, tmp); ++#else ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, LAI_FTINTRZ_W_D, tmp, tmp); ++#endif ++ dest = tmp; ++ t.irt = IRT_NUM; /* Check for original type. 
*/ ++ } else { ++ Reg tmp = ra_scratch(as, RSET_GPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func); //TODO ++ emit_dju(as, LAI_SLLI_W, REGARG_FIRSTGPR, tmp, 0); ++#else ++ emit_dj(as, LAI_FFINT_D_W, dest, dest); ++ emit_dj(as, LAI_MOVGR2FR_W, tmp, dest); ++#endif ++ dest = tmp; ++ t.irt = IRT_INT; /* Check for original type. */ ++ } ++ } ++ else if (irt_isaddr(t)) { ++ /* Clear type from pointers. */ ++ emit_djml(as, LAI_BSTRPICK_D, dest, dest, 46, 0); ++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { ++ /* Sign-extend integers. */ ++ emit_dju(as, LAI_SLLI_W, dest, dest, 0); ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++dotypecheck: ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ type = dest < RID_MAX_GPR ? dest : RID_TMP; ++ if (irt_ispri(t)) { ++ asm_guard(as, LAI_BNE, type, ++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); ++ } else { ++ if (irt_isnum(t)) { ++ asm_guard(as, LAI_BEQ, RID_TMP, RID_ZERO); ++ //emit_dji(as, LAI_SLTUI, RID_TMP, RID_TMP, ((int32_t)LJ_TISNUM)&0xfff); ++ emit_djk(as, LAI_SLTU, RID_TMP, RID_TMP, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)LJ_TISNUM); ++ if (!LJ_SOFTFP && ra_hasreg(dest)) { ++ //emit_dji(as, LAI_FLD_D, dest, base, ofs&0xfff); ++ emit_djk(as, LAI_FLDX_D, dest, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++ } else { ++ asm_guard(as, LAI_BNE, RID_TMP, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ emit_dju(as, LAI_SRAI_D, RID_TMP, type, 47); ++ } ++ //emit_dji(as, LAI_LD_D, type, base, ofs&0xfff); ++ emit_djk(as, LAI_LDX_D, type, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } else if (ra_hasreg(dest)) { ++ if (!LJ_SOFTFP && irt_isnum(t)) { ++ emit_djk(as, LAI_FLDX_D, dest, base, RID_R19); ++ } else { ++ emit_djk(as, irt_isint(t) ? LAI_LDX_W : LAI_LDX_D, dest, base, RID_R19); ++ } ++ emit_d16i(as, RID_R19, ofs); ++ } ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet drop = RSET_SCRATCH; ++ lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ emit_dji(as, sz == 8 ? LAI_ST_D : LAI_ST_W, ra_alloc1(as, ir->op2, allow), ++ RID_RET, (sizeof(GCcdata))&0xfff); ++ lua_assert(sz == 4 || sz == 8); ++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. 
*/ ++ emit_dji(as, LAI_ST_B, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))&0xfff); ++ emit_dji(as, LAI_ST_H, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))&0xfff); ++ //emit_dj32i(as, RID_RET+1, 0, ~LJ_TCDATA); ++ emit_djk(as, LAI_ADD_D, RID_RET+1, 0, RID_R19); ++ emit_d16i(as, RID_R19, ~LJ_TCDATA); ++ //emit_dj32i(as, RID_TMP, 0, id); /* Lower 16 bit used. Sign-ext ok. */ ++ emit_djk(as, LAI_ADD_D, RID_TMP, 0, RID_R19); ++ emit_d16i(as, RID_R19, id); ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ++ ra_releasetmp(as, ASMREF_TMP1)); ++} ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg link = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_dji(as, LAI_ST_D, link, tab, ((int32_t)offsetof(GCtab, gclist))&0xfff); ++ emit_dji(as, LAI_ST_B, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); ++ emit_setgl(as, tab, gc.grayagain); //TODO ++ emit_getgl(as, link, gc.grayagain); //TODO ++ //emit_djk(as, LAI_XOR, mark, mark, RID_TMP); /* Clear black bit. */ ++ emit_branch(as, LAI_BEQ, RID_TMP, RID_ZERO, l_end); ++ emit_djk(as, LAI_XOR, mark, mark, RID_TMP); ++ emit_dju(as, LAI_ANDI, RID_TMP, mark, LJ_GC_BLACK); ++ emit_dji(as, LAI_LD_BU, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). */ ++ lua_assert(IR(ir->op1)->o == IR_UREFC); ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ //emit_dji(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ //emit_dj32i(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); //TODO daddiu ++ emit_djk(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, RID_R19); ++ emit_d16i(as, RID_R19, -32768); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ emit_branch(as, LAI_BEQ, RID_TMP, RID_ZERO, l_end); ++ emit_dju(as, LAI_ANDI, tmp, tmp, LJ_GC_BLACK); ++ emit_branch(as, LAI_BEQ, RID_TMP, RID_ZERO, l_end); ++ emit_dju(as, LAI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_dji(as, LAI_LD_BU, tmp, obj, ++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))&0xfff); ++ emit_dji(as, LAI_LD_BU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))&0xfff); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++#if !LJ_SOFTFP ++static void asm_fparith(ASMState *as, IRIns *ir, LAIns lai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, lai, dest, left, right); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, LAIns lai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ emit_dj(as, lai, dest, left); ++} ++#endif ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (ir->op2 <= IRFPM_TRUNC) ++ asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); ++ 
else if (ir->op2 == IRFPM_SQRT) ++ asm_fpunary(as, ir, LAI_FSQRT_D); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); ++} ++ ++#if !LJ_SOFTFP ++#define asm_fpadd(as, ir) asm_fparith(as, ir, LAI_FADD_D) ++#define asm_fpsub(as, ir) asm_fparith(as, ir, LAI_FSUB_D) ++#define asm_fpmul(as, ir) asm_fparith(as, ir, LAI_FMUL_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) ++#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) ++#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) ++#endif ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++ IRType1 t = ir->t; ++ if (irt_isnum(t)) { ++ asm_fpadd(as, ir); ++ } else ++ { ++ /* TODO fmadd.s/d */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checki16(k)) { ++ if (LJ_64 && irt_is64(t)) { ++ emit_add(as, dest, left, k); ++ } else { ++ emit_addw(as, dest, left, k); ++ } ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, (LJ_64 && irt_is64(t)) ? LAI_ADD_D : LAI_ADD_W, dest, ++ left, right); ++ } ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpsub(as, ir); ++ } else ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, (LJ_64 && irt_is64(ir->t)) ? LAI_SUB_D : LAI_SUB_W, dest, ++ left, right); ++ } ++} ++ ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpmul(as, ir); ++ } else ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (LJ_64 && irt_is64(ir->t)) { ++ emit_djk(as, LAI_MUL_D, dest, left, right); ++ } else { ++ emit_djk(as, LAI_MUL_W, dest, left, right); ++ } ++ } ++} ++ ++static void asm_mod(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isint(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : ++ IRCALL_lj_carith_modu64); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_modi); ++} ++ ++#if !LJ_SOFTFP ++static void asm_pow(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : ++ IRCALL_lj_carith_powu64); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_powi); ++} ++ ++static void asm_div(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : ++ IRCALL_lj_carith_divu64); ++ else ++#endif ++ asm_fparith(as, ir, LAI_DIV_D); ++} ++#endif ++ ++static void asm_fpdiv(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ asm_fparith(as, ir, LAI_FDIV_D); ++#else ++ asm_callid(as, ir, IRCALL_softfp_div); ++#endif ++} ++ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ asm_fpunary(as, ir, LAI_FNEG_D); ++ } else ++#elif LJ_64 /* && LJ_SOFTFP */ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_djk(as, LAI_XOR, dest, left, ++ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); ++ } else ++#endif ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_djk(as, (LJ_64 && irt_is64(ir->t)) ? 
LAI_SUB_D : LAI_SUB_W, dest, ++ RID_ZERO, left); ++ } ++} ++ ++#if !LJ_SOFTFP ++#define asm_abs(as, ir) asm_fpunary(as, ir, LAI_FABS_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_abs(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ emit_djml(as, LAI_BSTRPICK_D, dest, left, 62, 0); //TODO 30+1+32 ++} ++#endif ++#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) ++#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) ++ ++static void asm_arithov(ASMState *as, IRIns *ir) ++{ ++ /* TODO */ ++ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); ++ lua_assert(!irt_is64(ir->t)); ++ if (irref_isk(ir->op2)) { ++ int k = IR(ir->op2)->i; ++ if (ir->o == IR_SUBOV) k = -k; ++ if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ asm_guard(as, k >= 0 ? LAI_BNE : LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_djk(as, LAI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); ++ emit_dj32i(as, dest, left, k); // addiu ++ if (dest == left) emit_move(as, RID_TMP, left); ++ return; ++ } ++ } ++ left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ asm_guard(as, LAI_BLT, RID_TMP, RID_ZERO); ++ emit_djk(as, LAI_AND, RID_TMP, RID_TMP, tmp); ++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ ++ emit_djk(as, LAI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } else { /* ((dest^left) & (dest^~right)) < 0 */ ++ emit_djk(as, LAI_XOR, RID_TMP, RID_TMP, dest); ++ emit_djk(as, LAI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); ++ } ++ emit_djk(as, LAI_XOR, tmp, dest, dest == left ? RID_TMP : left); ++ emit_djk(as, ir->o == IR_ADDOV ? LAI_ADD_W : LAI_SUB_W, dest, left, right); ++ if (dest == left || dest == right) ++ emit_move(as, RID_TMP, dest == left ? 
left : right); ++} ++ ++#define asm_addov(as, ir) asm_arithov(as, ir) ++#define asm_subov(as, ir) asm_arithov(as, ir) ++ ++static void asm_mulov(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ asm_guard(as, LAI_BNE, RID_TMP, tmp); ++ emit_dju(as, LAI_SRAI_W, RID_TMP, dest, 31); ++ emit_djk(as, LAI_MUL_W, dest, left, right); ++ emit_djk(as, LAI_MULH_W, tmp, left, right); ++} ++ ++static void asm_bnot(ASMState *as, IRIns *ir) ++{ ++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); ++ IRIns *irl = IR(ir->op1); ++ if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { ++ left = ra_alloc2(as, irl, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ } else { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ right = RID_ZERO; ++ } ++ emit_djk(as, LAI_NOR, dest, left, right); ++} ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (irt_is64(ir->t)) { ++ emit_dj(as, LAI_REVH_D, dest, RID_TMP); ++ emit_dj(as, LAI_REVB_4H, RID_TMP, left); ++ } else { ++ emit_dju(as, LAI_ROTRI_W, dest, RID_TMP, 16); ++ emit_dj(as, LAI_REVB_2H, RID_TMP, left); ++ } ++} ++ ++static void asm_bitop1(ASMState *as, LAIns lai, Reg rd, Reg rj, int32_t i) ++{ ++ emit_djk(as, LAI_ADD_W, rd, rd, RID_R20); ++ emit_dju(as, LAI_SLLI_W, RID_R20, RID_R20, 12); ++ emit_dju(as, lai, RID_R20, RID_R20, (i&0xf000)>>12); ++ emit_dju(as, LAI_SRLI_W, RID_R20, RID_R20, 12); ++ emit_dju(as, lai, rd, RID_R20, i&0xfff); ++ emit_djk(as, LAI_OR, RID_R20, RID_R0, rj); ++} ++ ++static void asm_bitop(ASMState *as, IRIns *ir, LAIns lai, LAIns laik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checku16(k)) { ++ asm_bitop1(as, laik, dest, left, k); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, lai, dest, left, right); ++} ++ ++#define asm_band(as, ir) asm_bitop(as, ir, LAI_AND, LAI_ANDI) ++#define asm_bor(as, ir) asm_bitop(as, ir, LAI_OR, LAI_ORI) ++#define asm_bxor(as, ir) asm_bitop(as, ir, LAI_XOR, LAI_XORI) ++ ++static void asm_bitshift(ASMState *as, IRIns *ir, LAIns lai, LAIns laik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op2)) { /* Constant shifts. */ ++ uint32_t shift = (uint32_t)IR(ir->op2)->i; ++ if (LJ_64 && irt_is64(ir->t)) laik = laik + 0x8000; ++ emit_dju(as, laik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), ++ shift); ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (LJ_64 && irt_is64(ir->t)) { ++ if (lai == LAI_ROTR_W) { ++ lai = lai + 0x8000; ++ } else { ++ lai = lai + 0x18000; ++ } ++ } ++ emit_djk(as, lai, dest, left, right); /* Shift amount is in rs. */ ++ } ++} ++ ++#define asm_bshl(as, ir) asm_bitshift(as, ir, LAI_SLL_W, LAI_SLLI_W) ++#define asm_bshr(as, ir) asm_bitshift(as, ir, LAI_SRL_W, LAI_SRLI_W) ++#define asm_bsar(as, ir) asm_bitshift(as, ir, LAI_SRA_W, LAI_SRAI_W) ++#define asm_brol(as, ir) lua_assert(0) ++#define asm_bror(as, ir) asm_bitshift(as, ir, LAI_ROTR_W, LAI_ROTRI_W) ++ ++ ++#if LJ_SOFTFP ++static void asm_sfpmin_max(ASMState *as, IRIns *ir) ++{ ++ CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? 
IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++ asm_setupresult(as, ir, &ci); ++ emit_call(as, (void *)ci.func); //TODO ++ ci.func = NULL; ++ asm_gencall(as, &ci, args); ++} ++#endif ++ ++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) ++{ ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpmin_max(as, ir); ++#else ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, ismax ? LAI_FMAX_D : LAI_FMIN_D, dest, left, right); ++#endif ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (left == right) { ++ if (dest != left) emit_move(as, dest, left); ++ } else { ++ emit_djk(as, LAI_OR, dest, dest, RID_TMP); ++ if (dest != right) { ++ emit_djk(as, LAI_MASKEQZ, RID_TMP, right, RID_TMP); ++ emit_djk(as, LAI_MASKNEZ, dest, left, RID_TMP); ++ } else { ++ emit_djk(as, LAI_MASKNEZ, RID_TMP, left, RID_TMP); ++ emit_djk(as, LAI_MASKEQZ, dest, right, RID_TMP); ++ } ++ emit_djk(as, LAI_SLT, RID_TMP, ++ ismax ? left : right, ismax ? right : left); ++ } ++ } ++} ++ ++#define asm_min(as, ir) asm_min_max(as, ir, 0) ++#define asm_max(as, ir) asm_min_max(as, ir, 1) ++ ++/* -- Comparisons --------------------------------------------------------- */ ++ ++#if LJ_SOFTFP ++/* SFP comparisons. */ ++static void asm_sfpcomp(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; ++ RegSet drop = RSET_SCRATCH; ++ Reg r; ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++ ++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+1; r++) { ++ if (!rset_test(as->freeset, r) && ++ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) ++ rset_clear(drop, r); ++ } ++ ra_evictset(as, drop); ++ ++ asm_setupresult(as, ir, ci); ++ ++ switch ((IROp)ir->o) { ++ case IR_LT: ++ asm_guard(as, LAI_BGE, RID_RET, RID_ZERO); ++ break; ++ case IR_ULT: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 1); ++ asm_guard(as, LAI_BEQ, RID_RET, RID_ZERO); ++ break; ++ case IR_GE: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 2); ++ asm_guard(as, LAI_BLT, RID_RET, RID_ZERO); ++ break; ++ case IR_LE: ++ asm_guard(as, LAI_BLT, RID_ZERO, RID_RET); ++ break; ++ case IR_GT: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 2); ++ asm_guard(as, LAI_BGE, RID_ZERO, RID_RET); ++ break; ++ case IR_UGE: ++ asm_guard(as, LAI_BLT, RID_RET, RID_ZERO); ++ break; ++ case IR_ULE: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 1); ++ break; ++ case IR_UGT: case IR_ABC: ++ asm_guard(as, LAI_BGE, RID_ZERO, RID_RET); ++ break; ++ case IR_EQ: case IR_NE: ++ asm_guard(as, (ir->o & 1) ? LAI_BEQ : LAI_BNE, RID_RET, RID_ZERO); ++ default: ++ break; ++ } ++ asm_gencall(as, ci, args); ++} ++#endif ++ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ ++ /* 00 01 10 11 100 101 110 111 */ ++ IROp op = ir->o; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else ++ Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); ++ asm_guard21(as, (op&1) ? 
LAI_BCNEZ : LAI_BCEQZ, 0, (tmp&7)); ++ // emit_dst(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); //TODO ++ // use case ++ switch (op) { ++ case IR_LT: case IR_UGE: ++ emit_djk(as, LAI_FCMP_CLT_D, 0, left, right); ++ break; ++ case IR_GE: case IR_ULT: ++ emit_djk(as, LAI_FCMP_CULT_D, 0, left, right); ++ break; ++ case IR_LE: case IR_UGT: case IR_ABC: ++ emit_djk(as, LAI_FCMP_CLE_D, 0, left, right); ++ break; ++ case IR_GT: case IR_ULE: ++ emit_djk(as, LAI_FCMP_CULE_D, 0, left, right); ++ break; ++ case IR_EQ: ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, left, right); ++ break; ++ case IR_NE: ++ emit_djk(as, LAI_FCMP_CNE_D, 0, left, right); ++ break; ++ default: ++ break; ++ } ++#endif ++ } else { ++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (op == IR_ABC) op = IR_UGT; ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { ++ /* MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ++ ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); ++ asm_guard(as, mi, left, 0); ++ */ ++ if (op&2) { ++ if (op&1) { ++ asm_guard(as, LAI_BGE, RID_ZERO, left); ++ } else { ++ asm_guard(as, LAI_BLT, RID_ZERO, left); ++ } ++ } else { ++ if (op&1) { ++ asm_guard(as, LAI_BLT, left, RID_ZERO); ++ } else { ++ asm_guard(as, LAI_BGE, left, RID_ZERO); ++ } ++ } ++ } else { ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if ((op&2)) k++; ++ if (checki16(k)) { ++ asm_guard(as, (op&1) ? LAI_BNE : LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_djk(as, (op&4) ? LAI_SLTU : LAI_SLT, //TODO si12 ++ RID_TMP, left, RID_R20); ++ //emit_djk(as, LAI_ADD_D, RID_R20, RID_R19, RID_ZERO); ++ //emit_dju(as, LAI_ORI, RID_R19, RID_R19, k&0xfff); ++ //emit_di(as, LAI_LU12I_W, RID_R19, (k>>12)&0xfffff); ++ //emit_dj32i(as, RID_R20, RID_ZERO, k); ++ emit_d16i(as, RID_R20, k); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ asm_guard(as, ((op^(op>>1))&1) ? LAI_BNE : LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_djk(as, (op&4) ? LAI_SLTU : LAI_SLT, ++ RID_TMP, (op&2) ? right : left, (op&2) ? left : right); ++ } ++ } ++} ++ ++static void asm_equal(ASMState *as, IRIns *ir) ++{ ++ Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? ++ RSET_FPR : RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else ++ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); ++ asm_guard21(as, (ir->o & 1) ? LAI_BCNEZ : LAI_BCEQZ, 0, (tmp&7)); ++ //emit_djk(as, LAI_FCMP_CEQ_D, tmp&7, left, right); ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, left, right); ++#endif ++ } else { ++ asm_guard(as, (ir->o & 1) ? LAI_BEQ : LAI_BNE, left, right); ++ } ++} ++ ++/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++ ++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lua_assert(0); /* Unused on 64 bit. */ ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ asm_guard(as, LAI_BNE, RID_TMP, RID_ZERO); ++ emit_dju(as, LAI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); //HOOK_PROFILE=0x80 ++ emit_lsglptr2(as, LAI_LD_BU, RID_TMP, //TODO ++ (int32_t)offsetof(global_State, hookmask)); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. 
*/ ++static void asm_stack_check(ASMState *as, BCReg topslot, ++ IRIns *irp, RegSet allow, ExitNo exitno) ++{ ++ /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ ++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ++ ExitNo oldsnap = as->snapno; ++ rset_clear(allow, pbase); ++ tmp = allow ? rset_pickbot(allow) : RID_RET; ++ as->snapno = exitno; ++ asm_guard(as, LAI_BNE, RID_TMP, RID_ZERO); ++ as->snapno = oldsnap; ++ if (allow == RSET_EMPTY) /* Restore temp. register. */ ++ emit_dji(as, LAI_LD_D, tmp, RID_SP, 0); ++ else ++ ra_modified(as, tmp); ++ //emit_dji(as, LAI_SLTUI, RID_TMP, RID_TMP, ((int32_t)(8*topslot))&0xfff); //TODO si12 ++ emit_djk(as, LAI_SLTU, RID_TMP, RID_TMP, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)(8*topslot)); ++ emit_djk(as, LAI_SUB_D, RID_TMP, tmp, pbase); ++ emit_djk(as, LAI_LDX_D, tmp, tmp, RID_R19); ++ emit_loadi(as, RID_R19, offsetof(lua_State, maxstack)); ++ if (pbase == RID_TMP) ++ emit_getgl(as, RID_TMP, jit_base); ++ emit_getgl(as, tmp, cur_L); ++ if (allow == RSET_EMPTY) /* Spill temp. register. */ ++ emit_dji(as, LAI_ST_D, tmp, RID_SP, 0); ++} ++ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++#if defined(LUA_USE_ASSERT) ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; ++#endif ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. */ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP /* && LJ_64 */ ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); ++ //emit_dji(as, LAI_ST_D, src, RID_BASE, ofs&0xfff); //TODO si12 ++ emit_djk(as, LAI_STX_D, src, RID_BASE, RID_R19); ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ //emit_dji(as, LAI_FST_D, src, RID_BASE, ofs&0xfff); //TODO si12 ++ emit_djk(as, LAI_FSTX_D, src, RID_BASE, RID_R19); ++#endif ++ emit_d16i(as, RID_R19, ofs); ++ } else { ++ asm_tvstore64(as, RID_BASE, ofs, ref); ++ } ++ checkmclim(as); ++ } ++ lua_assert(map + nent == flinks); ++} ++ ++/* -- GC handling --------------------------------------------------------- */ ++ ++/* Marker to prevent patching the GC check exit. */ ++#define LA_NOPATCH_GC_CHECK LAI_OR ++ ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ ++ /* Assumes asm_snap_prep() already done. */ ++ asm_guard(as, LAI_BNE, RID_RET, RID_ZERO); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ l_end[-3] = LA_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ //TODO ++ //emit_dji(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); //TODO ++ //emit_dj32i(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ emit_add(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ tmp = ra_releasetmp(as, ASMREF_TMP2); ++ /* Jump around GC step if GC total < GC threshold. 
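++** (Added note:) concretely, the code below loads gc.total and gc.threshold from the global
++** state, sets RID_TMP = (total < threshold) with SLTU, and the BNE skips the lj_gc_step_jit
++** call (which receives as->gcsteps as its second argument) while that condition holds.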
*/ ++ emit_branch(as, LAI_BNE, RID_TMP, RID_ZERO, l_end); ++ emit_loadi(as, tmp, as->gcsteps); ++ emit_djk(as, LAI_SLTU, RID_TMP, RID_TMP, tmp); ++ emit_getgl(as, tmp, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); ++ as->gcsteps = 0; ++ checkmclim(as); ++} ++ ++/* -- Loop handling ------------------------------------------------------- */ ++ ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ if (as->loopinv) { /* Inverted loop branch? */ ++ /* asm_guard already inverted the bceqz/bcnez/beq/bne/blt/bge, and patched the final b. */ ++ uint32_t mask = (p[-2] & 0xfc000000) == 0x48000000 ? 0x1fffffu : 0xffffu; ++ ptrdiff_t delta = target - p ; ++ if (mask == 0x1fffffu) { ++ p[-2] = p[-2] | LAF_I((uint32_t)delta & 0xffffu) | (((uint32_t)delta & 0x1f0000u) >> 16); ++ } else { ++ p[-2] |= LAF_I(delta & 0xffffu); //TODO ++ } ++ if (p[-1] == 0 || p[-1] == 0x109c21) //TODO ++ p[-1] = LAI_NOP; ++ } else { ++ /* b */ ++ ptrdiff_t delta = target - (p - 1); ++ p[-1] = LAI_B | LAF_I(delta & 0xffffu) | ((delta & 0x3ff0000) >> 16); //TODO ++ if ( (p[-2] & LAI_B) == LAI_B || (p[-2] & LAI_BL) == LAI_BL || (p[-2] & LAI_BEQ) == LAI_BEQ || (p[-2] & LAI_BNE) == LAI_BNE ||(p[-2] & LAI_BLT) == LAI_BLT || (p[-2] & LAI_BGE) == LAI_BGE) ++ p[-3] = LAI_NOP; ++ } ++} ++ ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ if (as->loopinv) as->mctop--; ++} ++ ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (as->loopinv) as->mctop--; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (r != RID_BASE) ++ emit_move(as, r, RID_BASE); ++ } ++} ++ ++/* Coalesce BASE register for a side trace. */ ++static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (as->loopinv) as->mctop--; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (irp->r == r) { ++ rset_clear(allow, r); /* Mark same BASE register as coalesced. */ ++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { ++ rset_clear(allow, irp->r); ++ emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ ++ } else { ++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ ++ } ++ } ++ return allow; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; ++ int32_t spadj = as->T->spadjust; ++ MCode *p = as->mctop - 1; ++ if (spadj == 0) { ++ p[-1] = LAI_NOP; ++ } else { ++ p[-1] = LAI_ADDI_D|LAF_D(RID_SP)|LAF_J(RID_SP)|LAF_I(spadj); ++ } ++ ++ MCode *tmp = p; ++ *p = LAI_B | LAF_I((uintptr_t)(target-tmp)&0xffffu) | (((uintptr_t)(target-tmp)&0x3ff0000u) >> 16); ++} ++ ++/* Prepare tail of code. */ ++static void asm_tail_prep(ASMState *as) ++{ ++ // as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ ++ // as->invmcp = as->loopref ? as->mcp : NULL; ++ MCode *p = as->mctop - 1; /* Leave room for exit branch. 
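++** (Added note:) for a looping trace this slot doubles as the inverted loop branch (invmcp);
++** otherwise one more word is reserved so asm_tail_fixup can place the stack-pointer
++** adjustment in front of the final B.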
*/ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; ++ } else { ++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */ ++ as->invmcp = NULL; ++ } ++ *p = LAI_NOP; /* Prevent load/store merging. */ ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. */ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (nfpr > 0) nfpr--; else nslots += 2; ++ } else { ++ if (ngpr > 0) ngpr--; else nslots += 2; ++ } ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ ++ as->evenspill = nslots; ++ return REGSP_HINT(RID_RET); ++ // return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); ++} ++ ++static void asm_sparejump_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { ++ mxp -= 4*1; ++ as->mctop = mxp; ++ } ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ asm_sparejump_setup(as); ++ asm_exitstub_setup(as); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *px = exitstub_trace_addr(T, exitno); ++ MCode *cstart = NULL; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++ ++ MCode exitload = LAI_ADDI_W | LAF_D(RID_TMP) | LAF_J(RID_ZERO) | LAF_I(exitno&0xfff); ++ ++ for (p++; p < pe; p++) { ++ if (*p == exitload) { ++ /* Look for exitstub branch, replace with branch to target. */ ++ ptrdiff_t delta = target - p - 1; ++ MCode ins = p[1]; ++ if ((ins & 0xfc000000u) == LAI_BEQ || ++ (ins & 0xfc000000u) == LAI_BNE || ++ (ins & 0xfc000000u) == LAI_BLT || ++ (ins & 0xfc000000u) == LAI_BGE) { ++ /* Patch beq/bne/blt/bge, if within range. */ ++ if (LAF_S_OK(delta, 16)) { ++ p[1] = (ins & 0xfc0003ffu) | LAF_I(delta & 0xffff); ++ if (!cstart) cstart = p + 1; ++ } ++ } else if ((ins & 0xfc000000u) == LAI_BCEQZ || ++ (ins & 0xfc000100u) == LAI_BCNEZ) { ++ /* Patch bceqz/bcnez, if within range. */ ++ if (p[-1] == LA_NOPATCH_GC_CHECK) { ++ } else if (LAF_S_OK(delta, 21)) { ++ *p = (ins & 0xfc0003e0u) | LAF_I(delta & 0xffff) | ((delta & 0x1f0000) >> 16); ++ if (!cstart) cstart = p; ++ } ++ } else if ((ins & 0xfc000000u) == LAI_B) { ++ /* Patch b. 
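++** (Added note:) the 26-bit PC-relative offset is rewritten in place: the low 16 bits go
++** into the immediate field and the upper 10 bits into bits 0-9 of the word, the same split
++** used by asm_loop_fixup and asm_tail_fixup above.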
*/ ++ lua_assert(LAF_S_OK(delta, 26)); ++ *p = (ins & 0xfc000000u) | LAF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16); ++ if (!cstart) cstart = p; ++ } else if (p+2 == pe){ ++ if (p[2] == LAI_NOP) { ++ ptrdiff_t delta = target - &p[2]; ++ lua_assert(LAF_S_OK(delta, 26)); ++ p[2] = LAI_B | LAF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16); ++ *p = LAI_NOP; ++ if (!cstart) cstart = p + 2; ++ } ++ } ++ } ++ } ++ if (cstart) lj_mcode_sync(cstart, px+1); ++ lj_mcode_patch(J, mcarea, 1); ++} +diff --git a/libs/luajit/LuaJIT-src/src/lj_ccall.c b/libs/luajit/LuaJIT-src/src/lj_ccall.c +index 5c252e5..426e79a 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ccall.c ++++ b/libs/luajit/LuaJIT-src/src/lj_ccall.c +@@ -562,6 +562,81 @@ + goto done; \ + } + ++#elif LJ_TARGET_LOONGARCH64 ++/* -- LoongArch lp64 calling conventions ---------------------------------------- */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ /* Return structs of size <= 16 in a GPR. */ \ ++ cc->retref = !(sz <= 16); \ ++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 1 or 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#if LJ_ABI_SOFTFP /* LoongArch64 soft-float */ ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ ++ ((intptr_t *)dp)[0] = cc->gpr[0]; \ ++ } else { /* Copy complex double from GPRs. */ \ ++ ((intptr_t *)dp)[0] = cc->gpr[0]; \ ++ ((intptr_t *)dp)[1] = cc->gpr[1]; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ /* Pass complex by value in 2 or 4 GPRs. */ ++ ++/* Position of soft-float 'float' return value depends on endianess. */ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); ++ ++#else /* LoongArch64 hard-float */ ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0].f; \ ++ ((float *)dp)[1] = cc->fpr[1].f; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ if (sz == 2*sizeof(float)) { \ ++ isfp = 2; \ ++ if (ngpr < maxgpr) \ ++ sz *= 2; \ ++ } ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)&cc->fpr[0].f; ++ ++#endif ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ /* Pass all structs by value in registers and/or on the stack. */ ++ ++#define CCALL_HANDLE_REGARG \ ++ if (isfp) { /* Try to pass argument in FPRs. */ \ ++ int n2 = ctype_isvector(d->info) ? 1 : n; \ ++ if (nfpr + n2 <= CCALL_NARG_FPR) { \ ++ dp = &cc->fpr[nfpr]; \ ++ nfpr += n2; \ ++ goto done; \ ++ } \ ++ } else { /* Try to pass argument in GPRs. */ \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -873,6 +948,79 @@ void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) + + #endif + ++/* -- LoongArch64 ABI struct classification ---------------------------- */ ++ ++#if LJ_TARGET_LOONGARCH64 ++ ++#define FTYPE_FLOAT 1 ++#define FTYPE_DOUBLE 2 ++ ++/* Classify FP fields (max. 2) and their types. 
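++** (Added note:) the result packs one 2-bit code per field, FTYPE_FLOAT=1 and FTYPE_DOUBLE=2.
++** For example, struct { float x; double y; } classifies as FTYPE_FLOAT | (FTYPE_DOUBLE << 2),
++** while 0 means the aggregate is not a pure float/double pair and is passed in GPRs.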
*/ ++static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) ++{ ++ int n = 0, ft = 0; ++ if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) ++ goto noth; ++ while (ct->sib) { ++ CType *sct; ++ ct = ctype_get(cts, ct->sib); ++ if (n == 2) { ++ goto noth; ++ } else if (ctype_isfield(ct->info)) { ++ sct = ctype_rawchild(cts, ct); ++ if (ctype_isfp(sct->info)) { ++ ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; ++ n++; ++ } else { ++ goto noth; ++ } ++ } else if (ctype_isbitfield(ct->info) || ++ ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ goto noth; ++ } ++ } ++ if (n <= 2) ++ return ft; ++noth: /* Not a homogeneous float/double aggregate. */ ++ return 0; /* Struct is in GPRs. */ ++} ++ ++static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, ++ int ft) ++{ ++ if (LJ_ABI_SOFTFP ? ft : ++ ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { ++ int i, ofs = 0; ++ for (i = 0; ft != 0; i++, ft >>= 2) { ++ if ((ft & 3) == FTYPE_FLOAT) { ++#if LJ_ABI_SOFTFP ++ /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ ++ memcpy((uint8_t *)dp + ofs, (uint8_t *)&cc->gpr[2*i], 4); ++#else ++ *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; ++#endif ++ ofs += 4; ++ } else { ++ ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ ++#if LJ_ABI_SOFTFP ++ *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; ++#else ++ *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; ++#endif ++ ofs += 8; ++ } ++ } ++ } else { ++#if !LJ_ABI_SOFTFP ++ if (ft) sp = (uint8_t *)&cc->fpr[0]; ++#endif ++ memcpy(dp, sp, ctr->size); ++ } ++} ++ ++#endif ++ ++ + /* -- Common C call handling ---------------------------------------------- */ + + /* Infer the destination CTypeID for a vararg argument. */ +@@ -1068,7 +1216,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ + cc->fpr[nfpr-2].d[1] = 0; + } +-#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) ++#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) || (LJ_TARGET_LOONGARCH64 && !LJ_ABI_SOFTFP) + if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { + /* Split float HFA or complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; +@@ -1080,7 +1228,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + } + if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ + +-#if LJ_TARGET_X64 || LJ_TARGET_PPC ++#if LJ_TARGET_X64 || LJ_TARGET_PPC || LJ_TARGET_LOONGARCH64 + cc->nfpr = nfpr; /* Required for vararg functions. */ + #endif + cc->nsp = nsp; +diff --git a/libs/luajit/LuaJIT-src/src/lj_ccall.h b/libs/luajit/LuaJIT-src/src/lj_ccall.h +index 59f6648..1c150fa 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ccall.h ++++ b/libs/luajit/LuaJIT-src/src/lj_ccall.h +@@ -126,6 +126,21 @@ typedef union FPRArg { + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + } FPRArg; + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CCALL_NARG_GPR 8 ++#define CCALL_NARG_FPR 8 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -168,7 +183,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { + uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. 
*/ + #elif LJ_TARGET_ARM64 + void *retp; /* Aggregate return pointer in x8. */ +-#elif LJ_TARGET_PPC ++#elif LJ_TARGET_PPC || LJ_TARGET_LOONGARCH64 + uint8_t nfpr; /* Number of arguments in FPRs. */ + #endif + #if LJ_32 +diff --git a/libs/luajit/LuaJIT-src/src/lj_ccallback.c b/libs/luajit/LuaJIT-src/src/lj_ccallback.c +index 846827b..c7cbd73 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ccallback.c ++++ b/libs/luajit/LuaJIT-src/src/lj_ccallback.c +@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #define CALLBACK_MCODE_HEAD 52 + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CALLBACK_MCODE_HEAD 52 ++ + #else + + /* Missing support for this architecture. */ +@@ -238,6 +242,37 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + } + lua_assert(p - page <= CALLBACK_MCODE_SIZE); + } ++#elif LJ_TARGET_LOONGARCH64 ++static void *callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; ++ uintptr_t ug = (uintptr_t)(void *)g; ++ MSize slot; ++ *p++ = LAI_LU12I_W | LAF_D(RID_R18) | LAF_I20((target >> 12) & 0xfffff); ++ *p++ = LAI_LU12I_W | LAF_D(RID_R17) | LAF_I20((ug >> 12) & 0xfffff); ++ *p++ = LAI_ORI | LAF_D(RID_R18) | LAF_J(RID_R18) | LAF_I(target & 0xfff); ++ *p++ = LAI_ORI | LAF_D(RID_R17) | LAF_J(RID_R17) | LAF_I(ug & 0xfff); ++ *p++ = LAI_LU32I_D | LAF_D(RID_R18) | LAF_I20((target >> 32) & 0xfffff); ++ *p++ = LAI_LU32I_D | LAF_D(RID_R17) | LAF_I20((ug >> 32) & 0xfffff); ++ *p++ = LAI_LU52I_D | LAF_D(RID_R18) | LAF_J(RID_R18) | LAF_I((target >> 52) & 0xfff); ++ *p++ = LAI_LU52I_D | LAF_D(RID_R17) | LAF_J(RID_R17) | LAF_I((ug >> 52) & 0xfff); ++ *p++ = LAI_NOP; ++ *p++ = LAI_NOP; ++ *p++ = LAI_NOP; ++ *p++ = LAI_NOP; ++ *p++ = LAI_JIRL | LAF_D(RID_R0) | LAF_J(RID_R18) | LAF_I(0); ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ //*p = LAI_BEQ | LAF_D(RID_R0) | LAF_J(RID_R0) | ((page-p-1) & 0x0000ffffu); //TODO ++ //p++; ++ //*p++ = LAI_LU12I_W | LAF_D(RID_R19) | LAF_I20((slot >> 12) & 0xfffff); //TODO ++ *p++ = LAI_ORI | LAF_D(RID_R19) | LAF_J(RID_R0) | LAF_I(slot & 0xfff); ++ //*p = LAI_BEQ | LAF_D(RID_ZERO) | LAF_J(RID_ZERO) | ((page-p-1) & 0x0000ffffu); ++ *p = LAI_B | LAF_I((page-p) & 0xffff) | (((page-p) >> 16) & 0x3ff); ++ p++; ++ } ++ return p; ++} + #else + /* Missing support for this architecture. 
*/ + #define callback_mcode_init(g, p) UNUSED(p) +@@ -491,6 +526,37 @@ void lj_ccallback_mcode_free(CTState *cts) + } + #endif + ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ ++#elif LJ_TARGET_LOONGARCH64 ++ ++#if !LJ_ABI_SOFTFP /* LoongArch64 hard-float */ ++#define CALLBACK_HANDLE_REGARG \ ++ if (isfp) { \ ++ if (nfpr + n <= CCALL_NARG_FPR) { \ ++ sp = &cts->cb.fpr[nfpr]; \ ++ nfpr += n; \ ++ goto done; \ ++ } \ ++ } else { \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++#else /* LoongArch64 soft-float */ ++#define CALLBACK_HANDLE_REGARG \ ++ if (ngpr + n <= maxgpr) { \ ++ UNUSED(isfp); \ ++ sp = (void*) &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } ++#endif ++ + #define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; +diff --git a/libs/luajit/LuaJIT-src/src/lj_crecord.c b/libs/luajit/LuaJIT-src/src/lj_crecord.c +index e32ae23..89a70fa 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_crecord.c ++++ b/libs/luajit/LuaJIT-src/src/lj_crecord.c +@@ -132,7 +132,7 @@ static IRType crec_ct2irt(CTState *cts, CType *ct) + #define CREC_COPY_REGWIN 2 + #elif LJ_TARGET_PPC || LJ_TARGET_MIPS + #define CREC_COPY_REGWIN 8 +-#else ++#else //TODO + #define CREC_COPY_REGWIN 4 + #endif + +diff --git a/libs/luajit/LuaJIT-src/src/lj_emit_loongarch64.h b/libs/luajit/LuaJIT-src/src/lj_emit_loongarch64.h +new file mode 100644 +index 0000000..bf778ea +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/lj_emit_loongarch64.h +@@ -0,0 +1,384 @@ ++/* ++** LoongArch instruction emitter. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2021 Loongson Technology. All rights reserved. ++*/ ++ ++static intptr_t get_k64val(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_KINT64) { ++ return (intptr_t)ir_kint64(ir)->u64; ++ } else if (ir->o == IR_KGC) { ++ return (intptr_t)ir_kgc(ir); ++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { ++ return (intptr_t)ir_kptr(ir); ++ } else if (LJ_SOFTFP && ir->o == IR_KNUM) { ++ return (intptr_t)ir_knum(ir)->u64; ++ } else { ++ lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); ++ return ir->i; /* Sign-extended. 
*/ ++ } ++} ++ ++#define get_kval(as, ref) get_k64val(as, ref) ++ ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_djk(ASMState *as, LAIns lai, Reg rd, Reg rj, Reg rk) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_K(rk & 0x1f); ++} ++ ++#define emit_dj(as, lai, rd, rj) emit_djk(as, (lai), (rd)&31, (rj)&31, 0) ++ ++static void emit_di(ASMState *as, LAIns lai, Reg rd, int32_t i) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_I20(i & 0xfffff); ++} ++ ++static void emit_dji(ASMState *as, LAIns lai, Reg rd, Reg rj, int32_t i) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_I(i); ++} ++ ++static void emit_dju(ASMState *as, LAIns lai, Reg rd, Reg rj, uint32_t u) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_I(u); ++} ++ ++static void emit_dj32i(ASMState *as, Reg rd, Reg rj, int32_t i) ++{ ++ if ((i>>12) == 0 || (i>>12) == 0xfffff) { ++ *--as->mcp = LAI_ADDI_D | LAF_D(rd) | LAF_J(rj) | LAF_I(i&0xfff); ++ } else { ++ emit_djk(as, LAI_ADD_D, rd, RID_R19, rj); ++ if ((i&0xfff) != 0) { ++ emit_dju(as, LAI_ORI, RID_R19, RID_R19, i&0xfff); ++ } ++ if (((i>>12)&0xfffff) != 0) { ++ emit_di(as, LAI_LU12I_W, RID_R19, (i>>12)&0xfffff); ++ } ++ } ++} ++ ++static void emit_d16i(ASMState *as, Reg rd, int32_t i) ++{ ++ emit_dji(as, LAI_SRAI_D, rd, rd, 16); ++ emit_dji(as, LAI_ADDU16I_D, rd, RID_ZERO, (i&0xffff)); ++} ++ ++static void emit_addw(ASMState *as, Reg rd, Reg rj, int32_t i) ++{ ++ emit_djk(as, LAI_ADD_W, rd, rj, RID_R20); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_W, RID_R19, RID_R19, 24); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xff000000)>>24); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_W, RID_R19, RID_R19, 12); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000)>>12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, i&0xfff); ++} ++ ++static void emit_add(ASMState *as, Reg rd, Reg rj, int64_t i) ++{ ++ emit_djk(as, LAI_ADD_D, rd, rj, RID_R20); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 60); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xf000000000000000)>>60); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 48); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000000000000)>>48); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 36); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000000000)>>36); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 24); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000000)>>24); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 12); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000)>>12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, i&0xfff); ++} ++ ++static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) ++{ ++ emit_dju(as, LAI_ROTRI_W, dest, src, shift); ++} ++ ++static void emit_djml(ASMState *as, LAIns lai, Reg rd, Reg rj, uint32_t m, uint32_t l) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_I(l & 0x3f) | LAF_M(m & 0x3f); ++} ++ ++static void emit_b_bl(ASMState *as, LAIns lai, uint32_t i) ++{ ++ *--as->mcp = lai | LAF_I(i & 0xffff) | ((i >> 16) & 0x3ff); ++} ++ ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer rematerialization 
of BASE/L from global_State over spills. */ ++#define emit_canremat(ref) ((ref) <= REF_BASE) ++ ++/* Try to find a one step delta relative to another constant. */ ++static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) ++{ ++ RegSet work = ~as->freeset & RSET_GPR; ++ while (work) { ++ Reg r = rset_picktop(work); ++ IRRef ref = regcost_ref(as->cost[r]); ++ lua_assert(r != t); ++ if (ref < ASMREF_L) { ++ intptr_t delta = (intptr_t)((uintptr_t)i - ++ (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref))); ++ if (checki16(delta)) { ++ //emit_dj32i(as, t, r, delta); // daddiu ++ emit_djk(as, LAI_ADD_D, t, r, RID_R19); ++ emit_d16i(as, RID_R19, delta); ++ return 1; ++ } ++ } ++ rset_clear(work, r); ++ } ++ return 0; /* Failed. */ ++} ++ ++/* Load a 32/64 bit constant into a GPR. */ ++//#define emit_loadi(as, rd, i) emit_loadk(as, rd, i) ++//#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) ++ ++//#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++ ++/* Load a 32 bit constant into a GPR. */ ++static void emit_loadi(ASMState *as, Reg r, int32_t i) ++{ ++ if (checki16(i)) { ++ //emit_ti(as, MIPSI_LI, r, i); // MIPSI_LI = MIPSI_ADDIU ++ emit_addw(as, r, RID_R0, i); ++ } else { ++ if ((i & 0xffff)) { ++ intptr_t jgl = (intptr_t)(void *)J2G(as->J); ++ if ((uintptr_t)(i-jgl) < 65536) { ++ //emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); ++ emit_addw(as, r, RID_JGL, i-jgl-32768); ++ return; ++ } else if (emit_kdelta1(as, r, i)) { ++ return; ++ } else if ((i >> 16) == 0) { ++ //emit_tsi(as, MIPSI_ORI, r, RID_ZERO, i); ++ emit_dji(as, LAI_ORI, r, RID_R20, i&0xfff); ++ emit_dji(as, LAI_SLLI_W, RID_R20, RID_R20, 12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, (i>>12)&0xf); ++ return; ++ } ++ //emit_tsi(as, MIPSI_ORI, r, r, i); ++ emit_djk(as, LAI_OR, r, r, RID_R19); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R20, i&0xfff); ++ emit_dji(as, LAI_SLLI_W, RID_R20, RID_R20, 12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, (i>>12)&0xf); ++ } ++ //emit_ti(as, MIPSI_LUI, r, (i >> 16)); ++ emit_dji(as, LAI_ADDU16I_D, r, RID_R0, (i>>16)&0xffff); ++ } ++} ++ ++#if LJ_64 ++/* Load a 64 bit constant into a GPR. */ ++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ++{ ++ if (checki32((int64_t)u64)) { ++ emit_loadi(as, r, (int32_t)u64); ++ } else { ++ uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); ++ if (delta < 65536) { ++ //emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); ++ emit_add(as, r, RID_JGL, (int32_t)(delta-32768)); ++ } else if (emit_kdelta1(as, r, (intptr_t)u64)) { ++ return; ++ } else { ++ *--as->mcp = LAI_LU52I_D | LAF_D(r) | LAF_J(r) | LAF_I((u64>>52)&0xfff); ++ *--as->mcp = LAI_LU32I_D | LAF_D(r) | LAF_I20((u64>>32)&0xfffff); ++ *--as->mcp = LAI_ORI | LAF_D(r) | LAF_J(r) | LAF_I(u64&0xfff); ++ *--as->mcp = LAI_LU12I_W | LAF_D(r) | LAF_I20((u64>>12)&0xfffff); ++ } ++ /* TODO: There are probably more optimization opportunities. */ ++ } ++} ++ ++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++#else ++#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) ++#endif ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++static void ra_allockreg(ASMState *as, intptr_t k, Reg r); ++ ++/* Get/set from constant pointer. 
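++** (Added note:) pointers within 64K of the global_State are addressed JGL-relative; otherwise
++** a base register is allocated with ra_allock. Offsets that do not fit in a 12-bit immediate
++** fall back to the indexed forms (LDX.D/FLDX.D/FLDX.S) with the offset materialized in R19
++** via emit_d16i.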
*/ ++static void emit_lsptr(ASMState *as, LAIns lai, Reg r, void *p, RegSet allow) ++{ ++ intptr_t jgl = (intptr_t)(J2G(as->J)); ++ intptr_t i = (intptr_t)(p); ++ Reg base; ++ if ((uint32_t)(i-jgl) < 65536) { //TODO ++ i = i-jgl-32768; ++ base = RID_JGL; ++ } else { ++ base = ra_allock(as, i-(int16_t)i, allow); ++ } ++ // emit_dji(as, lai, r, base, i&0xfff); /* ld.d rd, rj, si12 */ ++ if ((i>>12) == 0) { ++ emit_dji(as, lai, r, base, i&0xfff); ++ } ++ else { ++ /* ld.d->ldx.d, fld.d->fldx.d, ld.s->fldx.s */ ++ if (lai == LAI_LD_D) ++ lai = LAI_LDX_D; ++ else if (lai == LAI_FLD_D) ++ lai = LAI_FLDX_D; ++ else if (lai == LAI_FLD_S) ++ lai = LAI_FLDX_S; ++ emit_djk(as, lai, r, base, RID_R19); ++ ++ /* move i to a GPR */ ++ emit_d16i(as, RID_R19, i); ++ } ++} ++ ++/* Load 64 bit IR constant into register. */ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ const uint64_t *k = &ir_k64(ir)->u64; ++ Reg r64 = r; ++ if (rset_test(RSET_FPR, r)) { ++ r64 = RID_TMP; ++ emit_dj(as, LAI_MOVGR2FR_D, r, r64); ++ } ++ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) ++ emit_lsptr(as, LAI_LD_D, r64, (void *)k, 0); /*To copy a doubleword from a GPR to an FPR*/ ++ else ++ emit_loadu64(as, r64, *k); ++} ++ ++/* Get/set global_State fields. */ ++static void emit_lsglptr2(ASMState *as, LAIns lai, Reg r, int32_t ofs) ++{ ++ emit_djk(as, lai, r, RID_JGL, RID_R20); ++ emit_loadi(as, RID_R20, (ofs-32768)); ++} ++ ++#define emit_getgl(as, r, field) \ ++ emit_lsglptr2(as, LAI_LDX_D, (r), (int32_t)offsetof(global_State, field)) ++#define emit_setgl(as, r, field) \ ++ emit_lsglptr2(as, LAI_STX_D, (r), (int32_t)offsetof(global_State, field)) ++ ++/* Trace number is determined from per-trace exit stubs. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC. 
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static void emit_branch(ASMState *as, LAIns lai, Reg rj, Reg rd, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ lua_assert(((delta + 0x8000) >> 16) == 0); ++ *--p = lai | LAF_D(rd) | LAF_J(rj) | LAF_I(((uint32_t)delta & 0xffffu)); /*BEQ BNE BGE BLZ*/ ++ as->mcp = p; ++} ++ ++static void emit_branch21(ASMState *as, LAIns lai, Reg rj, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ lua_assert(((delta + 0x100000) >> 21) == 0); ++ *--p = lai | LAF_J(rj) | LAF_I(((uint32_t)delta & 0xffffu)) | ((uint32_t)delta & 0x1f0000u); /*BEQZ BNEZ BCEQZ BCNEZ*/ ++ as->mcp = p; ++} ++ ++static void emit_jmp(ASMState *as, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ emit_b_bl(as, LAI_B, (delta&0x3ffffff)); /*offs 26*/ ++} ++ ++#define emit_move(as, dst, src) \ ++ emit_djk(as, LAI_OR, (dst), (src), RID_ZERO) ++ ++static void emit_call(ASMState *as, void *target) ++{ ++ RegSet pick = as->freeset & RID2RSET(RID_CFUNCADDR); ++ if (!pick) { ++ Reg r = rset_picktop(as->freeset & RSET_GPR); ++ rset_clear(as->freeset, r); ++ emit_move(as, RID_CFUNCADDR, r); ++ emit_dji(as, LAI_JIRL, RID_RA, RID_CFUNCADDR, 0); ++ //emit_dj32i(as, RID_CFUNCADDR, RID_ZERO, (intptr_t)target); ++ emit_add(as, RID_CFUNCADDR, RID_ZERO, (intptr_t)target); ++ emit_move(as, r, RID_CFUNCADDR); ++ rset_set(as->freeset, (r)); ++ } else { ++ emit_dji(as, LAI_JIRL, RID_RA, RID_CFUNCADDR, 0); ++ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); ++ } ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++ if (dst < RID_MAX_GPR) ++ emit_move(as, dst, src); ++ else ++ emit_dj(as, irt_isnum(ir->t) ? LAI_FMOV_D : LAI_FMOV_S, dst, src); ++} ++ ++/* Generic load of register with base and (small) offset address. */ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) { ++ //emit_dji(as, irt_is64(ir->t) ? LAI_LD_D : LAI_LD_W, r, base, ofs&0xfff); ++ emit_djk(as, irt_is64(ir->t) ? LAI_LDX_D : LAI_LDX_W, r, base, RID_R19); ++ } else { ++ //emit_dji(as, irt_isnum(ir->t) ? LAI_FLD_D : LAI_FLD_S, r, base, ofs&0xfff); ++ emit_djk(as, irt_isnum(ir->t) ? LAI_FLDX_D : LAI_FLDX_S, r, base, RID_R19); ++ } ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) { ++ //emit_dji(as, irt_is64(ir->t) ? LAI_ST_D : LAI_ST_W, r, base, ofs&0xfff); ++ emit_djk(as, irt_is64(ir->t) ? LAI_STX_D : LAI_STX_W, r, base, RID_R19); ++ } else { ++ //emit_dji(as, irt_isnum(ir->t) ? LAI_FST_D : LAI_FST_S, ++ // (r&31), base, ofs&0xfff); ++ emit_djk(as, irt_isnum(ir->t) ? LAI_FSTX_D : LAI_FSTX_S, (r&31), base, RID_R19); ++ } ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++/* Add offset to pointer. 
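++** (Added note:) ADDI.D only takes a 12-bit immediate, so the (asserted 16-bit) offset is
++** built in R19 with emit_d16i and added with ADD.D.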
*/ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) { ++ lua_assert(checki16(ofs)); ++ //emit_dji(as, LAI_ADDI_D, r, r, ofs&0xfff); //TODO 12bit -> 16bit ++ emit_djk(as, LAI_ADD_D, r, r, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++} ++ ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) +diff --git a/libs/luajit/LuaJIT-src/src/lj_frame.h b/libs/luajit/LuaJIT-src/src/lj_frame.h +index 19c49a4..d129530 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_frame.h ++++ b/libs/luajit/LuaJIT-src/src/lj_frame.h +@@ -264,6 +264,24 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #endif + #define CFRAME_OFS_MULTRES 0 + #define CFRAME_SHIFT_MULTRES 3 ++#elif LJ_TARGET_LOONGARCH64 //TODO ++#if LJ_ARCH_HASFPU ++#define CFRAME_OFS_ERRF 188 ++#define CFRAME_OFS_NRES 184 ++#define CFRAME_OFS_PREV 176 ++#define CFRAME_OFS_L 168 ++#define CFRAME_OFS_PC 160 ++#define CFRAME_SIZE 192 ++#else ++#define CFRAME_OFS_ERRF 124 ++#define CFRAME_OFS_NRES 120 ++#define CFRAME_OFS_PREV 112 ++#define CFRAME_OFS_L 104 ++#define CFRAME_OFS_PC 96 ++#define CFRAME_SIZE 128 ++#endif ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SHIFT_MULTRES 3 + #else + #error "Missing CFRAME_* definitions for this architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_gdbjit.c b/libs/luajit/LuaJIT-src/src/lj_gdbjit.c +index c219ffa..5ac5db7 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_gdbjit.c ++++ b/libs/luajit/LuaJIT-src/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_LOONGARCH64 ++ DW_REG_SP = 3, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = { + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_LOONGARCH64 ++ .machine = 258, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_LOONGARCH64 ++ { ++ int i; ++ DB(DW_CFA_offset|30); DUV(2); //TODO ++ for (i = 30; i >= 23; i--) { DB(DW_CFA_offset|i); DUV(3+(30-i)); } ++ for (i = 31; i >= 24; i--) { DB(DW_CFA_offset|32|i); DUV(42-i); } //TODO ++ } + #else + #error "Unsupported target architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_ircall.h b/libs/luajit/LuaJIT-src/src/lj_ircall.h +index 973c36e..e136526 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ircall.h ++++ b/libs/luajit/LuaJIT-src/src/lj_ircall.h +@@ -84,6 +84,12 @@ typedef struct CCallInfo { + #define IRCALLCOND_SOFTFP_MIPS(x) NULL + #endif + ++#if LJ_SOFTFP && LJ_TARGET_LOONGARCH64 ++#define IRCALLCOND_SOFTFP_LOONGARCH64(x) x ++#else ++#define IRCALLCOND_SOFTFP_LOONGARCH64(x) NULL ++#endif ++ + #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) + + #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) +@@ -272,7 +278,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; + #define fp64_f2l __aeabi_f2lz + #define fp64_f2ul __aeabi_f2ulz + #endif +-#elif LJ_TARGET_MIPS ++#elif LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + #define softfp_add __adddf3 + #define softfp_sub __subdf3 + #define softfp_mul __muldf3 +@@ -308,7 +314,7 @@ extern float softfp_ui2f(uint32_t a); + extern int32_t softfp_f2i(float a); + extern uint32_t softfp_f2ui(float a); + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || 
LJ_TARGET_LOONGARCH64 + extern double lj_vm_sfmin(double a, double b); + extern double lj_vm_sfmax(double a, double b); + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_jit.h b/libs/luajit/LuaJIT-src/src/lj_jit.h +index 92054e3..db4b7f4 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_jit.h ++++ b/libs/luajit/LuaJIT-src/src/lj_jit.h +@@ -55,6 +55,13 @@ + #else + #define JIT_F_CPUSTRING "\010MIPS64R2" + #endif ++ ++#elif LJ_TARGET_LOONGARCH64 ++#define JIT_F_CPU 0x00000010 ++#define JIT_F_GS464V (JIT_F_CPU << 0) ++#define JIT_F_CPU_FIRST JIT_F_GS464V ++#define JIT_F_CPUSTRING "\6GS464V" ++ + #else + #define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" +@@ -335,7 +342,7 @@ enum { + LJ_K64_M2P64_31 = LJ_K64_M2P64, + #endif + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + LJ_K64_2P31, /* 2^31 */ + #if LJ_64 + LJ_K64_2P63, /* 2^63 */ +@@ -353,10 +360,10 @@ enum { + LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ + LJ_K32_2P52, /* 2^52 */ + #endif +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + LJ_K32_2P31, /* 2^31 */ + #endif +-#if LJ_TARGET_MIPS64 ++#if LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64 + LJ_K32_2P63, /* 2^63 */ + LJ_K32_M2P64, /* -2^64 */ + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_target.h b/libs/luajit/LuaJIT-src/src/lj_target.h +index 8dcae95..52a0a7e 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_target.h ++++ b/libs/luajit/LuaJIT-src/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. + */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_LOONGARCH64 + typedef uint64_t RegSet; + #else + typedef uint32_t RegSet; +@@ -69,7 +69,7 @@ typedef uint32_t RegSet; + #define rset_set(rs, r) (rs |= RID2RSET(r)) + #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) + #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_LOONGARCH64 + #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) + #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) + #else +@@ -144,6 +144,8 @@ typedef uint32_t RegCost; + #include "lj_target_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_target_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_target_loongarch64.h" + #else + #error "Missing include for target CPU" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_target_loongarch64.h b/libs/luajit/LuaJIT-src/src/lj_target_loongarch64.h +new file mode 100644 +index 0000000..6d96b45 +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/lj_target_loongarch64.h +@@ -0,0 +1,339 @@ ++/* ++** Definitions for LoongArch CPUs. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2021 Loongson Technology. All rights reserved. 
++*/ ++ ++#ifndef _LJ_TARGET_LOONGARCH_H ++#define _LJ_TARGET_LOONGARCH_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(R0) _(RA) _(R2) _(SP) _(R4) _(R5) _(R6) _(R7) \ ++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ ++ _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ ++ _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31) ++#if LJ_SOFTFP ++#define FPRDEF(_) ++#else ++#define FPRDEF(_) \ ++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ ++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ ++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ ++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) ++#endif ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_ZERO = RID_R0, ++ RID_TMP = RID_RA, //TODO ++ RID_GP = RID_R31, ++ ++ /* Calling conventions. */ ++ RID_RET = RID_R4, ++ ++ RID_RETHI = RID_R18, ++ RID_RETLO = RID_R17, ++ ++#if LJ_SOFTFP ++ RID_FPRET = RID_R17, ++#else ++ RID_FPRET = RID_F0, ++#endif ++ RID_CFUNCADDR = RID_R16, ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_R23, /* Interpreter BASE. */ ++ RID_LPC = RID_R25, /* Interpreter PC. */ ++ RID_DISPATCH = RID_R26, /* Interpreter DISPATCH table. */ ++ RID_LREG = RID_R27, /* Interpreter L. */ ++ RID_JGL = RID_R22, /* On-trace: global_State + 32768. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_R0, ++ RID_MAX_GPR = RID_R31+1, ++ RID_MIN_FPR = RID_MAX_GPR, ++#if LJ_SOFTFP ++ RID_MAX_FPR = RID_MIN_FPR, ++#else ++ RID_MAX_FPR = RID_F31+1, ++#endif ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_R0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except ZERO, TMP, SP, JGL. */ ++#define RSET_FIXED \ ++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_R2)|\ ++ RID2RSET(RID_SP)|RID2RSET(RID_JGL)|RID2RSET(RID_R31)|\ ++ RID2RSET(RID_R19)|RID2RSET(RID_R20)) ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) ++#if LJ_SOFTFP ++#define RSET_FPR 0 ++#else ++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) ++#endif ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++/* scratch register. */ ++#define RSET_SCRATCH_GPR \ ++ (RSET_RANGE(RID_R4, RID_R19)) ++#if LJ_SOFTFP ++#define RSET_SCRATCH_FPR 0 ++#else ++#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F23+1) ++#endif ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++#define REGARG_FIRSTGPR RID_R4 ++#define REGARG_LASTGPR RID_R11 ++#define REGARG_NUMGPR 8 ++#if LJ_ABI_SOFTFP ++#define REGARG_FIRSTFPR 0 ++#define REGARG_LASTFPR 0 ++#define REGARG_NUMFPR 0 ++#else ++#define REGARG_FIRSTFPR RID_F0 ++#define REGARG_LASTFPR RID_F7 ++#define REGARG_NUMFPR 8 ++#endif ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame. ++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. 
++*/ ++#define SPS_FIXED 4 ++#define SPS_FIRST 4 //TODO ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) //TODO ++ ++/* -- Exit state ---------------------------------------------------------- */ ++ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++#if !LJ_SOFTFP ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++#endif ++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* Highest exit + 1 indicates stack check. */ ++#define EXITSTATE_CHECKEXIT 1 ++ ++/* Return the address of a per-trace exit stub. */ ++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) ++{ ++ while (*p == 0x03400000) p++; /* Skip LAI_NOP. */ ++ return p; ++} ++/* Avoid dependence on lj_jit.h if only including lj_target.h. */ ++#define exitstub_trace_addr(T, exitno) \ ++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. */ ++#define LAF_D(r) (r) ++#define LAF_J(r) ((r) << 5) ++#define LAF_K(r) ((r) << 10) ++#define LAF_A(r) ((r) << 15) ++#define LAF_FC(r) ((r) << 5) ++#define LAF_I(n) ((n) << 10) ++#define LAF_I20(n) ((n) << 5) ++#define LAF_M(n) ((n) << 16) ++ ++/* Check for valid field range. */ ++#define LAF_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) ++ ++typedef enum LAIns { ++/* Integer instructions. */ ++ LAI_MOVE = 0x00150000, ++ LAI_NOP = 0x03400000, ++ ++ LAI_LU = 0x03800000, ++ ++ LAI_AND = 0x00148000, ++ LAI_ANDI = 0x03400000, ++ LAI_OR = 0x00150000, ++ LAI_ORI = 0x03800000, ++ LAI_XOR = 0x00158000, ++ LAI_XORI = 0x03c00000, ++ LAI_NOR = 0x00140000, ++ ++ LAI_SLT = 0x00120000, ++ LAI_SLTU = 0x00128000, ++ LAI_SLTI = 0x02000000, ++ LAI_SLTUI = 0x02400000, ++ ++ LAI_ADD_W = 0x00100000, ++ LAI_ADDI_W = 0x02800000, ++ LAI_SUB_W = 0x00110000, ++ LAI_MUL_W = 0x001c0000, ++ LAI_MULH_W = 0x001c8000, ++ LAI_DIV_W = 0x00200000, ++ LAI_DIV_WU = 0x00210000, ++ ++ LAI_SLLI_W = 0x00408000, ++ LAI_SRLI_W = 0x00448000, ++ LAI_SRAI_W = 0x00488000, ++ LAI_ROTRI_W = 0x004c8000, ++ LAI_ROTRI_D = 0x004d0000, ++ LAI_SLL_W = 0x00170000, ++ LAI_SRL_W = 0x00178000, ++ LAI_SRA_W = 0x00180000, ++ LAI_ROTR_W = 0x001b0000, ++ LAI_ROTR_D = 0x001b8000, ++ ++ LAI_EXT_W_B = 0x00005c00, ++ LAI_EXT_W_H = 0x00005800, ++ LAI_REVB_2H = 0x00003000, ++ LAI_REVB_4H = 0x00003400, ++ ++ LAI_ALSL_W = 0x00040000, ++ LAI_ALSL_D = 0x002c0000, ++ ++ LAI_B = 0x50000000, ++ LAI_BL = 0x54000000, ++ LAI_JIRL = 0x4c000000, ++ ++ LAI_BEQ = 0x58000000, ++ LAI_BNE = 0x5c000000, ++ LAI_BLT = 0x60000000, ++ LAI_BGE = 0x64000000, ++ LAI_BCEQZ = 0x48000000, ++ LAI_BCNEZ = 0x48000100, ++ ++ /* Load/store instructions. 
*/ ++ LAI_LD_W = 0x28800000, ++ LAI_LD_D = 0x28c00000, ++ LAI_ST_W = 0x29800000, ++ LAI_ST_D = 0x29c00000, ++ LAI_LD_B = 0x28000000, ++ LAI_ST_B = 0x29000000, ++ LAI_LD_H = 0x28400000, ++ LAI_ST_H = 0x29400000, ++ LAI_LD_BU = 0x2a000000, ++ LAI_LD_HU = 0x2a400000, ++ LAI_LDX_B = 0x38000000, ++ LAI_LDX_BU = 0x38200000, ++ LAI_LDX_H = 0x38040000, ++ LAI_LDX_HU = 0x38240000, ++ LAI_LDX_D = 0x380c0000, ++ LAI_STX_D = 0x381c0000, ++ LAI_LDX_W = 0x38080000, ++ LAI_STX_W = 0x38180000, ++ LAI_FLD_S = 0x2b000000, ++ LAI_FST_S = 0x2b400000, ++ LAI_FLD_D = 0x2b800000, ++ LAI_FST_D = 0x2bc00000, ++ LAI_FLDX_D = 0x38340000, ++ LAI_FLDX_S = 0x38300000, ++ LAI_FSTX_D = 0x383c0000, ++ LAI_FSTX_S = 0x38380000, ++ ++ /* LA64 instructions. */ ++ LAI_ADD_D = 0x00108000, ++ LAI_ADDI_D = 0x02c00000, ++ LAI_ADDU16I_D = 0x10000000, ++ LAI_LU12I_W = 0x14000000, ++ LAI_LU32I_D = 0x16000000, ++ LAI_LU52I_D = 0x3000000, ++ LAI_SUB_D = 0x00118000, ++ LAI_DIV_D = 0x00220000, ++ LAI_DIV_DU = 0x00230000, ++ LAI_MUL_D = 0x001d8000, ++ ++ LAI_SLLI_D = 0x00410000, ++ LAI_SRLI_D = 0x00450000, ++ LAI_SLL_D = 0x00188000, ++ LAI_SRL_D = 0x00190000, ++ LAI_SRAI_D = 0x00490000, ++ LAI_SRA_D = 0x00198000, ++ LAI_REVH_D = 0x00004400, ++ ++ /* Extract/insert instructions. */ ++ LAI_BSTRPICK_D = 0x00c00000, ++ LAI_BSTRINS_W = 0x00600000, ++ LAI_BSTRINS_D = 0x00800000, ++ ++ LAI_MASKEQZ = 0x00130000, ++ LAI_MASKNEZ = 0x00138000, ++ ++ LAI_FRINT_S = 0x011e4400, ++ LAI_FRINT_D = 0x011e4800, ++ LAI_FTINTRM_L_D = 0x011a2800, ++ LAI_FTINTRP_L_D = 0x011a6800, ++ LAI_FTINTRNE_L_D = 0x011ae800, ++ ++ /* FP instructions. */ ++ LAI_FMOV_S = 0x01149400, ++ LAI_FMOV_D = 0x01149800, ++ ++ LAI_FABS_D = 0x01140800, ++ LAI_FNEG_D = 0x01141800, ++ ++ LAI_FADD_D = 0x01010000, ++ LAI_FSUB_D = 0x01030000, ++ LAI_FMUL_D = 0x01050000, ++ LAI_FDIV_D = 0x01070000, ++ LAI_FSQRT_D = 0x01144800, ++ ++ LAI_FMIN_D = 0x010b0000, ++ LAI_FMAX_D = 0x01090000, ++ ++ LAI_FADD_S = 0x01008000, ++ LAI_FSUB_S = 0x01028000, ++ ++ LAI_FCVT_D_S = 0x01192400, ++ LAI_FTINT_W_S = 0x011b0400, ++ LAI_FCVT_S_D = 0x01191800, ++ LAI_FTINT_W_D = 0x011b0800, ++ LAI_FFINT_S_W = 0x011d1000, ++ LAI_FFINT_D_W = 0x011d2000, ++ LAI_FFINT_S_L = 0x011d1800, ++ LAI_FFINT_D_L = 0x011d2800, ++ ++ LAI_FTINTRZ_W_S = 0x011a8400, ++ LAI_FTINTRZ_W_D = 0x011a8800, ++ LAI_FTINTRZ_L_S = 0x011aa400, ++ LAI_FTINTRZ_L_D = 0x011aa800, ++ LAI_FTINTRM_W_S = 0x011a0400, ++ LAI_FTINTRM_W_D = 0x011a0800, ++ ++ LAI_MOVFR2GR_S = 0x0114b400, ++ LAI_MOVGR2FR_W = 0x0114a400, ++ LAI_MOVGR2FR_D = 0x0114a800, ++ LAI_MOVFR2GR_D = 0x0114b800, ++ ++ LAI_FCMP_CEQ_D = 0x0c220000, ++ LAI_FCMP_CLT_S = 0x0c110000, ++ LAI_FCMP_CLT_D = 0x0c210000, ++ LAI_FCMP_CLE_D = 0x0c230000, ++ LAI_FCMP_CULE_D = 0x0c270000, ++ LAI_FCMP_CULT_D = 0x0c250000, ++ LAI_FCMP_CNE_D = 0x0c280000, ++ LAI_FSEL = 0x0d000000, ++} LAIns; ++ ++#endif ++ +diff --git a/libs/luajit/LuaJIT-src/src/lj_trace.c b/libs/luajit/LuaJIT-src/src/lj_trace.c +index d85b47f..021fd49 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_trace.c ++++ b/libs/luajit/LuaJIT-src/src/lj_trace.c +@@ -325,17 +325,17 @@ void lj_trace_initstate(global_State *g) + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); + J->k32[LJ_K32_M2P64_31] = LJ_64 ? 
0xdf800000 : 0xcf000000; + #endif +-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 ++#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); + #endif + #if LJ_TARGET_PPC + J->k32[LJ_K32_2P52_2P31] = 0x59800004; + J->k32[LJ_K32_2P52] = 0x59800000; + #endif +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + J->k32[LJ_K32_2P31] = 0x4f000000; + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); + #if LJ_64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); +diff --git a/libs/luajit/LuaJIT-src/src/lj_vm.h b/libs/luajit/LuaJIT-src/src/lj_vm.h +index 1cc7eed..8bad4e6 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_vm.h ++++ b/libs/luajit/LuaJIT-src/src/lj_vm.h +@@ -54,7 +54,8 @@ LJ_ASMF void lj_vm_exit_handler(void); + LJ_ASMF void lj_vm_exit_interp(void); + + /* Internal math helper functions. */ +-#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) ++#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)\ ++|| (LJ_TARGET_LOONGARCH64 && LJ_ABI_SOFTFP) + #define lj_vm_floor floor + #define lj_vm_ceil ceil + #else +diff --git a/libs/luajit/LuaJIT-src/src/lj_vmmath.c b/libs/luajit/LuaJIT-src/src/lj_vmmath.c +index b231d3e..8484220 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_vmmath.c ++++ b/libs/luajit/LuaJIT-src/src/lj_vmmath.c +@@ -57,7 +57,7 @@ double lj_vm_foldarith(double x, double y, int op) + } + } + +-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS ++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; +diff --git a/libs/luajit/LuaJIT-src/src/vm_loongarch64.dasc b/libs/luajit/LuaJIT-src/src/vm_loongarch64.dasc +new file mode 100644 +index 0000000..b91092a +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/vm_loongarch64.dasc +@@ -0,0 +1,5219 @@ ++|// Low-level VM code for LoongArch CPUs. ++|// Bytecode interpreter, fast functions and helper functions. ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2021 Loongson Technology. All rights reserved. ++| ++|.arch loongarch64 ++|.section code_op, code_sub ++| ++|.actionlist build_actionlist ++|.globals GLOB_ ++|.globalnames globnames ++|.externnames extnames ++| ++|// Note: The ragged indentation of the instructions is intentional. ++|// The starting columns indicate data dependencies. ++| ++|//----------------------------------------------------------------------- ++| ++|// Fixed register assignments for the interpreter. 
++|// Don't use: r0 = 0, r1 = ra, r2 = tp, r3 = sp, r21 = reserved ++| ++|.macro .FPU, a, b, c ++|.if FPU ++| a, b, c ++|.endif ++|.endmacro ++| ++|.macro .FPU2, a, b ++|.if FPU ++| a, b ++|.endif ++|.endmacro ++| ++|.macro .LI, a, b ++| addu16i.d r20, r0, b ++| srai.d r20, r20, 16 ++| or a, r0, r20 ++|.endmacro ++| ++|.macro .LUI, a, b ++| addi.w a, r0, b>>5 ++| slli.w a, a, 5 ++| ori a, a, b&0x1f ++| slli.w a, a, 16 ++|.endmacro ++| ++|.macro .STXW, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| stx.w a, b, r20 ++|.endmacro ++| ++|.macro .STXD, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| stx.d a, b, r20 ++|.endmacro ++| ++|.macro .LDXW, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.w a, b, r20 ++|.endmacro ++| ++|.macro .LDXD, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.d a, b, r20 ++|.endmacro ++| ++|.macro .LDXBU, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.bu a, b, r20 ++|.endmacro ++| ++|.macro .DADDIU, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| add.d a, b, r20 ++|.endmacro ++| ++|// The following must be C callee-save (but BASE is often refetched). ++|.define BASE, r23 // Base of current Lua stack frame. ++|.define KBASE, r24 // Constants of current Lua function. ++|.define PC, r25 // Next PC. ++|.define DISPATCH, r26 // Opcode dispatch table. ++|.define LREG, r27 // Register holding lua_State (also in SAVE_L). ++|.define MULTRES, r28 // Size of multi-result: (nresults+1)*8. ++| ++|.define JGL, r22 // On-trace: global_State + 32768. ++| ++|// Constants for type-comparisons, stores and conversions. C callee-save. ++|.define TISNIL, r22 ++|.define TISNUM, r29 ++|.if FPU ++|.define TOBIT, f30 // 2^52 + 2^51. ++|.endif ++| ++|// The following temporaries are not saved across C calls, except for RA. ++|.define RA, r30 // Callee-save. ++|.define RB, r8 ++|.define RC, r9 ++|.define RD, r10 ++|.define INS, r11 ++| ++|.define AT, r19 ++|.define TMP0, r12 ++|.define TMP1, r13 ++|.define TMP2, r14 ++|.define TMP3, r15 ++| ++|// Loongarch lp64 calling convention. ++|.define CFUNCADDR, r16 ++|.define CARG1, r4 ++|.define CARG2, r5 ++|.define CARG3, r6 ++|.define CARG4, r7 ++|.define CARG5, r8 ++|.define CARG6, r9 ++|.define CARG7, r10 ++|.define CARG8, r11 ++| ++|.define CRET1, r4 ++|.define CRET2, r5 ++| ++|.if FPU ++|.define FARG1, f0 ++|.define FARG2, f1 ++|.define FARG3, f2 ++|.define FARG4, f3 ++|.define FARG5, f4 ++|.define FARG6, f5 ++|.define FARG7, f6 ++|.define FARG8, f7 ++| ++|.define FRET1, f22 ++|.define FRET2, f23 ++| ++|.define FTMP0, f18 ++|.define FTMP1, f19 ++|.define FTMP2, f20 ++| ++|.define FCC0, fcc0 ++|.define FCC1, fcc1 ++|.endif ++| ++|// Stack layout while in interpreter. Must match with lj_frame.h. ++|.if FPU // LoongArch64 hard-float. ++| ++|.define CFRAME_SPACE, 192 // Delta for sp. ++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 188 // 32 bit values. ++|.define SAVE_NRES, 184 ++|.define SAVE_CFRAME, 176 // 64 bit values. ++|.define SAVE_L, 168 ++|.define SAVE_PC, 160 ++|//----- 16 byte aligned ++|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. ++|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. ++| ++|.else // LoongArch64 soft-float ++| ++|.define CFRAME_SPACE, 128 // Delta for sp. ++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 124 // 32 bit values. ++|.define SAVE_NRES, 120 ++|.define SAVE_CFRAME, 112 // 64 bit values. 
++|.define SAVE_L, 104 ++|.define SAVE_PC, 96 ++|//----- 16 byte aligned ++|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. ++| ++|.endif ++| ++|.define TMPX, 8 // Unused by interpreter, temp for JIT code. ++|.define TMPD, 0 ++|//----- 16 byte aligned ++| ++|.define TMPD_OFS, 0 ++| ++|//.define SAVE_MULTRES, sp, TMPD ++| ++|//----------------------------------------------------------------------- ++| ++|.macro saveregs ++| addi.d sp, sp, -CFRAME_SPACE ++| st.d ra, SAVE_GPR_+9*8(sp) ++| st.d r22, SAVE_GPR_+8*8(sp) ++| .FPU2 fst.d f31, SAVE_FPR_+7*8(sp) ++| st.d r30, SAVE_GPR_+7*8(sp) ++| .FPU2 fst.d f30, SAVE_FPR_+6*8(sp) ++| st.d r29, SAVE_GPR_+6*8(sp) ++| .FPU2 fst.d f29, SAVE_FPR_+5*8(sp) ++| st.d r28, SAVE_GPR_+5*8(sp) ++| .FPU2 fst.d f28, SAVE_FPR_+4*8(sp) ++| st.d r27, SAVE_GPR_+4*8(sp) ++| .FPU2 fst.d f27, SAVE_FPR_+3*8(sp) ++| st.d r26, SAVE_GPR_+3*8(sp) ++| .FPU2 fst.d f26, SAVE_FPR_+2*8(sp) ++| st.d r25, SAVE_GPR_+2*8(sp) ++| .FPU2 fst.d f25, SAVE_FPR_+1*8(sp) ++| st.d r24, SAVE_GPR_+1*8(sp) ++| .FPU2 fst.d f24, SAVE_FPR_+0*8(sp) ++| st.d r23, SAVE_GPR_+0*8(sp) ++|.endmacro ++| ++|.macro restoreregs_ret ++| ld.d ra, SAVE_GPR_+9*8(sp) ++| ld.d r22, SAVE_GPR_+8*8(sp) ++| ld.d r30, SAVE_GPR_+7*8(sp) ++| .FPU2 fld.d f31, SAVE_FPR_+7*8(sp) ++| ld.d r29, SAVE_GPR_+6*8(sp) ++| .FPU2 fld.d f30, SAVE_FPR_+6*8(sp) ++| ld.d r28, SAVE_GPR_+5*8(sp) ++| .FPU2 fld.d f29, SAVE_FPR_+5*8(sp) ++| ld.d r27, SAVE_GPR_+4*8(sp) ++| .FPU2 fld.d f28, SAVE_FPR_+4*8(sp) ++| ld.d r26, SAVE_GPR_+3*8(sp) ++| .FPU2 fld.d f27, SAVE_FPR_+3*8(sp) ++| ld.d r25, SAVE_GPR_+2*8(sp) ++| .FPU2 fld.d f26, SAVE_FPR_+2*8(sp) ++| ld.d r24, SAVE_GPR_+1*8(sp) ++| .FPU2 fld.d f25, SAVE_FPR_+1*8(sp) ++| ld.d r23, SAVE_GPR_+0*8(sp) ++| .FPU2 fld.d f24, SAVE_FPR_+0*8(sp) ++| addi.d sp, sp, CFRAME_SPACE ++| jirl r0, ra, 0 ++|.endmacro ++| ++|// Type definitions. Some of these are only used for documentation. ++|.type L, lua_State, LREG ++|.type GL, global_State ++|.type TVALUE, TValue ++|.type GCOBJ, GCobj ++|.type STR, GCstr ++|.type TAB, GCtab ++|.type LFUNC, GCfuncL ++|.type CFUNC, GCfuncC ++|.type PROTO, GCproto ++|.type UPVAL, GCupval ++|.type NODE, Node ++|.type NARGS8, int ++|.type TRACE, GCtrace ++|.type SBUF, SBuf ++| ++|//----------------------------------------------------------------------- ++| ++|// Trap for not-yet-implemented parts. TODO ++|.macro NYI; .long 0xf0f0f0f0; .endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Access to frame relative to BASE. ++|.define FRAME_PC, -8 ++|.define FRAME_FUNC, -16 ++| ++|//----------------------------------------------------------------------- ++| ++|// Endian-specific defines. LoongArch is little endian. TODO ++|.define HI, 4 ++|.define LO, 0 ++|.define OFS_RD, 2 ++|.define OFS_RA, 1 ++|.define OFS_OP, 0 ++| ++|// Instruction decode. 
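++|// (Added note:) one bytecode instruction is 32 bits: OP in bits 0-7, RA in bits 8-15, and
++|// either RD in bits 16-31 or RC in bits 16-23 with RB in bits 24-31. The *8 decoders below
++|// also scale the operand by 8, since stack slots and dispatch-table entries are 8 bytes each.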
++|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro ++|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro ++|.macro decode_OP8b, dst; slli.w dst, dst, 3; .endmacro ++|.macro decode_RC8a, dst, ins; srli.w dst, ins, 13; .endmacro ++|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro ++|.macro decode_RD4b, dst; slli.w dst, dst, 2; .endmacro ++|.macro decode_RA8a, dst, ins; srli.w dst, ins, 5; .endmacro ++|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro ++|.macro decode_RB8a, dst, ins; srli.w dst, ins, 21; .endmacro ++|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro ++|.macro decode_RD8a, dst, ins; srli.w dst, ins, 16; .endmacro ++|.macro decode_RD8b, dst; slli.w dst, dst, 3; .endmacro ++|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro ++| ++|// Instruction fetch. ++|.macro ins_NEXT1 ++| ld.w INS, 0(PC) ++| addi.d PC, PC, 4 ++|.endmacro ++|// Instruction decode+dispatch. ++|.macro ins_NEXT2 ++| decode_OP8a TMP1, INS ++| decode_OP8b TMP1 ++| add.d TMP0, DISPATCH, TMP1 ++| decode_RD8a RD, INS ++| ld.d AT, 0(TMP0) ++| decode_RA8a RA, INS ++| decode_RD8b RD ++| decode_RA8b RA ++| jirl r0, AT, 0 ++|.endmacro ++|.macro ins_NEXT ++| ins_NEXT1 ++| ins_NEXT2 ++|.endmacro ++| ++|// Instruction footer. ++|.if 1 ++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. ++| .define ins_next, ins_NEXT ++| .define ins_next_, ins_NEXT ++| .define ins_next1, ins_NEXT1 ++| .define ins_next2, ins_NEXT2 ++|.else ++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. ++| // Affects only certain kinds of benchmarks (and only with -j off). ++| .macro ins_next ++| b ->ins_next ++| .endmacro ++| .macro ins_next1 ++| .endmacro ++| .macro ins_next2 ++| b ->ins_next ++| .endmacro ++| .macro ins_next_ ++| ->ins_next: ++| ins_NEXT ++| .endmacro ++|.endif ++| ++|// Call decode and dispatch. ++|.macro ins_callt ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| ld.d PC, LFUNC:RB->pc ++| ld.w INS, 0(PC) ++| addi.d PC, PC, 4 ++| decode_OP8a TMP1, INS ++| decode_RA8a RA, INS ++| decode_OP8b TMP1 ++| decode_RA8b RA ++| add.d TMP0, DISPATCH, TMP1 ++| ld.d TMP0, 0(TMP0) ++| add.d RA, RA, BASE ++| jirl r0, TMP0, 0 ++|.endmacro ++| ++|.macro ins_call ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC ++| st.d PC, FRAME_PC(BASE) ++| ins_callt ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|.macro branch_RD ++| srli.w TMP0, RD, 1 ++| .LUI AT, (-(BCBIAS_J*4 >> 16) & 65535) ++| add.w TMP0, TMP0, AT ++| add.d PC, PC, TMP0 ++|.endmacro ++| ++|// Assumes DISPATCH is relative to GL. ++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) ++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++| ++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) ++| ++|.macro hotcheck, delta, target ++| srli.d TMP1, PC, 1 ++| andi TMP1, TMP1, 126 ++| add.d TMP1, TMP1, DISPATCH ++| ld.hu TMP2, GG_DISP2HOT(TMP1) ++| addi.w TMP2, TMP2, -delta ++| st.h TMP2, GG_DISP2HOT(TMP1) ++| blt TMP2, r0, target ++|.endmacro ++| ++|.macro hotloop ++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop ++|.endmacro ++| ++|.macro hotcall ++| hotcheck HOTCOUNT_CALL, ->vm_hotcall ++|.endmacro ++| ++|// Set current VM state. Uses TMP0. 
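++|// Note: the state is stored one's-complemented (~LJ_VMST_*), so a negative
++|// vmstate means interpreter/C/GC, while non-negative values denote the
++|// number of the currently executing trace.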
++|.macro li_vmstate, st; addi.w TMP0, r0, ~LJ_VMST_..st; .endmacro ++|.macro st_vmstate; .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate); .endmacro ++| ++|// Move table write barrier back. Overwrites mark and tmp. ++|.macro barrierback, tab, mark, tmp, target ++| .LDXD tmp, DISPATCH, DISPATCH_GL(gc.grayagain) ++| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) ++| .STXD tab, DISPATCH, DISPATCH_GL(gc.grayagain) ++| st.b mark, tab->marked ++| st.d tmp, tab->gclist ++| beq r0, r0, target ++|.endmacro ++| ++|// Clear type tag. Isolate lowest 47 bits of reg. ++|.macro cleartp, reg; bstrpick.d reg, reg, 46, 0; .endmacro ++|.macro cleartp, dst, reg; bstrpick.d dst, reg, 46, 0; .endmacro ++| ++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. ++|.macro settp, dst, tp; bstrins.d dst, tp, 63, 47; .endmacro ++| ++|// Extract (negative) type tag. ++|.macro gettp, dst, src; srai.d dst, src, 47; .endmacro ++| ++|// Macros to check the TValue type and extract the GCobj. Branch on failure. ++|.macro checktp, reg, tp, target ++| gettp AT, reg ++| addi.d AT, AT, tp ++| cleartp reg ++| bnez AT, target ++|.endmacro ++|.macro checktp, dst, reg, tp, target ++| gettp AT, reg ++| addi.d AT, AT, tp ++| cleartp dst, reg ++| bnez AT, target ++|.endmacro ++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro ++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro ++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro ++|.macro checkint, reg, target ++| gettp AT, reg ++| bne AT, TISNUM, target ++|.endmacro ++|.macro checknum, reg, target ++| gettp AT, reg ++| sltui AT, AT, LJ_TISNUM ++|// or TMP0, r0, LJ_TISNUM ++|// sltu AT, AT, TMP0 ++| beqz AT, target ++|.endmacro ++| ++|.macro mov_false, reg ++| addi.d reg, r0, 0x0001 ++| slli.d reg, reg, 47 ++| nor reg, reg, r0 ++|.endmacro ++|.macro mov_true, reg ++| addi.d reg, r0, 0x0001 ++| slli.d reg, reg, 48 ++| nor reg, reg, r0 ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++ ++/* Generate subroutines used by opcodes and other parts of the VM. */ ++/* The .code_sub section should be last to help static branch prediction. */ ++static void build_subroutines(BuildCtx *ctx) ++{ ++ |.code_sub ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Return handling ---------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_returnp: ++ | // See vm_return. Also: TMP2 = previous base. ++ | andi AT, PC, FRAME_P ++ |// beqz AT, ->cont_dispatch ++ | ++ | // Return from pcall or xpcall fast func. ++ | mov_true TMP1 ++ | beqz AT, ->cont_dispatch ++ | ld.d PC, FRAME_PC(TMP2) // Fetch PC of previous frame. ++ | or BASE, TMP2, r0 // Restore caller base. ++ | // Prepending may overwrite the pcall frame, so do it at the end. ++ | st.d TMP1, -8(RA) // Prepend true to results. ++ | addi.d RA, RA, -8 ++ | ++ |->vm_returnc: ++ | addi.w RD, RD, 8 // RD = (nresults+1)*8. ++ | andi TMP0, PC, FRAME_TYPE ++ | addi.w CRET1, r0, LUA_YIELD ++ | beqz RD, ->vm_unwind_c_eh ++ | or MULTRES, RD, r0 ++ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. ++ | ++ |->vm_return: ++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return ++ | // TMP0 = PC & FRAME_TYPE ++ | addi.w TMP2, r0, -8 ++ | xori AT, TMP0, FRAME_C ++ | and TMP2, PC, TMP2 ++ | sub.d TMP2, BASE, TMP2 // TMP2 = previous base. 
++ | bnez AT, ->vm_returnp ++ | ++ | addi.w TMP1, RD, -8 ++ | st.d TMP2, L->base ++ | li_vmstate C ++ | ld.w TMP2, SAVE_NRES(sp) ++ | addi.d BASE, BASE, -16 ++ | st_vmstate ++ | slli.w TMP2, TMP2, 3 ++ | beqz TMP1, >2 ++ |1: ++ | addi.w TMP1, TMP1, -8 ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | st.d CRET1, 0(BASE) ++ | addi.d BASE, BASE, 8 ++ | bnez TMP1, <1 ++ | ++ |2: ++ | bne TMP2, RD, >6 ++ |3: ++ | st.d BASE, L->top // Store new top. ++ | ++ |->vm_leave_cp: ++ | ld.d TMP0, SAVE_CFRAME(sp) // Restore previous C frame. ++ | or CRET1, r0, r0 // Ok return status for vm_pcall. ++ | st.d TMP0, L->cframe ++ | ++ |->vm_leave_unw: ++ | restoreregs_ret ++ | ++ |6: ++ | ld.d TMP1, L->maxstack ++ | slt AT, TMP2, RD ++ | or r17, AT, r0 ++ |// bnez AT, >7 // Less results wanted? ++ | // More results wanted. Check stack size and fill up results with nil. ++ | slt AT, BASE, TMP1 ++ | bnez r17, >7 ++ | beqz AT, >8 ++ | st.d TISNIL, 0(BASE) ++ | addi.w RD, RD, 8 ++ | addi.d BASE, BASE, 8 ++ | beq r0, r0, <2 ++ | ++ |7: // Less results wanted. ++ | sub.w TMP0, RD, TMP2 ++ | sub.d TMP0, BASE, TMP0 // Either keep top or shrink it. ++ | maskeqz TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? ++ | masknez BASE, BASE, TMP2 ++ | or BASE, BASE, TMP0 ++ | b <3 ++ | ++ |8: // Corner case: need to grow stack for filling up results. ++ | // This can happen if: ++ | // - A C function grows the stack (a lot). ++ | // - The GC shrinks the stack in between. ++ | // - A return back from a lua_call() with (high) nresults adjustment. ++ | ++ | st.d BASE, L->top // Save current top held in BASE (yes). ++ | or MULTRES, RD, r0 ++ | srli.w CARG2, TMP2, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.w TMP2, SAVE_NRES(sp) ++ | ld.d BASE, L->top // Need the (realloced) L->top in BASE. ++ | or RD, MULTRES, r0 ++ | slli.w TMP2, TMP2, 3 ++ | beq r0, r0, <2 ++ ++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. ++ | // (void *cframe, int errcode) ++ | or sp, CARG1, r0 ++ | or CRET1, CARG2, r0 ++ |->vm_unwind_c_eh: // Landing pad for external unwinder. ++ | ld.d L, SAVE_L(sp) ++ | addi.w TMP0, r0, ~LJ_VMST_C ++ | ld.d GL:TMP1, L->glref ++ | st.w TMP0, GL:TMP1->vmstate ++ | beq r0, r0, ->vm_leave_unw ++ | ++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. ++ | // (void *cframe) ++ | .LI AT, -4 ++ | and sp, CARG1, AT ++ |->vm_unwind_ff_eh: // Landing pad for external unwinder. ++ | ld.d L, SAVE_L(sp) ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM ++ | ld.d BASE, L->base ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | mov_false TMP1 ++ | li_vmstate INTERP ++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of previous frame. ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | addi.d RA, BASE, -8 // Results start at BASE-8. ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.d TMP1, 0(RA) // Prepend false to error message. ++ | st_vmstate ++ | .LI RD, 16 // 2 results: false + error message. ++ | beq r0, r0, ->vm_returnc ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Grow stack for calls ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_growstack_c: // Grow stack for C function. ++ | .LI CARG2, LUA_MINSTACK ++ | beq r0, r0, >2 ++ | ++ |->vm_growstack_l: // Grow stack for Lua function. 
++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC ++ | add.d RC, BASE, RC ++ | sub.d RA, RA, BASE ++ | st.d BASE, L->base ++ | addi.d PC, PC, 4 // Must point after first instruction. ++ | st.d RC, L->top ++ | srli.w CARG2, RA, 3 ++ |2: ++ | // L->base = new base, L->top = top ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.d BASE, L->base ++ | ld.d RC, L->top ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | sub.d RC, RC, BASE ++ | cleartp LFUNC:RB ++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++ | ins_callt // Just retry the call. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Entry points into the assembler VM --------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_resume: // Setup C frame and resume thread. ++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) ++ | saveregs ++ | or L, CARG1, r0 ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | or BASE, CARG2, r0 ++ | ld.bu TMP1, L->status ++ | st.d L, SAVE_L(sp) ++ | .LI PC, FRAME_CP ++ | addi.d TMP0, sp, CFRAME_RESUME ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.w r0, SAVE_NRES(sp) ++ | st.w r0, SAVE_ERRF(sp) ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | st.d r0, SAVE_CFRAME(sp) ++ | st.d TMP0, L->cframe ++ | beqz TMP1, >3 ++ | ++ | // Resume after yield (like a return). ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | or RA, BASE, r0 ++ | ld.d BASE, L->base ++ | ld.d TMP1, L->top ++ | ld.d PC, FRAME_PC(BASE) ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | sub.d RD, TMP1, BASE ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | st.b r0, L->status ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | li_vmstate INTERP ++ | addi.d RD, RD, 8 ++ | st_vmstate ++ | or MULTRES, RD, r0 ++ | andi TMP0, PC, FRAME_TYPE ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM ++ | beqz TMP0, ->BC_RET_Z ++ | beq r0, r0, ->vm_return ++ | ++ |->vm_pcall: // Setup protected C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) ++ | saveregs ++ | st.w CARG4, SAVE_ERRF(sp) ++ | .LI PC, FRAME_CP ++ | beq r0, r0, >1 ++ | ++ |->vm_call: // Setup C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1) ++ | saveregs ++ | .LI PC, FRAME_C ++ | ++ |1: // Entry point for vm_pcall above (PC = ftype). ++ | ld.d TMP1, L:CARG1->cframe ++ | or L, CARG1, r0 ++ | st.w CARG3, SAVE_NRES(sp) ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | st.d CARG1, SAVE_L(sp) ++ | or BASE, CARG2, r0 ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | st.d TMP1, SAVE_CFRAME(sp) ++ | st.d sp, L->cframe // Add our C frame to cframe chain. ++ | ++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | ld.d TMP2, L->base // TMP2 = old base (used in vmeta_call). ++ | .FPU2 .LUI TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
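++ | // 0x59c00000 is the single-precision bit pattern of 2^52+2^51; it is
++ | // converted to a double below. Adding TOBIT to a double holding a 32-bit
++ | // integer value leaves that integer in the low word of the mantissa, which
++ | // is how the FPU path of vm_tobit_fb converts numbers to bit patterns.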
++ | ld.d TMP1, L->top ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | add.d PC, PC, BASE ++ | sub.d NARGS8:RC, TMP1, BASE ++ | .LI TISNUM, LJ_TISNUM ++ | sub.d PC, PC, TMP2 // PC = frame delta + frame type ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | li_vmstate INTERP ++ | .LI TISNIL, LJ_TNIL ++ | st_vmstate ++ | ++ |->vm_call_dispatch: ++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | checkfunc LFUNC:RB, ->vmeta_call ++ | ++ |->vm_call_dispatch_f: ++ | ins_call ++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC ++ | ++ |->vm_cpcall: // Setup protected C frame, call C. ++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) ++ | saveregs ++ | or L, CARG1, r0 ++ | ld.d TMP0, L:CARG1->stack ++ | st.d CARG1, SAVE_L(sp) ++ | ld.d TMP1, L->top ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | sub.d TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). ++ | ld.d TMP1, L->cframe ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.w TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. ++ | st.w r0, SAVE_ERRF(sp) // No error function. ++ | st.d TMP1, SAVE_CFRAME(sp) ++ | st.d sp, L->cframe // Add our C frame to cframe chain. ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | or CFUNCADDR, CARG4, r0 ++ | jirl r1, CARG4, 0 // (lua_State *L, lua_CFunction func, void *ud) ++ | or BASE, CRET1, r0 ++ | .LI PC, FRAME_CP ++ | bnez CRET1, <3 // Else continue with the call. ++ | beq r0, r0, ->vm_leave_cp // No base? Just remove C frame. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Metamethod handling ------------------------------------------------ ++ |//----------------------------------------------------------------------- ++ | ++ |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the ++ |// stack, so BASE doesn't need to be reloaded across these calls. ++ | ++ |//-- Continuation dispatch ---------------------------------------------- ++ | ++ |->cont_dispatch: ++ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 ++ | ld.d TMP0, -32(BASE) // Continuation. ++ | or RB, BASE, r0 ++ | or BASE, TMP2, r0 // Restore caller BASE. ++ | ld.d LFUNC:TMP1, FRAME_FUNC(TMP2) ++ |.if FFI ++ | sltui AT, TMP0, 2 ++ |.endif ++ | ld.d PC, -24(RB) // Restore PC from [cont|PC]. ++ | cleartp LFUNC:TMP1 ++ | add.d TMP2, RA, RD ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | st.d TISNIL, -8(TMP2) // Ensure one valid arg. ++ |.if FFI ++ | bnez AT, >1 ++ |.endif ++ | // BASE = base, RA = resultptr, RB = meta base ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | jirl r0, TMP0, 0 // Jump to continuation. ++ | ++ |.if FFI ++ |1: ++ | addi.d TMP1, RB, -32 ++ | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. ++ | // cont = 0: tailcall from C function. 
++ | sub.d RC, TMP1, BASE ++ | beq r0, r0, ->vm_call_tail ++ |.endif ++ | ++ |->cont_cat: // RA = resultptr, RB = meta base ++ | ld.w INS, -4(PC) ++ | addi.d CARG2, RB, -32 ++ | ld.d CRET1, 0(RA) ++ | decode_RB8a MULTRES, INS ++ | decode_RA8a RA, INS ++ | decode_RB8b MULTRES ++ | decode_RA8b RA ++ | add.d TMP1, BASE, MULTRES ++ | st.d BASE, L->base ++ | sub.d CARG3, CARG2, TMP1 ++ | st.d CRET1, 0(CARG2) ++ | bne TMP1, CARG2, ->BC_CAT_Z ++ | add.d RA, BASE, RA ++ | st.d CRET1, 0(RA) ++ | beq r0, r0, ->cont_nop ++ | ++ |//-- Table indexing metamethods ----------------------------------------- ++ | ++ |->vmeta_tgets1: ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tgets: ++ | .DADDIU CARG2, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TTAB ++ | .LI TMP1, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv2) ++ | st.d TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tgetb: // TMP0 = index ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(CARG3) ++ | ++ |->vmeta_tgetv: ++ |1: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | addi.d TMP1, BASE, -FRAME_CONT ++ | beqz CRET1, >3 ++ | ld.d CARG1, 0(CRET1) ++ | ins_next1 ++ | st.d CARG1, 0(RA) ++ | ins_next2 ++ | ++ |3: // Call __index metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k ++ | ld.d BASE, L->top ++ | st.d PC, -24(BASE) // [cont|PC] ++ | sub.d PC, BASE, TMP1 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | cleartp LFUNC:RB ++ | .LI NARGS8:RC, 16 // 2 args for func(t, k). ++ | beq r0, r0, ->vm_call_dispatch_f ++ | ++ |->vmeta_tgetr: ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | or CARG2, TISNIL, r0 ++ | beqz CRET1, ->BC_TGETR_Z ++ | ld.d CARG2, 0(CRET1) ++ | beq r0, r0, ->BC_TGETR_Z ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->vmeta_tsets1: ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tsets: ++ | .DADDIU CARG2, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TTAB ++ | .LI TMP1, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv2) ++ | st.d TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tsetb: // TMP0 = index ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(CARG3) ++ | ++ |->vmeta_tsetv: ++ |1: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | ld.d r17, 0(RA) ++ | beqz CRET1, >3 ++ | // NOBARRIER: lj_meta_tset ensures the table is not black. ++ | ins_next1 ++ | st.d r17, 0(CRET1) ++ | ins_next2 ++ | ++ |3: // Call __newindex metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) ++ | addi.d TMP1, BASE, -FRAME_CONT ++ | ld.d BASE, L->top ++ | st.d PC, -24(BASE) // [cont|PC] ++ | sub.d PC, BASE, TMP1 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 
++ | cleartp LFUNC:RB ++ | st.d r17, 16(BASE) // Copy value to third argument. ++ | .LI NARGS8:RC, 24 // 3 args for func(t, k, v) ++ | beq r0, r0, ->vm_call_dispatch_f ++ | ++ |->vmeta_tsetr: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // Returns TValue *. ++ | beq r0, r0, ->BC_TSETR_Z ++ | ++ |//-- Comparison metamethods --------------------------------------------- ++ | ++ |->vmeta_comp: ++ | // RA/RD point to o1/o2. ++ | or CARG2, RA, r0 ++ | or CARG3, RD, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | decode_OP1 CARG4, INS ++ | or CARG1, L, r0 ++ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // Returns 0/1 or TValue * (metamethod). ++ |3: ++ | sltui AT, CRET1, 2 ++ | beqz AT, ->vmeta_binop ++ | sub.w TMP2, r0, CRET1 ++ |4: ++ | ld.hu RD, OFS_RD(PC) ++ | addi.d PC, PC, 4 ++ | .LUI TMP1, (-(BCBIAS_J*4 >> 16) & 65535) ++ | slli.w RD, RD, 2 ++ | add.w RD, RD, TMP1 ++ | and RD, RD, TMP2 ++ | add.d PC, PC, RD ++ |->cont_nop: ++ | ins_next ++ | ++ |->cont_ra: // RA = resultptr ++ | ld.bu TMP1, -4+OFS_RA(PC) ++ | ld.d CRET1, 0(RA) ++ | slli.w TMP1, TMP1, 3 ++ | add.d TMP1, BASE, TMP1 ++ | st.d CRET1, 0(TMP1) ++ | beq r0, r0, ->cont_nop ++ | ++ |->cont_condt: // RA = resultptr ++ | ld.d TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltui AT, TMP0, LJ_TISTRUECOND ++ | sub.w TMP2, r0, AT // Branch if result is true. ++ | beq r0, r0, <4 ++ | ++ |->cont_condf: // RA = resultptr ++ | ld.d TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltui AT, TMP0, LJ_TISTRUECOND ++ | addi.w TMP2, AT, -1 // Branch if result is false. ++ | beq r0, r0, <4 ++ | ++ |->vmeta_equal: ++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. ++ | cleartp LFUNC:CARG3, CARG2 ++ | cleartp LFUNC:CARG2, CARG1 ++ | or CARG4, TMP0, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // Returns 0/1 or TValue * (metamethod). ++ | beq r0, r0, <3 ++ | ++ |->vmeta_equal_cd: ++ |.if FFI ++ | or CARG2, INS, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | // Returns 0/1 or TValue * (metamethod). ++ | beq r0, r0, <3 ++ |.endif ++ | ++ |->vmeta_istype: ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | srli.w CARG2, RA, 3 ++ | srli.w CARG3, RD, 3 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | beq r0, r0, ->cont_nop ++ | ++ |//-- Arithmetic metamethods --------------------------------------------- ++ | ++ |->vmeta_unm: ++ | or RC, RB, r0 ++ | ++ |->vmeta_arith: ++ | st.d BASE, L->base ++ | or CARG2, RA, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG3, RB, r0 ++ | or CARG4, RC, r0 ++ | decode_OP1 CARG5, INS // CARG5 == RB. ++ | or CARG1, L, r0 ++ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | beqz CRET1, ->cont_nop ++ | ++ | // Call metamethod for binary op. ++ |->vmeta_binop: ++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 ++ | sub.d TMP1, CRET1, BASE ++ | st.d PC, -24(CRET1) // [cont|PC] ++ | or TMP2, BASE, r0 ++ | addi.d PC, TMP1, FRAME_CONT ++ | or BASE, CRET1, r0 ++ | .LI NARGS8:RC, 16 // 2 args for func(o1, o2). 
++ | beq r0, r0, ->vm_call_dispatch ++ | ++ |->vmeta_len: ++ | // CARG2 already set by BC_LEN. ++#if LJ_52 ++ | or MULTRES, CARG1, r0 ++#endif ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_len // (lua_State *L, TValue *o) ++ | // Returns NULL (retry) or TValue * (metamethod base). ++#if LJ_52 ++ | bnez CRET1, ->vmeta_binop // Binop call for compatibility. ++ | or CARG1, MULTRES, r0 ++ | beq r0, r0, ->BC_LEN_Z ++#else ++ | beq r0, r0, ->vmeta_binop // Binop call for compatibility. ++#endif ++ | ++ |//-- Call metamethod ---------------------------------------------------- ++ | ++ |->vmeta_call: // Resolve and call __call metamethod. ++ | // TMP2 = old base, BASE = new base, RC = nargs*8 ++ | st.d TMP2, L->base // This is the callers base! ++ | addi.d CARG2, BASE, -16 ++ | st.d PC, SAVE_PC(sp) ++ | add.d CARG3, BASE, RC ++ | or MULTRES, NARGS8:RC, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:RB ++ | ins_call ++ | ++ |->vmeta_callt: // Resolve __call for BC_CALLT. ++ | // BASE = old base, RA = new base, RC = nargs*8 ++ | st.d BASE, L->base ++ | addi.d CARG2, RA, -16 ++ | st.d PC, SAVE_PC(sp) ++ | add.d CARG3, RA, RC ++ | or MULTRES, NARGS8:RC, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ld.d RB, FRAME_FUNC(RA) // Guaranteed to be a function here. ++ | ld.d TMP1, FRAME_PC(BASE) ++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:CARG3, RB ++ | beq r0, r0, ->BC_CALLT_Z ++ | ++ |//-- Argument coercion for 'for' statement ------------------------------ ++ | ++ |->vmeta_for: ++ | st.d BASE, L->base ++ | or CARG2, RA, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or MULTRES, INS, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_for // (lua_State *L, TValue *base) ++ |.if JIT ++ | decode_OP1 TMP0, MULTRES ++ | .LI AT, BC_JFORI ++ |.endif ++ | decode_RA8a RA, MULTRES ++ | decode_RD8a RD, MULTRES ++ | decode_RA8b RA ++ |.if JIT ++ | decode_RD8b RD ++ | beq TMP0, AT, =>BC_JFORI ++ | beq r0, r0, =>BC_FORI ++ |.else ++ | decode_RD8b RD ++ | beq r0, r0, =>BC_FORI ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Fast functions ----------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro .ffunc, name ++ |->ff_ .. name: ++ |.endmacro ++ | ++ |.macro .ffunc_1, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_2, name ++ |->ff_ .. name: ++ | sltui AT, NARGS8:RC, 16 ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | bnez AT, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_n, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | .FPU2 fld.d FARG1, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++#if name == math_sqrt ++ | fsqrt.d FRET1, FARG1 ++#endif ++ | checknum CARG1, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_nn, name // Caveat: has delay slot! ++ |->ff_ .. 
name: ++ | ld.d CARG1, 0(BASE) ++ | sltui AT, NARGS8:RC, 16 ++ | ld.d CARG2, 8(BASE) ++ | gettp TMP0, CARG1 ++ | bnez AT, ->fff_fallback ++ | gettp TMP1, CARG2 ++ | sltui TMP0, TMP0, LJ_TISNUM ++ | sltui TMP1, TMP1, LJ_TISNUM ++ | .FPU2 fld.d FARG1, 0(BASE) ++ | and TMP0, TMP0, TMP1 ++ | .FPU2 fld.d FARG2, 8(BASE) ++ | beqz TMP0, ->fff_fallback ++ |.endmacro ++ | ++ |// Inlined GC threshold check. ++ |.macro ffgccheck ++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total) ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold) ++ | blt TMP0, TMP1, >1 ++ | bl ->fff_gcstep ++ |1: ++ |.endmacro ++ | ++ |//-- Base library: checks ----------------------------------------------- ++ |.ffunc_1 assert ++ | gettp AT, CARG1 ++ | sltui AT, AT, LJ_TISTRUECOND ++ | addi.d RA, BASE, -16 ++ | beqz AT, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.w RD, NARGS8:RC, 8 // Compute (nresults+1)*8. ++ | add.d TMP2, RA, RD ++ | addi.d TMP1, BASE, 8 ++ | st.d CARG1, 0(RA) ++ | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. ++ |1: ++ | ld.d r17, 0(TMP1) ++ | st.d r17, -16(TMP1) ++ | or r18, TMP1, r0 ++ | addi.d TMP1, TMP1, 8 ++ | bne r18, TMP2, <1 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_1 type ++ | gettp TMP0, CARG1 ++ | sltu TMP1, TISNUM, TMP0 ++ | nor TMP2, TMP0, r0 ++ | .LI TMP3, ~LJ_TISNUM ++ | maskeqz TMP2, TMP2, TMP1 ++ | masknez TMP3, TMP3, TMP1 ++ | or TMP2, TMP2, TMP3 ++ | slli.d TMP2, TMP2, 3 ++ | add.d TMP2, CFUNC:RB, TMP2 ++ | ld.d CARG1, CFUNC:TMP2->upvalue ++ | beq r0, r0, ->fff_restv ++ | ++ |//-- Base library: getters and setters --------------------------------- ++ | ++ |.ffunc_1 getmetatable ++ | gettp TMP2, CARG1 ++ | addi.d TMP0, TMP2, -LJ_TTAB ++ | addi.d TMP1, TMP2, -LJ_TUDATA ++ | maskeqz TMP0, TMP1, TMP0 ++ | cleartp TAB:CARG1 ++ | bnez TMP0, >6 ++ |1: // Field metatable must be at same offset for GCtab and GCudata! ++ | ld.d TAB:RB, TAB:CARG1->metatable ++ |2: ++ | .LDXD STR:RC, DISPATCH, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable]) ++ | .LI CARG1, LJ_TNIL ++ | beqz TAB:RB, ->fff_restv ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->hash ++ | ld.d NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | slli.d TMP0, TMP1, 5 ++ | slli.d TMP1, TMP1, 3 ++ | sub.d TMP1, TMP0, TMP1 ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | .LI CARG4, LJ_TSTR ++ | settp STR:RC, CARG4 // Tagged key to look for. ++ |3: // Rearranged logic, because we expect _not_ to find the key. ++ | ld.d TMP0, NODE:TMP2->key ++ | ld.d CARG1, NODE:TMP2->val ++ | ld.d NODE:TMP2, NODE:TMP2->next ++ | .LI AT, LJ_TTAB ++ | beq RC, TMP0, >5 ++ | bnez NODE:TMP2, <3 ++ |4: ++ | or CARG1, RB, r0 ++ | settp CARG1, AT ++ | beq r0, r0, ->fff_restv // Not found, keep default result. ++ |5: ++ | bne CARG1, TISNIL, ->fff_restv ++ | beq r0, r0, <4 // Ditto for nil value. ++ | ++ |6: ++ | sltui AT, TMP2, LJ_TISNUM ++ | maskeqz TMP0, TISNUM, AT ++ | masknez AT, TMP2, AT ++ | or TMP2, TMP0, AT ++ | slli.d TMP2, TMP2, 3 ++ | sub.d TMP0, DISPATCH, TMP2 ++ | .LDXD TAB:RB, TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8 ++ | beq r0, r0, <2 ++ | ++ |.ffunc_2 setmetatable ++ | // Fast path: no mt for table yet and not clearing the mt. 
++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | gettp TMP3, CARG2 ++ | ld.d TAB:TMP0, TAB:TMP1->metatable ++ | ld.bu TMP2, TAB:TMP1->marked ++ | addi.d AT, TMP3, -LJ_TTAB ++ | cleartp TAB:CARG2 ++ | or AT, AT, TAB:TMP0 ++ | or r18, AT, r0 ++ | andi AT, TMP2, LJ_GC_BLACK // isblack(table) ++ | bnez r18, ->fff_fallback ++ | st.d TAB:CARG2, TAB:TMP1->metatable ++ | beqz AT, ->fff_restv ++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv ++ | ++ |.ffunc rawget ++ | ld.d CARG2, 0(BASE) ++ | sltui AT, NARGS8:RC, 16 ++ | gettp TMP0, CARG2 ++ | cleartp CARG2 ++ | addi.d TMP0, TMP0, -LJ_TTAB ++ | or AT, AT, TMP0 ++ | addi.d CARG3, BASE, 8 ++ | bnez AT, ->fff_fallback ++ | or CARG1, L, r0 ++ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | ld.d CARG1, 0(CRET1) ++ | beq r0, r0, ->fff_restv ++ | ++ |//-- Base library: conversions ------------------------------------------ ++ | ++ |.ffunc tonumber ++ | // Only handles the number case inline (without a base argument). ++ | ld.d CARG1, 0(BASE) ++ | xori AT, NARGS8:RC, 8 // Exactly one number argument. ++ | gettp TMP1, CARG1 ++ | sltu TMP0, TISNUM, TMP1 ++ | or AT, AT, TMP0 ++ | bnez AT, ->fff_fallback ++ | beq r0, r0, ->fff_restv ++ | ++ |.ffunc_1 tostring ++ | // Only handles the string or number case inline. ++ | gettp TMP0, CARG1 ++ | addi.d AT, TMP0, -LJ_TSTR ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]) ++ | // A __tostring method in the string base metatable is ignored. ++ | beqz AT, ->fff_restv // String key? ++ | // Handle numbers inline, unless a number base metatable is present. ++ | sltu TMP0, TISNUM, TMP0 ++ | or TMP0, TMP0, TMP1 ++ | st.d BASE, L->base // Add frame since C call can throw. ++ | bnez TMP0, ->fff_fallback ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | ffgccheck ++ | or CARG1, L, r0 ++ | or CARG2, BASE, r0 ++ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | // Returns GCstr *. ++ | .LI AT, LJ_TSTR ++ | settp CRET1, AT ++ | or CARG1, CRET1, r0 ++ | beq r0, r0, ->fff_restv ++ | ++ |//-- Base library: iterators ------------------------------------------- ++ | ++ |.ffunc_1 next ++ | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback ++ | add.d TMP2, BASE, NARGS8:RC ++ | st.d TISNIL, 0(TMP2) // Set missing 2nd arg to nil. ++ | ld.d PC, FRAME_PC(BASE) ++ | st.d BASE, L->base // Add frame since C call can throw. ++ | st.d BASE, L->top // Dummy frame length is ok. ++ | addi.d CARG3, BASE, 8 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) ++ | // Returns 0 at end of traversal. ++ | or r17, CRET1, r0 ++ | or CARG1, TISNIL, r0 ++ | beqz r17, ->fff_restv // End of traversal: return nil. 
++ | ld.d TMP0, 8(BASE) ++ | addi.d RA, BASE, -16 ++ | ld.d TMP2, 16(BASE) ++ | st.d TMP0, 0(RA) ++ | st.d TMP2, 8(RA) ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_1 pairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:TMP1->metatable ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++ | bnez TAB:TMP2, ->fff_fallback ++#else ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++#endif ++ | st.d TISNIL, 0(BASE) ++ | st.d CARG1, -8(BASE) ++ | st.d TMP0, 0(RA) ++ | .LI RD, (3+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_2 ipairs_aux ++ | checktab CARG1, ->fff_fallback ++ | ld.w TMP0, TAB:CARG1->asize ++ | checkint CARG2, ->fff_fallback ++ | ld.d TMP1, TAB:CARG1->array ++ | ld.d PC, FRAME_PC(BASE) ++ | slli.w TMP2, CARG2, 0 // sextw -> slli.w ++ | addi.w TMP2, TMP2, 1 ++ | sltu AT, TMP2, TMP0 ++ | addi.d RA, BASE, -16 ++ | bstrpick.d TMP0, TMP2, 31, 0 // zextw -> bstrpick.d ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(RA) ++ | beqz AT, >2 // Not in array part? ++ | slli.d TMP3, TMP2, 3 ++ | add.d TMP3, TMP1, TMP3 ++ | ld.d TMP1, 0(TMP3) ++ |1: ++ | .LI RD, (0+1)*8 ++ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. ++ | st.d TMP1, -8(BASE) ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ |2: // Check for empty hash part first. Otherwise call C function. ++ | ld.w TMP0, TAB:CARG1->hmask ++ | .LI RD, (0+1)*8 ++ | beqz TMP0, ->fff_res ++ | or CARG2, TMP2, r0 ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | .LI RD, (0+1)*8 ++ | beqz CRET1, ->fff_res ++ | ld.d TMP1, 0(CRET1) ++ | beq r0, r0, <1 ++ | ++ |.ffunc_1 ipairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:TMP1->metatable ++ | ld.d CFUNC:TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++ | bnez TAB:TMP2, ->fff_fallback ++#else ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++#endif ++ | slli.d AT, TISNUM, 47 ++ | st.d CARG1, -8(BASE) ++ | st.d AT, 0(BASE) ++ | st.d CFUNC:TMP0, 0(RA) ++ | .LI RD, (3+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |//-- Base library: catch errors ---------------------------------------- ++ | ++ |.ffunc pcall ++ | addi.d NARGS8:RC, NARGS8:RC, -8 ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | or TMP2, BASE, r0 ++ | blt NARGS8:RC, r0, ->fff_fallback ++ | addi.d BASE, BASE, 16 ++ | // Remember active hook before pcall. ++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | andi TMP3, TMP3, 1 ++ | addi.d PC, TMP3, 16+FRAME_PCALL ++ | beqz NARGS8:RC, ->vm_call_dispatch ++ |1: ++ | add.d TMP0, BASE, NARGS8:RC ++ |// beqz NARGS8:RC, ->vm_call_dispatch ++ |2: ++ | ld.d TMP1, -16(TMP0) ++ | st.d TMP1, -8(TMP0) ++ | addi.d TMP0, TMP0, -8 ++ | bne TMP0, BASE, <2 ++ | beq r0, r0, ->vm_call_dispatch ++ | ++ |.ffunc xpcall ++ | addi.d NARGS8:TMP0, NARGS8:RC, -16 ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | .LDXBU TMP1, DISPATCH, DISPATCH_GL(hookmask) ++ | blt NARGS8:TMP0, r0, ->fff_fallback ++ | gettp AT, CARG2 ++ | addi.d AT, AT, -LJ_TFUNC ++ | or TMP2, BASE, r0 ++ | bnez AT, ->fff_fallback // Traceback must be a function. ++ | or NARGS8:RC, NARGS8:TMP0, r0 ++ | addi.d BASE, BASE, 24 ++ | // Remember active hook before pcall. ++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | st.d CARG2, 0(TMP2) // Swap function and traceback. 
++ | andi TMP3, TMP3, 1 ++ | st.d CARG1, 8(TMP2) ++ | addi.d PC, TMP3, 24+FRAME_PCALL ++ | beqz NARGS8:RC, ->vm_call_dispatch ++ | beq r0, r0, <1 ++ | ++ |//-- Coroutine library -------------------------------------------------- ++ | ++ |.macro coroutine_resume_wrap, resume ++ |.if resume ++ |.ffunc_1 coroutine_resume ++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback ++ |.else ++ |.ffunc coroutine_wrap_aux ++ | ld.d L:CARG1, CFUNC:RB->upvalue[0].gcr ++ | cleartp L:CARG1 ++ |.endif ++ | ld.bu TMP0, L:CARG1->status ++ | ld.d TMP1, L:CARG1->cframe ++ | ld.d CARG2, L:CARG1->top ++ | ld.d TMP2, L:CARG1->base ++ | addi.w AT, TMP0, -LUA_YIELD ++ | add.d CARG3, CARG2, TMP0 ++ | addi.d TMP3, CARG2, 8 ++ | masknez CARG2, CARG2, AT ++ | maskeqz TMP3, TMP3, AT ++ | or CARG2, TMP3, CARG2 ++ | blt r0, AT, ->fff_fallback // st > LUA_YIELD? ++ | xor TMP2, TMP2, CARG3 ++ | or AT, TMP2, TMP0 ++ | bnez TMP1, ->fff_fallback // cframe != 0? ++ | ld.d TMP0, L:CARG1->maxstack ++ | ld.d PC, FRAME_PC(BASE) ++ | beqz AT, ->fff_fallback // base == top && st == 0? ++ | add.d TMP2, CARG2, NARGS8:RC ++ | sltu AT, TMP0, TMP2 ++ | st.d PC, SAVE_PC(sp) ++ | bnez AT, ->fff_fallback // Stack overflow? ++ | st.d BASE, L->base ++ |1: ++ |.if resume ++ | addi.d BASE, BASE, 8 // Keep resumed thread in stack for GC. ++ | addi.d NARGS8:RC, NARGS8:RC, -8 ++ | addi.d TMP2, TMP2, -8 ++ |.endif ++ | st.d TMP2, L:CARG1->top ++ | add.d TMP1, BASE, NARGS8:RC ++ | or CARG3, CARG2, r0 ++ | st.d BASE, L->top ++ |2: // Move args to coroutine. ++ | ld.d r17, 0(BASE) ++ | sltu AT, BASE, TMP1 ++ | addi.d BASE, BASE, 8 ++ | beqz AT, >3 ++ | st.d r17, 0(CARG3) ++ | addi.d CARG3, CARG3, 8 ++ | beq r0, r0, <2 ++ |3: ++ | or L:RA, L:CARG1, r0 ++ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) ++ | // Returns thread status. ++ |4: ++ | ld.d TMP2, L:RA->base ++ | sltui AT, CRET1, LUA_YIELD+1 ++ | ld.d TMP3, L:RA->top ++ | li_vmstate INTERP ++ | ld.d BASE, L->base ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | st_vmstate ++ | sub.d RD, TMP3, TMP2 ++ | beqz AT, >8 ++ | ld.d TMP0, L->maxstack ++ | add.d TMP1, BASE, RD ++ | beqz RD, >6 // No results? ++ | sltu AT, TMP0, TMP1 ++ | add.d TMP3, TMP2, RD ++ | bnez AT, >9 // Need to grow stack? ++ | st.d TMP2, L:RA->top // Clear coroutine stack. ++ | or TMP1, BASE, r0 ++ |5: // Move results from coroutine. ++ | ld.d r17, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | sltu AT, TMP2, TMP3 ++ | st.d r17, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | bnez AT, <5 ++ |6: ++ | andi TMP0, PC, FRAME_TYPE ++ |.if resume ++ | mov_true TMP1 ++ | addi.d RA, BASE, -8 ++ | st.d TMP1, -8(BASE) // Prepend true to results. ++ | addi.d RD, RD, 16 ++ |.else ++ | or RA, BASE, r0 ++ | addi.d RD, RD, 8 ++ |.endif ++ |7: ++ | st.d PC, SAVE_PC(sp) ++ | or MULTRES, RD, r0 ++ | beqz TMP0, ->BC_RET_Z ++ | beq r0, r0, ->vm_return ++ | ++ |8: // Coroutine returned with error (at co->top-1). ++ |.if resume ++ | addi.d TMP3, TMP3, -8 ++ | mov_false TMP1 ++ | ld.d r17, 0(TMP3) ++ | st.d TMP3, L:RA->top // Remove error from coroutine stack. ++ | .LI RD, (2+1)*8 ++ | st.d TMP1, -8(BASE) // Prepend false to results. ++ | addi.d RA, BASE, -8 ++ | st.d r17, 0(BASE) // Copy error message. ++ | andi TMP0, PC, FRAME_TYPE ++ | beq r0, r0, <7 ++ |.else ++ | or CARG2, L:RA, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ |.endif ++ | ++ |9: // Handle stack expansion on return from yield. 
++ | srli.w CARG2, RD, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | .LI CRET1, 0 ++ | beq r0, r0, <4 ++ |.endmacro ++ | ++ | coroutine_resume_wrap 1 // coroutine.resume ++ | coroutine_resume_wrap 0 // coroutine.wrap ++ | ++ |.ffunc coroutine_yield ++ | ld.d TMP0, L->cframe ++ | add.d TMP1, BASE, NARGS8:RC ++ | st.d BASE, L->base ++ | andi TMP0, TMP0, CFRAME_RESUME ++ | st.d TMP1, L->top ++ | .LI CRET1, LUA_YIELD ++ | beqz TMP0, ->fff_fallback ++ | st.d r0, L->cframe ++ | st.b CRET1, L->status ++ | beq r0, r0, ->vm_leave_unw ++ | ++ |//-- Math library ------------------------------------------------------- ++ | ++ |.ffunc_1 math_abs ++ | gettp CARG2, CARG1 ++ | addi.d AT, CARG2, -LJ_TISNUM ++ | slli.w TMP1, CARG1, 0 // sextw -> slli.w ++ | bnez AT, >1 ++ | srai.w TMP0, TMP1, 31 // Extract sign. ++ | xor TMP1, TMP1, TMP0 ++ | sub.d CARG1, TMP1, TMP0 ++ | slli.d TMP3, CARG1, 32 ++ | settp CARG1, TISNUM ++ | bge TMP3, r0, ->fff_restv ++ | .LI CARG1, 0x41e0 // 2^31 as a double. ++ | slli.d CARG1, CARG1, 48 ++ | beq r0, r0, ->fff_restv ++ |1: ++ | sltui AT, CARG2, LJ_TISNUM ++ | bstrpick.d CARG1, CARG1, 62, 0 ++ | beqz AT, ->fff_fallback ++ |// fallthrough ++ | ++ |->fff_restv: ++ | // CARG1 = TValue result. ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d RA, BASE, -16 ++ | st.d CARG1, -16(BASE) ++ |->fff_res1: ++ | // RA = results, PC = return. ++ | .LI RD, (1+1)*8 ++ |->fff_res: ++ | // RA = results, RD = (nresults+1)*8, PC = return. ++ | andi TMP0, PC, FRAME_TYPE ++ | or MULTRES, RD, r0 ++ | bnez TMP0, ->vm_return ++ | ld.w INS, -4(PC) ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ |5: ++ | sltu AT, RD, RB ++ | decode_RA8a TMP0, INS ++ | bnez AT, >6 // More results expected? ++ | decode_RA8b TMP0 ++ | ins_next1 ++ | // Adjust BASE. KBASE is assumed to be set for the calling frame. ++ | sub.d BASE, RA, TMP0 ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | add.d TMP1, RA, RD ++ | addi.d RD, RD, 8 ++ | st.d TISNIL, -8(TMP1) ++ | beq r0, r0, <5 ++ | ++ |.macro math_extern, func ++ | .ffunc_n math_ .. func ++ | bl extern func ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ |.endmacro ++ | ++ |.macro math_extern2, func ++ | .ffunc_nn math_ .. func ++ | bl extern func ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ |.endmacro ++ | ++ |// TODO: Return integer type if result is integer (own sf implementation). ++ |.macro math_round, func ++ |->ff_math_ .. func: ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | beqz NARGS8:RC, ->fff_fallback ++ | sltu AT, TMP0, TISNUM ++ | beq TMP0, TISNUM, ->fff_restv ++ |// beqz AT, ->fff_fallback ++ |.if FPU ++ | fld.d FARG1, 0(BASE) ++ | beqz AT, ->fff_fallback ++ | bl ->vm_ .. func ++ |.else ++ | beqz AT, ->fff_fallback ++ | bl extern func ++ |.endif ++ | beq r0, r0, ->fff_resn ++ |.endmacro ++ | ++ | math_round floor ++ | math_round ceil ++ | ++ |.ffunc math_log ++ | .LI AT, 8 ++ | ld.d CARG1, 0(BASE) ++ | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 
++ | checknum CARG1, ->fff_fallback ++ |.if FPU ++ | fld.d FARG1, 0(BASE) ++ | bl extern log ++ |.else ++ | bl extern log ++ |.endif ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ | ++ | math_extern log10 ++ | math_extern exp ++ | math_extern sin ++ | math_extern cos ++ | math_extern tan ++ | math_extern asin ++ | math_extern acos ++ | math_extern atan ++ | math_extern sinh ++ | math_extern cosh ++ | math_extern tanh ++ | math_extern2 pow ++ | math_extern2 atan2 ++ | math_extern2 fmod ++ | ++ |.if FPU ++ |// fsqrt.d FRET1, FARG1 ++ |.ffunc_n math_sqrt ++ |// fsqrt.d FRET1, FARG1 ++ |// fallthrough to ->fff_resn ++ |.else ++ | math_extern sqrt ++ |.endif ++ | ++ |->fff_resn: ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d RA, BASE, -16 ++ |.if FPU ++ | fst.d FRET1, 0(RA) ++ | beq r0, r0, ->fff_res1 ++ |.else ++ | st.d CRET1, 0(RA) ++ | beq r0, r0, ->fff_res1 ++ |.endif ++ | ++ | ++ |.ffunc_2 math_ldexp ++ | checknum CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | .FPU2 fld.d FARG1, 0(BASE) ++ | ld.w CARG1, 8+LO(BASE) ++ | bl extern ldexp ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ | ++ |.ffunc_n math_frexp ++ | ld.d PC, FRAME_PC(BASE) ++ | .DADDIU CARG1, DISPATCH, DISPATCH_GL(tmptv) ++ | bl extern frexp ++ | .LDXW TMP1, DISPATCH, DISPATCH_GL(tmptv) ++ | addi.d RA, BASE, -16 ++ |.if FPU ++ | movgr2fr.w FARG2, TMP1 ++ | fst.d FRET1, 0(RA) ++ | ffint.d.w FARG2, FARG2 ++ | fst.d FARG2, 8(RA) ++ |.else ++ | st.d CRET1, 0(RA) ++ | bstrpick.d TMP1, TMP1, 31, 0 // zextw -> bstrpick.d ++ | settp TMP1, TISNUM ++ | st.d TMP1, 8(RA) ++ |.endif ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_n math_modf ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d CARG1, BASE, -16 ++ | bl extern modf ++ | addi.d RA, BASE, -16 ++ |.if FPU ++ | fst.d FRET1, -8(BASE) ++ |.else ++ | st.d CRET1, -8(BASE) ++ |.endif ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.macro math_minmax, name, intins, intinsc, fpins ++ | .ffunc_1 name ++ | add.d TMP3, BASE, NARGS8:RC ++ | addi.d TMP2, BASE, 8 ++ | checkint CARG1, >5 ++ |1: // Handle integers. ++ | ld.d CARG2, 0(TMP2) ++ | beq TMP2, TMP3, ->fff_restv ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | checkint CARG2, >3 ++ | ld.w CARG2, LO(TMP2) ++ | slt AT, CARG1, CARG2 ++ | intins TMP1, CARG2, AT ++ | intinsc CARG1, CARG1, AT ++ | or CARG1, CARG1, TMP1 ++ | addi.d TMP2, TMP2, 8 ++ | bstrpick.d CARG1, CARG1, 31, 0 // zextw -> bstrpick.d ++ | settp CARG1, TISNUM ++ | beq r0, r0, <1 ++ | ++ |3: // Convert intermediate result to number and continue with number loop. ++ |// checknum CARG2, ->fff_fallback ++ |.if FPU ++ | movgr2fr.w FRET1, CARG1 //TODO checknum slot ins ++ | checknum CARG2, ->fff_fallback ++ | ffint.d.w FRET1, FRET1 ++ | fld.d FARG1, 0(TMP2) ++ | beq r0, r0, >7 ++ |.else ++ | checknum CARG2, ->fff_fallback ++ | bl ->vm_sfi2d_1 ++ | beq r0, r0, >7 ++ |.endif ++ | ++ |5: ++ | .FPU2 fld.d FRET1, 0(BASE) ++ |// checknum CARG1, ->fff_fallback ++ |6: // Handle numbers. 
++ | ld.d CARG2, 0(TMP2) //TODO mips slot ins ++ | checknum CARG1, ->fff_fallback ++ |// beq TMP2, TMP3, ->fff_resn ++ |.if FPU ++ | fld.d FARG1, 0(TMP2) ++ |.else ++ | or CRET1, CARG1, r0 ++ |.endif ++ | beq TMP2, TMP3, ->fff_resn ++ | checknum CARG2, >8 ++ |7: ++ |.if FPU ++ | fpins FRET1, FRET1, FARG1 ++ |.else ++ |.if fpins // ismax ++ | bl ->vm_sfcmpogt ++ |.else ++ | bl ->vm_sfcmpolt ++ |.endif ++ | masknez AT, CARG2, CRET1 ++ | maskeqz CARG1, CARG1, CRET1 ++ | or CARG1, CARG1, AT ++ |.endif ++ | addi.d TMP2, TMP2, 8 ++ | beq r0, r0, <6 ++ | ++ |8: // Convert integer to number and continue with number loop. ++ |// checkint CARG2, ->fff_fallback //TODO doesnot process the mips slot ins ++ |.if FPU ++ | fld.s FARG1, LO(TMP2) ++ | checkint CARG2, ->fff_fallback ++ | ffint.d.w FARG1, FARG1 ++ | beq r0, r0, <7 ++ |.else ++ | ld.w CARG2, LO(TMP2) ++ | checkint CARG2, ->fff_fallback ++ | bl ->vm_sfi2d_2 ++ | beq r0, r0, <7 ++ |.endif ++ | ++ |.endmacro ++ | ++ | math_minmax math_min, masknez, maskeqz, fmin.d ++ | math_minmax math_max, maskeqz, masknez, fmax.d ++ | ++ |//-- String library ----------------------------------------------------- ++ | ++ |.ffunc string_byte // Only handle the 1-arg case here. ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori AT, NARGS8:RC, 8 ++ | addi.d TMP0, TMP0, -LJ_TSTR ++ | or AT, AT, TMP0 ++ | cleartp STR:CARG1 ++ | bnez AT, ->fff_fallback // Need exactly 1 string argument. ++ | ld.w TMP0, STR:CARG1->len ++ | addi.d RA, BASE, -16 ++ | ld.d PC, FRAME_PC(BASE) ++ | sltu RD, r0, TMP0 ++ | ld.bu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). ++ | addi.w RD, RD, 1 ++ | slli.w RD, RD, 3 // RD = ((str->len != 0)+1)*8 ++ | settp TMP1, TISNUM ++ | st.d TMP1, 0(RA) ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc string_char // Only handle the 1-arg case here. ++ | ffgccheck ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori AT, NARGS8:RC, 8 // Exactly 1 argument. ++ | addi.d TMP0, TMP0, -LJ_TISNUM // Integer. ++ | .LI TMP1, 255 ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | or AT, AT, TMP0 ++ | sltu TMP1, TMP1, CARG1 // !(255 < n). ++ | or AT, AT, TMP1 ++ | .LI CARG3, 1 ++ | bnez AT, ->fff_fallback ++ | addi.d CARG2, sp, TMPD_OFS ++ | st.b CARG1, TMPD(sp) ++ |->fff_newstr: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_str_new // (lua_State *L, char *str, size_t l) ++ | // Returns GCstr *. 
++ | ld.d BASE, L->base ++ |->fff_resstr: ++ | .LI AT, LJ_TSTR ++ | settp CRET1, AT ++ | or CARG1, CRET1, r0 ++ | beq r0, r0, ->fff_restv ++ | ++ |.ffunc string_sub ++ | ffgccheck ++ | addi.d AT, NARGS8:RC, -16 ++ | ld.d TMP0, 0(BASE) ++ | gettp TMP3, TMP0 ++ | blt AT, r0, ->fff_fallback ++ | cleartp STR:CARG1, TMP0 ++ | ld.d CARG2, 8(BASE) ++ | .LI CARG4, -1 ++ | beqz AT, >1 ++ | ld.d CARG3, 16(BASE) ++ |// checkint CARG3, ->fff_fallback ++ | slli.w CARG4, CARG3, 0 //TODO it`s also a mips slot ins, sextw -> slli.w ++ | checkint CARG3, ->fff_fallback ++ |1: ++ | checkint CARG2, ->fff_fallback ++ | .LI AT, LJ_TSTR //TODO mips slot ins ++ |// checkint CARG2, ->fff_fallback ++ | slli.w CARG3, CARG2, 0 // sextw -> slli.w ++ | bne TMP3, AT, ->fff_fallback ++ | ld.w CARG2, STR:CARG1->len ++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end ++ | slt AT, CARG4, r0 ++ | addi.w TMP0, CARG2, 1 ++ | add.w TMP1, CARG4, TMP0 ++ | slt TMP3, CARG3, r0 ++ | masknez CARG4, CARG4, AT ++ | maskeqz TMP1, TMP1, AT ++ | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 ++ | add.w TMP1, CARG3, TMP0 ++ | maskeqz TMP1, TMP1, TMP3 ++ | masknez CARG3, CARG3, TMP3 ++ | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 ++ | .LI TMP2, 1 ++ | slt AT, CARG4, r0 ++ | slt TMP3, r0, CARG3 ++ | masknez CARG4, CARG4, AT // if (end < 0) end = 0 ++ | maskeqz CARG3, CARG3, TMP3 ++ | masknez TMP2, TMP2, TMP3 ++ | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 ++ | slt AT, CARG2, CARG4 ++ | masknez CARG4, CARG4, AT ++ | maskeqz CARG2, CARG2, AT ++ | or CARG4, CARG2, CARG4 // if (end > len) end = len ++ | add.d CARG2, STR:CARG1, CARG3 ++ | sub.d CARG3, CARG4, CARG3 // len = end - start ++ | addi.d CARG2, CARG2, sizeof(GCstr)-1 ++ | or r17, CARG3, r0 ++ | addi.w CARG3, CARG3, 1 // len++ ++ | bge r17, r0, ->fff_newstr ++ |->fff_emptystr: // Return empty string. ++ | .LI AT, LJ_TSTR ++ | .DADDIU STR:CARG1, DISPATCH, DISPATCH_GL(strempty) ++ | settp CARG1, AT ++ | beq r0, r0, ->fff_restv ++ | ++ |.macro ffstring_op, name ++ | .ffunc string_ .. name ++ | ffgccheck ++ | ld.d CARG2, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ | checkstr STR:CARG2, ->fff_fallback ++ | .DADDIU SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) ++ | ld.d TMP0, SBUF:CARG1->b ++ | st.d L, SBUF:CARG1->L ++ | st.d BASE, L->base ++ | st.d TMP0, SBUF:CARG1->p ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_buf_putstr_ .. name ++ | or SBUF:CARG1, SBUF:CRET1, r0 ++ | bl extern lj_buf_tostr ++ | ld.d BASE, L->base ++ | beq r0, r0, ->fff_resstr ++ |.endmacro ++ | ++ |ffstring_op reverse ++ |ffstring_op lower ++ |ffstring_op upper ++ | ++ |//-- Bit library -------------------------------------------------------- ++ | ++ |->vm_tobit_fb: ++ |// beqz TMP1, ->fff_fallback //TODO doesnot process the following mips slot ins ++ |.if FPU ++ | fld.d FARG1, 0(BASE) ++ | beqz TMP1, ->fff_fallback ++ | fadd.d FARG1, FARG1, TOBIT ++ | movfr2gr.s CRET1, FARG1 ++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d ++ | jirl r0, ra, 0 ++ |.else ++ | beqz TMP1, ->fff_fallback ++ |// FP number to bit conversion for soft-float. 
++ |->vm_tobit:
++ | slli.d TMP0, CARG1, 1
++ | .LI CARG3, 1076
++ | srli.d AT, TMP0, 53
++ | sub.d CARG3, CARG3, AT
++ | sltui AT, CARG3, 54
++ | bstrpick.d TMP0, TMP0, 52, 0
++ | beqz AT, >1
++ | bstrins.d TMP0, AT, 21, 21
++ | slt AT, CARG1, r0
++ | srl.d CRET1, TMP0, CARG3
++ | sub.d TMP0, r0, CRET1
++ | maskeqz TMP0, TMP0, AT
++ | masknez CRET1, CRET1, AT
++ | or CRET1, CRET1, TMP0
++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d
++ | jirl r0, ra, 0
++ |1:
++ | or CRET1, r0, r0
++ | jirl r0, ra, 0
++ |
++ |// FP number to int conversion with a check for soft-float.
++ |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
++ |->vm_tointg:
++ |.if JIT
++ | slli.d CRET2, CARG1, 1
++ | .LI TMP0, 1076
++ | beqz CRET2, >2
++ | srli.d AT, CRET2, 53
++ | sub.d TMP0, TMP0, AT
++ | sltui AT, TMP0, 54
++ | bstrpick.d CRET2, CRET2, 52, 0
++ | beqz AT, >1
++ | bstrins.d CRET2, AT, 21, 21
++ | slt AT, CARG1, r0
++ | srl.d CRET1, CRET2, TMP0
++ | sub.d CARG1, r0, CRET1
++ | masknez CRET1, CRET1, AT
++ | maskeqz CARG1, CARG1, AT
++ | or CRET1, CRET1, CARG1
++ | .LI CARG1, 64
++ | sub.w TMP0, CARG1, TMP0
++ | sll.d CRET2, CRET2, TMP0 // Integer check.
++ | slli.w AT, CRET1, 0 // sextw -> slli.w
++ | xor AT, CRET1, AT // Range check.
++ | masknez AT, AT, CRET2
++ | maskeqz CRET2, CRET2, CRET2
++ | or CRET2, AT, CRET2
++ | jirl r0, ra, 0
++ |1:
++ | .LI CRET2, 1
++ | jirl r0, ra, 0
++ |2:
++ | or CRET1, r0, r0
++ | jirl r0, ra, 0
++ |.endif
++ |.endif
++ |
++ |.macro .ffunc_bit, name
++ | .ffunc_1 bit_..name
++ | gettp TMP0, CARG1
++ | bstrpick.d CRET1, CARG1, 31, 0 // zextw -> bstrpick.d
++ | beq TMP0, TISNUM, >6
++ | sltui TMP1, TMP0, LJ_TISNUM
++ | bl ->vm_tobit_fb
++ |6:
++ |.endmacro
++ |
++ |.macro .ffunc_bit_op, name, bins
++ | .ffunc_bit name
++ | addi.d TMP2, BASE, 8
++ | add.d TMP3, BASE, NARGS8:RC
++ |1:
++ | ld.d r17, 0(TMP2)
++ | beq TMP2, TMP3, ->fff_resi
++ | gettp TMP0, r17
++ |.if FPU
++ | addi.d TMP2, TMP2, 8
++ | bne TMP0, TISNUM, >2
++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d
++ | bins CRET1, CRET1, r17
++ | beq r0, r0, <1
++ |2:
++ | fld.d FARG1, -8(TMP2)
++ | sltui AT, TMP0, LJ_TISNUM
++ | fadd.d FARG1, FARG1, TOBIT
++ | beqz AT, ->fff_fallback
++ | movfr2gr.s r17, FARG1
++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d
++ | bins CRET1, CRET1, r17
++ | beq r0, r0, <1
++ |.else
++ | or CRET2, CRET1, r0
++ | beq TMP0, TISNUM, >2
++ | sltui TMP1, TMP0, LJ_TISNUM
++ | bl ->vm_tobit_fb
++ | or CARG1, CRET2, r0
++ |2:
++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d
++ | bins CRET1, CRET1, r17
++ | addi.d TMP2, TMP2, 8
++ | beq r0, r0, <1
++ |.endif
++ |.endmacro
++ |
++ |.ffunc_bit_op band, and
++ |.ffunc_bit_op bor, or
++ |.ffunc_bit_op bxor, xor
++ |
++ |.ffunc_bit bswap
++ | srli.d TMP0, CRET1, 8
++ | srli.d TMP1, CRET1, 24
++ |// andi TMP2, TMP0, 0xff00
++ | srli.d TMP3, TMP0, 8
++ | andi TMP2, TMP3, 0xff
++ | slli.d TMP2, TMP2, 8
++ | bstrins.d TMP1, CRET1, 31, 24
++ | bstrins.d TMP2, TMP0, 23, 16
++ | or CRET1, TMP1, TMP2
++ | beq r0, r0, ->fff_resi
++ |
++ |.ffunc_bit bnot
++ | nor CRET1, CRET1, r0
++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d
++ | beq r0, r0, ->fff_resi
++ |
++ |.macro .ffunc_bit_sh, name, shins, shmod
++ | .ffunc_2 bit_..name
++ | gettp TMP0, CARG1
++ | beq TMP0, TISNUM, >1
++ | sltui TMP1, TMP0, LJ_TISNUM
++ | bl ->vm_tobit_fb
++ | or CARG1, CRET1, r0
++ |1:
++ | gettp TMP0, CARG2
++ | bstrpick.d CARG2, CARG2, 31, 0 // zextw -> bstrpick.d
++ | bne TMP0, TISNUM, ->fff_fallback
++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w
++ 
|.if shmod == 1 ++ | sub.w CARG2, r0, CARG2 ++ |.endif ++ | shins CRET1, CARG1, CARG2 ++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d ++ | beq r0, r0, ->fff_resi ++ |.endmacro ++ | ++ |.ffunc_bit_sh lshift, sll.w, 0 ++ |.ffunc_bit_sh rshift, srl.w, 0 ++ |.ffunc_bit_sh arshift, sra.w, 0 ++ |.ffunc_bit_sh rol, rotr.w, 1 ++ |.ffunc_bit_sh ror, rotr.w, 0 ++ | ++ |.ffunc_bit tobit ++ |->fff_resi: ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d RA, BASE, -16 ++ | settp CRET1, TISNUM ++ | st.d CRET1, -16(BASE) ++ | beq r0, r0, ->fff_res1 ++ | ++ |//----------------------------------------------------------------------- ++ |->fff_fallback: // Call fast function fallback handler. ++ | // BASE = new base, RB = CFUNC, RC = nargs*8 ++ | ld.d TMP3, CFUNC:RB->f ++ | add.d TMP1, BASE, NARGS8:RC ++ | ld.d PC, FRAME_PC(BASE) // Fallback may overwrite PC. ++ | addi.d TMP0, TMP1, 8*LUA_MINSTACK ++ | ld.d TMP2, L->maxstack ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | sltu AT, TMP2, TMP0 ++ | st.d BASE, L->base ++ | st.d TMP1, L->top ++ | or CFUNCADDR, TMP3, r0 ++ | bnez AT, >5 // Need to grow stack. ++ | or CARG1, L, r0 ++ | jirl r1, TMP3, 0 // (lua_State *L) ++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. ++ | ld.d BASE, L->base ++ | slli.w RD, CRET1, 3 ++ | addi.d RA, BASE, -16 ++ | blt r0, CRET1, ->fff_res // Returned nresults+1? ++ |1: // Returned 0 or -1: retry fast path. ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | ld.d TMP0, L->top ++ | cleartp LFUNC:RB ++ | sub.d NARGS8:RC, TMP0, BASE ++ | bnez CRET1, ->vm_call_tail // Returned -1? ++ | ins_callt // Returned 0: retry fast path. ++ | ++ |// Reconstruct previous base for vmeta_call during tailcall. ++ |->vm_call_tail: ++ | andi TMP0, PC, FRAME_TYPE ++ | .LI AT, -4 ++ | and TMP1, PC, AT ++ | bnez TMP0, >3 ++ | ld.bu TMP1, OFS_RA(PC) ++ | slli.w TMP1, TMP1, 3 ++ | addi.w TMP1, TMP1, 16 ++ |3: ++ | sub.d TMP2, BASE, TMP1 ++ | beq r0, r0, ->vm_call_dispatch // Resolve again for tailcall. ++ | ++ |5: // Grow stack for fallback handler. ++ | .LI CARG2, LUA_MINSTACK ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.d BASE, L->base ++ | .LI CRET1, 0 // Force retry. ++ | beq r0, r0, <1 ++ | ++ |->fff_gcstep: // Call GC step function. ++ | // BASE = new base, RC = nargs*8 ++ | or MULTRES, ra, r0 ++ | st.d BASE, L->base ++ | add.d TMP0, BASE, NARGS8:RC ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | st.d TMP0, L->top ++ | or CARG1, L, r0 ++ | bl extern lj_gc_step // (lua_State *L) ++ | ld.d BASE, L->base ++ | or ra, MULTRES, r0 ++ | ld.d TMP0, L->top ++ | ld.d CFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp CFUNC:RB ++ | sub.d NARGS8:RC, TMP0, BASE ++ | jirl r0, ra, 0 ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Special dispatch targets ------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_record: // Dispatch target for recording phase. ++ |.if JIT ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. ++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | bnez AT, >5 ++ | // Decrement the hookcount for consistency, but always do the call. 
++ | andi AT, TMP3, HOOK_ACTIVE ++ | addi.w TMP2, TMP2, -1 ++ | bnez AT, >1 ++ | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | beqz AT, >1 ++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | beq r0, r0, >1 ++ |.endif ++ | ++ |->vm_rethook: // Dispatch target for return hooks. ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | andi AT, TMP3, HOOK_ACTIVE // Hook already active? ++ |// beqz AT, >1 //TODO dose not process the following mips slot ins ++ |5: // Re-dispatch to static ins. ++ | ld.d AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. ++ | beqz AT, >1 ++ | jirl r0, AT, 0 ++ | ++ |->vm_inshook: // Dispatch target for instr/line hooks. ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | andi AT, TMP3, HOOK_ACTIVE // Hook already active? ++ | or r17, AT, r0 ++ | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | bnez r17, <5 ++ | addi.w TMP2, TMP2, -1 ++ | beqz AT, <5 ++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | beqz TMP2, >1 ++ | andi AT, TMP3, LUA_MASKLINE ++ | beqz AT, <5 //TODO dose not process the following mips slot ins ++ |1: ++ |//. load_got lj_dispatch_ins ++ |// st.w MULTRES, SAVE_MULTRES ++ | st.w MULTRES, TMPD(sp) ++ | or CARG2, PC, r0 ++ | st.d BASE, L->base ++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ |3: ++ | ld.d BASE, L->base ++ |4: // Re-dispatch to static ins. ++ | ld.w INS, -4(PC) ++ | decode_OP8a TMP1, INS ++ | decode_OP8b TMP1 ++ | add.d TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld.d AT, GG_DISP2STATIC(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RD8b RD ++ | decode_RA8b RA ++ | jirl r0, AT, 0 ++ | ++ |->cont_hook: // Continue from hook yield. ++ | addi.d PC, PC, 4 ++ | ld.w MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. ++ | beq r0, r0, <4 ++ | ++ |->vm_hotloop: // Hot loop counter underflow. ++ |.if JIT ++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE) ++ | .DADDIU CARG1, DISPATCH, GG_DISP2J ++ | cleartp LFUNC:TMP1 ++ | st.d PC, SAVE_PC(sp) ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | or CARG2, PC, r0 ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | ld.bu TMP1, PC2PROTO(framesize)(TMP1) ++ | st.d BASE, L->base ++ | slli.d TMP1, TMP1, 3 ++ | add.d TMP1, BASE, TMP1 ++ | st.d TMP1, L->top ++ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | beq r0, r0, <3 ++ |.endif ++ | ++ | ++ |->vm_callhook: // Dispatch target for call hooks. ++ | or CARG2, PC, r0 ++ |.if JIT ++ | beq r0, r0, >1 //TODO which is the mips slot ins ++ |.endif ++ |// or CARG2, PC, r0 ++ | ++ |->vm_hotcall: // Hot call counter underflow. ++ |.if JIT ++ | ori CARG2, PC, 1 ++ |1: ++ |.endif ++ | add.d TMP0, BASE, RC ++ | st.d PC, SAVE_PC(sp) ++ | st.d BASE, L->base ++ | sub.d RA, RA, BASE ++ | st.d TMP0, L->top ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | // Returns ASMFunction. ++ | ld.d BASE, L->base ++ | ld.d TMP0, L->top ++ | st.d r0, SAVE_PC(sp) // Invalidate for subsequent line hook. ++ | sub.d NARGS8:RC, TMP0, BASE ++ | add.d RA, BASE, RA ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp LFUNC:RB ++ | ld.w INS, -4(PC) ++ | jirl r0, CRET1, 0 ++ | ++ |->cont_stitch: // Trace stitching. ++ |.if JIT ++ | // RA = resultptr, RB = meta base ++ | ld.w INS, -4(PC) ++ | ld.d TRACE:TMP2, -40(RB) // Save previous trace. ++ | decode_RA8a RC, INS ++ | addi.d AT, MULTRES, -8 ++ | cleartp TRACE:TMP2 ++ | decode_RA8b RC ++ | add.d RC, BASE, RC // Call base. 
++ | beqz AT, >2 ++ |1: // Move results down. ++ | ld.d CARG1, 0(RA) ++ | addi.d AT, AT, -8 ++ | addi.d RA, RA, 8 ++ | st.d CARG1, 0(RC) ++ | addi.d RC, RC, 8 ++ | bnez AT, <1 ++ |2: ++ | decode_RA8a RA, INS ++ | decode_RB8a RB, INS ++ | decode_RA8b RA ++ | decode_RB8b RB ++ | add.d RA, RA, RB ++ | add.d RA, BASE, RA ++ |3: ++ | sltu AT, RC, RA ++ | bnez AT, >9 // More results wanted? ++ | ++ | ld.hu TMP3, TRACE:TMP2->traceno ++ | ld.hu RD, TRACE:TMP2->link ++ | beq RD, TMP3, ->cont_nop // Blacklisted. ++ | slli.w RD, RD, 3 ++ | bnez RD, =>BC_JLOOP // Jump to stitched trace. ++ | ++ | // Stitch a new trace to the previous trace. ++ | st.w TMP3, DISPATCH_J(exitno)(DISPATCH) ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | st.d BASE, L->base ++ | .DADDIU CARG1, DISPATCH, GG_DISP2J ++ | or CARG2, PC, r0 ++ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | ld.d BASE, L->base ++ | beq r0, r0, ->cont_nop ++ | ++ |9: ++ | st.d TISNIL, 0(RC) ++ | addi.d RC, RC, 8 ++ | beq r0, r0, <3 ++ |.endif ++ | ++ |->vm_profhook: // Dispatch target for profiler hook. ++#if LJ_HASPROFILE ++ |// st.w MULTRES, SAVE_MULTRES ++ | st.w MULTRES, TMPD(sp) ++ | or CARG2, PC, r0 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. ++ | addi.d PC, PC, -4 ++ | ld.d BASE, L->base ++ | beq r0, r0, ->cont_nop ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Trace exit handler ------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro savex_, a, b ++ |.if FPU ++ | fst.d f..a, a*8(sp) ++ | fst.d f..b, b*8(sp) ++ | st.d r..a, 32*8+a*8(sp) ++ | st.d r..b, 32*8+b*8(sp) ++ |.else ++ | st.d r..a, a*8(sp) ++ | st.d r..b, b*8(sp) ++ |.endif ++ |.endmacro ++ | ++ |->vm_exit_handler: ++ |.if JIT ++ |.if FPU ++ | addi.d sp, sp, -(32*8+32*8) ++ |.else ++ | addi.d sp, sp, -(32*8) ++ |.endif ++ | savex_ 0, 2 ++ | savex_ 4, 5 ++ | savex_ 6, 7 ++ | savex_ 8, 9 ++ | savex_ 10, 11 ++ | savex_ 12, 13 ++ | savex_ 14, 15 ++ | savex_ 16, 17 ++ | savex_ 18, 19 ++ | savex_ 20, 21 ++ | savex_ 22, 23 ++ | savex_ 24, 25 ++ | savex_ 26, 27 ++ | savex_ 28, 29 ++ | savex_ 30, 31 ++ |.if FPU ++ | fst.d f1, 1*8(sp) ++ | fst.d f3, 3*8(sp) ++ | st.d r0, 32*8+1*8(sp) // Clear RID_TMP. ++ | addi.d TMP2, sp, 32*8+32*8 // Recompute original value of sp. ++ | st.d TMP2, 32*8+3*8(sp) // Store sp in RID_SP ++ |.else ++ | st.d r0, 31*8(sp) // Clear RID_TMP. ++ | addi.d TMP2, sp, 32*8 // Recompute original value of sp. ++ | st.d TMP2, 3*8(sp) // Store sp in RID_SP ++ |.endif ++ | li_vmstate EXIT ++ | .DADDIU DISPATCH, JGL, -GG_DISP2G-32768 ++ | ld.w TMP1, 0(TMP2) // Load exit number. ++ | st_vmstate ++ | .LDXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | .LDXD BASE, DISPATCH, DISPATCH_GL(jit_base) ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | st.w ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. ++ | st.d BASE, L->base ++ | st.w TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. ++ | .DADDIU CARG1, DISPATCH, GG_DISP2J ++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base) ++ | or CARG2, sp, r0 ++ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | // Returns MULTRES (unscaled) or negated error code. ++ | ld.d TMP1, L->cframe ++ | .LI AT, -4 ++ | ld.d BASE, L->base ++ | and sp, TMP1, AT ++ | ld.d PC, SAVE_PC(sp) // Get SAVE_PC. 
++ | st.d L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). ++ | beq r0, r0, >1 ++ |.endif ++ |->vm_exit_interp: ++ |.if JIT ++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. ++ | ld.d L, SAVE_L(sp) ++ | .DADDIU DISPATCH, JGL, -GG_DISP2G-32768 ++ | st.d BASE, L->base ++ |1: ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | blt CRET1, r0, >9 // Check for error from exit. ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | slli.d MULTRES, CRET1, 3 ++ | cleartp LFUNC:RB ++ |//st.w MULTRES, SAVE_MULTRES ++ | st.w MULTRES, TMPD(sp) ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM // Setup type comparison constants. ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | ld.d TMP1, LFUNC:RB->pc ++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base) ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | // Modified copy of ins_next which handles function header dispatch, too. ++ | ld.w INS, 0(PC) ++ | addi.d PC, PC, 4 ++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 ++ | .STXW TISNIL, DISPATCH, DISPATCH_GL(vmstate) ++ | decode_OP8a TMP1, INS ++ | decode_OP8b TMP1 ++ | sltui TMP2, TMP1, BC_FUNCF*8 ++ | add.d TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld.d AT, 0(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RA8b RA ++ | beqz TMP2, >2 ++ | decode_RD8b RD ++ | jirl r0, AT, 0 ++ |2: ++ | sltui TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? ++ | ld.d TMP1, FRAME_PC(BASE) ++ | bnez TMP2, >3 ++ | // Check frame below fast function. ++ | andi TMP0, TMP1, FRAME_TYPE ++ | bnez TMP0, >3 // Trace stitching continuation? ++ | // Otherwise set KBASE for Lua function below fast function. ++ | ld.w TMP2, -4(TMP1) ++ | decode_RA8a TMP0, TMP2 ++ | decode_RA8b TMP0 ++ | sub.d TMP1, BASE, TMP0 ++ | ld.d LFUNC:TMP2, -32(TMP1) ++ | cleartp LFUNC:TMP2 ++ | ld.d TMP1, LFUNC:TMP2->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ |3: ++ | addi.d RC, MULTRES, -8 ++ | add.d RA, RA, BASE ++ | jirl r0, AT, 0 ++ | ++ |9: // Rethrow error from the right C frame. ++ | sub.w CARG2, r0, CRET1 //TODO LA: sub.w no trap ++ | or CARG1, L, r0 ++ | bl extern lj_err_throw // (lua_State *L, int errcode) ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Math helper functions ---------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Hard-float round to integer. ++ |.macro vm_round_hf, func ++ | addu16i.d TMP0, r0, 0x4330 // Hiword of 2^52 (double). ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d f4, TMP0 ++ | fabs.d FRET2, FARG1 // |x| ++ | movfr2gr.d AT, FARG1 ++ | fcmp.clt.d FCC0, FRET2, f4 ++ | fadd.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 ++ | fsub.d FRET1, FRET1, f4 ++ | bceqz FCC0, >1 // Truncate only if |x| < 2^52. ++ | slt AT, AT, r0 ++ |.if "func" == "ceil" ++ |// addu16i.d TMP0, r0, 0xbff0 // Hiword of -1 (double). Preserves -0. ++ | .LUI TMP0, 0xbff0 ++ |.else ++ | addu16i.d TMP0, r0, 0x3ff0 // Hiword of +1 (double). ++ |.endif ++ |.if "func" == "trunc" ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d f4, TMP0 ++ | fcmp.clt.d FCC0, FRET2, FRET1 // |x| < result? ++ | fsub.d FRET2, FRET1, f4 ++ |// sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. ++ | fsel FTMP1, FRET1, FRET2, FCC0 ++ | movgr2fr.d FRET1, AT ++ | fneg.d FRET2, FTMP1 ++ |//. 
sel.d FRET1, FTMP1, FRET2 ++ | movfr2cf FCC0, FRET1 ++ | fsel FRET1, FTMP1, FRET2, FCC0 ++ | jirl r0, ra, 0 ++ |.else ++ | fneg.d FRET2, FRET1 ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d f4, TMP0 ++ | movgr2fr.d FTMP1, AT ++ | movfr2cf FCC0, FTMP1 ++ | fsel FTMP1, FRET1, FRET2, FCC0 ++ |.if "func" == "ceil" ++ | fcmp.clt.d FCC0, FTMP1, FARG1 // x > result? ++ |.else ++ | fcmp.clt.d FCC0, FARG1, FTMP1 // x < result? ++ |.endif ++ | fsub.d FRET2, FTMP1, f4 // If yes, subtract +-1. ++ | fsel FRET1, FTMP1, FRET2, FCC0 ++ | fmov.d FARG1, FRET1 ++ | jirl r0, ra, 0 ++ |.endif ++ |1: ++ | fmov.d FRET1, FARG1 ++ | jirl r0, ra, 0 ++ |.endmacro ++ | ++ |.macro vm_round, func ++ |.if FPU ++ | vm_round_hf, func ++ |.endif ++ |.endmacro ++ | ++ |->vm_floor: ++ | vm_round floor ++ |->vm_ceil: ++ | vm_round ceil ++ |->vm_trunc: ++ |.if JIT ++ | vm_round trunc ++ |.endif ++ | ++ |// Soft-float integer to number conversion. ++ |.macro sfi2d, ARG ++ |.if not FPU ++ | srai.w TMP0, ARG, 31 ++ | beqz ARG, >9 // Handle zero first. ++ | xor TMP1, ARG, TMP0 ++ | sub.d TMP1, TMP1, TMP0 // Absolute value in TMP1. ++ | clz.d ARG, TMP1 ++ | addi.w ARG, ARG, -11 ++ | .LI AT, 0x3ff+63-11-1 ++ | sll.d TMP1, TMP1, ARG // Align mantissa left with leading 1. ++ | sub.w ARG, AT, ARG // Exponent - 1. ++ | bstrins.w ARG, TMP0, 11, 11 // Sign | Exponent. ++ | slli.d ARG, ARG, 52 // Align left. ++ | add.d ARG, ARG, TMP1 // Add mantissa, increment exponent. ++ | jirl r0, ra, 0 ++ |9: ++ | jirl r0, ra, 0 ++ |.endif ++ |.endmacro ++ | ++ |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfi2d_1: ++ | sfi2d CARG1 ++ | ++ |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfi2d_2: ++ | sfi2d CARG2 ++ | ++ |// Soft-float comparison. Equivalent to c.eq.d. ++ |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfcmpeq: ++ |.if not FPU ++ | slli.d AT, CARG1, 1 ++ | slli.d TMP0, CARG2, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 1. ++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | xor AT, CARG1, CARG2 ++ | bnez TMP1, >9 // Either arg is NaN: return 0; ++ | sltui CRET1, AT, 1 // Same values: return 1. ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 1 ++ | jirl r0, ra, 0 ++ |9: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. ++ |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. ++ |->vm_sfcmpult: ++ |.if not FPU ++ | .LI CRET2, 1 ++ | beq r0, r0, >1 ++ |.endif ++ | ++ |->vm_sfcmpolt: ++ |.if not FPU ++ | .LI CRET2, 0 ++ |1: ++ | slli.d AT, CARG1, 1 ++ | slli.d TMP0, CARG2, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 0. ++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | and AT, CARG1, CARG2 ++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; ++ | blt AT, r0, >5 // Both args negative? ++ | slt CRET1, CARG1, CARG2 ++ | jirl r0, ra, 0 ++ |5: // Swap conditions if both operands are negative. ++ | slt CRET1, CARG2, CARG1 ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |9: ++ | or CRET1, CRET2, r0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |->vm_sfcmpogt: ++ |.if not FPU ++ | slli.d AT, CARG2, 1 ++ | slli.d TMP0, CARG1, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 0. 
++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | and AT, CARG2, CARG1 ++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; ++ | blt AT, r0, >5 // Both args negative? ++ | slt CRET1, CARG2, CARG1 ++ | jirl r0, ra, 0 ++ |5: // Swap conditions if both operands are negative. ++ | slt CRET1, CARG1, CARG2 ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |9: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. ++ |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfcmpolex: ++ |.if not FPU ++ | slli.d AT, CARG1, 1 ++ | slli.d TMP0, CARG2, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 1. ++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | and AT, CARG1, CARG2 ++ | bnez TMP1, >9 // Either arg is NaN: return 0; ++ | xor AT, AT, TMP3 ++ | bltz AT, r0, >5 // Both args negative? ++ | slt CRET1, CARG2, CARG1 ++ | jirl r0, ra, 0 ++ |5: // Swap conditions if both operands are negative. ++ | slt CRET1, CARG1, CARG2 ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 1 ++ | jirl r0, ra, 0 ++ |9: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |.macro sfmin_max, name, fpcall ++ |->vm_sf .. name: ++ |.if JIT and not FPU ++ | or TMP2, ra, r0 ++ | bl ->fpcall ++ | or ra, TMP2, r0 ++ | or TMP0, CRET1, r0 ++ | or CRET1, CARG1, r0 ++ | maskeqz CRET1, CRET1, TMP0 ++ | masknez TMP0, CARG2, TMP0 ++ | or CRET1, CRET1, TMP0 ++ | jirl r0, ra, 0 ++ |.endif ++ |.endmacro ++ | ++ | sfmin_max min, vm_sfcmpolt ++ | sfmin_max max, vm_sfcmpogt ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Miscellaneous functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, r12 ++ |.define NEXT_TMP1, r13 ++ |.define NEXT_TMP2, r14 ++ |.define NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, 0(sp) ++ |.define NEXT_RES_KEY, 8(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT and ENDIAN_LE ++ | ld.d NEXT_ASIZE, NEXT_TAB->asize ++ | ld.d NEXT_TMP0, NEXT_TAB->array ++ | .LI NEXT_NIL, LJ_TNIL ++ |1: // Traverse array part. ++ | sltu AT, NEXT_IDX, NEXT_ASIZE ++ | slli.w NEXT_TMP1, NEXT_IDX, 3 ++ | add.d NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ | beqz AT, >5 ++ | .LI AT, LJ_TISNUM ++ | ld.d NEXT_TMP2, 4(NEXT_TMP1) ++ | slli.d AT, AT, 47 ++ | or NEXT_TMP1, NEXT_IDX, AT ++ | addi.d NEXT_IDX, NEXT_IDX, 1 ++ | beq NEXT_TMP2, NEXT_NIL, <1 ++ | st.d NEXT_TMP2, NEXT_RES_VAL ++ | st.d NEXT_TMP1, NEXT_RES_KEY ++ | addi.d NEXT_RES_VK, NEXT_RES_PTR, 0 ++ | addi.d NEXT_RES_IDX, NEXT_IDX, 0 ++ | jirl r0, ra, 0 ++ | ++ |5: // Traverse hash part. 
++ | sub.d NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE ++ | ld.d NODE:NEXT_RES_VK, NEXT_TAB->node ++ | slli.w NEXT_TMP2, NEXT_RES_IDX, 5 ++ | ld.d NEXT_TMP0, NEXT_TAB->hmask ++ | slli.w AT, NEXT_RES_IDX, 3 ++ | sub.d AT, NEXT_TMP2, AT ++ | add.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT ++ |6: ++ | sltu AT, NEXT_TMP0, NEXT_RES_IDX ++ | bnez AT, >8 ++ | ld.d NEXT_TMP2, NODE:NEXT_RES_VK->val ++ | addi.d NEXT_RES_IDX, NEXT_RES_IDX, 1 ++ | bne NEXT_TMP2, NEXT_NIL, >9 ++ | // Skip holes in hash part. ++ | addi.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) ++ | b <6 ++ | ++ |8: // End of iteration. Set the key to nil (not the value). ++ | st.d NEXT_NIL, NEXT_RES_KEY ++ | addi.d NEXT_RES_VK, NEXT_RES_PTR, 0 ++ |9: ++ | add.d NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- FFI helper functions ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Handler for callback functions. Callback slot number in r19, g in r17. ++ |->vm_ffi_callback: ++ |.if FFI ++ |.type CTSTATE, CTState, PC ++ | saveregs ++ | ld.d CTSTATE, GL:r17->ctype_state ++ | .DADDIU DISPATCH, r17, GG_G2DISP ++ | st.w r19, CTSTATE->cb.slot ++ | st.d CARG1, CTSTATE->cb.gpr[0] ++ | .FPU2 fst.d FARG1, CTSTATE->cb.fpr[0] ++ | st.d CARG2, CTSTATE->cb.gpr[1] ++ | .FPU2 fst.d FARG2, CTSTATE->cb.fpr[1] ++ | st.d CARG3, CTSTATE->cb.gpr[2] ++ | .FPU2 fst.d FARG3, CTSTATE->cb.fpr[2] ++ | st.d CARG4, CTSTATE->cb.gpr[3] ++ | .FPU2 fst.d FARG4, CTSTATE->cb.fpr[3] ++ | st.d CARG5, CTSTATE->cb.gpr[4] ++ | .FPU2 fst.d FARG5, CTSTATE->cb.fpr[4] ++ | st.d CARG6, CTSTATE->cb.gpr[5] ++ | .FPU2 fst.d FARG6, CTSTATE->cb.fpr[5] ++ | st.d CARG7, CTSTATE->cb.gpr[6] ++ | .FPU2 fst.d FARG7, CTSTATE->cb.fpr[6] ++ | st.d CARG8, CTSTATE->cb.gpr[7] ++ | .FPU2 fst.d FARG8, CTSTATE->cb.fpr[7] ++ | addi.d TMP0, sp, CFRAME_SPACE ++ | st.d TMP0, CTSTATE->cb.stack ++ | st.d r0, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | or CARG2, sp, r0 ++ | or CARG1, CTSTATE, r0 ++ | bl extern lj_ccallback_enter // (CTState *cts, void *cf) ++ | // Returns lua_State *. ++ | ld.d BASE, L:CRET1->base ++ | ld.d RC, L:CRET1->top ++ | or L, CRET1, r0 ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM ++ | li_vmstate INTERP ++ | sub.w RC, RC, BASE ++ | cleartp LFUNC:RB ++ | st_vmstate ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | ins_callt ++ |.endif ++ | ++ |->cont_ffi_callback: // Return from FFI callback. ++ |.if FFI ++ | .LDXD CTSTATE, DISPATCH, DISPATCH_GL(ctype_state) ++ | st.d BASE, L->base ++ | st.d RB, L->top ++ | st.d L, CTSTATE->L ++ | or CARG2, RA, r0 ++ | or CARG1, CTSTATE, r0 ++ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | .FPU2 fld.d FRET1, CTSTATE->cb.fpr[0] ++ | ld.d CRET1, CTSTATE->cb.gpr[0] ++ | .FPU2 fld.d FRET2, CTSTATE->cb.fpr[1] ++ | ld.d CRET2, CTSTATE->cb.gpr[1] ++ | beq r0, r0, ->vm_leave_unw ++ |.endif ++ | ++ |->vm_ffi_call: // Call C function via FFI. ++ | // Caveat: needs special frame unwinding, see below. 
++ |.if FFI ++ | .type CCSTATE, CCallState, CARG1 ++ | ld.w TMP1, CCSTATE->spadj ++ | ld.bu CARG2, CCSTATE->nsp ++ | ld.bu CARG3, CCSTATE->nfpr ++ | or TMP2, sp, r0 ++ | sub.d sp, sp, TMP1 ++ | st.d ra, -8(TMP2) ++ | slli.w CARG2, CARG2, 3 ++ | st.d r23, -16(TMP2) ++ | st.d CCSTATE, -24(TMP2) ++ | or r23, TMP2, r0 ++ | addi.d TMP1, CCSTATE, offsetof(CCallState, stack) ++ | or TMP2, sp, r0 ++ | add.d TMP3, TMP1, CARG2 ++ | beqz CARG2, >2 ++ |1: ++ | ld.d TMP0, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | sltu AT, TMP1, TMP3 ++ | st.d TMP0, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez AT, <1 ++ |2: ++ | beqz CARG3, >3 ++ | .FPU2 fld.d FARG1, CCSTATE->fpr[0] ++ | .FPU2 fld.d FARG2, CCSTATE->fpr[1] ++ | .FPU2 fld.d FARG3, CCSTATE->fpr[2] ++ | .FPU2 fld.d FARG4, CCSTATE->fpr[3] ++ | .FPU2 fld.d FARG5, CCSTATE->fpr[4] ++ | .FPU2 fld.d FARG6, CCSTATE->fpr[5] ++ | .FPU2 fld.d FARG7, CCSTATE->fpr[6] ++ | .FPU2 fld.d FARG8, CCSTATE->fpr[7] ++ |3: ++ | ld.d CFUNCADDR, CCSTATE->func ++ | ld.d CARG2, CCSTATE->gpr[1] ++ | ld.d CARG3, CCSTATE->gpr[2] ++ | ld.d CARG4, CCSTATE->gpr[3] ++ | ld.d CARG5, CCSTATE->gpr[4] ++ | ld.d CARG6, CCSTATE->gpr[5] ++ | ld.d CARG7, CCSTATE->gpr[6] ++ | ld.d CARG8, CCSTATE->gpr[7] ++ | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. ++ | jirl r1, CFUNCADDR, 0 ++ | ld.d CCSTATE:TMP1, -24(r23) ++ | ld.d TMP2, -16(r23) ++ | ld.d ra, -8(r23) ++ | st.d CRET1, CCSTATE:TMP1->gpr[0] ++ | st.d CRET2, CCSTATE:TMP1->gpr[1] ++ |.if FPU ++ | fmov.d FRET1, FARG1 ++ | fmov.d FRET2, FARG2 ++ | fst.d FRET1, CCSTATE:TMP1->fpr[0] ++ | fst.d FRET2, CCSTATE:TMP1->fpr[1] ++ |.else ++ | st.d CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. ++ |.endif ++ | or sp, r23, r0 ++ | or r23, TMP2, r0 ++ | jirl r0, ra, 0 ++ |.endif ++ |// Note: vm_ffi_call must be the last function in this object file! ++ | ++ |//----------------------------------------------------------------------- ++} ++ ++/* Generate the code for a single instruction. */ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp ++ | add.d RA, BASE, RA ++ | add.d RD, BASE, RD ++ | ld.d ARGRA, 0(RA) ++ | ld.d ARGRD, 0(RD) ++ | ld.hu TMP2, OFS_RD(PC) ++ | gettp CARG3, ARGRA ++ | gettp CARG4, ARGRD ++ | addi.d PC, PC, 4 ++ | bne CARG3, TISNUM, >2 ++ | decode_RD4b TMP2 ++ | bne CARG4, TISNUM, >5 ++ | slli.w ARGRA, ARGRA, 0 // sextw -> slli.w ++ | slli.w ARGRD, ARGRD, 0 // sextw -> slli.w ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | slt AT, CARG1, CARG2 ++ | add.w TMP2, TMP2, TMP3 ++ | movop TMP2, TMP2, AT ++ |1: ++ | add.d PC, PC, TMP2 ++ | ins_next ++ | ++ |2: // RA is not an integer. ++ | sltui AT, CARG3, LJ_TISNUM ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | beqz AT, ->vmeta_comp ++ | sltui AT, CARG4, LJ_TISNUM ++ | decode_RD4b TMP2 //TODO ++ | beqz AT, >4 ++ |.if FPU ++ | fld.d FRA, 0(RA) ++ | fld.d FRD, 0(RD) ++ |.endif ++ |3: // RA and RD are both numbers. 
++ |.if FPU ++ | fcomp FCC0, FTMP0, FTMP2 ++ | add.w TMP2, TMP2, TMP3 ++ | movcf2gr TMP3, FCC0 ++ | fmovop TMP2, TMP2, TMP3 ++ | beq r0, r0, <1 ++ |.else ++ | add.w TMP2, TMP2, TMP ++ | bl sfcomp ++ | movop TMP2, TMP2, CRET1 ++ | beq r0, r0, <1 ++ |.endif ++ | ++ |4: // RA is a number, RD is not a number. ++ |// bne CARG4, TISNUM, ->vmeta_comp ++ | // RA is a number, RD is an integer. Convert RD to a number. ++ |.if FPU ++ | fld.s FRD, LO(RD) ++ | bne CARG4, TISNUM, ->vmeta_comp ++ | fld.d FRA, 0(RA) ++ | ffint.d.w FRD, FRD ++ | beq r0, r0, <3 ++ |.else ++ |.if "ARGRD" == "CARG1" ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | bne CARG4, TISNUM, ->vmeta_comp ++ | bl ->vm_sfi2d_1 ++ |.else ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ | bne CARG4, TISNUM, ->vmeta_comp ++ | bl ->vm_sfi2d_2 ++ |.endif ++ | beq r0, r0, <3 ++ |.endif ++ | ++ |5: // RA is an integer, RD is not an integer ++ | sltui AT, CARG4, LJ_TISNUM ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | beqz AT, ->vmeta_comp ++ | // RA is an integer, RD is a number. Convert RA to a number. ++ |.if FPU ++ | fld.s FRA, LO(RA) ++ | fld.d FRD, 0(RD) ++ | ffint.d.w FRA, FRA ++ | beq r0, r0, <3 ++ |.else ++ |.if "ARGRA" == "CARG1" ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | bl ->vm_sfi2d_1 ++ |.else ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ | bl ->vm_sfi2d_2 ++ |.endif ++ | beq r0, r0, <3 ++ |.endif ++ |.endmacro ++ | ++ if (op == BC_ISLT) { ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, maskeqz, maskeqz, fcmp.clt.d, ->vm_sfcmpolt ++ } else if (op == BC_ISGE) { ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, masknez, masknez, fcmp.clt.d, ->vm_sfcmpolt ++ } else if (op == BC_ISLE) { ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, masknez, masknez, fcmp.cult.d, ->vm_sfcmpult ++ } else { ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, maskeqz, maskeqz, fcmp.cult.d, ->vm_sfcmpult ++ } ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | addi.d PC, PC, 4 ++ | add.d RD, BASE, RD ++ | ld.d CARG1, 0(RA) ++ | ld.hu TMP2, -4+OFS_RD(PC) ++ | ld.d CARG2, 0(RD) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | sltu AT, TISNUM, CARG3 ++ | sltu TMP1, TISNUM, CARG4 ++ | or AT, AT, TMP1 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ if (vk) { ++ | beqz AT, ->BC_ISEQN_Z //TODO which is the following slot ins ++ } else { ++ | beqz AT, ->BC_ISNEN_Z ++ } ++ | // Either or both types are not numbers. ++ |.if FFI ++ | .LI AT, LJ_TCDATA ++ | beq CARG3, AT, ->vmeta_equal_cd ++ |.endif ++ | decode_RD4b TMP2 ++ |.if FFI ++ | beq CARG4, AT, ->vmeta_equal_cd ++ |.endif ++ | add.w TMP2, TMP2, TMP3 ++ | bne CARG1, CARG2, >2 ++ | // Tag and value are equal. ++ if (vk) { ++ |->BC_ISEQV_Z: ++ | add.d PC, PC, TMP2 ++ } ++ |1: ++ | ins_next ++ | ++ |2: // Check if the tags are the same and it's a table or userdata. ++ | xor AT, CARG3, CARG4 // Same type? ++ | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? ++ | masknez TMP0, TMP0, AT ++ | cleartp TAB:TMP1, CARG1 ++ if (vk) { ++ | beqz TMP0, <1 ++ } else { ++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. ++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ld.d TAB:TMP3, TAB:TMP1->metatable ++ if (vk) { ++ | beqz TAB:TMP3, <1 // No metatable? ++ | ld.bu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<1 // Or 'no __eq' flag set? ++ } else { ++ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? 
++ | ld.bu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } ++ | .LI TMP0, 1-vk // ne = 0 or 1. ++ | beq r0, r0, ->vmeta_equal // Handle __eq metamethod. ++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target ++ | add.d RA, BASE, RA ++ | addi.d PC, PC, 4 ++ | ld.d CARG1, 0(RA) ++ | sub.d RD, KBASE, RD ++ | ld.hu TMP2, -4+OFS_RD(PC) ++ | ld.d CARG2, -8(RD) // KBASE-8-str_const*8 ++ |.if FFI ++ | gettp TMP0, CARG1 ++ | .LI AT, LJ_TCDATA ++ |.endif ++ | .LI TMP1, LJ_TSTR ++ | decode_RD4b TMP2 ++ | settp CARG2, TMP1 ++ |.if FFI ++ | beq TMP0, AT, ->vmeta_equal_cd ++ |.endif ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | xor TMP1, CARG1, CARG2 ++ | add.w TMP2, TMP2, TMP3 ++ if (vk) { ++ | masknez TMP2, TMP2, TMP1 ++ } else { ++ | maskeqz TMP2, TMP2, TMP1 ++ } ++ | add.d PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RD = num_const*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | add.d RD, KBASE, RD ++ | ld.d CARG1, 0(RA) ++ | ld.d CARG2, 0(RD) ++ | ld.hu TMP2, OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | addi.d PC, PC, 4 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | decode_RD4b TMP2 ++ | bne CARG3, TISNUM, >3 ++ | add.w TMP2, TMP2, TMP3 ++ | bne CARG4, TISNUM, >6 ++ | xor AT, CARG1, CARG2 ++ if (vk) { ++ | masknez TMP2, TMP2, AT ++ |1: ++ | add.d PC, PC, TMP2 ++ |2: ++ } else { ++ | maskeqz TMP2, TMP2, AT ++ |1: ++ |2: ++ | add.d PC, PC, TMP2 ++ } ++ | ins_next ++ | ++ |3: // RA is not an integer. ++ | sltu AT, CARG3, TISNUM ++ | add.w TMP2, TMP2, TMP3 ++ |.if FFI ++ | beqz AT, >8 ++ |.else ++ | beqz AT, <2 ++ |.endif ++ | sltu AT, CARG4, TISNUM ++ |.if FPU ++ | fld.d FTMP0, 0(RA) ++ | fld.d FTMP2, 0(RD) ++ |.endif ++ | beqz AT, >5 ++ |4: // RA and RD are both numbers. ++ |.if FPU ++ | fcmp.ceq.d FCC0, FTMP0, FTMP2 //TODO fcmp.cond.d cc, fj, fk ++ | movcf2gr TMP1, FCC0 ++ if (vk) { ++ | maskeqz TMP2, TMP2, TMP1 ++ } else { ++ | masknez TMP2, TMP2, TMP1 ++ } ++ | beq r0, r0, <1 ++ |.else ++ | bl ->vm_sfcmpeq ++ if (vk) { ++ | maskeqz TMP2, TMP2, CRET1 ++ } else { ++ | masknez TMP2, TMP2, CRET1 ++ } ++ | beq r0, r0, <1 ++ |.endif ++ | ++ |5: // RA is a number, RD is not a number. ++ |//.if FFI ++ |// bne CARG4, TISNUM, >9 //TODO does not process the following flot ins ++ |//.else ++ |// bne CARG4, TISNUM, <2 ++ |//.endif ++ | // RA is a number, RD is an integer. Convert RD to a number. ++ |.if FPU ++ | fld.s FTMP2, LO(RD) ++ |.if FFI ++ | bne CARG4, TISNUM, >9 ++ |.else ++ | bne CARG4, TISNUM, <2 ++ |.endif ++ | ffint.d.w FTMP2, FTMP2 ++ | beq r0, r0, <4 ++ |.else ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ |.if FFI ++ | bne CARG4, TISNUM, >9 ++ |.else ++ | bne CARG4, TISNUM, <2 ++ |.endif ++ | bl ->vm_sfi2d_2 ++ | beq r0, r0, <4 ++ |.endif ++ | ++ |6: // RA is an integer, RD is not an integer ++ | sltu AT, CARG4, TISNUM ++ |//.if FFI ++ |// beqz AT, >9 //TODO does not process the following flot ins ++ |//.else ++ |// beqz AT, <2 ++ |//.endif ++ | // RA is an integer, RD is a number. Convert RA to a number. 
++ |.if FPU ++ | fld.s FTMP0, LO(RA) ++ |.if FFI ++ | beqz AT, >9 ++ |.else ++ | beqz AT, <2 ++ |.endif ++ | fld.d FTMP2, 0(RD) ++ | ffint.d.w FTMP0, FTMP0 ++ | b <4 ++ |.else ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ |.if FFI ++ | beqz AT, >9 ++ |.else ++ | beqz AT, <2 ++ |.endif ++ | bl ->vm_sfi2d_1 ++ | beq r0, r0, <4 ++ |.endif ++ | ++ |.if FFI ++ |8: ++ | .LI AT, LJ_TCDATA ++ | bne CARG3, AT, <2 ++ | beq r0, r0, ->vmeta_equal_cd ++ |9: ++ | .LI AT, LJ_TCDATA ++ | bne CARG4, AT, <2 ++ | beq r0, r0, ->vmeta_equal_cd ++ |.endif ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target ++ | add.d RA, BASE, RA ++ | srli.w TMP1, RD, 3 ++ | ld.d TMP0, 0(RA) ++ | ld.hu TMP2, OFS_RD(PC) ++ | nor TMP1, TMP1, r0 ++ | gettp TMP0, TMP0 ++ | addi.d PC, PC, 4 ++ | or r17, TMP0, r0 ++ | xor TMP0, TMP0, TMP1 ++ |.if FFI ++ | .LI AT, LJ_TCDATA ++ | beq r17, AT, ->vmeta_equal_cd ++ |.endif ++ | decode_RD4b TMP2 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.w TMP2, TMP2, TMP3 ++ if (vk) { ++ | masknez TMP2, TMP2, TMP0 ++ } else { ++ | maskeqz TMP2, TMP2, TMP0 ++ } ++ | add.d PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ /* -- Unary test and copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target ++ | add.d RD, BASE, RD ++ | ld.hu TMP2, OFS_RD(PC) ++ | ld.d TMP0, 0(RD) ++ | addi.d PC, PC, 4 ++ | gettp TMP0, TMP0 ++ | sltui TMP0, TMP0, LJ_TISTRUECOND ++ if (op == BC_IST || op == BC_ISF) { ++ | decode_RD4b TMP2 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.w TMP2, TMP2, TMP3 ++ if (op == BC_IST) { ++ | maskeqz TMP2, TMP2, TMP0; ++ } else { ++ | masknez TMP2, TMP2, TMP0; ++ } ++ | add.d PC, PC, TMP2 ++ } else { ++ | ld.d CRET1, 0(RD) ++ | add.d RA, BASE, RA ++ if (op == BC_ISTC) { ++ | beqz TMP0, >1 ++ } else { ++ | bnez TMP0, >1 ++ } ++ | decode_RD4b TMP2 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.w TMP2, TMP2, TMP3 ++ | st.d CRET1, 0(RA) ++ | add.d PC, PC, TMP2 ++ |1: ++ } ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RD = -type*8 ++ | add.d TMP2, BASE, RA ++ | srli.w TMP1, RD, 3 ++ | ld.d TMP0, 0(TMP2) ++ | ins_next1 ++ | gettp TMP0, TMP0 ++ | add.d AT, TMP0, TMP1 ++ | bnez AT, ->vmeta_istype ++ | ins_next2 ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RD = -(TISNUM-1)*8 ++ | add.d TMP2, BASE, RA ++ | ld.d TMP0, 0(TMP2) ++ | ins_next1 ++ | ins_next2 ++ | checknum TMP0, ->vmeta_istype ++ |// ins_next2 ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RD = src*8 ++ | add.d RD, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d CRET1, 0(RD) ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RD = src*8 ++ | add.d RD, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(RD) ++ | .LI AT, LJ_TTRUE ++ | gettp TMP0, TMP0 ++ | sltu TMP0, AT, TMP0 ++ | addi.w TMP0, TMP0, 1 ++ | slli.d TMP0, TMP0, 47 ++ | nor TMP0, TMP0, r0 ++ | ins_next1 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RD = src*8 ++ | add.d RB, BASE, RD ++ | ld.d CARG1, 0(RB) ++ | add.d RA, BASE, RA ++ | gettp CARG3, CARG1 ++ |// addu16i.d TMP1, r0, 0x8000 ++ | .LUI TMP1, 0x8000 ++ | bne CARG3, TISNUM, >2 ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | sub.w CARG1, r0, CARG1 ++ | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with 
-2^31. ++ | bstrpick.d CARG1, CARG1, 31, 0 // zextw -> bstrpick.d ++ | settp CARG1, TISNUM ++ |1: ++ | ins_next1 ++ | st.d CARG1, 0(RA) ++ | ins_next2 ++ |2: ++ | sltui AT, CARG3, LJ_TISNUM ++ | slli.d TMP1, TMP1, 32 ++ | beqz AT, ->vmeta_unm ++ | xor CARG1, CARG1, TMP1 ++ | beq r0, r0, <1 ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RD = src*8 ++ | add.d CARG2, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(CARG2) ++ | gettp TMP1, TMP0 ++ | addi.d AT, TMP1, -LJ_TSTR ++ | cleartp STR:CARG1, TMP0 ++ | bnez AT, >2 ++ | ld.w CRET1, STR:CARG1->len ++ |1: ++ | settp CRET1, TISNUM ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ |2: ++ | addi.d AT, TMP1, -LJ_TTAB ++ | bnez AT, ->vmeta_len ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:CARG1->metatable ++ | bnez TAB:TMP2, >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | bl extern lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). ++ | beq r0, r0, <1 ++#if LJ_52 ++ |9: ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro fpmod, a, b, c ++ | fdiv.d FARG1, b, c ++ | bl ->vm_floor // floor(b/c) ++ | fmul.d a, FRET1, c ++ | fsub.d a, b, a // b - floor(b/c)*c ++ |.endmacro ++ ++ |.macro sfpmod ++ | addi.d sp, sp, -16 ++ | ++ | st.d CARG1, 0(sp) ++ | st.d CARG2, 8(sp) ++ | bl extern __divdf3 ++ | ++ | or CARG1, CRET1, r0 ++ | bl extern floor ++ | ++ | or CARG1, CRET1, r0 ++ | ld.d CARG2, 8(sp) ++ | bl extern __muldf3 ++ | ++ | ld.d CARG1, 0(sp) ++ | or CARG2, CRET1, r0 ++ | bl extern __subdf3 ++ | ++ | addi.d sp, sp, 16 ++ |.endmacro ++ ++ |.macro ins_arithpre, label ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||switch (vk) { ++ ||case 0: ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | // RA = dst*8, RB = src1*8, RC = num_const*8 ++ | add.d RB, BASE, RB ++ | add.d RC, KBASE, RC ++ |.if "label" ~= "none" ++ | beq r0, r0, label ++ |.endif ++ || break; ++ ||case 1: ++ | decode_RB8a RC, INS ++ | decode_RB8b RC ++ | decode_RDtoRC8 RB, RD ++ | // RA = dst*8, RB = num_const*8, RC = src1*8 ++ | add.d RC, BASE, RC ++ | add.d RB, KBASE, RB ++ |.if "label" ~= "none" ++ | beq r0, r0, label ++ |.endif ++ || break; ++ ||default: ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | // RA = dst*8, RB = src1*8, RC = src2*8 ++ | add.d RB, BASE, RB ++ | add.d RC, BASE, RC ++ |.if "label" ~= "none" ++ | beq r0, r0, label ++ |.endif ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arith, intins, fpins, fpcall, label ++ | ins_arithpre none ++ | ++ |.if "label" ~= "none" ++ |label: ++ |.endif ++ | ++ |// Used in 5. ++ | ld.d CARG1, 0(RB) ++ | ld.d CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | ++ |.if "intins" ~= "div.w" ++ | ++ | // Check for two integers. ++ | slli.w CARG3, CARG1, 0 // sextw -> slli.w ++ | slli.w CARG4, CARG2, 0 // sextw -> slli.w ++ | bne TMP0, TISNUM, >5 ++ |// bne TMP1, TISNUM, >5 //TODO not process the following slot ins ++ | ++ |.if "intins" == "add.w" ++ | intins CRET1, CARG3, CARG4 ++ | bne TMP1, TISNUM, >5 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. ++ | xor TMP2, CRET1, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add.d RA, BASE, RA ++ | blt TMP1, r0, ->vmeta_arith ++ |.elif "intins" == "sub.w" ++ | intins CRET1, CARG3, CARG4 ++ | bne TMP1, TISNUM, >5 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. 
++ | xor TMP2, CARG3, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add.d RA, BASE, RA ++ | blt TMP1, r0, ->vmeta_arith ++ |.elif "intins" == "mulw.d.w" //TODO mips: mult -> la: mulw.d.w ++ |//. nop ++ | bne TMP1, TISNUM, >5 ++ | mul.w CRET1, CARG3, CARG4 ++ | mulh.w TMP2, CARG3, CARG4 ++ | srai.w TMP1, CRET1, 31 ++ | add.d RA, BASE, RA ++ | bne TMP1, TMP2, ->vmeta_arith ++ |.else ++ |//. load_got lj_vm_modi ++ | bne TMP1, TISNUM, >5 ++ | add.d RA, BASE, RA ++ | beqz CARG4, ->vmeta_arith ++ | or CARG1, CARG3, r0 ++ | or CARG2, CARG4, r0 ++ | bl extern lj_vm_modi //TODO implement func lj_vm_modi/vm_modi ++ |.endif ++ | ++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d ++ | settp CRET1, TISNUM ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ |3: ++ | ins_next2 ++ | ++ |.endif ++ | ++ |5: // Check for two numbers. ++ | .FPU2 fld.d FTMP0, 0(RB) ++ | sltu AT, TMP0, TISNUM ++ | sltu TMP0, TMP1, TISNUM ++ | .FPU2 fld.d FTMP2, 0(RC) ++ | and AT, AT, TMP0 ++ | add.d RA, BASE, RA ++ | beqz AT, ->vmeta_arith ++ | ++ |.if FPU ++ | fpins FRET1, FTMP0, FTMP2 ++ |.elif "fpcall" == "sfpmod" ++ | sfpmod ++ |.else ++ | bl fpcall ++ |.endif ++ | ++ | ins_next1 ++ |.if FPU ++ | fst.d FRET1, 0(RA) ++ |.else ++ | st.d CRET1, 0(RA) ++ |.endif ++ |.if "intins" ~= "div.w" ++ | beq r0, r0, <3 ++ |.endif ++ |.if "intins" == "div.w" ++ | ins_next2 ++ |.endif ++ | ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arith add.w, fadd.d, __adddf3, none ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arith sub.w, fsub.d, __subdf3, none ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arith mulw.d.w, fmul.d, __muldf3, none ++ break; ++ case BC_DIVVN: ++ | ins_arith div.w, fdiv.d, __divdf3, ->BC_DIVVN_Z ++ break; ++ case BC_DIVNV: case BC_DIVVV: ++ | ins_arithpre ->BC_DIVVN_Z ++ break; ++ case BC_MODVN: ++ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z //TODO modi -> ? ++ break; ++ case BC_MODNV: case BC_MODVV: ++ | ins_arithpre ->BC_MODVN_Z ++ break; ++ case BC_POW: ++ | ins_arithpre none ++ | ld.d CARG1, 0(RB) ++ | ld.d CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | sltui TMP0, TMP0, LJ_TISNUM ++ | sltui TMP1, TMP1, LJ_TISNUM ++ | and AT, TMP0, TMP1 ++ | add.d RA, BASE, RA ++ | beqz AT, ->vmeta_arith ++ |.if FPU ++ | fld.d FARG1, 0(RB) ++ | fld.d FARG2, 0(RC) ++ |.endif ++ | bl extern pow ++ | ins_next1 ++ |.if FPU ++ | fst.d FRET1, 0(RA) ++ |.else ++ | st.d CRET1, 0(RA) ++ |.endif ++ | ins_next2 ++ break; ++ ++ case BC_CAT: ++ | // RA = dst*8, RB = src_start*8, RC = src_end*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | sub.d CARG3, RC, RB ++ | st.d BASE, L->base ++ | add.d CARG2, BASE, RC ++ | or MULTRES, RB, r0 ++ |->BC_CAT_Z: ++ | srli.w CARG3, CARG3, 3 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL (finished) or TValue * (metamethod). 
++ | ld.d BASE, L->base ++ | bnez CRET1, ->vmeta_binop ++ | add.d RB, BASE, MULTRES ++ | ld.d r17, 0(RB) ++ | add.d RA, BASE, RA ++ | ins_next1 ++ | st.d r17, 0(RA) ++ | ins_next2 ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RD = str_const*8 (~) ++ | sub.d TMP1, KBASE, RD ++ | ins_next1 ++ | .LI TMP2, LJ_TSTR ++ | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8 ++ | add.d RA, BASE, RA ++ | settp TMP0, TMP2 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RD = cdata_const*8 (~) ++ | sub.d TMP1, KBASE, RD ++ | ins_next1 ++ | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8 ++ | .LI TMP2, LJ_TCDATA ++ | add.d RA, BASE, RA ++ | settp TMP0, TMP2 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ |.endif ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, RD = int16_literal*8 ++ | srai.w RD, INS, 16 ++ | add.d RA, BASE, RA ++ | bstrpick.d RD, RD, 31, 0 // zextw -> bstrpick.d ++ | ins_next1 ++ | settp RD, TISNUM ++ | st.d RD, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RD = num_const*8 ++ | add.d RD, KBASE, RD ++ | add.d RA, BASE, RA ++ | ld.d CRET1, 0(RD) ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RD = primitive_type*8 (~) ++ | add.d RA, BASE, RA ++ | slli.d TMP0, RD, 44 ++ | nor TMP0, TMP0, r0 ++ | ins_next1 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RD = end*8 ++ | add.d RA, BASE, RA ++ | st.d TISNIL, 0(RA) ++ | addi.d RA, RA, 8 ++ | add.d RD, BASE, RD ++ |1: ++ | st.d TISNIL, 0(RA) ++ | slt AT, RA, RD ++ | addi.d RA, RA, 8 ++ | bnez AT, <1 ++ | ins_next_ ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ ++ ++ case BC_UGET: ++ | // RA = dst*8, RD = uvnum*8 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | add.d RA, BASE, RA ++ | cleartp LFUNC:RB ++ | add.d RD, RD, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RD->uvptr ++ | ins_next1 ++ | ld.d TMP1, UPVAL:RB->v ++ | ld.d CRET1, 0(TMP1) ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RD = src*8 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | add.d RD, BASE, RD ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ld.d CRET1, 0(RD) ++ | ld.bu TMP3, UPVAL:RB->marked ++ | ld.d CARG2, UPVAL:RB->v ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) ++ | ld.bu TMP0, UPVAL:RB->closed ++ | gettp TMP2, CRET1 ++ | st.d CRET1, 0(CARG2) ++ | .LI AT, LJ_GC_BLACK|1 ++ | or TMP3, TMP3, TMP0 ++ | addi.d TMP2, TMP2, -(LJ_TNUMX+1) ++ | beq TMP3, AT, >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | sltui AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) ++ | cleartp GCOBJ:CRET1, CRET1 ++ | beqz AT, <1 // tvisgcv(v) ++ | ld.bu TMP3, GCOBJ:CRET1->gch.marked ++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) ++ | beqz TMP3, <1 ++ | // Crossed a write barrier. Move the barrier forward. 
++ | .DADDIU CARG1, DISPATCH, GG_DISP2G ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | beq r0, r0, <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RD = str_const*8 (~) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | sub.d TMP1, KBASE, RD ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 ++ | ld.bu TMP2, UPVAL:RB->marked ++ | ld.d CARG2, UPVAL:RB->v ++ | ld.bu TMP3, STR:TMP1->marked ++ | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) ++ | ld.bu TMP2, UPVAL:RB->closed ++ | .LI TMP0, LJ_TSTR ++ | settp TMP1, TMP0 ++ | st.d TMP1, 0(CARG2) ++ | bnez AT, >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | andi AT, TMP3, LJ_GC_WHITES // iswhite(str) ++ | beqz TMP2, <1 ++ | beqz AT, <1 ++ | // Crossed a write barrier. Move the barrier forward. ++ | .DADDIU CARG1, DISPATCH, GG_DISP2G ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | beq r0, r0, <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RD = num_const*8 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | add.d RD, KBASE, RD ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ld.d CRET1, 0(RD) ++ | ld.d TMP1, UPVAL:RB->v ++ | ins_next1 ++ | st.d CRET1, 0(TMP1) ++ | ins_next2 ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RD = primitive_type*8 (~) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | slli.d TMP0, RD, 44 ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | nor TMP0, TMP0, r0 ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ins_next1 ++ | ld.d TMP1, UPVAL:RB->v ++ | st.d TMP0, 0(TMP1) ++ | ins_next2 ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RD = target ++ | ld.d TMP2, L->openupval ++ | branch_RD // Do this first since RD is not saved. ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | beqz TMP2, >1 ++ | add.d CARG2, BASE, RA ++ | bl extern lj_func_closeuv // (lua_State *L, TValue *level) ++ | ld.d BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) ++ | sub.d TMP1, KBASE, RD ++ | ld.d CARG3, FRAME_FUNC(BASE) ++ | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | cleartp CARG3 ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | or CARG1, L, r0 ++ | bl extern lj_func_newL_gc ++ | // Returns GCfuncL *. ++ | .LI TMP0, LJ_TFUNC ++ | ld.d BASE, L->base ++ | ins_next1 ++ | settp CRET1, TMP0 ++ | add.d RA, BASE, RA ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) ++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total) ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold) ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | sltu AT, TMP0, TMP1 ++ | beqz AT, >5 //TODO why no slot ins ? ++ |1: ++ if (op == BC_TNEW) { ++ | srli.w CARG2, RD, 3 ++ | andi CARG2, CARG2, 0x7ff ++ | .LI TMP0, 0x801 ++ | addi.w AT, CARG2, -0x7ff ++ | srli.w CARG3, RD, 14 ++ | masknez TMP0, TMP0, AT ++ | maskeqz CARG2, CARG2, AT ++ | or CARG2, CARG2, TMP0 ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_new ++ | // Returns Table *. ++ } else { ++ | sub.d TMP1, KBASE, RD ++ | or CARG1, L, r0 ++ | ld.d CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | bl extern lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns Table *. 
++ } ++ | .LI TMP0, LJ_TTAB ++ | ld.d BASE, L->base ++ | ins_next1 ++ | add.d RA, BASE, RA ++ | settp CRET1, TMP0 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ |5: ++ | or MULTRES, RD, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_gc_step_fixtop // (lua_State *L) ++ | or RD, MULTRES, r0 ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RD = str_const*8 (~) ++ case BC_GSET: ++ | // RA = src*8, RD = str_const*8 (~) ++ | ld.d LFUNC:TMP2, FRAME_FUNC(BASE) ++ | sub.d TMP1, KBASE, RD ++ | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8 ++ | cleartp LFUNC:TMP2 ++ | ld.d TAB:RB, LFUNC:TMP2->env ++ | add.d RA, BASE, RA ++ if (op == BC_GGET) { ++ | beq r0, r0, ->BC_TGETS_Z ++ } else { ++ | beq r0, r0, ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:RB, 0(CARG2) ++ | ld.d TMP2, 0(CARG3) ++ | add.d RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tgetv ++ | gettp TMP3, TMP2 ++ | ld.w TMP0, TAB:RB->asize ++ | bne TMP3, TISNUM, >5 // Integer key? ++ | slli.w TMP2, TMP2, 0 // sextw -> slli.w ++ | ld.d TMP1, TAB:RB->array ++ | sltu AT, TMP2, TMP0 ++ | slli.w TMP2, TMP2, 3 ++ | add.d TMP2, TMP1, TMP2 ++ | beqz AT, ->vmeta_tgetv // Integer key and in array part? ++ | ld.d AT, 0(TMP2) ++ | ld.d CRET1, 0(TMP2) ++ | beq AT, TISNIL, >2 ++ |1: ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ | ++ |2: // Check for __index if table value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgetv ++ | ++ |5: ++ | .LI AT, LJ_TSTR ++ | cleartp RC, TMP2 ++ | bne TMP3, AT, ->vmeta_tgetv ++ | beq r0, r0, ->BC_TGETS_Z // String key? ++ break; ++ case BC_TGETS: ++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RC8a RC, INS ++ | add.d CARG2, BASE, RB ++ | decode_RC8b RC ++ | ld.d TAB:RB, 0(CARG2) ++ | sub.d CARG3, KBASE, RC ++ | add.d RA, BASE, RA ++ | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8 ++ | checktab TAB:RB, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->hash ++ | ld.d NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | slli.w TMP0, TMP1, 5 ++ | slli.w TMP1, TMP1, 3 ++ | sub.w TMP1, TMP0, TMP1 ++ | .LI TMP3, LJ_TSTR ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ld.d CARG1, NODE:TMP2->key ++ | ld.d r17, NODE:TMP2->val ++ | ld.d NODE:TMP1, NODE:TMP2->next ++ | ld.d TAB:TMP3, TAB:RB->metatable ++ | bne CARG1, RC, >4 ++ | beq r17, TISNIL, >5 // Key found, but nil value? ++ |3: ++ | ins_next1 ++ | st.d r17, 0(RA) ++ | ins_next2 ++ | ++ |4: // Follow hash chain. ++ | or NODE:TMP2, NODE:TMP1, r0 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | or r17, TISNIL, r0 ++ | beqz TAB:TMP3, <3 // No metatable: done. 
++ | ld.bu TMP0, TAB:TMP3->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | // RA = dst*8, RB = table*8, RC = index*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | add.d CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld.d TAB:RB, 0(CARG2) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RC, 3 ++ | checktab TAB:RB, ->vmeta_tgetb ++ | ld.w TMP1, TAB:RB->asize ++ | ld.d TMP2, TAB:RB->array ++ | sltu AT, TMP0, TMP1 ++ | add.d RC, TMP2, RC ++ | beqz AT, ->vmeta_tgetb ++ | ld.d AT, 0(RC) ++ | ld.d CRET1, 0(RC) ++ | beq AT, TISNIL, >5 ++ |1: ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ | ++ |5: // Check for __index if table value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! ++ break; ++ case BC_TGETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d RB, BASE, RB ++ | add.d RC, BASE, RC ++ | ld.d TAB:CARG1, 0(RB) ++ | ld.w CARG2, LO(RC) ++ | add.d RA, BASE, RA ++ | cleartp TAB:CARG1 ++ | ld.w TMP0, TAB:CARG1->asize ++ | ld.d TMP1, TAB:CARG1->array ++ | sltu AT, CARG2, TMP0 ++ | slli.w TMP2, CARG2, 3 ++ | add.d r17, TMP1, TMP2 ++ | beqz AT, ->vmeta_tgetr // In array part? ++ | ld.d CARG2, 0(r17) ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | st.d CARG2, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_TSETV: ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d RB, 0(CARG2) ++ | ld.d TMP2, 0(CARG3) ++ | add.d RA, BASE, RA ++ | checktab RB, ->vmeta_tsetv ++ | slli.w RC, TMP2, 0 // sextw -> slli.w ++ | checkint TMP2, >5 ++ | ld.w TMP0, TAB:RB->asize ++ | ld.d TMP1, TAB:RB->array ++ | sltu AT, RC, TMP0 ++ | slli.w TMP2, RC, 3 ++ | add.d TMP1, TMP1, TMP2 ++ | beqz AT, ->vmeta_tsetv // Integer key and in array part? ++ | ld.d TMP0, 0(TMP1) ++ | ld.bu TMP3, TAB:RB->marked ++ | ld.d CRET1, 0(RA) ++ | beq TMP0, TISNIL, >3 ++ |1: ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ | st.d CRET1, 0(TMP1) ++ | bnez AT, >7 ++ |2: ++ | ins_next ++ | ++ |3: // Check for __newindex if previous value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP2, TAB:TMP2->nomm ++ | andi TMP2, TMP2, 1<vmeta_tsetv ++ | ++ |5: ++ | gettp AT, TMP2 ++ | addi.d AT, AT, -LJ_TSTR ++ | bnez AT, ->vmeta_tsetv ++ | cleartp STR:RC, TMP2 ++ | beq r0, r0, ->BC_TSETS_Z // String key? ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETS: ++ | // RA = src*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | add.d CARG2, BASE, RB ++ | decode_RC8a RC, INS ++ | ld.d TAB:RB, 0(CARG2) ++ | decode_RC8b RC ++ | sub.d CARG3, KBASE, RC ++ | ld.d RC, -8(CARG3) // KBASE-8-str_const*8 ++ | add.d RA, BASE, RA ++ | cleartp STR:RC ++ | checktab TAB:RB, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->hash ++ | ld.d NODE:TMP2, TAB:RB->node ++ | st.b r0, TAB:RB->nomm // Clear metamethod cache. 
++ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | slli.w TMP0, TMP1, 5 ++ | slli.w TMP1, TMP1, 3 ++ | sub.w TMP1, TMP0, TMP1 ++ | .LI TMP3, LJ_TSTR ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |.if FPU ++ | fld.d FTMP0, 0(RA) ++ |.else ++ | ld.d CRET1, 0(RA) ++ |.endif ++ |1: ++ | ld.d TMP0, NODE:TMP2->key ++ | ld.d CARG2, NODE:TMP2->val ++ | ld.d NODE:TMP1, NODE:TMP2->next ++ | ld.bu TMP3, TAB:RB->marked ++ | bne TMP0, RC, >5 ++ | ld.d TAB:TMP0, TAB:RB->metatable ++ | beq CARG2, TISNIL, >4 // Key found, but nil value? ++ |2: ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU ++ | fst.d FTMP0, NODE:TMP2->val ++ |.else ++ | st.d CRET1, NODE:TMP2->val ++ |.endif ++ | bnez AT, >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | beqz TAB:TMP0, <2 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP0->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | or NODE:TMP2, NODE:TMP1, r0 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, add a new one ++ | ++ | // But check for __newindex first. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | beqz TAB:TMP2, >6 // No metatable: continue. ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. TODO why no slot ins ? ++ |6: ++ | st.d RC, 0(CARG3) ++ | st.d BASE, L->base ++ | or CARG2, TAB:RB, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k ++ | // Returns TValue *. ++ | ld.d BASE, L->base ++ |.if FPU ++ | fst.d FTMP0, 0(CRET1) ++ | beq r0, r0, <3 // No 2nd write barrier needed. ++ |.else ++ | ld.d r17, 0(RA) ++ | st.d r17, 0(CRET1) ++ | beq r0, r0, <3 // No 2nd write barrier needed. ++ |.endif ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <3 ++ break; ++ case BC_TSETB: ++ | // RA = src*8, RB = table*8, RC = index*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | add.d CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld.d TAB:RB, 0(CARG2) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RC, 3 ++ | checktab RB, ->vmeta_tsetb ++ | ld.w TMP1, TAB:RB->asize ++ | ld.d TMP2, TAB:RB->array ++ | sltu AT, TMP0, TMP1 ++ | add.d RC, TMP2, RC ++ | beqz AT, ->vmeta_tsetb ++ | ld.d TMP1, 0(RC) ++ | ld.bu TMP3, TAB:RB->marked ++ | beq TMP1, TISNIL, >5 //TODO not process the following slot ins ++ |1: ++ | ld.d CRET1, 0(RA) ++ |// beq TMP1, TISNIL, >5 ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ | st.d CRET1, 0(RC) ++ | bnez AT, >7 ++ |2: ++ | ins_next ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. 
++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG1, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:CARG2, 0(CARG1) ++ | ld.w CARG3, LO(CARG3) ++ | cleartp TAB:CARG2 ++ | ld.bu TMP3, TAB:CARG2->marked ++ | ld.w TMP0, TAB:CARG2->asize ++ | ld.d TMP1, TAB:CARG2->array ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ | add.d RA, BASE, RA ++ | bnez AT, >7 ++ |2: ++ | sltu AT, CARG3, TMP0 ++ | slli.w TMP2, CARG3, 3 ++ | add.d r17, TMP1, TMP2 ++ | beqz AT, ->vmeta_tsetr // In array part? ++ |->BC_TSETR_Z: ++ | bnez AT, >3 ++ | add.d r17, CRET1, r0 ++ |3: ++ | ld.d CARG1, 0(RA) ++ | ins_next1 ++ | st.d CARG1, 0(r17) ++ | ins_next2 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) ++ | add.d RA, BASE, RA ++ |1: ++ | add.d TMP3, KBASE, RD ++ | ld.d TAB:CARG2, -8(RA) // Guaranteed to be a table. ++ | addi.w TMP0, MULTRES, -8 ++ | ld.w TMP3, LO(TMP3) // Integer constant is in lo-word. ++ | srli.w CARG3, TMP0, 3 ++ | beqz TMP0, >4 // Nothing to copy? ++ | cleartp CARG2 ++ | add.w CARG3, CARG3, TMP3 ++ | ld.w TMP2, TAB:CARG2->asize ++ | slli.w TMP1, TMP3, 3 ++ | ld.bu TMP3, TAB:CARG2->marked ++ | ld.d CARG1, TAB:CARG2->array ++ | sltu AT, TMP2, CARG3 ++ | add.d TMP2, RA, TMP0 ++ | bnez AT, >5 ++ | add.d TMP1, TMP1, CARG1 ++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |3: // Copy result slots to table. ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | sltu AT, RA, TMP2 ++ | st.d CRET1, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | bnez AT, <3 ++ | bnez TMP0, >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or BASE, RD, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // Must not reallocate the stack. ++ | or RD, BASE, r0 ++ | ld.d BASE, L->base // Reload BASE for lack of a saved register. ++ | beq r0, r0, <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, TMP0, <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ | add.w NARGS8:RC, NARGS8:RC, MULTRES ++ | beq r0, r0, ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ |->BC_CALL_Z: ++ | or TMP2, BASE, r0 ++ | add.d BASE, BASE, RA ++ | ld.d LFUNC:RB, 0(BASE) ++ | addi.d BASE, BASE, 16 ++ | addi.w NARGS8:RC, NARGS8:RC, -8 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 ++ | add.w NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. ++ | // Fall through. Assumes BC_CALLT follows. ++ break; ++ case BC_CALLT: ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ | add.d RA, BASE, RA ++ | ld.d RB, 0(RA) ++ | or NARGS8:RC, RD, r0 ++ | ld.d TMP1, FRAME_PC(BASE) ++ | addi.d RA, RA, 16 ++ | addi.w NARGS8:RC, NARGS8:RC, -8 ++ | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt ++ |->BC_CALLT_Z: ++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. 
++ | ld.bu TMP3, LFUNC:CARG3->ffid ++ | xori TMP2, TMP1, FRAME_VARG ++ | bnez TMP0, >7 ++ |1: ++ | st.d RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. ++ | sltui AT, TMP3, 2 // (> FF_C) Calling a fast function? ++ | or TMP2, BASE, r0 ++ | or RB, CARG3, r0 ++ | or TMP3, NARGS8:RC, r0 ++ | beqz NARGS8:RC, >3 ++ |2: ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | addi.w TMP3, TMP3, -8 ++ | st.d CRET1, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez TMP3, <2 ++ |3: ++ | or TMP0, TMP0, AT ++ | beqz TMP0, >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | ld.w INS, -4(TMP1) ++ | decode_RA8a RA, INS ++ | decode_RA8b RA ++ | sub.d TMP1, BASE, RA ++ | ld.d TMP1, -32(TMP1) ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. ++ | beq r0, r0, <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | andi AT, TMP2, FRAME_TYPEP ++ | sub.d TMP2, BASE, TMP2 // Relocate BASE down. ++ | bnez AT, <1 // Vararg frame below? ++ | or BASE, TMP2, r0 ++ | ld.d TMP1, FRAME_PC(TMP2) ++ | andi TMP0, TMP1, FRAME_TYPE ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) ++ | or TMP2, BASE, r0 // Save old BASE fir vmeta_call. ++ | add.d BASE, BASE, RA ++ | ld.d RB, -24(BASE) ++ | ld.d CARG1, -16(BASE) ++ | ld.d CARG2, -8(BASE) ++ | .LI NARGS8:RC, 16 // Iterators get 2 arguments. ++ | st.d RB, 0(BASE) // Copy callable. ++ | st.d CARG1, 16(BASE) // Copy state. ++ | st.d CARG2, 24(BASE) // Copy control var. ++ | addi.d BASE, BASE, 16 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ |.if JIT ++ | // NYI: add hotloop, record BC_ITERN. ++ |.endif ++ |->vm_IITERN: ++ | add.d RA, BASE, RA ++ | ld.d TAB:RB, -16(RA) ++ | ld.w RC, -8+LO(RA) // Get index from control var. ++ | cleartp TAB:RB ++ | addi.d PC, PC, 4 ++ | ld.w TMP0, TAB:RB->asize ++ | ld.d TMP1, TAB:RB->array ++ | slli.d CARG3, TISNUM, 47 ++ |1: // Traverse array part. ++ | sltu AT, RC, TMP0 ++ | slli.w TMP3, RC, 3 ++ | beqz AT, >5 // Index points after array part? ++ | add.d TMP3, TMP1, TMP3 ++ | ld.d CARG1, 0(TMP3) ++ | ld.hu RD, -4+OFS_RD(PC) ++ | or TMP2, RC, CARG3 ++ | addi.w RC, RC, 1 ++ | beq CARG1, TISNIL, <1 // Skip holes in array part. ++ | st.d TMP2, 0(RA) ++ | st.d CARG1, 8(RA) ++ | or TMP0, RC, CARG3 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | decode_RD4b RD ++ | add.d RD, RD, TMP3 ++ | st.w TMP0, -8+LO(RA) // Update control var. ++ | add.d PC, PC, RD ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | ld.w TMP1, TAB:RB->hmask ++ | sub.w RC, RC, TMP0 ++ | ld.d TMP2, TAB:RB->node ++ |6: ++ | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. ++ | slli.w TMP3, RC, 5 ++ | bnez AT, <3 ++ | slli.w RB, RC, 3 ++ | sub.w TMP3, TMP3, RB ++ | add.d NODE:TMP3, TMP3, TMP2 ++ | ld.d CARG1, 0(NODE:TMP3) ++ | ld.hu RD, -4+OFS_RD(PC) ++ | addi.w RC, RC, 1 ++ | beq CARG1, TISNIL, <6 // Skip holes in hash part. ++ | ld.d CARG2, NODE:TMP3->key ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | st.d CARG1, 8(RA) ++ | add.w RC, RC, TMP0 ++ | decode_RD4b RD ++ | add.w RD, RD, TMP3 ++ | st.d CARG2, 0(RA) ++ | add.d PC, PC, RD ++ | st.w RC, -8+LO(RA) // Update control var. 
++ | beq r0, r0, <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RD = target (points to ITERN) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RD, 1 ++ | ld.d CFUNC:CARG1, -24(RA) ++ | add.d TMP0, PC, TMP0 ++ | ld.d CARG2, -16(RA) ++ | ld.d CARG3, -8(RA) ++ | .LUI TMP2, (-(BCBIAS_J*4 >> 16) & 65535) ++ | checkfunc CFUNC:CARG1, >5 ++ | gettp CARG2, CARG2 ++ | addi.d CARG2, CARG2, -LJ_TTAB ++ | ld.bu TMP1, CFUNC:CARG1->ffid ++ | addi.d CARG3, CARG3, -LJ_TNIL ++ | or AT, CARG2, CARG3 ++ | addi.d TMP1, TMP1, -FF_next_N ++ | or AT, AT, TMP1 ++ |// addu16i.d TMP1, r0, 0xfffe ++ | .LUI TMP1, 0xfffe ++ | bnez AT, >5 ++ | add.d PC, TMP0, TMP2 ++ |// ori TMP1, TMP1, 0x7fff ++ | srli.d TMP1, TMP1, 12 ++ | ori TMP1, TMP1, 0x7 ++ | slli.d TMP1, TMP1, 12 ++ | ori TMP1, TMP1, 0xfff ++ | slli.d TMP1, TMP1, 32 ++ | st.d TMP1, -8(RA) ++ |1: ++ | ins_next ++ |5: // Despecialize bytecode if any of the checks fail. ++ | .LI TMP3, BC_JMP ++ | .LI TMP1, BC_ITERC ++ | st.b TMP3, -4+OFS_OP(PC) ++ | add.d PC, TMP0, TMP2 ++ | st.b TMP1, OFS_OP(PC) ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_VARG: ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ld.d TMP0, FRAME_PC(BASE) ++ | decode_RDtoRC8 RC, RD ++ | decode_RB8a RB, INS ++ | add.d RC, BASE, RC ++ | decode_RB8b RB ++ | add.d RA, BASE, RA ++ | addi.d RC, RC, FRAME_VARG ++ | add.d TMP2, RA, RB ++ | addi.d TMP3, BASE, -16 // TMP3 = vtop ++ | sub.d RC, RC, TMP0 // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. ++ | sub.d TMP1, TMP3, RC ++ | beqz RB, >5 // Copy all varargs? ++ | addi.d TMP2, TMP2, -16 ++ |1: // Copy vararg slots to destination slots. ++ | ld.d CARG1, 0(RC) ++ | sltu AT, RC, TMP3 ++ | addi.d RC, RC, 8 ++ | maskeqz CARG1, CARG1, AT ++ | masknez AT, TISNIL, AT ++ | or CARG1, CARG1, AT ++ | st.d CARG1, 0(RA) ++ | sltu AT, RA, TMP2 ++ | addi.d RA, RA, 8 ++ | bnez AT, <1 ++ |3: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ld.d TMP0, L->maxstack ++ | .LI MULTRES, 8 // MULTRES = (0+1)*8 ++ | bge r0, TMP1, <3 // No vararg slots? ++ | add.d TMP2, RA, TMP1 ++ | sltu AT, TMP0, TMP2 ++ | addi.d MULTRES, TMP1, 8 ++ | bnez AT, >7 ++ |6: ++ | ld.d CRET1, 0(RC) ++ | addi.d RC, RC, 8 ++ | st.d CRET1, 0(RA) ++ | sltu AT, RC, TMP3 ++ | addi.d RA, RA, 8 ++ | bnez AT, <6 // More vararg slots? ++ | beq r0, r0, <3 ++ | ++ |7: // Grow stack for varargs. ++ | st.d RA, L->top ++ | sub.d RA, RA, BASE ++ | st.d BASE, L->base ++ | sub.d BASE, RC, BASE // Need delta, because BASE may change. ++ | st.d PC, SAVE_PC(sp) ++ | srli.w CARG2, TMP1, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | or RC, BASE, r0 ++ | ld.d BASE, L->base ++ | add.d RA, BASE, RA ++ | add.d RC, BASE, RC ++ | addi.d TMP3, BASE, -16 ++ | beq r0, r0, <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RD = extra_nresults*8 ++ | add.w RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. ++ | // Fall through. Assumes BC_RET follows. 
++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld.d PC, FRAME_PC(BASE) ++ | add.d RA, BASE, RA ++ | or MULTRES, RD, r0 ++ |1: ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return ++ | ld.w INS, -4(PC) ++ | addi.d TMP2, BASE, -16 ++ | addi.d RC, RD, -8 ++ | decode_RA8a TMP0, INS ++ | decode_RB8a RB, INS ++ | decode_RA8b TMP0 ++ | decode_RB8b RB ++ | add.d TMP3, TMP2, RB ++ | sub.d BASE, TMP2, TMP0 ++ | beqz RC, >3 ++ |2: ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | addi.d RC, RC, -8 ++ | st.d CRET1, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez RC, <2 ++ |3: ++ | addi.d TMP3, TMP3, -8 ++ |5: ++ | sltu AT, TMP2, TMP3 ++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE) ++ | bnez AT, >6 ++ | ins_next1 ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | st.d TISNIL, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | beq r0, r0, <5 ++ | ++ |->BC_RETV_Z: // Non-standard return case. ++ | andi TMP2, TMP1, FRAME_TYPEP ++ | bnez TMP2, ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | sub.d BASE, BASE, TMP1 ++ | ld.d PC, FRAME_PC(BASE) ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld.d PC, FRAME_PC(BASE) ++ | add.d RA, BASE, RA ++ | or MULTRES, RD, r0 ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ld.w INS, -4(PC) ++ | addi.d TMP2, BASE, -16 ++ if (op == BC_RET1) { ++ | ld.d CRET1, 0(RA) ++ } ++ | decode_RB8a RB, INS ++ | decode_RA8a RA, INS ++ | decode_RB8b RB ++ | decode_RA8b RA ++ | sub.d BASE, TMP2, RA ++ if (op == BC_RET1) { ++ | st.d CRET1, 0(TMP2) ++ } ++ |5: ++ | sltu AT, RD, RB ++ | ld.d TMP1, FRAME_FUNC(BASE) ++ | bnez AT, >6 ++ | ins_next1 ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | addi.d TMP2, TMP2, 8 ++ | addi.d RD, RD, 8 ++ if (op == BC_RET1) { ++ | st.d TISNIL, 0(TMP2) ++ } else { ++ | st.d TISNIL, -8(TMP2) ++ } ++ | beq r0, r0, <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IFORL follows. 
++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RD = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | add.d RA, BASE, RA ++ | ld.d r17, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type ++ | gettp CARG3, r17 ++ if (op != BC_JFORL) { ++ | srli.w RD, RD, 1 ++ | .LUI TMP2, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.d TMP2, RD, TMP2 ++ } ++ if (!vk) { ++ | ld.d r18, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type ++ | ld.d CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type ++ | gettp CARG4, r18 ++ | gettp CRET2, CRET1 ++ | bne CARG3, TISNUM, >5 ++ | slli.w CARG3, r17, 0 // sextw -> slli.w ++ | bne CARG4, TISNUM, ->vmeta_for ++ | slli.w r18, r18, 0 // sextw -> slli.w ++ | bne CRET2, TISNUM, ->vmeta_for ++ | bstrpick.d AT, CRET1, 31, 31 ++ | slt CRET1, r18, CARG3 ++ | slt TMP1, CARG3, r18 ++ | maskeqz TMP1, TMP1, AT ++ | masknez CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP1 ++ } else { ++ | ld.d CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type ++ | bne CARG3, TISNUM, >5 ++ | ld.d CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type ++ | slli.w TMP3, r17, 0 // sextw -> slli.w ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ | slli.w CRET1, CRET1, 0 // sextw -> slli.w ++ | add.w r17, TMP3, CARG2 ++ | xor TMP0, r17, TMP3 ++ | xor TMP1, r17, CARG2 ++ | and TMP0, TMP0, TMP1 ++ | slt TMP1, r17, CRET1 ++ | slt CRET1, CRET1, r17 ++ | slt AT, CARG2, r0 ++ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. ++ | maskeqz TMP1, TMP1, AT ++ | masknez CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP1 ++ | or CRET1, CRET1, TMP0 ++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d ++ | settp r17, TISNUM ++ } ++ |1: ++ if (op == BC_FORI) { ++ | maskeqz TMP2, TMP2, CRET1 ++ | add.d PC, PC, TMP2 ++ } else if (op == BC_JFORI) { ++ | add.d PC, PC, TMP2 ++ | ld.hu RD, -4+OFS_RD(PC) ++ } else if (op == BC_IFORL) { ++ | masknez TMP2, TMP2, CRET1 ++ | add.d PC, PC, TMP2 ++ } ++ if (vk) { ++ | st.d r17, FORL_IDX*8(RA) ++ } ++ | ins_next1 ++ | st.d r17, FORL_EXT*8(RA) ++ |2: ++ if (op == BC_JFORI) { ++ | decode_RD8b RD ++ | beqz CRET1, =>BC_JLOOP ++ } else if (op == BC_JFORL) { ++ | beqz CRET1, =>BC_JLOOP //TODO no slot ins ? ++ } ++ | ins_next2 ++ | ++ |5: // FP loop. 
++ |.if FPU ++ if (!vk) { ++ | fld.d f22, FORL_IDX*8(RA) ++ | fld.d f23, FORL_STOP*8(RA) ++ | sltui TMP0, CARG3, LJ_TISNUM ++ | sltui TMP1, CARG4, LJ_TISNUM ++ | sltui AT, CRET2, LJ_TISNUM ++ | ld.d TMP3, FORL_STEP*8(RA) ++ | and TMP0, TMP0, TMP1 ++ | and AT, AT, TMP0 ++ | slt TMP3, TMP3, r0 ++ | beqz AT, ->vmeta_for ++ | movgr2fr.d FTMP2, TMP3 ++ | fcmp.clt.d FCC0, f22, f23 ++ | fcmp.clt.d FCC1, f23, f22 ++ | movcf2fr FTMP0, FCC0 ++ | movcf2fr FTMP1, FCC1 ++ | movfr2cf FCC0, FTMP2 ++ | fsel FTMP2, FTMP1, FTMP0, FCC0 ++ | movfr2gr.d CRET1, FTMP2 ++ | beq r0, r0, <1 ++ } else { ++ | fld.d f22, FORL_IDX*8(RA) ++ | fld.d f10, FORL_STEP*8(RA) ++ | fld.d f23, FORL_STOP*8(RA) ++ | ld.d TMP3, FORL_STEP*8(RA) ++ | fadd.d f22, f22, f10 ++ | slt TMP3, TMP3, r0 ++ | movgr2fr.d FTMP2, TMP3 ++ | fcmp.clt.d FCC0, f22, f23 ++ | fcmp.clt.d FCC1, f23, f22 ++ | movcf2fr FTMP0, FCC0 ++ | movcf2fr FTMP1, FCC1 ++ | movfr2cf FCC0, FTMP2 ++ | fsel FTMP2, FTMP1, FTMP0, FCC0 ++ | movfr2gr.d CRET1, FTMP2 ++ if (op == BC_IFORL) { ++ | masknez TMP2, TMP2, CRET1 ++ | add.d PC, PC, TMP2 ++ } ++ | fst.d f22, FORL_IDX*8(RA) ++ | ins_next1 ++ | fst.d f22, FORL_EXT*8(RA) ++ | beq r0, r0, <2 ++ } ++ |.else ++ if (!vk) { ++ | sltui TMP0, CARG3, LJ_TISNUM ++ | sltui TMP1, CARG4, LJ_TISNUM ++ | sltui AT, CRET2, LJ_TISNUM ++ | and TMP0, TMP0, TMP1 ++ | and AT, AT, TMP0 ++ | beqz AT, ->vmeta_for ++ | ld.w TMP3, FORL_STEP*8+HI(RA) ++ | bl ->vm_sfcmpolex ++ | beq r0, r0, <1 ++ } else { ++ | st.w TMP2, TMPD(sp) ++ | bl extern __adddf3 ++ | ld.d CARG2, FORL_STOP*8(RA) ++ | or r17, CRET1, r0 ++ if ( op == BC_JFORL ) { ++ | ld.hu RD, -4+OFS_RD(PC) ++ | decode_RD8b RD ++ } ++ | ld.w TMP3, FORL_STEP*8+HI(RA) ++ | bl ->vm_sfcmpolex ++ | ld.w TMP2, TMPD(sp) ++ | beq r0, r0, <1 ++ } ++ |.endif ++ break; ++ ++ case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RD = target ++ | add.d RA, BASE, RA ++ | ld.d TMP1, 0(RA) ++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. ++ if (op == BC_JITERL) { ++ | st.d TMP1,-8(RA) ++ | beq r0, r0, =>BC_JLOOP ++ } else { ++ | branch_RD // Otherwise save control var + branch. ++ | st.d TMP1, -8(RA) ++ } ++ |1: ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | // Note: RA/RD is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ |.if JIT ++ | // RA = base*8 (ignored), RD = traceno*8 ++ | .LDXD TMP1, DISPATCH, DISPATCH_J(trace) ++ | .LI AT, 0 ++ | add.d TMP1, TMP1, RD ++ | // Traces on MIPS don't store the trace number, so use 0. 
++ | .STXD AT, DISPATCH, DISPATCH_GL(vmstate) ++ | ld.d TRACE:TMP2, 0(TMP1) ++ | .STXD BASE, DISPATCH, DISPATCH_GL(jit_base) ++ | ld.d TMP2, TRACE:TMP2->mcode ++ | .STXD L, DISPATCH, DISPATCH_GL(tmpbuf.L) ++ | .DADDIU JGL, DISPATCH, GG_DISP2G+32768 ++ | jirl r0, TMP2, 0 ++ |.endif ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RD = target ++ | branch_RD ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif ++ case BC_FUNCV: /* NYI: compiled vararg functions. */ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ld.d TMP2, L->maxstack ++ | ld.bu TMP1, -4+PC2PROTO(numparams)(PC) ++ | ld.d KBASE, -4+PC2PROTO(k)(PC) ++ | sltu AT, TMP2, RA ++ | slli.w TMP1, TMP1, 3 ++ | bnez AT, ->vm_growstack_l ++ if (op != BC_JFUNCF) { ++ | ins_next1 ++ } ++ |2: ++ | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. ++ | or r17, AT, r0 ++ | add.d AT, BASE, NARGS8:RC ++ | bnez r17, >3 ++ if (op == BC_JFUNCF) { ++ | decode_RD8a RD, INS ++ | decode_RD8b RD ++ | beq r0, r0, =>BC_JLOOP ++ } else { ++ | ins_next2 ++ } ++ | ++ |3: // Clear missing parameters. ++ | st.d TISNIL, 0(AT) ++ | addi.w NARGS8:RC, NARGS8:RC, 8 ++ | beq r0, r0, <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | .LI TMP0, LJ_TFUNC ++ | add.d TMP1, BASE, RC ++ | ld.d TMP2, L->maxstack ++ | settp LFUNC:RB, TMP0 ++ | add.d TMP0, RA, RC ++ | st.d LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. ++ | addi.d TMP3, RC, 16+FRAME_VARG ++ | sltu AT, TMP0, TMP2 ++ | ld.d KBASE, -4+PC2PROTO(k)(PC) ++ | st.d TMP3, 8(TMP1) // Store delta + FRAME_VARG. ++ | beqz AT, ->vm_growstack_l ++ | ld.bu TMP2, -4+PC2PROTO(numparams)(PC) ++ | or RA, BASE, r0 ++ | or RC, TMP1, r0 ++ | ins_next1 ++ | addi.d BASE, TMP1, 16 ++ | beqz TMP2, >3 ++ |1: ++ | ld.d TMP0, 0(RA) ++ | sltu AT, RA, RC // Less args than parameters? ++ | or CARG1, TMP0, r0 ++ | maskeqz TMP0, TMP0, AT ++ | masknez TMP3, TISNIL, AT ++ | or TMP0, TMP0, TMP3 ++ | masknez TMP3, CARG1, AT ++ | maskeqz CARG1, TISNIL, AT ++ | or CARG1, CARG1, TMP3 ++ | addi.w TMP2, TMP2, -1 ++ | st.d TMP0, 16(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | st.d CARG1, 0(RA) ++ | addi.d RA, RA, 8 ++ | bnez TMP2, <1 ++ |3: ++ | ins_next2 ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ld.d CFUNCADDR, CFUNC:RB->f ++ } else { ++ | .LDXD CFUNCADDR, DISPATCH, DISPATCH_GL(wrapf) ++ } ++ | add.d TMP1, RA, NARGS8:RC ++ | ld.d TMP2, L->maxstack ++ | add.d RC, BASE, NARGS8:RC ++ | st.d BASE, L->base ++ | sltu AT, TMP2, TMP1 ++ | st.d RC, L->top ++ | li_vmstate C ++ if (op == BC_FUNCCW) { ++ | ld.d CARG2, CFUNC:RB->f ++ } ++ | or CARG1, L, r0 ++ | bnez AT, ->vm_growstack_c // Need to grow stack. ++ | st_vmstate ++ | jirl r1, CFUNCADDR, 0 // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ld.d BASE, L->base ++ | slli.w RD, CRET1, 3 ++ | ld.d TMP1, L->top ++ | li_vmstate INTERP ++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of caller. 
++ | sub.d RA, TMP1, RD // RA = L->top - nresults*8 ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | st_vmstate ++ | beq r0, r0, ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. */ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 31\n" ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" ++ "\t.align 2\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x9f\n\t.sleb128 2*5\n" ++ "\t.byte 0x9e\n\t.sleb128 2*6\n", ++ fcofs, CFRAME_SIZE); ++ for (i = 23; i >= 16; i--) ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); ++#if !LJ_SOFTFP ++ for (i = 31; i >= 24; i--) ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); ++#endif ++ fprintf(ctx->fp, ++ "\t.align 2\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.4byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x9f\n\t.uleb128 2*1\n" ++ "\t.byte 0x90\n\t.uleb128 2*2\n" ++ "\t.byte 0xd\n\t.uleb128 0x10\n" ++ "\t.align 2\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ /* NYI */ ++#endif ++ break; ++ default: ++ break; ++ } ++} ++ +diff --git a/libs/luajit/configure b/libs/luajit/configure +index 7d798f8..ea7d3c1 100755 +--- a/libs/luajit/configure ++++ b/libs/luajit/configure +@@ -15094,6 +15094,11 @@ then : + echo '-D__AARCH64EB__=1' >>native_flags + fi + ++elif grep 'LJ_TARGET_LOONGARCH64 ' conftest.i >/dev/null 2>&1 ++then : ++ LJARCH=loongarch64 ++ echo '-D__loongarch__=1' >>native_flags ++ + elif grep 'LJ_TARGET_PPC ' conftest.i >/dev/null 2>&1 + then : + LJARCH=ppc +diff --git a/libs/luajit/m4/lj-system.m4 b/libs/luajit/m4/lj-system.m4 +index 73ba282..7005664 100644 +--- a/libs/luajit/m4/lj-system.m4 ++++ b/libs/luajit/m4/lj-system.m4 +@@ -29,6 +29,9 @@ AS_IF([grep 'LJ_TARGET_X64 ' conftest.i >/dev/null 2>&1], + AS_IF([grep '__AARCH64EB__' conftest.i >/dev/null 2>&1], + [echo '-D__AARCH64EB__=1' >>native_flags]) + ], ++ [grep 'LJ_TARGET_LOONGARCH64 ' conftest.i >/dev/null 2>&1], ++ [LJARCH=loongarch64 ++ ], + [grep 'LJ_TARGET_PPC ' conftest.i >/dev/null 2>&1], + [LJARCH=ppc + AS_IF([grep 'LJ_LE 1' conftest.i >/dev/null 2>&1], diff --git a/thunderbird/PKGBUILD b/thunderbird/PKGBUILD index 59e53e379b..8e6ad4a43a 100644 --- a/thunderbird/PKGBUILD +++ b/thunderbird/PKGBUILD @@ -55,13 +55,14 @@ depends=( ) makedepends=( unzip zip diffutils python nasm mesa libpulse libice libsm - rust clang llvm cbindgen nodejs lld + rust clang llvm cbindgen nodejs 
#lld gawk perl findutils libotr wasi-compiler-rt wasi-libc wasi-libc++ wasi-libc++abi ) options=(!emptydirs !makeflags !lto) source=(https://archive.mozilla.org/pub/thunderbird/releases/$pkgver/source/thunderbird-$pkgver.source.tar.xz{,.asc} vendor-prefs.js distribution.ini + firefox-115-loong.patch mozconfig.cfg metainfo.patch org.mozilla.Thunderbird.desktop @@ -119,6 +120,10 @@ build() { # malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" CXXFLAGS="${CXXFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} ./mach configure ./mach build @@ -261,7 +266,8 @@ sha512sums=('de9edb81cf5da494101bf927a5b963ccdec0cc9bff87ebd72d896c6e25102c1113b 'SKIP' '6918c0de63deeddc6f53b9ba331390556c12e0d649cf54587dfaabb98b32d6a597b63cf02809c7c58b15501720455a724d527375a8fb9d757ccca57460320734' '5cd3ac4c94ef6dcce72fba02bc18b771a2f67906ff795e0e3d71ce7db6d8a41165bd5443908470915bdbdb98dddd9cf3f837c4ba3a36413f55ec570e6efdbb9f' - 'a34dd97954f415a5ffe956ca1f10718bd164950566ceba328805c2ccbb54ed9081df07f2e063479bf932c4a443bb5b7443cca2f82eea3914465ed6e4863e0c0e' + '0c1c085147db0569ec0365b9250e6b8181fe3ffbe6c22e1b5d752370eaa8d18425dfab612a906d10d5211394d232d9ee1a563b9d8d89d8f2105a4303f953eb94' + '702f1e889ec75e454245a46e485a554c51af1be94bdc0eeb42c466c5baee48106f41d5769f1f09888ad5bbe2db113bfbefbbea34111d6e2566126bfdb34d50b0' '7e43b1f25827ddae615ad43fc1e11c6ba439d6c2049477dfe60e00188a70c0a76160c59a97cc01d1fd99c476f261c7cecb57628b5be48874be7cf991c22db290' 'fffeb73e2055408c5598439b0214b3cb3bb4e53dac3090b880a55f64afcbc56ba5d32d1187829a08ef06d592513d158ced1fde2f20e2f01e967b5fbd3b2fafd4' '9897cb0ababc8e1a0001c4e1f70e0b39f5cdb9c08c69e3afd42088dfd001aa1fc6996cd83df0db1fb57ee0a80686c35c8df783108408dbe9191602cddd1e3c65' diff --git a/thunderbird/firefox-115-loong.patch b/thunderbird/firefox-115-loong.patch new file mode 100644 index 0000000000..ad71fd595f --- /dev/null +++ b/thunderbird/firefox-115-loong.patch @@ -0,0 +1,619 @@ +From 3751c1f6f1a0781eb35d65595773f7a251c5f319 Mon Sep 17 00:00:00 2001 +From: Kay Lin +Date: Wed, 20 Sep 2023 03:03:37 -0700 +Subject: [PATCH] Add support for LoongArch + +Adapted from LoongArchLinux. 
+ +Co-Authored-By: loongson +Co-Authored-By: WANG Xuerui # rebased for 115esr branch +--- + ipc/chromium/src/build/build_config.h | 3 + + third_party/libwebrtc/build/build_config.h | 4 + + .../rust/authenticator/.cargo-checksum.json | 2 +- + third_party/rust/authenticator/build.rs | 2 + + .../src/transport/linux/hidwrapper.rs | 3 + + .../src/transport/linux/ioctl_loongarch64.rs | 5 + + third_party/rust/cty/.cargo-checksum.json | 2 +- + third_party/rust/cty/src/lib.rs | 1 + + third_party/rust/nix/.cargo-checksum.json | 2 +- + third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + + .../telemetry/pingsender/pingsender.cpp | 1 + + toolkit/moz.configure | 2 +- + xpcom/reflect/xptcall/md/unix/moz.build | 8 + + .../md/unix/xptcinvoke_asm_loongarch64.S | 91 ++++++++++ + .../md/unix/xptcinvoke_loongarch64.cpp | 100 +++++++++++ + .../md/unix/xptcstubs_asm_loongarch64.S | 52 ++++++ + .../xptcall/md/unix/xptcstubs_loongarch64.cpp | 159 ++++++++++++++++++ + 17 files changed, 434 insertions(+), 4 deletions(-) + create mode 100644 third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp + +diff --git a/ipc/chromium/src/build/build_config.h b/ipc/chromium/src/build/build_config.h +index 511f36858c034..1104c4f4665ae 100644 +--- a/ipc/chromium/src/build/build_config.h ++++ b/ipc/chromium/src/build/build_config.h +@@ -126,6 +126,9 @@ + # define ARCH_CPU_ARM_FAMILY 1 + # define ARCH_CPU_ARM64 1 + # define ARCH_CPU_64_BITS 1 ++#elif defined(__loongarch_lp64) ++# define ARCH_CPU_LOONGARCH64 1 ++# define ARCH_CPU_64_BITS 1 + #elif defined(__riscv) && __riscv_xlen == 64 + # define ARCH_CPU_RISCV64 1 + # define ARCH_CPU_64_BITS 1 +diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h +index c39ae9da50f99..28191de02654b 100644 +--- a/third_party/libwebrtc/build/build_config.h ++++ b/third_party/libwebrtc/build/build_config.h +@@ -210,6 +210,10 @@ + #define ARCH_CPU_SPARC 1 + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 ++#elif defined(__loongarch_lp64) ++#define ARCH_CPU_LOONGARCH64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #else + #error Please add support for your architecture in build/build_config.h + #endif +diff --git a/third_party/rust/authenticator/.cargo-checksum.json b/third_party/rust/authenticator/.cargo-checksum.json +index 080c46c4c00c0..3f93dda5cb1f3 100644 +--- a/third_party/rust/authenticator/.cargo-checksum.json ++++ b/third_party/rust/authenticator/.cargo-checksum.json +@@ -1 +1 @@ 
+-{"files":{"Cargo.lock":"803a1ca7735f93e1d952a07291a6976db787b6530bc67f9e3d2ae2dcaf8a90cc","Cargo.toml":"e8f07adde7f2c71a96cbe3809ab605a9082b8ccaf8d2a69aacb6d5db90fddcdc","Cross.toml":"8d132da818d48492aa9f4b78a348f0df3adfae45d988d42ebd6be8a5adadb6c3","LICENSE":"e866c8f5864d4cacfe403820e722e9dc03fe3c7565efa5e4dad9051d827bb92a","README.md":"c87d9c7cc44f1dd4ef861a3a9f8cd2eb68aedd3814768871f5fb63c2070806cd","build.rs":"01092254718e4cd5d6bffcd64d55cc3240dc00e79f3d7344a5dc4abf6c27bca6","examples/ctap2.rs":"51709e50dd23477f6f91225c09fca08824a00abdc851727b2f3bd9dcd746378e","examples/ctap2_discoverable_creds.rs":"952207c39bad1995998c686f99fbca39268e930099b0086a09adeb5d12931df6","examples/interactive_management.rs":"27d2578fca7672477584bb3a74db182295c85e4aa6ae2d8edfd849fc0018c413","examples/reset.rs":"b13d3a2ed3544018ede8660ec0cc79732139e792d4e55c2c6fb517ad376b36ad","examples/set_pin.rs":"991d9bd66fd6bdd9dd8627ed710fe100a3dfb65b968031f768ee9a28e1e995d7","examples/test_exclude_list.rs":"20577d6887b00c99d2ae404e1b1f64c746ecc774bd2f9f0f8d1c5bb6a6f30292","rustfmt.toml":"ceb6615363d6fff16426eb56f5727f98a7f7ed459ba9af735b1d8b672e2c3b9b","src/authenticatorservice.rs":"dc756ae9d420dac187b04afbb4831527c12fa307ef072f1c1cb4480df9cbda5f","src/consts.rs":"44fb7c396dc87d1657d1feed08e956fc70608c0b06a034716b626419b442bcfe","src/crypto/dummy.rs":"9cc6be0dc1e28c7328121e7a4bf435211ae8b1455784472b24993571c4009579","src/crypto/mod.rs":"e4342dd93fd41bf48fa26386188ed92db5f908ad4d69f32f080a65228c6d5390","src/crypto/nss.rs":"2bf33898728760f194f204876450d0906b47907d259270f6e3d43c62a709c99a","src/crypto/openssl.rs":"ef6e4dbcc7230137e505e3fc4ad37e102e6b26b37470afd0f4709a297b3aa546","src/ctap2/attestation.rs":"e3c581154fb6bd4e4d8bd2326515864849b21766f5344e2d955d607b360fc930","src/ctap2/client_data.rs":"04ee84b34e91c988183871b4975fc08e12234965187c793ad26d0d82ed44642f","src/ctap2/commands/client_pin.rs":"7f3a49b23592e985b8f32d43688593ff7411a05cb594444e24851c13f093cdef","src/ctap2/commands/get_assertion.rs":"e9cd68cff2ee54156af6e3e424691a06354aafffcc374a40ccc9622f030c4999","src/ctap2/commands/get_info.rs":"79117c39d280445fb17be057af2f45ec1d80651ea1c8b478e07118ade808291b","src/ctap2/commands/get_next_assertion.rs":"8a8fa69cb4079a21ff4734067e74784b2bfee3c20ddcc0b35675ce77a3d83ae9","src/ctap2/commands/get_version.rs":"958c273c6156af102bba515de42e4a5ae43f36b4d2d1814d922c269c500f6ce2","src/ctap2/commands/make_credentials.rs":"524cb3378fcc2b08696ab25bf5473e149af307d18ef503a4ee971b4b7e087ff3","src/ctap2/commands/mod.rs":"916eb63b3e46968a9e79d088dd217c2b80dc1c4d14beaf12803e91b7987b6c32","src/ctap2/commands/reset.rs":"45500500c900124f96269679862ceeb18e87111096d322c87c766f2694e576fc","src/ctap2/commands/selection.rs":"7832d62bf37ddbbaf996d84f905c2cdca7dceb529c8f9f1fe82eb288da886068","src/ctap2/mod.rs":"5953ee33ee5930437f9d91299f8a6fdbc21bc62297ae4194901893ef0a5ac82a","src/ctap2/preflight.rs":"1cd41e948955a8bcb22a2e55e254dad1be74590b6016437914e93a2639222aef","src/ctap2/server.rs":"61e2afa1bc3ce1d61743073f14c1a385d064e5deed2b8a194e32e0ccbd4243ad","src/ctap2/utils.rs":"ad0aa36a0dbeb510b7f37789329f1957eab206eb529dc083e6176b142984e26e","src/errors.rs":"a99e5fbdad315ba1589b116fc227310996ef900498b595545228be35744b2038","src/lib.rs":"d42fc78ab81b6fdd66ebe35951a4395a3656f557795cff4c8bfcc54199cabfcd","src/manager.rs":"d72f8523d0a549487504ef6d370aee9132ad7436aaae777e6d65a0a03f3c0c27","src/statecallback.rs":"6b16f97176db1ae3fc3851fe8394e4ffc324bc6fe59313845ac3a88132fd52f1","src/statemachine.rs":"3b1b08efda156bc8c00bad27096a95177217ad77cb041530a03b8903b
a51d7e0","src/status_update.rs":"d032524f2c36c5a32db9dd424decf4577cea65adceca91bb1dfcdc07c58289cb","src/transport/device_selector.rs":"c703aa8e59b0b7ac9d11be0aac434dffda8b0c91e1a84298c48e598978e1576e","src/transport/errors.rs":"5af7cb8d22ffa63bf4264d182a0f54b9b3a2cc9d19d832b3495857229f9a2875","src/transport/freebsd/device.rs":"f41c7cf29c48bf2b403cf460e6387864372a134d6daeefc5c3afc3f40d0d4575","src/transport/freebsd/mod.rs":"42dcb57fbeb00140003a8ad39acac9b547062b8f281a3fa5deb5f92a6169dde6","src/transport/freebsd/monitor.rs":"a6b34af4dd2e357a5775b1f3a723766107c11ef98dba859b1188ed08e0e450a2","src/transport/freebsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/freebsd/uhid.rs":"a194416a8bc5d428c337f8d96a2248769ca190810852bbe5ee686ab595d8eb4c","src/transport/hid.rs":"033e0f1bf6428a1d4077e5abb53dbfa193ef72dd8a98b7666d7b5fb45a6570f0","src/transport/hidproto.rs":"9d490f161807b75f4d7d5096355006627c1f47c0d90fca53bade3692efc92a2d","src/transport/linux/device.rs":"e79bd06d98723a0d7e4f25b7cf2ac3e0260b10e52d2b0695909d2932288e10a4","src/transport/linux/hidraw.rs":"c7a0df9b4e51cb2736218ffffa02b2b2547b7c515d69f9bae2c9a8c8f1cb547b","src/transport/linux/hidwrapper.h":"72785db3a9b27ea72b6cf13a958fee032af54304522d002f56322473978a20f9","src/transport/linux/hidwrapper.rs":"753c7459dbb73befdd186b6269ac33f7a4537b4c935928f50f2b2131756e787d","src/transport/linux/ioctl_aarch64le.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_armle.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_mips64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsle.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64be.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpcbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_riscv64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_s390xbe.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86_64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/mod.rs":"446e435126d2a58f167f648dd95cba28e8ac9c17f1f799e1eaeab80ea800fc57","src/transport/linux/monitor.rs":"5e3ec2618dd74027ae6ca1527991254e3271cce59106d4920ce0414094e22f64","src/transport/linux/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/macos/device.rs":"f508d0585079ecf87a73d6135c52e8b5a887fbf16e241676d51a8099a8001a81","src/transport/macos/iokit.rs":"7dc4e7bbf8e42e2fcde0cee8e48d14d6234a5a910bd5d3c4e966d8ba6b73992f","src/transport/macos/mod.rs":"333e561554fc901d4f6092f6e4c85823e2b0c4ff31c9188d0e6d542b71a0a07c","src/transport/macos/monitor.rs":"e02288454bb4010e06b705d82646abddb3799f0cd655f574aa19f9d91485a4a2","src/transport/macos/transaction.rs":"9dcdebd13d5fd5a185b5ad777a80c825a6ba5e76b141c238aa115b451b9a72fa","src/transport/mock/device.rs":"582b2b55f13d95dd9f1127e3dde49d2137a5ca020f9c1fa1ffa5c4083d05c0
e7","src/transport/mock/mod.rs":"9c4c87efd19adddc1a91c699a6c328063cfbac5531b76346a5ff92e986aded8f","src/transport/mock/transaction.rs":"be3ed8c389dfa04122364b82515edd76fad6f5d5f72d15cacd45a84fb8397292","src/transport/mod.rs":"e28d72b6f3fdaff21f940c4db213067cd94f5832f864ecaad1c9901d5aea9b79","src/transport/netbsd/device.rs":"a7dec83b5040faf1a8ddb37e9fc2b45b9b12814be4802b3b351eff081d1b80c3","src/transport/netbsd/fd.rs":"5464019025d03ea2a39c82f76b238bbbdb0ea63f5a5fc7c9d974e235139cd53b","src/transport/netbsd/mod.rs":"b1c52aa29537330cebe67427062d6c94871cab2a9b0c04b2305d686f07e88fd5","src/transport/netbsd/monitor.rs":"fb2917e4ba53cc9867987a539061f82d011f4c6e478df1157d965d32df2eb922","src/transport/netbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/netbsd/uhid.rs":"d15be35e2413240066a8f086bb8846b08a6a92bf6a1941c3eec1329dd3a4f9ce","src/transport/openbsd/device.rs":"47d8dfeb12c33e6cada2b2cd76476827059c797d8a16f2c4aea6e78d32ebab46","src/transport/openbsd/mod.rs":"514274d414042ff84b3667a41a736e78581e22fda87ccc97c2bc05617e381a30","src/transport/openbsd/monitor.rs":"2e0ba6ecc69b450be9cbfd21a7c65036ed2ce593b12363596d3eae0b5bfb79e8","src/transport/openbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/stub/device.rs":"aa21711d6690ed68bd878b28463172ba69c6324be7afabeccb1f07b4831cb020","src/transport/stub/mod.rs":"6a7fec504a52d403b0241b18cd8b95088a31807571f4c0a67e4055afc74f4453","src/transport/stub/transaction.rs":"c9a3ade9562468163f28fd51e7ff3e0bf5854b7edade9e987000d11c5d0e62d2","src/transport/windows/device.rs":"148b1572ed5fa8d476efbdb2a3a35608ec23012d6a805129f3c25c453bab4b7a","src/transport/windows/mod.rs":"218e7f2fe91ecb390c12bba5a5ffdad2c1f0b22861c937f4d386262e5b3dd617","src/transport/windows/monitor.rs":"95913d49e7d83482e420493d89b53ffceb6a49e646a87de934dff507b3092b4c","src/transport/windows/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/windows/winapi.rs":"b2a4cc85f14e39cadfbf068ee001c9d776f028d3cf09cb926d4364c5b437c112","src/u2fprotocol.rs":"e61ac223aab79ae82383cd32a23213d18461e229c448373bf2483357a9eae69e","src/u2ftypes.rs":"8511c6f04f69670ddd403178a46060644a27128ca4077a9a3e00bc6671e3864b","src/util.rs":"cf37c4c3caf6dde4fc3cf6f5f297ed3c0f13bcb50fb0e8955899fc837483ef31","src/virtualdevices/mod.rs":"2c7df7691d5c150757304241351612aed4260d65b70ab0f483edbc1a5cfb5674","src/virtualdevices/software_u2f.rs":"83e63c0c4a597e71d87b5cd1f33a49646d00b3062edbdd05c51623b80fb60168","src/virtualdevices/webdriver/mod.rs":"4a36e6dfa9f45f941d863b4039bfbcfa8eaca660bd6ed78aeb1a2962db64be5a","src/virtualdevices/webdriver/testtoken.rs":"7146e02f1a5dad2c8827dd11c12ee408c0e42a0706ac65f139998feffd42570f","src/virtualdevices/webdriver/virtualmanager.rs":"7205a0397833628fc0847aa942a6a314dc1e23306858b546053e0de6a360ebe1","src/virtualdevices/webdriver/web_api.rs":"9032525af458b6fe9a3274c36b6ef8c791ecc4ec46d38ae36583fc9a4535b59d","testing/cross/powerpc64le-unknown-linux-gnu.Dockerfile":"d7463ff4376e3e0ca3fed879fab4aa975c4c0a3e7924c5b88aef9381a5d013de","testing/cross/x86_64-unknown-linux-gnu.Dockerfile":"11c79c04b07a171b0c9b63ef75fa75f33263ce76e3c1eda0879a3e723ebd0c24","testing/run_cross.sh":"cc2a7e0359f210eba2e7121f81eb8ab0125cea6e0d0f2698177b0fe2ad0c33d8","webdriver-tools/requirements.txt":"8236aa3dedad886f213c9b778fec80b037212d30e640b458984110211d546005","webdriver-tools/webdriver-driver.py":"82327c26ba271d1689acc87b612ab8436cb5475f0a3c0dba7baa06e7f6f5e19c"},"package":"aa0e182b7
7b6b19eaf9c7b69fddf3be970169ec6d34eca3f5d682ab948727e57"} +\ No newline at end of file ++{"files":{"Cargo.lock":"803a1ca7735f93e1d952a07291a6976db787b6530bc67f9e3d2ae2dcaf8a90cc","Cargo.toml":"e8f07adde7f2c71a96cbe3809ab605a9082b8ccaf8d2a69aacb6d5db90fddcdc","Cross.toml":"8d132da818d48492aa9f4b78a348f0df3adfae45d988d42ebd6be8a5adadb6c3","LICENSE":"e866c8f5864d4cacfe403820e722e9dc03fe3c7565efa5e4dad9051d827bb92a","README.md":"c87d9c7cc44f1dd4ef861a3a9f8cd2eb68aedd3814768871f5fb63c2070806cd","build.rs":"5b909f42e52ed2056afa3693544ef1c1dc5e90d00e7d8730175a228bd0233b43","examples/ctap2.rs":"51709e50dd23477f6f91225c09fca08824a00abdc851727b2f3bd9dcd746378e","examples/ctap2_discoverable_creds.rs":"952207c39bad1995998c686f99fbca39268e930099b0086a09adeb5d12931df6","examples/interactive_management.rs":"27d2578fca7672477584bb3a74db182295c85e4aa6ae2d8edfd849fc0018c413","examples/reset.rs":"b13d3a2ed3544018ede8660ec0cc79732139e792d4e55c2c6fb517ad376b36ad","examples/set_pin.rs":"991d9bd66fd6bdd9dd8627ed710fe100a3dfb65b968031f768ee9a28e1e995d7","examples/test_exclude_list.rs":"20577d6887b00c99d2ae404e1b1f64c746ecc774bd2f9f0f8d1c5bb6a6f30292","rustfmt.toml":"ceb6615363d6fff16426eb56f5727f98a7f7ed459ba9af735b1d8b672e2c3b9b","src/authenticatorservice.rs":"dc756ae9d420dac187b04afbb4831527c12fa307ef072f1c1cb4480df9cbda5f","src/consts.rs":"44fb7c396dc87d1657d1feed08e956fc70608c0b06a034716b626419b442bcfe","src/crypto/dummy.rs":"9cc6be0dc1e28c7328121e7a4bf435211ae8b1455784472b24993571c4009579","src/crypto/mod.rs":"e4342dd93fd41bf48fa26386188ed92db5f908ad4d69f32f080a65228c6d5390","src/crypto/nss.rs":"2bf33898728760f194f204876450d0906b47907d259270f6e3d43c62a709c99a","src/crypto/openssl.rs":"ef6e4dbcc7230137e505e3fc4ad37e102e6b26b37470afd0f4709a297b3aa546","src/ctap2/attestation.rs":"e3c581154fb6bd4e4d8bd2326515864849b21766f5344e2d955d607b360fc930","src/ctap2/client_data.rs":"04ee84b34e91c988183871b4975fc08e12234965187c793ad26d0d82ed44642f","src/ctap2/commands/client_pin.rs":"7f3a49b23592e985b8f32d43688593ff7411a05cb594444e24851c13f093cdef","src/ctap2/commands/get_assertion.rs":"e9cd68cff2ee54156af6e3e424691a06354aafffcc374a40ccc9622f030c4999","src/ctap2/commands/get_info.rs":"79117c39d280445fb17be057af2f45ec1d80651ea1c8b478e07118ade808291b","src/ctap2/commands/get_next_assertion.rs":"8a8fa69cb4079a21ff4734067e74784b2bfee3c20ddcc0b35675ce77a3d83ae9","src/ctap2/commands/get_version.rs":"958c273c6156af102bba515de42e4a5ae43f36b4d2d1814d922c269c500f6ce2","src/ctap2/commands/make_credentials.rs":"524cb3378fcc2b08696ab25bf5473e149af307d18ef503a4ee971b4b7e087ff3","src/ctap2/commands/mod.rs":"916eb63b3e46968a9e79d088dd217c2b80dc1c4d14beaf12803e91b7987b6c32","src/ctap2/commands/reset.rs":"45500500c900124f96269679862ceeb18e87111096d322c87c766f2694e576fc","src/ctap2/commands/selection.rs":"7832d62bf37ddbbaf996d84f905c2cdca7dceb529c8f9f1fe82eb288da886068","src/ctap2/mod.rs":"5953ee33ee5930437f9d91299f8a6fdbc21bc62297ae4194901893ef0a5ac82a","src/ctap2/preflight.rs":"1cd41e948955a8bcb22a2e55e254dad1be74590b6016437914e93a2639222aef","src/ctap2/server.rs":"61e2afa1bc3ce1d61743073f14c1a385d064e5deed2b8a194e32e0ccbd4243ad","src/ctap2/utils.rs":"ad0aa36a0dbeb510b7f37789329f1957eab206eb529dc083e6176b142984e26e","src/errors.rs":"a99e5fbdad315ba1589b116fc227310996ef900498b595545228be35744b2038","src/lib.rs":"d42fc78ab81b6fdd66ebe35951a4395a3656f557795cff4c8bfcc54199cabfcd","src/manager.rs":"d72f8523d0a549487504ef6d370aee9132ad7436aaae777e6d65a0a03f3c0c27","src/statecallback.rs":"6b16f97176db1ae3fc3851fe8394e4ffc324bc6fe59313845ac3a88132f
d52f1","src/statemachine.rs":"3b1b08efda156bc8c00bad27096a95177217ad77cb041530a03b8903ba51d7e0","src/status_update.rs":"d032524f2c36c5a32db9dd424decf4577cea65adceca91bb1dfcdc07c58289cb","src/transport/device_selector.rs":"c703aa8e59b0b7ac9d11be0aac434dffda8b0c91e1a84298c48e598978e1576e","src/transport/errors.rs":"5af7cb8d22ffa63bf4264d182a0f54b9b3a2cc9d19d832b3495857229f9a2875","src/transport/freebsd/device.rs":"f41c7cf29c48bf2b403cf460e6387864372a134d6daeefc5c3afc3f40d0d4575","src/transport/freebsd/mod.rs":"42dcb57fbeb00140003a8ad39acac9b547062b8f281a3fa5deb5f92a6169dde6","src/transport/freebsd/monitor.rs":"a6b34af4dd2e357a5775b1f3a723766107c11ef98dba859b1188ed08e0e450a2","src/transport/freebsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/freebsd/uhid.rs":"a194416a8bc5d428c337f8d96a2248769ca190810852bbe5ee686ab595d8eb4c","src/transport/hid.rs":"033e0f1bf6428a1d4077e5abb53dbfa193ef72dd8a98b7666d7b5fb45a6570f0","src/transport/hidproto.rs":"9d490f161807b75f4d7d5096355006627c1f47c0d90fca53bade3692efc92a2d","src/transport/linux/device.rs":"e79bd06d98723a0d7e4f25b7cf2ac3e0260b10e52d2b0695909d2932288e10a4","src/transport/linux/hidraw.rs":"c7a0df9b4e51cb2736218ffffa02b2b2547b7c515d69f9bae2c9a8c8f1cb547b","src/transport/linux/hidwrapper.h":"72785db3a9b27ea72b6cf13a958fee032af54304522d002f56322473978a20f9","src/transport/linux/hidwrapper.rs":"d203e8804e7632b8d47a224c186d1f431800f04ddc43360d5c086f71e9b0f674","src/transport/linux/ioctl_aarch64le.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_armle.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_mips64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsle.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64be.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpcbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_riscv64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_s390xbe.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86_64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_loongarch64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/mod.rs":"446e435126d2a58f167f648dd95cba28e8ac9c17f1f799e1eaeab80ea800fc57","src/transport/linux/monitor.rs":"5e3ec2618dd74027ae6ca1527991254e3271cce59106d4920ce0414094e22f64","src/transport/linux/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/macos/device.rs":"f508d0585079ecf87a73d6135c52e8b5a887fbf16e241676d51a8099a8001a81","src/transport/macos/iokit.rs":"7dc4e7bbf8e42e2fcde0cee8e48d14d6234a5a910bd5d3c4e966d8ba6b73992f","src/transport/macos/mod.rs":"333e561554fc901d4f6092f6e4c85823e2b0c4ff31c9188d0e6d542b71a0a07c","src/transport/macos/monitor.rs":"e02288454bb4010e06b705d82646abddb3799f0cd655f574aa19f9d91485a4a2","
src/transport/macos/transaction.rs":"9dcdebd13d5fd5a185b5ad777a80c825a6ba5e76b141c238aa115b451b9a72fa","src/transport/mock/device.rs":"582b2b55f13d95dd9f1127e3dde49d2137a5ca020f9c1fa1ffa5c4083d05c0e7","src/transport/mock/mod.rs":"9c4c87efd19adddc1a91c699a6c328063cfbac5531b76346a5ff92e986aded8f","src/transport/mock/transaction.rs":"be3ed8c389dfa04122364b82515edd76fad6f5d5f72d15cacd45a84fb8397292","src/transport/mod.rs":"e28d72b6f3fdaff21f940c4db213067cd94f5832f864ecaad1c9901d5aea9b79","src/transport/netbsd/device.rs":"a7dec83b5040faf1a8ddb37e9fc2b45b9b12814be4802b3b351eff081d1b80c3","src/transport/netbsd/fd.rs":"5464019025d03ea2a39c82f76b238bbbdb0ea63f5a5fc7c9d974e235139cd53b","src/transport/netbsd/mod.rs":"b1c52aa29537330cebe67427062d6c94871cab2a9b0c04b2305d686f07e88fd5","src/transport/netbsd/monitor.rs":"fb2917e4ba53cc9867987a539061f82d011f4c6e478df1157d965d32df2eb922","src/transport/netbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/netbsd/uhid.rs":"d15be35e2413240066a8f086bb8846b08a6a92bf6a1941c3eec1329dd3a4f9ce","src/transport/openbsd/device.rs":"47d8dfeb12c33e6cada2b2cd76476827059c797d8a16f2c4aea6e78d32ebab46","src/transport/openbsd/mod.rs":"514274d414042ff84b3667a41a736e78581e22fda87ccc97c2bc05617e381a30","src/transport/openbsd/monitor.rs":"2e0ba6ecc69b450be9cbfd21a7c65036ed2ce593b12363596d3eae0b5bfb79e8","src/transport/openbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/stub/device.rs":"aa21711d6690ed68bd878b28463172ba69c6324be7afabeccb1f07b4831cb020","src/transport/stub/mod.rs":"6a7fec504a52d403b0241b18cd8b95088a31807571f4c0a67e4055afc74f4453","src/transport/stub/transaction.rs":"c9a3ade9562468163f28fd51e7ff3e0bf5854b7edade9e987000d11c5d0e62d2","src/transport/windows/device.rs":"148b1572ed5fa8d476efbdb2a3a35608ec23012d6a805129f3c25c453bab4b7a","src/transport/windows/mod.rs":"218e7f2fe91ecb390c12bba5a5ffdad2c1f0b22861c937f4d386262e5b3dd617","src/transport/windows/monitor.rs":"95913d49e7d83482e420493d89b53ffceb6a49e646a87de934dff507b3092b4c","src/transport/windows/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/windows/winapi.rs":"b2a4cc85f14e39cadfbf068ee001c9d776f028d3cf09cb926d4364c5b437c112","src/u2fprotocol.rs":"e61ac223aab79ae82383cd32a23213d18461e229c448373bf2483357a9eae69e","src/u2ftypes.rs":"8511c6f04f69670ddd403178a46060644a27128ca4077a9a3e00bc6671e3864b","src/util.rs":"cf37c4c3caf6dde4fc3cf6f5f297ed3c0f13bcb50fb0e8955899fc837483ef31","src/virtualdevices/mod.rs":"2c7df7691d5c150757304241351612aed4260d65b70ab0f483edbc1a5cfb5674","src/virtualdevices/software_u2f.rs":"83e63c0c4a597e71d87b5cd1f33a49646d00b3062edbdd05c51623b80fb60168","src/virtualdevices/webdriver/mod.rs":"4a36e6dfa9f45f941d863b4039bfbcfa8eaca660bd6ed78aeb1a2962db64be5a","src/virtualdevices/webdriver/testtoken.rs":"7146e02f1a5dad2c8827dd11c12ee408c0e42a0706ac65f139998feffd42570f","src/virtualdevices/webdriver/virtualmanager.rs":"7205a0397833628fc0847aa942a6a314dc1e23306858b546053e0de6a360ebe1","src/virtualdevices/webdriver/web_api.rs":"9032525af458b6fe9a3274c36b6ef8c791ecc4ec46d38ae36583fc9a4535b59d","testing/cross/powerpc64le-unknown-linux-gnu.Dockerfile":"d7463ff4376e3e0ca3fed879fab4aa975c4c0a3e7924c5b88aef9381a5d013de","testing/cross/x86_64-unknown-linux-gnu.Dockerfile":"11c79c04b07a171b0c9b63ef75fa75f33263ce76e3c1eda0879a3e723ebd0c24","testing/run_cross.sh":"cc2a7e0359f210eba2e7121f81eb8ab0125cea6e0d0f2698177b0fe2ad0c33d8","webdriver-tools/requirements.t
xt":"8236aa3dedad886f213c9b778fec80b037212d30e640b458984110211d546005","webdriver-tools/webdriver-driver.py":"82327c26ba271d1689acc87b612ab8436cb5475f0a3c0dba7baa06e7f6f5e19c"},"package":"aa0e182b77b6b19eaf9c7b69fddf3be970169ec6d34eca3f5d682ab948727e57"} +diff --git a/third_party/rust/authenticator/build.rs b/third_party/rust/authenticator/build.rs +index 58f6cfa393aaa..acc4f09466f7d 100644 +--- a/third_party/rust/authenticator/build.rs ++++ b/third_party/rust/authenticator/build.rs +@@ -47,6 +47,8 @@ fn main() { + "ioctl_s390xbe.rs" + } else if cfg!(all(target_arch = "riscv64", target_endian = "little")) { + "ioctl_riscv64.rs" ++ } else if cfg!(all(target_arch = "loongarch64", target_endian = "little")) { ++ "ioctl_loongarch64.rs" + } else { + panic!("architecture not supported"); + }; +diff --git a/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs b/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs +index 82aabc6301017..bc8582c5b1491 100644 +--- a/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs ++++ b/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs +@@ -49,3 +49,6 @@ include!("ioctl_s390xbe.rs"); + + #[cfg(all(target_arch = "riscv64", target_endian = "little"))] + include!("ioctl_riscv64.rs"); ++ ++#[cfg(all(target_arch = "loongarch64", target_endian = "little"))] ++include!("ioctl_loongarch64.rs"); +diff --git a/third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs b/third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs +new file mode 100644 +index 0000000000000..a784e9bf4600b +--- /dev/null ++++ b/third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs +@@ -0,0 +1,5 @@ ++/* automatically generated by rust-bindgen */ ++ ++pub type __u32 = ::std::os::raw::c_uint; ++pub const _HIDIOCGRDESCSIZE: __u32 = 2147764225; ++pub const _HIDIOCGRDESC: __u32 = 2416199682; +diff --git a/third_party/rust/cty/.cargo-checksum.json b/third_party/rust/cty/.cargo-checksum.json +index 902714f58a741..3e65dbf70a232 100644 +--- a/third_party/rust/cty/.cargo-checksum.json ++++ b/third_party/rust/cty/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CHANGELOG.md":"077c738b5f2c05d66a12209edaabca887091db727d61164a7a414da23d8bf08f","Cargo.toml":"94a517ea6c7dad4634a9a2bd356f3a8035927e7ff8367bd5a975b4db4ccf8e6e","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"035e70219855119df4273b3c5b97543ae82e0dd60c520416e759107c602f651b","README.md":"19687c50697e6abc991e4c35e6d674db56bd5e5ae0d5b833440718f8f154a74d","ci/install.sh":"9b34273d9e79ec59f63d5e3e7aea27b0db66194667f9730a21158740fa1b99f1","ci/script.sh":"54962430ca4d3528e5c0d44ff590b1504be13147db5cbe8bb82f1358528ef5f3","src/lib.rs":"33a38ce6df718fb0191f34ff2cefdcbd3ad9d93f0c4073b78eaf937c07fb7614"},"package":"b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35"} +\ No newline at end of file 
++{"files":{"CHANGELOG.md":"077c738b5f2c05d66a12209edaabca887091db727d61164a7a414da23d8bf08f","Cargo.toml":"94a517ea6c7dad4634a9a2bd356f3a8035927e7ff8367bd5a975b4db4ccf8e6e","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"035e70219855119df4273b3c5b97543ae82e0dd60c520416e759107c602f651b","README.md":"19687c50697e6abc991e4c35e6d674db56bd5e5ae0d5b833440718f8f154a74d","ci/install.sh":"9b34273d9e79ec59f63d5e3e7aea27b0db66194667f9730a21158740fa1b99f1","ci/script.sh":"54962430ca4d3528e5c0d44ff590b1504be13147db5cbe8bb82f1358528ef5f3","src/lib.rs":"3e9ec28a0d13cfb47546e044b8fc3a32007f7c76994704c4164c4430a7167e39"},"package":"b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35"} +diff --git a/third_party/rust/cty/src/lib.rs b/third_party/rust/cty/src/lib.rs +index 971c9cb3a9e04..80b8f3f291716 100644 +--- a/third_party/rust/cty/src/lib.rs ++++ b/third_party/rust/cty/src/lib.rs +@@ -24,6 +24,7 @@ pub use pwd::*; + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64"))] + mod ad { +diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json +index f4c932b88926b..b7b9c9f3c9a89 100644 +--- a/third_party/rust/nix/.cargo-checksum.json ++++ b/third_party/rust/nix/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dc
de3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c5639824f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/te
st_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"32656bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdd
a3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +\ No newline at end of file ++{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb8120
14d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c5639824f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"c
fed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"32656bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs +index 0c0a2090538f8..214d9e8c60281 100644 +--- a/third_party/rust/nix/src/sys/ioctl/linux.rs ++++ b/third_party/rust/nix/src/sys/ioctl/linux.rs +@@ -41,6 +41,7 @@ mod consts { + target_arch = "s390x", + target_arch = "x86_64", + target_arch = "aarch64", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64" + ))] +diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp +index 30f2907c720e1..e6645227a2949 100644 +--- a/toolkit/components/telemetry/pingsender/pingsender.cpp ++++ b/toolkit/components/telemetry/pingsender/pingsender.cpp +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #include +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index c518e02d00534..975835ffee9d6 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -2240,7 +2240,7 @@ with only_when(compile_environment | artifact_builds): + use_nasm = False + elif target.cpu == "x86_64": + flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] +- elif target.cpu in ("x86", "arm", "aarch64"): ++ elif target.cpu 
in ("x86", "arm", "aarch64", "loongarch64"): + flac_only = True + else: + enable = False +diff --git a/xpcom/reflect/xptcall/md/unix/moz.build b/xpcom/reflect/xptcall/md/unix/moz.build +index 1779c148cb20a..e74c936c38525 100644 +--- a/xpcom/reflect/xptcall/md/unix/moz.build ++++ b/xpcom/reflect/xptcall/md/unix/moz.build +@@ -271,6 +271,14 @@ if CONFIG["OS_ARCH"] == "Linux" and CONFIG["CPU_ARCH"] == "riscv64": + "xptcstubs_riscv64.cpp", + ] + ++if CONFIG["OS_ARCH"] == "Linux" and CONFIG["CPU_ARCH"] == "loongarch64": ++ SOURCES += [ ++ "xptcinvoke_asm_loongarch64.S", ++ "xptcinvoke_loongarch64.cpp", ++ "xptcstubs_asm_loongarch64.S", ++ "xptcstubs_loongarch64.cpp", ++ ] ++ + FINAL_LIBRARY = "xul" + + LOCAL_INCLUDES += [ +diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S +new file mode 100644 +index 0000000000000..7ac5a9a52e171 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S +@@ -0,0 +1,91 @@ ++/* This Source Code Form subject to the terms of Mozilla Public ++ * License, v. 2.0 If a copy of the MPL was not distributed with ++ * this file, You can obtain one at http://mozilla.org/MPL/2.0/. ++ */ ++ ++ .set NGPREGS, 8 ++ .set NFPREGS, 8 ++ ++ .text ++ .globl _NS_InvokeByIndex ++ .type _NS_InvokeByIndex, @function ++/* ++ * _NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, ++ * uint32_t paramCount, nsXPTCVariant* params) ++ */ ++_NS_InvokeByIndex: ++ .cfi_startproc ++ addi.d $sp, $sp, -32 ++ .cfi_def_cfa_offset 32 ++ st.d $s0, $sp, 16 ++ .cfi_offset 23, -16 ++ st.d $s1, $sp, 8 ++ .cfi_offset 24, -24 ++ st.d $s2, $sp, 0 ++ .cfi_offset 25, -32 ++ st.d $ra, $sp, 24 ++ .cfi_offset 1, -8 ++ ++ move $s2, $a0 ++ move $s1, $a1 ++ move $s0, $sp ++ .cfi_def_cfa_register 23 ++ ++ /* 16-bytes alignment */ ++ addi.d $a0, $a2, 1 ++ li.d $t4, 0xfffffffffffffffe ++ and $a0, $a0, $t4 ++ slli.d $a0, $a0, 3 ++ sub.d $sp, $sp, $a0 ++ move $a4, $sp ++ ++ addi.d $sp, $sp, -8*(NFPREGS+NGPREGS) ++ move $a0, $sp ++ addi.d $a1, $sp, 8*NGPREGS ++ ++ bl invoke_copy_to_stack ++ ++ /* 1st argument is this */ ++ move $a0, $s2 ++ ++ ld.d $a1, $sp, 8 ++ ld.d $a2, $sp, 16 ++ ld.d $a3, $sp, 24 ++ ld.d $a4, $sp, 32 ++ ld.d $a5, $sp, 40 ++ ld.d $a6, $sp, 48 ++ ld.d $a7, $sp, 56 ++ ++ fld.d $fa0, $sp, 64 ++ fld.d $fa1, $sp, 72 ++ fld.d $fa2, $sp, 80 ++ fld.d $fa3, $sp, 88 ++ fld.d $fa4, $sp, 96 ++ fld.d $fa5, $sp, 104 ++ fld.d $fa6, $sp, 112 ++ fld.d $fa7, $sp, 120 ++ ++ addi.d $sp, $sp, 8*(NGPREGS+NFPREGS) ++ ++ ld.d $s2, $s2, 0 ++ slli.w $s1, $s1, 3 ++ add.d $s2, $s2, $s1 ++ ld.d $t3, $s2, 0 ++ jirl $ra, $t3, 0 ++ ++ move $sp, $s0 ++ .cfi_def_cfa_register 3 ++ ld.d $s0, $sp, 16 ++ .cfi_restore 23 ++ ld.d $s1, $sp, 8 ++ .cfi_restore 24 ++ ld.d $s2, $sp, 0 ++ .cfi_restore 25 ++ ld.d $ra, $sp, 24 ++ .cfi_restore 1 ++ addi.d $sp, $sp, 32 ++ .cfi_def_cfa_offset -32 ++ jirl $zero, $ra, 0 ++ .cfi_endproc ++ .size _NS_InvokeByIndex, .-_NS_InvokeByIndex ++ .section .note.GNU-stack, "", @progbits +diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp b/xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp +new file mode 100644 +index 0000000000000..61bb7b2efdeb1 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp +@@ -0,0 +1,100 @@ ++/* This Source Code Form is subject to the terms of the Mozilla Public ++ * License, v. 2.0. If a copy of the MPL was not distributed with this ++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ ++ ++// Platform specific code to invoke XPCOM methods on native objects ++ ++#include "xptcprivate.h" ++ ++extern "C" void invoke_copy_to_stack(uint64_t* gpregs, double* fpregs, ++ uint32_t paramCount, nsXPTCVariant* s, ++ uint64_t* d) { ++ static const uint32_t GPR_COUNT = 8; ++ static const uint32_t FPR_COUNT = 8; ++ ++ uint32_t nr_gpr = 1; // skip one GPR register for "this" ++ uint32_t nr_fpr = 0; ++ uint64_t value = 0; ++ ++ for (uint32_t i = 0; i < paramCount; i++, s++) { ++ if (s->IsIndirect()) { ++ value = (uint64_t)&s->val; ++ } else { ++ switch (s->type) { ++ case nsXPTType::T_FLOAT: ++ break; ++ case nsXPTType::T_DOUBLE: ++ break; ++ case nsXPTType::T_I8: ++ value = s->val.i8; ++ break; ++ case nsXPTType::T_I16: ++ value = s->val.i16; ++ break; ++ case nsXPTType::T_I32: ++ value = s->val.i32; ++ break; ++ case nsXPTType::T_I64: ++ value = s->val.i64; ++ break; ++ case nsXPTType::T_U8: ++ value = s->val.u8; ++ break; ++ case nsXPTType::T_U16: ++ value = s->val.u16; ++ break; ++ case nsXPTType::T_U32: ++ value = s->val.u32; ++ break; ++ case nsXPTType::T_U64: ++ value = s->val.u64; ++ break; ++ case nsXPTType::T_BOOL: ++ value = s->val.b; ++ break; ++ case nsXPTType::T_CHAR: ++ value = s->val.c; ++ break; ++ case nsXPTType::T_WCHAR: ++ value = s->val.wc; ++ break; ++ default: ++ value = (uint64_t)s->val.p; ++ break; ++ } ++ } ++ ++ if (!s->IsIndirect() && s->type == nsXPTType::T_DOUBLE) { ++ if (nr_fpr < FPR_COUNT) { ++ fpregs[nr_fpr++] = s->val.d; ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&gpregs[nr_gpr++], &(s->val.d), sizeof(s->val.d)); ++ } else { ++ memcpy(d++, &(s->val.d), sizeof(s->val.d)); ++ } ++ } else if (!s->IsIndirect() && s->type == nsXPTType::T_FLOAT) { ++ if (nr_fpr < FPR_COUNT) { ++ memcpy(&fpregs[nr_fpr++], &(s->val.f), sizeof(s->val.f)); ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&gpregs[nr_gpr++], &(s->val.f), sizeof(s->val.f)); ++ } else { ++ memcpy(d++, &(s->val.f), sizeof(s->val.f)); ++ } ++ } else { ++ if (nr_gpr < GPR_COUNT) { ++ gpregs[nr_gpr++] = value; ++ } else { ++ *d++ = value; ++ } ++ } ++ } ++} ++ ++extern "C" nsresult _NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, ++ uint32_t paramCount, ++ nsXPTCVariant* params); ++EXPORT_XPCOM_API(nsresult) ++NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, uint32_t paramCount, ++ nsXPTCVariant* params) { ++ return _NS_InvokeByIndex(that, methodIndex, paramCount, params); ++} +diff --git a/xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S b/xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S +new file mode 100644 +index 0000000000000..ae4e0cf73fd36 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S +@@ -0,0 +1,52 @@ ++# License, v. 2.0. If a copy of the MPL was not distributed with this ++# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
++ ++ .set NGPRGES, 8 ++ .set NFPREGS, 8 ++ ++ .text ++ .globl SharedStub ++ .hidden SharedStub ++ .type SharedStub,@function ++ ++SharedStub: ++ .cfi_startproc ++ move $t0, $sp ++ addi.d $sp, $sp, -8*(NGPRGES+NFPREGS)-16 ++ .cfi_def_cfa_offset 8*(NGPRGES+NFPREGS)+16 ++ st.d $a0, $sp, 0 ++ st.d $a1, $sp, 8 ++ st.d $a2, $sp, 16 ++ st.d $a3, $sp, 24 ++ st.d $a4, $sp, 32 ++ st.d $a5, $sp, 40 ++ st.d $a6, $sp, 48 ++ st.d $a7, $sp, 56 ++ fst.d $fa0, $sp, 64 ++ fst.d $fa1, $sp, 72 ++ fst.d $fa2, $sp, 80 ++ fst.d $fa3, $sp, 88 ++ fst.d $fa4, $sp, 96 ++ fst.d $fa5, $sp, 104 ++ fst.d $fa6, $sp, 112 ++ fst.d $fa7, $sp, 120 ++ st.d $ra, $sp, 136 ++ .cfi_offset 1, 136 ++ ++ /* methodIndex is passed from stub */ ++ move $a1, $t6 ++ move $a2, $t0 ++ move $a3, $sp ++ addi.d $a4, $sp, 8*NGPRGES ++ ++ bl PrepareAndDispatch ++ ++ ld.d $ra, $sp, 136 ++ .cfi_restore 1 ++ addi.d $sp, $sp, 8*(NGPRGES+NFPREGS)+16 ++ .cfi_def_cfa_offset -8*(NGPRGES+NFPREGS)-16 ++ jirl $zero, $ra, 0 ++ .cfi_endproc ++ ++ .size SharedStub, .-SharedStub ++ .section .note.GNU-stack, "", @progbits +diff --git a/xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp b/xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp +new file mode 100644 +index 0000000000000..5c4cd6d95e7f6 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp +@@ -0,0 +1,159 @@ ++/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ ++/* This Source Code Form is subject to the terms of the Mozilla Public ++ * License, V. 2.0. If a copy of the MPL was not distributed with this ++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ ++ ++#include "xptcprivate.h" ++ ++extern "C" nsresult ATTRIBUTE_USED PrepareAndDispatch(nsXPTCStubBase* self, ++ uint32_t methodIndex, ++ uint64_t* args, ++ uint64_t* gpregs, ++ double* fpregs) { ++ static const uint32_t GPR_COUNT = 8; ++ static const uint32_t FPR_COUNT = 8; ++ nsXPTCMiniVariant paramBuffer[PARAM_BUFFER_COUNT]; ++ const nsXPTMethodInfo* info; ++ ++ self->mEntry->GetMethodInfo(uint16_t(methodIndex), &info); ++ ++ uint32_t paramCount = info->GetParamCount(); ++ const uint8_t indexOfJSContext = info->IndexOfJSContext(); ++ ++ uint64_t* ap = args; ++ uint32_t nr_gpr = 1; // skip the arg which is 'self' ++ uint32_t nr_fpr = 0; ++ uint64_t value; ++ ++ for (uint32_t i = 0; i < paramCount; i++) { ++ const nsXPTParamInfo& param = info->GetParam(i); ++ const nsXPTType& type = param.GetType(); ++ nsXPTCMiniVariant* dp = ¶mBuffer[i]; ++ ++ if (i == indexOfJSContext) { ++ if (nr_gpr < GPR_COUNT) ++ nr_gpr++; ++ else ++ ap++; ++ } ++ ++ if (!param.IsOut() && type == nsXPTType::T_DOUBLE) { ++ if (nr_fpr < FPR_COUNT) { ++ dp->val.d = fpregs[nr_fpr++]; ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&dp->val.d, &gpregs[nr_gpr++], sizeof(dp->val.d)); ++ } else { ++ memcpy(&dp->val.d, ap++, sizeof(dp->val.d)); ++ } ++ continue; ++ } ++ ++ if (!param.IsOut() && type == nsXPTType::T_FLOAT) { ++ if (nr_fpr < FPR_COUNT) { ++ memcpy(&dp->val.f, &fpregs[nr_fpr++], sizeof(dp->val.f)); ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&dp->val.f, &gpregs[nr_gpr++], sizeof(dp->val.f)); ++ } else { ++ memcpy(&dp->val.f, ap++, sizeof(dp->val.f)); ++ } ++ continue; ++ } ++ ++ if (nr_gpr < GPR_COUNT) { ++ value = gpregs[nr_gpr++]; ++ } else { ++ value = *ap++; ++ } ++ ++ if (param.IsOut() || !type.IsArithmetic()) { ++ dp->val.p = (void*)value; ++ continue; ++ } ++ ++ switch (type) { ++ case nsXPTType::T_I8: ++ dp->val.i8 = (int8_t)value; ++ break; ++ case nsXPTType::T_I16: ++ dp->val.i16 = 
(int16_t)value; ++ break; ++ case nsXPTType::T_I32: ++ dp->val.i32 = (int32_t)value; ++ break; ++ case nsXPTType::T_I64: ++ dp->val.i64 = (int64_t)value; ++ break; ++ case nsXPTType::T_U8: ++ dp->val.u8 = (uint8_t)value; ++ break; ++ case nsXPTType::T_U16: ++ dp->val.u16 = (uint16_t)value; ++ break; ++ case nsXPTType::T_U32: ++ dp->val.u32 = (uint32_t)value; ++ break; ++ case nsXPTType::T_U64: ++ dp->val.u64 = (uint64_t)value; ++ break; ++ case nsXPTType::T_BOOL: ++ dp->val.b = (bool)(uint8_t)value; ++ break; ++ case nsXPTType::T_CHAR: ++ dp->val.c = (char)value; ++ break; ++ case nsXPTType::T_WCHAR: ++ dp->val.wc = (wchar_t)value; ++ break; ++ default: ++ NS_ERROR("bad type"); ++ break; ++ } ++ } ++ ++ nsresult result = self->mOuter->CallMethod((uint16_t)methodIndex, info, ++ paramBuffer); ++ return result; ++} ++ ++// Load $t6 with the constant 'n' and branch to SharedStub(). ++// clang-format off ++#define STUB_ENTRY(n) \ ++ __asm__( \ ++ ".text\n\t" \ ++ ".if "#n" < 10 \n\t" \ ++ ".globl _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \ ++ ".hidden _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \ ++ ".type _ZN14nsXPTCStubBase5Stub"#n"Ev,@function \n\n" \ ++ "_ZN14nsXPTCStubBase5Stub"#n"Ev: \n\t" \ ++ ".elseif "#n" < 100 \n\t" \ ++ ".globl _ZN14nsXPTCStubBase6Stub"#n"Ev \n\t" \ ++ ".hidden _ZN14nsXPTCStubBase6Stub"#n"Ev \n\t" \ ++ ".type _ZN14nsXPTCStubBase6Stub"#n"Ev,@function \n\n" \ ++ "_ZN14nsXPTCStubBase6Stub"#n"Ev: \n\t" \ ++ ".elseif "#n" < 1000 \n\t" \ ++ ".globl _ZN14nsXPTCStubBase7Stub"#n"Ev \n\t" \ ++ ".hidden _ZN14nsXPTCStubBase7Stub"#n"Ev \n\t" \ ++ ".type _ZN14nsXPTCStubBase7Stub"#n"Ev,@function \n\n" \ ++ "_ZN14nsXPTCStubBase7Stub"#n"Ev: \n\t" \ ++ ".else \n\t" \ ++ ".err \"stub number "#n" >= 1000 not yet supported\"\n" \ ++ ".endif \n\t" \ ++ "li.d $t6, "#n" \n\t" \ ++ "b SharedStub \n" \ ++ ".if "#n" < 10 \n\t" \ ++ ".size _ZN14nsXPTCStubBase5Stub"#n"Ev,.-_ZN14nsXPTCStubBase5Stub"#n"Ev\n\t" \ ++ ".elseif "#n" < 100 \n\t" \ ++ ".size _ZN14nsXPTCStubBase6Stub"#n"Ev,.-_ZN14nsXPTCStubBase6Stub"#n"Ev\n\t" \ ++ ".else \n\t" \ ++ ".size _ZN14nsXPTCStubBase7Stub"#n"Ev,.-_ZN14nsXPTCStubBase7Stub"#n"Ev\n\t" \ ++ ".endif" \ ++); ++// clang-format on ++ ++#define SENTINEL_ENTRY(n) \ ++ nsresult nsXPTCStubBase::Sentinel##n() { \ ++ NS_ERROR("nsXPTCStubBase::Sentinel called"); \ ++ return NS_ERROR_NOT_IMPLEMENTED; \ ++ } ++ ++#include "xptcstubsdef.inc" +-- +2.42.0 + diff --git a/thunderbird/mozconfig.cfg b/thunderbird/mozconfig.cfg index 3d8da9a551..8036c9bf8d 100644 --- a/thunderbird/mozconfig.cfg +++ b/thunderbird/mozconfig.cfg @@ -2,12 +2,13 @@ ac_add_options --enable-application=comm/mail ac_add_options --prefix=/usr ac_add_options --enable-release -ac_add_options --enable-linker=lld +#ac_add_options --enable-linker=lld ac_add_options --enable-hardening ac_add_options --enable-optimize -ac_add_options --enable-rust-simd +#ac_add_options --enable-rust-simd # https://bugzilla.mozilla.org/show_bug.cgi?id=1423822 -ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +#ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +ac_add_options --without-wasm-sandboxed-libraries # Branding ac_add_options --enable-official-branding diff --git a/tickrs/PKGBUILD b/tickrs/PKGBUILD index 0edc5a453b..9d708c5ee4 100644 --- a/tickrs/PKGBUILD +++ b/tickrs/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/toastify/PKGBUILD 
b/toastify/PKGBUILD index 59c179af62..e89e20b306 100644 --- a/toastify/PKGBUILD +++ b/toastify/PKGBUILD @@ -14,7 +14,7 @@ b2sums=('SKIP') prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tokei/PKGBUILD b/tokei/PKGBUILD index 6ed632e9cf..ccbcdf0396 100644 --- a/tokei/PKGBUILD +++ b/tokei/PKGBUILD @@ -15,7 +15,13 @@ sha512sums=('b8474cb3cad8cab8cb9c24b44a9b7bdaa436fde4e56ca25a8c6d9cbe342b27acf80 build() { cd "${srcdir}/${pkgname}-${pkgver}" - cargo build --release --locked --features all + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < +Date: Sun, 18 Apr 2021 22:37:24 +0800 +Subject: [PATCH] just for la64 build + +--- + src/miniacc.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/miniacc.h b/src/miniacc.h +index 3c949280..e52cda58 100644 +--- a/src/miniacc.h ++++ b/src/miniacc.h +@@ -979,6 +979,9 @@ + #elif (UINT_MAX == ACC_0xffffL) && defined(__m32c__) + # define ACC_ARCH_M16C 1 + # define ACC_INFO_ARCH "m16c" ++#elif defined(__loongarch64) ++# define ACC_ARCH_LA64 1 ++# define ACC_INFO_ARCH "la64" + #elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICCM16C__) + # define ACC_ARCH_M16C 1 + # define ACC_INFO_ARCH "m16c" +@@ -2586,6 +2589,8 @@ ACC_COMPILE_TIME_ASSERT_HEADER(ACC_SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t)) + # define ACC_ABI_BIG_ENDIAN 1 + #elif 1 && (ACC_ARCH_ARM64) && defined(__AARCH64EL__) && !defined(__AARCH64EB__) + # define ACC_ABI_LITTLE_ENDIAN 1 ++#elif 1 && (ACC_ARCH_LA64) && defined(__loongarch64) ++# define ACC_ABI_LITTLE_ENDIAN 1 + #elif 1 && (ACC_ARCH_ARM64) && defined(_MSC_VER) && defined(_WIN32) + # define ACC_ABI_LITTLE_ENDIAN 1 + #elif 1 && (ACC_ARCH_MIPS) && defined(__MIPSEB__) && !defined(__MIPSEL__) +-- +2.28.0 + diff --git a/upx/PKGBUILD b/upx/PKGBUILD index 4e31c3dfa5..e0e01c1717 100644 --- a/upx/PKGBUILD +++ b/upx/PKGBUILD @@ -22,7 +22,9 @@ source=( git+$url-vendor-valgrind#commit=a196a50056be5e6ef0c5f7456e95b4234b799a33 git+$url-vendor-zlib#commit=93538c2b9403ec5b8ed673a3a5f3874430569b70 "git+$url#commit=099c3d829e80488af7395a4242b318877e980da4") # tag: v4.2.2 +source+=(0001-just-for-la64-build.patch) b2sums=(SKIP SKIP SKIP SKIP SKIP SKIP) +b2sums+=('0ea1b7a64155d135bfd8872969e1003b72d4b098512f1bc89d31244e02ffc901cc8c5bd87eb63a529aadf161386cd857cc6271a8ea018ba3915616c3c2fe5c72') prepare() { cd $pkgname @@ -30,6 +32,7 @@ prepare() { for x in doctest lzma-sdk ucl valgrind zlib; do rm -frv vendor/$x && ln -s "$srcdir/upx-vendor-$x" vendor/$x done + patch -p1 -i $srcdir/0001-just-for-la64-build.patch } build() { diff --git a/uucp/PKGBUILD b/uucp/PKGBUILD index 7a7a2a69c9..fd66c3edaa 100644 --- a/uucp/PKGBUILD +++ b/uucp/PKGBUILD @@ -21,6 +21,8 @@ prepare() { } build() { + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} cd "${srcdir}/${pkgname}-${pkgver}" ./configure --prefix=/usr --mandir=/usr/share/man --with-newconfigdir=/etc/uucp --sbindir=/usr/bin make diff --git a/v2ray-domain-list-community/PKGBUILD b/v2ray-domain-list-community/PKGBUILD index 4903c25f40..2b3dfcfc90 100644 --- a/v2ray-domain-list-community/PKGBUILD +++ b/v2ray-domain-list-community/PKGBUILD @@ -14,6 +14,9 @@ sha512sums=('SKIP') build() { cd domain-list-community + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy 
ASSUME_NO_MOVING_GC_UNSAFE_RISK_IT_WITH=go1.18 go run main.go } diff --git a/v2ray/PKGBUILD b/v2ray/PKGBUILD index 08fa9cc6c1..87f2774636 100644 --- a/v2ray/PKGBUILD +++ b/v2ray/PKGBUILD @@ -26,6 +26,10 @@ build() { export CGO_LDFLAGS="${LDFLAGS}" export CGO_CFLAGS="${CFLAGS}" export CGO_CPPFLAGS="${CPPFLAGS}" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy go build -o v2ray ./main } diff --git a/valgrind/PKGBUILD b/valgrind/PKGBUILD index 4d53417803..edb54aa4fa 100644 --- a/valgrind/PKGBUILD +++ b/valgrind/PKGBUILD @@ -21,30 +21,36 @@ arch=('loong64' 'x86_64') license=('GPL') url='https://valgrind.org/' depends=('glibc' 'perl' 'debuginfod') -makedepends=('gdb' 'lib32-glibc' 'lib32-gcc-libs' 'docbook-xml' +makedepends=('gdb' 'docbook-xml' 'docbook-xsl' 'docbook-sgml') checkdepends=('procps-ng') -optdepends=('lib32-glibc: 32-bit ABI support') provides=('valgrind-multilib') replaces=('valgrind-multilib') options=('!emptydirs' '!strip') source=(https://sourceware.org/pub/valgrind/valgrind-${pkgver}.tar.bz2{,.asc} - valgrind-3.7.0-respect-flags.patch) + valgrind-3.7.0-respect-flags.patch + fix-perl-errors.patch + valgrind-3.21-la64.patch) validpgpkeys=( 0E9FFD0C16A1856CF9C7C690BA0166E698FA6035 # Julian Seward EC3CFE88F6CA0788774F5C1D1AA44BE649DE760A # Mark Wielaard ) sha512sums=('2904c13f68245bbafcea70998c6bd20725271300a7e94b6751ca00916943595fc3fac8557da7ea8db31b54a43f092823a0a947bc142829da811d074e1fe49777' 'SKIP' - 'e0cec39381cefeca09ae4794cca309dfac7c8693e6315e137e64f5c33684598726d41cfbb4edf764fe985503b13ff596184ca5fc32b159d500ec092e4cf8838c') + 'e0cec39381cefeca09ae4794cca309dfac7c8693e6315e137e64f5c33684598726d41cfbb4edf764fe985503b13ff596184ca5fc32b159d500ec092e4cf8838c' + '20b251bfc7bef8dfd232f9b679e907114c575299916164a608e2fe7fab5f30bf7241f25e37ab4194c56b0a21e682b3cea2fd892aab30fa2ce3863ef744f27f18' + '62015578845d7efe55ba1c04ccaaee1beffb597524bc17ec90494d32e53ef792947391bbc2fe2eb8557bc84a5aa470c1f0b1542b1a10aac58719efbaaeba2f87') b2sums=('80024371b3e70521996077fba24e233097a6190477ced1b311cd41fead687dcc2511ac0ef723792488f4af08867dff3e1f474816fda09c1604b89059e31c2514' 'SKIP' - 'af556fdf3c02e37892bfe9afebc954cf2f1b2fa9b75c1caacfa9f3b456ebc02bf078475f9ee30079b3af5d150d41415a947c3d04235c1ea8412cf92b959c484a') + 'af556fdf3c02e37892bfe9afebc954cf2f1b2fa9b75c1caacfa9f3b456ebc02bf078475f9ee30079b3af5d150d41415a947c3d04235c1ea8412cf92b959c484a' + '78e5ebeda69302ad380923fe0e76ef8fc3443ffa29cc3104fe629335c8ceda1b4198cb5c72bdefb0e47c77ea02d2ca7bfb478cbf8731f8ded0e0c7c5d83981ee' + 'd9bea235f6b8d07cff5db4851b7085cde9d90e85929167206c74c641fe2db50dde2ffc6cf4524109275321e4c1be8e5a4bcca8a0dda1570877d2cfbfd847b994') options=(!lto) # https://bugs.kde.org/show_bug.cgi?id=338252 prepare() { cd valgrind-${pkgver} patch -Np1 < ../valgrind-3.7.0-respect-flags.patch + patch -p1 -i "${srcdir}"/valgrind-3.21-la64.patch sed -i 's|sgml/docbook/xsl-stylesheets|xml/docbook/xsl-stylesheets-1.79.2-nons|' docs/Makefile.am autoreconf -ifv @@ -54,6 +60,8 @@ build() { # valgrind does not like some of our flags CPPFLAGS=${CPPFLAGS/-D_FORTIFY_SOURCE=2/} CFLAGS=${CFLAGS/-fno-plt/} + CFLAGS=${CFLAGS/ -O2/} + CXXFLAGS=${CFLAGS/ -O2/} CXXFLAGS=${CXXFLAGS/-fno-plt/} cd valgrind-${pkgver} diff --git a/valgrind/valgrind-3.21-la64.patch b/valgrind/valgrind-3.21-la64.patch new file mode 100644 index 0000000000..aca79dccdf --- /dev/null +++ 
b/valgrind/valgrind-3.21-la64.patch @@ -0,0 +1,93230 @@ +diff --git a/Makefile.all.am b/Makefile.all.am +index 1de1f13a7..d72410a9d 100755 +--- a/Makefile.all.am ++++ b/Makefile.all.am +@@ -290,6 +290,12 @@ AM_CFLAGS_PSO_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) \ + $(AM_CFLAGS_PSO_BASE) + AM_CCASFLAGS_MIPS64_LINUX = @FLAG_M64@ -g + ++AM_FLAG_M3264_LOONGARCH64_LINUX = @FLAG_M64@ ++AM_CFLAGS_LOONGARCH64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) ++AM_CFLAGS_PSO_LOONGARCH64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) \ ++ $(AM_CFLAGS_PSO_BASE) ++AM_CCASFLAGS_LOONGARCH64_LINUX = @FLAG_M64@ -g ++ + AM_FLAG_M3264_X86_SOLARIS = @FLAG_M32@ + AM_CFLAGS_X86_SOLARIS = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY_2@ \ + $(AM_CFLAGS_BASE) -fomit-frame-pointer @SOLARIS_UNDEF_LARGESOURCE@ +@@ -350,6 +356,7 @@ PRELOAD_LDFLAGS_S390X_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ + PRELOAD_LDFLAGS_MIPS32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@ + PRELOAD_LDFLAGS_NANOMIPS_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@ + PRELOAD_LDFLAGS_MIPS64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ ++PRELOAD_LDFLAGS_LOONGARCH64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ + PRELOAD_LDFLAGS_X86_SOLARIS = $(PRELOAD_LDFLAGS_COMMON_SOLARIS) @FLAG_M32@ + PRELOAD_LDFLAGS_AMD64_SOLARIS = $(PRELOAD_LDFLAGS_COMMON_SOLARIS) @FLAG_M64@ + +diff --git a/Makefile.tool.am b/Makefile.tool.am +index df9502913..58a928e95 100644 +--- a/Makefile.tool.am ++++ b/Makefile.tool.am +@@ -99,6 +99,10 @@ TOOL_LDFLAGS_MIPS64_LINUX = \ + -static -nodefaultlibs -nostartfiles -u __start @FLAG_NO_BUILD_ID@ \ + @FLAG_M64@ + ++TOOL_LDFLAGS_LOONGARCH64_LINUX = \ ++ -static -nodefaultlibs -nostartfiles -u __start @FLAG_NO_BUILD_ID@ \ ++ @FLAG_M64@ ++ + TOOL_LDFLAGS_X86_SOLARIS = \ + $(TOOL_LDFLAGS_COMMON_SOLARIS) @FLAG_M32@ + +@@ -167,6 +171,9 @@ LIBREPLACEMALLOC_MIPS32_LINUX = \ + LIBREPLACEMALLOC_MIPS64_LINUX = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-mips64-linux.a + ++LIBREPLACEMALLOC_LOONGARCH64_LINUX = \ ++ $(top_builddir)/coregrind/libreplacemalloc_toolpreload-loongarch64-linux.a ++ + LIBREPLACEMALLOC_X86_SOLARIS = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-solaris.a + +@@ -239,6 +246,11 @@ LIBREPLACEMALLOC_LDFLAGS_MIPS64_LINUX = \ + $(LIBREPLACEMALLOC_MIPS64_LINUX) \ + -Wl,--no-whole-archive + ++LIBREPLACEMALLOC_LDFLAGS_LOONGARCH64_LINUX = \ ++ -Wl,--whole-archive \ ++ $(LIBREPLACEMALLOC_LOONGARCH64_LINUX) \ ++ -Wl,--no-whole-archive ++ + LIBREPLACEMALLOC_LDFLAGS_X86_SOLARIS = \ + -Wl,--whole-archive \ + $(LIBREPLACEMALLOC_X86_SOLARIS) \ +diff --git a/Makefile.vex.am b/Makefile.vex.am +index 98d848359..009d93b45 100644 +--- a/Makefile.vex.am ++++ b/Makefile.vex.am +@@ -26,6 +26,7 @@ pkginclude_HEADERS = \ + pub/libvex_guest_s390x.h \ + pub/libvex_guest_mips32.h \ + pub/libvex_guest_mips64.h \ ++ pub/libvex_guest_loongarch64.h \ + pub/libvex_s390x_common.h \ + pub/libvex_ir.h \ + pub/libvex_trc_values.h \ +@@ -49,6 +50,7 @@ noinst_HEADERS = \ + priv/guest_mips_defs.h \ + priv/mips_defs.h \ + priv/guest_nanomips_defs.h \ ++ priv/guest_loongarch64_defs.h \ + priv/host_generic_regs.h \ + priv/host_generic_simd64.h \ + priv/host_generic_simd128.h \ +@@ -64,7 +66,8 @@ noinst_HEADERS = \ + priv/s390_defs.h \ + priv/host_mips_defs.h \ + priv/host_nanomips_defs.h \ +- priv/common_nanomips_defs.h ++ priv/common_nanomips_defs.h \ ++ priv/host_loongarch64_defs.h + + BUILT_SOURCES = pub/libvex_guest_offsets.h + CLEANFILES = pub/libvex_guest_offsets.h +@@ -93,7 +96,8 @@ pub/libvex_guest_offsets.h: 
auxprogs/genoffsets.c \ + pub/libvex_guest_arm64.h \ + pub/libvex_guest_s390x.h \ + pub/libvex_guest_mips32.h \ +- pub/libvex_guest_mips64.h ++ pub/libvex_guest_mips64.h \ ++ pub/libvex_guest_loongarch64.h + rm -f auxprogs/genoffsets.s + $(mkdir_p) auxprogs pub + $(CC) $(CFLAGS_FOR_GENOFFSETS) \ +@@ -151,6 +155,8 @@ LIBVEX_SOURCES_COMMON = \ + priv/guest_mips_toIR.c \ + priv/guest_nanomips_helpers.c \ + priv/guest_nanomips_toIR.c \ ++ priv/guest_loongarch64_helpers.c \ ++ priv/guest_loongarch64_toIR.c \ + priv/host_generic_regs.c \ + priv/host_generic_simd64.c \ + priv/host_generic_simd128.c \ +@@ -174,7 +180,9 @@ LIBVEX_SOURCES_COMMON = \ + priv/host_mips_defs.c \ + priv/host_nanomips_defs.c \ + priv/host_mips_isel.c \ +- priv/host_nanomips_isel.c ++ priv/host_nanomips_isel.c \ ++ priv/host_loongarch64_defs.c \ ++ priv/host_loongarch64_isel.c + + LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c + +diff --git a/README b/README +index 842388036..1a88d1d47 100644 +--- a/README ++++ b/README +@@ -41,6 +41,7 @@ platforms: + - MIPS32/Linux + - MIPS64/Linux + - nanoMIPS/Linux ++- LOONGARCH64/Linux + - X86/Solaris + - AMD64/Solaris + - X86/FreeBSD +diff --git a/README.loongarch64 b/README.loongarch64 +new file mode 100644 +index 000000000..2f71484bf +--- /dev/null ++++ b/README.loongarch64 +@@ -0,0 +1,46 @@ ++Status ++~~~~~~ ++ ++A port to LoongArch64 Linux platform. ++ ++Some new IROps: ++ ++* Iop_ScaleBF64 ++* Iop_ScaleBF32 ++* Iop_RSqrtF64 ++* Iop_RSqrtF32 ++* Iop_LogBF64 ++* Iop_LogBF32 ++* Iop_MaxNumAbsF64 ++* Iop_MinNumAbsF64 ++* Iop_MaxNumF32 ++* Iop_MinNumF32 ++* Iop_MaxNumAbsF32 ++* Iop_MinNumAbsF32 ++ ++A new IRMBusEvent: ++ ++* Imbe_InsnFence ++ ++A new IRJumpKind: ++ ++* Ijk_SigSYS ++ ++ ++Limitations ++~~~~~~~~~~~ ++ ++* Only support basic integer instructions and floating-point instructions. ++* Only support fallback LLSC implementation. ++ ++ ++Reading Material ++~~~~~~~~~~~~~~~~ ++ ++* LoongArch Reference Manual ++ https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html ++* LoongArch ELF ABI specification: ++ https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html ++* LoongArch Toolchain Conventions: ++ https://loongson.github.io/LoongArch-Documentation/LoongArch-toolchain-conventions-EN.html ++ +diff --git a/README.md b/README.md +new file mode 100644 +index 000000000..e2f412092 +--- /dev/null ++++ b/README.md +@@ -0,0 +1,8 @@ ++# Valgrind – LOONGARCH64/Linux with vector support ++ ++This branch (`loongarch64-linux-vector`) contains Valgrind with support for the LOONGARCH64/Linux platform, specifically focusing on vector support. ++ ++The `loongarch64-linux` branch has been tested with Glibc version 2.37. ++It's important to be aware that starting from Glibc version 2.38, vector support is required. 
++ ++**Note: This branch is under development and is not stable.** +diff --git a/VEX/auxprogs/genoffsets.c b/VEX/auxprogs/genoffsets.c +index 54376dc90..89edf524c 100644 +--- a/VEX/auxprogs/genoffsets.c ++++ b/VEX/auxprogs/genoffsets.c +@@ -53,6 +53,7 @@ + #include "../pub/libvex_guest_s390x.h" + #include "../pub/libvex_guest_mips32.h" + #include "../pub/libvex_guest_mips64.h" ++#include "../pub/libvex_guest_loongarch64.h" + + #define VG_STRINGIFZ(__str) #__str + #define VG_STRINGIFY(__str) VG_STRINGIFZ(__str) +@@ -262,6 +263,41 @@ void foo ( void ) + GENOFFSET(MIPS64,mips64,PC); + GENOFFSET(MIPS64,mips64,HI); + GENOFFSET(MIPS64,mips64,LO); ++ ++ // LOONGARCH64 ++ GENOFFSET(LOONGARCH64,loongarch64,R0); ++ GENOFFSET(LOONGARCH64,loongarch64,R1); ++ GENOFFSET(LOONGARCH64,loongarch64,R2); ++ GENOFFSET(LOONGARCH64,loongarch64,R3); ++ GENOFFSET(LOONGARCH64,loongarch64,R4); ++ GENOFFSET(LOONGARCH64,loongarch64,R5); ++ GENOFFSET(LOONGARCH64,loongarch64,R6); ++ GENOFFSET(LOONGARCH64,loongarch64,R7); ++ GENOFFSET(LOONGARCH64,loongarch64,R8); ++ GENOFFSET(LOONGARCH64,loongarch64,R9); ++ GENOFFSET(LOONGARCH64,loongarch64,R10); ++ GENOFFSET(LOONGARCH64,loongarch64,R11); ++ GENOFFSET(LOONGARCH64,loongarch64,R12); ++ GENOFFSET(LOONGARCH64,loongarch64,R13); ++ GENOFFSET(LOONGARCH64,loongarch64,R14); ++ GENOFFSET(LOONGARCH64,loongarch64,R15); ++ GENOFFSET(LOONGARCH64,loongarch64,R16); ++ GENOFFSET(LOONGARCH64,loongarch64,R17); ++ GENOFFSET(LOONGARCH64,loongarch64,R18); ++ GENOFFSET(LOONGARCH64,loongarch64,R19); ++ GENOFFSET(LOONGARCH64,loongarch64,R20); ++ GENOFFSET(LOONGARCH64,loongarch64,R21); ++ GENOFFSET(LOONGARCH64,loongarch64,R22); ++ GENOFFSET(LOONGARCH64,loongarch64,R23); ++ GENOFFSET(LOONGARCH64,loongarch64,R24); ++ GENOFFSET(LOONGARCH64,loongarch64,R25); ++ GENOFFSET(LOONGARCH64,loongarch64,R26); ++ GENOFFSET(LOONGARCH64,loongarch64,R27); ++ GENOFFSET(LOONGARCH64,loongarch64,R28); ++ GENOFFSET(LOONGARCH64,loongarch64,R29); ++ GENOFFSET(LOONGARCH64,loongarch64,R30); ++ GENOFFSET(LOONGARCH64,loongarch64,R31); ++ GENOFFSET(LOONGARCH64,loongarch64,PC); + } + + /*--------------------------------------------------------------------*/ +diff --git a/VEX/priv/guest_loongarch64_defs.h b/VEX/priv/guest_loongarch64_defs.h +new file mode 100644 +index 000000000..867d85981 +--- /dev/null ++++ b/VEX/priv/guest_loongarch64_defs.h +@@ -0,0 +1,130 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin guest_loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++/* Only to be used within the guest-loongarch64 directory. 
*/ ++ ++#ifndef __VEX_GUEST_LOONGARCH64_DEFS_H ++#define __VEX_GUEST_LOONGARCH64_DEFS_H ++ ++#include "libvex_basictypes.h" ++#include "guest_generic_bb_to_IR.h" /* DisResult */ ++ ++ ++/*---------------------------------------------------------*/ ++/*--- loongarch64 to IR conversion ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Convert one LOONGARCH64 insn to IR. See the type DisOneInstrFn in ++ guest_generic_bb_to_IR.h. */ ++extern DisResult disInstr_LOONGARCH64 ( IRSB* irsb_IN, ++ const UChar* guest_code_IN, ++ Long delta, ++ Addr guest_IP, ++ VexArch guest_arch, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo, ++ VexEndness host_endness_IN, ++ Bool sigill_diag_IN ); ++ ++/* Used by the optimiser to specialise calls to helpers. */ ++extern IRExpr* guest_loongarch64_spechelper ( const HChar* function_name, ++ IRExpr** args, ++ IRStmt** precedingStmts, ++ Int n_precedingStmts ); ++ ++/* Describes to the optimser which part of the guest state require ++ precise memory exceptions. This is logically part of the guest ++ state description. */ ++extern Bool guest_loongarch64_state_requires_precise_mem_exns ( Int minoff, ++ Int maxoff, ++ VexRegisterUpdates pxControl ); ++ ++extern VexGuestLayout loongarch64Guest_layout; ++ ++ ++/*---------------------------------------------------------*/ ++/*--- loongarch64 guest helpers ---*/ ++/*---------------------------------------------------------*/ ++ ++enum fpop { ++ FADD_S, FADD_D, FSUB_S, FSUB_D, ++ FMUL_S, FMUL_D, FDIV_S, FDIV_D, ++ FMADD_S, FMADD_D, FMSUB_S, FMSUB_D, ++ FNMADD_S, FNMADD_D, FNMSUB_S, FNMSUB_D, ++ FMAX_S, FMAX_D, FMIN_S, FMIN_D, ++ FMAXA_S, FMAXA_D, FMINA_S, FMINA_D, ++ FABS_S, FABS_D, FNEG_S, FNEG_D, ++ FSQRT_S, FSQRT_D, ++ FRECIP_S, FRECIP_D, ++ FRSQRT_S, FRSQRT_D, ++ FSCALEB_S, FSCALEB_D, ++ FLOGB_S, FLOGB_D, ++ FCMP_CAF_S, FCMP_CAF_D, FCMP_SAF_S, FCMP_SAF_D, ++ FCMP_CLT_S, FCMP_CLT_D, FCMP_SLT_S, FCMP_SLT_D, ++ FCMP_CEQ_S, FCMP_CEQ_D, FCMP_SEQ_S, FCMP_SEQ_D, ++ FCMP_CLE_S, FCMP_CLE_D, FCMP_SLE_S, FCMP_SLE_D, ++ FCMP_CUN_S, FCMP_CUN_D, FCMP_SUN_S, FCMP_SUN_D, ++ FCMP_CULT_S, FCMP_CULT_D, FCMP_SULT_S, FCMP_SULT_D, ++ FCMP_CUEQ_S, FCMP_CUEQ_D, FCMP_SUEQ_S, FCMP_SUEQ_D, ++ FCMP_CULE_S, FCMP_CULE_D, FCMP_SULE_S, FCMP_SULE_D, ++ FCMP_CNE_S, FCMP_CNE_D, FCMP_SNE_S, FCMP_SNE_D, ++ FCMP_COR_S, FCMP_COR_D, FCMP_SOR_S, FCMP_SOR_D, ++ FCMP_CUNE_S, FCMP_CUNE_D, FCMP_SUNE_S, FCMP_SUNE_D, ++ FCVT_S_D, FCVT_D_S, ++ FTINTRM_W_S, FTINTRM_W_D, FTINTRM_L_S, FTINTRM_L_D, ++ FTINTRP_W_S, FTINTRP_W_D, FTINTRP_L_S, FTINTRP_L_D, ++ FTINTRZ_W_S, FTINTRZ_W_D, FTINTRZ_L_S, FTINTRZ_L_D, ++ FTINTRNE_W_S, FTINTRNE_W_D, FTINTRNE_L_S, FTINTRNE_L_D, ++ FTINT_W_S, FTINT_W_D, FTINT_L_S, FTINT_L_D, ++ FFINT_S_W, FFINT_D_W, FFINT_S_L, FFINT_D_L, ++ FRINT_S, FRINT_D ++}; ++ ++extern ULong loongarch64_calculate_cpucfg ( ULong src ); ++extern ULong loongarch64_calculate_revb_2h ( ULong src ); ++extern ULong loongarch64_calculate_revb_4h ( ULong src ); ++extern ULong loongarch64_calculate_revb_2w ( ULong src ); ++extern ULong loongarch64_calculate_revb_d ( ULong src ); ++extern ULong loongarch64_calculate_revh_2w ( ULong src ); ++extern ULong loongarch64_calculate_revh_d ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_4b ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_8b ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_w ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_d ( ULong src ); ++extern ULong loongarch64_calculate_crc ( ULong old, ULong msg, ULong len ); ++extern ULong 
loongarch64_calculate_crcc ( ULong old, ULong msg, ULong len ); ++extern ULong loongarch64_calculate_fclass_s ( ULong src ); ++extern ULong loongarch64_calculate_fclass_d ( ULong src ); ++extern ULong loongarch64_calculate_FCSR ( enum fpop op, ULong src1, ++ ULong src2, ULong src3 ); ++ ++#endif /* ndef __VEX_GUEST_LOONGARCH64_DEFS_H */ ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end guest_loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/guest_loongarch64_helpers.c b/VEX/priv/guest_loongarch64_helpers.c +new file mode 100644 +index 000000000..737ef7c1a +--- /dev/null ++++ b/VEX/priv/guest_loongarch64_helpers.c +@@ -0,0 +1,874 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin guest_loongarch64_helpers.c ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "libvex_basictypes.h" ++#include "libvex_emnote.h" ++#include "libvex_guest_loongarch64.h" ++#include "libvex_ir.h" ++#include "libvex.h" ++ ++#include "main_util.h" ++#include "main_globals.h" ++#include "guest_generic_bb_to_IR.h" ++#include "guest_loongarch64_defs.h" ++ ++ ++/* This file contains helper functions for loongarch64 guest code. ++ Calls to these functions are generated by the back end. */ ++ ++IRExpr* guest_loongarch64_spechelper ( const HChar * function_name, ++ IRExpr ** args, ++ IRStmt ** precedingStmts, ++ Int n_precedingStmts ) ++{ ++ return NULL; ++} ++ ++/* VISIBLE TO LIBVEX CLIENT */ ++void LibVEX_GuestLOONGARCH64_initialise ( /*OUT*/ ++ VexGuestLOONGARCH64State* vex_state ) ++{ ++ Int i; ++ ++ /* Event check fail addr and counter. 
*/ ++ vex_state->host_EvC_FAILADDR = 0; ++ vex_state->host_EvC_COUNTER = 0; ++ ++ /* CPU Registers */ ++ vex_state->guest_R0 = 0; /* Constant zero */ ++ vex_state->guest_R1 = 0; /* Return address */ ++ vex_state->guest_R2 = 0; /* Thread pointer */ ++ vex_state->guest_R3 = 0; /* Stack pointer */ ++ vex_state->guest_R4 = 0; /* Argument registers / Return value */ ++ vex_state->guest_R5 = 0; ++ vex_state->guest_R6 = 0; /* Argument registers */ ++ vex_state->guest_R7 = 0; ++ vex_state->guest_R8 = 0; ++ vex_state->guest_R9 = 0; ++ vex_state->guest_R10 = 0; ++ vex_state->guest_R11 = 0; ++ vex_state->guest_R12 = 0; /* Temporary registers */ ++ vex_state->guest_R13 = 0; ++ vex_state->guest_R14 = 0; ++ vex_state->guest_R15 = 0; ++ vex_state->guest_R16 = 0; ++ vex_state->guest_R17 = 0; ++ vex_state->guest_R18 = 0; ++ vex_state->guest_R19 = 0; ++ vex_state->guest_R20 = 0; ++ vex_state->guest_R21 = 0; /* Reserved */ ++ vex_state->guest_R22 = 0; /* Frame pointer / Static register */ ++ vex_state->guest_R23 = 0; /* Static registers */ ++ vex_state->guest_R24 = 0; ++ vex_state->guest_R25 = 0; ++ vex_state->guest_R26 = 0; ++ vex_state->guest_R27 = 0; ++ vex_state->guest_R28 = 0; ++ vex_state->guest_R29 = 0; ++ vex_state->guest_R30 = 0; ++ vex_state->guest_R31 = 0; ++ ++ vex_state->guest_PC = 0; /* Program counter */ ++ ++ /* FPU/SIMD Registers */ ++ for (i = 0; i < 8; i++) { ++ vex_state->guest_X0[i] = 0xff; ++ vex_state->guest_X1[i] = 0xff; ++ vex_state->guest_X2[i] = 0xff; ++ vex_state->guest_X3[i] = 0xff; ++ vex_state->guest_X4[i] = 0xff; ++ vex_state->guest_X5[i] = 0xff; ++ vex_state->guest_X6[i] = 0xff; ++ vex_state->guest_X7[i] = 0xff; ++ vex_state->guest_X8[i] = 0xff; ++ vex_state->guest_X9[i] = 0xff; ++ vex_state->guest_X10[i] = 0xff; ++ vex_state->guest_X11[i] = 0xff; ++ vex_state->guest_X12[i] = 0xff; ++ vex_state->guest_X13[i] = 0xff; ++ vex_state->guest_X14[i] = 0xff; ++ vex_state->guest_X15[i] = 0xff; ++ vex_state->guest_X16[i] = 0xff; ++ vex_state->guest_X17[i] = 0xff; ++ vex_state->guest_X18[i] = 0xff; ++ vex_state->guest_X19[i] = 0xff; ++ vex_state->guest_X20[i] = 0xff; ++ vex_state->guest_X21[i] = 0xff; ++ vex_state->guest_X22[i] = 0xff; ++ vex_state->guest_X23[i] = 0xff; ++ vex_state->guest_X24[i] = 0xff; ++ vex_state->guest_X25[i] = 0xff; ++ vex_state->guest_X26[i] = 0xff; ++ vex_state->guest_X27[i] = 0xff; ++ vex_state->guest_X28[i] = 0xff; ++ vex_state->guest_X29[i] = 0xff; ++ vex_state->guest_X30[i] = 0xff; ++ vex_state->guest_X31[i] = 0xff; ++ } ++ ++ vex_state->guest_FCC0 = 0; /* Condition Flag Registers */ ++ vex_state->guest_FCC1 = 0; ++ vex_state->guest_FCC2 = 0; ++ vex_state->guest_FCC3 = 0; ++ vex_state->guest_FCC4 = 0; ++ vex_state->guest_FCC5 = 0; ++ vex_state->guest_FCC6 = 0; ++ vex_state->guest_FCC7 = 0; ++ vex_state->guest_FCSR = 0; /* FP Control and Status Register */ ++ ++ /* Various pseudo-regs mandated by Vex or Valgrind. */ ++ /* Emulation notes */ ++ vex_state->guest_EMNOTE = 0; ++ ++ /* For clflush: record start and length of area to invalidate */ ++ vex_state->guest_CMSTART = 0; ++ vex_state->guest_CMLEN = 0; ++ ++ /* Used to record the unredirected guest address at the start of ++ a translation whose start has been redirected. By reading ++ this pseudo-register shortly afterwards, the translation can ++ find out what the corresponding no-redirection address was. ++ Note, this is only set for wrap-style redirects, not for ++ replace-style ones. 
*/ ++ vex_state->guest_NRADDR = 0; ++} ++ ++ ++/*-----------------------------------------------------------*/ ++/*--- Describing the loongarch64 guest state, for the ---*/ ++/*--- benefit of iropt and instrumenters ---*/ ++/*-----------------------------------------------------------*/ ++ ++/* Figure out if any part of the guest state contained in minoff ++ .. maxoff requires precise memory exceptions. If in doubt return ++ True (but this generates significantly slower code). ++ ++ We enforce precise exns for guest SP, PC and FP. ++ ++ Only SP is needed in mode VexRegUpdSpAtMemAccess. ++*/ ++ ++Bool guest_loongarch64_state_requires_precise_mem_exns ( Int minoff, ++ Int maxoff, ++ VexRegisterUpdates pxControl ) ++{ ++ Int sp_min = offsetof(VexGuestLOONGARCH64State, guest_R3); ++ Int sp_max = sp_min + 8 - 1; ++ if ( maxoff < sp_min || minoff > sp_max ) { ++ /* no overlap with sp */ ++ if (pxControl == VexRegUpdSpAtMemAccess) ++ return False; /* We only need to check stack pointer. */ ++ } else { ++ return True; ++ } ++ ++ Int pc_min = offsetof(VexGuestLOONGARCH64State, guest_PC); ++ Int pc_max = pc_min + 8 - 1; ++ if ( maxoff < pc_min || minoff > pc_max ) { ++ /* no overlap with pc */ ++ } else { ++ return True; ++ } ++ ++ Int fp_min = offsetof(VexGuestLOONGARCH64State, guest_R22); ++ Int fp_max = fp_min + 8 - 1; ++ if ( maxoff < fp_min || minoff > fp_max ) { ++ /* no overlap with fp */ ++ } else { ++ return True; ++ } ++ ++ return False; ++} ++ ++#define ALWAYSDEFD64(field) \ ++ { offsetof(VexGuestLOONGARCH64State, field), \ ++ (sizeof ((VexGuestLOONGARCH64State*)0)->field) } ++ ++VexGuestLayout loongarch64Guest_layout = { ++ /* Total size of the guest state, in bytes. */ ++ .total_sizeB = sizeof(VexGuestLOONGARCH64State), ++ /* Describe the stack pointer. */ ++ .offset_SP = offsetof(VexGuestLOONGARCH64State, guest_R3), ++ .sizeof_SP = 8, ++ /* Describe the frame pointer. */ ++ .offset_FP = offsetof(VexGuestLOONGARCH64State, guest_R22), ++ .sizeof_FP = 8, ++ /* Describe the instruction pointer. */ ++ .offset_IP = offsetof(VexGuestLOONGARCH64State, guest_PC), ++ .sizeof_IP = 8, ++ /* Describe any sections to be regarded by Memcheck as ++ 'always-defined'. */ ++ .n_alwaysDefd = 6, ++ /* ? :( */ ++ .alwaysDefd = { ++ /* 0 */ ALWAYSDEFD64(guest_R0), ++ /* 1 */ ALWAYSDEFD64(guest_PC), ++ /* 2 */ ALWAYSDEFD64(guest_EMNOTE), ++ /* 3 */ ALWAYSDEFD64(guest_CMSTART), ++ /* 4 */ ALWAYSDEFD64(guest_CMLEN), ++ /* 5 */ ALWAYSDEFD64(guest_NRADDR), ++ } ++}; ++ ++ ++/*-----------------------------------------------------------*/ ++/*--- loongarch64 guest helpers ---*/ ++/*-----------------------------------------------------------*/ ++ ++/* Claim to be the following CPU, which is probably representative of ++ the earliest loongarch64 offerings. 
++ ++ CPU Family : Loongson-64bit ++ Model Name : Loongson-3A5000LL ++ CPU Revision : 0x10 ++ FPU Revision : 0x00 ++ CPU MHz : 2300.00 ++ BogoMIPS : 4600.00 ++ TLB Entries : 2112 ++ Address Sizes : 48 bits physical, 48 bits virtual ++ ISA : loongarch32 loongarch64 ++ Features : cpucfg lam ual fpu lsx lasx complex crypto lvz ++ Hardware Watchpoint : yes, iwatch count: 8, dwatch count: 8 ++*/ ++ULong loongarch64_calculate_cpucfg ( ULong src ) ++{ ++ ULong res; ++ switch (src) { ++ case 0x0: ++ res = 0x0014c010; ++ break; ++ case 0x1: ++ res = 0x03f2f2fe; ++ break; ++ case 0x2: ++ res = 0x007ccfc7; ++ break; ++ case 0x3: ++ res = 0x0000fcff; ++ break; ++ case 0x4: ++ res = 0x05f5e100; ++ break; ++ case 0x5: ++ res = 0x00010001; ++ break; ++ case 0x6: ++ res = 0x00007f33; ++ break; ++ case 0x10: ++ res = 0x00002c3d; ++ break; ++ case 0x11: ++ res = 0x06080003; ++ break; ++ case 0x12: ++ res = 0x06080003; ++ break; ++ case 0x13: ++ res = 0x0608000f; ++ break; ++ case 0x14: ++ res = 0x060e000f; ++ break; ++ default: ++ res = 0x00000000; ++ break; ++ } ++ return (ULong)(Long)(Int)res; ++} ++ ++static void swap_UChar ( UChar* a, UChar* b ) ++{ ++ UChar t = *a; ++ *a = *b; ++ *b = t; ++} ++ ++ULong loongarch64_calculate_revb_2h ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[1]); ++ swap_UChar(&s[2], &s[3]); ++ return (ULong)(Long)(Int)src; ++} ++ ++ULong loongarch64_calculate_revb_4h ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[1]); ++ swap_UChar(&s[2], &s[3]); ++ swap_UChar(&s[4], &s[5]); ++ swap_UChar(&s[6], &s[7]); ++ return src; ++} ++ ++ULong loongarch64_calculate_revb_2w ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[3]); ++ swap_UChar(&s[1], &s[2]); ++ swap_UChar(&s[4], &s[7]); ++ swap_UChar(&s[5], &s[6]); ++ return src; ++} ++ ++ULong loongarch64_calculate_revb_d ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[7]); ++ swap_UChar(&s[1], &s[6]); ++ swap_UChar(&s[2], &s[5]); ++ swap_UChar(&s[3], &s[4]); ++ return src; ++} ++ ++static void swap_UShort ( UShort* a, UShort* b ) ++{ ++ UShort t = *a; ++ *a = *b; ++ *b = t; ++} ++ ++ULong loongarch64_calculate_revh_2w ( ULong src ) ++{ ++ UShort* s = (UShort*)&src; ++ swap_UShort(&s[0], &s[1]); ++ swap_UShort(&s[2], &s[3]); ++ return src; ++} ++ ++ULong loongarch64_calculate_revh_d ( ULong src ) ++{ ++ UShort* s = (UShort*)&src; ++ swap_UShort(&s[0], &s[3]); ++ swap_UShort(&s[1], &s[2]); ++ return src; ++} ++ ++static ULong bitrev ( ULong src, ULong start, ULong end ) ++{ ++ int i, j; ++ ULong res = 0; ++ for (i = start, j = 1; i < end; i++, j++) ++ res |= ((src >> i) & 1) << (end - j); ++ return res; ++} ++ ++ULong loongarch64_calculate_bitrev_4b ( ULong src ) ++{ ++ ULong res = bitrev(src, 0, 8); ++ res |= bitrev(src, 8, 16); ++ res |= bitrev(src, 16, 24); ++ res |= bitrev(src, 24, 32); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_bitrev_8b ( ULong src ) ++{ ++ ULong res = bitrev(src, 0, 8); ++ res |= bitrev(src, 8, 16); ++ res |= bitrev(src, 16, 24); ++ res |= bitrev(src, 24, 32); ++ res |= bitrev(src, 32, 40); ++ res |= bitrev(src, 40, 48); ++ res |= bitrev(src, 48, 56); ++ res |= bitrev(src, 56, 64); ++ return res; ++} ++ ++ULong loongarch64_calculate_bitrev_w ( ULong src ) ++{ ++ ULong res = bitrev(src, 0, 32); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_bitrev_d ( ULong src ) ++{ ++ return bitrev(src, 0, 64); ++} ++ ++static ULong crc32 ( ULong old, ULong msg, ULong width, ULong poly ) ++{ ++ int i; 
++ ULong new; ++ if (width == 8) ++ msg &= 0xff; ++ else if (width == 16) ++ msg &= 0xffff; ++ else if (width == 32) ++ msg &= 0xffffffff; ++ new = (old & 0xffffffff) ^ msg; ++ for (i = 0; i < width; i++) { ++ if (new & 1) ++ new = (new >> 1) ^ poly; ++ else ++ new >>= 1; ++ } ++ return new; ++} ++ ++ULong loongarch64_calculate_crc ( ULong old, ULong msg, ULong len ) ++{ ++ ULong res = crc32(old, msg, len, 0xedb88320); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_crcc ( ULong old, ULong msg, ULong len ) ++{ ++ ULong res = crc32(old, msg, len, 0x82f63b78); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_fclass_s ( ULong src ) ++{ ++ UInt f = src; ++ Bool sign = toBool(f >> 31); ++ if ((f & 0x7fffffff) == 0x7f800000) { ++ return sign ? 1 << 2 : 1 << 6; ++ } else if ((f & 0x7fffffff) == 0) { ++ return sign ? 1 << 5 : 1 << 9; ++ } else if ((f & 0x7f800000) == 0) { ++ return sign ? 1 << 4 : 1 << 8; ++ } else if ((f & ~(1 << 31)) > 0x7f800000) { ++ return ((UInt)(f << 1) >= 0xff800000) ? 1 << 1 : 1 << 0; ++ } else { ++ return sign ? 1 << 3 : 1 << 7; ++ } ++} ++ ++ULong loongarch64_calculate_fclass_d ( ULong src ) ++{ ++ ULong f = src; ++ Bool sign = toBool(f >> 63); ++ if ((f & 0x7fffffffffffffffULL) == 0x7ff0000000000000ULL) { ++ return sign ? 1 << 2 : 1 << 6; ++ } else if ((f & 0x7fffffffffffffffULL) == 0) { ++ return sign ? 1 << 5 : 1 << 9; ++ } else if ((f & 0x7ff0000000000000ULL) == 0) { ++ return sign ? 1 << 4 : 1 << 8; ++ } else if ((f & ~(1ULL << 63)) > 0x7ff0000000000000ULL) { ++ return ((f << 1) >= 0xfff0000000000000ULL) ? 1 << 1 : 1 << 0; ++ } else { ++ return sign ? 1 << 3 : 1 << 7; ++ } ++} ++ ++#if defined(__loongarch__) ++#define ASM_VOLATILE_UNARY(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $f24, %1 \n\t" \ ++ "movfcsr2gr %0, $r2 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1) \ ++ : "$s0", "$f24" \ ++ ) ++ ++#define ASM_VOLATILE_BINARY(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $f24, %1, %2 \n\t" \ ++ "movfcsr2gr %0, $r2 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1), "f" (src2) \ ++ : "$s0", "$f24" \ ++ ) ++ ++#define ASM_VOLATILE_TRINARY(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $f24, %1, %2, %3 \n\t" \ ++ "movfcsr2gr %0, $r2 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1), "f" (src2), "f" (src3) \ ++ : "$s0", "$f24" \ ++ ) ++ ++#define ASM_VOLATILE_FCMP(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $fcc0, %1, %2 \n\t" \ ++ "movfcsr2gr %0, $r0 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1), "f" (src2) \ ++ : "$s0", "$fcc0" \ ++ ) ++#endif ++ ++/* Calculate FCSR and return whether an exception needs to be thrown */ ++ULong loongarch64_calculate_FCSR ( enum fpop op, ULong src1, ++ ULong src2, ULong src3 ) ++{ ++ UInt fcsr2 = 0; ++#if defined(__loongarch__) ++ switch (op) { ++ case FADD_S: ++ ASM_VOLATILE_BINARY(fadd.s); ++ break; ++ case FADD_D: ++ ASM_VOLATILE_BINARY(fadd.d); ++ break; ++ case FSUB_S: ++ ASM_VOLATILE_BINARY(fsub.s); ++ break; ++ case FSUB_D: ++ ASM_VOLATILE_BINARY(fsub.d); ++ break; ++ case FMUL_S: ++ ASM_VOLATILE_BINARY(fmul.s); ++ break; ++ case FMUL_D: ++ ASM_VOLATILE_BINARY(fmul.d); ++ break; ++ case FDIV_S: ++ ASM_VOLATILE_BINARY(fdiv.s); ++ break; ++ case FDIV_D: ++ 
ASM_VOLATILE_BINARY(fdiv.d); ++ break; ++ case FMADD_S: ++ ASM_VOLATILE_TRINARY(fmadd.s); ++ break; ++ case FMADD_D: ++ ASM_VOLATILE_TRINARY(fmadd.d); ++ break; ++ case FMSUB_S: ++ ASM_VOLATILE_TRINARY(fmsub.s); ++ break; ++ case FMSUB_D: ++ ASM_VOLATILE_TRINARY(fmsub.d); ++ break; ++ case FNMADD_S: ++ ASM_VOLATILE_TRINARY(fnmadd.s); ++ break; ++ case FNMADD_D: ++ ASM_VOLATILE_TRINARY(fnmadd.d); ++ break; ++ case FNMSUB_S: ++ ASM_VOLATILE_TRINARY(fnmsub.s); ++ break; ++ case FNMSUB_D: ++ ASM_VOLATILE_TRINARY(fnmsub.d); ++ break; ++ case FMAX_S: ++ ASM_VOLATILE_BINARY(fmax.s); ++ break; ++ case FMAX_D: ++ ASM_VOLATILE_BINARY(fmax.d); ++ break; ++ case FMIN_S: ++ ASM_VOLATILE_BINARY(fmin.s); ++ break; ++ case FMIN_D: ++ ASM_VOLATILE_BINARY(fmin.d); ++ break; ++ case FMAXA_S: ++ ASM_VOLATILE_BINARY(fmaxa.s); ++ break; ++ case FMAXA_D: ++ ASM_VOLATILE_BINARY(fmaxa.d); ++ break; ++ case FMINA_S: ++ ASM_VOLATILE_BINARY(fmina.s); ++ break; ++ case FMINA_D: ++ ASM_VOLATILE_BINARY(fmina.d); ++ break; ++ case FABS_S: ++ ASM_VOLATILE_UNARY(fabs.s); ++ break; ++ case FABS_D: ++ ASM_VOLATILE_UNARY(fabs.d); ++ break; ++ case FNEG_S: ++ ASM_VOLATILE_UNARY(fneg.s); ++ break; ++ case FNEG_D: ++ ASM_VOLATILE_UNARY(fneg.d); ++ break; ++ case FSQRT_S: ++ ASM_VOLATILE_UNARY(fsqrt.s); ++ break; ++ case FSQRT_D: ++ ASM_VOLATILE_UNARY(fsqrt.d); ++ break; ++ case FRECIP_S: ++ ASM_VOLATILE_UNARY(frecip.s); ++ break; ++ case FRECIP_D: ++ ASM_VOLATILE_UNARY(frecip.d); ++ break; ++ case FRSQRT_S: ++ ASM_VOLATILE_UNARY(frsqrt.s); ++ break; ++ case FRSQRT_D: ++ ASM_VOLATILE_UNARY(frsqrt.d); ++ break; ++ case FSCALEB_S: ++ ASM_VOLATILE_BINARY(fscaleb.s); ++ break; ++ case FSCALEB_D: ++ ASM_VOLATILE_BINARY(fscaleb.d); ++ break; ++ case FLOGB_S: ++ ASM_VOLATILE_UNARY(flogb.s); ++ break; ++ case FLOGB_D: ++ ASM_VOLATILE_UNARY(flogb.d); ++ break; ++ case FCMP_CAF_S: ++ ASM_VOLATILE_FCMP(fcmp.caf.s); ++ break; ++ case FCMP_CAF_D: ++ ASM_VOLATILE_FCMP(fcmp.caf.d); ++ break; ++ case FCMP_SAF_S: ++ ASM_VOLATILE_FCMP(fcmp.saf.s); ++ break; ++ case FCMP_SAF_D: ++ ASM_VOLATILE_FCMP(fcmp.saf.d); ++ break; ++ case FCMP_CLT_S: ++ ASM_VOLATILE_FCMP(fcmp.clt.s); ++ break; ++ case FCMP_CLT_D: ++ ASM_VOLATILE_FCMP(fcmp.clt.d); ++ break; ++ case FCMP_SLT_S: ++ ASM_VOLATILE_FCMP(fcmp.slt.s); ++ break; ++ case FCMP_SLT_D: ++ ASM_VOLATILE_FCMP(fcmp.slt.d); ++ break; ++ case FCMP_CEQ_S: ++ ASM_VOLATILE_FCMP(fcmp.ceq.s); ++ break; ++ case FCMP_CEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.ceq.d); ++ break; ++ case FCMP_SEQ_S: ++ ASM_VOLATILE_FCMP(fcmp.seq.s); ++ break; ++ case FCMP_SEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.seq.d); ++ break; ++ case FCMP_CLE_S: ++ ASM_VOLATILE_FCMP(fcmp.cle.s); ++ break; ++ case FCMP_CLE_D: ++ ASM_VOLATILE_FCMP(fcmp.cle.d); ++ break; ++ case FCMP_SLE_S: ++ ASM_VOLATILE_FCMP(fcmp.sle.s); ++ break; ++ case FCMP_SLE_D: ++ ASM_VOLATILE_FCMP(fcmp.sle.d); ++ break; ++ case FCMP_CUN_S: ++ ASM_VOLATILE_FCMP(fcmp.cun.s); ++ break; ++ case FCMP_CUN_D: ++ ASM_VOLATILE_FCMP(fcmp.cun.d); ++ break; ++ case FCMP_SUN_S: ++ ASM_VOLATILE_FCMP(fcmp.sun.s); ++ break; ++ case FCMP_SUN_D: ++ ASM_VOLATILE_FCMP(fcmp.sun.d); ++ break; ++ case FCMP_CULT_S: ++ ASM_VOLATILE_FCMP(fcmp.cult.s); ++ break; ++ case FCMP_CULT_D: ++ ASM_VOLATILE_FCMP(fcmp.cult.d); ++ break; ++ case FCMP_SULT_S: ++ ASM_VOLATILE_FCMP(fcmp.sult.s); ++ break; ++ case FCMP_SULT_D: ++ ASM_VOLATILE_FCMP(fcmp.sult.d); ++ break; ++ case FCMP_CUEQ_S: ++ ASM_VOLATILE_FCMP(fcmp.cueq.s); ++ break; ++ case FCMP_CUEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.cueq.d); ++ break; ++ case FCMP_SUEQ_S: ++ 
ASM_VOLATILE_FCMP(fcmp.sueq.s); ++ break; ++ case FCMP_SUEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.sueq.d); ++ break; ++ case FCMP_CULE_S: ++ ASM_VOLATILE_FCMP(fcmp.cule.s); ++ break; ++ case FCMP_CULE_D: ++ ASM_VOLATILE_FCMP(fcmp.cule.d); ++ break; ++ case FCMP_SULE_S: ++ ASM_VOLATILE_FCMP(fcmp.sule.s); ++ break; ++ case FCMP_SULE_D: ++ ASM_VOLATILE_FCMP(fcmp.sule.d); ++ break; ++ case FCMP_CNE_S: ++ ASM_VOLATILE_FCMP(fcmp.cne.s); ++ break; ++ case FCMP_CNE_D: ++ ASM_VOLATILE_FCMP(fcmp.cne.d); ++ break; ++ case FCMP_SNE_S: ++ ASM_VOLATILE_FCMP(fcmp.sne.s); ++ break; ++ case FCMP_SNE_D: ++ ASM_VOLATILE_FCMP(fcmp.sne.d); ++ break; ++ case FCMP_COR_S: ++ ASM_VOLATILE_FCMP(fcmp.cor.s); ++ break; ++ case FCMP_COR_D: ++ ASM_VOLATILE_FCMP(fcmp.cor.d); ++ break; ++ case FCMP_SOR_S: ++ ASM_VOLATILE_FCMP(fcmp.sor.s); ++ break; ++ case FCMP_SOR_D: ++ ASM_VOLATILE_FCMP(fcmp.sor.d); ++ break; ++ case FCMP_CUNE_S: ++ ASM_VOLATILE_FCMP(fcmp.cune.s); ++ break; ++ case FCMP_CUNE_D: ++ ASM_VOLATILE_FCMP(fcmp.cune.d); ++ break; ++ case FCMP_SUNE_S: ++ ASM_VOLATILE_FCMP(fcmp.sune.s); ++ break; ++ case FCMP_SUNE_D: ++ ASM_VOLATILE_FCMP(fcmp.sune.d); ++ break; ++ case FCVT_S_D: ++ ASM_VOLATILE_UNARY(fcvt.s.d); ++ break; ++ case FCVT_D_S: ++ ASM_VOLATILE_UNARY(fcvt.d.s); ++ break; ++ case FTINTRM_W_S: ++ ASM_VOLATILE_UNARY(ftintrm.w.s); ++ break; ++ case FTINTRM_W_D: ++ ASM_VOLATILE_UNARY(ftintrm.w.d); ++ break; ++ case FTINTRM_L_S: ++ ASM_VOLATILE_UNARY(ftintrm.l.s); ++ break; ++ case FTINTRM_L_D: ++ ASM_VOLATILE_UNARY(ftintrm.l.d); ++ break; ++ case FTINTRP_W_S: ++ ASM_VOLATILE_UNARY(ftintrp.w.s); ++ break; ++ case FTINTRP_W_D: ++ ASM_VOLATILE_UNARY(ftintrp.w.d); ++ break; ++ case FTINTRP_L_S: ++ ASM_VOLATILE_UNARY(ftintrp.l.s); ++ break; ++ case FTINTRP_L_D: ++ ASM_VOLATILE_UNARY(ftintrp.l.d); ++ break; ++ case FTINTRZ_W_S: ++ ASM_VOLATILE_UNARY(ftintrz.w.s); ++ break; ++ case FTINTRZ_W_D: ++ ASM_VOLATILE_UNARY(ftintrz.w.d); ++ break; ++ case FTINTRZ_L_S: ++ ASM_VOLATILE_UNARY(ftintrz.l.s); ++ break; ++ case FTINTRZ_L_D: ++ ASM_VOLATILE_UNARY(ftintrz.l.d); ++ break; ++ case FTINTRNE_W_S: ++ ASM_VOLATILE_UNARY(ftintrne.w.s); ++ break; ++ case FTINTRNE_W_D: ++ ASM_VOLATILE_UNARY(ftintrne.w.d); ++ break; ++ case FTINTRNE_L_S: ++ ASM_VOLATILE_UNARY(ftintrne.l.s); ++ break; ++ case FTINTRNE_L_D: ++ ASM_VOLATILE_UNARY(ftintrne.l.d); ++ break; ++ case FTINT_W_S: ++ ASM_VOLATILE_UNARY(ftint.w.s); ++ break; ++ case FTINT_W_D: ++ ASM_VOLATILE_UNARY(ftint.w.d); ++ break; ++ case FTINT_L_S: ++ ASM_VOLATILE_UNARY(ftint.l.s); ++ break; ++ case FTINT_L_D: ++ ASM_VOLATILE_UNARY(ftint.l.d); ++ break; ++ case FFINT_S_W: ++ ASM_VOLATILE_UNARY(ffint.s.w); ++ break; ++ case FFINT_D_W: ++ ASM_VOLATILE_UNARY(ffint.d.w); ++ break; ++ case FFINT_S_L: ++ ASM_VOLATILE_UNARY(ffint.s.l); ++ break; ++ case FFINT_D_L: ++ ASM_VOLATILE_UNARY(ffint.d.l); ++ break; ++ case FRINT_S: ++ ASM_VOLATILE_UNARY(frint.s); ++ break; ++ case FRINT_D: ++ ASM_VOLATILE_UNARY(frint.d); ++ break; ++ default: ++ break; ++ } ++#endif ++ return (ULong)fcsr2; ++} ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end guest_loongarch64_helpers.c ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/guest_loongarch64_toIR.c b/VEX/priv/guest_loongarch64_toIR.c +new file mode 100644 +index 000000000..e7c344f3f +--- /dev/null ++++ b/VEX/priv/guest_loongarch64_toIR.c +@@ -0,0 +1,9753 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- begin guest_loongarch64_toIR.c 
---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++/* "Special" instructions. ++ ++ This instruction decoder can decode four special instructions ++ which mean nothing natively (are no-ops as far as regs/mem are ++ concerned) but have meaning for supporting Valgrind. A special ++ instruction is flagged by a 16-byte preamble: ++ ++ 00450c00 (srli.d $zero, $zero, 3 ++ 00453400 srli.d $zero, $zero, 13 ++ 00457400 srli.d $zero, $zero, 29 ++ 00454c00 srli.d $zero, $zero, 19) ++ ++ Following that, one of the following 3 are allowed ++ (standard interpretation in parentheses): ++ ++ 001535ad (or $t1, $t1, $t1) $a7 = client_request ( $t0 ) ++ 001539ce (or $t2, $t2, $t2) $a7 = guest_NRADDR ++ 00153def (or $t3, $t3, $t3) call-noredir $t8 ++ 00154210 (or $t4, $t4, $t4) IR injection ++ ++ Any other bytes following the 16-byte preamble are illegal and ++ constitute a failure in instruction decoding. This all assumes ++ that the preamble will never occur except in specific code ++ fragments designed for Valgrind to catch. ++*/ ++ ++/* Translates LOONGARCH64 code to IR. */ ++ ++#include "libvex_basictypes.h" ++#include "libvex_ir.h" ++#include "libvex.h" ++#include "libvex_guest_loongarch64.h" ++ ++#include "main_util.h" ++#include "main_globals.h" ++#include "guest_generic_bb_to_IR.h" ++#include "guest_loongarch64_defs.h" ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Globals ---*/ ++/*------------------------------------------------------------*/ ++ ++/* These are set at the start of the translation of a instruction, so ++ that we don't have to pass them around endlessly. CONST means does ++ not change during translation of the instruction. */ ++ ++/* CONST: what is the host's endianness? We need to know this in ++ order to do sub-register accesses to the SIMD/FP registers ++ correctly. */ ++static VexEndness host_endness; ++ ++/* CONST: The guest address for the instruction currently being ++ translated. */ ++static Addr64 guest_PC_curr_instr; ++ ++/* MOD: The IRSB* into which we're generating code. */ ++static IRSB* irsb; ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Debugging output ---*/ ++/*------------------------------------------------------------*/ ++ ++#define DIP(format, args...) 
\ ++ if (vex_traceflags & VEX_TRACE_FE) \ ++ vex_printf(format, ## args) ++ ++static const HChar* nameIReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$zero", ++ "$ra", ++ "$tp", ++ "$sp", ++ "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", ++ "$r21", /* Reserved */ ++ "$fp", ++ "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7", "$s8" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameFReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$fa0", "$fa1", "$fa2", "$fa3", "$fa4", "$fa5", "$fa6", "$fa7", ++ "$ft0", "$ft1", "$ft2", "$ft3", "$ft4", "$ft5", "$ft6", "$ft7", ++ "$ft8", "$ft9", "$ft10", "$ft11", "$ft12", "$ft13", "$ft14", "$ft15", ++ "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", "$fs6", "$fs7" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameVReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$v0", "$v1", "$v2", "$v3", "$v4", "$v5", "$v6", "$v7", ++ "$v8", "$v9", "$v10", "$v11", "$v12", "$v13", "$v14", "$v15", ++ "$v16", "$v17", "$v18", "$v19", "$v20", "$v21", "$v22", "$v23", ++ "$v24", "$v25", "$v26", "$v27", "$v28", "$v29", "$v30", "$v31" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameXReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$x0", "$x1", "$x2", "$x3", "$x4", "$x5", "$x6", "$x7", ++ "$x8", "$x9", "$x10", "$x11", "$x12", "$x13", "$x14", "$x15", ++ "$x16", "$x17", "$x18", "$x19", "$x20", "$x21", "$x22", "$x23", ++ "$x24", "$x25", "$x26", "$x27", "$x28", "$x29", "$x30", "$x31" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameFCC( UInt reg ) ++{ ++ vassert(reg < 8); ++ static const HChar* reg_names[8] = { ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameFCSR( UInt reg ) ++{ ++ vassert(reg < 4); ++ static const HChar* reg_names[4] = { ++ "$fcsr0", "$fcsr1", "$fcsr2", "$fcsr3" ++ }; ++ return reg_names[reg]; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helper bits and pieces for deconstructing the ---*/ ++/*--- loongarch64 insn stream. ---*/ ++/*------------------------------------------------------------*/ ++ ++/* Get insn[max:min] */ ++#define SLICE(insn, max, min) \ ++ ((((UInt)(insn)) >> (min)) & (UInt)((1ULL << ((max) - (min) + 1)) - 1ULL)) ++ ++/* Do a little-endian load of a 32-bit word, regardless of the ++ endianness of the underlying host. 
*/ ++static inline UInt getUInt ( const UChar* p ) ++{ ++ UInt w = 0; ++ w = (w << 8) | p[3]; ++ w = (w << 8) | p[2]; ++ w = (w << 8) | p[1]; ++ w = (w << 8) | p[0]; ++ return w; ++} ++ ++/* Sign extend to 32-bit */ ++static inline UInt extend32 ( UInt imm, UInt size ) ++{ ++ UInt shift = 32 - size; ++ return (UInt)(((Int)imm << shift) >> shift); ++} ++ ++/* Sign extend to 64-bit */ ++static inline ULong extend64 ( ULong imm, UInt size ) ++{ ++ UInt shift = 64 - size; ++ return (ULong)(((Long)imm << shift) >> shift); ++} ++ ++static inline UInt get_rd ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_rj ( UInt insn ) ++{ ++ return SLICE(insn, 9, 5); ++} ++ ++static inline UInt get_rk ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_code ( UInt insn ) ++{ ++ return SLICE(insn, 14, 0); ++} ++ ++static inline UInt get_ui5 ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_ui6 ( UInt insn ) ++{ ++ return SLICE(insn, 15, 10); ++} ++ ++static inline UInt get_sa2 ( UInt insn ) ++{ ++ return SLICE(insn, 16, 15); ++} ++ ++static inline UInt get_sa3 ( UInt insn ) ++{ ++ return SLICE(insn, 17, 15); ++} ++ ++static inline UInt get_lsbw ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_msbw ( UInt insn ) ++{ ++ return SLICE(insn, 20, 16); ++} ++ ++static inline UInt get_lsbd ( UInt insn ) ++{ ++ return SLICE(insn, 15, 10); ++} ++ ++static inline UInt get_msbd ( UInt insn ) ++{ ++ return SLICE(insn, 21, 16); ++} ++ ++static inline UInt get_si12 ( UInt insn ) ++{ ++ return SLICE(insn, 21, 10); ++} ++ ++static inline UInt get_ui12 ( UInt insn ) ++{ ++ return SLICE(insn, 21, 10); ++} ++ ++static inline UInt get_si14 ( UInt insn ) ++{ ++ return SLICE(insn, 23, 10); ++} ++ ++static inline UInt get_si16 ( UInt insn ) ++{ ++ return SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_si20 ( UInt insn ) ++{ ++ return SLICE(insn, 24, 5); ++} ++ ++static inline UInt get_hint5 ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_hint15 ( UInt insn ) ++{ ++ return SLICE(insn, 14, 0); ++} ++ ++static inline UInt get_offs16 ( UInt insn ) ++{ ++ return SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_offs21 ( UInt insn ) ++{ ++ return (SLICE(insn, 4, 0) << 16) | SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_offs26 ( UInt insn ) ++{ ++ return (SLICE(insn, 9, 0) << 16) | SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_fd ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_fj ( UInt insn ) ++{ ++ return SLICE(insn, 9, 5); ++} ++ ++static inline UInt get_fk ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_fa ( UInt insn ) ++{ ++ return SLICE(insn, 19, 15); ++} ++ ++static inline UInt get_cond ( UInt insn ) ++{ ++ return SLICE(insn, 19, 15); ++} ++ ++static inline UInt get_fcsrl ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_fcsrh ( UInt insn ) ++{ ++ return SLICE(insn, 9, 5); ++} ++ ++static inline UInt get_cd ( UInt insn ) ++{ ++ return SLICE(insn, 2, 0); ++} ++ ++static inline UInt get_cj ( UInt insn ) ++{ ++ return SLICE(insn, 7, 5); ++} ++ ++static inline UInt get_ca ( UInt insn ) ++{ ++ return SLICE(insn, 17, 15); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helper bits and pieces for creating IR fragments. 
---*/ ++/*------------------------------------------------------------*/ ++ ++static inline IRExpr* mkU64 ( ULong i ) ++{ ++ return IRExpr_Const(IRConst_U64(i)); ++} ++ ++static inline IRExpr* mkU32 ( UInt i ) ++{ ++ return IRExpr_Const(IRConst_U32(i)); ++} ++ ++static inline IRExpr* mkU8 ( UInt i ) ++{ ++ vassert(i < 256); ++ return IRExpr_Const(IRConst_U8((UChar)i)); ++} ++ ++static inline IRExpr* mkU1 ( UInt i ) ++{ ++ vassert(i == 0 || i == 1); ++ return IRExpr_Const(IRConst_U1((Bool)i)); ++} ++ ++static inline IRExpr* mkF64i ( ULong i ) ++{ ++ return IRExpr_Const(IRConst_F64i(i)); ++} ++ ++static inline IRExpr* mkF32i ( UInt i ) ++{ ++ return IRExpr_Const(IRConst_F32i(i)); ++} ++ ++static inline IRExpr* mkexpr ( IRTemp tmp ) ++{ ++ return IRExpr_RdTmp(tmp); ++} ++ ++static inline IRExpr* unop ( IROp op, IRExpr* a ) ++{ ++ return IRExpr_Unop(op, a); ++} ++ ++static inline IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) ++{ ++ return IRExpr_Binop(op, a1, a2); ++} ++ ++static inline IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) ++{ ++ return IRExpr_Triop(op, a1, a2, a3); ++} ++ ++static inline IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2, ++ IRExpr* a3, IRExpr* a4 ) ++{ ++ return IRExpr_Qop(op, a1, a2, a3, a4); ++} ++ ++static inline IRExpr* load ( IRType ty, IRExpr* addr ) ++{ ++ return IRExpr_Load(Iend_LE, ty, addr); ++} ++ ++/* Add a statement to the list held by "irbb". */ ++static inline void stmt ( IRStmt* st ) ++{ ++ addStmtToIRSB(irsb, st); ++} ++ ++static inline void store ( IRExpr* addr, IRExpr* data ) ++{ ++ stmt(IRStmt_Store(Iend_LE, addr, data)); ++} ++ ++static inline void assign ( IRTemp dst, IRExpr* e ) ++{ ++ stmt(IRStmt_WrTmp(dst, e)); ++} ++ ++static inline void exit ( IRExpr* e, IRJumpKind jk, ULong offs ) ++{ ++ stmt(IRStmt_Exit(e, jk, IRConst_U64(guest_PC_curr_instr + offs), ++ offsetof(VexGuestLOONGARCH64State, guest_PC))); ++} ++ ++/* Generate an expression to check if addr is aligned. */ ++static inline IRExpr* check_align ( IRExpr* addr, IRExpr* align ) ++{ ++ return binop(Iop_CmpNE64, binop(Iop_And64, addr, align), ++ IRExpr_Get(offsetof(VexGuestLOONGARCH64State, guest_R0), ++ Ity_I64)); ++} ++ ++/* Generate a SIGSYS if the expression evaluates to true. */ ++static inline void gen_SIGSYS ( IRExpr* cond ) ++{ ++ exit(cond, Ijk_SigSYS, 4); ++} ++ ++/* Generate a SIGBUS if the expression evaluates to true. */ ++static inline void gen_SIGBUS ( IRExpr* cond ) ++{ ++ exit(cond, Ijk_SigBUS, 4); ++} ++ ++static inline void cas ( IRTemp old, IRExpr* addr, IRExpr* expd, IRExpr* new ) ++{ ++ IRCAS* c = mkIRCAS(IRTemp_INVALID, old, Iend_LE, addr, ++ NULL, expd, NULL, new); ++ stmt(IRStmt_CAS(c)); ++} ++ ++/* Generate a new temporary of the given type. */ ++static inline IRTemp newTemp ( IRType ty ) ++{ ++ vassert(isPlausibleIRType(ty)); ++ return newIRTemp(irsb->tyenv, ty); ++} ++ ++/* S-extend 8/16/32 bit int expr to 64. */ ++static IRExpr* extendS ( IRType ty, IRExpr* e ) ++{ ++ switch (ty) { ++ case Ity_I1: return unop(Iop_1Sto64, e); ++ case Ity_I8: return unop(Iop_8Sto64, e); ++ case Ity_I16: return unop(Iop_16Sto64, e); ++ case Ity_I32: return unop(Iop_32Sto64, e); ++ default: vassert(0); ++ } ++} ++ ++/* Z-extend 8/16/32 bit int expr to 64. 
*/ ++static IRExpr* extendU ( IRType ty, IRExpr* e ) ++{ ++ switch (ty) { ++ case Ity_I1: return unop(Iop_1Uto64, e); ++ case Ity_I8: return unop(Iop_8Uto64, e); ++ case Ity_I16: return unop(Iop_16Uto64, e); ++ case Ity_I32: return unop(Iop_32Uto64, e); ++ default: vassert(0); ++ } ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for accessing guest registers. ---*/ ++/*------------------------------------------------------------*/ ++ ++/* ---------------- Integer registers ---------------- */ ++ ++static Int offsetIReg ( UInt iregNo ) ++{ ++ switch (iregNo) { ++ case 0: return offsetof(VexGuestLOONGARCH64State, guest_R0); ++ case 1: return offsetof(VexGuestLOONGARCH64State, guest_R1); ++ case 2: return offsetof(VexGuestLOONGARCH64State, guest_R2); ++ case 3: return offsetof(VexGuestLOONGARCH64State, guest_R3); ++ case 4: return offsetof(VexGuestLOONGARCH64State, guest_R4); ++ case 5: return offsetof(VexGuestLOONGARCH64State, guest_R5); ++ case 6: return offsetof(VexGuestLOONGARCH64State, guest_R6); ++ case 7: return offsetof(VexGuestLOONGARCH64State, guest_R7); ++ case 8: return offsetof(VexGuestLOONGARCH64State, guest_R8); ++ case 9: return offsetof(VexGuestLOONGARCH64State, guest_R9); ++ case 10: return offsetof(VexGuestLOONGARCH64State, guest_R10); ++ case 11: return offsetof(VexGuestLOONGARCH64State, guest_R11); ++ case 12: return offsetof(VexGuestLOONGARCH64State, guest_R12); ++ case 13: return offsetof(VexGuestLOONGARCH64State, guest_R13); ++ case 14: return offsetof(VexGuestLOONGARCH64State, guest_R14); ++ case 15: return offsetof(VexGuestLOONGARCH64State, guest_R15); ++ case 16: return offsetof(VexGuestLOONGARCH64State, guest_R16); ++ case 17: return offsetof(VexGuestLOONGARCH64State, guest_R17); ++ case 18: return offsetof(VexGuestLOONGARCH64State, guest_R18); ++ case 19: return offsetof(VexGuestLOONGARCH64State, guest_R19); ++ case 20: return offsetof(VexGuestLOONGARCH64State, guest_R20); ++ case 21: return offsetof(VexGuestLOONGARCH64State, guest_R21); ++ case 22: return offsetof(VexGuestLOONGARCH64State, guest_R22); ++ case 23: return offsetof(VexGuestLOONGARCH64State, guest_R23); ++ case 24: return offsetof(VexGuestLOONGARCH64State, guest_R24); ++ case 25: return offsetof(VexGuestLOONGARCH64State, guest_R25); ++ case 26: return offsetof(VexGuestLOONGARCH64State, guest_R26); ++ case 27: return offsetof(VexGuestLOONGARCH64State, guest_R27); ++ case 28: return offsetof(VexGuestLOONGARCH64State, guest_R28); ++ case 29: return offsetof(VexGuestLOONGARCH64State, guest_R29); ++ case 30: return offsetof(VexGuestLOONGARCH64State, guest_R30); ++ case 31: return offsetof(VexGuestLOONGARCH64State, guest_R31); ++ default: vassert(0); ++ } ++} ++ ++static IRExpr* getIReg8 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I8); ++} ++ ++static IRExpr* getIReg16 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I16); ++} ++ ++static IRExpr* getIReg32 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I32); ++} ++ ++static IRExpr* getIReg64 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I64); ++} ++ ++static void putIReg ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); ++ if (iregNo != 0) /* $r0 - constant zero */ ++ stmt(IRStmt_Put(offsetIReg(iregNo), e)); ++} ++ ++static void putPC ( IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); ++ stmt(IRStmt_Put(offsetof(VexGuestLOONGARCH64State, guest_PC), e)); ++} ++ ++/* ---------------- 
Floating point / vector registers ---------------- */ ++ ++static Int offsetXReg ( UInt iregNo ) ++{ ++ switch (iregNo) { ++ case 0: return offsetof(VexGuestLOONGARCH64State, guest_X0); ++ case 1: return offsetof(VexGuestLOONGARCH64State, guest_X1); ++ case 2: return offsetof(VexGuestLOONGARCH64State, guest_X2); ++ case 3: return offsetof(VexGuestLOONGARCH64State, guest_X3); ++ case 4: return offsetof(VexGuestLOONGARCH64State, guest_X4); ++ case 5: return offsetof(VexGuestLOONGARCH64State, guest_X5); ++ case 6: return offsetof(VexGuestLOONGARCH64State, guest_X6); ++ case 7: return offsetof(VexGuestLOONGARCH64State, guest_X7); ++ case 8: return offsetof(VexGuestLOONGARCH64State, guest_X8); ++ case 9: return offsetof(VexGuestLOONGARCH64State, guest_X9); ++ case 10: return offsetof(VexGuestLOONGARCH64State, guest_X10); ++ case 11: return offsetof(VexGuestLOONGARCH64State, guest_X11); ++ case 12: return offsetof(VexGuestLOONGARCH64State, guest_X12); ++ case 13: return offsetof(VexGuestLOONGARCH64State, guest_X13); ++ case 14: return offsetof(VexGuestLOONGARCH64State, guest_X14); ++ case 15: return offsetof(VexGuestLOONGARCH64State, guest_X15); ++ case 16: return offsetof(VexGuestLOONGARCH64State, guest_X16); ++ case 17: return offsetof(VexGuestLOONGARCH64State, guest_X17); ++ case 18: return offsetof(VexGuestLOONGARCH64State, guest_X18); ++ case 19: return offsetof(VexGuestLOONGARCH64State, guest_X19); ++ case 20: return offsetof(VexGuestLOONGARCH64State, guest_X20); ++ case 21: return offsetof(VexGuestLOONGARCH64State, guest_X21); ++ case 22: return offsetof(VexGuestLOONGARCH64State, guest_X22); ++ case 23: return offsetof(VexGuestLOONGARCH64State, guest_X23); ++ case 24: return offsetof(VexGuestLOONGARCH64State, guest_X24); ++ case 25: return offsetof(VexGuestLOONGARCH64State, guest_X25); ++ case 26: return offsetof(VexGuestLOONGARCH64State, guest_X26); ++ case 27: return offsetof(VexGuestLOONGARCH64State, guest_X27); ++ case 28: return offsetof(VexGuestLOONGARCH64State, guest_X28); ++ case 29: return offsetof(VexGuestLOONGARCH64State, guest_X29); ++ case 30: return offsetof(VexGuestLOONGARCH64State, guest_X30); ++ case 31: return offsetof(VexGuestLOONGARCH64State, guest_X31); ++ default: vassert(0); ++ } ++} ++ ++static Int offsetFCC ( UInt iregNo ) ++{ ++ switch (iregNo) { ++ case 0: return offsetof(VexGuestLOONGARCH64State, guest_FCC0); ++ case 1: return offsetof(VexGuestLOONGARCH64State, guest_FCC1); ++ case 2: return offsetof(VexGuestLOONGARCH64State, guest_FCC2); ++ case 3: return offsetof(VexGuestLOONGARCH64State, guest_FCC3); ++ case 4: return offsetof(VexGuestLOONGARCH64State, guest_FCC4); ++ case 5: return offsetof(VexGuestLOONGARCH64State, guest_FCC5); ++ case 6: return offsetof(VexGuestLOONGARCH64State, guest_FCC6); ++ case 7: return offsetof(VexGuestLOONGARCH64State, guest_FCC7); ++ default: vassert(0); ++ } ++} ++ ++/* Find the offset of the laneNo'th lane of type laneTy in the given ++ Xreg. Since the host is little-endian, the least significant lane ++ has the lowest offset. */ ++static Int offsetXRegLane ( UInt xregNo, IRType laneTy, UInt laneNo ) ++{ ++ vassert(host_endness == VexEndnessLE); ++ Int laneSzB; ++ /* Since the host is little-endian, the least significant lane ++ will be at the lowest address. 
*/ ++ switch (laneTy) { ++ case Ity_F32: laneSzB = 4; break; ++ case Ity_F64: laneSzB = 8; break; ++ case Ity_V128: laneSzB = 16; break; ++ case Ity_V256: laneSzB = 32; break; ++ default: vassert(0); break; ++ } ++ return offsetXReg(xregNo) + laneNo * laneSzB; ++} ++ ++static IRExpr* getXReg ( UInt xregNo ) ++{ ++ return IRExpr_Get(offsetXRegLane(xregNo, Ity_V256, 0), Ity_V256); ++} ++ ++static IRExpr* getVReg ( UInt vregNo ) ++{ ++ return IRExpr_Get(offsetXRegLane(vregNo, Ity_V128, 0), Ity_V128); ++} ++ ++static IRExpr* getFReg64 ( UInt fregNo ) ++{ ++ return IRExpr_Get(offsetXRegLane(fregNo, Ity_F64, 0), Ity_F64); ++} ++ ++static IRExpr* getFReg32 ( UInt fregNo ) ++{ ++ /* Get FReg32 from FReg64. ++ We could probably use IRExpr_Get(offsetXRegLane(fregNo, Ity_F32, 0), Ity_F32), ++ but that would cause Memcheck to report some errors. ++ */ ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fregNo)); ++ return unop(Iop_ReinterpI32asF32, unop(Iop_64to32, i)); ++} ++ ++static IRExpr* getFCC ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetFCC(iregNo), Ity_I8); ++} ++ ++static IRExpr* getFCSR ( UInt iregNo ) ++{ ++ /* ++ bits | name ++ --------------- ++ 4:0 | Enables ++ 7:5 | 0 ++ 9:8 | RM ++ 15:10 | 0 ++ 20:16 | Flags ++ 23:21 | 0 ++ 28:24 | Cause ++ 31:29 | 0 ++ */ ++ Int offs = offsetof(VexGuestLOONGARCH64State, guest_FCSR); ++ IRExpr* fcsr0 = IRExpr_Get(offs, Ity_I32); ++ switch (iregNo) { ++ case 0: ++ return fcsr0; ++ case 1: ++ /* FCSR1 is Enables of FCSR0. It seems that the hardware ++ implementation is that the 7th bit belongs to FCSR1. */ ++ return binop(Iop_And32, fcsr0, mkU32(0x0000009f)); ++ case 2: ++ /* FCSR2 is Cause and Flags of FCSR0. */ ++ return binop(Iop_And32, fcsr0, mkU32(0x1f1f0000)); ++ case 3: ++ /* FCSR3 is RM of FCSR0. */ ++ return binop(Iop_And32, fcsr0, mkU32(0x00000300)); ++ default: ++ vassert(0); ++ } ++} ++ ++static void putFReg32 ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putFReg64 ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putVReg ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putXReg ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V256); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putFCC ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); ++ stmt(IRStmt_Put(offsetFCC(iregNo), e)); ++} ++ ++static void putFCSR ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); ++ IRExpr* fcsr0 = getFCSR(0); ++ IRExpr* and1; ++ IRExpr* and2; ++ switch (iregNo) { ++ case 0: ++ /* It seems that the hardware implementation allows the 6th ++ bit and the 7th bit to be non-zero. */ ++ and1 = getIReg32(0); ++ and2 = binop(Iop_And32, e, mkU32(0x1f1f03df)); ++ break; ++ case 1: ++ /* FCSR1 is Enables of FCSR0. It seems that the hardware ++ implementation is that the 7th bit belongs to FCSR1. */ ++ and1 = binop(Iop_And32, fcsr0, mkU32(0xffffff60)); ++ and2 = binop(Iop_And32, e, mkU32(0x0000009f)); ++ break; ++ case 2: ++ /* FCSR2 is Cause and Flags of FCSR0. */ ++ and1 = binop(Iop_And32, fcsr0, mkU32(0xe0e0ffff)); ++ and2 = binop(Iop_And32, e, mkU32(0x1f1f0000)); ++ break; ++ case 3: ++ /* FCSR3 is RM of FCSR0. 
*/ ++ and1 = binop(Iop_And32, fcsr0, mkU32(0xfffffcff)); ++ and2 = binop(Iop_And32, e, mkU32(0x00000300)); ++ break; ++ default: ++ vassert(0); ++ } ++ Int offs = offsetof(VexGuestLOONGARCH64State, guest_FCSR); ++ stmt(IRStmt_Put(offs, binop(Iop_Or32, and1, and2))); ++} ++ ++static IRExpr* get_rounding_mode ( void ) ++{ ++ /* ++ rounding mode | LOONGARCH | IR ++ ------------------------------ ++ to nearest | 00 | 00 ++ to zero | 01 | 11 ++ to +infinity | 10 | 10 ++ to -infinity | 11 | 01 ++ */ ++ ++ /* Bits 8 to 9 in FCSR are rounding mode. */ ++ IRExpr* fcsr = getFCSR(0); ++ IRExpr* shr = binop(Iop_Shr32, fcsr, mkU8(8)); ++ IRTemp rm = newTemp(Ity_I32); ++ assign(rm, binop(Iop_And32, shr, mkU32(0x3))); ++ ++ /* rm = XOR(rm, (rm << 1) & 2) */ ++ IRExpr* shl = binop(Iop_Shl32, mkexpr(rm), mkU8(1)); ++ IRExpr* and = binop(Iop_And32, shl, mkU32(2)); ++ return binop(Iop_Xor32, mkexpr(rm), and); ++} ++ ++static void calculateFCSR ( enum fpop op, UInt nargs, ++ UInt src1, UInt src2, UInt src3 ) ++{ ++ IRExpr* s1 = NULL; ++ IRExpr* s2 = NULL; ++ IRExpr* s3 = NULL; ++ switch (nargs) { ++ case 3: s3 = unop(Iop_ReinterpF64asI64, getFReg64(src3)); /* fallthrough */ ++ case 2: s2 = unop(Iop_ReinterpF64asI64, getFReg64(src2)); /* fallthrough */ ++ case 1: s1 = unop(Iop_ReinterpF64asI64, getFReg64(src1)); break; ++ default: vassert(0); ++ } ++ IRExpr** arg = mkIRExprVec_4(mkU64(op), s1, s2, s3); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_FCSR", ++ &loongarch64_calculate_FCSR, ++ arg); ++ IRTemp fcsr2 = newTemp(Ity_I32); ++ assign(fcsr2, unop(Iop_64to32, call)); ++ putFCSR(2, mkexpr(fcsr2)); ++} ++ ++static IRExpr* gen_round_to_nearest ( void ) ++{ ++ return mkU32(0x0); ++} ++ ++static IRExpr* gen_round_down ( void ) ++{ ++ return mkU32(0x1); ++} ++ ++static IRExpr* gen_round_up ( void ) ++{ ++ return mkU32(0x2); ++} ++ ++static IRExpr* gen_round_to_zero ( void ) ++{ ++ return mkU32(0x3); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point arithmetic insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_add_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("add.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* add = binop(Iop_Add32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_add_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("add.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Add64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_sub_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sub.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* sub = binop(Iop_Sub32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, sub)); ++ ++ return True; ++} ++ ++static Bool gen_sub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ 
DIP("sub.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Sub64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_slt ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slt %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64S, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_sltu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sltu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64U, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_slti ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slti %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64S, getIReg64(rj), ++ mkU64(extend64(si12, 12))); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_sltui ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sltui %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64U, getIReg64(rj), ++ mkU64(extend64(si12, 12))); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_nor ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("nor %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* or = binop(Iop_Or64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_Not64, or)); ++ ++ return True; ++} ++ ++static Bool gen_and ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("and %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_And64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_or ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("or %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Or64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_xor ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("xor %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Xor64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_orn ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt 
rd = get_rd(insn); ++ ++ DIP("orn %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rk)); ++ putIReg(rd, binop(Iop_Or64, getIReg64(rj), not)); ++ ++ return True; ++} ++ ++static Bool gen_andn ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("andn %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rk)); ++ putIReg(rd, binop(Iop_And64, getIReg64(rj), not)); ++ ++ return True; ++} ++ ++static Bool gen_mul_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mul.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64to32, mul))); ++ ++ return True; ++} ++ ++static Bool gen_mulh_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mul))); ++ ++ return True; ++} ++ ++static Bool gen_mulh_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullU32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mul))); ++ ++ return True; ++} ++ ++static Bool gen_mul_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mul.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128to64, mul)); ++ ++ return True; ++} ++ ++static Bool gen_mulh_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mul)); ++ ++ return True; ++} ++ ++static Bool gen_mulh_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.du %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullU64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mul)); ++ ++ return True; ++} ++ ++static Bool gen_mulw_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulw.d.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_MullS32, getIReg32(rj), getIReg32(rk))); ++ ++ return True; ++} 
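The widening multiply helpers above all rely on VEX's Iop_MullS32/Iop_MullU32, which produce a full 64-bit product from 32-bit inputs: mulh.w keeps the upper half via Iop_64HIto32 and sign-extends it, while mulw.d.w keeps the whole 64-bit product in rd. A minimal, self-contained reference sketch of the intended semantics, in plain C and independent of the VEX IR (the ref_* names are illustrative only, not part of the patch):

#include <stdint.h>
#include <assert.h>

/* Illustrative reference model, not part of the Valgrind patch. */
static int64_t ref_mulw_d_w(int64_t rj, int64_t rk)
{
    /* mulw.d.w: full 64-bit product of the sign-extended low 32 bits,
       mirroring Iop_MullS32 on the 32-bit register views. */
    return (int64_t)(int32_t)rj * (int64_t)(int32_t)rk;
}

static int64_t ref_mulh_w(int64_t rj, int64_t rk)
{
    /* mulh.w: upper 32 bits of that product, sign-extended to 64 bits,
       i.e. Iop_MullS32 followed by Iop_64HIto32 and extendS(Ity_I32, ...). */
    int64_t prod = (int64_t)(int32_t)rj * (int64_t)(int32_t)rk;
    return (int64_t)(int32_t)(uint32_t)((uint64_t)prod >> 32);
}

int main(void)
{
    /* -2 * 2^30 = -2^31; high half is all ones, i.e. -1 after sign extension. */
    assert(ref_mulw_d_w(-2, 0x40000000) == -2147483648LL);
    assert(ref_mulh_w(-2, 0x40000000) == -1);
    return 0;
}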
++ ++static Bool gen_mulw_d_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulw.d.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_MullU32, getIReg32(rj), getIReg32(rk))); ++ ++ return True; ++} ++ ++static Bool gen_div_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* div = binop(Iop_DivS32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, div)); ++ ++ return True; ++} ++ ++static Bool gen_mod_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = binop(Iop_DivModS32to32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mod))); ++ ++ return True; ++} ++ ++static Bool gen_div_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* div = binop(Iop_DivU32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, div)); ++ ++ return True; ++} ++ ++static Bool gen_mod_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = binop(Iop_DivModU32to32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mod))); ++ ++ return True; ++} ++ ++static Bool gen_div_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_DivS64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_mod_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = binop(Iop_DivModS64to64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mod)); ++ ++ return True; ++} ++ ++static Bool gen_div_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.du %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_DivU64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_mod_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.du %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = 
binop(Iop_DivModU64to64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mod)); ++ ++ return True; ++} ++ ++static Bool gen_alsl_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("alsl.w %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(sa2 + 1)); ++ IRExpr* add = binop(Iop_Add32, shl, getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_alsl_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("alsl.wu %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(sa2 + 1)); ++ IRExpr* add = binop(Iop_Add32, shl, getIReg32(rk)); ++ putIReg(rd, extendU(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_alsl_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("alsl.d %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rj), mkU8(sa2 + 1)); ++ putIReg(rd, binop(Iop_Add64, shl, getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_lu12i_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("lu12i.w %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ IRExpr* imm = mkU32(si20 << 12); ++ putIReg(rd, extendS(Ity_I32, imm)); ++ ++ return True; ++} ++ ++static Bool gen_lu32i_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("lu32i.d %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ IRExpr* imm = mkU64((ULong)extend32(si20, 20) << 32); ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rd), mkU8(32)); ++ IRExpr* shr = binop(Iop_Shr64, shl, mkU8(32)); ++ putIReg(rd, binop(Iop_Or64, imm, shr)); ++ ++ return True; ++} ++ ++static Bool gen_lu52i_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("lu52i.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* imm = mkU64((ULong)si12 << 52); ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rj), mkU8(12)); ++ IRExpr* shr = binop(Iop_Shr64, shl, mkU8(12)); ++ putIReg(rd, binop(Iop_Or64, imm, shr)); ++ ++ return True; ++} ++ ++static Bool gen_pcaddi ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcaddi %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ putIReg(rd, mkU64(guest_PC_curr_instr + extend64(si20 << 2, 22))); ++ ++ return True; ++} ++ ++static Bool gen_pcalau12i ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcalau12i %s, %d\n", 
nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ IRExpr* imm = mkU64(guest_PC_curr_instr + extend64(si20 << 12, 32)); ++ IRExpr* shr = binop(Iop_Shr64, imm, mkU8(12)); ++ putIReg(rd, binop(Iop_Shl64, shr, mkU8(12))); ++ ++ return True; ++} ++ ++static Bool gen_pcaddu12i ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcaddu12i %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ putIReg(rd, mkU64(guest_PC_curr_instr + extend64(si20 << 12, 32))); ++ ++ return True; ++} ++ ++static Bool gen_pcaddu18i ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcaddu18i %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ putIReg(rd, mkU64(guest_PC_curr_instr + extend64((ULong)si20 << 18, 38))); ++ ++ return True; ++} ++ ++static Bool gen_addi_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("addi.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* imm = mkU32(extend32(si12, 12)); ++ IRExpr* add = binop(Iop_Add32, getIReg32(rj), imm); ++ putIReg(rd, extendS(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_addi_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("addi.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* imm = mkU64(extend64(si12, 12)); ++ putIReg(rd, binop(Iop_Add64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_addu16i_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si16 = get_si16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("addu16i.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si16, 16)); ++ ++ IRExpr* imm = mkU64(extend64(si16 << 16, 32)); ++ putIReg(rd, binop(Iop_Add64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_andi ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui12 = get_ui12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("andi %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui12); ++ ++ IRExpr* imm = mkU64((ULong)ui12); ++ putIReg(rd, binop(Iop_And64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_ori ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui12 = get_ui12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ori %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui12); ++ ++ IRExpr* imm = mkU64((ULong)ui12); ++ putIReg(rd, binop(Iop_Or64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_xori ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui12 = get_ui12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("xori %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui12); ++ ++ IRExpr* imm = mkU64((ULong)ui12); ++ putIReg(rd, binop(Iop_Xor64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point 
shift insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_sll_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sll.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), getIReg8(rk)); ++ putIReg(rd, extendS(Ity_I32, shl)); ++ ++ return True; ++} ++ ++static Bool gen_srl_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srl.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* shr = binop(Iop_Shr32, getIReg32(rj), getIReg8(rk)); ++ putIReg(rd, extendS(Ity_I32, shr)); ++ ++ return True; ++} ++ ++static Bool gen_sra_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sra.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* sar = binop(Iop_Sar32, getIReg32(rj), getIReg8(rk)); ++ putIReg(rd, extendS(Ity_I32, sar)); ++ ++ return True; ++} ++ ++static Bool gen_sll_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sll.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Shl64, getIReg64(rj), getIReg8(rk))); ++ ++ return True; ++} ++ ++static Bool gen_srl_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srl.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Shr64, getIReg64(rj), getIReg8(rk))); ++ ++ return True; ++} ++ ++static Bool gen_sra_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sra.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Sar64, getIReg64(rj), getIReg8(rk))); ++ ++ return True; ++} ++ ++static Bool gen_rotr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotr.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp tmp1 = newTemp(Ity_I32); ++ assign(tmp1, getIReg32(rj)); ++ IRTemp tmp2 = newTemp(Ity_I8); ++ assign(tmp2, getIReg8(rk)); ++ IRExpr* shr = binop(Iop_Shr32, mkexpr(tmp1), mkexpr(tmp2)); ++ IRExpr* imm = unop(Iop_8Uto32, mkexpr(tmp2)); ++ IRExpr* sub = binop(Iop_Sub32, mkU32(32), imm); ++ IRExpr* imm2 = unop(Iop_32to8, sub); ++ IRExpr* shl = binop(Iop_Shl32, mkexpr(tmp1), imm2); ++ IRExpr* or = binop(Iop_Or32, shr, shl); ++ putIReg(rd, extendS(Ity_I32, or)); ++ ++ return True; ++} ++ ++static Bool gen_rotr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotr.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp tmp1 = newTemp(Ity_I64); ++ assign(tmp1, getIReg64(rj)); ++ 
IRTemp tmp2 = newTemp(Ity_I8); ++ assign(tmp2, getIReg8(rk)); ++ IRExpr* shr = binop(Iop_Shr64, mkexpr(tmp1), mkexpr(tmp2)); ++ IRExpr* imm = unop(Iop_8Uto64, mkexpr(tmp2)); ++ IRExpr* sub = binop(Iop_Sub64, mkU64(64), imm); ++ IRExpr* imm2 = unop(Iop_64to8, sub); ++ IRExpr* shl = binop(Iop_Shl64, mkexpr(tmp1), imm2); ++ putIReg(rd, binop(Iop_Or64, shr, shl)); ++ ++ return True; ++} ++ ++static Bool gen_slli_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slli.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(ui5)); ++ putIReg(rd, extendS(Ity_I32, shl)); ++ ++ return True; ++} ++ ++static Bool gen_slli_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slli.d %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui6); ++ ++ putIReg(rd, binop(Iop_Shl64, getIReg64(rj), mkU8(ui6))); ++ ++ return True; ++} ++ ++static Bool gen_srli_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srli.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRExpr* shr = binop(Iop_Shr32, getIReg32(rj), mkU8(ui5)); ++ putIReg(rd, extendS(Ity_I32, shr)); ++ ++ return True; ++} ++ ++static Bool gen_srli_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srli.d %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui6); ++ ++ putIReg(rd, binop(Iop_Shr64, getIReg64(rj), mkU8(ui6))); ++ ++ return True; ++} ++ ++static Bool gen_srai_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srai.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRExpr* sar = binop(Iop_Sar32, getIReg32(rj), mkU8(ui5)); ++ putIReg(rd, extendS(Ity_I32, sar)); ++ ++ return True; ++} ++ ++static Bool gen_srai_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srai.d %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui6); ++ ++ putIReg(rd, binop(Iop_Sar64, getIReg64(rj), mkU8(ui6))); ++ ++ return True; ++} ++ ++static Bool gen_rotri_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotri.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRTemp tmp = newTemp(Ity_I32); ++ assign(tmp, getIReg32(rj)); ++ IRExpr* shr = binop(Iop_Shr32, mkexpr(tmp), mkU8(ui5)); ++ IRExpr* shl = binop(Iop_Shl32, mkexpr(tmp), mkU8(32 - ui5)); ++ if (32 - ui5 == 32) ++ shl = mkU32(0); ++ IRExpr* or = binop(Iop_Or32, shr, shl); ++ putIReg(rd, extendS(Ity_I32, or)); ++ ++ return True; ++} ++ ++static Bool gen_rotri_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotri.d %s, %s, %u\n", nameIReg(rd), 
nameIReg(rj), ui6); ++ ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, getIReg64(rj)); ++ IRExpr* shr = binop(Iop_Shr64, mkexpr(tmp), mkU8(ui6)); ++ IRExpr* shl = binop(Iop_Shl64, mkexpr(tmp), mkU8(64 - ui6)); ++ if (64 - ui6 == 64) ++ shl = mkU64(0); ++ putIReg(rd, binop(Iop_Or64, shr, shl)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point bit insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_ext_w_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ext.w.h %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, extendS(Ity_I16, getIReg16(rj))); ++ ++ return True; ++} ++ ++static Bool gen_ext_w_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ext.w.b %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, extendS(Ity_I8, getIReg8(rj))); ++ ++ return True; ++} ++ ++static Bool gen_clo_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clo.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not32, getIReg32(rj)); ++ IRExpr* clz = unop(Iop_Clz32, not); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_clz_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clz.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* clz = unop(Iop_Clz32, getIReg32(rj)); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_cto_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("cto.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not32, getIReg32(rj)); ++ IRExpr* clz = unop(Iop_Ctz32, not); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_ctz_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ctz.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* clz = unop(Iop_Ctz32, getIReg32(rj)); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_clo_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clo.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rj)); ++ putIReg(rd, unop(Iop_Clz64, not)); ++ ++ return True; ++} ++ ++static Bool gen_clz_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clz.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, unop(Iop_Clz64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_cto_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("cto.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rj)); ++ 
putIReg(rd, unop(Iop_Ctz64, not)); ++ ++ return True; ++} ++ ++static Bool gen_ctz_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ctz.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, unop(Iop_Ctz64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_revb_2h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.2h %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_2h", ++ &loongarch64_calculate_revb_2h, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revb_4h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.4h %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_4h", ++ &loongarch64_calculate_revb_4h, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revb_2w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.2w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_2w", ++ &loongarch64_calculate_revb_2w, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revb_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_d", ++ &loongarch64_calculate_revb_d, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revh_2w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revh.2w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revh_2w", ++ &loongarch64_calculate_revh_2w, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revh_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revh.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revh_d", ++ &loongarch64_calculate_revh_d, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_4b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.4b %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ 
"loongarch64_calculate_bitrev_4b", ++ &loongarch64_calculate_bitrev_4b, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_8b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.8b %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_bitrev_8b", ++ &loongarch64_calculate_bitrev_8b, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_bitrev_w", ++ &loongarch64_calculate_bitrev_w, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_bitrev_d", ++ &loongarch64_calculate_bitrev_d, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bytepick_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bytepick.w %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ UInt shift = 8 * (4 - sa2); ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rk), mkU8(32 - shift)); ++ if (32 - shift == 32) ++ shl = mkU32(0); ++ IRExpr* shr = binop(Iop_Shr32, getIReg32(rj), mkU8(shift)); ++ if (shift == 32) ++ shr = mkU32(0); ++ IRExpr* or = binop(Iop_Or32, shl, shr); ++ putIReg(rd, extendS(Ity_I32, or)); ++ ++ return True; ++} ++ ++static Bool gen_bytepick_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa3 = get_sa3(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bytepick.d %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa3); ++ ++ UInt shift = 8 * (8 - sa3); ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rk), mkU8(64 - shift)); ++ if (64 - shift == 64) ++ shl = mkU64(0); ++ IRExpr* shr = binop(Iop_Shr64, getIReg64(rj), mkU8(shift)); ++ if (shift == 64) ++ shr = mkU64(0); ++ putIReg(rd, binop(Iop_Or64, shl, shr)); ++ ++ return True; ++} ++ ++static Bool gen_maskeqz ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("maskeqz %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpNE64, getIReg64(rk), mkU64(0)); ++ putIReg(rd, binop(Iop_And64, extendS(Ity_I1, cond), getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_masknez ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("masknez 
%s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpEQ64, getIReg64(rk), mkU64(0)); ++ putIReg(rd, binop(Iop_And64, extendS(Ity_I1, cond), getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_bstrins_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbw(insn); ++ UInt lsb = get_lsbw(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrins.w %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRTemp tmp = newTemp(Ity_I32); ++ assign(tmp, getIReg32(rd)); ++ IRExpr* shl1; ++ if (msb == 31) { ++ shl1 = mkU32(0); ++ } else { ++ IRExpr* shr1 = binop(Iop_Shr32, mkexpr(tmp), mkU8(msb + 1)); ++ shl1 = binop(Iop_Shl32, shr1, mkU8(msb + 1)); ++ } ++ IRExpr* shl2 = binop(Iop_Shl32, getIReg32(rj), mkU8(31 - msb + lsb)); ++ IRExpr* shr2 = binop(Iop_Shr32, shl2, mkU8(31 - msb)); ++ IRExpr* shr3; ++ if (lsb == 0) { ++ shr3 = mkU32(0); ++ } else { ++ IRExpr* shl3 = binop(Iop_Shl32, mkexpr(tmp), mkU8(32 - lsb)); ++ shr3 = binop(Iop_Shr32, shl3, mkU8(32 - lsb)); ++ } ++ IRExpr* or1 = binop(Iop_Or32, shl1, shr2); ++ IRExpr* or2 = binop(Iop_Or32, or1, shr3); ++ putIReg(rd, extendS(Ity_I32, or2)); ++ ++ return True; ++} ++ ++static Bool gen_bstrpick_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbw(insn); ++ UInt lsb = get_lsbw(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrpick.w %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(31 - msb)); ++ IRExpr* shr = binop(Iop_Shr32, shl, mkU8(31 - msb + lsb)); ++ putIReg(rd, extendS(Ity_I32, shr)); ++ ++ return True; ++} ++ ++static Bool gen_bstrins_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbd(insn); ++ UInt lsb = get_lsbd(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrins.d %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, getIReg64(rd)); ++ IRExpr* shl1; ++ if (msb == 63) { ++ shl1 = mkU64(0); ++ } else { ++ IRExpr* shr1 = binop(Iop_Shr64, mkexpr(tmp), mkU8(msb + 1)); ++ shl1 = binop(Iop_Shl64, shr1, mkU8(msb + 1)); ++ } ++ IRExpr* shl2 = binop(Iop_Shl64, getIReg64(rj), mkU8(63 - msb + lsb)); ++ IRExpr* shr2 = binop(Iop_Shr64, shl2, mkU8(63 - msb)); ++ IRExpr* shr3; ++ if (lsb == 0) { ++ shr3 = mkU64(0); ++ } else { ++ IRExpr* shl3 = binop(Iop_Shl64, mkexpr(tmp), mkU8(64 - lsb)); ++ shr3 = binop(Iop_Shr64, shl3, mkU8(64 - lsb)); ++ } ++ IRExpr* or = binop(Iop_Or64, shl1, shr2); ++ putIReg(rd, binop(Iop_Or64, or, shr3)); ++ ++ return True; ++} ++ ++static Bool gen_bstrpick_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbd(insn); ++ UInt lsb = get_lsbd(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrpick.d %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rj), mkU8(63 - msb)); ++ putIReg(rd, binop(Iop_Shr64, shl, mkU8(63 - msb + lsb))); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point load/store insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_ld_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ 
const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.b %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.h %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putIReg(rd, load(Ity_I64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_st_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.b %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ store(addr, getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_st_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.h %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ store(addr, getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_st_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_st_d ( DisResult* dres, 
UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ld_bu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.bu %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ putIReg(rd, extendU(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_hu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.hu %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendU(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.wu %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendU(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static 
Bool gen_ldx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putIReg(rd, load(Ity_I64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_stx_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ store(addr, getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stx_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ store(addr, getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stx_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ldx_bu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.bu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendU(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_hu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.hu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendU(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ 
UInt rd = get_rd(insn); ++ ++ DIP("ldx.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendU(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_preld ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt hint = get_hint5(insn); ++ ++ DIP("preld %u, %s, %d\n", hint, nameIReg(rj), (Int)extend32(si12, 12)); ++ ++ return True; ++} ++ ++static Bool gen_preldx ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt hint = get_hint5(insn); ++ ++ DIP("preldx %u, %s, %d\n", hint, nameIReg(rj), (Int)extend32(si12, 12)); ++ ++ return True; ++} ++ ++static Bool gen_dbar ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt hint = get_hint15(insn); ++ ++ DIP("dbar %u\n", hint); ++ ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ return True; ++} ++ ++static Bool gen_ibar ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt hint = get_hint15(insn); ++ ++ DIP("ibar %u\n", hint); ++ ++ stmt(IRStmt_MBE(Imbe_InsnFence)); ++ ++ return True; ++} ++ ++static Bool gen_ldptr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldptr.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_stptr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stptr.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ldptr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldptr.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putIReg(rd, load(Ity_I64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_stptr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stptr.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if 
(!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, load(Ity_I64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldle.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldle.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = 
get_rd(insn); ++ ++ DIP("ldle.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldle.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, load(Ity_I64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_stgt_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stgt_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stgt_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ 
const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg64(rd)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point atomic insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_ll_helper ( UInt rd, UInt rj, UInt si14, Bool size64 ) ++{ ++ Int offs_size = offsetof(VexGuestLOONGARCH64State, guest_LLSC_SIZE); ++ Int offs_addr = offsetof(VexGuestLOONGARCH64State, guest_LLSC_ADDR); ++ Int offs_data = offsetof(VexGuestLOONGARCH64State, guest_LLSC_DATA); ++ ++ /* Get address of the load. */ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ if (size64) ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ else ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ ++ /* Load the value. */ ++ IRTemp res = newTemp(Ity_I64); ++ if (size64) ++ assign(res, load(Ity_I64, mkexpr(addr))); ++ else ++ assign(res, extendS(Ity_I32, load(Ity_I32, mkexpr(addr)))); ++ ++ /* Set up the LLSC fallback data. */ ++ if (size64) ++ stmt(IRStmt_Put(offs_size, mkU64(8))); ++ else ++ stmt(IRStmt_Put(offs_size, mkU64(4))); ++ stmt(IRStmt_Put(offs_addr, mkexpr(addr))); ++ stmt(IRStmt_Put(offs_data, mkexpr(res))); ++ ++ /* Write the result to the destination register. */ ++ putIReg(rd, mkexpr(res)); ++ ++ return True; ++} ++ ++static Bool gen_sc_helper ( UInt rd, UInt rj, UInt si14, Bool size64 ) ++{ ++ Int offs_size = offsetof(VexGuestLOONGARCH64State, guest_LLSC_SIZE); ++ Int offs_addr = offsetof(VexGuestLOONGARCH64State, guest_LLSC_ADDR); ++ Int offs_data = offsetof(VexGuestLOONGARCH64State, guest_LLSC_DATA); ++ ++ /* Get address of the load. */ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ if (size64) ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ else ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ ++ /* Get new value. 
*/ ++ IRTemp new; ++ if (size64) { ++ new = newTemp(Ity_I64); ++ assign(new, getIReg64(rd)); ++ } else { ++ new = newTemp(Ity_I32); ++ assign(new, getIReg32(rd)); ++ } ++ ++ /* Mark the SC initially as failed. */ ++ putIReg(rd, mkU64(0)); ++ ++ /* Set that no transaction is in progress. */ ++ IRTemp size = newTemp(Ity_I64); ++ assign(size, IRExpr_Get(offs_size, Ity_I64)); ++ stmt(IRStmt_Put(offs_size, mkU64(0) /* "no transaction" */)); ++ ++ /* Fail if no or wrong-size transaction. */ ++ if (size64) ++ exit(binop(Iop_CmpNE64, mkexpr(size), mkU64(8)), Ijk_Boring, 4); ++ else ++ exit(binop(Iop_CmpNE64, mkexpr(size), mkU64(4)), Ijk_Boring, 4); ++ ++ /* Fail if the address doesn't match the LL address. */ ++ exit(binop(Iop_CmpNE64, mkexpr(addr), IRExpr_Get(offs_addr, Ity_I64)), ++ Ijk_Boring, 4); ++ ++ /* Fail if the data doesn't match the LL data. */ ++ IRTemp data; ++ if (size64) { ++ data = newTemp(Ity_I64); ++ assign(data, IRExpr_Get(offs_data, Ity_I64)); ++ IRExpr* d = load(Ity_I64, mkexpr(addr)); ++ exit(binop(Iop_CmpNE64, d, mkexpr(data)), Ijk_Boring, 4); ++ } else { ++ data = newTemp(Ity_I32); ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, IRExpr_Get(offs_data, Ity_I64)); ++ assign(data, unop(Iop_64to32, mkexpr(tmp))); ++ IRExpr* d = extendS(Ity_I32, load(Ity_I32, mkexpr(addr))); ++ exit(binop(Iop_CmpNE64, d, mkexpr(tmp)), Ijk_Boring, 4); ++ } ++ ++ /* Try to CAS the new value in. */ ++ IRTemp old; ++ if (size64) { ++ old = newTemp(Ity_I64); ++ cas(old, mkexpr(addr), mkexpr(data), mkexpr(new)); ++ } else { ++ old = newTemp(Ity_I32); ++ cas(old, mkexpr(addr), mkexpr(data), mkexpr(new)); ++ } ++ ++ /* Fail if the CAS failed (old != expd). */ ++ if (size64) ++ exit(binop(Iop_CasCmpNE64, mkexpr(old), mkexpr(data)), Ijk_Boring, 4); ++ else ++ exit(binop(Iop_CasCmpNE32, mkexpr(old), mkexpr(data)), Ijk_Boring, 4); ++ ++ /* Otherwise mark the operation as successful. */ ++ putIReg(rd, mkU64(1)); ++ ++ return True; ++} ++ ++static Bool gen_ll_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ll.w %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? ++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_ll_helper(rd, rj, si14, False); ++ } else { ++ IRTemp res = newTemp(Ity_I32); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), NULL/*LL*/)); ++ putIReg(rd, extendS(Ity_I32, mkexpr(res))); ++ return True; ++ } ++} ++ ++static Bool gen_sc_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sc.w %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? 
++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_sc_helper(rd, rj, si14, False); ++ } else { ++ IRTemp res = newTemp(Ity_I1); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), getIReg32(rd))); ++ return True; ++ } ++} ++ ++static Bool gen_ll_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ll.d %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? ++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_ll_helper(rd, rj, si14, True); ++ } else { ++ IRTemp res = newTemp(Ity_I64); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), NULL/*LL*/)); ++ putIReg(rd, mkexpr(res)); ++ return True; ++ } ++} ++ ++static Bool gen_sc_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sc.d %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? ++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_sc_helper(rd, rj, si14, True); ++ } else { ++ IRTemp res = newTemp(Ity_I1); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), getIReg64(rd))); ++ return True; ++ } ++} ++ ++enum amop { ++ AMSWAP, AMADD, AMAND, AMOR, AMXOR, AMMAX, AMMIN, AMMAX_U, AMMIN_U ++}; ++ ++static Bool gen_am_w_helper ( enum amop op, Bool fence, ++ UInt rd, UInt rj, UInt rk ) ++{ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ ++ IRTemp o = newTemp(Ity_I32); ++ assign(o, load(Ity_I32, mkexpr(addr))); ++ IRTemp n = newTemp(Ity_I32); ++ assign(n, getIReg32(rk)); ++ IRExpr* e; ++ switch (op) { ++ case AMSWAP: ++ e = mkexpr(n); ++ break; ++ case AMADD: ++ e = binop(Iop_Add32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMAND: ++ e = binop(Iop_And32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMOR: ++ e = binop(Iop_Or32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMXOR: ++ e = binop(Iop_Xor32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMMAX: { ++ IRExpr* cond = binop(Iop_CmpLT32S, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN: { ++ IRExpr* cond = binop(Iop_CmpLT32S, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMAX_U: { ++ IRExpr* cond = binop(Iop_CmpLT32U, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN_U: { ++ IRExpr* cond = binop(Iop_CmpLT32U, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ default: ++ return False; ++ } ++ ++ IRTemp old = newTemp(Ity_I32); ++ cas(old, mkexpr(addr), mkexpr(o), e); 
++ IRExpr* cond = binop(Iop_CasCmpNE32, mkexpr(old), mkexpr(o)); ++ exit(cond, Ijk_Boring, 0); /* Loop if failed */ ++ putIReg(rd, extendS(Ity_I32, mkexpr(o))); ++ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ return True; ++} ++ ++static Bool gen_am_d_helper ( enum amop op, Bool fence, ++ UInt rd, UInt rj, UInt rk ) ++{ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ ++ IRTemp o = newTemp(Ity_I64); ++ assign(o, load(Ity_I64, mkexpr(addr))); ++ IRTemp n = newTemp(Ity_I64); ++ assign(n, getIReg64(rk)); ++ IRExpr* e; ++ switch (op) { ++ case AMSWAP: ++ e = mkexpr(n); ++ break; ++ case AMADD: ++ e = binop(Iop_Add64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMAND: ++ e = binop(Iop_And64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMOR: ++ e = binop(Iop_Or64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMXOR: ++ e = binop(Iop_Xor64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMMAX: { ++ IRExpr* cond = binop(Iop_CmpLT64S, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN: { ++ IRExpr* cond = binop(Iop_CmpLT64S, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMAX_U: { ++ IRExpr* cond = binop(Iop_CmpLT64U, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN_U: { ++ IRExpr* cond = binop(Iop_CmpLT64U, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ default: ++ return False; ++ } ++ ++ IRTemp old = newTemp(Ity_I64); ++ cas(old, mkexpr(addr), mkexpr(o), e); ++ IRExpr* cond = binop(Iop_CasCmpNE64, mkexpr(old), mkexpr(o)); ++ exit(cond, Ijk_Boring, 0); /* Loop if failed */ ++ putIReg(rd, mkexpr(o)); ++ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ return True; ++} ++ ++static Bool gen_amswap_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMSWAP, False, rd, rj, rk); ++} ++ ++static Bool gen_amswap_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMSWAP, False, rd, rj, rk); ++} ++ ++static Bool gen_amadd_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMADD, False, rd, rj, rk); ++} ++ ++static Bool gen_amadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = 
get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMADD, False, rd, rj, rk); ++} ++ ++static Bool gen_amand_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMAND, False, rd, rj, rk); ++} ++ ++static Bool gen_amand_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMAND, False, rd, rj, rk); ++} ++ ++static Bool gen_amor_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMOR, False, rd, rj, rk); ++} ++ ++static Bool gen_amor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMOR, False, rd, rj, rk); ++} ++ ++static Bool gen_amxor_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMXOR, False, rd, rj, rk); ++} ++ ++static Bool gen_amxor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMXOR, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), 
nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMIN, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX_U, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX_U, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN_U, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ 
dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMIN_U, False, rd, rj, rk); ++} ++ ++static Bool gen_amswap_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMSWAP, True, rd, rj, rk); ++} ++ ++static Bool gen_amswap_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMSWAP, True, rd, rj, rk); ++} ++ ++static Bool gen_amadd_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMADD, True, rd, rj, rk); ++} ++ ++static Bool gen_amadd_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMADD, True, rd, rj, rk); ++} ++ ++static Bool gen_amand_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMAND, True, rd, rj, rk); ++} ++ ++static Bool gen_amand_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMAND, True, rd, rj, rk); ++} ++ ++static Bool gen_amor_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = 
Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMOR, True, rd, rj, rk); ++} ++ ++static Bool gen_amor_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMOR, True, rd, rj, rk); ++} ++ ++static Bool gen_amxor_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMXOR, True, rd, rj, rk); ++} ++ ++static Bool gen_amxor_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMXOR, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return 
gen_am_d_helper(AMMIN, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX_U, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX_U, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN_U, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMIN_U, True, rd, rj, rk); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point extra insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_crc_w_b_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.b.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(8)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crc_w_h_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.h.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(16)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crc_w_w_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.w.w 
%s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(32)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crc_w_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.d.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(64)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_b_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.b.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(8)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_h_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.h.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(16)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_w_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.w.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(32)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.d.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(64)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_break ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt code = get_code(insn); ++ ++ DIP("break %u\n", code); ++ ++ putPC(mkU64(guest_PC_curr_instr + 4)); ++ ++ /* On LoongArch, most instructions do not raise exceptions; ++ instead, gcc notifies the kernel with a trap instruction. ++ We simulate the behavior of the linux kernel here. ++ See arch/loongarch/kernel/traps.c. 
++ */ ++ switch (code) { ++ case 6: /* BRK_OVERFLOW */ ++ dres->jk_StopHere = Ijk_SigFPE_IntOvf; ++ break; ++ case 7: /* BRK_DIVZERO */ ++ dres->jk_StopHere = Ijk_SigFPE_IntDiv; ++ break; ++ default: ++ dres->jk_StopHere = Ijk_SigTRAP; ++ break; ++ } ++ dres->whatNext = Dis_StopHere; ++ ++ return True; ++} ++ ++static Bool gen_syscall ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt hint = get_hint15(insn); ++ ++ DIP("syscall %u\n", hint); ++ ++ putPC(mkU64(guest_PC_curr_instr + 4)); ++ ++ dres->jk_StopHere = Ijk_Sys_syscall; ++ dres->whatNext = Dis_StopHere; ++ ++ return True; ++} ++ ++static Bool gen_asrtle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("asrtle.d %s, %s\n", nameIReg(rj), nameIReg(rk)); ++ ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_asrtgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("asrtgt.d %s, %s\n", nameIReg(rj), nameIReg(rk)); ++ ++ gen_SIGSYS(binop(Iop_CmpLE64U, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_rdtimel_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rdtimel.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, mkU64(0)); ++ ++ return True; ++} ++ ++static Bool gen_rdtimeh_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rdtimeh.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, mkU64(0)); ++ ++ return True; ++} ++ ++static Bool gen_rdtime_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rdtime.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, mkU64(0)); ++ ++ return True; ++} ++ ++static Bool gen_cpucfg ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("cpucfg %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_CPUCFG)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_cpucfg", ++ &loongarch64_calculate_cpucfg, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point arithmetic insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_fadd_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fadd.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FADD_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, 
triop(Iop_AddF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fadd.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FADD_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_AddF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fsub_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsub.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSUB_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_SubF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fsub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsub.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSUB_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_SubF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmul_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmul.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMUL_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_MulF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmul_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmul.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMUL_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_MulF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fdiv_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fdiv.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ 
calculateFCSR(FDIV_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_DivF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fdiv_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fdiv.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FDIV_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_DivF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmadd_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmadd.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMADD_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, qop(Iop_MAddF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fmadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmadd.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMADD_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, qop(Iop_MAddF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fmsub_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmsub.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMSUB_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, qop(Iop_MSubF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fmsub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmsub.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMSUB_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, qop(Iop_MSubF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fnmadd_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* 
archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmadd.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMADD_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* madd = qop(Iop_MAddF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa)); ++ putFReg32(fd, unop(Iop_NegF32, madd)); ++ ++ return True; ++} ++ ++static Bool gen_fnmadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmadd.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMADD_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* madd = qop(Iop_MAddF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa)); ++ putFReg64(fd, unop(Iop_NegF64, madd)); ++ ++ return True; ++} ++ ++static Bool gen_fnmsub_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmsub.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMSUB_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* msub = qop(Iop_MSubF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa)); ++ putFReg32(fd, unop(Iop_NegF32, msub)); ++ ++ return True; ++} ++ ++static Bool gen_fnmsub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmsub.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMSUB_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* msub = qop(Iop_MSubF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa)); ++ putFReg64(fd, unop(Iop_NegF64, msub)); ++ ++ return True; ++} ++ ++static Bool gen_fmax_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmax.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAX_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MaxNumF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmax_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = 
get_fd(insn); ++ ++ DIP("fmax.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAX_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MaxNumF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmin_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmin.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMIN_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MinNumF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmin_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmin.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMIN_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MinNumF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmaxa_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmaxa.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAXA_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MaxNumAbsF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmaxa_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmaxa.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAXA_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MaxNumAbsF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmina_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmina.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMINA_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MinNumAbsF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmina_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmina.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & 
VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMINA_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MinNumAbsF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fabs_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fabs.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FABS_S, 1, fj, 0, 0); ++ putFReg32(fd, unop(Iop_AbsF32, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fabs_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fabs.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FABS_D, 1, fj, 0, 0); ++ putFReg64(fd, unop(Iop_AbsF64, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fneg_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fneg.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNEG_S, 1, fj, 0, 0); ++ putFReg32(fd, unop(Iop_NegF32, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fneg_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fneg.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNEG_D, 1, fj, 0, 0); ++ putFReg64(fd, unop(Iop_NegF64, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fsqrt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsqrt.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSQRT_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_SqrtF32, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fsqrt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsqrt.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSQRT_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_SqrtF64, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frecip_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ 
DIP("frecip.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRECIP_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_DivF32, rm, mkF32i(1), getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frecip_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frecip.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRECIP_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_DivF64, rm, mkF64i(1), getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frsqrt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frsqrt.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRSQRT_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_RSqrtF32, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frsqrt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frsqrt.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRSQRT_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_RSqrtF64, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fscaleb_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fscaleb.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSCALEB_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_ScaleBF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fscaleb_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fscaleb.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSCALEB_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_ScaleBF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_flogb_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("flogb.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & 
VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FLOGB_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_LogBF32, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_flogb_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("flogb.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FLOGB_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_LogBF64, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fcopysign_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcopysign.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i1 = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ IRExpr* shl1 = binop(Iop_Shl32, i1, mkU8(1)); ++ IRExpr* shr1 = binop(Iop_Shr32, shl1, mkU8(1)); ++ IRExpr* i2 = unop(Iop_ReinterpF32asI32, getFReg32(fk)); ++ IRExpr* shr2 = binop(Iop_Shr32, i2, mkU8(31)); ++ IRExpr* shl2 = binop(Iop_Shl32, shr2, mkU8(31)); ++ IRExpr* or = binop(Iop_Or32, shr1, shl2); ++ putFReg32(fd, unop(Iop_ReinterpI32asF32, or)); ++ ++ return True; ++} ++ ++static Bool gen_fcopysign_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcopysign.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i1 = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ IRExpr* shl1 = binop(Iop_Shl64, i1, mkU8(1)); ++ IRExpr* shr1 = binop(Iop_Shr64, shl1, mkU8(1)); ++ IRExpr* i2 = unop(Iop_ReinterpF64asI64, getFReg64(fk)); ++ IRExpr* shr2 = binop(Iop_Shr64, i2, mkU8(63)); ++ IRExpr* shl2 = binop(Iop_Shl64, shr2, mkU8(63)); ++ IRExpr* or = binop(Iop_Or64, shr1, shl2); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, or)); ++ ++ return True; ++} ++ ++static Bool gen_fclass_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fclass.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr** arg = mkIRExprVec_1(unop(Iop_ReinterpF64asI64, getFReg64(fj))); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_fclass_s", ++ &loongarch64_calculate_fclass_s, ++ arg); ++ putFReg32(fd, unop(Iop_ReinterpI32asF32, unop(Iop_64to32, call))); ++ ++ return True; ++} ++ ++static Bool gen_fclass_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fclass.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if 
(!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr** arg = mkIRExprVec_1(unop(Iop_ReinterpF64asI64, getFReg64(fj))); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_fclass_d", ++ &loongarch64_calculate_fclass_d, ++ arg); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, call)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point comparison insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static inline IRExpr* is_UN ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x45)); ++} ++ ++static inline IRExpr* is_LT ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x1)); ++} ++ ++static inline IRExpr* is_GT ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x0)); ++} ++ ++static inline IRExpr* is_EQ ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x40)); ++} ++ ++static Bool gen_fcmp_cond_helper ( enum fpop op, UInt cc, ++ UInt fj, UInt fk, Bool size64 ) ++{ ++ /* We have to convert 'irRes' from an IR-convention return result ++ (IRCmpF32Result / IRCmpF64Result) to a LOONGARCH-encoded group. ++ ++ FP cmp result | IR ++ -------------------- ++ UN | 0x45 ++ LT | 0x01 ++ GT | 0x00 ++ EQ | 0x40 ++ */ ++ IRTemp result = newTemp(Ity_I32); ++ if (size64) ++ assign(result, binop(Iop_CmpF64, getFReg64(fj), getFReg64(fk))); ++ else ++ assign(result, binop(Iop_CmpF32, getFReg32(fj), getFReg32(fk))); ++ ++ IRExpr* e; ++ switch (op) { ++ case FCMP_CAF_S: case FCMP_CAF_D: case FCMP_SAF_S: case FCMP_SAF_D: ++ e = mkU1(0); ++ break; ++ case FCMP_CLT_S: case FCMP_CLT_D: case FCMP_SLT_S: case FCMP_SLT_D: ++ e = is_LT(mkexpr(result)); ++ break; ++ case FCMP_CEQ_S: case FCMP_CEQ_D: case FCMP_SEQ_S: case FCMP_SEQ_D: ++ e = is_EQ(mkexpr(result)); ++ break; ++ case FCMP_CLE_S: case FCMP_CLE_D: case FCMP_SLE_S: case FCMP_SLE_D: ++ e = binop(Iop_Or1, is_LT(mkexpr(result)), is_EQ(mkexpr(result))); ++ break; ++ case FCMP_CUN_S: case FCMP_CUN_D: case FCMP_SUN_S: case FCMP_SUN_D: ++ e = is_UN(mkexpr(result)); ++ break; ++ case FCMP_CULT_S: case FCMP_CULT_D: case FCMP_SULT_S: case FCMP_SULT_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), is_LT(mkexpr(result))); ++ break; ++ case FCMP_CUEQ_S: case FCMP_CUEQ_D: case FCMP_SUEQ_S: case FCMP_SUEQ_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), is_EQ(mkexpr(result))); ++ break; ++ case FCMP_CULE_S: case FCMP_CULE_D: case FCMP_SULE_S: case FCMP_SULE_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), ++ binop(Iop_Or1, is_LT(mkexpr(result)), ++ is_EQ(mkexpr(result)))); ++ break; ++ case FCMP_CNE_S: case FCMP_CNE_D: case FCMP_SNE_S: case FCMP_SNE_D: ++ e = binop(Iop_Or1, is_GT(mkexpr(result)), is_LT(mkexpr(result))); ++ break; ++ case FCMP_COR_S: case FCMP_COR_D: case FCMP_SOR_S: case FCMP_SOR_D: ++ e = binop(Iop_Or1, is_GT(mkexpr(result)), ++ binop(Iop_Or1, is_LT(mkexpr(result)), ++ is_EQ(mkexpr(result)))); ++ break; ++ case FCMP_CUNE_S: case FCMP_CUNE_D: case FCMP_SUNE_S: case FCMP_SUNE_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), ++ binop(Iop_Or1, is_GT(mkexpr(result)), ++ is_LT(mkexpr(result)))); ++ break; ++ default: ++ return False; ++ } ++ ++ calculateFCSR(op, 2, fj, fk, 0); ++ putFCC(cc, unop(Iop_1Uto8, e)); ++ ++ return True; ++} ++ ++static Bool gen_fcmp_caf_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ 
UInt cd = get_cd(insn); ++ ++ DIP("fcmp.caf.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CAF_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_caf_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.caf.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CAF_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_saf_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.saf.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SAF_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_saf_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.saf.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SAF_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_clt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.clt.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_clt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.clt.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_slt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.slt.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_slt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ 
UInt cd = get_cd(insn); ++ ++ DIP("fcmp.slt.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_ceq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.ceq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_ceq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.ceq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_seq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.seq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_seq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.seq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cle_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cle.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cle.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sle_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ 
UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sle.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sle.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cun_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cun.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUN_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cun_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cun.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUN_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sun_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sun.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUN_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sun_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sun.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUN_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cult_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cult.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cult_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); 
++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cult.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sult_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sult.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sult_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sult.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cueq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cueq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cueq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cueq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sueq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sueq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sueq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sueq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cule_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt 
fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cule.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cule_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cule.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sule_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sule.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sule_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sule.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cne_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cne.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cne_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cne.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CNE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sne_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sne.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sne_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = 
get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sne.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SNE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cor_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cor.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_COR_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cor.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_COR_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sor_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sor.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SOR_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sor.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SOR_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cune_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cune.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cune_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cune.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUNE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sune_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt 
fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sune.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sune_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sune.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUNE_D, cd, fj, fk, True); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point conversion insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static IRExpr* is_Invalid_Overflow ( void ) ++{ ++ /* Bits 16 to 20 in FCSR are flags. ++ Bit 18 - overflow ++ Bit 20 - invalid ++ */ ++ IRExpr* fcsr = getFCSR(0); ++ IRExpr* shr = binop(Iop_Shr32, fcsr, mkU8(16)); ++ IRExpr* and = binop(Iop_And32, shr, mkU32(0x14)); ++ return binop(Iop_CmpNE32, and, getIReg32(0)); ++} ++ ++static Bool gen_convert_s_helper ( enum fpop op, UInt fd, UInt fj ) ++{ ++ IRExpr* e; ++ IRExpr* rm; ++ switch (op) { ++ case FTINTRM_W_S: ++ rm = gen_round_down(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRM_W_D: ++ rm = gen_round_down(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINTRP_W_S: ++ rm = gen_round_up(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRP_W_D: ++ rm = gen_round_up(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINTRZ_W_S: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRZ_W_D: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINTRNE_W_S: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRNE_W_D: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINT_W_S: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINT_W_D: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ default: ++ return False; ++ } ++ ++ calculateFCSR(op, 1, fj, 0, 0); ++ IRExpr* ite = IRExpr_ITE(is_Invalid_Overflow(), mkU32(0x7fffffff), e); ++ putFReg32(fd, unop(Iop_ReinterpI32asF32, ite)); ++ ++ return True; ++} ++ ++static Bool gen_convert_d_helper ( enum fpop op, UInt fd, UInt fj ) ++{ ++ IRExpr* e; ++ IRExpr* rm; ++ switch (op) { ++ case FTINTRM_L_S: ++ rm = gen_round_down(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRM_L_D: ++ rm = gen_round_down(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ case FTINTRP_L_S: ++ rm = gen_round_up(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRP_L_D: ++ rm = gen_round_up(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ case FTINTRZ_L_S: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRZ_L_D: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F64toI64S, rm, 
getFReg64(fj)); ++ break; ++ case FTINTRNE_L_S: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRNE_L_D: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ case FTINT_L_S: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINT_L_D: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ default: ++ return False; ++ } ++ ++ calculateFCSR(op, 1, fj, 0, 0); ++ IRExpr* ite = IRExpr_ITE(is_Invalid_Overflow(), ++ mkU64(0x7fffffffffffffffULL), e); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, ite)); ++ ++ return True; ++} ++ ++static Bool gen_fcvt_s_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcvt.s.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FCVT_S_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_F64toF32, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fcvt_d_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcvt.d.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FCVT_D_S, 1, fj, 0, 0); ++ putFReg64(fd, unop(Iop_F32toF64, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_ftintrm_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRM_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrm_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRM_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrm_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRM_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrm_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return 
gen_convert_d_helper(FTINTRM_L_D, fd, fj); ++} ++ ++static Bool gen_ftintrp_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRP_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrp_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRP_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrp_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRP_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrp_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRP_L_D, fd, fj); ++} ++ ++static Bool gen_ftintrz_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRZ_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrz_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRZ_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrz_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRZ_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrz_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ 
dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRZ_L_D, fd, fj); ++} ++ ++static Bool gen_ftintrne_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRNE_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrne_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRNE_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrne_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRNE_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrne_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRNE_L_D, fd, fj); ++} ++ ++static Bool gen_ftint_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINT_W_S, fd, fj); ++} ++ ++static Bool gen_ftint_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINT_W_D, fd, fj); ++} ++ ++static Bool gen_ftint_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINT_L_S, fd, fj); ++} ++ ++static Bool gen_ftint_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.l.d %s, %s\n", 
nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINT_L_D, fd, fj); ++} ++ ++static Bool gen_ffint_s_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.s.w %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_S_W, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* f = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ putFReg32(fd, binop(Iop_I32StoF32, rm, f)); ++ ++ return True; ++} ++ ++static Bool gen_ffint_s_l ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.s.l %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_S_L, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* f = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ putFReg32(fd, binop(Iop_I64StoF32, rm, f)); ++ ++ return True; ++} ++ ++static Bool gen_ffint_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.d.w %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_D_W, 1, fj, 0, 0); ++ IRExpr* f = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ putFReg64(fd, unop(Iop_I32StoF64, f)); ++ ++ return True; ++} ++ ++static Bool gen_ffint_d_l ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.d.l %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_D_L, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* f = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ putFReg64(fd, binop(Iop_I64StoF64, rm, f)); ++ ++ return True; ++} ++ ++static Bool gen_frint_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frint.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRINT_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_RoundF32toInt, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frint_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frint.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ 
calculateFCSR(FRINT_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_RoundF64toInt, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point move insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_fmov_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmov.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFReg32(fd, getFReg32(fj)); ++ ++ return True; ++} ++ ++static Bool gen_fmov_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmov.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFReg64(fd, getFReg64(fj)); ++ ++ return True; ++} ++ ++static Bool gen_fsel ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ca = get_ca(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsel %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFCC(ca)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(ca)); ++ IRExpr* cond = binop(Iop_CmpEQ64, cc, mkU64(0)); ++ putFReg64(fd, IRExpr_ITE(cond, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2fr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movgr2fr.w %s, %s\n", nameFReg(fd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ /* The high bits might be undefined, now the hardware implementation ++ of this instruction is that it is equivalent to movgr2fr.d. 
*/ ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2fr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movgr2fr.d %s, %s\n", nameFReg(fd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2frh_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movgr2frh.w %s, %s\n", nameFReg(fd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* shl1 = binop(Iop_Shl64, getIReg64(rj), mkU8(32)); ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fd)); ++ IRExpr* shl2 = binop(Iop_Shl64, i, mkU8(32)); ++ IRExpr* shr = binop(Iop_Shr64, shl2, mkU8(32)); ++ IRExpr* or = binop(Iop_Or64, shl1, shr); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, or)); ++ ++ return True; ++} ++ ++static Bool gen_movfr2gr_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfr2gr.s %s, %s\n", nameIReg(rd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ putIReg(rd, extendS(Ity_I32, i)); ++ ++ return True; ++} ++ ++static Bool gen_movfr2gr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfr2gr.d %s, %s\n", nameIReg(rd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putIReg(rd, unop(Iop_ReinterpF64asI64, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_movfrh2gr_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfrh2gr.s %s, %s\n", nameIReg(rd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ IRExpr* shr = binop(Iop_Shr64, i, mkU8(32)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64to32, shr))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2fcsr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fcsr = get_fcsrl(insn); ++ ++ DIP("movgr2fcsr %s, %s\n", nameFCSR(fcsr), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFCSR(fcsr, getIReg32(rj)); ++ ++ return True; ++} ++ ++static Bool gen_movfcsr2gr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fcsr = get_fcsrh(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfcsr2gr 
%s, %s\n", nameIReg(rd), nameFCSR(fcsr)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putIReg(rd, extendS(Ity_I32, getFCSR(fcsr))); ++ ++ return True; ++} ++ ++static Bool gen_movfr2cf ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("movfr2cf %s, %s\n", nameFCC(cd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ IRExpr* and = binop(Iop_And64, i, mkU64(0x1)); ++ putFCC(cd, unop(Iop_64to8, and)); ++ ++ return True; ++} ++ ++static Bool gen_movcf2fr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt cj = get_cj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movcf2fr %s, %s\n", nameFReg(fd), nameFCC(cj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ /* The hardware implementation of this instruction ++ does clear the high bits. */ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(cj)); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, cc)); ++ ++ return True; ++} ++ ++static Bool gen_movgr2cf ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("movgr2cf %s, %s\n", nameFCC(cd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* and = binop(Iop_And64, getIReg64(rj), mkU64(0x1)); ++ putFCC(cd, unop(Iop_64to8, and)); ++ ++ return True; ++} ++ ++static Bool gen_movcf2gr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt cj = get_cj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movcf2gr %s, %s\n", nameIReg(rd), nameFCC(cj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ /* The hardware implementation of this instruction ++ does clear the high bits. 
*/ ++ putIReg(rd, unop(Iop_8Uto64, getFCC(cj))); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point load/store insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_fld_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fld.s %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putFReg32(fd, load(Ity_F32, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fst_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fst.s %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fld_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fld.d %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putFReg64(fd, load(Ity_F64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fst_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fst.d %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getFReg64(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fldx_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldx.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & 
VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putFReg32(fd, load(Ity_F32, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fldx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldx.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putFReg64(fd, load(Ity_F64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fstx_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstx.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstx.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getFReg64(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fldgt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldgt.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putFReg32(fd, load(Ity_F32, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fldgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldgt.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putFReg64(fd, load(Ity_F64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fldle_s ( DisResult* dres, UInt insn, ++ 
const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldle.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putFReg32(fd, load(Ity_F32, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fldle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldle.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putFReg64(fd, load(Ity_F64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fstgt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstgt.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstgt.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getFReg64(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstle_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstle.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); 
++ UInt fd = get_fd(insn); ++ ++ DIP("fstle.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getFReg64(fd)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for branch insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_beqz ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("beqz %s, %d\n", nameIReg(rj), (Int)extend32(offs21, 21)); ++ ++ IRExpr* cond = binop(Iop_CmpEQ64, getIReg64(rj), mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_bnez ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("bnez %s, %d\n", nameIReg(rj), (Int)extend32(offs21, 21)); ++ ++ IRExpr* cond = binop(Iop_CmpNE64, getIReg64(rj), mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_bceqz ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt cj = get_cj(insn); ++ ++ DIP("bceqz %s, %d\n", nameFCC(cj), (Int)extend32(offs21, 21)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(cj)); ++ IRExpr* cond = binop(Iop_CmpEQ64, cc, mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_bcnez ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt cj = get_cj(insn); ++ ++ DIP("bcnez %s, %d\n", nameFCC(cj), (Int)extend32(offs21, 21)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(cj)); ++ IRExpr* cond = binop(Iop_CmpNE64, cc, mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_jirl ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("jirl %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(offs16, 16)); ++ ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, getIReg64(rj)); /* This is necessary when rd == rj */ ++ putIReg(rd, mkU64(guest_PC_curr_instr + 4)); ++ IRExpr* imm = mkU64(extend64(offs16 << 2, 18)); ++ putPC(binop(Iop_Add64, mkexpr(tmp), imm)); ++ ++ dres->whatNext = Dis_StopHere; ++ dres->jk_StopHere = Ijk_Boring; ++ ++ return True; ++} ++ ++static Bool gen_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs26 = get_offs26(insn); ++ ++ DIP("b %d\n", (Int)extend32(offs26, 26)); ++ ++ putPC(mkU64(guest_PC_curr_instr + extend64(offs26 << 2, 
28))); ++ ++ dres->whatNext = Dis_StopHere; ++ dres->jk_StopHere = Ijk_Boring; ++ ++ return True; ++} ++ ++static Bool gen_bl ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs26 = get_offs26(insn); ++ ++ DIP("bl %d\n", (Int)extend32(offs26, 26)); ++ ++ putIReg(1, mkU64(guest_PC_curr_instr + 4)); ++ putPC(mkU64(guest_PC_curr_instr + extend64(offs26 << 2, 28))); ++ ++ dres->whatNext = Dis_StopHere; ++ dres->jk_StopHere = Ijk_Boring; ++ ++ return True; ++} ++ ++static Bool gen_beq ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("beq %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpEQ64, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bne ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bne %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpNE64, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_blt ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("blt %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64S, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bge ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bge %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLE64S, getIReg64(rd), getIReg64(rj)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bltu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bltu %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64U, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bgeu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bgeu %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLE64U, getIReg64(rd), getIReg64(rj)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Disassemble a single LOONGARCH64 instruction ---*/ ++/*------------------------------------------------------------*/ ++ ++/* Disassemble a single LOONGARCH64 instruction into IR. 
The instruction ++ is located at |guest_instr| and has guest IP of |guest_PC_curr_instr|, ++ which will have been set before the call here. Returns True iff the ++ instruction was decoded, in which case *dres will be set accordingly, ++ or False, in which case *dres should be ignored by the caller. */ ++ ++static Bool disInstr_LOONGARCH64_WRK_special ( DisResult* dres, ++ const UChar* guest_instr ) ++{ ++ const UChar* code = guest_instr; ++ /* Spot the 16-byte preamble: ++ 00450c00 srli.d $zero, $zero, 3 ++ 00453400 srli.d $zero, $zero, 13 ++ 00457400 srli.d $zero, $zero, 29 ++ 00454c00 srli.d $zero, $zero, 19 ++ */ ++ if (getUInt(code + 0) == 0x00450c00 && ++ getUInt(code + 4) == 0x00453400 && ++ getUInt(code + 8) == 0x00457400 && ++ getUInt(code + 12) == 0x00454c00) { ++ /* Got a "Special" instruction preamble. Which one is it? */ ++ if (getUInt(code + 16) == 0x001535ad) { /* or $t1, $t1, $t1 */ ++ DIP("$a7 = client_request ( $t0 )\n"); ++ putPC(mkU64(guest_PC_curr_instr + 20)); ++ dres->whatNext = Dis_StopHere; ++ dres->len = 20; ++ dres->jk_StopHere = Ijk_ClientReq; ++ return True; ++ } else if (getUInt(code + 16) == 0x001539ce) { /* or $t2, $t2, $t2 */ ++ DIP("$a7 = guest_NRADDR\n"); ++ putIReg(11, IRExpr_Get(offsetof(VexGuestLOONGARCH64State, guest_NRADDR), ++ Ity_I64)); ++ dres->len = 20; ++ return True; ++ } else if (getUInt(code + 16) == 0x00153def) { /* or $t3, $t3, $t3 */ ++ DIP("branch-and-link-to-noredir $t8\n"); ++ putIReg(1, mkU64(guest_PC_curr_instr + 20)); ++ putPC(getIReg64(20)); ++ dres->whatNext = Dis_StopHere; ++ dres->len = 20; ++ dres->jk_StopHere = Ijk_NoRedir; ++ return True; ++ } else if (getUInt(code + 16) == 0x00154210) { /* or $t4, $t4, $t4 */ ++ DIP("IR injection\n"); ++ vex_inject_ir(irsb, Iend_LE); ++ /* Invalidate the current insn. The reason is that the IRop we're ++ injecting here can change, in which case the translation has to ++ be redone. For ease of handling, we simply invalidate all the ++ time. ++ */ ++ stmt(IRStmt_Put(offsetof(VexGuestLOONGARCH64State, guest_CMSTART), ++ mkU64(guest_PC_curr_instr))); ++ stmt(IRStmt_Put(offsetof(VexGuestLOONGARCH64State, guest_CMLEN), ++ mkU64(20))); ++ putPC(mkU64(guest_PC_curr_instr + 20)); ++ dres->whatNext = Dis_StopHere; ++ dres->len = 20; ++ dres->jk_StopHere = Ijk_InvalICache; ++ return True; ++ } ++ /* We don't know what it is.
*/ ++ vassert(0); ++ /*NOTREACHED*/ ++ } ++ return False; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000_0000 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b0000000: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00100: ++ ok = gen_clo_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00101: ++ ok = gen_clz_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_cto_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00111: ++ ok = gen_ctz_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01000: ++ ok = gen_clo_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_clz_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_cto_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01011: ++ ok = gen_ctz_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01100: ++ ok = gen_revb_2h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01101: ++ ok = gen_revb_4h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01110: ++ ok = gen_revb_2w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01111: ++ ok = gen_revb_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10000: ++ ok = gen_revh_2w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_revh_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_bitrev_4b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10011: ++ ok = gen_bitrev_8b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10100: ++ ok = gen_bitrev_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10101: ++ ok = gen_bitrev_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10110: ++ ok = gen_ext_w_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10111: ++ ok = gen_ext_w_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11000: ++ ok = gen_rdtimel_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_rdtimeh_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_rdtime_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11011: ++ ok = gen_cpucfg(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0000010: ++ ok = gen_asrtle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000011: ++ ok = gen_asrtgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100000: ++ ok = gen_add_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100001: ++ ok = gen_add_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100010: ++ ok = gen_sub_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100011: ++ ok = gen_sub_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100100: ++ ok = gen_slt(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100101: ++ ok = gen_sltu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100110: ++ ok = gen_maskeqz(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100111: ++ ok = gen_masknez(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101000: ++ ok = gen_nor(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101001: ++ ok = gen_and(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101010: ++ ok = gen_or(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101011: ++ ok = gen_xor(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101100: ++ ok = gen_orn(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101101: ++ ok = gen_andn(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101110: ++ ok = gen_sll_w(dres, 
insn, archinfo, abiinfo); ++ break; ++ case 0b0101111: ++ ok = gen_srl_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110000: ++ ok = gen_sra_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110001: ++ ok = gen_sll_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110010: ++ ok = gen_srl_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110011: ++ ok = gen_sra_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110110: ++ ok = gen_rotr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110111: ++ ok = gen_rotr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111000: ++ ok = gen_mul_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111001: ++ ok = gen_mulh_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111010: ++ ok = gen_mulh_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111011: ++ ok = gen_mul_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111100: ++ ok = gen_mulh_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111101: ++ ok = gen_mulh_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111110: ++ ok = gen_mulw_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111111: ++ ok = gen_mulw_d_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000000: ++ ok = gen_div_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000001: ++ ok = gen_mod_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000010: ++ ok = gen_div_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000011: ++ ok = gen_mod_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000100: ++ ok = gen_div_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000101: ++ ok = gen_mod_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000110: ++ ok = gen_div_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000111: ++ ok = gen_mod_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001000: ++ ok = gen_crc_w_b_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001001: ++ ok = gen_crc_w_h_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001010: ++ ok = gen_crc_w_w_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001011: ++ ok = gen_crc_w_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001100: ++ ok = gen_crcc_w_b_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001101: ++ ok = gen_crcc_w_h_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001110: ++ ok = gen_crcc_w_w_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001111: ++ ok = gen_crcc_w_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010100: ++ ok = gen_break(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010110: ++ ok = gen_syscall(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ ++ if (ok) { ++ return ok; ++ } ++ ++ switch (SLICE(insn, 21, 18)) { ++ case 0b0001: ++ if (SLICE(insn, 17, 17) == 0) { ++ ok = gen_alsl_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_alsl_wu(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b0010: ++ if (SLICE(insn, 17, 17) == 0) { ++ ok = gen_bytepick_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b0011: ++ ok = gen_bytepick_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ if (SLICE(insn, 17, 17) == 0) { ++ ok = gen_alsl_d(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000_0001 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const 
VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ if (SLICE(insn, 21, 21) == 0) { ++ switch (SLICE(insn, 20, 16)) { ++ case 0b00000: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_slli_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b00001: ++ ok = gen_slli_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00100: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_srli_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b00101: ++ ok = gen_srli_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01000: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_srai_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b01001: ++ ok = gen_srai_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01100: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_rotri_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b01101: ++ ok = gen_rotri_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ } else { ++ if (SLICE(insn, 15, 15) == 0) { ++ ok = gen_bstrins_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_bstrpick_w(dres, insn, archinfo, abiinfo); ++ } ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000_0100 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b0000001: ++ ok = gen_fadd_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000010: ++ ok = gen_fadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000101: ++ ok = gen_fsub_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000110: ++ ok = gen_fsub_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001001: ++ ok = gen_fmul_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001010: ++ ok = gen_fmul_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001101: ++ ok = gen_fdiv_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001110: ++ ok = gen_fdiv_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010001: ++ ok = gen_fmax_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010010: ++ ok = gen_fmax_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010101: ++ ok = gen_fmin_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010110: ++ ok = gen_fmin_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011001: ++ ok = gen_fmaxa_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011010: ++ ok = gen_fmaxa_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011101: ++ ok = gen_fmina_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011110: ++ ok = gen_fmina_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100001: ++ ok = gen_fscaleb_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100010: ++ ok = gen_fscaleb_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100101: ++ ok = gen_fcopysign_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100110: ++ ok = gen_fcopysign_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101000: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_fabs_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_fabs_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00101: ++ ok = gen_fneg_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_fneg_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_flogb_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_flogb_d(dres, insn, 
archinfo, abiinfo); ++ break; ++ case 0b01101: ++ ok = gen_fclass_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01110: ++ ok = gen_fclass_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_fsqrt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_fsqrt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10101: ++ ok = gen_frecip_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10110: ++ ok = gen_frecip_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_frsqrt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_frsqrt_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0101001: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00101: ++ ok = gen_fmov_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_fmov_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_movgr2fr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_movgr2fr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01011: ++ ok = gen_movgr2frh_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01101: ++ ok = gen_movfr2gr_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01110: ++ ok = gen_movfr2gr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01111: ++ ok = gen_movfrh2gr_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10000: ++ ok = gen_movgr2fcsr(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_movfcsr2gr(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10100: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = gen_movfr2cf(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b10101: ++ if (SLICE(insn, 9, 8) == 0b00) { ++ ok = gen_movcf2fr(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b10110: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = gen_movgr2cf(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b10111: ++ if (SLICE(insn, 9, 8) == 0b00) { ++ ok = gen_movcf2gr(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110010: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00110: ++ ok = gen_fcvt_s_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_fcvt_d_s(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110100: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_ftintrm_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_ftintrm_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_ftintrm_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ftintrm_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_ftintrp_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_ftintrp_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_ftintrp_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_ftintrp_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110101: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_ftintrz_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_ftintrz_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_ftintrz_l_s(dres, insn, archinfo, 
abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ftintrz_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_ftintrne_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_ftintrne_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_ftintrne_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_ftintrne_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110110: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_ftint_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_ftint_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_ftint_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ftint_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0111010: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00100: ++ ok = gen_ffint_s_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_ffint_s_l(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01000: ++ ok = gen_ffint_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ffint_d_l(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0111100: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b10001: ++ ok = gen_frint_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_frint_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 25, 22)) { ++ case 0b0000: ++ ok = disInstr_LOONGARCH64_WRK_00_0000_0000(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = disInstr_LOONGARCH64_WRK_00_0000_0001(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ ok = gen_bstrins_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011: ++ ok = gen_bstrpick_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100: ++ ok = disInstr_LOONGARCH64_WRK_00_0000_0100(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000: ++ ok = gen_slti(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001: ++ ok = gen_sltui(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010: ++ ok = gen_addi_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ ok = gen_addi_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100: ++ ok = gen_lu52i_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101: ++ ok = gen_andi(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110: ++ ok = gen_ori(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111: ++ ok = gen_xori(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_1010 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 25, 22)) { ++ case 0b0000: ++ ok = gen_ld_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = gen_ld_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ ok = gen_ld_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011: ++ ok = gen_ld_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100: ++ ok = gen_st_b(dres, insn, archinfo, 
abiinfo); ++ break; ++ case 0b0101: ++ ok = gen_st_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110: ++ ok = gen_st_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111: ++ ok = gen_st_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000: ++ ok = gen_ld_bu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001: ++ ok = gen_ld_hu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010: ++ ok = gen_ld_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ ok = gen_preld(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100: ++ ok = gen_fld_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101: ++ ok = gen_fst_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110: ++ ok = gen_fld_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111: ++ ok = gen_fst_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_1110_0000 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b0000000: ++ ok = gen_ldx_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001000: ++ ok = gen_ldx_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010000: ++ ok = gen_ldx_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011000: ++ ok = gen_ldx_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100000: ++ ok = gen_stx_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101000: ++ ok = gen_stx_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110000: ++ ok = gen_stx_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111000: ++ ok = gen_stx_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000000: ++ ok = gen_ldx_bu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001000: ++ ok = gen_ldx_hu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010000: ++ ok = gen_ldx_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011000: ++ ok = gen_preldx(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100000: ++ ok = gen_fldx_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101000: ++ ok = gen_fldx_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110000: ++ ok = gen_fstx_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111000: ++ ok = gen_fstx_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_1110_0001 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b1000000: ++ ok = gen_amswap_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000001: ++ ok = gen_amswap_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000010: ++ ok = gen_amadd_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000011: ++ ok = gen_amadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000100: ++ ok = gen_amand_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000101: ++ ok = gen_amand_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000110: ++ ok = gen_amor_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000111: ++ ok = gen_amor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001000: ++ ok = gen_amxor_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001001: ++ ok = gen_amxor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001010: ++ ok = gen_ammax_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001011: ++ 
ok = gen_ammax_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001100: ++ ok = gen_ammin_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001101: ++ ok = gen_ammin_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001110: ++ ok = gen_ammax_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001111: ++ ok = gen_ammax_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010000: ++ ok = gen_ammin_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010001: ++ ok = gen_ammin_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010010: ++ ok = gen_amswap_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010011: ++ ok = gen_amswap_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010100: ++ ok = gen_amadd_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010101: ++ ok = gen_amadd_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010110: ++ ok = gen_amand_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010111: ++ ok = gen_amand_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011000: ++ ok = gen_amor_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011001: ++ ok = gen_amor_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011010: ++ ok = gen_amxor_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011011: ++ ok = gen_amxor_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011100: ++ ok = gen_ammax_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011101: ++ ok = gen_ammax_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011110: ++ ok = gen_ammin_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011111: ++ ok = gen_ammin_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100000: ++ ok = gen_ammax_db_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100001: ++ ok = gen_ammax_db_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100010: ++ ok = gen_ammin_db_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100011: ++ ok = gen_ammin_db_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100100: ++ ok = gen_dbar(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100101: ++ ok = gen_ibar(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101000: ++ ok = gen_fldgt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101001: ++ ok = gen_fldgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101010: ++ ok = gen_fldle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101011: ++ ok = gen_fldle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101100: ++ ok = gen_fstgt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101101: ++ ok = gen_fstgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101110: ++ ok = gen_fstle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101111: ++ ok = gen_fstle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110000: ++ ok = gen_ldgt_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110001: ++ ok = gen_ldgt_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110010: ++ ok = gen_ldgt_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110011: ++ ok = gen_ldgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110100: ++ ok = gen_ldle_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110101: ++ ok = gen_ldle_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110110: ++ ok = gen_ldle_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110111: ++ ok = gen_ldle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111000: ++ ok = gen_stgt_b(dres, 
insn, archinfo, abiinfo); ++ break; ++ case 0b1111001: ++ ok = gen_stgt_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111010: ++ ok = gen_stgt_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111011: ++ ok = gen_stgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111100: ++ ok = gen_stle_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111101: ++ ok = gen_stle_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111110: ++ ok = gen_stle_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111111: ++ ok = gen_stle_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_FCMP_S ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (get_cond(insn)) { ++ case 0x0: ++ ok = gen_fcmp_caf_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x1: ++ ok = gen_fcmp_saf_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x2: ++ ok = gen_fcmp_clt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x3: ++ ok = gen_fcmp_slt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x4: ++ ok = gen_fcmp_ceq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x5: ++ ok = gen_fcmp_seq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x6: ++ ok = gen_fcmp_cle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x7: ++ ok = gen_fcmp_sle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x8: ++ ok = gen_fcmp_cun_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x9: ++ ok = gen_fcmp_sun_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xa: ++ ok = gen_fcmp_cult_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xb: ++ ok = gen_fcmp_sult_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xc: ++ ok = gen_fcmp_cueq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xd: ++ ok = gen_fcmp_sueq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xe: ++ ok = gen_fcmp_cule_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xf: ++ ok = gen_fcmp_sule_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x10: ++ ok = gen_fcmp_cne_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x11: ++ ok = gen_fcmp_sne_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x14: ++ ok = gen_fcmp_cor_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x15: ++ ok = gen_fcmp_sor_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x18: ++ ok = gen_fcmp_cune_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x19: ++ ok = gen_fcmp_sune_s(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_FCMP_D ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (get_cond(insn)) { ++ case 0x0: ++ ok = gen_fcmp_caf_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x1: ++ ok = gen_fcmp_saf_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x2: ++ ok = gen_fcmp_clt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x3: ++ ok = gen_fcmp_slt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x4: ++ ok = gen_fcmp_ceq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x5: ++ ok = gen_fcmp_seq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x6: ++ ok = gen_fcmp_cle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x7: ++ ok = gen_fcmp_sle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x8: ++ ok = gen_fcmp_cun_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x9: ++ ok = 
gen_fcmp_sun_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xa: ++ ok = gen_fcmp_cult_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xb: ++ ok = gen_fcmp_sult_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xc: ++ ok = gen_fcmp_cueq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xd: ++ ok = gen_fcmp_sueq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xe: ++ ok = gen_fcmp_cule_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xf: ++ ok = gen_fcmp_sule_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x10: ++ ok = gen_fcmp_cne_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x11: ++ ok = gen_fcmp_sne_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x14: ++ ok = gen_fcmp_cor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x15: ++ ok = gen_fcmp_sor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x18: ++ ok = gen_fcmp_cune_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x19: ++ ok = gen_fcmp_sune_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 29, 26)) { ++ case 0b0000: ++ ok = disInstr_LOONGARCH64_WRK_00_0000(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ switch (SLICE(insn, 25, 20)) { ++ case 0b000001: ++ ok = gen_fmadd_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b000010: ++ ok = gen_fmadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b000101: ++ ok = gen_fmsub_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b000110: ++ ok = gen_fmsub_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001001: ++ ok = gen_fnmadd_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001010: ++ ok = gen_fnmadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001101: ++ ok = gen_fnmsub_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001110: ++ ok = gen_fnmsub_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0011: ++ switch (SLICE(insn, 25, 20)) { ++ case 0b000001: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = disInstr_LOONGARCH64_WRK_FCMP_S(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b000010: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = disInstr_LOONGARCH64_WRK_FCMP_D(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b010000: ++ if (SLICE(insn, 19, 18) == 0b00) { ++ ok = gen_fsel(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0100: ++ ok = gen_addu16i_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101: ++ if (SLICE(insn, 25, 25) == 0) { ++ ok = gen_lu12i_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_lu32i_d(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b0110: ++ if (SLICE(insn, 25, 25) == 0) { ++ ok = gen_pcaddi(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_pcalau12i(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b0111: ++ if (SLICE(insn, 25, 25) == 0) { ++ ok = gen_pcaddu12i(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_pcaddu18i(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b1000: ++ switch (SLICE(insn, 25, 24)) { ++ case 0b00: ++ ok = gen_ll_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = gen_sc_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10: ++ ok = gen_ll_d(dres, insn, 
archinfo, abiinfo); ++ break; ++ case 0b11: ++ ok = gen_sc_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b1001: ++ switch (SLICE(insn, 25, 24)) { ++ case 0b00: ++ ok = gen_ldptr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = gen_stptr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10: ++ ok = gen_ldptr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11: ++ ok = gen_stptr_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b1010: ++ ok = disInstr_LOONGARCH64_WRK_00_1010(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110: ++ switch (SLICE(insn, 25, 22)) { ++ case 0b0000: ++ ok = disInstr_LOONGARCH64_WRK_00_1110_0000(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = disInstr_LOONGARCH64_WRK_00_1110_0001(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_01 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 29, 26)) { ++ case 0b0000: ++ ok = gen_beqz(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = gen_bnez(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ switch (SLICE(insn, 9, 8)) { ++ case 0b00: ++ ok = gen_bceqz(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = gen_bcnez(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0011: ++ ok = gen_jirl(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100: ++ ok = gen_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101: ++ ok = gen_bl(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110: ++ ok = gen_beq(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111: ++ ok = gen_bne(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000: ++ ok = gen_blt(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001: ++ ok = gen_bge(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010: ++ ok = gen_bltu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ ok = gen_bgeu(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK ( /*MB_OUT*/DisResult* dres, ++ const UChar* guest_instr, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo, ++ Bool sigill_diag ) ++{ ++ /* Set result defaults. */ ++ dres->whatNext = Dis_Continue; ++ dres->len = 4; ++ dres->jk_StopHere = Ijk_INVALID; ++ dres->hint = Dis_HintNone; ++ ++ /* At least this is simple on LOONGARCH64: insns are all 4 bytes long, ++ and 4-aligned. So just fish the whole thing out of memory right now ++ and have done. */ ++ UInt insn = getUInt(guest_instr); ++ DIP("\t0x%llx:\t0x%08x\t", (Addr64)guest_PC_curr_instr, insn); ++ vassert((guest_PC_curr_instr & 3ULL) == 0); ++ ++ /* Spot "Special" instructions (see comment at top of file). */ ++ Bool ok = disInstr_LOONGARCH64_WRK_special(dres, guest_instr); ++ if (ok) ++ return ok; ++ ++ /* Main LOONGARCH64 instruction decoder starts here. 
*/ ++ switch (SLICE(insn, 31, 30)) { ++ case 0b00: ++ ok = disInstr_LOONGARCH64_WRK_00(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = disInstr_LOONGARCH64_WRK_01(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ ++ /* If the next-level down decoders failed, make sure |dres| didn't ++ get changed. */ ++ if (!ok) { ++ vassert(dres->whatNext == Dis_Continue); ++ vassert(dres->len == 4); ++ vassert(dres->jk_StopHere == Ijk_INVALID); ++ } ++ return ok; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Top-level fn ---*/ ++/*------------------------------------------------------------*/ ++ ++/* Disassemble a single instruction into IR. The instruction ++ is located in host memory at &guest_code[delta]. */ ++ ++DisResult disInstr_LOONGARCH64 ( IRSB* irsb_IN, ++ const UChar* guest_code_IN, ++ Long delta_IN, ++ Addr guest_IP, ++ VexArch guest_arch, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo, ++ VexEndness host_endness_IN, ++ Bool sigill_diag_IN ) ++{ ++ DisResult dres; ++ vex_bzero(&dres, sizeof(dres)); ++ ++ /* Set globals (see top of this file) */ ++ vassert(guest_arch == VexArchLOONGARCH64); ++ ++ irsb = irsb_IN; ++ host_endness = host_endness_IN; ++ guest_PC_curr_instr = (Addr64)guest_IP; ++ ++ /* Try to decode */ ++ Bool ok = disInstr_LOONGARCH64_WRK(&dres, ++ &guest_code_IN[delta_IN], ++ archinfo, abiinfo, sigill_diag_IN); ++ ++ if (ok) { ++ /* All decode successes end up here. */ ++ vassert(dres.len == 4 || dres.len == 20); ++ switch (dres.whatNext) { ++ case Dis_Continue: ++ putPC(mkU64(dres.len + guest_PC_curr_instr)); ++ break; ++ case Dis_StopHere: ++ break; ++ default: ++ vassert(0); ++ break; ++ } ++ DIP("\n"); ++ } else { ++ /* All decode failures end up here. */ ++ if (sigill_diag_IN) { ++ Int i, j; ++ UChar buf[64]; ++ UInt insn = getUInt(&guest_code_IN[delta_IN]); ++ vex_bzero(buf, sizeof(buf)); ++ for (i = j = 0; i < 32; i++) { ++ if (i > 0 && (i & 3) == 0) ++ buf[j++] = ' '; ++ buf[j++] = (insn & (1 << (31 - i))) ? '1' : '0'; ++ } ++ vex_printf("disInstr(loongarch64): unhandled instruction 0x%08x\n", insn); ++ vex_printf("disInstr(loongarch64): %s\n", buf); ++ } ++ ++ /* Tell the dispatcher that this insn cannot be decoded, and so ++ has not been executed, and (is currently) the next to be ++ executed. PC should be up-to-date since it is made so at the ++ start of each insn, but nevertheless be paranoid and update ++ it again right now. */ ++ putPC(mkU64(guest_PC_curr_instr)); ++ dres.len = 0; ++ dres.whatNext = Dis_StopHere; ++ dres.jk_StopHere = Ijk_NoDecode; ++ } ++ ++ return dres; ++} ++ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end guest_loongarch64_toIR.c ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/VEX/priv/host_loongarch64_defs.c b/VEX/priv/host_loongarch64_defs.c +new file mode 100644 +index 000000000..9825a5e16 +--- /dev/null ++++ b/VEX/priv/host_loongarch64_defs.c +@@ -0,0 +1,3041 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin host_loongarch64_defs.c ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. 
++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "libvex_basictypes.h" ++#include "libvex.h" ++#include "libvex_trc_values.h" ++ ++#include "main_util.h" ++#include "host_generic_regs.h" ++#include "host_loongarch64_defs.h" ++ ++ ++/* --------- Local helpers. --------- */ ++ ++static inline void mapReg ( HRegRemap* m, HReg* r ) ++{ ++ *r = lookupHRegRemap(m, *r); ++} ++ ++static inline Int extend ( UInt imm, UInt size ) ++{ ++ UInt shift = 32 - size; ++ return (((Int)imm << shift) >> shift); ++} ++ ++ ++/* --------- Registers. --------- */ ++ ++const RRegUniverse* getRRegUniverse_LOONGARCH64 ( void ) ++{ ++ /* The real-register universe is a big constant, so we just want to ++ initialise it once. */ ++ static RRegUniverse rRegUniverse_LOONGARCH64; ++ static Bool rRegUniverse_LOONGARCH64_initted = False; ++ ++ /* Handy shorthand, nothing more */ ++ RRegUniverse* ru = &rRegUniverse_LOONGARCH64; ++ ++ /* This isn't thread-safe. Sigh. */ ++ if (LIKELY(rRegUniverse_LOONGARCH64_initted == True)) ++ return ru; ++ ++ RRegUniverse__init(ru); ++ ++ /* Add the registers. The initial segment of this array must be ++ those available for allocation by reg-alloc, and those that ++ follow are not available for allocation. */ ++ ru->allocable_start[HRcInt64] = ru->size; ++ ru->regs[ru->size++] = hregLOONGARCH64_R23(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R24(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R25(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R26(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R27(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R28(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R29(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R30(); ++ // $r31 is used as guest stack pointer, not available to regalloc. ++ ++ // $r12 is used as a chaining/ProfInc/Cmove/genSpill/genReload temporary ++ // $r13 is used as a ProfInc temporary ++ ru->regs[ru->size++] = hregLOONGARCH64_R14(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R15(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R16(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R17(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R18(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R19(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R20(); ++ ru->allocable_end[HRcInt64] = ru->size - 1; ++ ++ ru->allocable_start[HRcFlt64] = ru->size; ++ ru->regs[ru->size++] = hregLOONGARCH64_F24(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F25(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F26(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F27(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F28(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F29(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F30(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F31(); ++ ru->allocable_end[HRcFlt64] = ru->size - 1; ++ ++ ru->allocable = ru->size; ++ ++ /* And other regs, not available to the allocator. 
*/ ++ ru->regs[ru->size++] = hregLOONGARCH64_R0(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R1(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R2(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R3(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R4(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R5(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R6(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R7(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R8(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R9(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R10(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R11(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R12(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R13(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R21(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R22(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R31(); ++ ru->regs[ru->size++] = hregLOONGARCH64_FCSR3(); ++ ++ rRegUniverse_LOONGARCH64_initted = True; ++ ++ RRegUniverse__check_is_sane(ru); ++ return ru; ++} ++ ++UInt ppHRegLOONGARCH64 ( HReg reg ) ++{ ++ Int r; ++ Int ret = 0; ++ static const HChar* ireg_names[32] = { ++ "$zero", ++ "$ra", ++ "$tp", ++ "$sp", ++ "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", ++ "$r21", /* Reserved */ ++ "$fp", ++ "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7", "$s8" ++ }; ++ static const HChar* freg_names[32] = { ++ "$fa0", "$fa1", "$fa2", "$fa3", "$fa4", "$fa5", "$fa6", "$fa7", ++ "$ft0", "$ft1", "$ft2", "$ft3", "$ft4", "$ft5", "$ft6", "$ft7", ++ "$ft8", "$ft9", "$ft10", "$ft11", "$ft12", "$ft13", "$ft14", "$ft15", ++ "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", "$fs6", "$fs7" ++ }; ++ ++ /* Be generic for all virtual regs. */ ++ if (hregIsVirtual(reg)) { ++ return ppHReg(reg); ++ } ++ ++ /* But specific for real regs. */ ++ switch (hregClass(reg)) { ++ case HRcInt32: ++ r = hregEncoding(reg); ++ vassert(r < 4); ++ ret = vex_printf("$fcsr%d", r); ++ break; ++ case HRcInt64: ++ r = hregEncoding(reg); ++ vassert(r < 32); ++ ret = vex_printf("%s", ireg_names[r]); ++ break; ++ case HRcFlt64: ++ r = hregEncoding(reg); ++ vassert(r < 32); ++ ret = vex_printf("%s", freg_names[r]); ++ break; ++ default: ++ vpanic("ppHRegLOONGARCH64"); ++ break; ++ } ++ ++ return ret; ++} ++ ++ ++/* --------- Condition codes, LOONGARCH64 encoding. --------- */ ++ ++static inline const HChar* showLOONGARCH64CondCode ( LOONGARCH64CondCode cond ) ++{ ++ const HChar* ret; ++ switch (cond) { ++ case LAcc_EQ: ++ ret = "eq"; /* equal */ ++ break; ++ case LAcc_NE: ++ ret = "ne"; /* not equal */ ++ break; ++ case LAcc_LT: ++ ret = "lt"; /* less than (signed) */ ++ break; ++ case LAcc_GE: ++ ret = "ge"; /* great equal (signed) */ ++ break; ++ case LAcc_LTU: ++ ret = "ltu"; /* less than (unsigned) */ ++ break; ++ case LAcc_GEU: ++ ret = "geu"; /* great equal (unsigned) */ ++ break; ++ case LAcc_AL: ++ ret = "al"; /* always (unconditional) */ ++ break; ++ default: ++ vpanic("showLOONGARCH64CondCode"); ++ break; ++ } ++ return ret; ++} ++ ++ ++/* --------- Memory address expressions (amodes). 
--------- */ ++ ++LOONGARCH64AMode* LOONGARCH64AMode_RI ( HReg reg, UShort imm ) ++{ ++ LOONGARCH64AMode* am = LibVEX_Alloc_inline(sizeof(LOONGARCH64AMode)); ++ am->tag = LAam_RI; ++ am->LAam.RI.base = reg; ++ am->LAam.RI.index = imm; ++ return am; ++} ++ ++LOONGARCH64AMode* LOONGARCH64AMode_RR ( HReg base, HReg index ) ++{ ++ LOONGARCH64AMode* am = LibVEX_Alloc_inline(sizeof(LOONGARCH64AMode)); ++ am->tag = LAam_RR; ++ am->LAam.RR.base = base; ++ am->LAam.RR.index = index; ++ return am; ++} ++ ++static inline void ppLOONGARCH64AMode ( LOONGARCH64AMode* am ) ++{ ++ switch (am->tag) { ++ case LAam_RI: ++ ppHRegLOONGARCH64(am->LAam.RI.base); ++ vex_printf(", "); ++ vex_printf("%d", extend((UInt)am->LAam.RI.index, 12)); ++ break; ++ case LAam_RR: ++ ppHRegLOONGARCH64(am->LAam.RR.base); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(am->LAam.RR.index); ++ break; ++ default: ++ vpanic("ppLOONGARCH64AMode"); ++ break; ++ } ++} ++ ++static inline void addRegUsage_LOONGARCH64AMode( HRegUsage* u, ++ LOONGARCH64AMode* am ) ++{ ++ switch (am->tag) { ++ case LAam_RI: ++ addHRegUse(u, HRmRead, am->LAam.RI.base); ++ break; ++ case LAam_RR: ++ addHRegUse(u, HRmRead, am->LAam.RR.base); ++ addHRegUse(u, HRmRead, am->LAam.RR.index); ++ break; ++ default: ++ vpanic("addRegUsage_LOONGARCH64AMode"); ++ break; ++ } ++} ++ ++static inline void mapRegs_LOONGARCH64AMode( HRegRemap* m, ++ LOONGARCH64AMode* am ) ++{ ++ switch (am->tag) { ++ case LAam_RI: ++ mapReg(m, &am->LAam.RI.base); ++ break; ++ case LAam_RR: ++ mapReg(m, &am->LAam.RR.base); ++ mapReg(m, &am->LAam.RR.index); ++ break; ++ default: ++ vpanic("mapRegs_LOONGARCH64AMode"); ++ break; ++ } ++} ++ ++ ++/* --------- Operand, which can be reg or imm. --------- */ ++ ++LOONGARCH64RI* LOONGARCH64RI_R ( HReg reg ) ++{ ++ LOONGARCH64RI* op = LibVEX_Alloc_inline(sizeof(LOONGARCH64RI)); ++ op->tag = LAri_Reg; ++ op->LAri.R.reg = reg; ++ return op; ++} ++ ++LOONGARCH64RI* LOONGARCH64RI_I ( UShort imm, UChar size, Bool isSigned ) ++{ ++ LOONGARCH64RI* op = LibVEX_Alloc_inline(sizeof(LOONGARCH64RI)); ++ op->tag = LAri_Imm; ++ op->LAri.I.imm = imm; ++ op->LAri.I.size = size; ++ op->LAri.I.isSigned = isSigned; ++ vassert(imm < (1 << size)); ++ vassert(size == 5 || size == 6 || size == 12); ++ return op; ++} ++ ++static inline void ppLOONGARCH64RI ( LOONGARCH64RI* ri ) ++{ ++ switch (ri->tag) { ++ case LAri_Reg: ++ ppHRegLOONGARCH64(ri->LAri.R.reg); ++ break; ++ case LAri_Imm: ++ if (ri->LAri.I.isSigned) { ++ vex_printf("%d", extend((UInt)ri->LAri.I.imm, ri->LAri.I.size)); ++ } else { ++ vex_printf("%u", (UInt)ri->LAri.I.imm); ++ } ++ break; ++ default: ++ vpanic("ppLOONGARCH64RI"); ++ break; ++ } ++} ++ ++static inline void addRegUsage_LOONGARCH64RI( HRegUsage* u, LOONGARCH64RI* ri ) ++{ ++ switch (ri->tag) { ++ case LAri_Reg: ++ addHRegUse(u, HRmRead, ri->LAri.R.reg); ++ break; ++ case LAri_Imm: ++ break; ++ default: ++ vpanic("addRegUsage_LOONGARCH64RI"); ++ break; ++ } ++} ++ ++static inline void mapRegs_LOONGARCH64RI( HRegRemap* m, LOONGARCH64RI* ri ) ++{ ++ switch (ri->tag) { ++ case LAri_Reg: ++ mapReg(m, &ri->LAri.R.reg); ++ break; ++ case LAri_Imm: ++ break; ++ default: ++ vpanic("mapRegs_LOONGARCH64RI"); ++ break; ++ } ++} ++ ++ ++/* --------- Instructions. 
--------- */ ++ ++static inline const HChar* showLOONGARCH64UnOp ( LOONGARCH64UnOp op ) ++{ ++ switch (op) { ++ case LAun_CLZ_W: ++ return "clz.w"; ++ case LAun_CTZ_W: ++ return "ctz.w"; ++ case LAun_CLZ_D: ++ return "clz.d"; ++ case LAun_CTZ_D: ++ return "ctz.w"; ++ case LAun_EXT_W_H: ++ return "ext.w.h"; ++ case LAun_EXT_W_B: ++ return "ext.w.b"; ++ default: ++ vpanic("showLOONGARCH64UnOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64BinOp ( LOONGARCH64BinOp op ) ++{ ++ switch (op) { ++ case LAbin_ADD_W: ++ return "add.w"; ++ case LAbin_ADD_D: ++ return "add.d"; ++ case LAbin_SUB_W: ++ return "sub.w"; ++ case LAbin_SUB_D: ++ return "sub.d"; ++ case LAbin_NOR: ++ return "nor"; ++ case LAbin_AND: ++ return "and"; ++ case LAbin_OR: ++ return "or"; ++ case LAbin_XOR: ++ return "xor"; ++ case LAbin_SLL_W: ++ return "sll.w"; ++ case LAbin_SRL_W: ++ return "srl.w"; ++ case LAbin_SRA_W: ++ return "sra.w"; ++ case LAbin_SLL_D: ++ return "sll.d"; ++ case LAbin_SRL_D: ++ return "srl.d"; ++ case LAbin_SRA_D: ++ return "sra.d"; ++ case LAbin_MUL_W: ++ return "mul.w"; ++ case LAbin_MUL_D: ++ return "mul.d"; ++ case LAbin_MULH_W: ++ return "mulh.w"; ++ case LAbin_MULH_WU: ++ return "mulh.wu"; ++ case LAbin_MULH_D: ++ return "mulh.d"; ++ case LAbin_MULH_DU: ++ return "mulh.du"; ++ case LAbin_MULW_D_W: ++ return "mulw.d.w"; ++ case LAbin_MULW_D_WU: ++ return "mulw.d.wu"; ++ case LAbin_DIV_W: ++ return "div.w"; ++ case LAbin_MOD_W: ++ return "mod.w"; ++ case LAbin_DIV_WU: ++ return "div.wu"; ++ case LAbin_MOD_WU: ++ return "mod.wu"; ++ case LAbin_DIV_D: ++ return "div.d"; ++ case LAbin_MOD_D: ++ return "mod.d"; ++ case LAbin_DIV_DU: ++ return "div.du"; ++ case LAbin_MOD_DU: ++ return "mod.du"; ++ case LAbin_SLLI_W: ++ return "slli.w"; ++ case LAbin_SLLI_D: ++ return "slli.d"; ++ case LAbin_SRLI_W: ++ return "srli.w"; ++ case LAbin_SRLI_D: ++ return "srli.d"; ++ case LAbin_SRAI_W: ++ return "srai.w"; ++ case LAbin_SRAI_D: ++ return "srai.d"; ++ case LAbin_ADDI_W: ++ return "addi.w"; ++ case LAbin_ADDI_D: ++ return "addi.d"; ++ case LAbin_ANDI: ++ return "andi"; ++ case LAbin_ORI: ++ return "ori"; ++ case LAbin_XORI: ++ return "xori"; ++ default: ++ vpanic("showLOONGARCH64BinOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64LoadOp ( LOONGARCH64LoadOp op ) ++{ ++ switch (op) { ++ case LAload_LD_D: ++ return "ld.d"; ++ case LAload_LD_BU: ++ return "ld.bu"; ++ case LAload_LD_HU: ++ return "ld.hu"; ++ case LAload_LD_WU: ++ return "ld.wu"; ++ case LAload_LDX_D: ++ return "ldx.d"; ++ case LAload_LDX_BU: ++ return "ldx.bu"; ++ case LAload_LDX_HU: ++ return "ldx.hu"; ++ case LAload_LDX_WU: ++ return "ldx.wu"; ++ default: ++ vpanic("LOONGARCH64LoadOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64StoreOp ( LOONGARCH64StoreOp op ) ++{ ++ switch (op) { ++ case LAstore_ST_B: ++ return "st.b"; ++ case LAstore_ST_H: ++ return "st.h"; ++ case LAstore_ST_W: ++ return "st.w"; ++ case LAstore_ST_D: ++ return "st.d"; ++ case LAstore_STX_B: ++ return "stx.b"; ++ case LAstore_STX_H: ++ return "stx.h"; ++ case LAstore_STX_W: ++ return "stx.w"; ++ case LAstore_STX_D: ++ return "stx.d"; ++ default: ++ vpanic("LOONGARCH64StoreOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64LLSCOp ( LOONGARCH64LLSCOp op ) ++{ ++ switch (op) { ++ case LAllsc_LL_W: ++ return "ll.w"; ++ case LAllsc_SC_W: ++ return "sc.w"; ++ case LAllsc_LL_D: ++ return "ll.d"; ++ case LAllsc_SC_D: ++ return "sc.d"; ++ default: ++ vpanic("LOONGARCH64LLSCOp"); ++ break; ++ } ++} ++ 
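The decoder hunks above resolve an instruction purely by fixed bit fields: disInstr_LOONGARCH64_WRK switches on SLICE(insn, 31, 30), and each lower-level disInstr_LOONGARCH64_WRK_* function peels a further field off the 32-bit word until a gen_* handler is chosen, with every unmatched case falling through to ok = False. The definition of SLICE lies outside this excerpt, so what follows is only a minimal standalone sketch: it assumes SLICE(insn, hi, lo) yields the inclusive bit field [hi:lo], uses plain stdint types rather than VEX's UInt, and the helper name decode_subset and the hand-assembled word 0x001098a4 are hypothetical illustrations, not code from the patch. The opcode field values are copied from the switch arms above.

#include <stdio.h>
#include <stdint.h>

/* Assumed semantics of the patch's SLICE: extract bits [hi:lo], inclusive. */
#define SLICE(insn, hi, lo) \
   (((uint32_t)(insn) >> (lo)) & ((1u << ((hi) - (lo) + 1)) - 1u))

/* Recognise just ADD.W / ADD.D the same way the nested switches above do,
   narrowing the opcode space one bit field at a time. */
static const char* decode_subset(uint32_t insn)
{
   if (SLICE(insn, 31, 30) != 0)      /* level of disInstr_LOONGARCH64_WRK     */
      return "not handled here";
   if (SLICE(insn, 29, 26) != 0)      /* level of disInstr_LOONGARCH64_WRK_00  */
      return "not handled here";
   if (SLICE(insn, 25, 22) != 0)      /* level of ..._WRK_00_0000              */
      return "not handled here";
   switch (SLICE(insn, 21, 15)) {     /* 7-bit opcode field                    */
      case 0x20: return "add.w";      /* 0b0100000 in the patch                */
      case 0x21: return "add.d";      /* 0b0100001 in the patch                */
      default:   return "unknown";
   }
}

int main(void)
{
   /* Hand-assembled add.d $a0, $a1, $a2 (rd=4, rj=5, rk=6); verify against
      the LoongArch reference manual before relying on this value. */
   printf("%s\n", decode_subset(0x001098a4u));
   return 0;
}

This field-by-field dispatch mirrors the LoongArch opcode layout, so each switch stays small, and a decode failure simply propagates False back up to the caller, which turns it into the Ijk_NoDecode path shown later in the hunk.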
++static inline const HChar* showLOONGARCH64BarOp ( LOONGARCH64BarOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAbar_DBAR: ++ return "dbar"; ++ case LAbar_IBAR: ++ return "ibar"; ++ default: ++ vpanic("showLOONGARCH64BarOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpUnOp ( LOONGARCH64FpUnOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpun_FABS_S: ++ return "fabs.s"; ++ case LAfpun_FABS_D: ++ return "fabs.d"; ++ case LAfpun_FNEG_S: ++ return "fneg.s"; ++ case LAfpun_FNEG_D: ++ return "fneg.d"; ++ case LAfpun_FLOGB_S: ++ return "flogb.s"; ++ case LAfpun_FLOGB_D: ++ return "flogb.d"; ++ case LAfpun_FSQRT_S: ++ return "fsqrt.s"; ++ case LAfpun_FSQRT_D: ++ return "fsqrt.d"; ++ case LAfpun_FRSQRT_S: ++ return "frsqrt.s"; ++ case LAfpun_FRSQRT_D: ++ return "frsqrt.d"; ++ case LAfpun_FCVT_S_D: ++ return "fcvt.s.d"; ++ case LAfpun_FCVT_D_S: ++ return "fcvt.d.s"; ++ case LAfpun_FTINT_W_S: ++ return "ftint.w.s"; ++ case LAfpun_FTINT_W_D: ++ return "ftint.w.d"; ++ case LAfpun_FTINT_L_S: ++ return "ftint.l.s"; ++ case LAfpun_FTINT_L_D: ++ return "ftint.l.d"; ++ case LAfpun_FFINT_S_W: ++ return "ffint.s.w"; ++ case LAfpun_FFINT_S_L: ++ return "ffint.s.l"; ++ case LAfpun_FFINT_D_W: ++ return "ffint.d.w"; ++ case LAfpun_FFINT_D_L: ++ return "ffint.d.l"; ++ case LAfpun_FRINT_S: ++ return "frint.s"; ++ case LAfpun_FRINT_D: ++ return "frint.d"; ++ default: ++ vpanic("showLOONGARCH64FpUnOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpBinOp ( LOONGARCH64FpBinOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpbin_FADD_S: ++ return "fadd.s"; ++ case LAfpbin_FADD_D: ++ return "fadd.d"; ++ case LAfpbin_FSUB_S: ++ return "fsub.s"; ++ case LAfpbin_FSUB_D: ++ return "fsub.d"; ++ case LAfpbin_FMUL_S: ++ return "fmul.s"; ++ case LAfpbin_FMUL_D: ++ return "fmul.d"; ++ case LAfpbin_FDIV_S: ++ return "fdiv.s"; ++ case LAfpbin_FDIV_D: ++ return "fdiv.d"; ++ case LAfpbin_FMAX_S: ++ return "fmax.s"; ++ case LAfpbin_FMAX_D: ++ return "fmax.d"; ++ case LAfpbin_FMIN_S: ++ return "fmin.s"; ++ case LAfpbin_FMIN_D: ++ return "fmin.d"; ++ case LAfpbin_FMAXA_S: ++ return "fmaxa.s"; ++ case LAfpbin_FMAXA_D: ++ return "fmaxa.d"; ++ case LAfpbin_FMINA_S: ++ return "fmina.s"; ++ case LAfpbin_FMINA_D: ++ return "fmina.d"; ++ case LAfpbin_FSCALEB_S: ++ return "fscaleb.s"; ++ case LAfpbin_FSCALEB_D: ++ return "fscaleb.d"; ++ default: ++ vpanic("showLOONGARCH64FpBinOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpTriOp ( LOONGARCH64FpTriOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpbin_FMADD_S: ++ return "fmadd.s"; ++ case LAfpbin_FMADD_D: ++ return "fmadd.d"; ++ case LAfpbin_FMSUB_S: ++ return "fmsub.s"; ++ case LAfpbin_FMSUB_D: ++ return "fmsub.d"; ++ default: ++ vpanic("showLOONGARCH64FpTriOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpLoadOp ( LOONGARCH64FpLoadOp op ) ++{ ++ switch (op) { ++ case LAfpload_FLD_S: ++ return "fld.s"; ++ case LAfpload_FLD_D: ++ return "fld.d"; ++ case LAfpload_FLDX_S: ++ return "fldx.s"; ++ case LAfpload_FLDX_D: ++ return "fldx.d"; ++ default: ++ vpanic("LOONGARCH64FpLoadOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64FpStoreOp ( LOONGARCH64FpStoreOp op ) ++{ ++ switch (op) { ++ case LAfpstore_FST_S: ++ return "fst.s"; ++ case LAfpstore_FST_D: ++ return "fst.d"; ++ case LAfpstore_FSTX_S: ++ return "fstx.s"; ++ case LAfpstore_FSTX_D: ++ return "fstx.d"; ++ default: ++ 
vpanic("LOONGARCH64FpStoreOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64FpMoveOp ( LOONGARCH64FpMoveOp op ) ++{ ++ switch (op) { ++ case LAfpmove_FMOV_S: ++ return "fmov.s"; ++ case LAfpmove_FMOV_D: ++ return "fmov.d"; ++ case LAfpmove_MOVGR2FR_W: ++ return "movgr2fr.w"; ++ case LAfpmove_MOVGR2FR_D: ++ return "movgr2fr.d"; ++ case LAfpmove_MOVFR2GR_S: ++ return "movfr2gr.s"; ++ case LAfpmove_MOVFR2GR_D: ++ return "movfr2gr.d"; ++ case LAfpmove_MOVGR2FCSR: ++ return "movgr2fcsr"; ++ case LAfpmove_MOVFCSR2GR: ++ return "movfcsr2gr"; ++ default: ++ vpanic("showLOONGARCH64FpMoveOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64FpCmpOp ( LOONGARCH64FpCmpOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpcmp_FCMP_CLT_S: ++ return "fcmp.clt.s"; ++ case LAfpcmp_FCMP_CLT_D: ++ return "fcmp.clt.d"; ++ case LAfpcmp_FCMP_CEQ_S: ++ return "fcmp.ceq.s"; ++ case LAfpcmp_FCMP_CEQ_D: ++ return "fcmp.ceq.d"; ++ case LAfpcmp_FCMP_CUN_S: ++ return "fcmp.cun.s"; ++ case LAfpcmp_FCMP_CUN_D: ++ return "fcmp.cun.d"; ++ default: ++ vpanic("showLOONGARCH64FpCmpOp"); ++ break; ++ } ++ return ret; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_LI ( ULong imm, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_LI; ++ i->LAin.LI.imm = imm; ++ i->LAin.LI.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Unary ( LOONGARCH64UnOp op, ++ HReg src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Un; ++ i->LAin.Unary.op = op; ++ i->LAin.Unary.src = src; ++ i->LAin.Unary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Binary ( LOONGARCH64BinOp op, ++ LOONGARCH64RI* src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Bin; ++ i->LAin.Binary.op = op; ++ i->LAin.Binary.src2 = src2; ++ i->LAin.Binary.src1 = src1; ++ i->LAin.Binary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Load ( LOONGARCH64LoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Load; ++ i->LAin.Load.op = op; ++ i->LAin.Load.src = src; ++ i->LAin.Load.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Store ( LOONGARCH64StoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Store; ++ i->LAin.Store.op = op; ++ i->LAin.Store.dst = dst; ++ i->LAin.Store.src = src; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_LLSC ( LOONGARCH64LLSCOp op, Bool isLoad, ++ LOONGARCH64AMode* addr, HReg val ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_LLSC; ++ i->LAin.LLSC.op = op; ++ i->LAin.LLSC.isLoad = isLoad; ++ i->LAin.LLSC.addr = addr; ++ i->LAin.LLSC.val = val; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Bar ( LOONGARCH64BarOp op, UShort hint ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Bar; ++ i->LAin.Bar.op = op; ++ i->LAin.Bar.hint = hint; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpUnary ( LOONGARCH64FpUnOp op, ++ HReg src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpUn; ++ i->LAin.FpUnary.op = op; ++ i->LAin.FpUnary.src = src; ++ i->LAin.FpUnary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* 
LOONGARCH64Instr_FpBinary ( LOONGARCH64FpBinOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpBin; ++ i->LAin.FpBinary.op = op; ++ i->LAin.FpBinary.src2 = src2; ++ i->LAin.FpBinary.src1 = src1; ++ i->LAin.FpBinary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpTrinary ( LOONGARCH64FpTriOp op, ++ HReg src3, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpTri; ++ i->LAin.FpTrinary.op = op; ++ i->LAin.FpTrinary.src3 = src3; ++ i->LAin.FpTrinary.src2 = src2; ++ i->LAin.FpTrinary.src1 = src1; ++ i->LAin.FpTrinary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpLoad ( LOONGARCH64FpLoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpLoad; ++ i->LAin.FpLoad.op = op; ++ i->LAin.FpLoad.src = src; ++ i->LAin.FpLoad.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpStore ( LOONGARCH64FpStoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpStore; ++ i->LAin.FpStore.op = op; ++ i->LAin.FpStore.dst = dst; ++ i->LAin.FpStore.src = src; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpMove ( LOONGARCH64FpMoveOp op, ++ HReg src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpMove; ++ i->LAin.FpMove.op = op; ++ i->LAin.FpMove.src = src; ++ i->LAin.FpMove.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpCmp ( LOONGARCH64FpCmpOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpCmp; ++ i->LAin.FpCmp.op = op; ++ i->LAin.FpCmp.src2 = src2; ++ i->LAin.FpCmp.src1 = src1; ++ i->LAin.FpCmp.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Cas ( HReg old, HReg addr, HReg expd, ++ HReg data, Bool size64 ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Cas; ++ i->LAin.Cas.old = old; ++ i->LAin.Cas.addr = addr; ++ i->LAin.Cas.expd = expd; ++ i->LAin.Cas.data = data; ++ i->LAin.Cas.size64 = size64; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Cmp ( LOONGARCH64CondCode cond, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Cmp; ++ i->LAin.Cmp.cond = cond; ++ i->LAin.Cmp.src2 = src2; ++ i->LAin.Cmp.src1 = src1; ++ i->LAin.Cmp.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_CMove ( HReg cond, HReg r0, HReg r1, ++ HReg dst, Bool isInt ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_CMove; ++ i->LAin.CMove.cond = cond; ++ i->LAin.CMove.r0 = r0; ++ i->LAin.CMove.r1 = r1; ++ i->LAin.CMove.dst = dst; ++ i->LAin.CMove.isInt = isInt; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Call ( HReg cond, Addr64 target, ++ UInt nArgRegs, RetLoc rloc ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Call; ++ i->LAin.Call.cond = cond; ++ i->LAin.Call.target = target; ++ i->LAin.Call.nArgRegs = nArgRegs; ++ i->LAin.Call.rloc = rloc; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_XDirect ( Addr64 dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP ) ++{ ++ 
LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_XDirect; ++ i->LAin.XDirect.dstGA = dstGA; ++ i->LAin.XDirect.amPC = amPC; ++ i->LAin.XDirect.cond = cond; ++ i->LAin.XDirect.toFastEP = toFastEP; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_XIndir ( HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_XIndir; ++ i->LAin.XIndir.dstGA = dstGA; ++ i->LAin.XIndir.amPC = amPC; ++ i->LAin.XIndir.cond = cond; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_XAssisted ( HReg dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_XAssisted; ++ i->LAin.XAssisted.dstGA = dstGA; ++ i->LAin.XAssisted.amPC = amPC; ++ i->LAin.XAssisted.cond = cond; ++ i->LAin.XAssisted.jk = jk; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_EvCheck ( LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_EvCheck; ++ i->LAin.EvCheck.amCounter = amCounter; ++ i->LAin.EvCheck.amFailAddr = amFailAddr; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_ProfInc ( void ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_ProfInc; ++ return i; ++} ++ ++ ++/* -------- Pretty Print instructions ------------- */ ++ ++static inline void ppLI ( ULong imm, HReg dst ) ++{ ++ vex_printf("li "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", 0x%llx", imm); ++} ++ ++static inline void ppUnary ( LOONGARCH64UnOp op, HReg src, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64UnOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src); ++} ++ ++static inline void ppBinary ( LOONGARCH64BinOp op, LOONGARCH64RI* src2, ++ HReg src1, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64BinOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppLOONGARCH64RI(src2); ++} ++ ++static inline void ppLoad ( LOONGARCH64LoadOp op, LOONGARCH64AMode* src, ++ HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64LoadOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(src); ++} ++ ++static inline void ppStore ( LOONGARCH64StoreOp op, LOONGARCH64AMode* dst, ++ HReg src ) ++{ ++ vex_printf("%s ", showLOONGARCH64StoreOp(op)); ++ ppHRegLOONGARCH64(src); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(dst); ++} ++ ++static inline void ppLLSC ( LOONGARCH64LLSCOp op, LOONGARCH64AMode* addr, ++ HReg val ) ++{ ++ vex_printf("%s ", showLOONGARCH64LLSCOp(op)); ++ ppHRegLOONGARCH64(val); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(addr); ++} ++ ++static inline void ppBar ( LOONGARCH64BarOp op, UShort hint ) ++{ ++ vex_printf("%s %u", showLOONGARCH64BarOp(op), (UInt)hint); ++} ++ ++static inline void ppFpUnary ( LOONGARCH64FpUnOp op, HReg src, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpUnOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src); ++} ++ ++static inline void ppFpBinary ( LOONGARCH64FpBinOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpBinOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++} ++ ++static inline void ppFpTrinary ( LOONGARCH64FpTriOp op, HReg src3, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ 
vex_printf("%s ", showLOONGARCH64FpTriOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src3); ++} ++ ++static inline void ppFpLoad ( LOONGARCH64FpLoadOp op, LOONGARCH64AMode* src, ++ HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpLoadOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(src); ++} ++ ++static inline void ppFpStore ( LOONGARCH64FpStoreOp op, LOONGARCH64AMode* dst, ++ HReg src ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpStoreOp(op)); ++ ppHRegLOONGARCH64(src); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(dst); ++} ++ ++static inline void ppFpMove ( LOONGARCH64FpMoveOp op, HReg src, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpMoveOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src); ++} ++ ++static inline void ppFpCmp ( LOONGARCH64FpCmpOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpCmpOp(op)); ++ vex_printf("$fcc0, "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++ vex_printf("; movcf2gr "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", $fcc0"); ++} ++ ++static inline void ppCas ( HReg old, HReg addr, HReg expd, ++ HReg data, Bool size64) ++{ ++ ppHRegLOONGARCH64(old); ++ vex_printf(" = cas(%dbit)(", size64 ? 64 : 32); ++ ppHRegLOONGARCH64(expd); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(data); ++ vex_printf(" -> "); ++ ppHRegLOONGARCH64(addr); ++ vex_printf(")"); ++} ++ ++static inline void ppCmp ( LOONGARCH64CondCode cond, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ ppHRegLOONGARCH64(dst); ++ vex_printf(" = cmp%s(", showLOONGARCH64CondCode(cond)); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++ vex_printf(")"); ++} ++ ++static inline void ppCMove ( HReg cond, HReg r0, HReg r1, ++ HReg dst, Bool isInt ) ++{ ++ if (isInt) { ++ vex_printf("masknez $t0, "); ++ ppHRegLOONGARCH64(r0); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(cond); ++ vex_printf("; maskeqz "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(r1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(cond); ++ vex_printf("; or "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", $t0, "); ++ ppHRegLOONGARCH64(dst); ++ } else { ++ vex_printf("movgr2cf "); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(", $fcc0; fsel "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(r0); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(r1); ++ vex_printf(", $fcc0"); ++ } ++} ++ ++static inline void ppCall ( HReg cond, Addr64 target, ++ UInt nArgRegs, RetLoc rloc ) ++{ ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("call 0x%llx [nArgRegs=%u, ", target, nArgRegs); ++ ppRetLoc(rloc); ++ vex_printf("]"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppXDirect ( Addr64 dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP ) ++{ ++ vex_printf("(xDirect) "); ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("li $t0, 0x%llx; ", (ULong)dstGA); ++ vex_printf("st.w $t0, "); ++ ppLOONGARCH64AMode(amPC); ++ vex_printf("; li $t0, $disp_cp_chain_me_to_%sEP; ", ++ toFastEP ? 
"fast" : "slow"); ++ vex_printf("jirl $ra, $t0, 0"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppXIndir ( HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond ) ++{ ++ vex_printf("(xIndir) "); ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("st.w "); ++ ppHRegLOONGARCH64(dstGA); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(amPC); ++ vex_printf("; la $t0, disp_indir; "); ++ vex_printf("jirl $ra, $t0, 0"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppXAssisted ( HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk) ++{ ++ vex_printf("(xAssisted) "); ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("st.w "); ++ ppHRegLOONGARCH64(dstGA); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(amPC); ++ vex_printf("; li.w $s8, IRJumpKind_to_TRCVAL(%d); ", (Int)jk); ++ vex_printf("la $t0, disp_assisted; "); ++ vex_printf("jirl $ra, $t0, 0"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppEvCheck ( LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ) ++{ ++ vex_printf("(evCheck) "); ++ vex_printf("ld.w $t0, "); ++ ppLOONGARCH64AMode(amCounter); ++ vex_printf("; addi.d $t0, $t0, -1; "); ++ vex_printf("st.w $t0, "); ++ ppLOONGARCH64AMode(amCounter); ++ vex_printf("; bge $t0, $zero, nofail; "); ++ vex_printf("ld.d $t0, "); ++ ppLOONGARCH64AMode(amFailAddr); ++ vex_printf("; jirl $ra, $t0, 0"); ++ vex_printf("; nofail:"); ++} ++ ++static inline void ppProfInc ( void ) ++{ ++ vex_printf("(profInc) "); ++ vex_printf("li $t0, NotKnownYet; "); ++ vex_printf("ld.d $t1, $t0, 0; "); ++ vex_printf("addi.d $t1, $t1, 1; "); ++ vex_printf("st.d $t1, $t0, 0;"); ++} ++ ++void ppLOONGARCH64Instr ( const LOONGARCH64Instr* i, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ switch (i->tag) { ++ case LAin_LI: ++ ppLI(i->LAin.LI.imm, i->LAin.LI.dst); ++ break; ++ case LAin_Un: ++ ppUnary(i->LAin.Unary.op, i->LAin.Unary.src, i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ ppBinary(i->LAin.Binary.op, i->LAin.Binary.src2, ++ i->LAin.Binary.src1, i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ ppLoad(i->LAin.Load.op, i->LAin.Load.src, i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ ppStore(i->LAin.Store.op, i->LAin.Store.dst, i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ ppLLSC(i->LAin.LLSC.op, i->LAin.LLSC.addr, i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ ppBar(i->LAin.Bar.op, i->LAin.Bar.hint); ++ break; ++ case LAin_FpUn: ++ ppFpUnary(i->LAin.FpUnary.op, i->LAin.FpUnary.src, ++ i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ ppFpBinary(i->LAin.FpBinary.op, i->LAin.FpBinary.src2, ++ i->LAin.FpBinary.src1, i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ ppFpTrinary(i->LAin.FpTrinary.op, i->LAin.FpTrinary.src3, ++ i->LAin.FpTrinary.src2, i->LAin.FpTrinary.src1, ++ i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ ppFpLoad(i->LAin.FpLoad.op, i->LAin.FpLoad.src, i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ ppFpStore(i->LAin.FpStore.op, i->LAin.FpStore.dst, ++ i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ ppFpMove(i->LAin.FpMove.op, i->LAin.FpMove.src, ++ i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ ppFpCmp(i->LAin.FpCmp.op, i->LAin.FpCmp.src2, ++ i->LAin.FpCmp.src1, i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ ppCas(i->LAin.Cas.old, i->LAin.Cas.addr, i->LAin.Cas.expd, ++ i->LAin.Cas.data, 
i->LAin.Cas.size64); ++ break; ++ case LAin_Cmp: ++ ppCmp(i->LAin.Cmp.cond, i->LAin.Cmp.src2, ++ i->LAin.Cmp.src1, i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ ppCMove(i->LAin.CMove.cond, i->LAin.CMove.r0, ++ i->LAin.CMove.r1, i->LAin.CMove.dst, ++ i->LAin.CMove.isInt); ++ break; ++ case LAin_Call: ++ ppCall(i->LAin.Call.cond, i->LAin.Call.target, ++ i->LAin.Call.nArgRegs, i->LAin.Call.rloc); ++ break; ++ case LAin_XDirect: ++ ppXDirect(i->LAin.XDirect.dstGA, i->LAin.XDirect.amPC, ++ i->LAin.XDirect.cond, i->LAin.XDirect.toFastEP); ++ break; ++ case LAin_XIndir: ++ ppXIndir(i->LAin.XIndir.dstGA, i->LAin.XIndir.amPC, ++ i->LAin.XIndir.cond); ++ break; ++ case LAin_XAssisted: ++ ppXAssisted(i->LAin.XAssisted.dstGA, i->LAin.XAssisted.amPC, ++ i->LAin.XAssisted.cond, i->LAin.XAssisted.jk); ++ break; ++ case LAin_EvCheck: ++ ppEvCheck(i->LAin.EvCheck.amCounter, i->LAin.EvCheck.amFailAddr); ++ break; ++ case LAin_ProfInc: ++ ppProfInc(); ++ break; ++ default: ++ vpanic("ppLOONGARCH64Instr"); ++ break; ++ } ++} ++ ++ ++/* --------- Helpers for register allocation. --------- */ ++ ++void getRegUsage_LOONGARCH64Instr ( HRegUsage* u, const LOONGARCH64Instr* i, ++ Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ initHRegUsage(u); ++ switch (i->tag) { ++ case LAin_LI: ++ addHRegUse(u, HRmWrite, i->LAin.LI.dst); ++ break; ++ case LAin_Un: ++ addHRegUse(u, HRmRead, i->LAin.Unary.src); ++ addHRegUse(u, HRmWrite, i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ addRegUsage_LOONGARCH64RI(u, i->LAin.Binary.src2); ++ addHRegUse(u, HRmRead, i->LAin.Binary.src1); ++ addHRegUse(u, HRmWrite, i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.Load.src); ++ addHRegUse(u, HRmWrite, i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.Store.dst); ++ addHRegUse(u, HRmRead, i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.LLSC.addr); ++ if (i->LAin.LLSC.isLoad) ++ addHRegUse(u, HRmWrite, i->LAin.LLSC.val); ++ else ++ addHRegUse(u, HRmRead, i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ /* No regs. 
*/ ++ break; ++ case LAin_FpUn: ++ addHRegUse(u, HRmRead, i->LAin.FpUnary.src); ++ addHRegUse(u, HRmWrite, i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ addHRegUse(u, HRmRead, i->LAin.FpBinary.src2); ++ addHRegUse(u, HRmRead, i->LAin.FpBinary.src1); ++ addHRegUse(u, HRmWrite, i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ addHRegUse(u, HRmRead, i->LAin.FpTrinary.src3); ++ addHRegUse(u, HRmRead, i->LAin.FpTrinary.src2); ++ addHRegUse(u, HRmRead, i->LAin.FpTrinary.src1); ++ addHRegUse(u, HRmWrite, i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.FpLoad.src); ++ addHRegUse(u, HRmWrite, i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.FpStore.dst); ++ addHRegUse(u, HRmRead, i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ addHRegUse(u, HRmRead, i->LAin.FpMove.src); ++ addHRegUse(u, HRmWrite, i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ addHRegUse(u, HRmRead, i->LAin.FpCmp.src2); ++ addHRegUse(u, HRmRead, i->LAin.FpCmp.src1); ++ addHRegUse(u, HRmWrite, i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ addHRegUse(u, HRmWrite, i->LAin.Cas.old); ++ addHRegUse(u, HRmRead, i->LAin.Cas.addr); ++ addHRegUse(u, HRmRead, i->LAin.Cas.expd); ++ addHRegUse(u, HRmModify, i->LAin.Cas.data); ++ break; ++ case LAin_Cmp: ++ addHRegUse(u, HRmRead, i->LAin.Cmp.src2); ++ addHRegUse(u, HRmRead, i->LAin.Cmp.src1); ++ addHRegUse(u, HRmWrite, i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ addHRegUse(u, HRmRead, i->LAin.CMove.cond); ++ addHRegUse(u, HRmRead, i->LAin.CMove.r0); ++ addHRegUse(u, HRmRead, i->LAin.CMove.r1); ++ addHRegUse(u, HRmWrite, i->LAin.CMove.dst); ++ break; ++ case LAin_Call: ++ /* logic and comments copied/modified from mips and arm64 back end */ ++ /* This is a bit subtle. */ ++ /* First off, we need to consider the cond register. */ ++ if (!hregIsInvalid(i->LAin.Call.cond)) ++ addHRegUse(u, HRmRead, i->LAin.Call.cond); ++ /* Then, claim it trashes all the caller-saved regs ++ which fall within the register allocator's jurisdiction. */ ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R14()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R15()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R16()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R17()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R18()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R19()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R20()); ++ /* Now we have to state any parameter-carrying registers ++ which might be read. This depends on nArgRegs. */ ++ switch (i->LAin.Call.nArgRegs) { ++ case 8: addHRegUse(u, HRmRead, hregLOONGARCH64_R11()); /* fallthrough */ ++ case 7: addHRegUse(u, HRmRead, hregLOONGARCH64_R10()); /* fallthrough */ ++ case 6: addHRegUse(u, HRmRead, hregLOONGARCH64_R9()); /* fallthrough */ ++ case 5: addHRegUse(u, HRmRead, hregLOONGARCH64_R8()); /* fallthrough */ ++ case 4: addHRegUse(u, HRmRead, hregLOONGARCH64_R7()); /* fallthrough */ ++ case 3: addHRegUse(u, HRmRead, hregLOONGARCH64_R6()); /* fallthrough */ ++ case 2: addHRegUse(u, HRmRead, hregLOONGARCH64_R5()); /* fallthrough */ ++ case 1: addHRegUse(u, HRmRead, hregLOONGARCH64_R4()); /* fallthrough */ ++ case 0: break; ++ default: vpanic("getRegUsage_LOONGARCH64:Call:regparms"); break; ++ } ++ /* Finally, there is the issue that the insn trashes a ++ register because the literal target address has to be ++ loaded into a register. However, we reserve $t0 for that ++ purpose so there's no further complexity here. 
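++         (For reference: the argument registers $a0..$a7 are $r4..$r11,
++         which is what the fall-through switch above reads one register at
++         a time, and $t0 is $r12, one of the caller-saved temporaries.)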
Stating $t0 ++ as trashed is pointless since it's not under the control ++ of the allocator, but what the hell. */ ++ addHRegUse(u, HRmWrite, hregT0()); ++ break; ++ /* XDirect/XIndir/XAssisted are also a bit subtle. They ++ conditionally exit the block. Hence we only need to list (1) ++ the registers that they read, and (2) the registers that they ++ write in the case where the block is not exited. (2) is ++ empty, hence only (1) is relevant here. */ ++ case LAin_XDirect: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.XDirect.amPC); ++ if (!hregIsInvalid(i->LAin.XDirect.cond)) ++ addHRegUse(u, HRmRead, i->LAin.XDirect.cond); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_XIndir: ++ addHRegUse(u, HRmRead, i->LAin.XIndir.dstGA); ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.XIndir.amPC); ++ if (!hregIsInvalid(i->LAin.XIndir.cond)) ++ addHRegUse(u, HRmRead, i->LAin.XIndir.cond); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_XAssisted: ++ addHRegUse(u, HRmRead, i->LAin.XAssisted.dstGA); ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.XAssisted.amPC); ++ if (!hregIsInvalid(i->LAin.XAssisted.cond)) ++ addHRegUse(u, HRmRead, i->LAin.XAssisted.cond); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_EvCheck: ++ /* We expect both amodes only to mention $r31, so this is in ++ fact pointless, since $r31 isn't allocatable, but anyway.. */ ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.EvCheck.amCounter); ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.EvCheck.amFailAddr); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_ProfInc: ++ /* Again, pointless to actually state these since neither ++ is available to RA. */ ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ addHRegUse(u, HRmWrite, hregT1()); /* unavail to RA */ ++ break; ++ default: ++ ppLOONGARCH64Instr(i, mode64); ++ vpanic("getRegUsage_LOONGARCH64Instr"); ++ break; ++ } ++} ++ ++void mapRegs_LOONGARCH64Instr ( HRegRemap* m, LOONGARCH64Instr* i, ++ Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ switch (i->tag) { ++ case LAin_LI: ++ mapReg(m, &i->LAin.LI.dst); ++ break; ++ case LAin_Un: ++ mapReg(m, &i->LAin.Unary.src); ++ mapReg(m, &i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ mapRegs_LOONGARCH64RI(m, i->LAin.Binary.src2); ++ mapReg(m, &i->LAin.Binary.src1); ++ mapReg(m, &i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.Load.src); ++ mapReg(m, &i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.Store.dst); ++ mapReg(m, &i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.LLSC.addr); ++ mapReg(m, &i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ /* No regs. 
*/ ++ break; ++ case LAin_FpUn: ++ mapReg(m, &i->LAin.FpUnary.src); ++ mapReg(m, &i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ mapReg(m, &i->LAin.FpBinary.src2); ++ mapReg(m, &i->LAin.FpBinary.src1); ++ mapReg(m, &i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ mapReg(m, &i->LAin.FpTrinary.src3); ++ mapReg(m, &i->LAin.FpTrinary.src2); ++ mapReg(m, &i->LAin.FpTrinary.src1); ++ mapReg(m, &i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.FpLoad.src); ++ mapReg(m, &i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.FpStore.dst); ++ mapReg(m, &i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ mapReg(m, &i->LAin.FpMove.src); ++ mapReg(m, &i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ mapReg(m, &i->LAin.FpCmp.src2); ++ mapReg(m, &i->LAin.FpCmp.src1); ++ mapReg(m, &i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ mapReg(m, &i->LAin.Cas.old); ++ mapReg(m, &i->LAin.Cas.addr); ++ mapReg(m, &i->LAin.Cas.expd); ++ mapReg(m, &i->LAin.Cas.data); ++ break; ++ case LAin_Cmp: ++ mapReg(m, &i->LAin.Cmp.src2); ++ mapReg(m, &i->LAin.Cmp.src1); ++ mapReg(m, &i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ mapReg(m, &i->LAin.CMove.cond); ++ mapReg(m, &i->LAin.CMove.r0); ++ mapReg(m, &i->LAin.CMove.r1); ++ mapReg(m, &i->LAin.CMove.dst); ++ break; ++ case LAin_Call: ++ if (!hregIsInvalid(i->LAin.Call.cond)) ++ mapReg(m, &i->LAin.Call.cond); ++ /* Hardwires $r12. */ ++ break; ++ /* XDirect/XIndir/XAssisted are also a bit subtle. They ++ conditionally exit the block. Hence we only need to list (1) ++ the registers that they read, and (2) the registers that they ++ write in the case where the block is not exited. (2) is ++ empty, hence only (1) is relevant here. */ ++ case LAin_XDirect: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.XDirect.amPC); ++ if (!hregIsInvalid(i->LAin.XDirect.cond)) ++ mapReg(m, &i->LAin.XDirect.cond); ++ break; ++ case LAin_XIndir: ++ mapReg(m, &i->LAin.XIndir.dstGA); ++ mapRegs_LOONGARCH64AMode(m, i->LAin.XIndir.amPC); ++ if (!hregIsInvalid(i->LAin.XIndir.cond)) ++ mapReg(m, &i->LAin.XIndir.cond); ++ break; ++ case LAin_XAssisted: ++ mapReg(m, &i->LAin.XAssisted.dstGA); ++ mapRegs_LOONGARCH64AMode(m, i->LAin.XAssisted.amPC); ++ if (!hregIsInvalid(i->LAin.XAssisted.cond)) ++ mapReg(m, &i->LAin.XAssisted.cond); ++ break; ++ case LAin_EvCheck: ++ /* We expect both amodes only to mention $r31, so this is in ++ fact pointless, since $r31 isn't allocatable, but anyway.. */ ++ mapRegs_LOONGARCH64AMode(m, i->LAin.EvCheck.amCounter); ++ mapRegs_LOONGARCH64AMode(m, i->LAin.EvCheck.amFailAddr); ++ break; ++ case LAin_ProfInc: ++ /* Hardwires $r12 and $r13 -- nothing to modify. */ ++ break; ++ default: ++ ppLOONGARCH64Instr(i, mode64); ++ vpanic("mapRegs_LOONGARCH64Instr"); ++ break; ++ } ++} ++ ++/* Generate loongarch64 spill instructions under the direction of the ++ register allocator. 
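++   Roughly, an integer rreg whose slot is close to the guest state pointer
++   spills with a single register+immediate store,
++
++      st.d rreg, GSP, offsetB
++
++   while a distant slot goes through $t0 and the indexed form,
++
++      li $t0, offsetB
++      stx.d rreg, GSP, $t0
++
++   and similarly fst.d / fstx.d for a floating-point rreg.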
*/ ++void genSpill_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ vassert(offsetB >= 0); ++ vassert(!hregIsVirtual(rreg)); ++ ++ LOONGARCH64AMode* am; ++ *i1 = *i2 = NULL; ++ ++ switch (hregClass(rreg)) { ++ case HRcInt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_Store(LAstore_ST_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_Store(LAstore_STX_D, am, rreg); ++ } ++ break; ++ case HRcFlt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_FpStore(LAfpstore_FST_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_FpStore(LAfpstore_FSTX_D, am, rreg); ++ } ++ break; ++ default: ++ ppHRegClass(hregClass(rreg)); ++ vpanic("genSpill_LOONGARCH64: unimplemented regclass"); ++ break; ++ } ++} ++ ++/* Generate loongarch64 reload instructions under the direction of the ++ register allocator. */ ++void genReload_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ vassert(offsetB >= 0); ++ vassert(!hregIsVirtual(rreg)); ++ ++ LOONGARCH64AMode* am; ++ *i1 = *i2 = NULL; ++ ++ switch (hregClass(rreg)) { ++ case HRcInt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_Load(LAload_LD_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_Load(LAload_LDX_D, am, rreg); ++ } ++ break; ++ case HRcFlt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_FpLoad(LAfpload_FLD_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_FpLoad(LAfpload_FLDX_D, am, rreg); ++ } ++ break; ++ default: ++ ppHRegClass(hregClass(rreg)); ++ vpanic("genReload_LOONGARCH64: unimplemented regclass"); ++ break; ++ } ++} ++ ++/* Generate loongarch64 move instructions under the direction of the ++ register allocator. 
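++   Since LoongArch has no dedicated register-to-register move instruction,
++   an integer move is synthesised as "or dst, src, $zero" (the usual move
++   idiom), and a floating-point move as "fmov.d dst, src".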
*/ ++LOONGARCH64Instr* genMove_LOONGARCH64 ( HReg from, HReg to, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ switch (hregClass(from)) { ++ case HRcInt64: ++ return LOONGARCH64Instr_Binary(LAbin_OR, ++ LOONGARCH64RI_R(hregZERO()), ++ from, to); ++ case HRcFlt64: ++ return LOONGARCH64Instr_FpMove(LAfpmove_FMOV_D, from, to); ++ default: ++ ppHRegClass(hregClass(from)); ++ vpanic("genMove_LOONGARCH64: unimplemented regclass"); ++ } ++} ++ ++ ++/* --------- The loongarch64 assembler --------- */ ++ ++static inline UInt iregEnc ( HReg r ) ++{ ++ vassert(hregClass(r) == HRcInt64); ++ vassert(!hregIsVirtual(r)); ++ UInt n = hregEncoding(r); ++ vassert(n < 32); ++ return n; ++} ++ ++static inline UInt fregEnc ( HReg r ) ++{ ++ vassert(hregClass(r) == HRcFlt64); ++ vassert(!hregIsVirtual(r)); ++ UInt n = hregEncoding(r); ++ vassert(n < 32); ++ return n; ++} ++ ++static inline UInt fcsrEnc ( HReg r ) ++{ ++ vassert(hregClass(r) == HRcInt32); ++ vassert(!hregIsVirtual(r)); ++ UInt n = hregEncoding(r); ++ vassert(n < 32); ++ return n; ++} ++ ++static inline UInt emit_op_rj_rd ( UInt op, UInt rj, UInt rd ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_rk_rj_rd ( UInt op, UInt rk, UInt rj, UInt rd ) ++{ ++ vassert(rk < (1 << 5)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (rk << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_fj_fd ( UInt op, UInt fj, UInt fd ) ++{ ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_fa_fk_fj_fd ( UInt op, UInt fa, UInt fk, UInt fj, UInt fd ) ++{ ++ vassert(fa < (1 << 5)); ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (fa << 15) | (fk << 10) | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_fk_fj_fd ( UInt op, UInt fk, UInt fj, UInt fd ) ++{ ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (fk << 10) | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_ca_fk_fj_fd ( UInt op, UInt ca, UInt fk, UInt fj, UInt fd ) ++{ ++ vassert(ca < (1 << 3)); ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (ca << 15) | (fk << 10) | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_fk_fj_cd ( UInt op, UInt fk, UInt fj, UInt cd ) ++{ ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(cd < (1 << 3)); ++ return op | (fk << 10) | (fj << 5) | cd; ++} ++ ++static inline UInt emit_op_cj_rd ( UInt op, UInt cj, UInt rd ) ++{ ++ vassert(cj < (1 << 3)); ++ vassert(rd < (1 << 5)); ++ return op | (cj << 5) | rd; ++} ++ ++static inline UInt emit_op_rj_cd ( UInt op, UInt rj, UInt cd ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(cd < (1 << 3)); ++ return op | (rj << 5) | cd; ++} ++ ++static inline UInt emit_op_rj_fd ( UInt op, UInt rj, UInt fd ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (rj << 5) | fd; ++} ++ ++static inline UInt emit_op_fj_rd ( UInt op, UInt fj, UInt rd ) ++{ ++ vassert(fj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (fj << 5) | rd; ++} ++ ++static inline UInt emit_op_rj_fcsr ( UInt op, UInt rj, UInt fcsr ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(fcsr < (1 << 5)); ++ return op | (rj << 5) | fcsr; ++} ++ ++static inline UInt emit_op_fcsr_rd ( UInt op, UInt fcsr, UInt rd ) ++{ ++ vassert(fcsr < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (fcsr << 5) | rd; ++} ++ ++static inline UInt 
emit_op_ui5_rj_rd ( UInt op, UInt ui5, UInt rj, UInt rd ) ++{ ++ vassert(ui5 < (1 << 5)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (ui5 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_ui6_rj_rd ( UInt op, UInt ui6, UInt rj, UInt rd ) ++{ ++ vassert(ui6 < (1 << 6)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (ui6 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_ui12_rj_rd ( UInt op, UInt ui12, UInt rj, UInt rd ) ++{ ++ vassert(ui12 < (1 << 12)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (ui12 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_si12_rj_rd ( UInt op, UInt si12, UInt rj, UInt rd ) ++{ ++ vassert(si12 < (1 << 12)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (si12 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_si14_rj_rd ( UInt op, UInt si14, UInt rj, UInt rd ) ++{ ++ vassert(si14 < (1 << 14)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (si14 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_si20_rd ( UInt op, UInt si20, UInt rd ) ++{ ++ vassert(si20 < (1 << 20)); ++ vassert(rd < (1 << 5)); ++ return op | (si20 << 5) | rd; ++} ++ ++static inline UInt emit_op_offs16_rj_rd ( UInt op, UInt offs16, UInt rj, UInt rd ) ++{ ++ vassert(offs16 < (1 << 16)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (offs16 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_offs26 ( UInt op, UInt offs26 ) ++{ ++ vassert(offs26 < (1 << 26)); ++ return op | ((offs26 & 0xffff) << 10) | (offs26 >> 16); ++} ++ ++static inline UInt emit_op_hint15 ( UInt op, UInt hint ) ++{ ++ vassert(hint < (1 << 15)); ++ return op | hint; ++} ++ ++static UInt* mkLoadImm_EXACTLY4 ( UInt* p, HReg dst, ULong imm ) ++{ ++ /* ++ lu12i.w dst, imm[31:12] ++ ori dst, dst, imm[11:0] ++ lu32i.d dst, imm[51:32] ++ lu52i.d dst, dst, imm[63:52] ++ */ ++ UInt d = iregEnc(dst); ++ *p++ = emit_op_si20_rd(LAextra_LU12I_W, (imm >> 12) & 0xfffff, d); ++ *p++ = emit_op_si12_rj_rd(LAbin_ORI, imm & 0xfff, d, d); ++ *p++ = emit_op_si20_rd(LAextra_LU32I_D, (imm >> 32) & 0xfffff, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_LU52I_D, (imm >> 52) & 0xfff, d, d); ++ return p; ++} ++ ++static inline UInt* mkLoadImm_EXACTLY2 ( UInt* p, HReg dst, ULong imm ) ++{ ++ /* ++ lu12i.w dst, imm[31:12] ++ ori dst, dst, imm[11:0] ++ */ ++ UInt d = iregEnc(dst); ++ *p++ = emit_op_si20_rd(LAextra_LU12I_W, (imm >> 12) & 0xfffff, d); ++ *p++ = emit_op_si12_rj_rd(LAbin_ORI, imm & 0xfff, d, d); ++ return p; ++} ++ ++static inline UInt* mkLoadImm_EXACTLY1 ( UInt* p, HReg dst, ULong imm ) ++{ ++ /* ori dst, $zero, imm[11:0] */ ++ *p++ = emit_op_si12_rj_rd(LAbin_ORI, imm, 0, iregEnc(dst)); ++ return p; ++} ++ ++static UInt* mkLoadImm ( UInt* p, HReg dst, ULong imm ) ++{ ++ if ((imm >> 12) == 0) ++ p = mkLoadImm_EXACTLY1(p, dst, imm); ++ else if (imm < 0x80000000 || (imm >> 31) == 0x1ffffffffUL) ++ p = mkLoadImm_EXACTLY2(p, dst, imm); ++ else ++ p = mkLoadImm_EXACTLY4(p, dst, imm); ++ return p; ++} ++ ++static Bool is_LoadImm_EXACTLY4 ( UInt* p, HReg dst, ULong imm ) ++{ ++ UInt expect[4]; ++ mkLoadImm_EXACTLY4(expect, dst, imm); ++ return toBool(p[0] == expect[0] && p[1] == expect[1] && ++ p[2] == expect[2] && p[3] == expect[3]); ++} ++ ++static inline UInt* mkUnary ( UInt* p, LOONGARCH64UnOp op, HReg src, HReg dst ) ++{ ++ switch (op) { ++ case LAun_CLZ_W: ++ case LAun_CTZ_W: ++ case LAun_CLZ_D: ++ case LAun_CTZ_D: ++ case LAun_EXT_W_H: ++ case 
LAun_EXT_W_B: ++ *p++ = emit_op_rj_rd(op, iregEnc(src), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkBinary ( UInt* p, LOONGARCH64BinOp op, ++ LOONGARCH64RI* src2, HReg src1, HReg dst ) ++{ ++ switch (op) { ++ case LAbin_ADD_W: ++ case LAbin_ADD_D: ++ case LAbin_SUB_W: ++ case LAbin_SUB_D: ++ case LAbin_NOR: ++ case LAbin_AND: ++ case LAbin_OR: ++ case LAbin_XOR: ++ case LAbin_SLL_W: ++ case LAbin_SRL_W: ++ case LAbin_SRA_W: ++ case LAbin_SLL_D: ++ case LAbin_SRL_D: ++ case LAbin_SRA_D: ++ case LAbin_MUL_W: ++ case LAbin_MUL_D: ++ case LAbin_MULH_W: ++ case LAbin_MULH_WU: ++ case LAbin_MULH_D: ++ case LAbin_MULH_DU: ++ case LAbin_MULW_D_W: ++ case LAbin_MULW_D_WU: ++ case LAbin_DIV_W: ++ case LAbin_MOD_W: ++ case LAbin_DIV_WU: ++ case LAbin_MOD_WU: ++ case LAbin_DIV_D: ++ case LAbin_MOD_D: ++ case LAbin_DIV_DU: ++ case LAbin_MOD_DU: ++ vassert(src2->tag == LAri_Reg); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(src2->LAri.R.reg), ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_SLLI_W: ++ case LAbin_SRLI_W: ++ case LAbin_SRAI_W: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_ui5_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_SLLI_D: ++ case LAbin_SRLI_D: ++ case LAbin_SRAI_D: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_ui6_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_ADDI_W: ++ case LAbin_ADDI_D: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_si12_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_ANDI: ++ case LAbin_ORI: ++ case LAbin_XORI: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_ui12_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static UInt* mkLoad ( UInt* p, LOONGARCH64LoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ switch (op) { ++ case LAload_LD_W: ++ case LAload_LD_D: ++ case LAload_LD_BU: ++ case LAload_LD_HU: ++ case LAload_LD_WU: ++ vassert(src->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, src->LAam.RI.index, ++ iregEnc(src->LAam.RI.base), iregEnc(dst)); ++ return p; ++ case LAload_LDX_D: ++ case LAload_LDX_BU: ++ case LAload_LDX_HU: ++ case LAload_LDX_WU: ++ vassert(src->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(src->LAam.RR.index), ++ iregEnc(src->LAam.RR.base), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static UInt* mkStore ( UInt* p, LOONGARCH64StoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ switch (op) { ++ case LAstore_ST_B: ++ case LAstore_ST_H: ++ case LAstore_ST_W: ++ case LAstore_ST_D: ++ vassert(dst->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, dst->LAam.RI.index, ++ iregEnc(dst->LAam.RI.base), iregEnc(src)); ++ return p; ++ case LAstore_STX_B: ++ case LAstore_STX_H: ++ case LAstore_STX_W: ++ case LAstore_STX_D: ++ vassert(dst->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(dst->LAam.RR.index), ++ iregEnc(dst->LAam.RR.base), iregEnc(src)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkLLSC ( UInt* p, LOONGARCH64LLSCOp op, ++ LOONGARCH64AMode* addr, HReg val ) ++{ ++ switch (op) { ++ case LAllsc_LL_W: ++ case LAllsc_SC_W: ++ case LAllsc_LL_D: ++ case LAllsc_SC_D: ++ vassert(addr->tag == LAam_RI); ++ *p++ = emit_op_si14_rj_rd(op, addr->LAam.RI.index, ++ iregEnc(addr->LAam.RI.base), iregEnc(val)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkBar ( UInt* p, 
LOONGARCH64BarOp op, UShort hint ) ++{ ++ switch (op) { ++ case LAbar_DBAR: ++ case LAbar_IBAR: ++ *p++ = emit_op_hint15(op, hint); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpUnary ( UInt* p, LOONGARCH64FpUnOp op, HReg src, HReg dst ) ++{ ++ switch (op) { ++ case LAfpun_FABS_S: ++ case LAfpun_FABS_D: ++ case LAfpun_FNEG_S: ++ case LAfpun_FNEG_D: ++ case LAfpun_FLOGB_S: ++ case LAfpun_FLOGB_D: ++ case LAfpun_FSQRT_S: ++ case LAfpun_FSQRT_D: ++ case LAfpun_FRSQRT_S: ++ case LAfpun_FRSQRT_D: ++ case LAfpun_FCVT_S_D: ++ case LAfpun_FCVT_D_S: ++ case LAfpun_FTINT_W_S: ++ case LAfpun_FTINT_W_D: ++ case LAfpun_FTINT_L_S: ++ case LAfpun_FTINT_L_D: ++ case LAfpun_FFINT_S_W: ++ case LAfpun_FFINT_S_L: ++ case LAfpun_FFINT_D_W: ++ case LAfpun_FFINT_D_L: ++ case LAfpun_FRINT_S: ++ case LAfpun_FRINT_D: ++ *p++ = emit_op_fj_fd(op, fregEnc(src), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpBinary ( UInt* p, LOONGARCH64FpBinOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ switch (op) { ++ case LAfpbin_FADD_S: ++ case LAfpbin_FADD_D: ++ case LAfpbin_FSUB_S: ++ case LAfpbin_FSUB_D: ++ case LAfpbin_FMUL_S: ++ case LAfpbin_FMUL_D: ++ case LAfpbin_FDIV_S: ++ case LAfpbin_FDIV_D: ++ case LAfpbin_FMAX_S: ++ case LAfpbin_FMAX_D: ++ case LAfpbin_FMIN_S: ++ case LAfpbin_FMIN_D: ++ case LAfpbin_FMAXA_S: ++ case LAfpbin_FMAXA_D: ++ case LAfpbin_FMINA_S: ++ case LAfpbin_FMINA_D: ++ case LAfpbin_FSCALEB_S: ++ case LAfpbin_FSCALEB_D: ++ *p++ = emit_op_fk_fj_fd(op, fregEnc(src2), fregEnc(src1), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpTrinary ( UInt* p, LOONGARCH64FpTriOp op, HReg src3, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ switch (op) { ++ case LAfpbin_FMADD_S: ++ case LAfpbin_FMADD_D: ++ case LAfpbin_FMSUB_S: ++ case LAfpbin_FMSUB_D: ++ *p++ = emit_op_fa_fk_fj_fd(op, fregEnc(src3), fregEnc(src2), ++ fregEnc(src1), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpLoad ( UInt* p, LOONGARCH64FpLoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ switch (op) { ++ case LAfpload_FLD_S: ++ case LAfpload_FLD_D: ++ vassert(src->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, src->LAam.RI.index, ++ iregEnc(src->LAam.RI.base), fregEnc(dst)); ++ return p; ++ case LAfpload_FLDX_S: ++ case LAfpload_FLDX_D: ++ vassert(src->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(src->LAam.RR.index), ++ iregEnc(src->LAam.RR.base), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpStore ( UInt* p, LOONGARCH64FpStoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ switch (op) { ++ case LAfpstore_FST_S: ++ case LAfpstore_FST_D: ++ vassert(dst->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, dst->LAam.RI.index, ++ iregEnc(dst->LAam.RI.base), fregEnc(src)); ++ return p; ++ case LAfpstore_FSTX_S: ++ case LAfpstore_FSTX_D: ++ vassert(dst->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(dst->LAam.RR.index), ++ iregEnc(dst->LAam.RR.base), fregEnc(src)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpMove ( UInt* p, LOONGARCH64FpMoveOp op, HReg src, HReg dst ) ++{ ++ switch (op) { ++ case LAfpmove_FMOV_S: ++ case LAfpmove_FMOV_D: ++ *p++ = emit_op_fj_fd(op, fregEnc(src), fregEnc(dst)); ++ return p; ++ case LAfpmove_MOVGR2FR_W: ++ case LAfpmove_MOVGR2FR_D: ++ *p++ = emit_op_rj_fd(op, iregEnc(src), fregEnc(dst)); ++ return p; ++ case LAfpmove_MOVFR2GR_S: ++ case 
LAfpmove_MOVFR2GR_D: ++ *p++ = emit_op_fj_rd(op, fregEnc(src), iregEnc(dst)); ++ return p; ++ case LAfpmove_MOVGR2FCSR: ++ *p++ = emit_op_rj_fcsr(op, iregEnc(src), fcsrEnc(dst)); ++ return p; ++ case LAfpmove_MOVFCSR2GR: ++ *p++ = emit_op_fcsr_rd(op, fcsrEnc(src), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpCmp ( UInt* p, LOONGARCH64FpCmpOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ /* ++ fcmp.cond.[sd] $fcc0, src1, src2 ++ movcf2gr dst, $fcc0 ++ */ ++ switch (op) { ++ case LAfpcmp_FCMP_CLT_S: ++ case LAfpcmp_FCMP_CLT_D: ++ case LAfpcmp_FCMP_CEQ_S: ++ case LAfpcmp_FCMP_CEQ_D: ++ case LAfpcmp_FCMP_CUN_S: ++ case LAfpcmp_FCMP_CUN_D: ++ *p++ = emit_op_fk_fj_cd(op, fregEnc(src2), fregEnc(src1), 0); ++ *p++ = emit_op_cj_rd(LAextra_MOVCF2GR, 0, iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkCas ( UInt* p, HReg old, HReg addr, HReg expd, ++ HReg data, Bool size64 ) ++{ ++ /* ++ ll.[wd] old, addr, 0 ++ bne old, expd, barrier ++ or $t0, data, $zero ++ sc.[wd] $t0, addr, 0 ++ beq $t0, zero, fail ++ or old, expd, $zero ++ b end ++ barrier: ++ dbar 0 ++ fail: ++ or old, data, $zero ++ end: ++ */ ++ UInt o = iregEnc(old); ++ UInt a = iregEnc(addr); ++ UInt e = iregEnc(expd); ++ UInt d = iregEnc(data); ++ UInt t = 12; ++ UInt z = 0; ++ ++ if (size64) { ++ *p++ = emit_op_si14_rj_rd(LAllsc_LL_D, 0, a, o); ++ } else { ++ *p++ = emit_op_ui6_rj_rd(LAbin_SLLI_W, 0, e, e); // Sign-extend expd ++ *p++ = emit_op_si14_rj_rd(LAllsc_LL_W, 0, a, o); ++ } ++ *p++ = emit_op_offs16_rj_rd(LAextra_BNE, 6, o, e); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, z, d, t); ++ if (size64) { ++ *p++ = emit_op_si14_rj_rd(LAllsc_SC_D, 0, a, t); ++ } else { ++ *p++ = emit_op_si14_rj_rd(LAllsc_SC_W, 0, a, t); ++ } ++ *p++ = emit_op_offs16_rj_rd(LAextra_BEQ, 4, t, z); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, z, e, o); ++ *p++ = emit_op_offs26(LAextra_B, 3); ++ *p++ = emit_op_hint15(LAbar_DBAR, 0); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, z, d, o); ++ return p; ++} ++ ++static inline UInt* mkCmp ( UInt* p, LOONGARCH64CondCode cond, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ UInt d = iregEnc(dst); ++ UInt s1 = iregEnc(src1); ++ UInt s2 = iregEnc(src2); ++ ++ switch (cond) { ++ case LAcc_EQ: ++ /* ++ xor dst, src1, src2 ++ sltui dst, dst, 1 ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_XOR, s2, s1, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_SLTUI, 1, d, d); ++ return p; ++ case LAcc_NE: ++ /* ++ xor dst, src1, src2 ++ sltu dst, $zero, dst ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_XOR, s2, s1, d); ++ *p++ = emit_op_rk_rj_rd(LAextra_SLTU, d, 0, d); ++ return p; ++ case LAcc_LT: ++ /* slt dst, src1, src2 */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLT, s2, s1, d); ++ return p; ++ case LAcc_GE: ++ /* ++ slt dst, src1, src2 ++ sltui dst, dst, 1 ++ */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLT, s2, s1, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_SLTUI, 1, d, d); ++ return p; ++ case LAcc_LTU: ++ /* sltu dst, src1, src2 */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLTU, s2, s1, d); ++ return p; ++ case LAcc_GEU: ++ /* ++ sltu dst, src1, src2 ++ sltui dst, dst, 1 ++ */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLTU, s2, s1, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_SLTUI, 1, d, d); ++ return p; ++ /* No LAcc_AL here. 
++ case LAcc_AL: ++ break; ++ */ ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkCMove ( UInt* p, HReg cond, HReg r0, ++ HReg r1, HReg dst, Bool isInt ) ++{ ++ if (isInt) { ++ /* ++ masknez $t0, r0, cond ++ maskeqz dst, r1, cond ++ or dst, $t0, dst ++ */ ++ UInt c = iregEnc(cond); ++ UInt d = iregEnc(dst); ++ *p++ = emit_op_rk_rj_rd(LAextra_MASKNEZ, c, iregEnc(r0), 12); ++ *p++ = emit_op_rk_rj_rd(LAextra_MASKEQZ, c, iregEnc(r1), d); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, d, 12, d); ++ } else { ++ /* ++ movgr2cf $fcc0, cond ++ fsel dst, r0, r1, $fcc0 ++ */ ++ *p++ = emit_op_rj_cd(LAextra_MOVGR2CF, iregEnc(cond), 0); ++ *p++ = emit_op_ca_fk_fj_fd(LAextra_FSEL, 0, fregEnc(r1), ++ fregEnc(r0), fregEnc(dst)); ++ } ++ return p; ++} ++ ++static inline UInt* mkCall ( UInt* p, HReg cond, Addr64 target, RetLoc rloc ) ++{ ++ if (!hregIsInvalid(cond) && rloc.pri != RLPri_None) { ++ /* The call might not happen (it isn't unconditional) and ++ it returns a result. In this case we will need to ++ generate a control flow diamond to put 0x555..555 in ++ the return register(s) in the case where the call ++ doesn't happen. If this ever becomes necessary, maybe ++ copy code from the 32-bit ARM equivalent. Until that ++ day, just give up. */ ++ return NULL; ++ } ++ ++ UInt* ptmp = NULL; ++ if (!hregIsInvalid(cond)) { ++ /* Create a hole to put a conditional branch in. We'll ++ patch it once we know the branch length. */ ++ ptmp = p; ++ p++; ++ } ++ ++ /* ++ $t0 = target ++ jirl $ra, $t0, 0 ++ */ ++ p = mkLoadImm(p, hregT0(), target); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Patch the hole if necessary */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 3 && offs <= 6); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkXDirect ( UInt* p, Addr64 dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP, ++ const void* disp_cp_chain_me_to_slowEP, ++ const void* disp_cp_chain_me_to_fastEP ) ++{ ++ /* NB: what goes on here has to be very closely coordinated ++ with chainXDirect_LOONGARCH64 and unchainXDirect_LOONGARCH64 below. */ ++ /* We're generating chain-me requests here, so we need to be ++ sure this is actually allowed -- no-redir translations ++ can't use chain-me's. Hence: */ ++ vassert(disp_cp_chain_me_to_slowEP != NULL); ++ vassert(disp_cp_chain_me_to_fastEP != NULL); ++ ++ /* Use ptmp for backpatching conditional jumps. */ ++ UInt* ptmp = NULL; ++ ++ /* First off, if this is conditional, create a conditional ++ jump over the rest of it. Or at least, leave a space for ++ it that we will shortly fill in. */ ++ if (!hregIsInvalid(cond)) { ++ ptmp = p; ++ p++; ++ } ++ ++ /* Update the guest PC. ++ $t0 = dstGA ++ st.d $t0, amPC ++ */ ++ p = mkLoadImm(p, hregT0(), (ULong)dstGA); ++ p = mkStore(p, LAstore_ST_D, amPC, hregT0()); ++ ++ /* --- FIRST PATCHABLE BYTE follows --- */ ++ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're ++ calling to) backs up the return address, so as to find the ++ address of the first patchable byte. So: don't change the ++ number of instructions (5) below. */ ++ /* ++ la $t0, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) ++ jirl $ra, $t0, 0 ++ */ ++ const void* disp_cp_chain_me = toFastEP ? 
disp_cp_chain_me_to_fastEP ++ : disp_cp_chain_me_to_slowEP; ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)disp_cp_chain_me); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ /* --- END of PATCHABLE BYTES --- */ ++ ++ /* Fix up the conditional jump, if there was one. */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 8 && offs <= 11); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkXIndir ( UInt* p, HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, const void* disp_cp_xindir ) ++{ ++ /* We're generating transfers that could lead indirectly to a ++ chain-me, so we need to be sure this is actually allowed -- ++ no-redir translations are not allowed to reach normal ++ translations without going through the scheduler. That means ++ no XDirects or XIndirs out from no-redir translations. ++ Hence: */ ++ vassert(disp_cp_xindir != NULL); ++ ++ /* Use ptmp for backpatching conditional jumps. */ ++ UInt* ptmp = NULL; ++ ++ /* First off, if this is conditional, create a conditional ++ jump over the rest of it. */ ++ if (!hregIsInvalid(cond)) { ++ ptmp = p; ++ p++; ++ } ++ ++ /* Update the guest PC. ++ or $t0, dstGA, $zero ++ st.d $t0, amPC ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, 0, iregEnc(dstGA), 12); ++ p = mkStore(p, LAstore_ST_D, amPC, hregT0()); ++ ++ /* ++ la $t0, VG_(disp_cp_xindir) ++ jirl $ra, $t0, 0 ++ */ ++ p = mkLoadImm(p, hregT0(), (ULong)(Addr)disp_cp_xindir); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Fix up the conditional jump, if there was one. */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 5 && offs <= 8); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkXAssisted ( UInt* p, HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk, ++ const void* disp_cp_xassisted ) ++{ ++ /* First off, if this is conditional, create a conditional jump ++ over the rest of it. Or at least, leave a space for it that ++ we will shortly fill in. */ ++ UInt* ptmp = NULL; ++ if (!hregIsInvalid(cond)) { ++ ptmp = p; ++ p++; ++ } ++ ++ /* Update the guest PC. ++ or $t0, dstGA, $zero ++ st.d $t0, amPC ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, 0, iregEnc(dstGA), 12); ++ p = mkStore(p, LAstore_ST_D, amPC, hregT0()); ++ ++ /* li.w $s8, magic_number */ ++ UInt trcval = 0; ++ switch (jk) { ++ case Ijk_Boring: ++ trcval = VEX_TRC_JMP_BORING; ++ break; ++ case Ijk_ClientReq: ++ trcval = VEX_TRC_JMP_CLIENTREQ; ++ break; ++ case Ijk_NoDecode: ++ trcval = VEX_TRC_JMP_NODECODE; ++ break; ++ case Ijk_InvalICache: ++ trcval = VEX_TRC_JMP_INVALICACHE; ++ break; ++ case Ijk_NoRedir: ++ trcval = VEX_TRC_JMP_NOREDIR; ++ break; ++ case Ijk_SigTRAP: ++ trcval = VEX_TRC_JMP_SIGTRAP; ++ break; ++ case Ijk_SigSEGV: ++ trcval = VEX_TRC_JMP_SIGSEGV; ++ break; ++ case Ijk_SigBUS: ++ trcval = VEX_TRC_JMP_SIGBUS; ++ break; ++ case Ijk_SigFPE_IntDiv: ++ trcval = VEX_TRC_JMP_SIGFPE_INTDIV; ++ break; ++ case Ijk_SigFPE_IntOvf: ++ trcval = VEX_TRC_JMP_SIGFPE_INTOVF; ++ break; ++ case Ijk_SigSYS: ++ trcval = VEX_TRC_JMP_SIGSYS; ++ break; ++ case Ijk_Sys_syscall: ++ trcval = VEX_TRC_JMP_SYS_SYSCALL; ++ break; ++ /* We don't expect to see the following being assisted. 
++ case Ijk_Call: ++ case Ijk_Ret: ++ case Ijk_Yield: ++ case Ijk_EmWarn: ++ case Ijk_EmFail: ++ case Ijk_MapFail: ++ case Ijk_FlushDCache: ++ case Ijk_SigILL: ++ case Ijk_SigFPE: ++ case Ijk_Sys_int32: ++ case Ijk_Sys_int128: ++ case Ijk_Sys_int129: ++ case Ijk_Sys_int130: ++ case Ijk_Sys_int145: ++ case Ijk_Sys_int210: ++ case Ijk_Sys_sysenter: ++ */ ++ default: ++ ppIRJumpKind(jk); ++ vpanic("emit_LOONGARCH64Instr.LAin_XAssisted: unexpected jump kind"); ++ } ++ vassert(trcval != 0); ++ p = mkLoadImm(p, hregGSP(), trcval); ++ ++ /* ++ la $t0, VG_(disp_cp_xassisted) ++ jirl $ra, $t0, 0 ++ */ ++ p = mkLoadImm(p, hregT0(), (ULong)(Addr)disp_cp_xassisted); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Fix up the conditional jump, if there was one. */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 6 && offs <= 12); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkEvCheck ( UInt* p, LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ) ++{ ++ UInt* p0 = p; ++ ++ /* ++ ld.w $t0, amCounter ++ addi.d $t0, $t0, -1 ++ st.w $t0, amCounter ++ bge $t0, $zero, nofail ++ ld.d $t0, amFailAddr ++ jirl $ra, $t0, 0 ++ nofail: ++ */ ++ p = mkLoad(p, LAload_LD_W, amCounter, hregT0()); ++ *p++ = emit_op_si12_rj_rd(LAbin_ADDI_D, -1 & 0xfff, 12, 12); ++ p = mkStore(p, LAstore_ST_W, amCounter, hregT0()); ++ *p++ = emit_op_offs16_rj_rd(LAextra_BGE, 3, 12, 0); ++ p = mkLoad(p, LAload_LD_W, amFailAddr, hregT0()); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Crosscheck */ ++ vassert(evCheckSzB_LOONGARCH64() == (UChar*)p - (UChar*)p0); ++ return p; ++} ++ ++static inline UInt* mkProfInc ( UInt* p ) ++{ ++ /* ++ li $t0, 0x6555755585559555UL ++ ld.d $t1, $t0, 0 ++ addi.d $t1, $t1, 1 ++ st.d $t1, $t0, 0 ++ */ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), 0x6555755585559555UL); ++ *p++ = emit_op_si12_rj_rd(LAload_LD_D, 0, 12, 13); ++ *p++ = emit_op_si12_rj_rd(LAbin_ADDI_D, 1, 13, 13); ++ *p++ = emit_op_si12_rj_rd(LAstore_ST_D, 0, 12, 13); ++ return p; ++} ++ ++/* Emit an instruction into buf and return the number of bytes used. ++ Note that buf is not the insn's final place, and therefore it is ++ imperative to emit position-independent code. If the emitted ++ instruction was a profiler inc, set *is_profInc to True, else ++ leave it unchanged. 
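++   All LoongArch64 instructions are 4 bytes wide, so the value returned is
++   always a multiple of 4; the longest expansions produced here (for
++   example the Cas and XDirect cases) stay within the 48-byte bound
++   asserted just before the return.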
*/ ++Int emit_LOONGARCH64Instr ( /*MB_MOD*/Bool* is_profInc, ++ UChar* buf, ++ Int nbuf, ++ const LOONGARCH64Instr* i, ++ Bool mode64, ++ VexEndness endness_host, ++ const void* disp_cp_chain_me_to_slowEP, ++ const void* disp_cp_chain_me_to_fastEP, ++ const void* disp_cp_xindir, ++ const void* disp_cp_xassisted ) ++{ ++ vassert(mode64 == True); ++ ++ UInt* p = (UInt*)buf; ++ vassert(nbuf >= 32); ++ vassert((((HWord)buf) & 3) == 0); ++ ++ switch (i->tag) { ++ case LAin_LI: ++ p = mkLoadImm(p, i->LAin.LI.dst, i->LAin.LI.imm); ++ break; ++ case LAin_Un: ++ p = mkUnary(p, i->LAin.Unary.op, i->LAin.Unary.src, ++ i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ p = mkBinary(p, i->LAin.Binary.op, i->LAin.Binary.src2, ++ i->LAin.Binary.src1, i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ p = mkLoad(p, i->LAin.Load.op, i->LAin.Load.src, ++ i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ p = mkStore(p, i->LAin.Store.op, i->LAin.Store.dst, ++ i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ p = mkLLSC(p, i->LAin.LLSC.op, i->LAin.LLSC.addr, i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ p = mkBar(p, i->LAin.Bar.op, i->LAin.Bar.hint); ++ break; ++ case LAin_FpUn: ++ p = mkFpUnary(p, i->LAin.FpUnary.op, i->LAin.FpUnary.src, ++ i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ p = mkFpBinary(p, i->LAin.FpBinary.op, i->LAin.FpBinary.src2, ++ i->LAin.FpBinary.src1, i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ p = mkFpTrinary(p, i->LAin.FpTrinary.op, i->LAin.FpTrinary.src3, ++ i->LAin.FpTrinary.src2, i->LAin.FpTrinary.src1, ++ i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ p = mkFpLoad(p, i->LAin.FpLoad.op, i->LAin.FpLoad.src, ++ i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ p = mkFpStore(p, i->LAin.FpStore.op, i->LAin.FpStore.dst, ++ i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ p = mkFpMove(p, i->LAin.FpMove.op, i->LAin.FpMove.src, ++ i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ p = mkFpCmp(p, i->LAin.FpCmp.op, i->LAin.FpCmp.src2, ++ i->LAin.FpCmp.src1, i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ p = mkCas(p, i->LAin.Cas.old, i->LAin.Cas.addr, i->LAin.Cas.expd, ++ i->LAin.Cas.data, i->LAin.Cas.size64); ++ break; ++ case LAin_Cmp: ++ p = mkCmp(p, i->LAin.Cmp.cond, i->LAin.Cmp.src2, ++ i->LAin.Cmp.src1, i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ p = mkCMove(p, i->LAin.CMove.cond, i->LAin.CMove.r0, ++ i->LAin.CMove.r1, i->LAin.CMove.dst, ++ i->LAin.CMove.isInt); ++ break; ++ case LAin_Call: ++ p = mkCall(p, i->LAin.Call.cond, i->LAin.Call.target, ++ i->LAin.Call.rloc); ++ break; ++ case LAin_XDirect: ++ p = mkXDirect(p, i->LAin.XDirect.dstGA, i->LAin.XDirect.amPC, ++ i->LAin.XDirect.cond, i->LAin.XDirect.toFastEP, ++ disp_cp_chain_me_to_slowEP, ++ disp_cp_chain_me_to_fastEP); ++ break; ++ case LAin_XIndir: ++ p = mkXIndir(p, i->LAin.XIndir.dstGA, i->LAin.XIndir.amPC, ++ i->LAin.XIndir.cond, disp_cp_xindir); ++ break; ++ case LAin_XAssisted: ++ p = mkXAssisted(p, i->LAin.XAssisted.dstGA, i->LAin.XAssisted.amPC, ++ i->LAin.XAssisted.cond, i->LAin.XAssisted.jk, ++ disp_cp_xassisted); ++ break; ++ case LAin_EvCheck: ++ p = mkEvCheck(p, i->LAin.EvCheck.amCounter, ++ i->LAin.EvCheck.amFailAddr); ++ break; ++ case LAin_ProfInc: ++ p = mkProfInc(p); ++ break; ++ default: ++ p = NULL; ++ break; ++ } ++ ++ if (p == NULL) { ++ ppLOONGARCH64Instr(i, True); ++ vpanic("emit_LOONGARCH64Instr"); ++ /*NOTREACHED*/ ++ } ++ ++ vassert(((UChar*)p) - &buf[0] <= 48); ++ return ((UChar*)p) - &buf[0]; ++} ++ ++/* How big is an event check? 
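++   It is a fixed sequence of six 4-byte instructions (load the counter,
++   decrement it, store it back, branch past the failure path while it is
++   still non-negative, load the fail address, jirl), i.e. 24 bytes.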
See case for mkEvCheck just above. That ++ crosschecks what this returns, so we can tell if we're inconsistent. */ ++Int evCheckSzB_LOONGARCH64 ( void ) ++{ ++ return 6 * 4; // 6 insns ++} ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++VexInvalRange chainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_chain, ++ const void* disp_cp_chain_me_EXPECTED, ++ const void* place_to_jump_to ) ++{ ++ vassert(endness_host == VexEndnessLE); ++ ++ /* What we're expecting to see is: ++ * la $t0, disp_cp_chain_me_to_EXPECTED ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ */ ++ UInt* p = (UInt*)place_to_chain; ++ vassert(((HWord)p & 3) == 0); ++ vassert(is_LoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)disp_cp_chain_me_EXPECTED)); ++ vassert(p[4] == emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1)); ++ ++ /* And what we want to change it to is: ++ * la $t0, place_to_jump_to ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ * ++ * The replacement has the same length as the original. ++ */ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)place_to_jump_to); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ VexInvalRange vir = { (HWord)place_to_chain, 4 * 4 + 4 }; ++ return vir; ++} ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++VexInvalRange unchainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_unchain, ++ const void* place_to_jump_to_EXPECTED, ++ const void* disp_cp_chain_me ) ++{ ++ vassert(endness_host == VexEndnessLE); ++ ++ /* What we're expecting to see is: ++ * la $t0, place_to_jump_to_EXPECTED ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ */ ++ UInt* p = (UInt*)place_to_unchain; ++ vassert(((HWord)p & 3) == 0); ++ vassert(is_LoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)place_to_jump_to_EXPECTED)); ++ vassert(p[4] == emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1)); ++ ++ /* And what we want to change it to is: ++ * la $t0, disp_cp_chain_me ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ * ++ * The replacement has the same length as the original. ++ */ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)disp_cp_chain_me); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ VexInvalRange vir = { (HWord)place_to_unchain, 4 * 4 + 4 }; ++ return vir; ++} ++ ++/* Patch the counter address into a profile inc point, as previously ++ created by the mkProfInc. 
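++   mkProfInc plants the recognisable placeholder 0x6555755585559555 using
++   mkLoadImm_EXACTLY4; patching simply re-emits those four immediate-building
++   instructions with the real counter address and leaves the following
++   ld.d / addi.d / st.d increment untouched, which is why the invalidated
++   range below is only 16 bytes.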
*/ ++VexInvalRange patchProfInc_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_patch, ++ const ULong* location_of_counter ) ++{ ++ vassert(endness_host == VexEndnessLE); ++ vassert(sizeof(ULong*) == 8); ++ ++ /* ++ $t0 = NotKnownYet ++ ld.d $t1, $t0, 0 ++ addi.d $t1, $t1, 1 ++ st.d $t1, $t0, 0 ++ */ ++ UInt* p = (UInt*)place_to_patch; ++ vassert(((HWord)p & 3) == 0); ++ vassert(is_LoadImm_EXACTLY4(p, hregT0(), 0x6555755585559555UL)); ++ vassert(p[4] == emit_op_si12_rj_rd(LAload_LD_D, 0, 12, 13)); ++ vassert(p[5] == emit_op_si12_rj_rd(LAbin_ADDI_D, 1, 13, 13)); ++ vassert(p[6] == emit_op_si12_rj_rd(LAstore_ST_D, 0, 12, 13)); ++ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)location_of_counter); ++ ++ VexInvalRange vir = { (HWord)place_to_patch, 4 * 4 }; ++ return vir; ++} ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end host_loongarch64_defs.c ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/host_loongarch64_defs.h b/VEX/priv/host_loongarch64_defs.h +new file mode 100644 +index 000000000..89365d6d1 +--- /dev/null ++++ b/VEX/priv/host_loongarch64_defs.h +@@ -0,0 +1,685 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin host_loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#ifndef __VEX_HOST_LOONGARCH64_DEFS_H ++#define __VEX_HOST_LOONGARCH64_DEFS_H ++ ++#include "libvex_basictypes.h" ++#include "libvex.h" /* VexArch */ ++#include "host_generic_regs.h" /* HReg */ ++ ++ ++/* --------- Registers. 
--------- */ ++ ++#define ST_IN static inline ++ ++/* Integer static registers */ ++ST_IN HReg hregLOONGARCH64_R23 ( void ) { return mkHReg(False, HRcInt64, 23, 0); } ++ST_IN HReg hregLOONGARCH64_R24 ( void ) { return mkHReg(False, HRcInt64, 24, 1); } ++ST_IN HReg hregLOONGARCH64_R25 ( void ) { return mkHReg(False, HRcInt64, 25, 2); } ++ST_IN HReg hregLOONGARCH64_R26 ( void ) { return mkHReg(False, HRcInt64, 26, 3); } ++ST_IN HReg hregLOONGARCH64_R27 ( void ) { return mkHReg(False, HRcInt64, 27, 4); } ++ST_IN HReg hregLOONGARCH64_R28 ( void ) { return mkHReg(False, HRcInt64, 28, 5); } ++ST_IN HReg hregLOONGARCH64_R29 ( void ) { return mkHReg(False, HRcInt64, 29, 6); } ++ST_IN HReg hregLOONGARCH64_R30 ( void ) { return mkHReg(False, HRcInt64, 30, 7); } ++/* $r31 is used as guest stack pointer */ ++ ++/* Integer temporary registers */ ++/* $r12 is used as a chaining/ProfInc/Cmove/genSpill/genReload temporary */ ++/* $r13 is used as a ProfInc temporary */ ++ST_IN HReg hregLOONGARCH64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 8); } ++ST_IN HReg hregLOONGARCH64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 9); } ++ST_IN HReg hregLOONGARCH64_R16 ( void ) { return mkHReg(False, HRcInt64, 16, 10); } ++ST_IN HReg hregLOONGARCH64_R17 ( void ) { return mkHReg(False, HRcInt64, 17, 11); } ++ST_IN HReg hregLOONGARCH64_R18 ( void ) { return mkHReg(False, HRcInt64, 18, 12); } ++ST_IN HReg hregLOONGARCH64_R19 ( void ) { return mkHReg(False, HRcInt64, 19, 13); } ++ST_IN HReg hregLOONGARCH64_R20 ( void ) { return mkHReg(False, HRcInt64, 20, 14); } ++ ++/* Floating point static registers */ ++ST_IN HReg hregLOONGARCH64_F24 ( void ) { return mkHReg(False, HRcFlt64, 24, 15); } ++ST_IN HReg hregLOONGARCH64_F25 ( void ) { return mkHReg(False, HRcFlt64, 25, 16); } ++ST_IN HReg hregLOONGARCH64_F26 ( void ) { return mkHReg(False, HRcFlt64, 26, 17); } ++ST_IN HReg hregLOONGARCH64_F27 ( void ) { return mkHReg(False, HRcFlt64, 27, 18); } ++ST_IN HReg hregLOONGARCH64_F28 ( void ) { return mkHReg(False, HRcFlt64, 28, 19); } ++ST_IN HReg hregLOONGARCH64_F29 ( void ) { return mkHReg(False, HRcFlt64, 29, 20); } ++ST_IN HReg hregLOONGARCH64_F30 ( void ) { return mkHReg(False, HRcFlt64, 30, 21); } ++ST_IN HReg hregLOONGARCH64_F31 ( void ) { return mkHReg(False, HRcFlt64, 31, 22); } ++ ++/* Other Integer registers */ ++ST_IN HReg hregLOONGARCH64_R0 ( void ) { return mkHReg(False, HRcInt64, 0, 23); } ++ST_IN HReg hregLOONGARCH64_R1 ( void ) { return mkHReg(False, HRcInt64, 1, 24); } ++ST_IN HReg hregLOONGARCH64_R2 ( void ) { return mkHReg(False, HRcInt64, 2, 25); } ++ST_IN HReg hregLOONGARCH64_R3 ( void ) { return mkHReg(False, HRcInt64, 3, 26); } ++ST_IN HReg hregLOONGARCH64_R4 ( void ) { return mkHReg(False, HRcInt64, 4, 27); } ++ST_IN HReg hregLOONGARCH64_R5 ( void ) { return mkHReg(False, HRcInt64, 5, 28); } ++ST_IN HReg hregLOONGARCH64_R6 ( void ) { return mkHReg(False, HRcInt64, 6, 29); } ++ST_IN HReg hregLOONGARCH64_R7 ( void ) { return mkHReg(False, HRcInt64, 7, 30); } ++ST_IN HReg hregLOONGARCH64_R8 ( void ) { return mkHReg(False, HRcInt64, 8, 31); } ++ST_IN HReg hregLOONGARCH64_R9 ( void ) { return mkHReg(False, HRcInt64, 9, 32); } ++ST_IN HReg hregLOONGARCH64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 33); } ++ST_IN HReg hregLOONGARCH64_R11 ( void ) { return mkHReg(False, HRcInt64, 11, 34); } ++ST_IN HReg hregLOONGARCH64_R12 ( void ) { return mkHReg(False, HRcInt64, 12, 35); } ++ST_IN HReg hregLOONGARCH64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 36); } ++ST_IN HReg hregLOONGARCH64_R21 ( 
void ) { return mkHReg(False, HRcInt64, 21, 37); } ++ST_IN HReg hregLOONGARCH64_R22 ( void ) { return mkHReg(False, HRcInt64, 22, 38); } ++ST_IN HReg hregLOONGARCH64_R31 ( void ) { return mkHReg(False, HRcInt64, 31, 39); } ++ ++/* Special registers */ ++ST_IN HReg hregLOONGARCH64_FCSR3 ( void ) { return mkHReg(False, HRcInt32, 3, 40); } ++ ++#undef ST_IN ++ ++#define hregZERO() hregLOONGARCH64_R0() ++#define hregSP() hregLOONGARCH64_R3() ++#define hregT0() hregLOONGARCH64_R12() ++#define hregT1() hregLOONGARCH64_R13() ++#define hregGSP() hregLOONGARCH64_R31() ++ ++extern UInt ppHRegLOONGARCH64 ( HReg reg ); ++ ++/* Number of registers used arg passing in function calls */ ++#define LOONGARCH64_N_ARGREGS 8 /* a0 ... a7 */ ++ ++ ++/* --------- Condition codes, LOONGARCH64 encoding. --------- */ ++typedef enum { ++ LAcc_EQ = 0, /* equal */ ++ LAcc_NE = 1, /* not equal */ ++ ++ LAcc_LT = 2, /* less than (signed) */ ++ LAcc_GE = 3, /* great equal (signed) */ ++ ++ LAcc_LTU = 4, /* less than (unsigned) */ ++ LAcc_GEU = 5, /* great equal (unsigned) */ ++ ++ LAcc_AL = 6 /* always (unconditional) */ ++} LOONGARCH64CondCode; ++ ++ ++/* --------- Memory address expressions (amodes). --------- */ ++ ++typedef enum { ++ LAam_RI, /* Reg + Imm (signed 12-bit or signed 14-bit) */ ++ LAam_RR /* Reg1 + Reg2 */ ++} LOONGARCH64AModeTag; ++ ++typedef struct { ++ LOONGARCH64AModeTag tag; ++ union { ++ struct { ++ HReg base; ++ UShort index; ++ } RI; ++ struct { ++ HReg base; ++ HReg index; ++ } RR; ++ } LAam; ++} LOONGARCH64AMode; ++ ++extern LOONGARCH64AMode* LOONGARCH64AMode_RI ( HReg reg, UShort imm ); ++extern LOONGARCH64AMode* LOONGARCH64AMode_RR ( HReg base, HReg index ); ++ ++ ++/* --------- Operand, which can be reg or imm. --------- */ ++ ++typedef enum { ++ LAri_Reg, ++ LAri_Imm ++} LOONGARCH64RITag; ++ ++typedef struct { ++ LOONGARCH64RITag tag; ++ union { ++ struct { ++ HReg reg; ++ } R; ++ struct { ++ UShort imm; ++ UChar size; // size == 5 || size == 6 || size == 12 ++ Bool isSigned; ++ } I; ++ } LAri; ++} LOONGARCH64RI; ++ ++extern LOONGARCH64RI* LOONGARCH64RI_R ( HReg reg ); ++extern LOONGARCH64RI* LOONGARCH64RI_I ( UShort imm, UChar size, Bool isSigned ); ++ ++ ++/* --------- Instructions. 
--------- */ ++ ++/* Tags for unary operations */ ++typedef enum { ++ LAun_CLZ_W = 0x00001400, ++ LAun_CTZ_W = 0x00001c00, ++ LAun_CLZ_D = 0x00002400, ++ LAun_CTZ_D = 0x00002c00, ++ LAun_EXT_W_H = 0x00005800, ++ LAun_EXT_W_B = 0x00005c00 ++} LOONGARCH64UnOp; ++ ++/* Tags for binary operations */ ++typedef enum { ++ LAbin_ADD_W = 0x00100000, ++ LAbin_ADD_D = 0x00108000, ++ LAbin_SUB_W = 0x00110000, ++ LAbin_SUB_D = 0x00118000, ++ LAbin_NOR = 0x00140000, ++ LAbin_AND = 0x00148000, ++ LAbin_OR = 0x00150000, ++ LAbin_XOR = 0x00158000, ++ LAbin_SLL_W = 0x00170000, ++ LAbin_SRL_W = 0x00178000, ++ LAbin_SRA_W = 0x00180000, ++ LAbin_SLL_D = 0x00188000, ++ LAbin_SRL_D = 0x00190000, ++ LAbin_SRA_D = 0x00198000, ++ LAbin_MUL_W = 0x001c0000, ++ LAbin_MUL_D = 0x001d8000, ++ LAbin_MULH_W = 0x001c8000, ++ LAbin_MULH_WU = 0x001d0000, ++ LAbin_MULH_D = 0x001e0000, ++ LAbin_MULH_DU = 0x001e8000, ++ LAbin_MULW_D_W = 0x001f0000, ++ LAbin_MULW_D_WU = 0x001f8000, ++ LAbin_DIV_W = 0x00200000, ++ LAbin_MOD_W = 0x00208000, ++ LAbin_DIV_WU = 0x00210000, ++ LAbin_MOD_WU = 0x00218000, ++ LAbin_DIV_D = 0x00220000, ++ LAbin_MOD_D = 0x00228000, ++ LAbin_DIV_DU = 0x00230000, ++ LAbin_MOD_DU = 0x00238000, ++ LAbin_SLLI_W = 0x00408000, ++ LAbin_SLLI_D = 0x00410000, ++ LAbin_SRLI_W = 0x00448000, ++ LAbin_SRLI_D = 0x00450000, ++ LAbin_SRAI_W = 0x00488000, ++ LAbin_SRAI_D = 0x00490000, ++ LAbin_ADDI_W = 0x02800000, ++ LAbin_ADDI_D = 0x02c00000, ++ LAbin_ANDI = 0x03400000, ++ LAbin_ORI = 0x03800000, ++ LAbin_XORI = 0x03c00000 ++} LOONGARCH64BinOp; ++ ++/* Tags for load operations */ ++typedef enum { ++ LAload_LD_W = 0x28800000, ++ LAload_LD_D = 0x28c00000, ++ LAload_LD_BU = 0x2a000000, ++ LAload_LD_HU = 0x2a400000, ++ LAload_LD_WU = 0x2a800000, ++ LAload_LDX_D = 0x380c0000, ++ LAload_LDX_BU = 0x38200000, ++ LAload_LDX_HU = 0x38240000, ++ LAload_LDX_WU = 0x38280000 ++} LOONGARCH64LoadOp; ++ ++/* Tags for store operations */ ++typedef enum { ++ LAstore_ST_B = 0x29000000, ++ LAstore_ST_H = 0x29400000, ++ LAstore_ST_W = 0x29800000, ++ LAstore_ST_D = 0x29c00000, ++ LAstore_STX_B = 0x38100000, ++ LAstore_STX_H = 0x38140000, ++ LAstore_STX_W = 0x38180000, ++ LAstore_STX_D = 0x381c0000 ++} LOONGARCH64StoreOp; ++ ++/* Tags for ll/sc operations */ ++typedef enum { ++ LAllsc_LL_W = 0x20000000, ++ LAllsc_SC_W = 0x21000000, ++ LAllsc_LL_D = 0x22000000, ++ LAllsc_SC_D = 0x23000000 ++} LOONGARCH64LLSCOp; ++ ++/* Tags for barrier operations */ ++typedef enum { ++ LAbar_DBAR = 0x38720000, ++ LAbar_IBAR = 0x38728000 ++} LOONGARCH64BarOp; ++ ++/* Tags for floating point unary operations */ ++typedef enum { ++ LAfpun_FABS_S = 0x01140400, ++ LAfpun_FABS_D = 0x01140800, ++ LAfpun_FNEG_S = 0x01141400, ++ LAfpun_FNEG_D = 0x01141800, ++ LAfpun_FLOGB_S = 0x01142400, ++ LAfpun_FLOGB_D = 0x01142800, ++ LAfpun_FSQRT_S = 0x01144400, ++ LAfpun_FSQRT_D = 0x01144800, ++ LAfpun_FRSQRT_S = 0x01146400, ++ LAfpun_FRSQRT_D = 0x01146800, ++ LAfpun_FCVT_S_D = 0x01191800, ++ LAfpun_FCVT_D_S = 0x01192400, ++ LAfpun_FTINT_W_S = 0x011b0400, ++ LAfpun_FTINT_W_D = 0x011b0800, ++ LAfpun_FTINT_L_S = 0x011b2400, ++ LAfpun_FTINT_L_D = 0x011b2800, ++ LAfpun_FFINT_S_W = 0x011d1000, ++ LAfpun_FFINT_S_L = 0x011d1800, ++ LAfpun_FFINT_D_W = 0x011d2000, ++ LAfpun_FFINT_D_L = 0x011d2800, ++ LAfpun_FRINT_S = 0x011e4400, ++ LAfpun_FRINT_D = 0x011e4800 ++} LOONGARCH64FpUnOp; ++ ++/* Tags for floating point binary operations */ ++typedef enum { ++ LAfpbin_FADD_S = 0x01008000, ++ LAfpbin_FADD_D = 0x01010000, ++ LAfpbin_FSUB_S = 0x01028000, ++ LAfpbin_FSUB_D = 0x01030000, ++ LAfpbin_FMUL_S 
= 0x01048000, ++ LAfpbin_FMUL_D = 0x01050000, ++ LAfpbin_FDIV_S = 0x01068000, ++ LAfpbin_FDIV_D = 0x01070000, ++ LAfpbin_FMAX_S = 0x01088000, ++ LAfpbin_FMAX_D = 0x01090000, ++ LAfpbin_FMIN_S = 0x010a8000, ++ LAfpbin_FMIN_D = 0x010b0000, ++ LAfpbin_FMAXA_S = 0x010c8000, ++ LAfpbin_FMAXA_D = 0x010d0000, ++ LAfpbin_FMINA_S = 0x010e8000, ++ LAfpbin_FMINA_D = 0x010f0000, ++ LAfpbin_FSCALEB_S = 0x01108000, ++ LAfpbin_FSCALEB_D = 0x01110000 ++} LOONGARCH64FpBinOp; ++ ++/* Tags for floating point trinary operations */ ++typedef enum { ++ LAfpbin_FMADD_S = 0x08100000, ++ LAfpbin_FMADD_D = 0x08200000, ++ LAfpbin_FMSUB_S = 0x08500000, ++ LAfpbin_FMSUB_D = 0x08600000 ++} LOONGARCH64FpTriOp; ++ ++/* Tags for floating point load operations */ ++typedef enum { ++ LAfpload_FLD_S = 0x2b000000, ++ LAfpload_FLD_D = 0x2b800000, ++ LAfpload_FLDX_S = 0x38300000, ++ LAfpload_FLDX_D = 0x38340000 ++} LOONGARCH64FpLoadOp; ++ ++/* Tags for floating point store operations */ ++typedef enum { ++ LAfpstore_FST_S = 0x2b400000, ++ LAfpstore_FST_D = 0x2bc00000, ++ LAfpstore_FSTX_S = 0x38380000, ++ LAfpstore_FSTX_D = 0x383c0000 ++} LOONGARCH64FpStoreOp; ++ ++/* Tags for floating point move operations */ ++typedef enum { ++ LAfpmove_FMOV_S = 0x01149400, ++ LAfpmove_FMOV_D = 0x01149800, ++ LAfpmove_MOVGR2FR_W = 0x0114a400, ++ LAfpmove_MOVGR2FR_D = 0x0114a800, ++ LAfpmove_MOVFR2GR_S = 0x0114b400, ++ LAfpmove_MOVFR2GR_D = 0x0114b800, ++ LAfpmove_MOVGR2FCSR = 0x0114c000, ++ LAfpmove_MOVFCSR2GR = 0x0114c800 ++} LOONGARCH64FpMoveOp; ++ ++/* Tags for floating point compare operations */ ++typedef enum { ++ LAfpcmp_FCMP_CLT_S = 0x0c110000, ++ LAfpcmp_FCMP_CLT_D = 0x0c210000, ++ LAfpcmp_FCMP_CEQ_S = 0x0c120000, ++ LAfpcmp_FCMP_CEQ_D = 0x0c220000, ++ LAfpcmp_FCMP_CUN_S = 0x0c140000, ++ LAfpcmp_FCMP_CUN_D = 0x0c240000 ++} LOONGARCH64FpCmpOp; ++ ++/* Tags for extra operations, we only use them when emiting code directly */ ++typedef enum { ++ LAextra_MOVGR2CF = 0x0114d800, ++ LAextra_MOVCF2GR = 0x0114dc00, ++ LAextra_SLT = 0x00120000, ++ LAextra_SLTU = 0x00128000, ++ LAextra_MASKEQZ = 0x00130000, ++ LAextra_MASKNEZ = 0x00138000, ++ LAextra_SLTI = 0x02000000, ++ LAextra_SLTUI = 0x02400000, ++ LAextra_LU52I_D = 0x03000000, ++ LAextra_FSEL = 0x0d000000, ++ LAextra_LU12I_W = 0x14000000, ++ LAextra_LU32I_D = 0x16000000, ++ LAextra_JIRL = 0x4c000000, ++ LAextra_B = 0x50000000, ++ LAextra_BEQ = 0x58000000, ++ LAextra_BNE = 0x5c000000, ++ LAextra_BGE = 0x64000000 ++} LOONGARCH64ExtraOp; ++ ++/* Tags for instructions */ ++typedef enum { ++ /* Pseudo-insn, used for generating a 64-bit ++ literal to register */ ++ LAin_LI, /* load imm */ ++ ++ /* Integer insns */ ++ LAin_Un, /* unary */ ++ LAin_Bin, /* binary */ ++ LAin_Load, /* load */ ++ LAin_Store, /* store */ ++ LAin_LLSC, /* ll/sc */ ++ LAin_Bar, /* barrier */ ++ ++ /* Floating point insns */ ++ LAin_FpUn, /* floating point unary */ ++ LAin_FpBin, /* floating point binary */ ++ LAin_FpTri, /* floating point trinary */ ++ LAin_FpLoad, /* floating point load */ ++ LAin_FpStore, /* floating point store */ ++ LAin_FpMove, /* floating point move */ ++ LAin_FpCmp, /* floating point compare */ ++ ++ /* Pseudo-insn */ ++ LAin_Cas, /* compare and swap */ ++ LAin_Cmp, /* word compare */ ++ LAin_CMove, /* condition move */ ++ ++ /* Call target (an absolute address), on given ++ condition (which could be LAcc_AL). 
*/ ++ LAin_Call, /* call */ ++ ++ /* The following 5 insns are mandated by translation chaining */ ++ LAin_XDirect, /* direct transfer to GA */ ++ LAin_XIndir, /* indirect transfer to GA */ ++ LAin_XAssisted, /* assisted transfer to GA */ ++ LAin_EvCheck, /* Event check */ ++ LAin_ProfInc /* 64-bit profile counter increment */ ++} LOONGARCH64InstrTag; ++ ++typedef struct { ++ LOONGARCH64InstrTag tag; ++ union { ++ struct { ++ ULong imm; ++ HReg dst; ++ } LI; ++ struct { ++ LOONGARCH64UnOp op; ++ HReg src; ++ HReg dst; ++ } Unary; ++ struct { ++ LOONGARCH64BinOp op; ++ LOONGARCH64RI* src2; ++ HReg src1; ++ HReg dst; ++ } Binary; ++ struct { ++ LOONGARCH64LoadOp op; ++ LOONGARCH64AMode* src; ++ HReg dst; ++ } Load; ++ struct { ++ LOONGARCH64StoreOp op; ++ LOONGARCH64AMode* dst; ++ HReg src; ++ } Store; ++ struct { ++ LOONGARCH64LLSCOp op; ++ Bool isLoad; ++ LOONGARCH64AMode* addr; ++ HReg val; ++ } LLSC; ++ struct { ++ LOONGARCH64BarOp op; ++ UShort hint; ++ } Bar; ++ struct { ++ LOONGARCH64FpUnOp op; ++ HReg src; ++ HReg dst; ++ } FpUnary; ++ struct { ++ LOONGARCH64FpBinOp op; ++ HReg src2; ++ HReg src1; ++ HReg dst; ++ } FpBinary; ++ struct { ++ LOONGARCH64FpTriOp op; ++ HReg src3; ++ HReg src2; ++ HReg src1; ++ HReg dst; ++ } FpTrinary; ++ struct { ++ LOONGARCH64FpLoadOp op; ++ LOONGARCH64AMode* src; ++ HReg dst; ++ } FpLoad; ++ struct { ++ LOONGARCH64FpStoreOp op; ++ LOONGARCH64AMode* dst; ++ HReg src; ++ } FpStore; ++ struct { ++ LOONGARCH64FpMoveOp op; ++ HReg src; ++ HReg dst; ++ } FpMove; ++ struct { ++ LOONGARCH64FpCmpOp op; ++ HReg src2; ++ HReg src1; ++ HReg dst; ++ } FpCmp; ++ struct { ++ HReg old; ++ HReg addr; ++ HReg expd; ++ HReg data; ++ Bool size64; ++ } Cas; ++ struct { ++ LOONGARCH64CondCode cond; ++ HReg dst; ++ HReg src1; ++ HReg src2; ++ } Cmp; ++ struct { ++ HReg cond; ++ HReg r0; ++ HReg r1; ++ HReg dst; ++ Bool isInt; ++ } CMove; ++ struct { ++ HReg cond; ++ Addr64 target; ++ UInt nArgRegs; ++ RetLoc rloc; ++ } Call; ++ struct { ++ Addr64 dstGA; ++ LOONGARCH64AMode* amPC; ++ HReg cond; ++ Bool toFastEP; ++ } XDirect; ++ struct { ++ HReg dstGA; ++ LOONGARCH64AMode* amPC; ++ HReg cond; ++ } XIndir; ++ struct { ++ HReg dstGA; ++ LOONGARCH64AMode* amPC; ++ HReg cond; ++ IRJumpKind jk; ++ } XAssisted; ++ struct { ++ LOONGARCH64AMode* amCounter; ++ LOONGARCH64AMode* amFailAddr; ++ } EvCheck; ++ struct { ++ /* No fields. The address of the counter to inc is ++ installed later, post-translation, by patching it in, ++ as it is not known at translation time. 
*/ ++ } ProfInc; ++ } LAin; ++} LOONGARCH64Instr; ++ ++extern LOONGARCH64Instr* LOONGARCH64Instr_LI ( ULong imm, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Unary ( LOONGARCH64UnOp op, ++ HReg src, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Binary ( LOONGARCH64BinOp op, ++ LOONGARCH64RI* src2, ++ HReg src1, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Load ( LOONGARCH64LoadOp op, ++ LOONGARCH64AMode* src, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Store ( LOONGARCH64StoreOp op, ++ LOONGARCH64AMode* dst, ++ HReg src ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_LLSC ( LOONGARCH64LLSCOp op, ++ Bool isLoad, ++ LOONGARCH64AMode* addr, ++ HReg val ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Bar ( LOONGARCH64BarOp op, ++ UShort hint ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpUnary ( LOONGARCH64FpUnOp op, ++ HReg src, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpBinary ( LOONGARCH64FpBinOp op, ++ HReg src2, HReg src1, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpTrinary ( LOONGARCH64FpTriOp op, ++ HReg src3, HReg src2, ++ HReg src1, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpLoad ( LOONGARCH64FpLoadOp op, ++ LOONGARCH64AMode* src, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpStore ( LOONGARCH64FpStoreOp op, ++ LOONGARCH64AMode* dst, ++ HReg src ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpMove ( LOONGARCH64FpMoveOp op, ++ HReg src, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpCmp ( LOONGARCH64FpCmpOp op, ++ HReg src2, HReg src1, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Cas ( HReg old, HReg addr, ++ HReg expd, HReg data, ++ Bool size64 ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Cmp ( LOONGARCH64CondCode cond, ++ HReg src2, HReg src1, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_CMove ( HReg cond, HReg r0, HReg r1, ++ HReg dst, Bool isInt ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Call ( HReg cond, Addr64 target, ++ UInt nArgRegs, RetLoc rloc ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_XDirect ( Addr64 dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_XIndir ( HReg dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_XAssisted ( HReg dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_EvCheck ( LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_ProfInc ( void ); ++ ++extern void ppLOONGARCH64Instr ( const LOONGARCH64Instr* i, Bool mode64 ); ++ ++/* Some functions that insulate the register allocator from details ++ of the underlying instruction set. 
*/ ++extern void getRegUsage_LOONGARCH64Instr ( HRegUsage* u, ++ const LOONGARCH64Instr* i, ++ Bool mode64 ); ++extern void mapRegs_LOONGARCH64Instr ( HRegRemap* m, LOONGARCH64Instr* i, ++ Bool mode64 ); ++extern Int emit_LOONGARCH64Instr (/*MB_MOD*/Bool* is_profInc, ++ UChar* buf, ++ Int nbuf, ++ const LOONGARCH64Instr* i, ++ Bool mode64, ++ VexEndness endness_host, ++ const void* disp_cp_chain_me_to_slowEP, ++ const void* disp_cp_chain_me_to_fastEP, ++ const void* disp_cp_xindir, ++ const void* disp_cp_xassisted ); ++ ++extern void genSpill_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64); ++extern void genReload_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64); ++extern LOONGARCH64Instr* genMove_LOONGARCH64 ( HReg from, HReg to, ++ Bool mode64 ); ++ ++extern const RRegUniverse* getRRegUniverse_LOONGARCH64 ( void ); ++ ++extern HInstrArray* iselSB_LOONGARCH64 ( const IRSB*, ++ VexArch, ++ const VexArchInfo*, ++ const VexAbiInfo*, ++ Int offs_Host_EvC_Counter, ++ Int offs_Host_EvC_FailAddr, ++ Bool chainingAllowed, ++ Bool addProfInc, ++ Addr max_ga ); ++ ++/* How big is an event check? See case for Min_EvCheck in ++ emit_LOONGARCH64Instr just above. That crosschecks what this returns, ++ so we can tell if we're inconsistent. */ ++extern Int evCheckSzB_LOONGARCH64 ( void ); ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++extern VexInvalRange chainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_chain, ++ const void* disp_cp_chain_me_EXPECTED, ++ const void* place_to_jump_to ); ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++extern VexInvalRange unchainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_unchain, ++ const void* place_to_jump_to_EXPECTED, ++ const void* disp_cp_chain_me ); ++ ++/* Patch the counter address into a profile inc point, as previously ++ created by the Min_ProfInc case for emit_LOONGARCH64Instr. */ ++extern VexInvalRange patchProfInc_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_patch, ++ const ULong* location_of_counter ); ++ ++#endif /* ndef __VEX_HOST_LOONGARCH64_DEFS_H */ ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end host-loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/host_loongarch64_isel.c b/VEX/priv/host_loongarch64_isel.c +new file mode 100644 +index 000000000..c3c4ac8da +--- /dev/null ++++ b/VEX/priv/host_loongarch64_isel.c +@@ -0,0 +1,2867 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin host_loongarch64_isel.c ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ General Public License for more details-> ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "libvex_basictypes.h" ++#include "libvex_ir.h" ++#include "libvex.h" ++ ++#include "main_util.h" ++#include "main_globals.h" ++#include "host_generic_regs.h" ++#include "host_loongarch64_defs.h" ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISelEnv ---*/ ++/*---------------------------------------------------------*/ ++ ++/* This carries around: ++ ++ - A mapping from IRTemp to IRType, giving the type of any IRTemp we ++ might encounter. This is computed before insn selection starts, ++ and does not change. ++ ++ - A mapping from IRTemp to HReg. This tells the insn selector ++ which virtual register is associated with each IRTemp temporary. ++ This is computed before insn selection starts, and does not ++ change. We expect this mapping to map precisely the same set of ++ IRTemps as the type mapping does. ++ ++ |vregmap| holds the primary register for the IRTemp. ++ |vregmapHI| is only used for 128-bit integer-typed ++ IRTemps. It holds the identity of a second ++ 64-bit virtual HReg, which holds the high half ++ of the value. ++ ++ - The code array, that is, the insns selected so far. ++ ++ - A counter, for generating new virtual registers. ++ ++ - The host hardware capabilities word. This is set at the start ++ and does not change. ++ ++ - A Bool for indicating whether we may generate chain-me ++ instructions for control flow transfers, or whether we must use ++ XAssisted. ++ ++ - The maximum guest address of any guest insn in this block. ++ Actually, the address of the highest-addressed byte from any insn ++ in this block. Is set at the start and does not change. This is ++ used for detecting jumps which are definitely forward-edges from ++ this block, and therefore can be made (chained) to the fast entry ++ point of the destination, thereby avoiding the destination's ++ event check. ++ ++ - An IRExpr*, which may be NULL, holding the IR expression (an ++ IRRoundingMode-encoded value) to which the FPU's rounding mode ++ was most recently set. Setting to NULL is always safe. Used to ++ avoid redundant settings of the FPU's rounding mode, as ++ described in set_FPCR_rounding_mode below. ++ ++ Note, this is all (well, mostly) host-independent. ++*/ ++ ++typedef ++ struct { ++ /* Constant -- are set at the start and do not change. */ ++ IRTypeEnv* type_env; ++ ++ HReg* vregmap; ++ HReg* vregmapHI; ++ Int n_vregmap; ++ ++ UInt hwcaps; ++ ++ Bool chainingAllowed; ++ Addr64 max_ga; ++ ++ /* These are modified as we go along. 
*/ ++ HInstrArray* code; ++ Int vreg_ctr; ++ } ++ ISelEnv; ++ ++ ++static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) ++{ ++ vassert(tmp < env->n_vregmap); ++ return env->vregmap[tmp]; ++} ++ ++static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO, ++ ISelEnv* env, IRTemp tmp ) ++{ ++ vassert(tmp < env->n_vregmap); ++ vassert(!hregIsInvalid(env->vregmapHI[tmp])); ++ *vrLO = env->vregmap[tmp]; ++ *vrHI = env->vregmapHI[tmp]; ++} ++ ++static void addInstr ( ISelEnv* env, LOONGARCH64Instr* instr ) ++{ ++ addHInstr(env->code, instr); ++ if (vex_traceflags & VEX_TRACE_VCODE) { ++ ppLOONGARCH64Instr(instr, True); ++ vex_printf("\n"); ++ } ++} ++ ++static HReg newVRegI ( ISelEnv* env ) ++{ ++ HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr); ++ env->vreg_ctr++; ++ return reg; ++} ++ ++static HReg newVRegF ( ISelEnv* env ) ++{ ++ HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr); ++ env->vreg_ctr++; ++ return reg; ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Forward declarations ---*/ ++/*---------------------------------------------------------*/ ++ ++/* These are organised as iselXXX and iselXXX_wrk pairs. The ++ iselXXX_wrk do the real work, but are not to be called directly. ++ For each XXX, iselXXX calls its iselXXX_wrk counterpart, then ++ checks that all returned registers are virtual. You should not ++ call the _wrk version directly. ++*/ ++ ++static LOONGARCH64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, ++ IRExpr* e, IRType dty ); ++static LOONGARCH64AMode* iselIntExpr_AMode ( ISelEnv* env, ++ IRExpr* e, IRType dty ); ++ ++static LOONGARCH64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ); ++static LOONGARCH64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ); ++ ++static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); ++static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); ++ ++static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e ); ++static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e ); ++ ++static void iselInt128Expr_wrk ( HReg* hi, HReg* lo, ++ ISelEnv* env, IRExpr* e ); ++static void iselInt128Expr ( HReg* hi, HReg* lo, ++ ISelEnv* env, IRExpr* e ); ++ ++static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); ++static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Misc helpers ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Generate move insn */ ++static LOONGARCH64Instr* LOONGARCH64Instr_Move ( HReg to, HReg from ) ++{ ++ LOONGARCH64RI *ri = LOONGARCH64RI_R(hregZERO()); ++ return LOONGARCH64Instr_Binary(LAbin_OR, ri, from, to); ++} ++ ++/* Generate LOONGARCH64AMode from HReg and UInt */ ++static LOONGARCH64AMode* mkLOONGARCH64AMode_RI ( HReg reg, UInt imm ) ++{ ++ vassert(imm < (1 << 12)); ++ return LOONGARCH64AMode_RI(reg, (UShort)imm); ++} ++ ++/* Set floating point rounding mode */ ++static void set_rounding_mode ( ISelEnv* env, IRExpr* mode ) ++{ ++ /* ++ rounding mode | LOONGARCH | IR ++ ------------------------------ ++ to nearest | 00 | 00 ++ to zero | 01 | 11 ++ to +infinity | 10 | 10 ++ to -infinity | 11 | 01 ++ */ ++ ++ /* rm = XOR(rm, (rm << 1)) & 3 */ ++ HReg rm = iselIntExpr_R(env, mode); ++ HReg tmp = newVRegI(env); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(1, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(rm); ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(3, 12, False); ++ addInstr(env, 
LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, rm, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_XOR, ri2, tmp, rm)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri3, rm, rm)); ++ ++ /* Save old value of FCSR3 */ ++ HReg fcsr = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFCSR2GR, ++ hregLOONGARCH64_FCSR3(), fcsr)); ++ ++ /* Store old FCSR3 to stack */ ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_I(-4 & 0xfff, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_D, ri4, hregSP(), hregSP())); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregSP(), 0); ++ addInstr(env, LOONGARCH64Instr_Store(LAstore_ST_W, am, fcsr)); ++ ++ /* Set new value of FCSR3 */ ++ LOONGARCH64RI* ri5 = LOONGARCH64RI_I(8, 5, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri5, rm, rm)); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FCSR, ++ rm, hregLOONGARCH64_FCSR3())); ++} ++ ++static void set_rounding_mode_default ( ISelEnv* env ) ++{ ++ /* Load old FCSR3 from stack */ ++ HReg fcsr = newVRegI(env); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregSP(), 0); ++ addInstr(env, LOONGARCH64Instr_Load(LAload_LD_WU, am, fcsr)); ++ ++ /* Restore SP */ ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(4, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_D, ri, hregSP(), hregSP())); ++ ++ /* Set new value of FCSR3 */ ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FCSR, ++ fcsr, hregLOONGARCH64_FCSR3())); ++} ++ ++/* Convert LOONGARCH FCMP cond to IR result */ ++static HReg convert_cond_to_IR ( ISelEnv* env, HReg src2, HReg src1, Bool size64 ) ++{ ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(63, 6, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_I(0x45, 12, False); ++ if (size64) ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CUN_D, src2, src1, tmp)); ++ else ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CUN_S, src2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri2, tmp, dst)); ++ ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(0x1, 12, False); ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_R(tmp); ++ if (size64) ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CLT_D, src2, src1, tmp)); ++ else ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CLT_S, src2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri3, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ ++ LOONGARCH64RI* ri5 = LOONGARCH64RI_I(0x40, 12, False); ++ if (size64) ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CEQ_D, src2, src1, tmp)); ++ else ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CEQ_S, src2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri5, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ ++ return dst; ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Function call helpers ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Used only in doHelperCall. 
See big comment in doHelperCall re ++ handling of register-parameter args. This function figures out ++ whether evaluation of an expression might require use of a fixed ++ register. If in doubt return True (safe but suboptimal). ++*/ ++static Bool mightRequireFixedRegs ( IRExpr* e ) ++{ ++ if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) { ++ // These are always "safe" -- either a copy of SP in some ++ // arbitrary vreg, or a copy of $r31, respectively. ++ return False; ++ } ++ /* Else it's a "normal" expression. */ ++ switch (e->tag) { ++ case Iex_RdTmp: case Iex_Const: case Iex_Get: ++ return False; ++ default: ++ return True; ++ } ++} ++ ++/* Do a complete function call. |guard| is a Ity_Bit expression ++ indicating whether or not the call happens. If guard==NULL, the ++ call is unconditional. |retloc| is set to indicate where the ++ return value is after the call. The caller (of this fn) must ++ generate code to add |stackAdjustAfterCall| to the stack pointer ++ after the call is done. Returns True iff it managed to handle this ++ combination of arg/return types, else returns False. */ ++static Bool doHelperCall( /*OUT*/UInt* stackAdjustAfterCall, ++ /*OUT*/RetLoc* retloc, ++ ISelEnv* env, ++ IRExpr* guard, ++ IRCallee* cee, IRType retTy, IRExpr** args ) ++{ ++ HReg cond; ++ HReg argregs[LOONGARCH64_N_ARGREGS]; ++ HReg tmpregs[LOONGARCH64_N_ARGREGS]; ++ Bool go_fast; ++ Int n_args, i, nextArgReg; ++ Addr64 target; ++ ++ vassert(LOONGARCH64_N_ARGREGS == 8); ++ ++ /* Set default returns. We'll update them later if needed. */ ++ *stackAdjustAfterCall = 0; ++ *retloc = mk_RetLoc_INVALID(); ++ ++ /* These are used for cross-checking that IR-level constraints on ++ the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */ ++ UInt nVECRETs = 0; ++ UInt nGSPTRs = 0; ++ ++ /* Marshal args for a call and do the call. ++ ++ This function only deals with a tiny set of possibilities, which ++ cover all helpers in practice. The restrictions are that only ++ arguments in registers are supported, hence only ++ LOONGARCH64_N_ARGREGS x 64 integer bits in total can be passed. ++ In fact the only supported arg type is I64. ++ ++ The return type can be I{64,32}. We currently do not add vector ++ support. ++ ++ |args| may also contain IRExpr_GSPTR(), in which case the ++ value in $r31 is passed as the corresponding argument. ++ ++ Generating code which is both efficient and correct when ++ parameters are to be passed in registers is difficult, for the ++ reasons elaborated in detail in comments attached to ++ doHelperCall() in priv/host_x86_isel.c. Here, we use a variant ++ of the method described in those comments. ++ ++ The problem is split into two cases: the fast scheme and the ++ slow scheme. In the fast scheme, arguments are computed ++ directly into the target (real) registers. This is only safe ++ when we can be sure that computation of each argument will not ++ trash any real registers set by computation of any other ++ argument. ++ ++ In the slow scheme, all args are first computed into vregs, and ++ once they are all done, they are moved to the relevant real ++ regs. This always gives correct code, but it also gives a bunch ++ of vreg-to-rreg moves which are usually redundant but are hard ++ for the register allocator to get rid of. ++ ++ To decide which scheme to use, all argument expressions are ++ first examined. If they are all so simple that it is clear they ++ will be evaluated without use of any fixed registers, use the ++ fast scheme, else use the slow scheme. 
Note also that only ++ unconditional calls may use the fast scheme, since having to ++ compute a condition expression could itself trash real ++ registers. ++ ++ Note this requires being able to examine an expression and ++ determine whether or not evaluation of it might use a fixed ++ register. That requires knowledge of how the rest of this insn ++ selector works. Currently just the following 3 are regarded as ++ safe -- hopefully they cover the majority of arguments in ++ practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. ++ */ ++ ++ /* LOONGARCH64 calling convention: up to eight registers ($a0 ... $a7) ++ are allowed to be used for passing integer arguments. They correspond ++ to regs $r4 ... $r11. Note that the cee->regparms field is meaningless ++ on LOONGARCH64 host (since we only implement one calling convention) ++ and so we always ignore it. */ ++ ++ n_args = 0; ++ for (i = 0; args[i]; i++) { ++ IRExpr* arg = args[i]; ++ if (UNLIKELY(arg->tag == Iex_VECRET)) { ++ nVECRETs++; ++ } else if (UNLIKELY(arg->tag == Iex_GSPTR)) { ++ nGSPTRs++; ++ } ++ n_args++; ++ } ++ ++ if (n_args > LOONGARCH64_N_ARGREGS) { ++ vpanic("doHelperCall(loongarch64): cannot currently handle > 8 args"); ++ } ++ ++ argregs[0] = hregLOONGARCH64_R4(); ++ argregs[1] = hregLOONGARCH64_R5(); ++ argregs[2] = hregLOONGARCH64_R6(); ++ argregs[3] = hregLOONGARCH64_R7(); ++ argregs[4] = hregLOONGARCH64_R8(); ++ argregs[5] = hregLOONGARCH64_R9(); ++ argregs[6] = hregLOONGARCH64_R10(); ++ argregs[7] = hregLOONGARCH64_R11(); ++ ++ tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG; ++ tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG; ++ ++ /* First decide which scheme (slow or fast) is to be used. First assume the ++ fast scheme, and select slow if any contraindications (wow) appear. */ ++ ++ go_fast = True; ++ ++ if (guard) { ++ if (guard->tag == Iex_Const ++ && guard->Iex.Const.con->tag == Ico_U1 ++ && guard->Iex.Const.con->Ico.U1 == True) { ++ /* unconditional */ ++ } else { ++ /* Not manifestly unconditional -- be conservative. */ ++ go_fast = False; ++ } ++ } ++ ++ if (go_fast) { ++ for (i = 0; i < n_args; i++) { ++ if (mightRequireFixedRegs(args[i])) { ++ go_fast = False; ++ break; ++ } ++ } ++ } ++ ++ if (go_fast) { ++ if (retTy == Ity_V128 || retTy == Ity_V256) { ++ go_fast = False; ++ vpanic("doHelperCall(loongarch64): currently do not support vector"); ++ } ++ } ++ ++ /* At this point the scheme to use has been established. Generate ++ code to get the arg values into the argument rregs. If we run ++ out of arg regs, give up. */ ++ ++ if (go_fast) { ++ /* FAST SCHEME */ ++ nextArgReg = 0; ++ ++ for (i = 0; i < n_args; i++) { ++ IRExpr* arg = args[i]; ++ ++ IRType aTy = Ity_INVALID; ++ if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) ++ aTy = typeOfIRExpr(env->type_env, args[i]); ++ ++ if (nextArgReg >= LOONGARCH64_N_ARGREGS) ++ return False; /* out of argregs */ ++ ++ if (aTy == Ity_I64) { ++ addInstr(env, LOONGARCH64Instr_Move(argregs[nextArgReg], ++ iselIntExpr_R(env, args[i]))); ++ nextArgReg++; ++ } else if (arg->tag == Iex_GSPTR) { ++ addInstr(env, LOONGARCH64Instr_Move(argregs[nextArgReg], hregGSP())); ++ nextArgReg++; ++ } else if (arg->tag == Iex_VECRET) { ++ // because of the go_fast logic above, we can't get here, ++ // since vector return values makes us use the slow path ++ // instead. ++ vassert(0); ++ } else ++ return False; /* unhandled arg type */ ++ } ++ ++ /* Fast scheme only applies for unconditional calls. 
Hence: */ ++ cond = INVALID_HREG; ++ } else { ++ /* SLOW SCHEME; move via temporaries */ ++ nextArgReg = 0; ++ ++ for (i = 0; i < n_args; i++) { ++ IRExpr* arg = args[i]; ++ ++ IRType aTy = Ity_INVALID; ++ if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) ++ aTy = typeOfIRExpr(env->type_env, args[i]); ++ ++ if (nextArgReg >= LOONGARCH64_N_ARGREGS) ++ return False; /* out of argregs */ ++ ++ if (aTy == Ity_I64) { ++ tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); ++ nextArgReg++; ++ } else if (arg->tag == Iex_GSPTR) { ++ tmpregs[nextArgReg] = hregGSP(); ++ nextArgReg++; ++ } else if (arg->tag == Iex_VECRET) { ++ vpanic("doHelperCall(loongarch64): currently do not support vector"); ++ nextArgReg++; ++ } else ++ return False; /* unhandled arg type */ ++ } ++ ++ /* Now we can compute the condition. We can't do it earlier ++ because the argument computations could trash the condition ++ codes. Be a bit clever to handle the common case where the ++ guard is 1:Bit. */ ++ cond = INVALID_HREG; ++ if (guard) { ++ if (guard->tag == Iex_Const ++ && guard->Iex.Const.con->tag == Ico_U1 ++ && guard->Iex.Const.con->Ico.U1 == True) { ++ /* unconditional -- do nothing */ ++ } else { ++ cond = iselCondCode_R(env, guard); ++ } ++ } ++ ++ /* Move the args to their final destinations. */ ++ for (i = 0; i < nextArgReg; i++) { ++ vassert(!(hregIsInvalid(tmpregs[i]))); ++ /* None of these insns, including any spill code that might ++ be generated, may alter the condition codes. */ ++ addInstr(env, LOONGARCH64Instr_Move(argregs[i], tmpregs[i])); ++ } ++ } ++ ++ /* Should be assured by checks above */ ++ vassert(nextArgReg <= LOONGARCH64_N_ARGREGS); ++ ++ /* Do final checks, set the return values, and generate the call ++ instruction proper. */ ++ vassert(nGSPTRs == 0 || nGSPTRs == 1); ++ vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0)); ++ vassert(*stackAdjustAfterCall == 0); ++ vassert(is_RetLoc_INVALID(*retloc)); ++ switch (retTy) { ++ case Ity_INVALID: ++ /* Function doesn't return a value. */ ++ *retloc = mk_RetLoc_simple(RLPri_None); ++ break; ++ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: ++ *retloc = mk_RetLoc_simple(RLPri_Int); ++ break; ++ case Ity_V128: ++ case Ity_V256: ++ vpanic("doHelperCall(loongarch64): currently do not support vector"); ++ break; ++ default: ++ /* IR can denote other possible return types, but we don't ++ handle those here. */ ++ vassert(0); ++ } ++ ++ /* Finally, generate the call itself. This needs the *retloc value ++ set in the switch above, which is why it's at the end. */ ++ ++ /* nextArgReg doles out argument registers. Since these are ++ assigned in the order $a0 .. $a7, its numeric value at this point, ++ which must be between 0 and 8 inclusive, is going to be equal to ++ the number of arg regs in use for the call. Hence bake that ++ number into the call (we'll need to know it when doing register ++ allocation, to know what regs the call reads.) */ ++ ++ target = (Addr)cee->addr; ++ addInstr(env, LOONGARCH64Instr_Call(cond, target, nextArgReg, *retloc)); ++ ++ return True; /* success */ ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Select insns for an integer-typed expression, and add them to the ++ code list. Return a reg holding the result. This reg will be a ++ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. 
If you ++ want to modify it, ask for a new vreg, copy it in there, and modify ++ the copy. The register allocator will do its best to map both ++ vregs to the same real register, so the copies will often disappear ++ later in the game. ++ ++ This should handle expressions of 64, 32, 16 and 8-bit type. ++ All results are returned in a (mode64 ? 64bit : 32bit) register. ++ For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits ++ are arbitrary, so you should mask or sign extend partial values ++ if necessary. ++*/ ++ ++/* --------------------- AMode --------------------- */ ++ ++static LOONGARCH64AMode* iselIntExpr_AMode ( ISelEnv* env, ++ IRExpr* e, IRType dty ) ++{ ++ LOONGARCH64AMode* am = iselIntExpr_AMode_wrk(env, e, dty); ++ ++ /* sanity checks ... */ ++ switch (am->tag) { ++ case LAam_RI: ++ vassert(am->LAam.RI.index < (1 << 11)); /* The sign bit (bit 12) must be 0. */ ++ vassert(hregClass(am->LAam.RI.base) == HRcInt64); ++ vassert(hregIsVirtual(am->LAam.RI.base)); ++ break; ++ case LAam_RR: ++ vassert(hregClass(am->LAam.RR.base) == HRcInt64); ++ vassert(hregIsVirtual(am->LAam.RR.base)); ++ vassert(hregClass(am->LAam.RR.index) == HRcInt64); ++ vassert(hregIsVirtual(am->LAam.RR.index)); ++ break; ++ default: ++ vpanic("iselIntExpr_AMode: unknown LOONGARCH64 AMode tag"); ++ break; ++ } ++ ++ return am; ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! */ ++static LOONGARCH64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, ++ IRExpr* e, IRType dty ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_I64); ++ ++ /* Add64(expr, i), where i <= 0x7ff */ ++ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add64 ++ && e->Iex.Binop.arg2->tag == Iex_Const ++ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64 ++ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 <= 0x7ff) { ++ return LOONGARCH64AMode_RI(iselIntExpr_R(env, e->Iex.Binop.arg1), ++ (UShort)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64); ++ } ++ ++ /* Add64(expr, expr) */ ++ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add64) { ++ HReg base = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg index = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ return LOONGARCH64AMode_RR(base, index); ++ } ++ ++ /* Doesn't match anything in particular. Generate it into ++ a register and use that. */ ++ return LOONGARCH64AMode_RI(iselIntExpr_R(env, e), 0); ++} ++ ++/* --------------------- RI --------------------- */ ++ ++static LOONGARCH64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ) ++{ ++ LOONGARCH64RI* ri = iselIntExpr_RI_wrk(env, e, size, isSigned); ++ ++ /* sanity checks ... */ ++ switch (ri->tag) { ++ case LAri_Imm: ++ vassert(ri->LAri.I.size == 5 || ri->LAri.I.size == 6 ++ || ri->LAri.I.size == 12); ++ if (ri->LAri.I.size == 5) { ++ vassert(ri->LAri.I.isSigned == False); ++ vassert(ri->LAri.I.imm < (1 << 5)); ++ } else if (ri->LAri.I.size == 6) { ++ vassert(ri->LAri.I.isSigned == False); ++ vassert(ri->LAri.I.imm < (1 << 6)); ++ } else { ++ vassert(ri->LAri.I.imm < (1 << 12)); ++ } ++ break; ++ case LAri_Reg: ++ vassert(hregClass(ri->LAri.R.reg) == HRcInt64); ++ vassert(hregIsVirtual(ri->LAri.R.reg)); ++ break; ++ default: ++ vpanic("iselIntExpr_RI: unknown LOONGARCH64 RI tag"); ++ break; ++ } ++ ++ return ri; ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! 
*/ ++static LOONGARCH64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64); ++ ++ LOONGARCH64RI *ri = NULL; ++ ++ /* special case: immediate */ ++ if (e->tag == Iex_Const) { ++ switch (e->Iex.Const.con->tag) { ++ case Ico_U32: ++ if (!isSigned && e->Iex.Const.con->Ico.U32 < (1 << size)) { ++ UShort imm = e->Iex.Const.con->Ico.U32; ++ ri = LOONGARCH64RI_I(imm, size, isSigned); ++ } ++ break; ++ case Ico_U64: ++ if (!isSigned && e->Iex.Const.con->Ico.U64 < (1 << size)) { ++ UShort imm = e->Iex.Const.con->Ico.U64; ++ ri = LOONGARCH64RI_I(imm, size, isSigned); ++ } ++ break; ++ default: ++ break; ++ } ++ /* else fail, fall through to default case */ ++ } ++ ++ if (ri == NULL) { ++ /* default case: calculate into a register and return that */ ++ HReg reg = iselIntExpr_R(env, e); ++ ri = LOONGARCH64RI_R(reg); ++ } ++ ++ return ri; ++} ++ ++/* --------------------- Reg --------------------- */ ++ ++static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) ++{ ++ HReg r = iselIntExpr_R_wrk(env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(r) == HRcInt64); ++ vassert(hregIsVirtual(r)); ++ ++ return r; ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! */ ++static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64); ++ ++ switch (e->tag) { ++ /* --------- TEMP --------- */ ++ case Iex_RdTmp: ++ return lookupIRTemp(env, e->Iex.RdTmp.tmp); ++ ++ /* --------- LOAD --------- */ ++ case Iex_Load: { ++ if (e->Iex.Load.end != Iend_LE) ++ goto irreducible; ++ ++ LOONGARCH64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr, ty); ++ HReg dst = newVRegI(env); ++ LOONGARCH64LoadOp op; ++ switch(ty) { ++ case Ity_I8: ++ op = (am->tag == LAam_RI) ? LAload_LD_BU : LAload_LDX_BU; ++ break; ++ case Ity_I16: ++ op = (am->tag == LAam_RI) ? LAload_LD_HU : LAload_LDX_HU; ++ break; ++ case Ity_I32: ++ op = (am->tag == LAam_RI) ? LAload_LD_WU : LAload_LDX_WU; ++ break; ++ case Ity_I64: ++ op = (am->tag == LAam_RI) ? LAload_LD_D : LAload_LDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_Load(op, am, dst)); ++ return dst; ++ } ++ ++ /* --------- BINARY OP --------- */ ++ case Iex_Binop: { ++ switch (e->Iex.Binop.op) { ++ case Iop_32HLto64: { ++ HReg dst = newVRegI(env); ++ HReg hi = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* lo = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, hi, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, lo, dst, dst)); ++ return dst; ++ } ++ case Iop_Add32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, True); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_ADD_W : LAbin_ADDI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Add64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, True); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? 
LAbin_ADD_D : LAbin_ADDI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_And32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_AND : LAbin_ANDI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_And64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_AND : LAbin_ANDI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_DivModS32to32: { ++ HReg dst = newVRegI(env); ++ HReg tmp = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(32, 6, False); ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_R(tmp); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_W, ri2, src1, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_W, ri2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ return dst; ++ } ++ case Iop_DivModU32to32: { ++ HReg dst = newVRegI(env); ++ HReg tmp = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(32, 6, False); ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_R(tmp); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_WU, ri2, src1, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_WU, ri2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ return dst; ++ } ++ case Iop_DivS32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_W, ri2, src1, dst)); ++ return dst; ++ } ++ case Iop_DivS64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_D, src2, src1, dst)); ++ return dst; ++ } ++ 
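(Illustrative aside, not part of the patch being added: the Iop_DivModS32to32 / Iop_DivModU32to32 cases above first pass each operand through SLLI.W by 0 — a 32-bit sign/zero-normalising idiom on LoongArch64 — then divide with DIV.W[U]/MOD.W[U], and finally use the SLLI.D/SRLI.D/OR sequence to return the quotient in the low 32 bits and the remainder in the high 32 bits of a single 64-bit register. A minimal C sketch of that packing, assuming hypothetical inputs a and b with b != 0; the helper name divmod_s32 is invented for illustration only.)

    #include <stdint.h>

    /* Sketch of the value produced by the generated DIV.W/MOD.W plus
       shift/OR sequence: quotient in bits 31..0, remainder in bits 63..32. */
    static uint64_t divmod_s32(int32_t a, int32_t b)
    {
        int32_t q = a / b;   /* DIV.W */
        int32_t r = a % b;   /* MOD.W */
        return ((uint64_t)(uint32_t)r << 32) | (uint32_t)q;
    }

(This matches VEX's convention for Iop_DivModS32to32, which expects the remainder in the high half and the quotient in the low half of the 64-bit result.)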
case Iop_DivU32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_WU, ri2, src1, dst)); ++ return dst; ++ } ++ case Iop_DivU64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_DU, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_CmpF32: { ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ return convert_cond_to_IR(env, src2, src1, False); ++ } ++ case Iop_CmpF64: { ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ return convert_cond_to_IR(env, src2, src1, True); ++ } ++ case Iop_F32toI32S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_W_S, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_S, tmp, dst)); ++ return dst; ++ } ++ case Iop_F32toI64S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_L_S, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_D, tmp, dst)); ++ return dst; ++ } ++ case Iop_F64toI32S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_W_D, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_S, tmp, dst)); ++ return dst; ++ } ++ case Iop_F64toI64S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_L_D, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_D, tmp, dst)); ++ return dst; ++ } ++ case Iop_Max32U: { ++ HReg cond = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_LTU, src2, src1, cond)); ++ addInstr(env, LOONGARCH64Instr_CMove(cond, src1, src2, dst, True)); ++ return dst; ++ } ++ case Iop_MullS32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULW_D_W, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MullU32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULW_D_WU, src2, src1, dst)); ++ 
return dst; ++ } ++ case Iop_Or32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_OR : LAbin_ORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Or64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_OR : LAbin_ORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sar32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 5, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRA_W : LAbin_SRAI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sar64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 6, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRA_D : LAbin_SRAI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shl32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 5, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SLL_W : LAbin_SLLI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shl64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 6, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SLL_D : LAbin_SLLI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shr32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 5, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRL_W : LAbin_SRLI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shr64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 6, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRL_D : LAbin_SRLI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sub32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_W, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sub64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Xor32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? 
LAbin_XOR : LAbin_XORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Xor64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_XOR : LAbin_XORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- UNARY OP --------- */ ++ case Iex_Unop: { ++ switch (e->Iex.Unop.op) { ++ case Iop_128HIto64: { ++ HReg hi, lo; ++ iselInt128Expr(&hi, &lo, env, e->Iex.Unop.arg); ++ return hi; ++ } ++ case Iop_128to64: { ++ HReg hi, lo; ++ iselInt128Expr(&hi, &lo, env, e->Iex.Unop.arg); ++ return lo; ++ } ++ case Iop_16Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_EXT_W_H, src, dst)); ++ return dst; ++ } ++ case Iop_16Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(48, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_1Sto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_1Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_1Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0x1, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_1Uto8: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0x1, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_32Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0, 5, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, src, dst)); ++ return dst; ++ } ++ case Iop_32Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_32to8: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_64HIto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, src, dst)); ++ return dst; ++ } ++ case Iop_64to32: { ++ HReg dst = newVRegI(env); 
++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_64to8: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_8Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_EXT_W_B, src, dst)); ++ return dst; ++ } ++ case Iop_8Uto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_8Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_CmpwNEZ32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_NE, hregZERO(), src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_CmpwNEZ64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_NE, hregZERO(), src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_Clz32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CLZ_W, src, dst)); ++ return dst; ++ } ++ case Iop_Clz64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CLZ_D, src, dst)); ++ return dst; ++ } ++ case Iop_Ctz32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CTZ_W, src, dst)); ++ return dst; ++ } ++ case Iop_Ctz64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CTZ_D, src, dst)); ++ return dst; ++ } ++ case Iop_Left16: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_Left32: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_Left64: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri 
= LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_Left8: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_ReinterpF32asI32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_S, src, dst)); ++ return dst; ++ } ++ case Iop_ReinterpF64asI64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_D, src, dst)); ++ return dst; ++ } ++ case Iop_Not32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(hregZERO()); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_NOR, ri, src, dst)); ++ return dst; ++ } ++ case Iop_Not64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(hregZERO()); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_NOR, ri, src, dst)); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- GET --------- */ ++ case Iex_Get: { ++ HReg dst = newVRegI(env); ++ if (e->Iex.Get.offset < 1024) { ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregGSP(), e->Iex.Get.offset); ++ LOONGARCH64LoadOp op; ++ switch(ty) { ++ case Ity_I8: ++ op = LAload_LD_BU; ++ break; ++ case Ity_I16: ++ op = LAload_LD_HU; ++ break; ++ case Ity_I32: ++ op = LAload_LD_WU; ++ break; ++ case Ity_I64: ++ op = LAload_LD_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_Load(op, am, dst)); ++ } else { ++ HReg tmp = newVRegI(env); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RR(hregGSP(), tmp); ++ LOONGARCH64LoadOp op; ++ switch(ty) { ++ case Ity_I8: ++ op = LAload_LDX_BU; ++ break; ++ case Ity_I16: ++ op = LAload_LDX_HU; ++ break; ++ case Ity_I32: ++ op = LAload_LDX_WU; ++ break; ++ case Ity_I64: ++ op = LAload_LDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_LI(e->Iex.Get.offset, tmp)); ++ addInstr(env, LOONGARCH64Instr_Load(op, am, dst)); ++ } ++ return dst; ++ } ++ ++ /* --------- CCALL --------- */ ++ case Iex_CCall: { ++ HReg dst = newVRegI(env); ++ vassert(ty == e->Iex.CCall.retty); ++ ++ /* be very restrictive for now. Only 64-bit ints allowed for ++ args, and 64 bits for return type. Don't forget to change ++ the RetLoc if more types are allowed in future. */ ++ if (e->Iex.CCall.retty != Ity_I64) ++ goto irreducible; ++ ++ /* Marshal args, do the call, clear stack. 
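++         doHelperCall may refuse the call (it returns False); in that
++         case we give up and fall back to 'irreducible' below.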
*/ ++ UInt addToSp = 0; ++ RetLoc rloc = mk_RetLoc_INVALID(); ++ Bool ok = doHelperCall(&addToSp, &rloc, env, NULL, ++ e->Iex.CCall.cee, e->Iex.CCall.retty, ++ e->Iex.CCall.args); ++ ++ if (ok) { ++ vassert(is_sane_RetLoc(rloc)); ++ vassert(rloc.pri == RLPri_Int); ++ vassert(addToSp == 0); ++ addInstr(env, LOONGARCH64Instr_Move(dst, hregLOONGARCH64_R4())); ++ return dst; ++ } ++ goto irreducible; ++ } ++ ++ /* --------- LITERAL --------- */ ++ /* 64-bit literals */ ++ case Iex_Const: { ++ ULong imm = 0; ++ HReg dst = newVRegI(env); ++ switch (e->Iex.Const.con->tag) { ++ case Ico_U64: ++ imm = e->Iex.Const.con->Ico.U64; ++ break; ++ case Ico_U32: ++ imm = e->Iex.Const.con->Ico.U32; ++ break; ++ case Ico_U16: ++ imm = e->Iex.Const.con->Ico.U16; ++ break; ++ case Ico_U8: ++ imm = e->Iex.Const.con->Ico.U8; ++ break; ++ default: ++ ppIRExpr(e); ++ vpanic("iselIntExpr_R.Iex_Const(loongarch64)"); ++ } ++ addInstr(env, LOONGARCH64Instr_LI(imm, dst)); ++ return dst; ++ } ++ ++ case Iex_ITE: { ++ HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); ++ HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); ++ HReg cond = iselCondCode_R(env, e->Iex.ITE.cond); ++ HReg dst = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_CMove(cond, r0, r1, dst, True)); ++ return dst; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* We get here if no pattern matched. */ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselIntExpr_R(loongarch64): cannot reduce tree"); ++} ++ ++/* ------------------- CondCode ------------------- */ ++ ++/* Generate code to evaluate a bit-typed expression, returning the ++ condition code which would correspond when the expression would ++ notionally have returned 1. */ ++ ++static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e ) ++{ ++ HReg r = iselCondCode_R_wrk(env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(r) == HRcInt64); ++ vassert(hregIsVirtual(r)); ++ ++ return r; ++} ++ ++/* DO NOT CALL THIS DIRECTLY !
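++   Go through iselCondCode_R above instead: it wraps this function and
++   checks that the result really is a virtual 64-bit integer register.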
*/ ++static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e ) ++{ ++ vassert(e); ++ vassert(typeOfIRExpr(env->type_env, e) == Ity_I1); ++ ++ HReg dst = newVRegI(env); ++ ++ /* var */ ++ if (e->tag == Iex_RdTmp) { ++ HReg tmp = newVRegI(env); ++ dst = lookupIRTemp(env, e->Iex.RdTmp.tmp); ++ addInstr(env, LOONGARCH64Instr_LI(1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_EQ, dst, tmp, dst)); ++ return dst; ++ } ++ ++ /* const */ ++ if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U1) { ++ UInt imm = e->Iex.Const.con->Ico.U1; ++ addInstr(env, LOONGARCH64Instr_LI(imm, dst)); ++ return dst; ++ } ++ ++ if (e->tag == Iex_Unop) { ++ if (e->Iex.Unop.op == Iop_Not1) { ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(hregZERO()); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_NOR, ri, src, dst)); ++ return dst; ++ } ++ ++ LOONGARCH64CondCode cc; ++ switch (e->Iex.Unop.op) { ++ case Iop_CmpNEZ16: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNEZ32: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNEZ64: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNEZ8: ++ cc = LAcc_NE; ++ break; ++ default: ++ goto irreducible; ++ } ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Cmp(cc, hregZERO(), src, dst)); ++ return dst; ++ } ++ ++ if (e->tag == Iex_Binop) { ++ if (e->Iex.Binop.op == Iop_And1) { ++ HReg src1 = iselCondCode_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselCondCode_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_AND, ri, src1, dst)); ++ return dst; ++ } else if (e->Iex.Binop.op == Iop_Or1) { ++ HReg src1 = iselCondCode_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselCondCode_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, src1, dst)); ++ return dst; ++ } ++ ++ Bool extend = False; ++ Bool reverse = False; ++ LOONGARCH64CondCode cc; ++ switch (e->Iex.Binop.op) { ++ case Iop_CasCmpEQ32: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CasCmpEQ64: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CasCmpNE32: ++ cc = LAcc_NE; ++ break; ++ case Iop_CasCmpNE64: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpEQ32: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CmpEQ64: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CmpLE32S: ++ cc = LAcc_GE; ++ reverse = True; ++ break; ++ case Iop_CmpLE32U: ++ cc = LAcc_GEU; ++ reverse = True; ++ break; ++ case Iop_CmpLE64S: ++ cc = LAcc_GE; ++ reverse = True; ++ break; ++ case Iop_CmpLE64U: ++ cc = LAcc_GEU; ++ reverse = True; ++ break; ++ case Iop_CmpLT32S: ++ cc = LAcc_LT; ++ extend = True; ++ break; ++ case Iop_CmpLT32U: ++ cc = LAcc_LTU; ++ extend = True; ++ break; ++ case Iop_CmpLT64S: ++ cc = LAcc_LT; ++ break; ++ case Iop_CmpLT64U: ++ cc = LAcc_LTU; ++ break; ++ case Iop_CmpNE32: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNE64: ++ cc = LAcc_NE; ++ break; ++ default: ++ goto irreducible; ++ } ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ if (extend) { ++ /* Sign-extend */ ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0, 5, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, src2, src2)); ++ } ++ if (reverse) { ++ addInstr(env, LOONGARCH64Instr_Cmp(cc, src1, src2, dst)); ++ } else { ++ addInstr(env, LOONGARCH64Instr_Cmp(cc, src2, src1, dst)); ++ } ++ return dst; ++ } ++ ++ /* We get here if no pattern matched. 
*/ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselCondCode(loongarch64): cannot reduce tree"); ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Integer expressions (128 bit) ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Compute a 128-bit value into a register pair, which is returned as ++ the first two parameters. As with iselIntExpr_R, these may be ++ either real or virtual regs; in any case they must not be changed ++ by subsequent code emitted by the caller. */ ++ ++static void iselInt128Expr (HReg* hi, HReg* lo, ISelEnv* env, IRExpr* e) ++{ ++ iselInt128Expr_wrk(hi, lo, env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(*hi) == HRcInt64); ++ vassert(hregIsVirtual(*hi)); ++ vassert(hregClass(*lo) == HRcInt64); ++ vassert(hregIsVirtual(*lo)); ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! */ ++static void iselInt128Expr_wrk (HReg* hi, HReg* lo, ISelEnv* env, IRExpr* e) ++{ ++ vassert(e); ++ vassert(typeOfIRExpr(env->type_env, e) == Ity_I128); ++ ++ /* --------- TEMP --------- */ ++ if (e->tag == Iex_RdTmp) { ++ lookupIRTempPair(hi, lo, env, e->Iex.RdTmp.tmp); ++ return; ++ } ++ ++ /* --------- BINARY OP --------- */ ++ if (e->tag == Iex_Binop) { ++ switch (e->Iex.Binop.op) { ++ case Iop_64HLto128: { ++ *hi = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ *lo = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ return; ++ } ++ case Iop_DivModS64to64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_D, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_D, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ case Iop_DivModU64to64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_DU, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_DU, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ case Iop_MullS64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MUL_D, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULH_D, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ case Iop_MullU64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MUL_D, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULH_DU, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* We get here if no pattern matched. */ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselInt128Expr(loongarch64): cannot reduce tree"); ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Floating point expressions (64/32 bit) ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Compute a floating point value into a register, the identity of ++ which is returned. 
As with iselIntExpr_R, the reg may be either ++ real or virtual; in any case it must not be changed by subsequent ++ code emitted by the caller. */ ++ ++static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) ++{ ++ HReg r = iselFltExpr_wrk(env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(r) == HRcFlt64); ++ vassert(hregIsVirtual(r)); ++ ++ return r; ++} ++ ++/* DO NOT CALL THIS DIRECTLY */ ++static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_F32 || ty == Ity_F64); ++ ++ switch (e->tag) { ++ /* --------- TEMP --------- */ ++ case Iex_RdTmp: ++ return lookupIRTemp(env, e->Iex.RdTmp.tmp); ++ ++ /* --------- LOAD --------- */ ++ case Iex_Load: { ++ if (e->Iex.Load.end != Iend_LE) ++ goto irreducible; ++ ++ LOONGARCH64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr, ty); ++ HReg dst = newVRegF(env); ++ LOONGARCH64FpLoadOp op; ++ switch(ty) { ++ case Ity_F32: ++ op = (am->tag == LAam_RI) ? LAfpload_FLD_S : LAfpload_FLDX_S; ++ break; ++ case Ity_F64: ++ op = (am->tag == LAam_RI) ? LAfpload_FLD_D : LAfpload_FLDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_FpLoad(op, am, dst)); ++ return dst; ++ } ++ ++ /* --------- GET --------- */ ++ case Iex_Get: { ++ HReg dst = newVRegF(env); ++ if (e->Iex.Get.offset < 1024) { ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregGSP(), e->Iex.Get.offset); ++ LOONGARCH64FpLoadOp op; ++ switch(ty) { ++ case Ity_F32: ++ op = LAfpload_FLD_S; ++ break; ++ case Ity_F64: ++ op = LAfpload_FLD_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_FpLoad(op, am, dst)); ++ } else { ++ HReg tmp = newVRegI(env); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RR(hregGSP(), tmp); ++ LOONGARCH64FpLoadOp op; ++ switch(ty) { ++ case Ity_F32: ++ op = LAfpload_FLDX_S; ++ break; ++ case Ity_F64: ++ op = LAfpload_FLDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_LI(e->Iex.Get.offset, tmp)); ++ addInstr(env, LOONGARCH64Instr_FpLoad(op, am, dst)); ++ } ++ return dst; ++ } ++ ++ /* --------- QUATERNARY OP --------- */ ++ case Iex_Qop: { ++ switch (e->Iex.Qop.details->op) { ++ case Iop_MAddF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMADD_S, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MAddF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMADD_D, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MSubF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMSUB_S, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MSubF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 
= iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMSUB_D, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- TERNARY OP --------- */ ++ case Iex_Triop: { ++ switch (e->Iex.Triop.details->op) { ++ case Iop_AddF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FADD_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_AddF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FADD_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_DivF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FDIV_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_DivF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FDIV_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MulF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMUL_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MulF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMUL_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_ScaleBF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSCALEB_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_ScaleBF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSCALEB_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SubF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, 
e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSUB_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SubF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSUB_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- BINARY OP --------- */ ++ case Iex_Binop: { ++ switch (e->Iex.Binop.op) { ++ case Iop_F64toF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FCVT_S_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_I32StoF32: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_S_W, tmp, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_I64StoF32: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_S_L, tmp, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_I64StoF64: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_D_L, tmp, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_LogBF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FLOGB_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_LogBF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FLOGB_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MaxNumAbsF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAXA_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MaxNumF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAX_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MaxNumAbsF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAXA_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MaxNumF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, 
e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAX_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumAbsF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMINA_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMIN_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumAbsF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMINA_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMIN_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_RoundF32toInt: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRINT_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_RoundF64toInt: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRINT_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_RSqrtF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRSQRT_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_RSqrtF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRSQRT_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SqrtF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FSQRT_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SqrtF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FSQRT_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- UNARY OP --------- */ ++ case Iex_Unop: { ++ switch (e->Iex.Unop.op) { ++ case Iop_AbsF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FABS_S, src, dst)); ++ return dst; ++ } ++ case Iop_AbsF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FABS_D, src, dst)); ++ return dst; ++ } ++ case Iop_F32toF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FCVT_D_S, src, dst)); ++ 
return dst; ++ } ++ case Iop_I32StoF64: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_D_W, tmp, dst)); ++ return dst; ++ } ++ case Iop_NegF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FNEG_S, src, dst)); ++ return dst; ++ } ++ case Iop_NegF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FNEG_D, src, dst)); ++ return dst; ++ } ++ case Iop_ReinterpI32asF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_W, src, dst)); ++ return dst; ++ } ++ case Iop_ReinterpI64asF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, dst)); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- LITERAL --------- */ ++ case Iex_Const: { ++ /* Just handle the one case. */ ++ IRConst* con = e->Iex.Const.con; ++ if (con->tag == Ico_F32i && con->Ico.F32i == 1) { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegF(env); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(1, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_W, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_W, tmp, dst)); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_S_W, dst, dst)); ++ return dst; ++ } else if (con->tag == Ico_F64i && con->Ico.F64i == 1) { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegF(env); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(1, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, tmp, dst)); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_D_L, dst, dst)); ++ return dst; ++ } else { ++ goto irreducible; ++ } ++ } ++ ++ case Iex_ITE: { ++ HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse); ++ HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue); ++ HReg cond = iselCondCode_R(env, e->Iex.ITE.cond); ++ HReg dst = newVRegF(env); ++ addInstr(env, LOONGARCH64Instr_CMove(cond, r0, r1, dst, False)); ++ return dst; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* We get here if no pattern matched. */ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselFltExpr(loongarch64): cannot reduce tree"); ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Statements ---*/ ++/*---------------------------------------------------------*/ ++ ++static void iselStmtStore ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); ++ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); ++ ++ if (tya != Ity_I64 || stmt->Ist.Store.end != Iend_LE) ++ vpanic("iselStmt(loongarch64): Ist_Store"); ++ ++ Bool fp = False; ++ LOONGARCH64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); ++ LOONGARCH64StoreOp op; ++ LOONGARCH64FpStoreOp fop; ++ switch (tyd) { ++ case Ity_I8: ++ op = (am->tag == LAam_RI) ? LAstore_ST_B : LAstore_STX_B; ++ break; ++ case Ity_I16: ++ op = (am->tag == LAam_RI) ? LAstore_ST_H : LAstore_STX_H; ++ break; ++ case Ity_I32: ++ op = (am->tag == LAam_RI) ? 
LAstore_ST_W : LAstore_STX_W; ++ break; ++ case Ity_I64: ++ op = (am->tag == LAam_RI) ? LAstore_ST_D : LAstore_STX_D; ++ break; ++ case Ity_F32: ++ fop = (am->tag == LAam_RI) ? LAfpstore_FST_S : LAfpstore_FSTX_S; ++ fp = True; ++ break; ++ case Ity_F64: ++ fop = (am->tag == LAam_RI) ? LAfpstore_FST_D : LAfpstore_FSTX_D; ++ fp = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_Store"); ++ break; ++ } ++ ++ if (fp) { ++ HReg src = iselFltExpr(env, stmt->Ist.Store.data); ++ addInstr(env, LOONGARCH64Instr_FpStore(fop, am, src)); ++ } else { ++ HReg src = iselIntExpr_R(env, stmt->Ist.Store.data); ++ addInstr(env, LOONGARCH64Instr_Store(op, am, src)); ++ } ++} ++ ++static void iselStmtPut ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); ++ ++ Bool fp = False; ++ LOONGARCH64AMode* am; ++ LOONGARCH64StoreOp op; ++ LOONGARCH64FpStoreOp fop; ++ if (stmt->Ist.Put.offset < 1024) { ++ switch (ty) { ++ case Ity_I8: ++ op = LAstore_ST_B; ++ break; ++ case Ity_I16: ++ op = LAstore_ST_H; ++ break; ++ case Ity_I32: ++ op = LAstore_ST_W; ++ break; ++ case Ity_I64: ++ op = LAstore_ST_D; ++ break; ++ case Ity_F32: ++ fop = LAfpstore_FST_S; ++ fp = True; ++ break; ++ case Ity_F64: ++ fop = LAfpstore_FST_D; ++ fp = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_Put"); ++ break; ++ } ++ ++ am = LOONGARCH64AMode_RI(hregGSP(), stmt->Ist.Put.offset); ++ if (fp) { ++ HReg src = iselFltExpr(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_FpStore(fop, am, src)); ++ } else { ++ HReg src = iselIntExpr_R(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_Store(op, am, src)); ++ } ++ } else { ++ switch (ty) { ++ case Ity_I8: ++ op = LAstore_STX_B; ++ break; ++ case Ity_I16: ++ op = LAstore_STX_H; ++ break; ++ case Ity_I32: ++ op = LAstore_STX_W; ++ break; ++ case Ity_I64: ++ op = LAstore_STX_D; ++ break; ++ case Ity_F32: ++ fop = LAfpstore_FSTX_S; ++ fp = True; ++ break; ++ case Ity_F64: ++ fop = LAfpstore_FSTX_D; ++ fp = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_Put"); ++ break; ++ } ++ ++ HReg tmp = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_LI(stmt->Ist.Put.offset, tmp)); ++ am = LOONGARCH64AMode_RR(hregGSP(), tmp); ++ if (fp) { ++ HReg src = iselFltExpr(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_FpStore(fop, am, src)); ++ } else { ++ HReg src = iselIntExpr_R(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_Store(op, am, src)); ++ } ++ } ++} ++ ++static void iselStmtTmp ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRTemp tmp = stmt->Ist.WrTmp.tmp; ++ IRType ty = typeOfIRTemp(env->type_env, tmp); ++ ++ if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselIntExpr_R(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_Move(dst, src)); ++ } else if (ty == Ity_I1) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselCondCode_R(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_Move(dst, src)); ++ } else if (ty == Ity_F32) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_FMOV_S, src, dst)); ++ } else if (ty == Ity_F64) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_FMOV_D, src, dst)); ++ } else { ++ vpanic("iselStmt(loongarch64): Ist_WrTmp"); ++ } ++} ++ ++static void iselStmtDirty ( 
ISelEnv* env, IRStmt* stmt ) ++{ ++ IRDirty* d = stmt->Ist.Dirty.details; ++ ++ /* Figure out the return type, if any. */ ++ IRType retty = Ity_INVALID; ++ if (d->tmp != IRTemp_INVALID) ++ retty = typeOfIRTemp(env->type_env, d->tmp); ++ ++ Bool retty_ok = False; ++ switch (retty) { ++ case Ity_INVALID: /* function doesn't return anything */ ++ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: ++ retty_ok = True; ++ break; ++ default: ++ break; ++ } ++ if (!retty_ok) ++ vpanic("iselStmt(loongarch64): Ist_Dirty"); ++ ++ /* Marshal args, do the call, and set the return value to 0x555..555 ++ if this is a conditional call that returns a value and the ++ call is skipped. */ ++ UInt addToSp = 0; ++ RetLoc rloc = mk_RetLoc_INVALID(); ++ doHelperCall(&addToSp, &rloc, env, d->guard, d->cee, retty, d->args); ++ vassert(is_sane_RetLoc(rloc)); ++ ++ /* Now figure out what to do with the returned value, if any. */ ++ switch (retty) { ++ case Ity_INVALID: { ++ /* No return value. Nothing to do. */ ++ vassert(d->tmp == IRTemp_INVALID); ++ vassert(rloc.pri == RLPri_None); ++ vassert(addToSp == 0); ++ break; ++ } ++ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: { ++ vassert(rloc.pri == RLPri_Int); ++ vassert(addToSp == 0); ++ /* The returned value is in $a0. Park it in the register ++ associated with tmp. */ ++ HReg dst = lookupIRTemp(env, d->tmp); ++ addInstr(env, LOONGARCH64Instr_Move(dst, hregLOONGARCH64_R4())); ++ break; ++ } ++ default: ++ /*NOTREACHED*/ ++ vassert(0); ++ break; ++ } ++} ++ ++static void iselStmtLLSC ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRTemp res = stmt->Ist.LLSC.result; ++ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr); ++ ++ /* Temporary solution; this needs to be rewritten again for LOONGARCH64. ++ On LOONGARCH64 you cannot read from an address that is locked with LL ++ before the SC. If you read from an address that is locked, then the SC will fail.
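++      Note also that sc.w/sc.d overwrite their data register with the
++      success flag (1 on success, 0 on failure), which is why the store
++      data is first copied into a scratch register in the SC case below.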
++ */ ++ if (stmt->Ist.LLSC.storedata == NULL) { ++ /* LL */ ++ IRType ty = typeOfIRTemp(env->type_env, res); ++ LOONGARCH64LLSCOp op; ++ switch (ty) { ++ case Ity_I32: ++ op = LAllsc_LL_W; ++ break; ++ case Ity_I64: ++ op = LAllsc_LL_D; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_LLSC"); ++ break; ++ } ++ LOONGARCH64AMode* addr = iselIntExpr_AMode(env, stmt->Ist.LLSC.addr, tya); ++ HReg val = lookupIRTemp(env, res); ++ addInstr(env, LOONGARCH64Instr_LLSC(op, True, addr, val)); ++ } else { ++ /* SC */ ++ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); ++ LOONGARCH64LLSCOp op; ++ switch (tyd) { ++ case Ity_I32: ++ op = LAllsc_SC_W; ++ break; ++ case Ity_I64: ++ op = LAllsc_SC_D; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_LLSC"); ++ break; ++ } ++ LOONGARCH64AMode* addr = iselIntExpr_AMode(env, stmt->Ist.LLSC.addr, tya); ++ HReg val = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); ++ HReg dst = lookupIRTemp(env, res); ++ HReg tmp = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Move(tmp, val)); ++ addInstr(env, LOONGARCH64Instr_LLSC(op, False, addr, tmp)); ++ addInstr(env, LOONGARCH64Instr_Move(dst, tmp)); ++ } ++} ++ ++static void iselStmtCas ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRCAS* cas = stmt->Ist.CAS.details; ++ if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) { ++ /* "normal" singleton CAS */ ++ HReg old = lookupIRTemp(env, cas->oldLo); ++ HReg addr = iselIntExpr_R(env, cas->addr); ++ HReg expd = iselIntExpr_R(env, cas->expdLo); ++ HReg data = iselIntExpr_R(env, cas->dataLo); ++ IRType ty = typeOfIRTemp(env->type_env, cas->oldLo); ++ Bool size64; ++ switch (ty) { ++ case Ity_I32: ++ size64 = False; ++ break; ++ case Ity_I64: ++ size64 = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_CAS"); ++ break; ++ } ++ addInstr(env, LOONGARCH64Instr_Cas(old, addr, expd, data, size64)); ++ } else { ++ vpanic("iselStmt(loongarch64): Ist_CAS"); ++ } ++} ++ ++static void iselStmtMBE ( ISelEnv* env, IRStmt* stmt ) ++{ ++ switch (stmt->Ist.MBE.event) { ++ case Imbe_Fence: ++ case Imbe_CancelReservation: ++ addInstr(env, LOONGARCH64Instr_Bar(LAbar_DBAR, 0)); ++ break; ++ case Imbe_InsnFence: ++ addInstr(env, LOONGARCH64Instr_Bar(LAbar_IBAR, 0)); ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_MBE"); ++ break; ++ } ++} ++ ++static void iselStmtExit ( ISelEnv* env, IRStmt* stmt ) ++{ ++ if (stmt->Ist.Exit.dst->tag != Ico_U64) ++ vpanic("iselStmt(loongarch64): Ist_Exit: dst is not a 64-bit value"); ++ ++ HReg cond = iselCondCode_R(env, stmt->Ist.Exit.guard); ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), stmt->Ist.Exit.offsIP); ++ ++ /* Case: boring transfer to known address */ ++ if (stmt->Ist.Exit.jk == Ijk_Boring || stmt->Ist.Exit.jk == Ijk_Call) { ++ if (env->chainingAllowed) { ++ /* .. almost always true .. */ ++ /* Skip the event check at the dst if this is a forwards edge. */ ++ Bool toFastEP = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; ++ addInstr(env, LOONGARCH64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, ++ am, cond, toFastEP)); ++ } else { ++ /* .. very occasionally .. */ ++ /* We can't use chaining, so ask for an assisted transfer, ++ as that's the only alternative that is allowable. 
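++         An assisted transfer hands control back to the dispatcher
++         together with the jump kind, instead of using a patchable
++         direct jump.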
*/ ++ HReg dst = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, cond, Ijk_Boring)); ++ } ++ return; ++ } ++ ++ /* Case: assisted transfer to arbitrary address */ ++ switch (stmt->Ist.Exit.jk) { ++ /* Keep this list in sync with that for iselNext below */ ++ case Ijk_ClientReq: ++ case Ijk_Yield: ++ case Ijk_NoDecode: ++ case Ijk_InvalICache: ++ case Ijk_NoRedir: ++ case Ijk_SigILL: ++ case Ijk_SigTRAP: ++ case Ijk_SigSEGV: ++ case Ijk_SigBUS: ++ case Ijk_SigFPE_IntDiv: ++ case Ijk_SigFPE_IntOvf: ++ case Ijk_SigSYS: ++ case Ijk_Sys_syscall: { ++ HReg dst = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, cond, stmt->Ist.Exit.jk)); ++ break; ++ } ++ default: ++ /* Do we ever expect to see any other kind? */ ++ ppIRJumpKind(stmt->Ist.Exit.jk); ++ vpanic("iselStmt(loongarch64): Ist_Exit: unexpected jump kind"); ++ break; ++ } ++} ++ ++static void iselStmt(ISelEnv* env, IRStmt* stmt) ++{ ++ if (vex_traceflags & VEX_TRACE_VCODE) { ++ vex_printf("\n-- "); ++ ppIRStmt(stmt); ++ vex_printf("\n"); ++ } ++ ++ switch (stmt->tag) { ++ /* --------- STORE --------- */ ++ /* little-endian write to memory */ ++ case Ist_Store: ++ iselStmtStore(env, stmt); ++ break; ++ ++ /* --------- PUT --------- */ ++ /* write guest state, fixed offset */ ++ case Ist_Put: ++ iselStmtPut(env, stmt); ++ break; ++ ++ /* --------- TMP --------- */ ++ /* assign value to temporary */ ++ case Ist_WrTmp: ++ iselStmtTmp(env, stmt); ++ break; ++ ++ /* --------- Call to DIRTY helper --------- */ ++ /* call complex ("dirty") helper function */ ++ case Ist_Dirty: ++ iselStmtDirty(env, stmt); ++ break; ++ ++ /* --------- Load Linked and Store Conditional --------- */ ++ case Ist_LLSC: ++ iselStmtLLSC(env, stmt); ++ break; ++ ++ /* --------- CAS --------- */ ++ case Ist_CAS: ++ iselStmtCas(env, stmt); ++ break; ++ ++ /* --------- MEM FENCE --------- */ ++ case Ist_MBE: ++ iselStmtMBE(env, stmt); ++ break; ++ ++ /* --------- INSTR MARK --------- */ ++ /* Doesn't generate any executable code ... */ ++ case Ist_IMark: ++ break; ++ ++ /* --------- ABI HINT --------- */ ++ /* These have no meaning (denotation in the IR) and so we ignore ++ them ... if any actually made it this far. */ ++ case Ist_AbiHint: ++ break; ++ ++ /* --------- NO-OP --------- */ ++ case Ist_NoOp: ++ break; ++ ++ /* --------- EXIT --------- */ ++ case Ist_Exit: ++ iselStmtExit(env, stmt); ++ break; ++ ++ default: ++ ppIRStmt(stmt); ++ vpanic("iselStmt(loongarch64)"); ++ break; ++ } ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Basic block terminators (Nexts) ---*/ ++/*---------------------------------------------------------*/ ++ ++static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk, Int offsIP ) ++{ ++ if (vex_traceflags & VEX_TRACE_VCODE) { ++ vex_printf("\n-- PUT(%d) = ", offsIP); ++ ppIRExpr(next); ++ vex_printf("; exit-"); ++ ppIRJumpKind(jk); ++ vex_printf("\n"); ++ } ++ ++ /* Case: boring transfer to known address */ ++ if (next->tag == Iex_Const) { ++ IRConst* cdst = next->Iex.Const.con; ++ vassert(cdst->tag == Ico_U64); ++ if (jk == Ijk_Boring || jk == Ijk_Call) { ++ /* Boring transfer to known address */ ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), offsIP); ++ if (env->chainingAllowed) { ++ /* .. almost always true .. */ ++ /* Skip the event check at the dst if this is a forwards edge. 
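++            ('Forwards edge' means the destination lies beyond max_ga,
++            the last guest address covered by this superblock; toFastEP
++            then makes the XDirect target the destination's fast entry
++            point, which omits the event check.)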
*/ ++ Bool toFastEP = ((Addr64)cdst->Ico.U64) > env->max_ga; ++ addInstr(env, LOONGARCH64Instr_XDirect(cdst->Ico.U64, am, ++ INVALID_HREG, toFastEP)); ++ } else { ++ /* .. very occasionally .. */ ++ /* We can't use chaining, so ask for an assisted transfer, ++ as that's the only alternative that is allowable. */ ++ HReg dst = iselIntExpr_R(env, next); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, INVALID_HREG, Ijk_Boring)); ++ } ++ return; ++ } ++ } ++ ++ /* Case: call/return (==boring) transfer to any address */ ++ switch (jk) { ++ case Ijk_Boring: ++ case Ijk_Ret: ++ case Ijk_Call: { ++ HReg dst = iselIntExpr_R(env, next); ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), offsIP); ++ if (env->chainingAllowed) { ++ addInstr(env, LOONGARCH64Instr_XIndir(dst, am, INVALID_HREG)); ++ } else { ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, ++ INVALID_HREG, Ijk_Boring)); ++ } ++ return; ++ } ++ default: ++ break; ++ } ++ ++ /* Case: assisted transfer to arbitrary address */ ++ switch (jk) { ++ /* Keep this list in sync with that for Ist_Exit above */ ++ case Ijk_ClientReq: ++ case Ijk_Yield: ++ case Ijk_NoDecode: ++ case Ijk_InvalICache: ++ case Ijk_NoRedir: ++ case Ijk_SigILL: ++ case Ijk_SigTRAP: ++ case Ijk_SigSEGV: ++ case Ijk_SigBUS: ++ case Ijk_SigFPE_IntDiv: ++ case Ijk_SigFPE_IntOvf: ++ case Ijk_SigSYS: ++ case Ijk_Sys_syscall: { ++ HReg dst = iselIntExpr_R(env, next); ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), offsIP); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, INVALID_HREG, jk)); ++ return; ++ } ++ default: ++ break; ++ } ++ ++ vex_printf("\n-- PUT(%d) = ", offsIP); ++ ppIRExpr(next); ++ vex_printf("; exit-"); ++ ppIRJumpKind(jk); ++ vex_printf("\n"); ++ vassert(0); // are we expecting any other kind? ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- Insn selector top-level ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Translate an entire BB to LOONGARCH64 code. */ ++HInstrArray* iselSB_LOONGARCH64 ( const IRSB* bb, ++ VexArch arch_host, ++ const VexArchInfo* archinfo_host, ++ const VexAbiInfo* vbi, ++ Int offs_Host_EvC_Counter, ++ Int offs_Host_EvC_FailAddr, ++ Bool chainingAllowed, ++ Bool addProfInc, ++ Addr max_ga ) ++{ ++ Int i, j; ++ HReg hreg, hregHI; ++ ISelEnv* env; ++ UInt hwcaps_host = archinfo_host->hwcaps; ++ LOONGARCH64AMode *amCounter, *amFailAddr; ++ ++ /* sanity ... */ ++ vassert(arch_host == VexArchLOONGARCH64); ++ vassert((hwcaps_host & ~(VEX_HWCAPS_LOONGARCH_CPUCFG ++ | VEX_HWCAPS_LOONGARCH_LAM ++ | VEX_HWCAPS_LOONGARCH_UAL ++ | VEX_HWCAPS_LOONGARCH_FP ++ | VEX_HWCAPS_LOONGARCH_LSX ++ | VEX_HWCAPS_LOONGARCH_LASX ++ | VEX_HWCAPS_LOONGARCH_COMPLEX ++ | VEX_HWCAPS_LOONGARCH_CRYPTO ++ | VEX_HWCAPS_LOONGARCH_LVZP ++ | VEX_HWCAPS_LOONGARCH_X86BT ++ | VEX_HWCAPS_LOONGARCH_ARMBT ++ | VEX_HWCAPS_LOONGARCH_MIPSBT ++ | VEX_HWCAPS_LOONGARCH_ISA_32BIT ++ | VEX_HWCAPS_LOONGARCH_ISA_64BIT)) == 0); ++ ++ /* Check that the host's endianness is as expected. */ ++ vassert(archinfo_host->endness == VexEndnessLE); ++ ++ /* Make up an initial environment to use. */ ++ env = LibVEX_Alloc_inline(sizeof(ISelEnv)); ++ env->vreg_ctr = 0; ++ ++ /* Set up output code array. */ ++ env->code = newHInstrArray(); ++ ++ /* Copy BB's type env. */ ++ env->type_env = bb->tyenv; ++ ++ /* Make up an IRTemp -> virtual HReg mapping. This doesn't ++ change as we go along. 
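++      vregmap holds one virtual register per IRTemp; vregmapHI is only
++      used for the upper half of Ity_I128 temps, which need a register
++      pair.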
*/ ++ env->n_vregmap = bb->tyenv->types_used; ++ env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); ++ env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); ++ ++ /* and finally ... */ ++ env->chainingAllowed = chainingAllowed; ++ env->hwcaps = hwcaps_host; ++ env->max_ga = max_ga; ++ ++ /* For each IR temporary, allocate a suitably-kinded virtual register. */ ++ j = 0; ++ for (i = 0; i < env->n_vregmap; i++) { ++ hregHI = hreg = INVALID_HREG; ++ switch (bb->tyenv->types[i]) { ++ case Ity_I1: ++ case Ity_I8: ++ case Ity_I16: ++ case Ity_I32: ++ case Ity_I64: ++ hreg = mkHReg(True, HRcInt64, 0, j++); ++ break; ++ case Ity_I128: ++ hreg = mkHReg(True, HRcInt64, 0, j++); ++ hregHI = mkHReg(True, HRcInt64, 0, j++); ++ break; ++ case Ity_F16: // we'll use HRcFlt64 regs for F16 too ++ case Ity_F32: // we'll use HRcFlt64 regs for F32 too ++ case Ity_F64: ++ hreg = mkHReg(True, HRcFlt64, 0, j++); ++ break; ++ default: ++ ppIRType(bb->tyenv->types[i]); ++ vpanic("iselBB(loongarch64): IRTemp type"); ++ } ++ env->vregmap[i] = hreg; ++ env->vregmapHI[i] = hregHI; ++ } ++ env->vreg_ctr = j; ++ ++ /* The very first instruction must be an event check. */ ++ amCounter = mkLOONGARCH64AMode_RI(hregGSP(), offs_Host_EvC_Counter); ++ amFailAddr = mkLOONGARCH64AMode_RI(hregGSP(), offs_Host_EvC_FailAddr); ++ addInstr(env, LOONGARCH64Instr_EvCheck(amCounter, amFailAddr)); ++ ++ /* Possibly a block counter increment (for profiling). At this ++ point we don't know the address of the counter, so just pretend ++ it is zero. It will have to be patched later, but before this ++ translation is used, by a call to LibVEX_patchProfCtr. */ ++ if (addProfInc) { ++ addInstr(env, LOONGARCH64Instr_ProfInc()); ++ } ++ ++ /* Ok, finally we can iterate over the statements. */ ++ for (i = 0; i < bb->stmts_used; i++) ++ iselStmt(env, bb->stmts[i]); ++ ++ iselNext(env, bb->next, bb->jumpkind, bb->offsIP); ++ ++ /* record the number of vregs we used. 
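++      (The register allocator presumably uses this count to size its
++      per-vreg state.)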
*/ ++ env->code->n_vregs = env->vreg_ctr; ++ return env->code; ++} ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end host_loongarch64_isel.c ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c +index 2d82c41a1..382c283f2 100644 +--- a/VEX/priv/ir_defs.c ++++ b/VEX/priv/ir_defs.c +@@ -280,6 +280,8 @@ void ppIROp ( IROp op ) + case Iop_SubF64: vex_printf("SubF64"); return; + case Iop_MulF64: vex_printf("MulF64"); return; + case Iop_DivF64: vex_printf("DivF64"); return; ++ case Iop_ScaleBF64: vex_printf("ScaleBF64"); return; ++ case Iop_ScaleBF32: vex_printf("ScaleBF32"); return; + case Iop_AddF64r32: vex_printf("AddF64r32"); return; + case Iop_SubF64r32: vex_printf("SubF64r32"); return; + case Iop_MulF64r32: vex_printf("MulF64r32"); return; +@@ -356,6 +358,10 @@ void ppIROp ( IROp op ) + case Iop_SqrtF64: vex_printf("SqrtF64"); return; + case Iop_SqrtF32: vex_printf("SqrtF32"); return; + case Iop_SqrtF16: vex_printf("SqrtF16"); return; ++ case Iop_RSqrtF32: vex_printf("RSqrtF32"); return; ++ case Iop_RSqrtF64: vex_printf("RSqrtF64"); return; ++ case Iop_LogBF32: vex_printf("LogBF32"); return; ++ case Iop_LogBF64: vex_printf("LogBF64"); return; + case Iop_SinF64: vex_printf("SinF64"); return; + case Iop_CosF64: vex_printf("CosF64"); return; + case Iop_TanF64: vex_printf("TanF64"); return; +@@ -379,8 +385,12 @@ void ppIROp ( IROp op ) + + case Iop_MaxNumF64: vex_printf("MaxNumF64"); return; + case Iop_MinNumF64: vex_printf("MinNumF64"); return; ++ case Iop_MaxNumAbsF64: vex_printf("MaxNumAbsF64"); return; ++ case Iop_MinNumAbsF64: vex_printf("MinNumAbsF64"); return; + case Iop_MaxNumF32: vex_printf("MaxNumF32"); return; + case Iop_MinNumF32: vex_printf("MinNumF32"); return; ++ case Iop_MaxNumAbsF32: vex_printf("MaxNumAbsF32"); return; ++ case Iop_MinNumAbsF32: vex_printf("MinNumAbsF32"); return; + + case Iop_F16toF64: vex_printf("F16toF64"); return; + case Iop_F64toF16: vex_printf("F64toF16"); return; +@@ -1434,10 +1444,13 @@ Bool primopMightTrap ( IROp op ) + case Iop_1Uto8: case Iop_1Uto32: case Iop_1Uto64: case Iop_1Sto8: + case Iop_1Sto16: case Iop_1Sto32: case Iop_1Sto64: + case Iop_AddF64: case Iop_SubF64: case Iop_MulF64: case Iop_DivF64: ++ case Iop_ScaleBF64: case Iop_ScaleBF32: + case Iop_AddF32: case Iop_SubF32: case Iop_MulF32: case Iop_DivF32: + case Iop_AddF64r32: case Iop_SubF64r32: case Iop_MulF64r32: + case Iop_DivF64r32: case Iop_NegF64: case Iop_AbsF64: + case Iop_NegF32: case Iop_AbsF32: case Iop_SqrtF64: case Iop_SqrtF32: ++ case Iop_RSqrtF64: case Iop_RSqrtF32: ++ case Iop_LogBF64: case Iop_LogBF32: + case Iop_NegF16: case Iop_AbsF16: case Iop_SqrtF16: case Iop_SubF16: + case Iop_AddF16: + case Iop_CmpF64: case Iop_CmpF32: case Iop_CmpF16: case Iop_CmpF128: +@@ -1477,8 +1490,11 @@ Bool primopMightTrap ( IROp op ) + case Iop_RSqrtEst5GoodF64: case Iop_RoundF64toF64_NEAREST: + case Iop_RoundF64toF64_NegINF: case Iop_RoundF64toF64_PosINF: + case Iop_RoundF64toF64_ZERO: case Iop_TruncF64asF32: case Iop_RoundF64toF32: +- case Iop_RecpExpF64: case Iop_RecpExpF32: case Iop_MaxNumF64: +- case Iop_MinNumF64: case Iop_MaxNumF32: case Iop_MinNumF32: ++ case Iop_RecpExpF64: case Iop_RecpExpF32: ++ case Iop_MaxNumF64: case Iop_MinNumF64: ++ case Iop_MaxNumAbsF64: case Iop_MinNumAbsF64: ++ case Iop_MaxNumF32: case Iop_MinNumF32: ++ case Iop_MaxNumAbsF32: case Iop_MinNumAbsF32: + case Iop_F16toF64: case Iop_F64toF16: case Iop_F16toF32: + case Iop_F32toF16: case 
Iop_QAdd32S: case Iop_QSub32S: + case Iop_Add16x2: case Iop_Sub16x2: +@@ -2075,6 +2091,7 @@ void ppIRJumpKind ( IRJumpKind kind ) + case Ijk_SigFPE: vex_printf("SigFPE"); break; + case Ijk_SigFPE_IntDiv: vex_printf("SigFPE_IntDiv"); break; + case Ijk_SigFPE_IntOvf: vex_printf("SigFPE_IntOvf"); break; ++ case Ijk_SigSYS: vex_printf("SigSYS"); break; + case Ijk_Sys_syscall: vex_printf("Sys_syscall"); break; + case Ijk_Sys_int32: vex_printf("Sys_int32"); break; + case Ijk_Sys_int128: vex_printf("Sys_int128"); break; +@@ -2094,6 +2111,8 @@ void ppIRMBusEvent ( IRMBusEvent event ) + vex_printf("Fence"); break; + case Imbe_CancelReservation: + vex_printf("CancelReservation"); break; ++ case Imbe_InsnFence: ++ vex_printf("InsnFence"); break; + default: + vpanic("ppIRMBusEvent"); + } +@@ -3372,12 +3391,14 @@ void typeOfPrimop ( IROp op, + + case Iop_AddF64: case Iop_SubF64: + case Iop_MulF64: case Iop_DivF64: ++ case Iop_ScaleBF64: + case Iop_AddF64r32: case Iop_SubF64r32: + case Iop_MulF64r32: case Iop_DivF64r32: + TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_F64); + + case Iop_AddF32: case Iop_SubF32: + case Iop_MulF32: case Iop_DivF32: ++ case Iop_ScaleBF32: + TERNARY(ity_RMode,Ity_F32,Ity_F32, Ity_F32); + + case Iop_AddF16: +@@ -3394,10 +3415,14 @@ void typeOfPrimop ( IROp op, + UNARY(Ity_F16, Ity_F16); + + case Iop_SqrtF64: ++ case Iop_RSqrtF64: ++ case Iop_LogBF64: + case Iop_RecpExpF64: + BINARY(ity_RMode,Ity_F64, Ity_F64); + + case Iop_SqrtF32: ++ case Iop_RSqrtF32: ++ case Iop_LogBF32: + case Iop_RoundF32toInt: + case Iop_RecpExpF32: + BINARY(ity_RMode,Ity_F32, Ity_F32); +@@ -3406,9 +3431,11 @@ void typeOfPrimop ( IROp op, + BINARY(ity_RMode, Ity_F16, Ity_F16); + + case Iop_MaxNumF64: case Iop_MinNumF64: ++ case Iop_MaxNumAbsF64: case Iop_MinNumAbsF64: + BINARY(Ity_F64,Ity_F64, Ity_F64); + + case Iop_MaxNumF32: case Iop_MinNumF32: ++ case Iop_MaxNumAbsF32: case Iop_MinNumAbsF32: + BINARY(Ity_F32,Ity_F32, Ity_F32); + + case Iop_CmpF16: +@@ -5246,7 +5273,9 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) + break; + case Ist_MBE: + switch (stmt->Ist.MBE.event) { +- case Imbe_Fence: case Imbe_CancelReservation: ++ case Imbe_Fence: ++ case Imbe_CancelReservation: ++ case Imbe_InsnFence: + break; + default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown"); + break; +diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c +index 482047c7a..98d4a81c6 100644 +--- a/VEX/priv/main_main.c ++++ b/VEX/priv/main_main.c +@@ -43,6 +43,7 @@ + #include "libvex_guest_s390x.h" + #include "libvex_guest_mips32.h" + #include "libvex_guest_mips64.h" ++#include "libvex_guest_loongarch64.h" + + #include "main_globals.h" + #include "main_util.h" +@@ -57,6 +58,7 @@ + #include "host_s390_defs.h" + #include "host_mips_defs.h" + #include "host_nanomips_defs.h" ++#include "host_loongarch64_defs.h" + + #include "guest_generic_bb_to_IR.h" + #include "guest_x86_defs.h" +@@ -67,6 +69,7 @@ + #include "guest_s390_defs.h" + #include "guest_mips_defs.h" + #include "guest_nanomips_defs.h" ++#include "guest_loongarch64_defs.h" + + #include "host_generic_simd128.h" + +@@ -163,6 +166,14 @@ + #define NANOMIPSST(f) vassert(0) + #endif + ++#if defined(VGA_loongarch64) || defined(VEXMULTIARCH) ++#define LOONGARCH64FN(f) f ++#define LOONGARCH64ST(f) f ++#else ++#define LOONGARCH64FN(f) NULL ++#define LOONGARCH64ST(f) vassert(0) ++#endif ++ + /* This file contains the top level interface to the library. */ + + /* --------- fwds ... 
--------- */ +@@ -541,6 +552,23 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_NRADDR ) == 4); + break; + ++ case VexArchLOONGARCH64: ++ preciseMemExnsFn ++ = LOONGARCH64FN(guest_loongarch64_state_requires_precise_mem_exns); ++ disInstrFn = LOONGARCH64FN(disInstr_LOONGARCH64); ++ specHelper = LOONGARCH64FN(guest_loongarch64_spechelper); ++ guest_layout = LOONGARCH64FN(&loongarch64Guest_layout); ++ offB_CMSTART = offsetof(VexGuestLOONGARCH64State, guest_CMSTART); ++ offB_CMLEN = offsetof(VexGuestLOONGARCH64State, guest_CMLEN); ++ offB_GUEST_IP = offsetof(VexGuestLOONGARCH64State, guest_PC); ++ szB_GUEST_IP = sizeof( ((VexGuestLOONGARCH64State*)0)->guest_PC ); ++ vassert(vta->archinfo_guest.endness == VexEndnessLE); ++ vassert(sizeof(VexGuestLOONGARCH64State) % LibVEX_GUEST_STATE_ALIGN == 0); ++ vassert(sizeof( ((VexGuestLOONGARCH64State*)0)->guest_CMSTART) == 8); ++ vassert(sizeof( ((VexGuestLOONGARCH64State*)0)->guest_CMLEN ) == 8); ++ vassert(sizeof( ((VexGuestLOONGARCH64State*)0)->guest_NRADDR ) == 8); ++ break; ++ + default: + vpanic("LibVEX_Translate: unsupported guest insn set"); + } +@@ -878,6 +906,14 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); + break; + ++ case VexArchLOONGARCH64: ++ preciseMemExnsFn ++ = LOONGARCH64FN(guest_loongarch64_state_requires_precise_mem_exns); ++ guest_sizeB = sizeof(VexGuestLOONGARCH64State); ++ offB_HOST_EvC_COUNTER = offsetof(VexGuestLOONGARCH64State, host_EvC_COUNTER); ++ offB_HOST_EvC_FAILADDR = offsetof(VexGuestLOONGARCH64State, host_EvC_FAILADDR); ++ break; ++ + default: + vpanic("LibVEX_Codegen: unsupported guest insn set"); + } +@@ -1052,6 +1088,23 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, + || vta->archinfo_host.endness == VexEndnessBE); + break; + ++ case VexArchLOONGARCH64: ++ mode64 = True; ++ rRegUniv = LOONGARCH64FN(getRRegUniverse_LOONGARCH64()); ++ getRegUsage ++ = CAST_TO_TYPEOF(getRegUsage) LOONGARCH64FN(getRegUsage_LOONGARCH64Instr); ++ mapRegs = CAST_TO_TYPEOF(mapRegs) LOONGARCH64FN(mapRegs_LOONGARCH64Instr); ++ genSpill = CAST_TO_TYPEOF(genSpill) LOONGARCH64FN(genSpill_LOONGARCH64); ++ genReload = CAST_TO_TYPEOF(genReload) LOONGARCH64FN(genReload_LOONGARCH64); ++ genMove = CAST_TO_TYPEOF(genMove) LOONGARCH64FN(genMove_LOONGARCH64); ++ ppInstr = CAST_TO_TYPEOF(ppInstr) LOONGARCH64FN(ppLOONGARCH64Instr); ++ ppReg = CAST_TO_TYPEOF(ppReg) LOONGARCH64FN(ppHRegLOONGARCH64); ++ iselSB = LOONGARCH64FN(iselSB_LOONGARCH64); ++ emit = CAST_TO_TYPEOF(emit) LOONGARCH64FN(emit_LOONGARCH64Instr); ++ vassert(vta->archinfo_host.endness == VexEndnessLE ++ || vta->archinfo_host.endness == VexEndnessBE); ++ break; ++ + default: + vpanic("LibVEX_Translate: unsupported host insn set"); + } +@@ -1297,6 +1350,11 @@ VexInvalRange LibVEX_Chain ( VexArch arch_host, + place_to_chain, + disp_cp_chain_me_EXPECTED, + place_to_jump_to)); ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(return chainXDirect_LOONGARCH64(endness_host, ++ place_to_chain, ++ disp_cp_chain_me_EXPECTED, ++ place_to_jump_to)); + default: + vassert(0); + } +@@ -1359,6 +1417,11 @@ VexInvalRange LibVEX_UnChain ( VexArch arch_host, + place_to_unchain, + place_to_jump_to_EXPECTED, + disp_cp_chain_me)); ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(return unchainXDirect_LOONGARCH64(endness_host, ++ place_to_unchain, ++ place_to_jump_to_EXPECTED, ++ disp_cp_chain_me)); + default: + vassert(0); + } +@@ -1389,6 +1452,8 @@ Int 
LibVEX_evCheckSzB ( VexArch arch_host ) + MIPS64ST(cached = evCheckSzB_MIPS()); break; + case VexArchNANOMIPS: + NANOMIPSST(cached = evCheckSzB_NANOMIPS()); break; ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(cached = evCheckSzB_LOONGARCH64()); break; + default: + vassert(0); + } +@@ -1432,6 +1497,10 @@ VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host, + case VexArchNANOMIPS: + NANOMIPSST(return patchProfInc_NANOMIPS(endness_host, place_to_patch, + location_of_counter)); ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(return patchProfInc_LOONGARCH64(endness_host, ++ place_to_patch, ++ location_of_counter)); + default: + vassert(0); + } +@@ -1515,6 +1584,7 @@ const HChar* LibVEX_ppVexArch ( VexArch arch ) + case VexArchMIPS32: return "MIPS32"; + case VexArchMIPS64: return "MIPS64"; + case VexArchNANOMIPS: return "NANOMIPS"; ++ case VexArchLOONGARCH64: return "LOONGARCH64"; + default: return "VexArch???"; + } + } +@@ -1585,6 +1655,7 @@ static IRType arch_word_size (VexArch arch) { + case VexArchMIPS64: + case VexArchPPC64: + case VexArchS390X: ++ case VexArchLOONGARCH64: + return Ity_I64; + + default: +@@ -1925,6 +1996,38 @@ static const HChar* show_hwcaps_mips64 ( UInt hwcaps ) + return "Unsupported baseline"; + } + ++static const HChar* show_hwcaps_loongarch64 ( UInt hwcaps ) ++{ ++ static const HChar prefix[] = "loongarch64"; ++ static const struct { ++ UInt hwcaps_bit; ++ HChar name[16]; ++ } hwcaps_list[] = { ++ { VEX_HWCAPS_LOONGARCH_CPUCFG, "cpucfg" }, ++ { VEX_HWCAPS_LOONGARCH_LAM, "lam" }, ++ { VEX_HWCAPS_LOONGARCH_UAL, "ual" }, ++ { VEX_HWCAPS_LOONGARCH_FP, "fpu" }, ++ { VEX_HWCAPS_LOONGARCH_LSX, "lsx" }, ++ { VEX_HWCAPS_LOONGARCH_LASX, "lasx" }, ++ { VEX_HWCAPS_LOONGARCH_COMPLEX, "complex" }, ++ { VEX_HWCAPS_LOONGARCH_CRYPTO, "crypto" }, ++ { VEX_HWCAPS_LOONGARCH_LVZP, "lvz" }, ++ { VEX_HWCAPS_LOONGARCH_X86BT, "lbt_x86" }, ++ { VEX_HWCAPS_LOONGARCH_ARMBT, "lbt_arm" }, ++ { VEX_HWCAPS_LOONGARCH_MIPSBT, "lbt_mips" } ++ }; ++ static HChar buf[sizeof(prefix) + ++ NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) + 1]; // '\0' ++ ++ HChar *p = buf + vex_sprintf(buf, "%s", prefix); ++ UInt i; ++ for (i = 0 ; i < NUM_HWCAPS; ++i) { ++ if (hwcaps & hwcaps_list[i].hwcaps_bit) ++ p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name); ++ } ++ return buf; ++} ++ + #undef NUM_HWCAPS + + /* Thie function must not return NULL. 
*/ +@@ -1941,6 +2044,7 @@ static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps ) + case VexArchS390X: return show_hwcaps_s390x(hwcaps); + case VexArchMIPS32: return show_hwcaps_mips32(hwcaps); + case VexArchMIPS64: return show_hwcaps_mips64(hwcaps); ++ case VexArchLOONGARCH64: return show_hwcaps_loongarch64(hwcaps); + default: return NULL; + } + } +@@ -2203,6 +2307,11 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps ) + return; + invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n"); + ++ case VexArchLOONGARCH64: ++ if (!(hwcaps & VEX_HWCAPS_LOONGARCH_ISA_64BIT)) ++ invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n"); ++ return; ++ + default: + vpanic("unknown architecture"); + } +diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h +index ec50d52ca..88cb9e732 100644 +--- a/VEX/pub/libvex.h ++++ b/VEX/pub/libvex.h +@@ -60,6 +60,7 @@ typedef + VexArchMIPS32, + VexArchMIPS64, + VexArchNANOMIPS, ++ VexArchLOONGARCH64, + } + VexArch; + +@@ -299,6 +300,22 @@ typedef + (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_P5600) && \ + (VEX_MIPS_HOST_FP_MODE(x))) + ++/* LoongArch baseline capability */ ++#define VEX_HWCAPS_LOONGARCH_CPUCFG (1 << 0) /* CPU has CPUCFG */ ++#define VEX_HWCAPS_LOONGARCH_LAM (1 << 1) /* CPU has Atomic instructions */ ++#define VEX_HWCAPS_LOONGARCH_UAL (1 << 2) /* CPU has Unaligned Access support */ ++#define VEX_HWCAPS_LOONGARCH_FP (1 << 3) /* CPU has FPU */ ++#define VEX_HWCAPS_LOONGARCH_LSX (1 << 4) /* CPU has 128-bit SIMD instructions */ ++#define VEX_HWCAPS_LOONGARCH_LASX (1 << 5) /* CPU has 256-bit SIMD instructions */ ++#define VEX_HWCAPS_LOONGARCH_COMPLEX (1 << 6) /* CPU has Complex instructions */ ++#define VEX_HWCAPS_LOONGARCH_CRYPTO (1 << 7) /* CPU has Crypto instructions */ ++#define VEX_HWCAPS_LOONGARCH_LVZP (1 << 8) /* CPU has Virtualization extension */ ++#define VEX_HWCAPS_LOONGARCH_X86BT (1 << 9) /* CPU has X86 Binary Translation */ ++#define VEX_HWCAPS_LOONGARCH_ARMBT (1 << 10) /* CPU has ARM Binary Translation */ ++#define VEX_HWCAPS_LOONGARCH_MIPSBT (1 << 11) /* CPU has MIPS Binary Translation */ ++#define VEX_HWCAPS_LOONGARCH_ISA_32BIT (1 << 30) /* 32-bit ISA */ ++#define VEX_HWCAPS_LOONGARCH_ISA_64BIT (1 << 31) /* 64-bit ISA */ ++ + /* These return statically allocated strings. */ + + extern const HChar* LibVEX_ppVexArch ( VexArch ); +@@ -419,6 +436,7 @@ void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai ); + guest is mips32 ==> applicable, default True + guest is mips64 ==> applicable, default True + guest is arm64 ==> applicable, default False ++ guest is loongarch64 ==> const True + + host_ppc_calls_use_fndescrs: + host is ppc32-linux ==> False +@@ -1025,6 +1043,10 @@ extern void LibVEX_InitIRI ( const IRICB * ); + ~~~~~ + r21 is GSP. + ++ loongarch64 ++ ~~~~~ ++ r31 is GSP. 
++ + ALL GUEST ARCHITECTURES + ~~~~~~~~~~~~~~~~~~~~~~~ + The guest state must contain two pseudo-registers, guest_CMSTART +diff --git a/VEX/pub/libvex_basictypes.h b/VEX/pub/libvex_basictypes.h +index e3f1485d5..b4c81bf54 100644 +--- a/VEX/pub/libvex_basictypes.h ++++ b/VEX/pub/libvex_basictypes.h +@@ -198,6 +198,10 @@ typedef unsigned long HWord; + # define VEX_HOST_WORDSIZE 4 + # define VEX_REGPARM(_n) /* */ + ++#elif defined(__loongarch__) && (__loongarch_grlen == 64) ++# define VEX_HOST_WORDSIZE 8 ++# define VEX_REGPARM(_n) /* */ ++ + #else + # error "Vex: Fatal: Can't establish the host architecture" + #endif +diff --git a/VEX/pub/libvex_guest_loongarch64.h b/VEX/pub/libvex_guest_loongarch64.h +new file mode 100644 +index 000000000..36a6cb3ca +--- /dev/null ++++ b/VEX/pub/libvex_guest_loongarch64.h +@@ -0,0 +1,172 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin libvex_guest_loongarch64.h ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++ ++ Neither the names of the U.S. Department of Energy nor the ++ University of California nor the names of its contributors may be ++ used to endorse or promote products derived from this software ++ without prior written permission. ++*/ ++ ++#ifndef __LIBVEX_PUB_GUEST_LOONGARCH64_H ++#define __LIBVEX_PUB_GUEST_LOONGARCH64_H ++ ++#include "libvex_basictypes.h" ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- Vex's representation of the LOONGARCH64 CPU state. ---*/ ++/*---------------------------------------------------------------*/ ++ ++typedef ++ struct { ++ /* Event check fail addr and counter. 
*/ ++ ULong host_EvC_FAILADDR; ++ UInt host_EvC_COUNTER; ++ UInt _padding0; ++ ++ /* CPU Registers */ ++ ULong guest_R0; /* Constant zero */ ++ ULong guest_R1; /* Return address */ ++ ULong guest_R2; /* Thread pointer */ ++ ULong guest_R3; /* Stack pointer */ ++ ULong guest_R4; /* Argument registers / Return value */ ++ ULong guest_R5; ++ ULong guest_R6; /* Argument registers */ ++ ULong guest_R7; ++ ULong guest_R8; ++ ULong guest_R9; ++ ULong guest_R10; ++ ULong guest_R11; ++ ULong guest_R12; /* Temporary registers */ ++ ULong guest_R13; ++ ULong guest_R14; ++ ULong guest_R15; ++ ULong guest_R16; ++ ULong guest_R17; ++ ULong guest_R18; ++ ULong guest_R19; ++ ULong guest_R20; ++ ULong guest_R21; /* Reserved */ ++ ULong guest_R22; /* Frame pointer / Static register */ ++ ULong guest_R23; /* Static registers */ ++ ULong guest_R24; ++ ULong guest_R25; ++ ULong guest_R26; ++ ULong guest_R27; ++ ULong guest_R28; ++ ULong guest_R29; ++ ULong guest_R30; ++ ULong guest_R31; ++ ++ ULong guest_PC; /* Program counter */ ++ ++ UChar guest_FCC0; /* Condition Flag Registers */ ++ UChar guest_FCC1; ++ UChar guest_FCC2; ++ UChar guest_FCC3; ++ UChar guest_FCC4; ++ UChar guest_FCC5; ++ UChar guest_FCC6; ++ UChar guest_FCC7; ++ UInt guest_FCSR; /* FP/SIMD Control and Status Register */ ++ ++ /* Various pseudo-regs mandated by Vex or Valgrind. */ ++ /* Emulation notes */ ++ UInt guest_EMNOTE; ++ ++ /* For clflush: record start and length of area to invalidate */ ++ ULong guest_CMSTART; ++ ULong guest_CMLEN; ++ ++ /* Used to record the unredirected guest address at the start of ++ a translation whose start has been redirected. By reading ++ this pseudo-register shortly afterwards, the translation can ++ find out what the corresponding no-redirection address was. ++ Note, this is only set for wrap-style redirects, not for ++ replace-style ones. */ ++ ULong guest_NRADDR; ++ ++ /* Fallback LL/SC support. */ ++ ULong guest_LLSC_SIZE; /* 0==no transaction, else 4 or 8. */ ++ ULong guest_LLSC_ADDR; /* Address of the transaction. */ ++ ULong guest_LLSC_DATA; /* Original value at ADDR. */ ++ ++ ULong _padding1; ++ ++ /* FPU/SIMD Registers */ ++ U256 guest_X0; ++ U256 guest_X1; ++ U256 guest_X2; ++ U256 guest_X3; ++ U256 guest_X4; ++ U256 guest_X5; ++ U256 guest_X6; ++ U256 guest_X7; ++ U256 guest_X8; ++ U256 guest_X9; ++ U256 guest_X10; ++ U256 guest_X11; ++ U256 guest_X12; ++ U256 guest_X13; ++ U256 guest_X14; ++ U256 guest_X15; ++ U256 guest_X16; ++ U256 guest_X17; ++ U256 guest_X18; ++ U256 guest_X19; ++ U256 guest_X20; ++ U256 guest_X21; ++ U256 guest_X22; ++ U256 guest_X23; ++ U256 guest_X24; ++ U256 guest_X25; ++ U256 guest_X26; ++ U256 guest_X27; ++ U256 guest_X28; ++ U256 guest_X29; ++ U256 guest_X30; ++ U256 guest_X31; ++ ++ /* VexGuestLOONGARCH64State should have a 16-aligned size */ ++} VexGuestLOONGARCH64State; ++ ++/*---------------------------------------------------------------*/ ++/*--- Utility functions for LOONGARCH64 guest stuff. ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ALL THE FOLLOWING ARE VISIBLE TO LIBRARY CLIENT. */ ++ ++/* Initialise all guest LOONGARCH64 state. 
*/ ++ ++extern ++void LibVEX_GuestLOONGARCH64_initialise ( /*OUT*/ ++ VexGuestLOONGARCH64State* vex_state ); ++ ++#endif /* ndef __LIBVEX_PUB_GUEST_LOONGARCH64_H */ ++ ++/*---------------------------------------------------------------*/ ++/*--- libvex_guest_loongarch64.h ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h +index 85805bb69..afe8dfd29 100644 +--- a/VEX/pub/libvex_ir.h ++++ b/VEX/pub/libvex_ir.h +@@ -588,10 +588,10 @@ typedef + + /* Binary operations, with rounding. */ + /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ +- Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, ++ Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, Iop_ScaleBF64, + + /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */ +- Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, ++ Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, Iop_ScaleBF32, + + /* Variants of the above which produce a 64-bit result but which + round their result to a IEEE float range first. */ +@@ -610,10 +610,10 @@ typedef + + /* Unary operations, with rounding. */ + /* :: IRRoundingMode(I32) x F64 -> F64 */ +- Iop_SqrtF64, ++ Iop_SqrtF64, Iop_RSqrtF64, Iop_LogBF64, + + /* :: IRRoundingMode(I32) x F32 -> F32 */ +- Iop_SqrtF32, ++ Iop_SqrtF32, Iop_RSqrtF32, Iop_LogBF32, + + /* :: IRRoundingMode(I32) x F16 -> F16 */ + Iop_SqrtF16, +@@ -829,10 +829,14 @@ typedef + + /* --------- Possibly required by IEEE 754-2008. --------- */ + +- Iop_MaxNumF64, /* max, F64, numerical operand if other is a qNaN */ +- Iop_MinNumF64, /* min, F64, ditto */ +- Iop_MaxNumF32, /* max, F32, ditto */ +- Iop_MinNumF32, /* min, F32, ditto */ ++ Iop_MaxNumF64, /* max, F64, numerical operand if other is a qNaN */ ++ Iop_MinNumF64, /* min, F64, ditto */ ++ Iop_MaxNumAbsF64, /* max abs, F64, ditto */ ++ Iop_MinNumAbsF64, /* min abs, F64, ditto */ ++ Iop_MaxNumF32, /* max, F32, ditto */ ++ Iop_MinNumF32, /* min, F32, ditto */ ++ Iop_MaxNumAbsF32, /* max abs, F32, ditto */ ++ Iop_MinNumAbsF32, /* min abs, F32, ditto */ + + /* ------------------ 16-bit scalar FP ------------------ */ + +@@ -2503,6 +2507,7 @@ typedef + Ijk_SigFPE, /* current instruction synths generic SIGFPE */ + Ijk_SigFPE_IntDiv, /* current instruction synths SIGFPE - IntDiv */ + Ijk_SigFPE_IntOvf, /* current instruction synths SIGFPE - IntOvf */ ++ Ijk_SigSYS, /* current instruction synths SIGSYS */ + /* Unfortunately, various guest-dependent syscall kinds. They + all mean: do a syscall before continuing. */ + Ijk_Sys_syscall, /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */ +@@ -2662,7 +2667,12 @@ typedef + /* Needed only on ARM. It cancels a reservation made by a + preceding Linked-Load, and needs to be handed through to the + back end, just as LL and SC themselves are. */ +- Imbe_CancelReservation ++ Imbe_CancelReservation, ++ /* Needed only on LOONGARCH64. It completes the synchronization ++ between the store operation and the instruction fetch operation ++ within a single processor core, and needs to be handed through ++ to the back end. 
*/ ++ Imbe_InsnFence + } + IRMBusEvent; + +diff --git a/VEX/pub/libvex_trc_values.h b/VEX/pub/libvex_trc_values.h +index cfd54ded3..90e2b60af 100644 +--- a/VEX/pub/libvex_trc_values.h ++++ b/VEX/pub/libvex_trc_values.h +@@ -57,6 +57,7 @@ + continuing */ + #define VEX_TRC_JMP_SIGBUS 93 /* deliver SIGBUS before continuing */ + #define VEX_TRC_JMP_SIGFPE 105 /* deliver SIGFPE before continuing */ ++#define VEX_TRC_JMP_SIGSYS 115 /* deliver SIGSYS before continuing */ + + #define VEX_TRC_JMP_SIGFPE_INTDIV 97 /* deliver SIGFPE (integer divide + by zero) before continuing */ +diff --git a/cachegrind/cg_arch.c b/cachegrind/cg_arch.c +index 52e898218..1d906464e 100644 +--- a/cachegrind/cg_arch.c ++++ b/cachegrind/cg_arch.c +@@ -475,6 +475,13 @@ configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc, + *D1c = (cache_t) { 65536, 2, 64 }; + *LLc = (cache_t) { 262144, 8, 64 }; + ++#elif defined(VGA_loongarch64) ++ ++ // Set caches to default (for LOONGARCH64 - 3A5000) ++ *I1c = (cache_t) { 65536, 4, 64 }; ++ *D1c = (cache_t) { 65536, 4, 64 }; ++ *LLc = (cache_t) { 262144, 16, 64 }; ++ + #else + + #error "Unknown arch" +diff --git a/cachegrind/cg_branchpred.c b/cachegrind/cg_branchpred.c +index ba433ec2c..0d91b29cd 100644 +--- a/cachegrind/cg_branchpred.c ++++ b/cachegrind/cg_branchpred.c +@@ -44,7 +44,7 @@ + guaranteed to be zero? */ + #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \ +- || defined(VGA_arm64) ++ || defined(VGA_arm64) || defined(VGA_loongarch64) + # define N_IADDR_LO_ZERO_BITS 2 + #elif defined(VGA_x86) || defined(VGA_amd64) + # define N_IADDR_LO_ZERO_BITS 0 +diff --git a/configure.ac b/configure.ac +index 79b17f394..7cb42252a 100755 +--- a/configure.ac ++++ b/configure.ac +@@ -310,6 +310,11 @@ case "${host_cpu}" in + ARCH_MAX="nanomips" + ;; + ++ loongarch64*) ++ AC_MSG_RESULT([ok (${host_cpu})]) ++ ARCH_MAX="loongarch64" ++ ;; ++ + *) + AC_MSG_RESULT([no (${host_cpu})]) + AC_MSG_ERROR([Unsupported host architecture. Sorry]) +@@ -941,6 +946,17 @@ case "$ARCH_MAX-$VGCONF_OS" in + valt_load_address_sec_inner="0xUNSET" + AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})]) + ;; ++ loongarch64-linux) ++ VGCONF_ARCH_PRI="loongarch64" ++ VGCONF_ARCH_SEC="" ++ VGCONF_PLATFORM_PRI_CAPS="LOONGARCH64_LINUX" ++ VGCONF_PLATFORM_SEC_CAPS="" ++ valt_load_address_pri_norml="0x58000000" ++ valt_load_address_pri_inner="0x38000000" ++ valt_load_address_sec_norml="0xUNSET" ++ valt_load_address_sec_inner="0xUNSET" ++ AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})]) ++ ;; + x86-solaris) + VGCONF_ARCH_PRI="x86" + VGCONF_ARCH_SEC="" +@@ -1033,6 +1049,8 @@ AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_MIPS64, + test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX ) + AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_NANOMIPS, + test x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX ) ++AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_LOONGARCH64, ++ test x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX ) + + # Set up VGCONF_PLATFORMS_INCLUDE_. Either one or two of these + # become defined. 
+@@ -1063,6 +1081,8 @@ AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_MIPS64_LINUX, + test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX) + AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_NANOMIPS_LINUX, + test x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX) ++AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_LOONGARCH64_LINUX, ++ test x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX) + AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_X86_FREEBSD, + test x$VGCONF_PLATFORM_PRI_CAPS = xX86_FREEBSD \ + -o x$VGCONF_PLATFORM_SEC_CAPS = xX86_FREEBSD) +@@ -1094,7 +1114,8 @@ AM_CONDITIONAL(VGCONF_OS_IS_LINUX, + -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \ +- -o x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX) ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX \ ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX) + AM_CONDITIONAL(VGCONF_OS_IS_FREEBSD, + test x$VGCONF_PLATFORM_PRI_CAPS = xX86_FREEBSD \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_FREEBSD) +@@ -4908,7 +4929,8 @@ elif test x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \ +- -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX ; then ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \ ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX; then + mflag_primary=$FLAG_M64 + elif test x$VGCONF_PLATFORM_PRI_CAPS = xX86_DARWIN ; then + mflag_primary="$FLAG_M32 -arch i386" +@@ -5398,6 +5420,7 @@ AC_CONFIG_FILES([ + memcheck/tests/amd64-linux/Makefile + memcheck/tests/arm64-linux/Makefile + memcheck/tests/x86-linux/Makefile ++ memcheck/tests/loongarch64-linux/Makefile + memcheck/tests/amd64-solaris/Makefile + memcheck/tests/x86-solaris/Makefile + memcheck/tests/amd64-freebsd/Makefile +@@ -5443,6 +5466,7 @@ AC_CONFIG_FILES([ + none/tests/mips32/Makefile + none/tests/mips64/Makefile + none/tests/nanomips/Makefile ++ none/tests/loongarch64/Makefile + none/tests/linux/Makefile + none/tests/darwin/Makefile + none/tests/solaris/Makefile +diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am +index 80115f21f..8703f9fa0 100644 +--- a/coregrind/Makefile.am ++++ b/coregrind/Makefile.am +@@ -387,6 +387,7 @@ COREGRIND_SOURCES_COMMON = \ + m_dispatch/dispatch-mips32-linux.S \ + m_dispatch/dispatch-mips64-linux.S \ + m_dispatch/dispatch-nanomips-linux.S \ ++ m_dispatch/dispatch-loongarch64-linux.S \ + m_dispatch/dispatch-x86-freebsd.S \ + m_dispatch/dispatch-amd64-freebsd.S \ + m_dispatch/dispatch-x86-darwin.S \ +@@ -411,6 +412,7 @@ COREGRIND_SOURCES_COMMON = \ + m_gdbserver/valgrind-low-mips32.c \ + m_gdbserver/valgrind-low-mips64.c \ + m_gdbserver/valgrind-low-nanomips.c \ ++ m_gdbserver/valgrind-low-loongarch64.c \ + m_gdbserver/version.c \ + m_initimg/initimg-linux.c \ + m_initimg/initimg-freebsd.c \ +@@ -438,6 +440,7 @@ COREGRIND_SOURCES_COMMON = \ + m_sigframe/sigframe-mips32-linux.c \ + m_sigframe/sigframe-mips64-linux.c \ + m_sigframe/sigframe-nanomips-linux.c \ ++ m_sigframe/sigframe-loongarch64-linux.c \ + m_sigframe/sigframe-x86-darwin.c \ + m_sigframe/sigframe-amd64-darwin.c \ + m_sigframe/sigframe-solaris.c \ +@@ -452,6 +455,7 @@ COREGRIND_SOURCES_COMMON = \ + m_syswrap/syscall-mips32-linux.S \ + m_syswrap/syscall-mips64-linux.S \ + m_syswrap/syscall-nanomips-linux.S \ ++ m_syswrap/syscall-loongarch64-linux.S \ + m_syswrap/syscall-x86-freebsd.S \ + m_syswrap/syscall-amd64-freebsd.S \ + m_syswrap/syscall-x86-darwin.S \ +@@ -477,6 +481,7 @@ 
COREGRIND_SOURCES_COMMON = \ + m_syswrap/syswrap-mips32-linux.c \ + m_syswrap/syswrap-mips64-linux.c \ + m_syswrap/syswrap-nanomips-linux.c \ ++ m_syswrap/syswrap-loongarch64-linux.c \ + m_syswrap/syswrap-x86-darwin.c \ + m_syswrap/syswrap-amd64-darwin.c \ + m_syswrap/syswrap-xen.c \ +@@ -761,7 +766,15 @@ GDBSERVER_XML_FILES = \ + m_gdbserver/mips64-linux-valgrind.xml \ + m_gdbserver/mips64-fpu-valgrind-s1.xml \ + m_gdbserver/mips64-fpu-valgrind-s2.xml \ +- m_gdbserver/mips64-fpu.xml ++ m_gdbserver/mips64-fpu.xml \ ++ m_gdbserver/loongarch-base64.xml \ ++ m_gdbserver/loongarch-fpu64.xml \ ++ m_gdbserver/loongarch64-linux.xml \ ++ m_gdbserver/loongarch-base64-valgrind-s1.xml \ ++ m_gdbserver/loongarch-base64-valgrind-s2.xml \ ++ m_gdbserver/loongarch-fpu64-valgrind-s1.xml \ ++ m_gdbserver/loongarch-fpu64-valgrind-s2.xml \ ++ m_gdbserver/loongarch64-linux-valgrind.xml + + # so as to make sure these get copied into the install tree + vglibdir = $(pkglibexecdir) +diff --git a/coregrind/launcher-linux.c b/coregrind/launcher-linux.c +index bc95e3c11..5307fd13d 100644 +--- a/coregrind/launcher-linux.c ++++ b/coregrind/launcher-linux.c +@@ -67,6 +67,10 @@ + #define EM_NANOMIPS 249 + #endif + ++#ifndef EM_LOONGARCH ++#define EM_LOONGARCH 258 ++#endif ++ + #ifndef E_MIPS_ABI_O32 + #define E_MIPS_ABI_O32 0x00001000 + #endif +@@ -314,6 +318,10 @@ static const char *select_platform(const char *clientname) + (header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_SYSV || + header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_LINUX)) { + platform = "ppc64le-linux"; ++ } else if (header.ehdr64.e_machine == EM_LOONGARCH && ++ (header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_SYSV || ++ header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_LINUX)) { ++ platform = "loongarch64-linux"; + } + } else if (header.c[EI_DATA] == ELFDATA2MSB) { + # if !defined(VGPV_arm_linux_android) \ +@@ -415,7 +423,8 @@ int main(int argc, char** argv, char** envp) + (0==strcmp(VG_PLATFORM,"s390x-linux")) || + (0==strcmp(VG_PLATFORM,"mips32-linux")) || + (0==strcmp(VG_PLATFORM,"mips64-linux")) || +- (0==strcmp(VG_PLATFORM,"nanomips-linux"))) ++ (0==strcmp(VG_PLATFORM,"nanomips-linux")) || ++ (0==strcmp(VG_PLATFORM,"loongarch64-linux"))) + default_platform = VG_PLATFORM; + # elif defined(VGO_solaris) + if ((0==strcmp(VG_PLATFORM,"x86-solaris")) || +diff --git a/coregrind/m_aspacemgr/aspacemgr-common.c b/coregrind/m_aspacemgr/aspacemgr-common.c +index 816d2274f..0e79d3f3c 100644 +--- a/coregrind/m_aspacemgr/aspacemgr-common.c ++++ b/coregrind/m_aspacemgr/aspacemgr-common.c +@@ -157,7 +157,8 @@ SysRes VG_(am_do_mmap_NO_NOTIFY)( Addr start, SizeT length, UInt prot, + # elif defined(VGP_amd64_linux) \ + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ + || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \ +- || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) ++ || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length, + prot, flags, fd, offset); + # elif defined(VGP_x86_darwin) +@@ -262,7 +263,8 @@ SysRes ML_(am_do_relocate_nooverlap_mapping_NO_NOTIFY)( + + SysRes ML_(am_open) ( const HChar* pathname, Int flags, Int mode ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_openat rather than __NR_open. 
*/ + SysRes res = VG_(do_syscall4)(__NR_openat, + VKI_AT_FDCWD, (UWord)pathname, flags, mode); +@@ -291,7 +293,8 @@ void ML_(am_close) ( Int fd ) + Int ML_(am_readlink)(const HChar* path, HChar* buf, UInt bufsiz) + { + SysRes res; +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD, + (UWord)path, (UWord)buf, bufsiz); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +diff --git a/coregrind/m_aspacemgr/aspacemgr-linux.c b/coregrind/m_aspacemgr/aspacemgr-linux.c +index ae38d8bd0..021c5a267 100644 +--- a/coregrind/m_aspacemgr/aspacemgr-linux.c ++++ b/coregrind/m_aspacemgr/aspacemgr-linux.c +@@ -2773,7 +2773,8 @@ static SysRes VG_(am_mmap_file_float_valgrind_flags) ( SizeT length, UInt prot, + req.rkind = MAny; + req.start = 0; + #if defined(VGA_arm) || defined(VGA_arm64) \ +- || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) ++ || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \ ++ || defined(VGA_loongarch64) + aspacem_assert(VKI_SHMLBA >= VKI_PAGE_SIZE); + #else + aspacem_assert(VKI_SHMLBA == VKI_PAGE_SIZE); +diff --git a/coregrind/m_cache.c b/coregrind/m_cache.c +index 428a4df43..38fa44ea1 100644 +--- a/coregrind/m_cache.c ++++ b/coregrind/m_cache.c +@@ -660,6 +660,239 @@ get_cache_info(VexArchInfo *vai) + return True; + } + ++#elif defined(VGA_loongarch64) ++ ++/* ++ * LoongArch method is straightforward, just extract appropriate bits via ++ * cpucfg instruction (__builtin_loongarch_cpucfg). ++ * ++ * 1. Get the properties of the cache from cpucfg16. ++ * 2. For each level of cache, get the properties from cpucfg17/18/19/20. ++ * ++ * It's a bit nasty since we have to get the total number of caches first. ++ * To avoid duplicating reads, I use "struct cache_status" to store some ++ * necessary information. 
++ */ ++ ++#define BIT(x) (1UL << (x)) ++#define GENMASK(h, l) (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (64 - 1 - (h)))) ++ ++#define LOONGARCH_CPUCFG16 0x10 ++#define CPUCFG16_L1_IUPRE BIT(0) ++#define CPUCFG16_L1_IUUNIFY BIT(1) ++#define CPUCFG16_L1_DPRE BIT(2) ++#define CPUCFG16_L2_IUPRE BIT(3) ++#define CPUCFG16_L2_IUUNIFY BIT(4) ++#define CPUCFG16_L2_DPRE BIT(7) ++#define CPUCFG16_L3_IUPRE BIT(10) ++#define CPUCFG16_L3_IUUNIFY BIT(11) ++#define CPUCFG16_L3_DPRE BIT(14) ++ ++#define LOONGARCH_CPUCFG17 0x11 ++#define CPUCFG17_L1I_WAYS_M GENMASK(15, 0) ++#define CPUCFG17_L1I_SETS_M GENMASK(23, 16) ++#define CPUCFG17_L1I_SIZE_M GENMASK(30, 24) ++#define CPUCFG17_L1I_WAYS 0 ++#define CPUCFG17_L1I_SETS 16 ++#define CPUCFG17_L1I_SIZE 24 ++ ++#define LOONGARCH_CPUCFG18 0x12 ++#define CPUCFG18_L1D_WAYS_M GENMASK(15, 0) ++#define CPUCFG18_L1D_SETS_M GENMASK(23, 16) ++#define CPUCFG18_L1D_SIZE_M GENMASK(30, 24) ++#define CPUCFG18_L1D_WAYS 0 ++#define CPUCFG18_L1D_SETS 16 ++#define CPUCFG18_L1D_SIZE 24 ++ ++#define LOONGARCH_CPUCFG19 0x13 ++#define CPUCFG19_L2_WAYS_M GENMASK(15, 0) ++#define CPUCFG19_L2_SETS_M GENMASK(23, 16) ++#define CPUCFG19_L2_SIZE_M GENMASK(30, 24) ++#define CPUCFG19_L2_WAYS 0 ++#define CPUCFG19_L2_SETS 16 ++#define CPUCFG19_L2_SIZE 24 ++ ++#define LOONGARCH_CPUCFG20 0x14 ++#define CPUCFG20_L3_WAYS_M GENMASK(15, 0) ++#define CPUCFG20_L3_SETS_M GENMASK(23, 16) ++#define CPUCFG20_L3_SIZE_M GENMASK(30, 24) ++#define CPUCFG20_L3_WAYS 0 ++#define CPUCFG20_L3_SETS 16 ++#define CPUCFG20_L3_SIZE 24 ++ ++struct cache_status { ++ Bool has_iu; ++ Bool is_u; ++ Bool has_d; ++ Bool exist; ++ UInt num; ++}; ++ ++static inline UInt ++cpucfg(UInt reg) ++{ ++ return (UInt)__builtin_loongarch_cpucfg(reg); ++} ++ ++static void ++get_status(struct cache_status status[], UInt n) ++{ ++ Bool has_iu = status[n].has_iu; ++ Bool is_u = status[n].is_u; ++ Bool has_d = status[n].has_d; ++ ++ /* has_d only works with no ucache */ ++ status[n].has_d = has_d = toBool(!(has_iu && is_u) && has_d); ++ ++ status[n].exist = toBool(has_iu || has_d); ++ status[n].num = has_iu + has_d; ++} ++ ++static void ++get_cache(VexCacheInfo *ci, VexCacheKind kind, UInt level, ++ UInt line_size, UInt sets, UInt ways, UInt index) ++{ ++ UInt assoc = ways; ++ UInt size = sets * ways * line_size; ++ ci->caches[index] = VEX_CACHE_INIT(kind, level, size, line_size, assoc); ++} ++ ++static void ++get_cache_info_for_l1(VexCacheInfo *ci, struct cache_status status[]) ++{ ++ UInt config; ++ UInt line_size, sets, ways; ++ UInt index = 0; ++ ++ if (!status[0].exist) ++ return; ++ ++ if (status[0].has_iu) { ++ config = cpucfg(LOONGARCH_CPUCFG17); ++ line_size = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); ++ sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); ++ ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; ++ get_cache(ci, status[0].is_u ? 
UNIFIED_CACHE : INSN_CACHE, ++ 1, line_size, sets, ways, index++); ++ } ++ ++ if (status[0].has_d) { ++ config = cpucfg(LOONGARCH_CPUCFG18); ++ line_size = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); ++ sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); ++ ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; ++ get_cache(ci, DATA_CACHE, 1, line_size, sets, ways, index++); ++ } ++ ++ /* Sanity check */ ++ vg_assert(index == status[0].num); ++} ++ ++static void ++get_cache_info_for_l2(VexCacheInfo *ci, struct cache_status status[]) ++{ ++ UInt config; ++ UInt line_size, sets, ways; ++ UInt index = status[0].num; ++ ++ if (!status[1].exist) ++ return; ++ ++ config = cpucfg(LOONGARCH_CPUCFG19); ++ line_size = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); ++ sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); ++ ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; ++ ++ if (status[1].has_iu) ++ get_cache(ci, status[1].is_u ? UNIFIED_CACHE : INSN_CACHE, ++ 2, line_size, sets, ways, index++); ++ ++ if (status[1].has_d) ++ get_cache(ci, DATA_CACHE, 2, line_size, sets, ways, index++); ++ ++ /* Sanity check */ ++ vg_assert(index == status[0].num + status[1].num); ++} ++ ++static void ++get_cache_info_for_l3(VexCacheInfo *ci, struct cache_status status[]) ++{ ++ UInt config; ++ UInt line_size, sets, ways; ++ UInt index = status[0].num + status[1].num; ++ ++ if (!status[2].exist) ++ return; ++ ++ config = cpucfg(LOONGARCH_CPUCFG20); ++ line_size = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); ++ sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); ++ ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; ++ ++ if (status[2].has_iu) ++ get_cache(ci, status[2].is_u ? UNIFIED_CACHE : INSN_CACHE, ++ 3, line_size, sets, ways, index++); ++ ++ if (status[2].has_d) ++ get_cache(ci, DATA_CACHE, 3, line_size, sets, ways, index++); ++ ++ /* Sanity check */ ++ vg_assert(index == status[0].num + status[1].num + status[2].num); ++} ++ ++static Bool ++get_cache_info_from_cpucfg(VexCacheInfo *ci) ++{ ++ Int i; ++ struct cache_status status[3]; ++ UInt config = cpucfg(LOONGARCH_CPUCFG16); ++ ++ /* NB: Bool is unsigned char! 
*/ ++ /* For l1 */ ++ status[0].has_iu = toBool(config & CPUCFG16_L1_IUPRE); ++ status[0].is_u = toBool(config & CPUCFG16_L1_IUUNIFY); ++ status[0].has_d = toBool(config & CPUCFG16_L1_DPRE); ++ get_status(status, 0); ++ ++ /* For l2 */ ++ status[1].has_iu = toBool(config & CPUCFG16_L2_IUPRE); ++ status[1].is_u = toBool(config & CPUCFG16_L2_IUUNIFY); ++ status[1].has_d = toBool(config & CPUCFG16_L2_DPRE); ++ get_status(status, 1); ++ ++ /* For l3 */ ++ status[2].has_iu = toBool(config & CPUCFG16_L3_IUPRE); ++ status[2].is_u = toBool(config & CPUCFG16_L3_IUUNIFY); ++ status[2].has_d = toBool(config & CPUCFG16_L3_DPRE); ++ get_status(status, 2); ++ ++ ci->num_levels = 0; ++ ci->num_caches = 0; ++ for (i = 0; i < 3; i++) { ++ ci->num_levels += status[i].exist; ++ ci->num_caches += status[i].num; ++ } ++ ++ if (ci->num_caches == 0) { ++ VG_(debugLog)(1, "cache", "Autodetect failed\n"); ++ return False; ++ } ++ ++ ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof(VexCache)); ++ get_cache_info_for_l1(ci, status); ++ get_cache_info_for_l2(ci, status); ++ get_cache_info_for_l3(ci, status); ++ return True; ++} ++ ++static Bool ++get_cache_info(VexArchInfo *vai) ++{ ++ VexCacheInfo *ci = &vai->hwcache_info; ++ ci->icaches_maintain_coherence = True; ++ return get_cache_info_from_cpucfg(ci); ++} ++ + #else + + #error "Unknown arch" +diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c +index 4a8c29c52..82b1b436a 100644 +--- a/coregrind/m_coredump/coredump-elf.c ++++ b/coregrind/m_coredump/coredump-elf.c +@@ -489,6 +489,40 @@ static void fill_prstatus(const ThreadState *tst, + regs[VKI_MIPS32_EF_CP0_STATUS] = arch->vex.guest_CP0_status; + regs[VKI_MIPS32_EF_CP0_EPC] = arch->vex.guest_PC; + # undef DO ++#elif defined(VGP_loongarch64_linux) ++ regs->regs[0] = arch->vex.guest_R0; ++ regs->regs[1] = arch->vex.guest_R1; ++ regs->regs[2] = arch->vex.guest_R2; ++ regs->regs[3] = arch->vex.guest_R3; ++ regs->regs[4] = arch->vex.guest_R4; ++ regs->regs[5] = arch->vex.guest_R5; ++ regs->regs[6] = arch->vex.guest_R6; ++ regs->regs[7] = arch->vex.guest_R7; ++ regs->regs[8] = arch->vex.guest_R8; ++ regs->regs[9] = arch->vex.guest_R9; ++ regs->regs[10] = arch->vex.guest_R10; ++ regs->regs[11] = arch->vex.guest_R11; ++ regs->regs[12] = arch->vex.guest_R12; ++ regs->regs[13] = arch->vex.guest_R13; ++ regs->regs[14] = arch->vex.guest_R14; ++ regs->regs[15] = arch->vex.guest_R15; ++ regs->regs[16] = arch->vex.guest_R16; ++ regs->regs[17] = arch->vex.guest_R17; ++ regs->regs[18] = arch->vex.guest_R18; ++ regs->regs[19] = arch->vex.guest_R19; ++ regs->regs[20] = arch->vex.guest_R20; ++ regs->regs[21] = arch->vex.guest_R21; ++ regs->regs[22] = arch->vex.guest_R22; ++ regs->regs[23] = arch->vex.guest_R23; ++ regs->regs[24] = arch->vex.guest_R24; ++ regs->regs[25] = arch->vex.guest_R25; ++ regs->regs[26] = arch->vex.guest_R26; ++ regs->regs[27] = arch->vex.guest_R27; ++ regs->regs[28] = arch->vex.guest_R28; ++ regs->regs[29] = arch->vex.guest_R29; ++ regs->regs[30] = arch->vex.guest_R30; ++ regs->regs[31] = arch->vex.guest_R31; ++ regs->csr_era = arch->vex.guest_PC; + #elif defined(VGP_amd64_freebsd) + regs->rflags = LibVEX_GuestAMD64_get_rflags( &arch->vex ); + regs->rsp = arch->vex.guest_RSP; +@@ -654,6 +688,14 @@ static void fill_fpu(const ThreadState *tst, vki_elf_fpregset_t *fpu) + # undef DO + #elif defined(VGP_nanomips_linux) + ++#elif defined(VGP_loongarch64_linux) ++# define DO(n) (*fpu)[n] = *(const double*)(&arch->vex.guest_X##n) ++ DO(0); DO(1); DO(2); DO(3); DO(4); 
DO(5); DO(6); DO(7); ++ DO(8); DO(9); DO(10); DO(11); DO(12); DO(13); DO(14); DO(15); ++ DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23); ++ DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31); ++# undef DO ++ + #elif defined(VGP_x86_freebsd) + + #elif defined(VGP_amd64_freebsd) +diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c +index e9e8944af..4cd99cb0c 100644 +--- a/coregrind/m_debuginfo/d3basics.c ++++ b/coregrind/m_debuginfo/d3basics.c +@@ -555,6 +555,9 @@ static Bool get_Dwarf_Reg( /*OUT*/Addr* a, Word regno, const RegSummary* regs ) + # elif defined(VGP_arm64_linux) + if (regno == 31) { *a = regs->sp; return True; } + if (regno == 29) { *a = regs->fp; return True; } ++# elif defined(VGP_loongarch64_linux) ++ if (regno == 3) { *a = regs->sp; return True; } ++ if (regno == 22) { *a = regs->fp; return True; } + # else + # error "Unknown platform" + # endif +diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c +index 2d2accc99..a7ff4a57e 100644 +--- a/coregrind/m_debuginfo/debuginfo.c ++++ b/coregrind/m_debuginfo/debuginfo.c +@@ -1262,7 +1262,7 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd ) + is_ro_map = False; + + # if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \ +- || defined(VGA_mips64) || defined(VGA_nanomips) ++ || defined(VGA_mips64) || defined(VGA_nanomips) || defined(VGA_loongarch64) + is_rx_map = seg->hasR && seg->hasX; + is_rw_map = seg->hasR && seg->hasW; + # elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ +@@ -2998,6 +2998,11 @@ UWord evalCfiExpr ( const XArray* exprs, Int ix, + case Creg_ARM64_SP: return eec->uregs->sp; + case Creg_ARM64_X30: return eec->uregs->x30; + case Creg_ARM64_X29: return eec->uregs->x29; ++# elif defined(VGA_loongarch64) ++ case Creg_LOONGARCH64_PC: return eec->uregs->pc; ++ case Creg_LOONGARCH64_RA: return eec->uregs->ra; ++ case Creg_LOONGARCH64_SP: return eec->uregs->sp; ++ case Creg_LOONGARCH64_FP: return eec->uregs->fp; + # else + # error "Unsupported arch" + # endif +@@ -3269,6 +3274,13 @@ static Addr compute_cfa ( const D3UnwindRegs* uregs, + case CFIC_ARM64_X29REL: + cfa = cfsi_m->cfa_off + uregs->x29; + break; ++# elif defined(VGA_loongarch64) ++ case CFIC_IA_SPREL: ++ cfa = cfsi_m->cfa_off + uregs->sp; ++ break; ++ case CFIC_IA_BPREL: ++ cfa = cfsi_m->cfa_off + uregs->fp; ++ break; + # else + # error "Unsupported arch" + # endif +@@ -3340,6 +3352,14 @@ Addr ML_(get_CFA) ( Addr ip, Addr sp, Addr fp, + return compute_cfa(&uregs, + min_accessible, max_accessible, ce->di, ce->cfsi_m); + } ++#elif defined(VGA_loongarch64) ++ { D3UnwindRegs uregs; ++ uregs.pc = ip; ++ uregs.sp = sp; ++ uregs.fp = fp; ++ return compute_cfa(&uregs, ++ min_accessible, max_accessible, ce->di, ce->cfsi_m); ++ } + + # else + return 0; /* indicates failure */ +@@ -3391,6 +3411,8 @@ void VG_(ppUnwindInfo) (Addr from, Addr to) + For arm64, the unwound registers are: X29(FP) X30(LR) SP PC. + + For s390, the unwound registers are: R11(FP) R14(LR) R15(SP) F0..F7 PC. 
++ ++ For loongarch64, the unwound registers are: FP SP PC + */ + Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere, + Addr min_accessible, +@@ -3414,6 +3436,8 @@ Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere, + # elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) + # elif defined(VGP_arm64_linux) + ipHere = uregsHere->pc; ++# elif defined(VGA_loongarch64) ++ ipHere = uregsHere->pc; + # else + # error "Unknown arch" + # endif +@@ -3559,6 +3583,10 @@ Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere, + COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off); + COMPUTE(uregsPrev.x30, uregsHere->x30, cfsi_m->x30_how, cfsi_m->x30_off); + COMPUTE(uregsPrev.x29, uregsHere->x29, cfsi_m->x29_how, cfsi_m->x29_off); ++# elif defined(VGA_loongarch64) ++ COMPUTE(uregsPrev.pc, uregsHere->ra, cfsi_m->ra_how, cfsi_m->ra_off); ++ COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off); ++ COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off); + # else + # error "Unknown arch" + # endif +diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h +index a4b90d36b..8188064d4 100644 +--- a/coregrind/m_debuginfo/priv_storage.h ++++ b/coregrind/m_debuginfo/priv_storage.h +@@ -367,6 +367,19 @@ typedef + Int fp_off; + } + DiCfSI_m; ++#elif defined(VGA_loongarch64) ++typedef ++ struct { ++ UChar cfa_how; /* a CFIC_ value */ ++ UChar ra_how; /* a CFIR_ value */ ++ UChar sp_how; /* a CFIR_ value */ ++ UChar fp_how; /* a CFIR_ value */ ++ Int cfa_off; ++ Int ra_off; ++ Int sp_off; ++ Int fp_off; ++ } ++ DiCfSI_m; + #else + # error "Unknown arch" + #endif +@@ -422,7 +435,11 @@ typedef + Creg_S390_SP, + Creg_S390_FP, + Creg_S390_LR, +- Creg_MIPS_RA ++ Creg_MIPS_RA, ++ Creg_LOONGARCH64_PC, ++ Creg_LOONGARCH64_RA, ++ Creg_LOONGARCH64_SP, ++ Creg_LOONGARCH64_FP + } + CfiReg; + +diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c +index 79d6764ea..2636cc2cc 100644 +--- a/coregrind/m_debuginfo/readdwarf.c ++++ b/coregrind/m_debuginfo/readdwarf.c +@@ -2066,6 +2066,10 @@ void ML_(read_debuginfo_dwarf1) ( + # define FP_REG 30 + # define SP_REG 29 + # define RA_REG_DEFAULT 31 ++#elif defined(VGP_loongarch64_linux) ++# define FP_REG 22 ++# define SP_REG 3 ++# define RA_REG_DEFAULT 1 + #else + # error "Unknown platform" + #endif +@@ -2084,6 +2088,8 @@ void ML_(read_debuginfo_dwarf1) ( + # define N_CFI_REGS 128 + #elif defined(VGP_s390x_linux) + # define N_CFI_REGS 66 ++#elif defined(VGP_loongarch64_linux) ++# define N_CFI_REGS 32 + #else + # define N_CFI_REGS 20 + #endif +@@ -2310,6 +2316,10 @@ static void initUnwindContext ( /*OUT*/UnwindContext* ctx ) + start out as RR_Same. */ + ctx->state[j].reg[29/*FP*/].tag = RR_Same; + ctx->state[j].reg[30/*LR*/].tag = RR_Same; ++# elif defined(VGA_loongarch64) ++ /* Registers fp and ra start out implicitly as RR_Same. 
*/ ++ ctx->state[j].reg[FP_REG].tag = RR_Same; ++ ctx->state[j].reg[RA_REG_DEFAULT].tag = RR_Same; + # endif + } + } +@@ -2392,7 +2402,8 @@ static Bool summarise_context(/*OUT*/Addr* base, + if (ctxs->cfa_is_regoff && ctxs->cfa_reg == SP_REG) { + si_m->cfa_off = ctxs->cfa_off; + # if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \ +- || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) ++ || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) \ ++ || defined(VGA_loongarch64) + si_m->cfa_how = CFIC_IA_SPREL; + # elif defined(VGA_arm) + si_m->cfa_how = CFIC_ARM_R13REL; +@@ -2406,7 +2417,8 @@ static Bool summarise_context(/*OUT*/Addr* base, + if (ctxs->cfa_is_regoff && ctxs->cfa_reg == FP_REG) { + si_m->cfa_off = ctxs->cfa_off; + # if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \ +- || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) ++ || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) \ ++ || defined(VGA_loongarch64) + si_m->cfa_how = CFIC_IA_BPREL; + # elif defined(VGA_arm) + si_m->cfa_how = CFIC_ARM_R12REL; +@@ -2786,6 +2798,30 @@ static Bool summarise_context(/*OUT*/Addr* base, + # elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) + /* These don't use CFI based unwinding (is that really true?) */ + ++# elif defined(VGA_loongarch64) ++ ++ /* --- entire tail of this fn specialised for loongarch64 --- */ ++ ++ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off, ctxs->reg[ctx->ra_reg]); ++ SUMMARISE_HOW(si_m->fp_how, si_m->fp_off, ctxs->reg[FP_REG]); ++ ++ /* on loongarch64, it seems the old sp value before the call is always ++ the same as the CFA. Therefore ... */ ++ si_m->sp_how = CFIR_CFAREL; ++ si_m->sp_off = 0; ++ ++ /* bogus looking range? Note, we require that the difference is ++ representable in 32 bits. 
*/ ++ if (loc_start >= ctx->loc) ++ { why = 4; goto failed; } ++ if (ctx->loc - loc_start > 10000000 /* let's say */) ++ { why = 5; goto failed; } ++ ++ *base = loc_start + ctx->initloc; ++ *len = (UInt)(ctx->loc - loc_start); ++ ++ return True; ++ + # else + # error "Unknown arch" + # endif +@@ -2885,6 +2921,13 @@ static Int copy_convert_CfiExpr_tree ( XArray* dstxa, + return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM64_X30 ); + # elif defined(VGA_ppc32) || defined(VGA_ppc64be) \ + || defined(VGA_ppc64le) ++# elif defined(VGA_loongarch64) ++ if (dwreg == SP_REG) ++ return ML_(CfiExpr_CfiReg)( dstxa, Creg_LOONGARCH64_SP ); ++ if (dwreg == FP_REG) ++ return ML_(CfiExpr_CfiReg)( dstxa, Creg_LOONGARCH64_FP ); ++ if (dwreg == srcuc->ra_reg) ++ return ML_(CfiExpr_CfiReg)( dstxa, Creg_LOONGARCH64_RA ); + # else + # error "Unknown arch" + # endif +diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c +index ce7b7998d..bf7feffb5 100644 +--- a/coregrind/m_debuginfo/readelf.c ++++ b/coregrind/m_debuginfo/readelf.c +@@ -1780,7 +1780,8 @@ static HChar* readlink_path (const HChar *path) + + while (tries > 0) { + SysRes res; +-#if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++#if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD, + (UWord)path, (UWord)buf, bufsiz); + #elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +@@ -2653,6 +2654,7 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + || defined(VGP_arm_linux) || defined (VGP_s390x_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ + || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) \ + || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \ + || defined(VGP_x86_freebsd) || defined(VGP_amd64_freebsd) + /* Accept .plt where mapped as rx (code) */ +diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c +index c3fa62e96..bef564808 100644 +--- a/coregrind/m_debuginfo/storage.c ++++ b/coregrind/m_debuginfo/storage.c +@@ -260,6 +260,11 @@ void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs, + SHOW_HOW(si_m->x30_how, si_m->x30_off); + VG_(printf)(" X29="); + SHOW_HOW(si_m->x29_how, si_m->x29_off); ++# elif defined(VGP_loongarch64_linux) ++ VG_(printf)(" SP="); ++ SHOW_HOW(si_m->sp_how, si_m->sp_off); ++ VG_(printf)(" FP="); ++ SHOW_HOW(si_m->fp_how, si_m->fp_off); + # else + # error "Unknown arch" + # endif +@@ -1010,6 +1015,10 @@ static void ppCfiReg ( CfiReg reg ) + case Creg_S390_SP: VG_(printf)("SP"); break; + case Creg_S390_FP: VG_(printf)("FP"); break; + case Creg_S390_LR: VG_(printf)("LR"); break; ++ case Creg_LOONGARCH64_PC: VG_(printf)("PC"); break; ++ case Creg_LOONGARCH64_RA: VG_(printf)("RA"); break; ++ case Creg_LOONGARCH64_SP: VG_(printf)("SP"); break; ++ case Creg_LOONGARCH64_FP: VG_(printf)("FP"); break; + default: vg_assert(0); + } + } +diff --git a/coregrind/m_debuglog.c b/coregrind/m_debuglog.c +index 355c3caf5..1d4f08984 100644 +--- a/coregrind/m_debuglog.c ++++ b/coregrind/m_debuglog.c +@@ -601,6 +601,41 @@ static UInt local_sys_getpid ( void ) + return a0; + } + ++#elif defined(VGP_loongarch64_linux) ++ ++static UInt local_sys_write_stderr ( const HChar* buf, Int n ) ++{ ++ ULong ret; ++ __asm__ volatile ( ++ "li.w $a0, 2 \n\t" // stderr ++ "move $a1, %1 \n\t" ++ "move $a2, %2 \n\t" ++ "li.w $a7, " VG_STRINGIFY(__NR_write) " \n\t" ++ "syscall 0 \n\t" ++ "move %0, $a0 \n\t" ++ : 
"=r" (ret) ++ : "r" (buf), "r" (n) ++ : "memory", "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" ++ ); ++ return ret >= 0 ? (UInt)ret : -1; ++} ++ ++static UInt local_sys_getpid ( void ) ++{ ++ ULong ret; ++ __asm__ volatile ( ++ "li.w $a7, " VG_STRINGIFY(__NR_getpid) " \n\t" ++ "syscall 0 \n\t" ++ "move %0, $a0 \n\t" ++ : "=r" (ret) ++ : ++ : "memory", "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" ++ ); ++ return (UInt)ret; ++} ++ + #elif defined(VGP_x86_solaris) + static UInt local_sys_write_stderr ( const HChar* buf, Int n ) + { +diff --git a/coregrind/m_dispatch/dispatch-loongarch64-linux.S b/coregrind/m_dispatch/dispatch-loongarch64-linux.S +new file mode 100644 +index 000000000..dec165294 +--- /dev/null ++++ b/coregrind/m_dispatch/dispatch-loongarch64-linux.S +@@ -0,0 +1,314 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- The core dispatch loop, for jumping to a code address. ---*/ ++/*--- dispatch-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "pub_core_basics_asm.h" ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_dispatch_asm.h" ++#include "pub_core_transtab_asm.h" ++#include "libvex_guest_offsets.h" /* for OFFSET_loongarch64_* */ ++ ++ ++/*------------------------------------------------------------*/ ++/*--- ---*/ ++/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ ++/*--- used to run all translations, ---*/ ++/*--- including no-redir ones. ---*/ ++/*--- ---*/ ++/*------------------------------------------------------------*/ ++ ++/*----------------------------------------------------*/ ++/*--- Entry and preamble (set everything up) ---*/ ++/*----------------------------------------------------*/ ++ ++/* signature: ++void VG_(disp_run_translations)( UWord* two_words, ++ void* guest_state, ++ Addr host_addr ); ++*/ ++ ++.text ++.globl VG_(disp_run_translations) ++VG_(disp_run_translations): ++ /* a0 holds two_words */ ++ /* a1 holds guest_state */ ++ /* a2 holds host_addr */ ++ ++ /* New stack frame. Stack must remain 16 aligned (at least) */ ++ addi.d $sp, $sp, -96 ++ ++ /* Save ra */ ++ st.d $ra, $sp, 0 ++ ++ /* .. and s0 - s8 */ ++ st.d $s0, $sp, 8 ++ st.d $s1, $sp, 16 ++ st.d $s2, $sp, 24 ++ st.d $s3, $sp, 32 ++ st.d $s4, $sp, 40 ++ st.d $s5, $sp, 48 ++ st.d $s6, $sp, 56 ++ st.d $s7, $sp, 64 ++ st.d $s8, $sp, 72 ++ ++ /* ... and fp */ ++ st.d $fp, $sp, 80 ++ ++ /* and a0. 
In postamble it will be restored such that the ++ return values can be written */ ++ st.d $a0, $sp, 88 ++ ++ /* Load address of guest state into s8 */ ++ move $s8, $a1 ++ ++ /* and jump into the code cache. Chained translations in ++ the code cache run, until for whatever reason, they can't ++ continue. When that happens, the translation in question ++ will jump (or call) to one of the continuation points ++ VG_(cp_...) below. */ ++ ibar 0 /* Insn sync barrier */ ++ jr $a2 ++ /*NOTREACHED*/ ++ ++/*----------------------------------------------------*/ ++/*--- Postamble and exit. ---*/ ++/*----------------------------------------------------*/ ++ ++postamble: ++ /* At this point, a0 and a1 contain two ++ words to be returned to the caller. a0 ++ holds a TRC value, and a1 optionally may ++ hold another word (for CHAIN_ME exits, the ++ address of the place to patch.) */ ++ ++ /* Restore a0 from stack to t0; holds address of two_words */ ++ ld.d $t0, $sp, 88 ++ st.d $a0, $t0, 0 /* Store a0 to two_words[0] */ ++ st.d $a1, $t0, 8 /* Store a1 to two_words[1] */ ++ ++ /* Restore ra */ ++ ld.d $ra, $sp, 0 ++ ++ /* ... and s0 - s8 */ ++ ld.d $s0, $sp, 8 ++ ld.d $s1, $sp, 16 ++ ld.d $s2, $sp, 24 ++ ld.d $s3, $sp, 32 ++ ld.d $s4, $sp, 40 ++ ld.d $s5, $sp, 48 ++ ld.d $s6, $sp, 56 ++ ld.d $s7, $sp, 64 ++ ld.d $s8, $sp, 72 ++ ++ /* ... and fp */ ++ ld.d $fp, $sp, 80 ++ ++ addi.d $sp, $sp, 96 /* Restore sp */ ++ jr $ra ++ /*NOTREACHED*/ ++ ++/*----------------------------------------------------*/ ++/*--- Continuation points ---*/ ++/*----------------------------------------------------*/ ++ ++/* ------ Chain me to slow entry point ------ */ ++.globl VG_(disp_cp_chain_me_to_slowEP) ++VG_(disp_cp_chain_me_to_slowEP): ++ /* We got called. The return address indicates ++ where the patching needs to happen. Collect ++ the return address and, exit back to C land, ++ handing the caller the pair (Chain_me_S, RA) */ ++ li.w $a0, VG_TRC_CHAIN_ME_TO_SLOW_EP ++ move $a1, $ra ++ /* 4 * 4 = mkLoadImm_EXACTLY4 ++ 4 = jirl $ra, $t0, 0 */ ++ addi.d $a1, $a1, -20 ++ b postamble ++ /*NOTREACHED*/ ++ ++/* ------ Chain me to fast entry point ------ */ ++.globl VG_(disp_cp_chain_me_to_fastEP) ++VG_(disp_cp_chain_me_to_fastEP): ++ /* We got called. The return address indicates ++ where the patching needs to happen. Collect ++ the return address and, exit back to C land, ++ handing the caller the pair (Chain_me_S, RA) */ ++ li.w $a0, VG_TRC_CHAIN_ME_TO_FAST_EP ++ move $a1, $ra ++ /* 4 * 4 = mkLoadImm_EXACTLY4 ++ 4 = jirl $ra, $t0, 0 */ ++ addi.d $a1, $a1, -20 ++ b postamble ++ /*NOTREACHED*/ ++ ++/* ------ Indirect but boring jump ------ */ ++.globl VG_(disp_cp_xindir) ++VG_(disp_cp_xindir): ++ /* Where are we going? */ ++ ld.d $t0, $s8, OFFSET_loongarch64_PC ++ ++ /* Stats only */ ++ la.local $t4, VG_(stats__n_xIndirs_32) ++ ld.d $t1, $t4, 0 ++ addi.d $t1, $t1, 1 ++ st.w $t1, $t4, 0 ++ ++ /* LIVE: s8 (guest state ptr), t0 (guest address to go to). ++ We use 6 temporaries: ++ t6 (to point at the relevant FastCacheSet), ++ t1, t2, t3 (scratch, for swapping entries within a set) ++ t4, t5 (other scratch) ++ */ ++ ++ /* Try a fast lookup in the translation cache. This is pretty much ++ a handcoded version of VG_(lookupInFastCache). 
*/ ++ ++ // Compute t6 = VG_TT_FAST_HASH(guest) ++ srli.d $t6, $t0, 2 // g2 = guest >> 2 ++ srli.d $t5, $t0, (VG_TT_FAST_BITS + 2) // (g2 >> VG_TT_FAST_BITS) ++ xor $t6, $t6, $t5 // (g2 >> VG_TT_FAST_BITS) ^ g2 ++ li.w $t5, VG_TT_FAST_MASK ++ and $t6, $t6, $t5 // setNo ++ ++ // Compute t6 = &VG_(tt_fast)[t6] ++ la.local $t5, VG_(tt_fast) ++ slli.d $t6, $t6, VG_FAST_CACHE_SET_BITS ++ add.d $t6, $t6, $t5 ++ ++ /* LIVE: s8 (guest state ptr), t0 (guest addr), t6 (cache set) */ ++0: // try way 0 ++ ld.d $t4, $t6, FCS_g0 // .guest0 ++ ld.d $t5, $t6, FCS_h0 // .host0 ++ bne $t4, $t0, 1f // cmp against .guest0 ++ // hit at way 0 ++ // goto .host0 ++ jr $t5 ++ /*NOTREACHED*/ ++ ++1: // try way 1 ++ ld.d $t4, $t6, FCS_g1 ++ bne $t4, $t0, 2f // cmp against .guest1 ++ // hit at way 1; swap upwards ++ ld.d $t1, $t6, FCS_g0 // $t1 = old .guest0 ++ ld.d $t2, $t6, FCS_h0 // $t2 = old .host0 ++ ld.d $t3, $t6, FCS_h1 // $t3 = old .host1 ++ st.d $t0, $t6, FCS_g0 // new .guest0 = guest ++ st.d $t3, $t6, FCS_h0 // new .host0 = old .host1 ++ st.d $t1, $t6, FCS_g1 // new .guest1 = old .guest0 ++ st.d $t2, $t6, FCS_h1 // new .host1 = old .host0 ++ ++ // stats only ++ la.local $t4, VG_(stats__n_xIndir_hits1_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ // goto old .host1 a.k.a. new .host0 ++ jr $t3 ++ /*NOTREACHED*/ ++ ++2: // try way 2 ++ ld.d $t4, $t6, FCS_g2 ++ bne $t4, $t0, 3f // cmp against .guest2 ++ // hit at way 2; swap upwards ++ ld.d $t1, $t6, FCS_g1 ++ ld.d $t2, $t6, FCS_h1 ++ ld.d $t3, $t6, FCS_h2 ++ st.d $t0, $t6, FCS_g1 ++ st.d $t3, $t6, FCS_h1 ++ st.d $t1, $t6, FCS_g2 ++ st.d $t2, $t6, FCS_h2 ++ ++ // stats only ++ la.local $t4, VG_(stats__n_xIndir_hits2_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ // goto old .host2 a.k.a. new .host1 ++ jr $t3 ++ /*NOTREACHED*/ ++ ++3: // try way 3 ++ ld.d $t4, $t6, FCS_g3 ++ bne $t4, $t0, 4f // cmp against .guest3 ++ // hit at way 3; swap upwards ++ ld.d $t1, $t6, FCS_g2 ++ ld.d $t2, $t6, FCS_h2 ++ ld.d $t3, $t6, FCS_h3 ++ st.d $t0, $t6, FCS_g2 ++ st.d $t3, $t6, FCS_h2 ++ st.d $t1, $t6, FCS_g3 ++ st.d $t2, $t6, FCS_h3 ++ ++ // stats only ++ la.local $t4, VG_(stats__n_xIndir_hits3_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ // goto old .host3 a.k.a. new .host2 ++ jr $t3 ++ /*NOTREACHED*/ ++ ++4: // fast lookup failed: ++ /* stats only */ ++ la.local $t4, VG_(stats__n_xIndir_misses_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ ++ li.w $a0, VG_TRC_INNER_FASTMISS ++ move $a1, $zero ++ b postamble ++ /*NOTREACHED*/ ++ ++/* ------ Assisted jump ------ */ ++.globl VG_(disp_cp_xassisted) ++VG_(disp_cp_xassisted): ++ /* guest-state-pointer contains the TRC. 
Put the value into the ++ return register */ ++ move $a0, $s8 ++ move $a1, $zero ++ b postamble ++ ++/* ------ Event check failed ------ */ ++.globl VG_(disp_cp_evcheck_fail) ++VG_(disp_cp_evcheck_fail): ++ li.w $a0, VG_TRC_INNER_COUNTERZERO ++ move $a1, $zero ++ b postamble ++ ++.size VG_(disp_run_translations), .-VG_(disp_run_translations) ++ ++#endif // defined(VGP_loongarch64_linux) ++ ++/* Let the linker know we don't need an executable stack */ ++MARK_STACK_NO_EXEC ++ ++/*--------------------------------------------------------------------*/ ++/*--- end dispatch-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_gdbserver/loongarch-base64-valgrind-s1.xml b/coregrind/m_gdbserver/loongarch-base64-valgrind-s1.xml +new file mode 100644 +index 000000000..cab700cca +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-base64-valgrind-s1.xml +@@ -0,0 +1,45 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-base64-valgrind-s2.xml b/coregrind/m_gdbserver/loongarch-base64-valgrind-s2.xml +new file mode 100644 +index 000000000..cbacbbbbe +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-base64-valgrind-s2.xml +@@ -0,0 +1,45 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-base64.xml b/coregrind/m_gdbserver/loongarch-base64.xml +new file mode 100644 +index 000000000..fadca8b9e +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-base64.xml +@@ -0,0 +1,45 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s1.xml b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s1.xml +new file mode 100644 +index 000000000..b5c7cab50 +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s1.xml +@@ -0,0 +1,57 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s2.xml b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s2.xml +new file mode 100644 +index 000000000..501660ebb +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s2.xml +@@ -0,0 +1,57 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-fpu64.xml b/coregrind/m_gdbserver/loongarch-fpu64.xml +new file mode 100644 +index 000000000..74ab55a01 +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-fpu64.xml +@@ -0,0 +1,57 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch64-linux-valgrind.xml b/coregrind/m_gdbserver/loongarch64-linux-valgrind.xml +new file mode 100644 +index 000000000..8915a72a9 +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch64-linux-valgrind.xml +@@ -0,0 +1,18 @@ ++ ++ ++ ++ ++ ++ loongarch ++ GNU/Linux ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch64-linux.xml b/coregrind/m_gdbserver/loongarch64-linux.xml +new file mode 100644 +index 000000000..f1eed8338 +--- 
/dev/null ++++ b/coregrind/m_gdbserver/loongarch64-linux.xml +@@ -0,0 +1,14 @@ ++ ++ ++ ++ ++ ++ loongarch ++ GNU/Linux ++ ++ ++ +diff --git a/coregrind/m_gdbserver/target.c b/coregrind/m_gdbserver/target.c +index 490276b6c..5e0a8ad24 100644 +--- a/coregrind/m_gdbserver/target.c ++++ b/coregrind/m_gdbserver/target.c +@@ -867,6 +867,8 @@ void valgrind_initialize_target(void) + mips64_init_architecture(&the_low_target); + #elif defined(VGA_nanomips) + nanomips_init_architecture(&the_low_target); ++#elif defined(VGA_loongarch64) ++ loongarch64_init_architecture(&the_low_target); + #else + #error "architecture missing in target.c valgrind_initialize_target" + #endif +diff --git a/coregrind/m_gdbserver/valgrind-low-loongarch64.c b/coregrind/m_gdbserver/valgrind-low-loongarch64.c +new file mode 100644 +index 000000000..a606baf63 +--- /dev/null ++++ b/coregrind/m_gdbserver/valgrind-low-loongarch64.c +@@ -0,0 +1,272 @@ ++/* Low level interface to valgrind, for the remote server for GDB integrated ++ in valgrind. ++ Copyright (C) 2021 ++ Free Software Foundation, Inc. ++ ++ This file is part of VALGRIND. ++ It has been inspired from files from gdbserver in gdb 13. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ Boston, MA 02110-1301, USA. 
*/ ++ ++#include "server.h" ++#include "target.h" ++#include "regdef.h" ++#include "regcache.h" ++ ++#include "pub_core_machine.h" ++#include "pub_core_debuginfo.h" ++#include "pub_core_threadstate.h" ++#include "pub_core_transtab.h" ++#include "pub_core_gdbserver.h" ++ ++#include "valgrind_low.h" ++ ++#include "libvex_guest_loongarch64.h" ++ ++static struct reg regs[] = { ++ { "r0", 0, 64 }, ++ { "r1", 64, 64 }, ++ { "r2", 128, 64 }, ++ { "r3", 192, 64 }, ++ { "r4", 256, 64 }, ++ { "r5", 320, 64 }, ++ { "r6", 384, 64 }, ++ { "r7", 448, 64 }, ++ { "r8", 512, 64 }, ++ { "r9", 576, 64 }, ++ { "r10", 640, 64 }, ++ { "r11", 704, 64 }, ++ { "r12", 768, 64 }, ++ { "r13", 832, 64 }, ++ { "r14", 896, 64 }, ++ { "r15", 960, 64 }, ++ { "r16", 1024, 64 }, ++ { "r17", 1088, 64 }, ++ { "r18", 1152, 64 }, ++ { "r19", 1216, 64 }, ++ { "r20", 1280, 64 }, ++ { "r21", 1344, 64 }, ++ { "r22", 1408, 64 }, ++ { "r23", 1472, 64 }, ++ { "r24", 1536, 64 }, ++ { "r25", 1600, 64 }, ++ { "r26", 1664, 64 }, ++ { "r27", 1728, 64 }, ++ { "r28", 1792, 64 }, ++ { "r29", 1856, 64 }, ++ { "r30", 1920, 64 }, ++ { "r31", 1984, 64 }, ++ { "orig_a0", 2048, 64 }, ++ { "pc", 2112, 64 }, ++ { "badv", 2176, 64 }, ++ { "f0", 2240, 64 }, ++ { "f1", 2304, 64 }, ++ { "f2", 2368, 64 }, ++ { "f3", 2432, 64 }, ++ { "f4", 2496, 64 }, ++ { "f5", 2560, 64 }, ++ { "f6", 2624, 64 }, ++ { "f7", 2688, 64 }, ++ { "f8", 2752, 64 }, ++ { "f9", 2816, 64 }, ++ { "f10", 2880, 64 }, ++ { "f11", 2944, 64 }, ++ { "f12", 3008, 64 }, ++ { "f13", 3072, 64 }, ++ { "f14", 3136, 64 }, ++ { "f15", 3200, 64 }, ++ { "f16", 3264, 64 }, ++ { "f17", 3328, 64 }, ++ { "f18", 3392, 64 }, ++ { "f19", 3456, 64 }, ++ { "f20", 3520, 64 }, ++ { "f21", 3584, 64 }, ++ { "f22", 3648, 64 }, ++ { "f23", 3712, 64 }, ++ { "f24", 3776, 64 }, ++ { "f25", 3840, 64 }, ++ { "f26", 3904, 64 }, ++ { "f27", 3968, 64 }, ++ { "f28", 4032, 64 }, ++ { "f29", 4096, 64 }, ++ { "f30", 4160, 64 }, ++ { "f31", 4224, 64 }, ++ { "fcc0", 4288, 8 }, ++ { "fcc1", 4296, 8 }, ++ { "fcc2", 4304, 8 }, ++ { "fcc3", 4312, 8 }, ++ { "fcc4", 4320, 8 }, ++ { "fcc5", 4328, 8 }, ++ { "fcc6", 4336, 8 }, ++ { "fcc7", 4344, 8 }, ++ { "fcsr", 4352, 32 } ++}; ++ ++#define num_regs (sizeof (regs) / sizeof (regs[0])) ++ ++static const char* expedite_regs[] = { "r3", "pc", NULL }; ++ ++static ++CORE_ADDR get_pc (void) ++{ ++ unsigned long pc; ++ ++ collect_register_by_name ("pc", &pc); ++ ++ dlog(1, "stop pc is %p\n", (void*) pc); ++ return pc; ++} ++ ++static ++void set_pc (CORE_ADDR newpc) ++{ ++ supply_register_by_name ("pc", &newpc); ++} ++ ++/* store registers in the guest state (gdbserver_to_valgrind) ++ or fetch register from the guest state (valgrind_to_gdbserver). */ ++static ++void transfer_register (ThreadId tid, int abs_regno, void* buf, ++ transfer_direction dir, int size, Bool* mod) ++{ ++ ThreadState* tst = VG_(get_ThreadState)(tid); ++ int set = abs_regno / num_regs; ++ int regno = abs_regno % num_regs; ++ *mod = False; ++ ++ VexGuestLOONGARCH64State* loongarch64 = (VexGuestLOONGARCH64State*) get_arch (set, tst); ++ ++ switch (regno) { ++ // numbers here have to match the order of regs above ++ // Attention: gdb order does not match valgrind order. 
++ case 0: VG_(transfer) (&loongarch64->guest_R0, buf, dir, size, mod); break; ++ case 1: VG_(transfer) (&loongarch64->guest_R1, buf, dir, size, mod); break; ++ case 2: VG_(transfer) (&loongarch64->guest_R2, buf, dir, size, mod); break; ++ case 3: VG_(transfer) (&loongarch64->guest_R3, buf, dir, size, mod); break; ++ case 4: VG_(transfer) (&loongarch64->guest_R4, buf, dir, size, mod); break; ++ case 5: VG_(transfer) (&loongarch64->guest_R5, buf, dir, size, mod); break; ++ case 6: VG_(transfer) (&loongarch64->guest_R6, buf, dir, size, mod); break; ++ case 7: VG_(transfer) (&loongarch64->guest_R7, buf, dir, size, mod); break; ++ case 8: VG_(transfer) (&loongarch64->guest_R8, buf, dir, size, mod); break; ++ case 9: VG_(transfer) (&loongarch64->guest_R9, buf, dir, size, mod); break; ++ case 10: VG_(transfer) (&loongarch64->guest_R10, buf, dir, size, mod); break; ++ case 11: VG_(transfer) (&loongarch64->guest_R11, buf, dir, size, mod); break; ++ case 12: VG_(transfer) (&loongarch64->guest_R12, buf, dir, size, mod); break; ++ case 13: VG_(transfer) (&loongarch64->guest_R13, buf, dir, size, mod); break; ++ case 14: VG_(transfer) (&loongarch64->guest_R14, buf, dir, size, mod); break; ++ case 15: VG_(transfer) (&loongarch64->guest_R15, buf, dir, size, mod); break; ++ case 16: VG_(transfer) (&loongarch64->guest_R16, buf, dir, size, mod); break; ++ case 17: VG_(transfer) (&loongarch64->guest_R17, buf, dir, size, mod); break; ++ case 18: VG_(transfer) (&loongarch64->guest_R18, buf, dir, size, mod); break; ++ case 19: VG_(transfer) (&loongarch64->guest_R19, buf, dir, size, mod); break; ++ case 20: VG_(transfer) (&loongarch64->guest_R20, buf, dir, size, mod); break; ++ case 21: VG_(transfer) (&loongarch64->guest_R21, buf, dir, size, mod); break; ++ case 22: VG_(transfer) (&loongarch64->guest_R22, buf, dir, size, mod); break; ++ case 23: VG_(transfer) (&loongarch64->guest_R23, buf, dir, size, mod); break; ++ case 24: VG_(transfer) (&loongarch64->guest_R24, buf, dir, size, mod); break; ++ case 25: VG_(transfer) (&loongarch64->guest_R25, buf, dir, size, mod); break; ++ case 26: VG_(transfer) (&loongarch64->guest_R26, buf, dir, size, mod); break; ++ case 27: VG_(transfer) (&loongarch64->guest_R27, buf, dir, size, mod); break; ++ case 28: VG_(transfer) (&loongarch64->guest_R28, buf, dir, size, mod); break; ++ case 29: VG_(transfer) (&loongarch64->guest_R29, buf, dir, size, mod); break; ++ case 30: VG_(transfer) (&loongarch64->guest_R30, buf, dir, size, mod); break; ++ case 31: VG_(transfer) (&loongarch64->guest_R31, buf, dir, size, mod); break; ++ case 32: *mod = False; break; // GDBTD?? arg0 ++ case 33: VG_(transfer) (&loongarch64->guest_PC, buf, dir, size, mod); break; ++ case 34: *mod = False; break; // GDBTD?? 
badvaddr ++ case 35: VG_(transfer) (&loongarch64->guest_X0, buf, dir, size, mod); break; ++ case 36: VG_(transfer) (&loongarch64->guest_X1, buf, dir, size, mod); break; ++ case 37: VG_(transfer) (&loongarch64->guest_X2, buf, dir, size, mod); break; ++ case 38: VG_(transfer) (&loongarch64->guest_X3, buf, dir, size, mod); break; ++ case 39: VG_(transfer) (&loongarch64->guest_X4, buf, dir, size, mod); break; ++ case 40: VG_(transfer) (&loongarch64->guest_X5, buf, dir, size, mod); break; ++ case 41: VG_(transfer) (&loongarch64->guest_X6, buf, dir, size, mod); break; ++ case 42: VG_(transfer) (&loongarch64->guest_X7, buf, dir, size, mod); break; ++ case 43: VG_(transfer) (&loongarch64->guest_X8, buf, dir, size, mod); break; ++ case 44: VG_(transfer) (&loongarch64->guest_X9, buf, dir, size, mod); break; ++ case 45: VG_(transfer) (&loongarch64->guest_X10, buf, dir, size, mod); break; ++ case 46: VG_(transfer) (&loongarch64->guest_X11, buf, dir, size, mod); break; ++ case 47: VG_(transfer) (&loongarch64->guest_X12, buf, dir, size, mod); break; ++ case 48: VG_(transfer) (&loongarch64->guest_X13, buf, dir, size, mod); break; ++ case 49: VG_(transfer) (&loongarch64->guest_X14, buf, dir, size, mod); break; ++ case 50: VG_(transfer) (&loongarch64->guest_X15, buf, dir, size, mod); break; ++ case 51: VG_(transfer) (&loongarch64->guest_X16, buf, dir, size, mod); break; ++ case 52: VG_(transfer) (&loongarch64->guest_X17, buf, dir, size, mod); break; ++ case 53: VG_(transfer) (&loongarch64->guest_X18, buf, dir, size, mod); break; ++ case 54: VG_(transfer) (&loongarch64->guest_X19, buf, dir, size, mod); break; ++ case 55: VG_(transfer) (&loongarch64->guest_X20, buf, dir, size, mod); break; ++ case 56: VG_(transfer) (&loongarch64->guest_X21, buf, dir, size, mod); break; ++ case 57: VG_(transfer) (&loongarch64->guest_X22, buf, dir, size, mod); break; ++ case 58: VG_(transfer) (&loongarch64->guest_X23, buf, dir, size, mod); break; ++ case 59: VG_(transfer) (&loongarch64->guest_X24, buf, dir, size, mod); break; ++ case 60: VG_(transfer) (&loongarch64->guest_X25, buf, dir, size, mod); break; ++ case 61: VG_(transfer) (&loongarch64->guest_X26, buf, dir, size, mod); break; ++ case 62: VG_(transfer) (&loongarch64->guest_X27, buf, dir, size, mod); break; ++ case 63: VG_(transfer) (&loongarch64->guest_X28, buf, dir, size, mod); break; ++ case 64: VG_(transfer) (&loongarch64->guest_X29, buf, dir, size, mod); break; ++ case 65: VG_(transfer) (&loongarch64->guest_X30, buf, dir, size, mod); break; ++ case 66: VG_(transfer) (&loongarch64->guest_X31, buf, dir, size, mod); break; ++ case 67: VG_(transfer) (&loongarch64->guest_FCC0, buf, dir, size, mod); break; ++ case 68: VG_(transfer) (&loongarch64->guest_FCC1, buf, dir, size, mod); break; ++ case 69: VG_(transfer) (&loongarch64->guest_FCC2, buf, dir, size, mod); break; ++ case 70: VG_(transfer) (&loongarch64->guest_FCC3, buf, dir, size, mod); break; ++ case 71: VG_(transfer) (&loongarch64->guest_FCC4, buf, dir, size, mod); break; ++ case 72: VG_(transfer) (&loongarch64->guest_FCC5, buf, dir, size, mod); break; ++ case 73: VG_(transfer) (&loongarch64->guest_FCC6, buf, dir, size, mod); break; ++ case 74: VG_(transfer) (&loongarch64->guest_FCC7, buf, dir, size, mod); break; ++ case 75: VG_(transfer) (&loongarch64->guest_FCSR, buf, dir, size, mod); break; ++ default: vg_assert(0); ++ } ++} ++ ++static ++const char* target_xml (Bool shadow_mode) ++{ ++ if (shadow_mode) { ++ return "loongarch64-linux-valgrind.xml"; ++ } else { ++ return "loongarch64-linux.xml"; ++ } ++} ++ 
++static CORE_ADDR** target_get_dtv (ThreadState* tst) ++{ ++ VexGuestLOONGARCH64State* loongarch64 = (VexGuestLOONGARCH64State*)&tst->arch.vex; ++ // Top of LoongArch tcbhead structure is located 0x0 bytes before the value ++ // of $r2. Dtv is the first of two pointers in tcbhead structure. ++ // More details can be found in GLIBC/sysdeps/nptl/tls.h. ++ return (CORE_ADDR**)((CORE_ADDR)loongarch64->guest_R2 ++ - 0x0 - 2 * sizeof(CORE_ADDR)); ++} ++ ++static struct valgrind_target_ops low_target = { ++ num_regs, ++ regs, ++ 3, // SP ++ transfer_register, ++ get_pc, ++ set_pc, ++ "loongarch64", ++ target_xml, ++ target_get_dtv ++}; ++ ++void loongarch64_init_architecture (struct valgrind_target_ops* target) ++{ ++ *target = low_target; ++ set_register_cache (regs, num_regs); ++ gdbserver_expedite_regs = expedite_regs; ++} +diff --git a/coregrind/m_gdbserver/valgrind_low.h b/coregrind/m_gdbserver/valgrind_low.h +index c6c0bb63b..833f3612e 100644 +--- a/coregrind/m_gdbserver/valgrind_low.h ++++ b/coregrind/m_gdbserver/valgrind_low.h +@@ -108,5 +108,6 @@ extern void s390x_init_architecture (struct valgrind_target_ops *target); + extern void mips32_init_architecture (struct valgrind_target_ops *target); + extern void mips64_init_architecture (struct valgrind_target_ops *target); + extern void nanomips_init_architecture (struct valgrind_target_ops *target); ++extern void loongarch64_init_architecture (struct valgrind_target_ops *target); + + #endif +diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c +index 7a7d45335..237a11f09 100644 +--- a/coregrind/m_initimg/initimg-linux.c ++++ b/coregrind/m_initimg/initimg-linux.c +@@ -913,7 +913,8 @@ Addr setup_client_stack( void* init_sp, + && !defined(VGP_ppc64le_linux) \ + && !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) \ + && !defined(VGP_nanomips_linux) \ +- && !defined(VGP_s390x_linux) ++ && !defined(VGP_s390x_linux) \ ++ && !defined(VGP_loongarch64_linux) + case AT_SYSINFO_EHDR: { + /* Trash this, because we don't reproduce it */ + const NSegment* ehdrseg = VG_(am_find_nsegment)((Addr)auxv->u.a_ptr); +@@ -1344,6 +1345,20 @@ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii ) + arch->vex.guest_PC = iifii.initial_client_IP; + arch->vex.guest_r31 = iifii.initial_client_SP; + ++# elif defined(VGP_loongarch64_linux) ++ vg_assert(0 == sizeof(VexGuestLOONGARCH64State) % LibVEX_GUEST_STATE_ALIGN); ++ ++ /* Zero out the initial state, and set up the simulated FPU in a ++ sane way. */ ++ LibVEX_GuestLOONGARCH64_initialise(&arch->vex); ++ ++ /* Zero out the shadow areas. 
*/ ++ VG_(memset)(&arch->vex_shadow1, 0, sizeof(VexGuestLOONGARCH64State)); ++ VG_(memset)(&arch->vex_shadow2, 0, sizeof(VexGuestLOONGARCH64State)); ++ ++ arch->vex.guest_R3 = iifii.initial_client_SP; ++ arch->vex.guest_PC = iifii.initial_client_IP; ++ + # else + # error Unknown platform + # endif +diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c +index 35f37f88d..0ad514129 100644 +--- a/coregrind/m_libcassert.c ++++ b/coregrind/m_libcassert.c +@@ -264,6 +264,26 @@ + (srP)->misc.MIPS32.r31 = (UInt)ra; \ + (srP)->misc.MIPS32.r28 = (UInt)gp; \ + } ++#elif defined(VGP_loongarch64_linux) ++# define GET_STARTREGS(srP) \ ++ { \ ++ ULong pc, sp, fp, ra; \ ++ __asm__ __volatile__( \ ++ "pcaddi %0, 0 \n\t" \ ++ "move %1, $sp \n\t" \ ++ "move %2, $fp \n\t" \ ++ "move %3, $ra \n\t" \ ++ : "=r" (pc), \ ++ "=r" (sp), \ ++ "=r" (fp), \ ++ "=r" (ra) \ ++ : /* reads none */ \ ++ : /* no trashed */ ); \ ++ (srP)->r_pc = (ULong)pc; \ ++ (srP)->r_sp = (ULong)sp; \ ++ (srP)->misc.LOONGARCH64.r_fp = (ULong)fp; \ ++ (srP)->misc.LOONGARCH64.r_ra = (ULong)ra; \ ++ } + #else + # error Unknown platform + #endif +diff --git a/coregrind/m_libcfile.c b/coregrind/m_libcfile.c +index 5d3a349f2..bbbd4e7fb 100644 +--- a/coregrind/m_libcfile.c ++++ b/coregrind/m_libcfile.c +@@ -264,7 +264,8 @@ Bool VG_(resolve_filemode) ( Int fd, Int * result ) + + SysRes VG_(mknod) ( const HChar* pathname, Int mode, UWord dev ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_mknodat rather than __NR_mknod. */ + SysRes res = VG_(do_syscall4)(__NR_mknodat, + VKI_AT_FDCWD, (UWord)pathname, mode, dev); +@@ -290,7 +291,8 @@ SysRes VG_(mknod) ( const HChar* pathname, Int mode, UWord dev ) + + SysRes VG_(open) ( const HChar* pathname, Int flags, Int mode ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_openat rather than __NR_open. */ + SysRes res = VG_(do_syscall4)(__NR_openat, + VKI_AT_FDCWD, (UWord)pathname, flags, mode); +@@ -384,7 +386,8 @@ Int VG_(pipe) ( Int fd[2] ) + } else { + return -1; + } +-# elif defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# elif defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall2)(__NR_pipe2, (UWord)fd, 0); + return sr_isError(res) ? -1 : 0; + # elif defined(VGO_linux) +@@ -517,12 +520,19 @@ SysRes VG_(stat) ( const HChar* file_name, struct vg_stat* vgbuf ) + { struct vki_statx buf; + res = VG_(do_syscall5)(__NR_statx, VKI_AT_FDCWD, (UWord)file_name, 0, + VKI_STATX_ALL, (UWord)&buf); ++# if defined(VGP_loongarch64_linux) ++ /* On LoongArch64 Linux platform, only statx is available. 
*/ ++ if (!sr_isError(res)) ++ TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); ++ return res; ++# else + if (!(sr_isError(res) && sr_Err(res) == VKI_ENOSYS)) { + /* Success, or any failure except ENOSYS */ + if (!sr_isError(res)) + TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); + return res; + } ++# endif + } + # endif + # if defined(VGO_linux) || defined(VGO_darwin) +@@ -602,12 +612,19 @@ Int VG_(fstat) ( Int fd, struct vg_stat* vgbuf ) + const char* file_name = ""; + res = VG_(do_syscall5)(__NR_statx, fd, (RegWord)file_name, + VKI_AT_EMPTY_PATH, VKI_STATX_ALL, (RegWord)&buf); ++# if defined(VGP_loongarch64_linux) ++ /* On LoongArch64 Linux platform, only statx is available. */ ++ if (!sr_isError(res)) ++ TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); ++ return sr_isError(res) ? (-1) : 0; ++# else + if (!(sr_isError(res) && sr_Err(res) == VKI_ENOSYS)) { + /* Success, or any failure except ENOSYS */ + if (!sr_isError(res)) + TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); + return sr_isError(res) ? (-1) : 0; + } ++# endif + } + #endif + # if defined(VGO_linux) || defined(VGO_darwin) +@@ -731,7 +748,8 @@ SysRes VG_(dup) ( Int oldfd ) + + SysRes VG_(dup2) ( Int oldfd, Int newfd ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* We only have dup3, that means we have to mimic dup2. + The only real difference is when oldfd == newfd. + dup3 always returns an error, but dup2 returns only an +@@ -777,7 +795,7 @@ Int VG_(rename) ( const HChar* old_name, const HChar* new_name ) + # if defined(VGO_solaris) || defined(VGP_arm64_linux) + SysRes res = VG_(do_syscall4)(__NR_renameat, VKI_AT_FDCWD, (UWord)old_name, + VKI_AT_FDCWD, (UWord)new_name); +-# elif defined(VGP_nanomips_linux) ++# elif defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall5)(__NR_renameat2, VKI_AT_FDCWD, (UWord)old_name, + VKI_AT_FDCWD, (UWord)new_name, 0); + +@@ -791,7 +809,8 @@ Int VG_(rename) ( const HChar* old_name, const HChar* new_name ) + + Int VG_(unlink) ( const HChar* file_name ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall2)(__NR_unlinkat, VKI_AT_FDCWD, + (UWord)file_name); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +@@ -870,7 +889,8 @@ const HChar *VG_(get_startup_wd) ( void ) + SysRes VG_(poll) (struct vki_pollfd *fds, Int nfds, Int timeout) + { + SysRes res; +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_ppoll rather than __NR_poll. */ + struct vki_timespec timeout_ts; + if (timeout >= 0) { +@@ -915,7 +935,8 @@ SSizeT VG_(readlink) (const HChar* path, HChar* buf, SizeT bufsiz) + { + SysRes res; + /* res = readlink( path, buf, bufsiz ); */ +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD, + (UWord)path, (UWord)buf, bufsiz); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +@@ -994,7 +1015,8 @@ Int VG_(access) ( const HChar* path, Bool irusr, Bool iwusr, Bool ixusr ) + UWord w = (irusr ? VKI_R_OK : 0) + | (iwusr ? VKI_W_OK : 0) + | (ixusr ? 
VKI_X_OK : 0); +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall3)(__NR_faccessat, VKI_AT_FDCWD, (UWord)path, w); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) + SysRes res = VG_(do_syscall2)(__NR_access, (UWord)path, w); +@@ -1140,7 +1162,8 @@ SysRes VG_(pread) ( Int fd, void* buf, Int count, OffT offset ) + return res; + # elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \ + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ +- || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) ++ || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_pread64, fd, (UWord)buf, count, offset); + return res; + # elif defined(VGP_amd64_freebsd) +@@ -1404,7 +1427,8 @@ Int VG_(socket) ( Int domain, Int type, Int protocol ) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)(__NR_socket, domain, type, protocol ); + return sr_isError(res) ? -1 : sr_Res(res); +@@ -1459,7 +1483,8 @@ Int my_connect ( Int sockfd, struct vki_sockaddr_in* serv_addr, Int addrlen ) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)(__NR_connect, sockfd, (UWord)serv_addr, addrlen); + return sr_isError(res) ? 
-1 : sr_Res(res); +@@ -1506,7 +1531,8 @@ Int VG_(write_socket)( Int sd, const void *msg, Int count ) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall6)(__NR_sendto, sd, (UWord)msg, + count, VKI_MSG_NOSIGNAL, 0,0); +@@ -1544,7 +1570,8 @@ Int VG_(getsockname) ( Int sd, struct vki_sockaddr *name, Int *namelen) + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_nanomips_linux) || defined(VGO_freebsd) \ +- || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) ++ || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)( __NR_getsockname, + (UWord)sd, (UWord)name, (UWord)namelen ); +@@ -1583,7 +1610,8 @@ Int VG_(getpeername) ( Int sd, struct vki_sockaddr *name, Int *namelen) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ +- || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_nanomips_linux) || defined(VGO_freebsd) \ ++ || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)( __NR_getpeername, + (UWord)sd, (UWord)name, (UWord)namelen ); +@@ -1625,7 +1653,7 @@ Int VG_(getsockopt) ( Int sd, Int level, Int optname, void *optval, + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ + || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ +- || defined(VGO_freebsd) ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall5)( __NR_getsockopt, + (UWord)sd, (UWord)level, (UWord)optname, +@@ -1669,7 +1697,8 @@ Int VG_(setsockopt) ( Int sd, Int level, Int optname, void *optval, + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall5)( __NR_setsockopt, + (UWord)sd, (UWord)level, (UWord)optname, +diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c +index 592d69bf1..61827cb7d 100644 +--- a/coregrind/m_libcproc.c ++++ b/coregrind/m_libcproc.c +@@ -698,7 +698,8 @@ Int VG_(gettid)(void) + * the /proc/self link is pointing... 
+ */
+
+-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
++ || defined(VGP_loongarch64_linux)
+ res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
+ (UWord)"/proc/self",
+ (UWord)pid, sizeof(pid));
+@@ -753,7 +754,8 @@ Int VG_(getpid) ( void )
+ Int VG_(getpgrp) ( void )
+ {
+ /* ASSUMES SYSCALL ALWAYS SUCCEEDS */
+-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
++ || defined(VGP_loongarch64_linux)
+ return sr_Res( VG_(do_syscall1)(__NR_getpgid, 0) );
+ # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
+ return sr_Res( VG_(do_syscall0)(__NR_getpgrp) );
+@@ -850,7 +852,7 @@ Int VG_(getgroups)( Int size, UInt* list )
+ || defined(VGO_darwin) || defined(VGP_s390x_linux) \
+ || defined(VGP_mips32_linux) || defined(VGP_arm64_linux) \
+ || defined(VGO_solaris) || defined(VGP_nanomips_linux) \
+- || defined(VGO_freebsd)
++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux)
+ SysRes sres;
+ sres = VG_(do_syscall2)(__NR_getgroups, size, (Addr)list);
+ if (sr_isError(sres))
+@@ -944,7 +946,8 @@ Int VG_(fork) ( void )
+ fds[0] = fds[1] = -1;
+ }
+
+-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
++ || defined(VGP_loongarch64_linux)
+ SysRes res;
+ res = VG_(do_syscall5)(__NR_clone, VKI_SIGCHLD,
+ (UWord)NULL, (UWord)NULL, (UWord)NULL, (UWord)NULL);
+diff --git a/coregrind/m_libcsetjmp.c b/coregrind/m_libcsetjmp.c
+index 4f1ecb150..a563f9393 100644
+--- a/coregrind/m_libcsetjmp.c
++++ b/coregrind/m_libcsetjmp.c
+@@ -741,6 +741,72 @@ __asm__(
+ );
+ #endif /* VGP_nanomips_linux */
+
++#if defined(VGP_loongarch64_linux)
++
++__asm__(
++".text \n\t"
++".globl VG_MINIMAL_SETJMP; \n\t"
++"VG_MINIMAL_SETJMP: \n\t"
++" st.d $ra, $a0, 0 \n\t"
++" st.d $sp, $a0, 8 \n\t"
++" st.d $r21, $a0, 16 \n\t"
++" st.d $fp, $a0, 24 \n\t"
++" st.d $s0, $a0, 32 \n\t"
++" st.d $s1, $a0, 40 \n\t"
++" st.d $s2, $a0, 48 \n\t"
++" st.d $s3, $a0, 56 \n\t"
++" st.d $s4, $a0, 64 \n\t"
++" st.d $s5, $a0, 72 \n\t"
++" st.d $s6, $a0, 80 \n\t"
++" st.d $s7, $a0, 88 \n\t"
++" st.d $s8, $a0, 96 \n\t"
++#if !defined(__loongarch_soft_float)
++" fst.d $f24, $a0, 104 \n\t"
++" fst.d $f25, $a0, 112 \n\t"
++" fst.d $f26, $a0, 120 \n\t"
++" fst.d $f27, $a0, 128 \n\t"
++" fst.d $f28, $a0, 136 \n\t"
++" fst.d $f29, $a0, 144 \n\t"
++" fst.d $f30, $a0, 152 \n\t"
++" fst.d $f31, $a0, 160 \n\t"
++#endif
++" move $a0, $zero \n\t"
++" jr $ra \n\t"
++" \n\t"
++".text \n\t"
++".globl VG_MINIMAL_LONGJMP; \n\t"
++"VG_MINIMAL_LONGJMP: \n\t"
++" ld.d $ra, $a0, 0 \n\t"
++" ld.d $sp, $a0, 8 \n\t"
++" ld.d $r21, $a0, 16 \n\t"
++" ld.d $fp, $a0, 24 \n\t"
++" ld.d $s0, $a0, 32 \n\t"
++" ld.d $s1, $a0, 40 \n\t"
++" ld.d $s2, $a0, 48 \n\t"
++" ld.d $s3, $a0, 56 \n\t"
++" ld.d $s4, $a0, 64 \n\t"
++" ld.d $s5, $a0, 72 \n\t"
++" ld.d $s6, $a0, 80 \n\t"
++" ld.d $s7, $a0, 88 \n\t"
++" ld.d $s8, $a0, 96 \n\t"
++#if !defined(__loongarch_soft_float)
++" fld.d $f24, $a0, 104 \n\t"
++" fld.d $f25, $a0, 112 \n\t"
++" fld.d $f26, $a0, 120 \n\t"
++" fld.d $f27, $a0, 128 \n\t"
++" fld.d $f28, $a0, 136 \n\t"
++" fld.d $f29, $a0, 144 \n\t"
++" fld.d $f30, $a0, 152 \n\t"
++" fld.d $f31, $a0, 160 \n\t"
++#endif
++" bnez $a1, 1f \n\t"
++" addi.d $a1, $a1, 1 \n\t"
++"1: \n\t"
++" move $a0, $a1 \n\t"
++" jr $ra \n\t"
++);
++#endif /* VGP_loongarch64_linux */
++
+ 
/*--------------------------------------------------------------------*/ + /*--- end ---*/ + /*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c +index 052b5d186..48e4b3f22 100644 +--- a/coregrind/m_machine.c ++++ b/coregrind/m_machine.c +@@ -152,6 +152,13 @@ void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs, + = VG_(threads)[tid].arch.vex.guest_r31; + regs->misc.MIPS64.r28 + = VG_(threads)[tid].arch.vex.guest_r28; ++# elif defined(VGA_loongarch64) ++ regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC; ++ regs->r_sp = VG_(threads)[tid].arch.vex.guest_R3; ++ regs->misc.LOONGARCH64.r_fp ++ = VG_(threads)[tid].arch.vex.guest_R22; ++ regs->misc.LOONGARCH64.r_ra ++ = VG_(threads)[tid].arch.vex.guest_R1; + # else + # error "Unknown arch" + # endif +@@ -369,6 +376,39 @@ static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, + (*f)(tid, "x28", vex->guest_X28); + (*f)(tid, "x29", vex->guest_X29); + (*f)(tid, "x30", vex->guest_X30); ++#elif defined(VGA_loongarch64) ++ (*f)(tid, "r0" , vex->guest_R0 ); ++ (*f)(tid, "r1" , vex->guest_R1 ); ++ (*f)(tid, "r2" , vex->guest_R2 ); ++ (*f)(tid, "r3" , vex->guest_R3 ); ++ (*f)(tid, "r4" , vex->guest_R4 ); ++ (*f)(tid, "r5" , vex->guest_R5 ); ++ (*f)(tid, "r6" , vex->guest_R6 ); ++ (*f)(tid, "r7" , vex->guest_R7 ); ++ (*f)(tid, "r8" , vex->guest_R8 ); ++ (*f)(tid, "r9" , vex->guest_R9 ); ++ (*f)(tid, "r10", vex->guest_R10); ++ (*f)(tid, "r11", vex->guest_R11); ++ (*f)(tid, "r12", vex->guest_R12); ++ (*f)(tid, "r13", vex->guest_R13); ++ (*f)(tid, "r14", vex->guest_R14); ++ (*f)(tid, "r15", vex->guest_R15); ++ (*f)(tid, "r16", vex->guest_R16); ++ (*f)(tid, "r17", vex->guest_R17); ++ (*f)(tid, "r18", vex->guest_R18); ++ (*f)(tid, "r19", vex->guest_R19); ++ (*f)(tid, "r20", vex->guest_R20); ++ (*f)(tid, "r21", vex->guest_R21); ++ (*f)(tid, "r22", vex->guest_R22); ++ (*f)(tid, "r23", vex->guest_R23); ++ (*f)(tid, "r24", vex->guest_R24); ++ (*f)(tid, "r25", vex->guest_R25); ++ (*f)(tid, "r26", vex->guest_R26); ++ (*f)(tid, "r27", vex->guest_R27); ++ (*f)(tid, "r28", vex->guest_R28); ++ (*f)(tid, "r29", vex->guest_R29); ++ (*f)(tid, "r30", vex->guest_R30); ++ (*f)(tid, "r31", vex->guest_R31); + #else + # error Unknown arch + #endif +@@ -479,7 +519,7 @@ Int VG_(machine_arm_archlevel) = 4; + testing, so we need a VG_MINIMAL_JMP_BUF. */ + #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \ +- || defined(VGA_mips64) || defined(VGA_arm64) ++ || defined(VGA_mips64) || defined(VGA_arm64) || defined(VGA_loongarch64) + #include "pub_core_libcsetjmp.h" + static VG_MINIMAL_JMP_BUF(env_unsup_insn); + static void handler_unsup_insn ( Int x ) { +@@ -859,6 +899,105 @@ static Bool VG_(parse_cpuinfo)(void) + + #endif /* defined(VGP_arm64_linux) */ + ++#if defined(VGA_loongarch64) ++ ++/* ++ * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it can not ++ * determine what CPU it is (it searches only for the models that are or may be ++ * supported by Valgrind). ++ */ ++static Bool VG_(parse_cpuinfo)(void) ++{ ++ Int n, fh; ++ SysRes fd; ++ SizeT num_bytes, file_buf_size; ++ HChar *file_buf; ++ ++ const char *search_Loongson_str = "Model Name\t\t: Loongson"; ++ ++ /* Slurp contents of /proc/cpuinfo into FILE_BUF */ ++ fd = VG_(open)("/proc/cpuinfo", 0, VKI_S_IRUSR); ++ if (sr_isError(fd)) ++ return False; ++ ++ fh = sr_Res(fd); ++ ++ /* Determine the size of /proc/cpuinfo. 
++ Work around broken-ness in /proc file system implementation. ++ fstat returns a zero size for /proc/cpuinfo although it is ++ claimed to be a regular file. */ ++ num_bytes = 0; ++ file_buf_size = 1000; ++ file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); ++ while (True) { ++ n = VG_(read)(fh, file_buf, file_buf_size); ++ if (n < 0) ++ break; ++ ++ num_bytes += n; ++ if (n < file_buf_size) ++ break; /* reached EOF */ ++ } ++ ++ if (n < 0) ++ num_bytes = 0; /* read error; ignore contents */ ++ ++ if (num_bytes > file_buf_size) { ++ VG_(free)(file_buf); ++ VG_(lseek)(fh, 0, VKI_SEEK_SET); ++ file_buf = VG_(malloc)("cpuinfo", num_bytes + 1); ++ n = VG_(read)(fh, file_buf, num_bytes); ++ if (n < 0) ++ num_bytes = 0; ++ } ++ ++ file_buf[num_bytes] = '\0'; ++ VG_(close)(fh); ++ ++ /* Parse file */ ++ vai.hwcaps = 0; ++ if (VG_(strstr)(file_buf, search_Loongson_str) == NULL) { ++ /* Did not find string in the proc file. */ ++ VG_(free)(file_buf); ++ return False; ++ } ++ ++ if (VG_(strstr)(file_buf, "loongarch32") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_ISA_32BIT; ++ if (VG_(strstr)(file_buf, "loongarch64") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_ISA_64BIT; ++ ++ if (VG_(strstr)(file_buf, "cpucfg") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_CPUCFG; ++ if (VG_(strstr)(file_buf, "lam") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LAM; ++ if (VG_(strstr)(file_buf, "ual") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_UAL; ++ if (VG_(strstr)(file_buf, "fpu") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_FP; ++ if (VG_(strstr)(file_buf, "lsx") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LSX; ++ if (VG_(strstr)(file_buf, "lasx") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LASX; ++ if (VG_(strstr)(file_buf, "complex") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_COMPLEX; ++ if (VG_(strstr)(file_buf, "crypto") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_CRYPTO; ++ if (VG_(strstr)(file_buf, "lvz") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LVZP; ++ if (VG_(strstr)(file_buf, "lbt_x86") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_X86BT; ++ if (VG_(strstr)(file_buf, "lbt_arm") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_ARMBT; ++ if (VG_(strstr)(file_buf, "lbt_mips") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_MIPSBT; ++ ++ VG_(free)(file_buf); ++ return True; ++} ++ ++#endif /* defined(VGP_loongarch64) */ ++ + Bool VG_(machine_get_hwcaps)( void ) + { + vg_assert(hwcaps_done == False); +@@ -2227,6 +2366,54 @@ Bool VG_(machine_get_hwcaps)( void ) + + return True; + } ++ ++#elif defined(VGA_loongarch64) ++ { ++ va = VexArchLOONGARCH64; ++ vai.endness = VexEndnessLE; ++ vai.hwcaps = 0; ++ ++ if (!VG_(parse_cpuinfo)()) ++ return False; ++ ++ /* Same instruction set detection algorithm as for ppc32/arm... */ ++ vki_sigset_t saved_set, tmp_set; ++ vki_sigaction_fromK_t saved_sigill_act; ++ vki_sigaction_toK_t tmp_sigill_act; ++ ++ vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); ++ ++ VG_(sigemptyset)(&tmp_set); ++ VG_(sigaddset)(&tmp_set, VKI_SIGILL); ++ ++ Int r; ++ r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); ++ vg_assert(r == 0); ++ ++ r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); ++ vg_assert(r == 0); ++ tmp_sigill_act = saved_sigill_act; ++ ++ /* NODEFER: signal handler does not return (from the kernel's point of ++ view), hence if it is to successfully catch a signal more than once, ++ we need the NODEFER flag. 
*/ ++ tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; ++ tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; ++ tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; ++ tmp_sigill_act.ksa_handler = handler_unsup_insn; ++ VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); ++ ++ VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); ++ VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); ++ VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); ++ ++ VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps); ++ ++ VG_(machine_get_cache_info)(&vai); ++ ++ return True; ++ } ++ + #else + # error "Unknown arch" + #endif +@@ -2367,6 +2554,9 @@ Int VG_(machine_get_size_of_largest_guest_register) ( void ) + # elif defined(VGA_mips64) + return 8; + ++# elif defined(VGA_loongarch64) ++ return 8; ++ + # else + # error "Unknown arch" + # endif +@@ -2383,7 +2573,7 @@ void* VG_(fnptr_to_fnentry)( void* f ) + || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \ + || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + return f; + # elif defined(VGP_ppc64be_linux) + /* ppc64-linux uses the AIX scheme, in which f is a pointer to a +diff --git a/coregrind/m_main.c b/coregrind/m_main.c +index a857e5afe..f9c0c6fb6 100644 +--- a/coregrind/m_main.c ++++ b/coregrind/m_main.c +@@ -1481,6 +1481,7 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) + "AMD Athlon or above)\n"); + VG_(printf)(" * AMD Athlon64/Opteron\n"); + VG_(printf)(" * ARM (armv7)\n"); ++ VG_(printf)(" * LoongArch (3A5000 and above)\n"); + VG_(printf)(" * MIPS (mips32 and above; mips64 and above)\n"); + VG_(printf)(" * PowerPC (most; ppc405 and above)\n"); + VG_(printf)(" * System z (64bit only - s390x; z990 and above)\n"); +@@ -2534,6 +2535,11 @@ static void final_tidyup(ThreadId tid) + VG_TRACK(post_reg_write, Vg_CoreClientReq, tid, + offsetof(VexGuestS390XState, guest_r2), + sizeof(VG_(threads)[tid].arch.vex.guest_r2)); ++# elif defined(VGA_loongarch64) ++ VG_(threads)[tid].arch.vex.guest_R4 = to_run; ++ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid, ++ offsetof(VexGuestLOONGARCH64State, guest_R4), ++ sizeof(VG_(threads)[tid].arch.vex.guest_R4)); + #else + I_die_here : architecture missing in m_main.c + #endif +@@ -3062,6 +3068,29 @@ asm( + ".set pop \n\t" + ".previous \n\t" + ); ++#elif defined(VGP_loongarch64_linux) ++asm(" \n\t" ++ ".text \n\t" ++ ".globl _start \n\t" ++ ".type _start,@function \n\t" ++ "_start: \n\t" ++ /* t0 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB + ++ VG_DEFAULT_STACK_ACTIVE_SZB */ ++ "la.local $t0, vgPlain_interim_stack \n\t" ++ "li.w $t1, "VG_STRINGIFY(VG_STACK_GUARD_SZB)" \n\t" ++ "add.d $t0, $t0, $t1 \n\t" ++ "li.w $t2, "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" \n\t" ++ "add.d $t0, $t0, $t2 \n\t" ++ /* allocate 16 bytes on the new stack in t0, and aligned */ ++ "addi.d $t0, $t0, -16 \n\t" ++ "bstrins.d $t0, $zero, 3, 0 \n\t" ++ /* a0 = sp, sp = t0, and then call _start_in_C_linux */ ++ "move $a0, $sp \n\t" ++ "move $sp, $t0 \n\t" ++ "la.local $t0, _start_in_C_linux \n\t" ++ "jr $t0 \n\t" ++ ".previous \n\t" ++); + #else + # error "Unknown platform" + #endif +@@ -3107,11 +3136,11 @@ void _start_in_C_linux ( UWord* pArgc ) + # if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \ + || defined(VGP_ppc64le_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || 
defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + { +- /* ppc32/ppc64, arm64, mips32/64 can be configured with different +- page sizes. Determine this early. This is an ugly hack and really +- should be moved into valgrind_main. */ ++ /* ppc32/ppc64, arm64, mips32/64, loongarch64 can be configured with ++ different page sizes. Determine this early. This is an ugly hack ++ and really should be moved into valgrind_main. */ + UWord *sp = &pArgc[1+argc+1]; + while (*sp++ != 0) + ; +diff --git a/coregrind/m_options.c b/coregrind/m_options.c +index 1483af2d9..640af7121 100644 +--- a/coregrind/m_options.c ++++ b/coregrind/m_options.c +@@ -203,7 +203,8 @@ UInt VG_(clo_unw_stack_scan_frames) = 5; + VgSmc VG_(clo_smc_check) = Vg_SmcAllNonFile; + #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) || defined(VGA_arm64) \ +- || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) ++ || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \ ++ || defined(VGA_loongarch64) + VgSmc VG_(clo_smc_check) = Vg_SmcStack; + #else + # error "Unknown arch" +diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c +index 37c67f4c1..a0bc86481 100644 +--- a/coregrind/m_redir.c ++++ b/coregrind/m_redir.c +@@ -1229,6 +1229,7 @@ Bool VG_(is_soname_ld_so) (const HChar *soname) + if (VG_STREQ(soname, VG_U_LD_LINUX_AARCH64_SO_1)) return True; + if (VG_STREQ(soname, VG_U_LD_LINUX_ARMHF_SO_3)) return True; + if (VG_STREQ(soname, VG_U_LD_LINUX_MIPSN8_S0_1)) return True; ++ if (VG_STREQ(soname, VG_U_LD_LINUX_LOONGARCH_LP64D_SO_1)) return True; + # elif defined(VGO_freebsd) + if (VG_STREQ(soname, VG_U_LD_ELF_SO_1)) return True; + if (VG_STREQ(soname, VG_U_LD_ELF32_SO_1)) return True; +@@ -1668,6 +1669,22 @@ void VG_(redir_initialise) ( void ) + ); + } + ++#elif defined(VGP_loongarch64_linux) ++ /* If we're using memcheck, use these intercepts right from ++ the start, otherwise ld.so makes a lot of noise. */ ++ if (0==VG_(strcmp)("Memcheck", VG_(details).name)) { ++ add_hardwired_spec( ++ "ld-linux-loongarch-lp64d.so.1", "strlen", ++ (Addr)&VG_(loongarch64_linux_REDIR_FOR_strlen), ++ complain_about_stripped_glibc_ldso ++ ); ++ add_hardwired_spec( ++ "ld-linux-loongarch-lp64d.so.1", "strchr", ++ (Addr)&VG_(loongarch64_linux_REDIR_FOR_strchr), ++ complain_about_stripped_glibc_ldso ++ ); ++ } ++ + # elif defined(VGP_x86_solaris) + /* If we're using memcheck, use these intercepts right from + the start, otherwise ld.so makes a lot of noise. 
*/ +diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c +index 3062c1afc..27dc24c8a 100644 +--- a/coregrind/m_scheduler/scheduler.c ++++ b/coregrind/m_scheduler/scheduler.c +@@ -271,6 +271,7 @@ const HChar* name_of_sched_event ( UInt event ) + case VEX_TRC_JMP_SIGBUS: return "SIGBUS"; + case VEX_TRC_JMP_SIGFPE_INTOVF: + case VEX_TRC_JMP_SIGFPE_INTDIV: return "SIGFPE"; ++ case VEX_TRC_JMP_SIGSYS: return "SIGSYS"; + case VEX_TRC_JMP_EMWARN: return "EMWARN"; + case VEX_TRC_JMP_EMFAIL: return "EMFAIL"; + case VEX_TRC_JMP_CLIENTREQ: return "CLIENTREQ"; +@@ -1657,6 +1658,10 @@ VgSchedReturnCode VG_(scheduler) ( ThreadId tid ) + VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF); + break; + ++ case VEX_TRC_JMP_SIGSYS: ++ VG_(synth_sigsys)(tid); ++ break; ++ + case VEX_TRC_JMP_NODECODE: { + Addr addr = VG_(get_IP)(tid); + +@@ -1821,6 +1826,9 @@ void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src ) + #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) + # define VG_CLREQ_ARGS guest_r12 + # define VG_CLREQ_RET guest_r11 ++#elif defined(VGA_loongarch64) ++# define VG_CLREQ_ARGS guest_R12 ++# define VG_CLREQ_RET guest_R11 + #else + # error Unknown arch + #endif +diff --git a/coregrind/m_sigframe/sigframe-loongarch64-linux.c b/coregrind/m_sigframe/sigframe-loongarch64-linux.c +new file mode 100644 +index 000000000..eda6c885c +--- /dev/null ++++ b/coregrind/m_sigframe/sigframe-loongarch64-linux.c +@@ -0,0 +1,285 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- Create/destroy signal delivery frames. ---*/ ++/*--- sigframe-loongarch64-linux.c ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. 
++*/ ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_basics.h" ++#include "pub_core_vki.h" ++#include "pub_core_vkiscnums.h" ++#include "pub_core_threadstate.h" ++#include "pub_core_aspacemgr.h" ++#include "pub_core_libcbase.h" ++#include "pub_core_libcassert.h" ++#include "pub_core_libcprint.h" ++#include "pub_core_machine.h" ++#include "pub_core_options.h" ++#include "pub_core_sigframe.h" ++#include "pub_core_signals.h" ++#include "pub_core_tooliface.h" ++#include "pub_core_trampoline.h" ++#include "priv_sigframe.h" ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Signal frame layouts ---*/ ++/*------------------------------------------------------------*/ ++ ++struct vg_sig_private { ++ UInt magicPI; ++ UInt sigNo_private; ++ VexGuestLOONGARCH64State vex_shadow1; ++ VexGuestLOONGARCH64State vex_shadow2; ++}; ++ ++struct rt_sigframe { ++ struct vki_siginfo rs_info; ++ struct vki_ucontext rs_uctx; ++ struct vg_sig_private priv; ++}; ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Creating signal frames ---*/ ++/*------------------------------------------------------------*/ ++ ++static void create_siginfo ( ThreadId tid, ++ struct rt_sigframe *frame, ++ const vki_siginfo_t *si) ++{ ++ VG_TRACK(pre_mem_write, Vg_CoreSignal, tid, "signal frame siginfo", ++ (Addr)&frame->rs_info, sizeof(frame->rs_info)); ++ ++ VG_(memcpy)(&frame->rs_info, si, sizeof(vki_siginfo_t)); ++ ++ VG_TRACK(post_mem_write, Vg_CoreSignal, tid, ++ (Addr)&frame->rs_info, sizeof(frame->rs_info)); ++} ++ ++static void create_sigcontext ( ThreadState *tst, ++ struct vki_sigcontext **sc) ++{ ++ struct vki_sigcontext *sctx = *sc; ++ ++ VG_TRACK(pre_mem_write, Vg_CoreSignal, tst->tid, "signal frame mcontext", ++ (Addr)sctx, sizeof(ULong) * 32); ++ ++ sctx->sc_regs[1] = tst->arch.vex.guest_R1; ++ sctx->sc_regs[2] = tst->arch.vex.guest_R2; ++ sctx->sc_regs[3] = tst->arch.vex.guest_R3; ++ sctx->sc_regs[4] = tst->arch.vex.guest_R4; ++ sctx->sc_regs[5] = tst->arch.vex.guest_R5; ++ sctx->sc_regs[6] = tst->arch.vex.guest_R6; ++ sctx->sc_regs[7] = tst->arch.vex.guest_R7; ++ sctx->sc_regs[8] = tst->arch.vex.guest_R8; ++ sctx->sc_regs[9] = tst->arch.vex.guest_R9; ++ sctx->sc_regs[10] = tst->arch.vex.guest_R10; ++ sctx->sc_regs[11] = tst->arch.vex.guest_R11; ++ sctx->sc_regs[12] = tst->arch.vex.guest_R12; ++ sctx->sc_regs[13] = tst->arch.vex.guest_R13; ++ sctx->sc_regs[14] = tst->arch.vex.guest_R14; ++ sctx->sc_regs[15] = tst->arch.vex.guest_R15; ++ sctx->sc_regs[16] = tst->arch.vex.guest_R16; ++ sctx->sc_regs[17] = tst->arch.vex.guest_R17; ++ sctx->sc_regs[18] = tst->arch.vex.guest_R18; ++ sctx->sc_regs[19] = tst->arch.vex.guest_R19; ++ sctx->sc_regs[20] = tst->arch.vex.guest_R20; ++ sctx->sc_regs[21] = tst->arch.vex.guest_R21; ++ sctx->sc_regs[22] = tst->arch.vex.guest_R22; ++ sctx->sc_regs[23] = tst->arch.vex.guest_R23; ++ sctx->sc_regs[24] = tst->arch.vex.guest_R24; ++ sctx->sc_regs[25] = tst->arch.vex.guest_R25; ++ sctx->sc_regs[26] = tst->arch.vex.guest_R26; ++ sctx->sc_regs[27] = tst->arch.vex.guest_R27; ++ sctx->sc_regs[28] = tst->arch.vex.guest_R28; ++ sctx->sc_regs[29] = tst->arch.vex.guest_R29; ++ sctx->sc_regs[30] = tst->arch.vex.guest_R30; ++ sctx->sc_regs[31] = tst->arch.vex.guest_R31; ++ sctx->sc_pc = tst->arch.vex.guest_PC; ++} ++ ++static void create_ucontext ( ThreadState *tst, ++ ThreadId tid, ++ struct vki_ucontext *uc, ++ const vki_sigset_t *mask, ++ struct vki_sigcontext **sc, ++ const vki_siginfo_t *siginfo) ++{ ++ 
VG_TRACK(pre_mem_write, Vg_CoreSignal, tid, "signal frame ucontext", ++ (Addr)uc, offsetof(struct vki_ucontext, uc_mcontext)); ++ ++ uc->uc_flags = 0; ++ uc->uc_link = 0; ++ uc->uc_stack = tst->altstack; ++ uc->uc_sigmask = *mask; ++ ++ VG_TRACK(post_mem_write, Vg_CoreSignal, tid, (Addr)uc, ++ offsetof(struct vki_ucontext, uc_mcontext)); ++ ++ create_sigcontext(tst, sc); ++} ++ ++/* EXPORTED */ ++void VG_(sigframe_create) ( ThreadId tid, ++ Bool on_altstack, ++ Addr sp_top_of_frame, ++ const vki_siginfo_t *siginfo, ++ const struct vki_ucontext *siguc, ++ void *handler, ++ UInt flags, ++ const vki_sigset_t *mask, ++ void *restorer ) ++{ ++ UInt size = sizeof(struct rt_sigframe); ++ Addr sp = VG_ROUNDDN(sp_top_of_frame - size, 16); ++ ++ ThreadState *tst = VG_(get_ThreadState)(tid); ++ if (! ML_(sf_maybe_extend_stack)(tst, sp, size, flags)) ++ return; ++ ++ struct rt_sigframe *frame = (struct rt_sigframe *)sp; ++ create_siginfo(tid, frame, siginfo); ++ ++ struct vki_ucontext *uctx = &frame->rs_uctx; ++ struct vki_sigcontext *sctx = &(frame->rs_uctx.uc_mcontext); ++ create_ucontext(tst, tid, uctx, mask, &sctx, siginfo); ++ ++ /* ++ Arguments to signal handler: ++ ++ a0 = signal number ++ a1 = pointer to siginfo ++ a2 = pointer to ucontext ++ ++ csr_era point to the signal handler, $r3 (sp) points to ++ the struct rt_sigframe. ++ */ ++ ++ Int sigNo = siginfo->si_signo; ++ tst->arch.vex.guest_R4 = sigNo; ++ tst->arch.vex.guest_R5 = (Addr) &frame->rs_info; ++ tst->arch.vex.guest_R6 = (Addr) &frame->rs_uctx; ++ tst->arch.vex.guest_R3 = (Addr) frame; ++ tst->arch.vex.guest_R1 = (Addr) &VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn); ++ ++ struct vg_sig_private *priv = &frame->priv; ++ priv->magicPI = 0x31415927; ++ priv->sigNo_private = sigNo; ++ priv->vex_shadow1 = tst->arch.vex_shadow1; ++ priv->vex_shadow2 = tst->arch.vex_shadow2; ++ ++ /* Set the thread so it will next run the handler. */ ++ VG_TRACK(post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR, sizeof(Addr)); ++ ++ if (VG_(clo_trace_signals)) ++ VG_(printf)("handler = %p\n", handler); ++ ++ tst->arch.vex.guest_PC = (Addr) handler; ++ /* This thread needs to be marked runnable, but we leave that ++ the caller to do. 
*/ ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Destroying signal frames ---*/ ++/*------------------------------------------------------------*/ ++ ++static void restore_regs ( ThreadState *tst, ++ struct vki_sigcontext *mc) ++{ ++ tst->arch.vex.guest_R1 = mc->sc_regs[1]; ++ tst->arch.vex.guest_R2 = mc->sc_regs[2]; ++ tst->arch.vex.guest_R3 = mc->sc_regs[3]; ++ tst->arch.vex.guest_R4 = mc->sc_regs[4]; ++ tst->arch.vex.guest_R5 = mc->sc_regs[5]; ++ tst->arch.vex.guest_R6 = mc->sc_regs[6]; ++ tst->arch.vex.guest_R7 = mc->sc_regs[7]; ++ tst->arch.vex.guest_R8 = mc->sc_regs[8]; ++ tst->arch.vex.guest_R9 = mc->sc_regs[9]; ++ tst->arch.vex.guest_R10 = mc->sc_regs[10]; ++ tst->arch.vex.guest_R11 = mc->sc_regs[11]; ++ tst->arch.vex.guest_R12 = mc->sc_regs[12]; ++ tst->arch.vex.guest_R13 = mc->sc_regs[13]; ++ tst->arch.vex.guest_R14 = mc->sc_regs[14]; ++ tst->arch.vex.guest_R15 = mc->sc_regs[15]; ++ tst->arch.vex.guest_R16 = mc->sc_regs[16]; ++ tst->arch.vex.guest_R17 = mc->sc_regs[17]; ++ tst->arch.vex.guest_R18 = mc->sc_regs[18]; ++ tst->arch.vex.guest_R19 = mc->sc_regs[19]; ++ tst->arch.vex.guest_R20 = mc->sc_regs[20]; ++ tst->arch.vex.guest_R21 = mc->sc_regs[21]; ++ tst->arch.vex.guest_R22 = mc->sc_regs[22]; ++ tst->arch.vex.guest_R23 = mc->sc_regs[23]; ++ tst->arch.vex.guest_R24 = mc->sc_regs[24]; ++ tst->arch.vex.guest_R25 = mc->sc_regs[25]; ++ tst->arch.vex.guest_R26 = mc->sc_regs[26]; ++ tst->arch.vex.guest_R27 = mc->sc_regs[27]; ++ tst->arch.vex.guest_R28 = mc->sc_regs[28]; ++ tst->arch.vex.guest_R29 = mc->sc_regs[29]; ++ tst->arch.vex.guest_R30 = mc->sc_regs[30]; ++ tst->arch.vex.guest_R31 = mc->sc_regs[31]; ++ tst->arch.vex.guest_PC = mc->sc_pc; ++} ++ ++/* EXPORTED */ ++void VG_(sigframe_destroy)( ThreadId tid, Bool isRT ) ++{ ++ vg_assert(VG_(is_valid_tid)(tid)); ++ ++ ThreadState *tst = VG_(get_ThreadState)(tid); ++ Addr sp = tst->arch.vex.guest_R3; ++ struct rt_sigframe *frame = (struct rt_sigframe *)sp; ++ struct vki_ucontext *uc = &frame->rs_uctx; ++ ++ tst->sig_mask = uc->uc_sigmask; ++ tst->tmp_sig_mask = uc->uc_sigmask; ++ ++ struct vki_sigcontext *mc = &uc->uc_mcontext; ++ restore_regs(tst, mc); ++ ++ struct vg_sig_private *priv = &frame->priv; ++ vg_assert(priv->magicPI == 0x31415927); ++ tst->arch.vex_shadow1 = priv->vex_shadow1; ++ tst->arch.vex_shadow2 = priv->vex_shadow2; ++ ++ UInt frame_size = sizeof(*frame); ++ VG_TRACK(die_mem_stack_signal, sp, frame_size); ++ ++ if (VG_(clo_trace_signals)) ++ VG_(message)(Vg_DebugMsg, ++ "VG_(signal_return) (thread %u): isRT=%d valid magic; PC=%#llx\n", ++ tid, isRT, tst->arch.vex.guest_PC); ++ ++ Int sigNo = priv->sigNo_private; ++ VG_TRACK( post_deliver_signal, tid, sigNo ); ++} ++ ++#endif /* defined(VGP_loongarch64_linux) */ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end sigframe-loongarch64-linux.c ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c +index b3c94fcc9..62c689bed 100644 +--- a/coregrind/m_signals.c ++++ b/coregrind/m_signals.c +@@ -628,6 +628,22 @@ VgHashTable *ht_sigchld_ignore = NULL; + (srP)->misc.MIPS32.r28 = (uc)->uc_mcontext.sc_regs[28]; \ + } + ++#elif defined(VGP_loongarch64_linux) ++# define VG_UCONTEXT_INSTR_PTR(uc) (((uc)->uc_mcontext.sc_pc)) ++# define VG_UCONTEXT_STACK_PTR(uc) ((uc)->uc_mcontext.sc_regs[3]) ++# define VG_UCONTEXT_FRAME_PTR(uc) ((uc)->uc_mcontext.sc_regs[22]) ++# define VG_UCONTEXT_SYSCALL_NUM(uc) 
((uc)->uc_mcontext.sc_regs[11]) ++# define VG_UCONTEXT_SYSCALL_SYSRES(uc) \ ++ /* Convert the value in uc_mcontext.regs[4] into a SysRes. */ \ ++ VG_(mk_SysRes_loongarch64_linux)((uc)->uc_mcontext.sc_regs[4]) ++ ++# define VG_UCONTEXT_TO_UnwindStartRegs(srP, uc) \ ++ { (srP)->r_pc = (uc)->uc_mcontext.sc_pc; \ ++ (srP)->r_sp = (uc)->uc_mcontext.sc_regs[3]; \ ++ (srP)->misc.LOONGARCH64.r_fp = (uc)->uc_mcontext.sc_regs[22]; \ ++ (srP)->misc.LOONGARCH64.r_ra = (uc)->uc_mcontext.sc_regs[1]; \ ++ } ++ + #elif defined(VGP_x86_solaris) + # define VG_UCONTEXT_INSTR_PTR(uc) ((Addr)(uc)->uc_mcontext.gregs[VKI_EIP]) + # define VG_UCONTEXT_STACK_PTR(uc) ((Addr)(uc)->uc_mcontext.gregs[VKI_UESP]) +@@ -899,8 +915,10 @@ void calculate_SKSS_from_SCSS ( SKSS* dst ) + if (skss_handler != VKI_SIG_IGN && skss_handler != VKI_SIG_DFL) + skss_flags |= VKI_SA_SIGINFO; + ++# if !defined(VGP_loongarch64_linux) + /* use our own restorer */ + skss_flags |= VKI_SA_RESTORER; ++# endif + + /* Create SKSS entry for this signal. */ + if (sig != VKI_SIGKILL && sig != VKI_SIGSTOP) +@@ -1052,6 +1070,15 @@ extern void my_sigreturn(void); + " li $t4, " #name "\n" \ + " syscall[32]\n" \ + ".previous\n" ++ ++#elif defined(VGP_loongarch64_linux) ++# define _MY_SIGRETURN(name) \ ++ ".text\n" \ ++ "my_sigreturn:\n" \ ++ " li.w $a7, " #name "\n" \ ++ " syscall 0\n" \ ++ ".previous\n" ++ + #elif defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) + /* Not used on Solaris. */ + # define _MY_SIGRETURN(name) \ +@@ -1111,7 +1138,8 @@ static void handle_SCSS_change ( Bool force_update ) + ksa.sa_flags = skss.skss_per_sig[sig].skss_flags; + # if !defined(VGP_ppc32_linux) && \ + !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \ +- !defined(VGP_mips32_linux) && !defined(VGO_solaris) && !defined(VGO_freebsd) ++ !defined(VGP_mips32_linux) && !defined(VGO_solaris) && \ ++ !defined(VGO_freebsd) && !defined(VGP_loongarch64_linux) + ksa.sa_restorer = my_sigreturn; + # endif + /* Re above ifdef (also the assertion below), PaulM says: +@@ -1159,7 +1187,7 @@ static void handle_SCSS_change ( Bool force_update ) + !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \ + !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) && \ + !defined(VGP_nanomips_linux) && !defined(VGO_solaris) && \ +- !defined(VGO_freebsd) ++ !defined(VGO_freebsd) && !defined(VGP_loongarch64_linux) + vg_assert(ksa_old.sa_restorer == my_sigreturn); + # endif + VG_(sigaddset)( &ksa_old.sa_mask, VKI_SIGKILL ); +@@ -1280,7 +1308,7 @@ SysRes VG_(do_sys_sigaction) ( Int signo, + old_act->sa_flags = scss.scss_per_sig[signo].scss_flags; + old_act->sa_mask = scss.scss_per_sig[signo].scss_mask; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + old_act->sa_restorer = scss.scss_per_sig[signo].scss_restorer; + # endif + } +@@ -1293,7 +1321,7 @@ SysRes VG_(do_sys_sigaction) ( Int signo, + + scss.scss_per_sig[signo].scss_restorer = NULL; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + scss.scss_per_sig[signo].scss_restorer = new_act->sa_restorer; + # endif + +@@ -1653,7 +1681,7 @@ void VG_(kill_self)(Int sigNo) + sa.ksa_handler = VKI_SIG_DFL; + sa.sa_flags = 0; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + sa.sa_restorer = 0; + # endif + VG_(sigemptyset)(&sa.sa_mask); +@@ -2296,8 +2324,9 @@ 
void VG_(synth_sigtrap)(ThreadId tid) + // Synthesise a SIGFPE. + void VG_(synth_sigfpe)(ThreadId tid, UInt code) + { +-// Only tested on mips32, mips64, s390x and nanomips. +-#if !defined(VGA_mips32) && !defined(VGA_mips64) && !defined(VGA_s390x) && !defined(VGA_nanomips) ++// Only tested on mips32, mips64, s390x, nanomips and loongarch64. ++#if !defined(VGA_mips32) && !defined(VGA_mips64) && !defined(VGA_s390x) \ ++ && !defined(VGA_nanomips) && !defined(VGA_loongarch64) + vg_assert(0); + #else + vki_siginfo_t info; +@@ -2319,6 +2348,30 @@ void VG_(synth_sigfpe)(ThreadId tid, UInt code) + #endif + } + ++// Synthesise a SIGSYS. ++void VG_(synth_sigsys)(ThreadId tid) ++{ ++// Only tested on loongarch64-linux. ++#if !defined(VGP_loongarch64_linux) ++ vg_assert(0); ++#else ++ vki_siginfo_t info; ++ ++ vg_assert(VG_(threads)[tid].status == VgTs_Runnable); ++ ++ VG_(memset)(&info, 0, sizeof(info)); ++ info.si_signo = VKI_SIGSYS; ++ info.si_code = VKI_SI_KERNEL; ++ ++ if (VG_(gdbserver_report_signal) (&info, tid)) { ++ resume_scheduler(tid); ++ deliver_signal(tid, &info, NULL); ++ } ++ else ++ resume_scheduler(tid); ++#endif ++} ++ + /* Make a signal pending for a thread, for later delivery. + VG_(poll_signals) will arrange for it to be delivered at the right + time. +@@ -3043,7 +3096,7 @@ void pp_ksigaction ( vki_sigaction_toK_t* sa ) + sa->ksa_handler, + (UInt)sa->sa_flags, + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + sa->sa_restorer + # else + (void*)0 +@@ -3066,7 +3119,7 @@ void VG_(set_default_handler)(Int signo) + sa.ksa_handler = VKI_SIG_DFL; + sa.sa_flags = 0; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + sa.sa_restorer = 0; + # endif + VG_(sigemptyset)(&sa.sa_mask); +@@ -3188,7 +3241,7 @@ void VG_(sigstartup_actions) ( void ) + tsa.ksa_handler = (void *)sync_signalhandler; + tsa.sa_flags = VKI_SA_SIGINFO; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + tsa.sa_restorer = 0; + # endif + VG_(sigfillset)(&tsa.sa_mask); +@@ -3216,7 +3269,7 @@ void VG_(sigstartup_actions) ( void ) + + scss.scss_per_sig[i].scss_restorer = NULL; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + scss.scss_per_sig[i].scss_restorer = sa.sa_restorer; + # endif + +diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c +index 308bebdd8..df13de1fe 100644 +--- a/coregrind/m_stacktrace.c ++++ b/coregrind/m_stacktrace.c +@@ -1502,6 +1502,100 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known, + + #endif + ++/* ---------------------- loongarch64 ----------------------- */ ++ ++#if defined(VGP_loongarch64_linux) ++UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known, ++ /*OUT*/Addr* ips, UInt max_n_ips, ++ /*OUT*/Addr* sps, /*OUT*/Addr* fps, ++ const UnwindStartRegs* startRegs, ++ Addr fp_max_orig ) ++{ ++ Bool debug = False; ++ Int i; ++ Addr fp_max; ++ UInt n_found = 0; ++ const Int cmrf = VG_(clo_merge_recursive_frames); ++ ++ vg_assert(sizeof(Addr) == sizeof(UWord)); ++ vg_assert(sizeof(Addr) == sizeof(void*)); ++ ++ D3UnwindRegs uregs; ++ uregs.pc = startRegs->r_pc; ++ uregs.sp = startRegs->r_sp; ++ uregs.fp = startRegs->misc.LOONGARCH64.r_fp; ++ uregs.ra = startRegs->misc.LOONGARCH64.r_ra; ++ Addr fp_min = uregs.sp - 
VG_STACK_REDZONE_SZB; ++ ++ /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1], ++ stopping when the trail goes cold, which we guess to be ++ when FP is not a reasonable stack location. */ ++ ++ fp_max = VG_PGROUNDUP(fp_max_orig); ++ if (fp_max >= sizeof(Addr)) ++ fp_max -= sizeof(Addr); ++ ++ if (debug) ++ VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, " ++ "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx ra=0x%lx\n", ++ max_n_ips, fp_min, fp_max_orig, fp_max, ++ uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ ++ if (sps) sps[0] = uregs.sp; ++ if (fps) fps[0] = uregs.fp; ++ ips[0] = uregs.pc; ++ i = 1; ++ ++ /* Loop unwinding the stack, using CFI. */ ++ while (True) { ++ if (debug) ++ VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n", ++ i, uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ if (i >= max_n_ips) ++ break; ++ ++ if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) { ++ if (sps) sps[i] = uregs.sp; ++ if (fps) fps[i] = uregs.fp; ++ ips[i++] = uregs.pc - 1; ++ if (debug) ++ VG_(printf)( ++ "USING CFI: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n", ++ uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ uregs.pc = uregs.pc - 1; ++ RECURSIVE_MERGE(cmrf,ips,i); ++ continue; ++ } ++ ++ /* A problem on the first frame? Lets assume it was a bad jump. ++ We will use the link register and the current stack and frame ++ pointers and see if we can use the CFI in the next round. */ ++ if (i == 1) { ++ uregs.pc = uregs.ra; ++ uregs.ra = 0; ++ ++ if (sps) sps[i] = uregs.sp; ++ if (fps) fps[i] = uregs.fp; ++ ips[i++] = uregs.pc - 1; ++ if (debug) ++ VG_(printf)( ++ "USING bad-jump: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n", ++ uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ uregs.pc = uregs.pc - 1; ++ RECURSIVE_MERGE(cmrf,ips,i); ++ continue; ++ } ++ ++ /* No luck. We have to give up. */ ++ break; ++ } ++ ++ n_found = i; ++ return n_found; ++} ++ ++#endif ++ + /*------------------------------------------------------------*/ + /*--- ---*/ + /*--- END platform-dependent unwinder worker functions ---*/ +diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c +index 1e49ed412..84d30b921 100644 +--- a/coregrind/m_syscall.c ++++ b/coregrind/m_syscall.c +@@ -204,6 +204,17 @@ SysRes VG_(mk_SysRes_arm64_linux) ( Long val ) { + return res; + } + ++SysRes VG_(mk_SysRes_loongarch64_linux) ( UWord val ) { ++ SysRes res; ++ res._isError = val >= -4095 && val <= -1; ++ if (res._isError) { ++ res._val = (UWord)(-val); ++ } else { ++ res._val = (UWord)val; ++ } ++ return res; ++} ++ + /* Generic constructors. 
*/ + SysRes VG_(mk_SysRes_Success) ( UWord res ) { + SysRes r; +@@ -1034,6 +1045,22 @@ asm ( + ".previous \n\t" + ); + ++#elif defined(VGP_loongarch64_linux) ++extern UWord do_syscall_WRK (UWord a1, UWord a2, UWord a3, /* $a0, $a1, $a2 */ ++ UWord a4, UWord a5, UWord a6, /* $a3, $a4, $a5 */ ++ UWord syscall_no); /* $a6 */ ++asm ( ++ ".text \n\t" ++ ".globl do_syscall_WRK \n\t" ++ ".type do_syscall_WRK, @function \n\t" ++ "do_syscall_WRK: \n\t" ++ " move $a7, $a6 \n\t" /* a7 = syscall_no */ ++ " syscall 0 \n\t" ++ " jr $ra \n\t" ++ ".size do_syscall_WRK, .-do_syscall_WRK \n\t" ++ ".previous \n\t" ++); ++ + #elif defined(VGP_x86_solaris) + + extern ULong +@@ -1274,6 +1301,11 @@ SysRes VG_(do_syscall) ( UWord sysno, RegWord a1, RegWord a2, RegWord a3, + do_syscall_WRK(a1, a2, a3, a4, a5, a6, sysno, ®_a0); + return VG_(mk_SysRes_nanomips_linux)(reg_a0); + ++#elif defined(VGP_loongarch64_linux) ++ UWord val = 0; ++ val = do_syscall_WRK(a1, a2, a3, a4, a5, a6, sysno); ++ return VG_(mk_SysRes_loongarch64_linux)(val); ++ + # elif defined(VGP_x86_solaris) + UInt val, val2, err = False; + Bool restart; +diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h +index a73b6247e..1b75d586a 100644 +--- a/coregrind/m_syswrap/priv_syswrap-linux.h ++++ b/coregrind/m_syswrap/priv_syswrap-linux.h +@@ -105,6 +105,7 @@ DECL_TEMPLATE(linux, sys_epoll_create1); + DECL_TEMPLATE(linux, sys_epoll_ctl); + DECL_TEMPLATE(linux, sys_epoll_wait); + DECL_TEMPLATE(linux, sys_epoll_pwait); ++DECL_TEMPLATE(linux, sys_epoll_pwait2); + DECL_TEMPLATE(linux, sys_eventfd); + DECL_TEMPLATE(linux, sys_eventfd2); + +@@ -330,6 +331,12 @@ DECL_TEMPLATE(linux, sys_openat2); + // Linux-specific (new in Linux 5.14) + DECL_TEMPLATE(linux, sys_memfd_secret); + ++// Linux-specific (since Linux 5.6) ++DECL_TEMPLATE(linux, sys_pidfd_getfd); ++ ++// Since Linux 6.6 ++DECL_TEMPLATE(linux, sys_fchmodat2); ++ + /* --------------------------------------------------------------------- + Wrappers for sockets and ipc-ery. 
These are split into standalone + procedures because x86-linux hides them inside multiplexors +@@ -508,6 +515,13 @@ extern UInt do_syscall_clone_nanomips_linux ( Word (*fn) (void *), /* a0 - 4 */ + Int* child_tid, /* a4 - 8 */ + Int* parent_tid, /* a5 - 9 */ + void* tls_ptr); /* a6 - 10 */ ++extern UInt do_syscall_clone_loongarch64_linux ( Word (*fn) (void *), /* a0 */ ++ void* stack, /* a1 */ ++ Int flags, /* a2 */ ++ void* arg, /* a3 */ ++ Int* child_tid, /* a4 */ ++ Int* parent_tid, /* a5 */ ++ void* tls_ptr); /* a6 */ + #endif // __PRIV_SYSWRAP_LINUX_H + + /*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_syswrap/priv_types_n_macros.h b/coregrind/m_syswrap/priv_types_n_macros.h +index dd241839a..11a9d5e1b 100644 +--- a/coregrind/m_syswrap/priv_types_n_macros.h ++++ b/coregrind/m_syswrap/priv_types_n_macros.h +@@ -94,7 +94,8 @@ typedef + || defined(VGP_ppc32_linux) \ + || defined(VGP_arm_linux) || defined(VGP_s390x_linux) \ + || defined(VGP_arm64_linux) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + Int o_arg1; + Int o_arg2; + Int o_arg3; +diff --git a/coregrind/m_syswrap/syscall-loongarch64-linux.S b/coregrind/m_syswrap/syscall-loongarch64-linux.S +new file mode 100644 +index 000000000..5c18041ac +--- /dev/null ++++ b/coregrind/m_syswrap/syscall-loongarch64-linux.S +@@ -0,0 +1,143 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- Support for doing system calls. syscall-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "pub_core_basics_asm.h" ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_vkiscnums_asm.h" ++#include "libvex_guest_offsets.h" ++ ++/*----------------------------------------------------------------*/ ++/* ++ Perform a syscall for the client. This will run a syscall ++ with the client's specific per-thread signal mask. ++ ++ The structure of this function is such that, if the syscall is ++ interrupted by a signal, we can determine exactly what ++ execution state we were in with respect to the execution of ++ the syscall by examining the value of PC in the signal ++ handler. This means that we can always do the appropriate ++ thing to precisely emulate the kernel's signal/syscall ++ interactions. ++ ++ The syscall number is taken from the argument, even though it ++ should also be in guest_state->guest_R11. The syscall result ++ is written back to guest_state->guest_R4 on completion. 
++ ++ VG_(fixup_guest_state_after_syscall_interrupted) does the ++ thread state fixup in the case where we were interrupted by a ++ signal. ++ ++ Prototype: ++ ++ UWord ML_(do_syscall_for_client_WRK)( ++ Int syscallno, // $r4 - a0 ++ void* guest_state, // $r5 - a1 ++ const vki_sigset_t *sysmask, // $r6 - a2 ++ const vki_sigset_t *postmask, // $r7 - a3 ++ Int nsigwords) // $r8 - a4 ++*/ ++ ++/* from vki-loongarch64-linux.h */ ++#define VKI_SIG_SETMASK 2 ++ ++.globl ML_(do_syscall_for_client_WRK) ++ML_(do_syscall_for_client_WRK): ++ ++ /* Save regs on stack */ ++ addi.d $sp, $sp, -24 ++ st.d $a1, $sp, 0 /* guest_state */ ++ st.d $a3, $sp, 8 /* postmask */ ++ st.d $a4, $sp, 16 /* sigsetSzB */ ++ ++1: li.w $a7, __NR_rt_sigprocmask ++ li.w $a0, VKI_SIG_SETMASK ++ move $a1, $a2 /* syscall_mask */ ++ move $a2, $a3 /* postmask */ ++ move $a3, $a4 /* sigsetSzB */ ++ syscall 0 ++ ++ bnez $a0, 5f ++ ++ /* Actually do the client syscall */ ++ ld.d $a6, $sp, 0 /* guest_state */ ++ ++ ld.d $a0, $a6, OFFSET_loongarch64_R4 /* a0 */ ++ ld.d $a1, $a6, OFFSET_loongarch64_R5 /* a1 */ ++ ld.d $a2, $a6, OFFSET_loongarch64_R6 /* a2 */ ++ ld.d $a3, $a6, OFFSET_loongarch64_R7 /* a3 */ ++ ld.d $a4, $a6, OFFSET_loongarch64_R8 /* a4 */ ++ ld.d $a5, $a6, OFFSET_loongarch64_R9 /* a5 */ ++ ++ ld.d $a7, $a6, OFFSET_loongarch64_R11 /* syscallno */ ++ ++2: syscall 0 ++ ++ /* Saving return values into guest state */ ++3: st.d $a0, $a6, OFFSET_loongarch64_R4 /* a0 */ ++ ++4: li.w $a7, __NR_rt_sigprocmask ++ li.w $a0, VKI_SIG_SETMASK ++ ld.d $a1, $sp, 8 /* postmask */ ++ move $a2, $zero /* 0 (zero) */ ++ ld.d $a3, $sp, 16 /* sigsetSzB */ ++ syscall 0 ++ ++ beqz $a0, 6f ++ ++5: /* error */ ++ li.w $a0, 0x8000 ++ ++6: /* Restore sp and return */ ++ addi.d $sp, $sp, 24 ++ jr $ra ++ ++.section .rodata ++/* export the ranges so that ++ VG_(fixup_guest_state_after_syscall_interrupted) can do the ++ right thing */ ++ ++.globl ML_(blksys_setup) ++.globl ML_(blksys_restart) ++.globl ML_(blksys_complete) ++.globl ML_(blksys_committed) ++.globl ML_(blksys_finished) ++ML_(blksys_setup): .quad 1b ++ML_(blksys_restart): .quad 2b ++ML_(blksys_complete): .quad 3b ++ML_(blksys_committed): .quad 4b ++ML_(blksys_finished): .quad 5b ++ ++#endif // defined(VGP_loongarch64_linux) ++ ++/* Let the linker know we don't need an executable stack */ ++MARK_STACK_NO_EXEC ++ ++/*--------------------------------------------------------------------*/ ++/*--- end syscall-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c +index 1aeebd274..d93d93721 100644 +--- a/coregrind/m_syswrap/syswrap-amd64-linux.c ++++ b/coregrind/m_syswrap/syswrap-amd64-linux.c +@@ -880,10 +880,14 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + LINXY(__NR_openat2, sys_openat2), // 437 +- ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 + ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c +index bca509589..492abdb82 100644 +--- a/coregrind/m_syswrap/syswrap-arm-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm-linux.c +@@ 
-1056,7 +1056,11 @@ static SyscallTableEntry syscall_main_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + +diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c +index 953236000..d94228504 100644 +--- a/coregrind/m_syswrap/syswrap-arm64-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm64-linux.c +@@ -835,9 +835,14 @@ static SyscallTableEntry syscall_main_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 + ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + +diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c +index efdae60e1..88b0593cd 100644 +--- a/coregrind/m_syswrap/syswrap-generic.c ++++ b/coregrind/m_syswrap/syswrap-generic.c +@@ -3439,7 +3439,7 @@ POST(sys_newfstat) + #endif + + #if !defined(VGO_solaris) && !defined(VGP_arm64_linux) && \ +- !defined(VGP_nanomips_linux) ++ !defined(VGP_nanomips_linux) && !defined(VGP_loongarch64_linux) + static vki_sigset_t fork_saved_mask; + + // In Linux, the sys_fork() function varies across architectures, but we +diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c +index 26f1fbee3..f84200cf4 100644 +--- a/coregrind/m_syswrap/syswrap-linux.c ++++ b/coregrind/m_syswrap/syswrap-linux.c +@@ -310,6 +310,16 @@ static void run_a_thread_NORETURN ( Word tidW ) + : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode) + : "memory" , "$t4", "$a0" + ); ++#elif defined(VGP_loongarch64_linux) ++ asm volatile ( ++ "st.w %1, %0 \n\t" /* set tst->status = VgTs_Empty */ ++ "li.w $a7, %2 \n\t" /* set a7 = __NR_exit */ ++ "ld.w $a0, %3 \n\t" /* set a0 = tst->os_state.exitcode */ ++ "syscall 0 \n\t" /* exit(tst->os_state.exitcode) */ ++ : "=m" (tst->status) ++ : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode) ++ : "memory", "a0", "a7" ++ ); + #else + # error Unknown platform + #endif +@@ -535,6 +545,13 @@ static SysRes clone_new_thread ( Word (*fn)(void *), + (ML_(start_thread_NORETURN), stack, flags, ctst, + child_tidptr, parent_tidptr, NULL); + res = VG_ (mk_SysRes_nanomips_linux) (ret); ++#elif defined(VGP_loongarch64_linux) ++ UInt ret = 0; ++ ctst->arch.vex.guest_R4 = 0; ++ ret = do_syscall_clone_loongarch64_linux ++ (ML_(start_thread_NORETURN), stack, flags, ctst, ++ child_tidptr, parent_tidptr, NULL); ++ res = VG_(mk_SysRes_loongarch64_linux)(ret); + #else + # error Unknown platform + #endif +@@ -597,6 +614,8 @@ static SysRes setup_child_tls (ThreadId ctid, Addr tlsaddr) + #elif defined(VGP_mips32_linux) || defined(VGP_nanomips_linux) + ctst->arch.vex.guest_ULR = tlsaddr; + ctst->arch.vex.guest_r27 = tlsaddr; ++#elif defined(VGP_loongarch64_linux) ++ ctst->arch.vex.guest_R2 = tlsaddr; + #else + # error Unknown platform + #endif +@@ -755,7 +774,7 @@ static SysRes ML_(do_fork_clone) ( ThreadId tid, UInt flags, + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ + || defined(VGP_arm_linux) || defined(VGP_mips32_linux) \ + || defined(VGP_mips64_linux) || 
defined(VGP_arm64_linux) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + res = VG_(do_syscall5)( __NR_clone, flags, + (UWord)NULL, (UWord)parent_tidptr, + (UWord)NULL, (UWord)child_tidptr ); +@@ -828,7 +847,8 @@ PRE(sys_clone) + #define PRA_CHILD_TIDPTR PRA5 + #define ARG_TLS ARG4 + #define PRA_TLS PRA4 +-#elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) ++#elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \ ++ || defined(VGP_loongarch64_linux) + #define ARG_CHILD_TIDPTR ARG4 + #define PRA_CHILD_TIDPTR PRA4 + #define ARG_TLS ARG5 +@@ -2165,6 +2185,29 @@ POST(sys_epoll_pwait) + epoll_post_helper (tid, arrghs, status); + } + ++PRE(sys_epoll_pwait2) ++{ ++ *flags |= SfMayBlock; ++ PRINT("sys_epoll_pwait2 ( %ld, %#" FMT_REGWORD "x, %ld, %#" ++ FMT_REGWORD "x, %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", ++ SARG1, ARG2, SARG3, ARG4, ARG5, ARG6); ++ PRE_REG_READ6(long, "epoll_pwait2", ++ int, epfd, struct vki_epoll_event *, events, ++ int, maxevents, const struct timespec64 *, timeout, ++ vki_sigset_t *, sigmask, vki_size_t, sigsetsize); ++ /* Assume all (maxevents) events records should be (fully) writable. */ ++ PRE_MEM_WRITE( "epoll_pwait2(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); ++ /* epoll_pwait2 only supports 64bit timespec. */ ++ if (ARG4) ++ pre_read_timespec64(tid, "epoll_pwait2(timeout)", ARG4); ++ if (ARG5) ++ PRE_MEM_READ( "epoll_pwait2(sigmask)", ARG5, sizeof(vki_sigset_t) ); ++} ++POST(sys_epoll_pwait2) ++{ ++ epoll_post_helper (tid, arrghs, status); ++} ++ + PRE(sys_eventfd) + { + PRINT("sys_eventfd ( %" FMT_REGWORD "u )", ARG1); +@@ -4317,9 +4360,11 @@ PRE(sys_sigaction) + PRE_MEM_READ( "sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler)); + PRE_MEM_READ( "sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask)); + PRE_MEM_READ( "sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags)); ++#if !defined(VGP_loongarch64_linux) + if (ML_(safe_to_deref)(sa,sizeof(struct vki_old_sigaction)) + && (sa->sa_flags & VKI_SA_RESTORER)) + PRE_MEM_READ( "sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer)); ++#endif + } + + if (ARG3 != 0) { +@@ -4349,7 +4394,9 @@ PRE(sys_sigaction) + + new.ksa_handler = oldnew->ksa_handler; + new.sa_flags = oldnew->sa_flags; ++#if !defined(VGP_loongarch64_linux) + new.sa_restorer = oldnew->sa_restorer; ++#endif + convert_sigset_to_rt(&oldnew->sa_mask, &new.sa_mask); + newp = &new; + } +@@ -4362,7 +4409,9 @@ PRE(sys_sigaction) + + oldold->ksa_handler = oldp->ksa_handler; + oldold->sa_flags = oldp->sa_flags; ++#if !defined(VGP_loongarch64_linux) + oldold->sa_restorer = oldp->sa_restorer; ++#endif + oldold->sa_mask = oldp->sa_mask.sig[0]; + } + } +@@ -4435,10 +4484,13 @@ PRE(sys_rt_sigaction) + PRE_MEM_READ( "rt_sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler)); + PRE_MEM_READ( "rt_sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask)); + PRE_MEM_READ( "rt_sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags)); ++#if !defined(VGP_loongarch64_linux) + if (ML_(safe_to_deref)(sa,sizeof(vki_sigaction_toK_t)) + && (sa->sa_flags & VKI_SA_RESTORER)) + PRE_MEM_READ( "rt_sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer)); ++#endif + } ++ + if (ARG3 != 0) + PRE_MEM_WRITE( "rt_sigaction(oldact)", ARG3, sizeof(vki_sigaction_fromK_t)); + +@@ -6034,6 +6086,17 @@ PRE(sys_fchmodat) + PRE_MEM_RASCIIZ( "fchmodat(path)", ARG2 ); + } + 
++PRE(sys_fchmodat2) ++{ ++ PRINT("sys_fchmodat2 ( %ld, %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u, %" ++ FMT_REGWORD "u )", ++ SARG1, ARG2, (HChar*)(Addr)ARG2, ARG3, ARG4); ++ PRE_REG_READ4(long, "fchmodat2", ++ int, dfd, const char *, path, vki_mode_t, mode, ++ unsigned int, flags); ++ PRE_MEM_RASCIIZ( "fchmodat2(pathname)", ARG2 ); ++} ++ + PRE(sys_faccessat) + { + PRINT("sys_faccessat ( %ld, %#" FMT_REGWORD "x(%s), %ld )", +@@ -6790,7 +6853,8 @@ POST(sys_lookup_dcookie) + #endif + + #if defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + PRE(sys_lookup_dcookie) + { + *flags |= SfMayBlock; +@@ -13621,6 +13685,24 @@ POST(sys_pidfd_open) + } + } + ++PRE(sys_pidfd_getfd) ++{ ++ PRINT("sys_pidfd_getfd ( %ld, %ld, %ld )", SARG1, SARG2, SARG3); ++ PRE_REG_READ3(long, "pidfd_getfd", int, pidfd, int, targetfd, unsigned int, flags); ++} ++ ++POST(sys_pidfd_getfd) ++{ ++ vg_assert(SUCCESS); ++ if (!ML_(fd_allowed)(RES, "pidfd_getfd", tid, True)) { ++ VG_(close)(RES); ++ SET_STATUS_Failure( VKI_EMFILE ); ++ } else { ++ if (VG_(clo_track_fds)) ++ ML_(record_fd_open_nameless) (tid, RES); ++ } ++} ++ + #undef PRE + #undef POST + +diff --git a/coregrind/m_syswrap/syswrap-loongarch64-linux.c b/coregrind/m_syswrap/syswrap-loongarch64-linux.c +new file mode 100644 +index 000000000..108ddc465 +--- /dev/null ++++ b/coregrind/m_syswrap/syswrap-loongarch64-linux.c +@@ -0,0 +1,648 @@ ++ ++/*---------------------------------------------------------------------*/ ++/*--- Platform-specific syscalls stuff. syswrap-loongarch64-linux.c ---*/ ++/*---------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. 
++*/ ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_basics.h" ++#include "pub_core_vki.h" ++#include "pub_core_vkiscnums.h" ++#include "pub_core_threadstate.h" ++#include "pub_core_aspacemgr.h" ++#include "pub_core_libcbase.h" ++#include "pub_core_libcassert.h" ++#include "pub_core_libcprint.h" ++#include "pub_core_libcsignal.h" ++#include "pub_core_options.h" ++#include "pub_core_scheduler.h" ++#include "pub_core_sigframe.h" // For VG_(sigframe_destroy)() ++#include "pub_core_syscall.h" ++#include "pub_core_syswrap.h" ++#include "pub_core_tooliface.h" ++ ++#include "priv_types_n_macros.h" ++#include "priv_syswrap-generic.h" /* for decls of generic wrappers */ ++#include "priv_syswrap-linux.h" /* for decls of linux-ish wrappers */ ++#include "priv_syswrap-main.h" ++ ++ ++/* --------------------------------------------------------------------- ++ clone() handling ++ ------------------------------------------------------------------ */ ++ ++/* Call f(arg1), but first switch stacks, using 'stack' as the new ++ stack, and use 'retaddr' as f's return-to address. Also, clear all ++ the integer registers before entering f. */ ++__attribute__((noreturn)) ++void ML_(call_on_new_stack_0_1) ( Addr stack, ++ Addr retaddr, ++ void (*f) (Word), ++ Word arg1 ); ++asm ( ++".text\n" ++".globl vgModuleLocal_call_on_new_stack_0_1 \n\t" ++"vgModuleLocal_call_on_new_stack_0_1: \n\t" ++" move $sp, $a0 \n\t" /* sp = stack */ ++" move $ra, $a1 \n\t" /* ra = retaddr */ ++" move $t0, $a2 \n\t" /* t0 = f */ ++" move $a0, $a3 \n\t" /* a0 = arg1 */ ++" move $a1, $zero \n\t" /* zero all GP regs */ ++" move $a2, $zero \n\t" ++" move $a3, $zero \n\t" ++" move $a4, $zero \n\t" ++" move $a5, $zero \n\t" ++" move $a6, $zero \n\t" ++" move $a7, $zero \n\t" ++/* don't zero out t0 */ ++" move $t1, $zero \n\t" ++" move $t2, $zero \n\t" ++" move $t3, $zero \n\t" ++" move $t4, $zero \n\t" ++" move $t5, $zero \n\t" ++" move $t6, $zero \n\t" ++" move $t7, $zero \n\t" ++" move $t8, $zero \n\t" ++" jr $t0 \n\t" /* jump to f */ ++".previous \n\t" ++); ++ ++/* ++ Perform a clone system call. clone is strange because it has ++ fork()-like return-twice semantics, so it needs special ++ handling here. ++ ++ Upon entry, we have: ++ ++ Word (*fn)(void*) in a0 ++ void* child_stack in a1 ++ int flags in a2 ++ void* arg in a3 ++ pid_t* child_tid in a4 ++ pid_t* parent_tid in a5 ++ void* tls_ptr in a6 ++ ++ System call requires: ++ ++ unsigned long clone_flags in a0 ++ unsigned long newsp in a1 ++ int* parent_tidptr in a2 ++ int* child_tidptr in a3 ++ unsigned long tls in a4 ++ int __NR_clone in a7 ++*/ ++ ++#define __NR_CLONE VG_STRINGIFY(__NR_clone) ++#define __NR_EXIT VG_STRINGIFY(__NR_exit) ++ ++// See priv_syswrap-linux.h for arg profile. ++asm( ++".text \n\t" ++".globl do_syscall_clone_loongarch64_linux \n\t" ++"do_syscall_clone_loongarch64_linux: \n\t" ++/* Save ra */ ++" addi.d $sp, $sp, -16 \n\t" ++" st.d $ra, $sp, 0 \n\t" ++ ++/* Save fn and arg */ ++" addi.d $a1, $a1, -16 \n\t" ++" st.d $a0, $a1, 0 \n\t" /* fn */ ++" st.d $a3, $a1, 8 \n\t" /* arg */ ++ ++/* Call sys_clone */ ++" move $a0, $a2 \n\t" /* flags */ ++" move $a2, $a5 \n\t" /* parent */ ++" move $a3, $a4 \n\t" /* child */ ++" move $a4, $a6 \n\t" /* tls */ ++" li.w $a7, " __NR_CLONE " \n\t" ++" syscall 0 \n\t" ++ ++/* If we are a child? 
*/ ++" bnez $a0, 1f \n\t" ++ ++/* Restore fn and arg */ ++" ld.d $a1, $sp, 0 \n\t" /* fn */ ++" ld.d $a0, $sp, 8 \n\t" /* arg */ ++ ++/* Call fn(arg) */ ++" jr $a1 \n\t" ++ ++/* Call exit(a0) */ ++" li.w $a7, " __NR_EXIT" \n\t" ++" syscall 0 \n\t" ++ ++/* If we are parent or error, just return to caller */ ++"1: \n\t" ++" ld.d $ra, $sp, 0 \n\t" ++" addi.d $sp, $sp, 16 \n\t" ++" jr $ra \n\t" ++".previous \n\t" ++); ++ ++#undef __NR_CLONE ++#undef __NR_EXIT ++ ++/* --------------------------------------------------------------------- ++ More thread stuff ++ ------------------------------------------------------------------ */ ++ ++// loongarch64 doesn't have any architecture specific thread stuff that ++// needs to be cleaned up ++void VG_(cleanup_thread) ( ThreadArchState* arch ) ++{ ++} ++ ++/* --------------------------------------------------------------------- ++ PRE/POST wrappers for loongarch64/Linux-specific syscalls ++ ------------------------------------------------------------------ */ ++ ++#define PRE(name) DEFN_PRE_TEMPLATE(loongarch64_linux, name) ++#define POST(name) DEFN_POST_TEMPLATE(loongarch64_linux, name) ++ ++/* Add prototypes for the wrappers declared here, so that gcc doesn't ++ harass us for not having prototypes. Really this is a kludge -- ++ the right thing to do is to make these wrappers 'static' since they ++ aren't visible outside this file, but that requires even more macro ++ magic. */ ++DECL_TEMPLATE(loongarch64_linux, sys_ptrace); ++DECL_TEMPLATE(loongarch64_linux, sys_mmap); ++DECL_TEMPLATE(loongarch64_linux, sys_rt_sigreturn); ++ ++PRE(sys_ptrace) ++{ ++ PRINT("sys_ptrace ( %ld, %ld, %lx, %lx )", ++ SARG1, SARG2, ARG3, ARG4); ++ PRE_REG_READ4(int, "ptrace", ++ long, request, ++ long, pid, ++ unsigned long, addr, ++ unsigned long, data); ++ switch (ARG1) { ++ case VKI_PTRACE_PEEKTEXT: ++ case VKI_PTRACE_PEEKDATA: ++ case VKI_PTRACE_PEEKUSR: ++ PRE_MEM_WRITE("ptrace(peek)", ARG4, sizeof(long)); ++ break; ++ case VKI_PTRACE_GETEVENTMSG: ++ PRE_MEM_WRITE("ptrace(geteventmsg)", ARG4, sizeof(unsigned long)); ++ break; ++ case VKI_PTRACE_GETSIGINFO: ++ PRE_MEM_WRITE("ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t)); ++ break; ++ case VKI_PTRACE_SETSIGINFO: ++ PRE_MEM_READ("ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t)); ++ break; ++ case VKI_PTRACE_GETREGSET: ++ ML_(linux_PRE_getregset)(tid, ARG3, ARG4); ++ break; ++ default: ++ break; ++ } ++} ++ ++POST(sys_ptrace) ++{ ++ switch (ARG1) { ++ case VKI_PTRACE_TRACEME: ++ ML_(linux_POST_traceme)(tid); ++ break; ++ case VKI_PTRACE_PEEKTEXT: ++ case VKI_PTRACE_PEEKDATA: ++ case VKI_PTRACE_PEEKUSR: ++ POST_MEM_WRITE (ARG4, sizeof(long)); ++ break; ++ case VKI_PTRACE_GETEVENTMSG: ++ POST_MEM_WRITE (ARG4, sizeof(unsigned long)); ++ break; ++ case VKI_PTRACE_GETSIGINFO: ++ POST_MEM_WRITE (ARG4, sizeof(vki_siginfo_t)); ++ break; ++ case VKI_PTRACE_GETREGSET: ++ ML_(linux_POST_getregset)(tid, ARG3, ARG4); ++ break; ++ default: ++ break; ++ } ++} ++ ++PRE(sys_mmap) ++{ ++ SysRes r; ++ ++ PRINT("sys_mmap ( %#lx, %lu, %lu, %#lx, %lu, %lu )", ++ ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 ); ++ PRE_REG_READ6(long, "mmap", ++ unsigned long, addr, unsigned long, len, ++ unsigned long, prot, unsigned long, flags, ++ unsigned long, fd, vki_off_t, offset); ++ ++ r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 ); ++ SET_STATUS_from_SysRes(r); ++} ++ ++PRE(sys_rt_sigreturn) ++{ ++ /* See comments on PRE(sys_rt_sigreturn) in syswrap-loongarch64-linux.c for ++ an explanation of what follows. 
*/ ++ ThreadState* tst; ++ PRINT("rt_sigreturn ( )"); ++ ++ vg_assert(VG_(is_valid_tid)(tid)); ++ vg_assert(tid >= 1 && tid < VG_N_THREADS); ++ vg_assert(VG_(is_running_thread)(tid)); ++ ++ tst = VG_(get_ThreadState)(tid); ++ ++ /* This is only so that the PC is (might be) useful to report if ++ something goes wrong in the sigreturn */ ++ ML_(fixup_guest_state_to_restart_syscall)(&tst->arch); ++ ++ /* Restore register state from frame and remove it */ ++ VG_(sigframe_destroy)(tid, True); ++ ++ /* Tell the driver not to update the guest state with the "result", ++ and set a bogus result to keep it happy. */ ++ *flags |= SfNoWriteResult; ++ SET_STATUS_Success(0); ++ ++ /* Check to see if any signals arose as a result of this. */ ++ *flags |= SfPollAfter; ++} ++ ++#undef PRE ++#undef POST ++ ++/* --------------------------------------------------------------------- ++ The loongarch64/Linux syscall table ++ ------------------------------------------------------------------ */ ++ ++#define PLAX_(sysno, name) WRAPPER_ENTRY_X_(loongarch64_linux, sysno, name) ++#define PLAXY(sysno, name) WRAPPER_ENTRY_XY(loongarch64_linux, sysno, name) ++ ++// This table maps from __NR_xxx syscall numbers (from ++// linux/include/uapi/asm-generic/unistd.h) to the appropriate PRE/POST ++// sys_foo() wrappers on loongarch64 (as per sys_call_table in ++// linux/arch/loongarch/kernel/syscall.c). ++// ++// For those syscalls not handled by Valgrind, the annotation indicate its ++// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/? ++// (unknown). ++ ++static SyscallTableEntry syscall_main_table[] = { ++ LINXY(__NR_io_setup, sys_io_setup), // 0 ++ LINX_(__NR_io_destroy, sys_io_destroy), // 1 ++ LINX_(__NR_io_submit, sys_io_submit), // 2 ++ LINXY(__NR_io_cancel, sys_io_cancel), // 3 ++ LINXY(__NR_io_getevents, sys_io_getevents), // 4 ++ LINX_(__NR_setxattr, sys_setxattr), // 5 ++ LINX_(__NR_lsetxattr, sys_lsetxattr), // 6 ++ LINX_(__NR_fsetxattr, sys_fsetxattr), // 7 ++ LINXY(__NR_getxattr, sys_getxattr), // 8 ++ LINXY(__NR_lgetxattr, sys_lgetxattr), // 9 ++ LINXY(__NR_fgetxattr, sys_fgetxattr), // 10 ++ LINXY(__NR_listxattr, sys_listxattr), // 11 ++ LINXY(__NR_llistxattr, sys_llistxattr), // 12 ++ LINXY(__NR_flistxattr, sys_flistxattr), // 13 ++ LINX_(__NR_removexattr, sys_removexattr), // 14 ++ LINX_(__NR_lremovexattr, sys_lremovexattr), // 15 ++ LINX_(__NR_fremovexattr, sys_fremovexattr), // 16 ++ GENXY(__NR_getcwd, sys_getcwd), // 17 ++ LINXY(__NR_lookup_dcookie, sys_lookup_dcookie), // 18 ++ LINXY(__NR_eventfd2, sys_eventfd2), // 19 ++ LINXY(__NR_epoll_create1, sys_epoll_create1), // 20 ++ LINX_(__NR_epoll_ctl, sys_epoll_ctl), // 21 ++ LINXY(__NR_epoll_pwait, sys_epoll_pwait), // 22 ++ GENXY(__NR_dup, sys_dup), // 23 ++ LINXY(__NR_dup3, sys_dup3), // 24 ++ LINXY(__NR3264_fcntl, sys_fcntl), // 25 ++ LINXY(__NR_inotify_init1, sys_inotify_init1), // 26 ++ LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 27 ++ LINX_(__NR_inotify_rm_watch, sys_inotify_rm_watch), // 28 ++ LINXY(__NR_ioctl, sys_ioctl), // 29 ++ LINX_(__NR_ioprio_set, sys_ioprio_set), // 30 ++ LINX_(__NR_ioprio_get, sys_ioprio_get), // 31 ++ GENX_(__NR_flock, sys_flock), // 32 ++ LINX_(__NR_mknodat, sys_mknodat), // 33 ++ LINX_(__NR_mkdirat, sys_mkdirat), // 34 ++ LINX_(__NR_unlinkat, sys_unlinkat), // 35 ++ LINX_(__NR_symlinkat, sys_symlinkat), // 36 ++ LINX_(__NR_linkat, sys_linkat), // 37 ++ // (__NR_renameat, sys_renameat), // 38 ++ LINX_(__NR_umount2, sys_umount), // 39 ++ LINX_(__NR_mount, sys_mount), // 40 ++ 
LINX_(__NR_pivot_root, sys_pivot_root), // 41 ++ // (__NR_nfsservctl, sys_ni_syscall), // 42 ++ GENXY(__NR3264_statfs, sys_statfs), // 43 ++ GENXY(__NR3264_fstatfs, sys_fstatfs), // 44 ++ GENX_(__NR3264_truncate, sys_truncate), // 45 ++ GENX_(__NR3264_ftruncate, sys_ftruncate), // 46 ++ LINX_(__NR_fallocate, sys_fallocate), // 47 ++ LINX_(__NR_faccessat, sys_faccessat), // 48 ++ GENX_(__NR_chdir, sys_chdir), // 49 ++ GENX_(__NR_fchdir, sys_fchdir), // 50 ++ GENX_(__NR_chroot, sys_chroot), // 51 ++ GENX_(__NR_fchmod, sys_fchmod), // 52 ++ LINX_(__NR_fchmodat, sys_fchmodat), // 53 ++ LINX_(__NR_fchownat, sys_fchownat), // 54 ++ GENX_(__NR_fchown, sys_fchown), // 55 ++ LINXY(__NR_openat, sys_openat), // 56 ++ GENXY(__NR_close, sys_close), // 57 ++ LINX_(__NR_vhangup, sys_vhangup), // 58 ++ LINXY(__NR_pipe2, sys_pipe2), // 59 ++ LINX_(__NR_quotactl, sys_quotactl), // 60 ++ GENXY(__NR_getdents64, sys_getdents64), // 61 ++ LINX_(__NR3264_lseek, sys_lseek), // 62 ++ GENXY(__NR_read, sys_read), // 63 ++ GENX_(__NR_write, sys_write), // 64 ++ GENXY(__NR_readv, sys_readv), // 65 ++ GENX_(__NR_writev, sys_writev), // 66 ++ GENXY(__NR_pread64, sys_pread64), // 67 ++ GENX_(__NR_pwrite64, sys_pwrite64), // 68 ++ LINXY(__NR_preadv, sys_preadv), // 69 ++ LINX_(__NR_pwritev, sys_pwritev), // 70 ++ LINXY(__NR3264_sendfile, sys_sendfile), // 71 ++ LINXY(__NR_pselect6, sys_pselect6), // 72 ++ LINXY(__NR_ppoll, sys_ppoll), // 73 ++ LINXY(__NR_signalfd4, sys_signalfd4), // 74 ++ LINX_(__NR_vmsplice, sys_vmsplice), // 75 ++ LINX_(__NR_splice, sys_splice), // 76 ++ LINX_(__NR_tee, sys_tee), // 77 ++ LINX_(__NR_readlinkat, sys_readlinkat), // 78 ++ // (__NR3264_fstatat, sys_newfstatat), // 79 ++ // (__NR3264_fstat, sys_newfstat), // 80 ++ GENX_(__NR_sync, sys_sync), // 81 ++ GENX_(__NR_fsync, sys_fsync), // 82 ++ GENX_(__NR_fdatasync, sys_fdatasync), // 83 ++ LINX_(__NR_sync_file_range, sys_sync_file_range), // 84 ++ LINXY(__NR_timerfd_create, sys_timerfd_create), // 85 ++ LINXY(__NR_timerfd_settime, sys_timerfd_settime), // 86 ++ LINXY(__NR_timerfd_gettime, sys_timerfd_gettime), // 87 ++ LINX_(__NR_utimensat, sys_utimensat), // 88 ++ GENX_(__NR_acct, sys_acct), // 89 ++ LINXY(__NR_capget, sys_capget), // 90 ++ LINX_(__NR_capset, sys_capset), // 91 ++ LINX_(__NR_personality, sys_personality), // 92 ++ GENX_(__NR_exit, sys_exit), // 93 ++ LINX_(__NR_exit_group, sys_exit_group), // 94 ++ LINXY(__NR_waitid, sys_waitid), // 95 ++ LINX_(__NR_set_tid_address, sys_set_tid_address), // 96 ++ LINX_(__NR_unshare, sys_unshare), // 97 ++ LINXY(__NR_futex, sys_futex), // 98 ++ LINX_(__NR_set_robust_list, sys_set_robust_list), // 99 ++ LINXY(__NR_get_robust_list, sys_get_robust_list), // 100 ++ GENXY(__NR_nanosleep, sys_nanosleep), // 101 ++ GENXY(__NR_getitimer, sys_getitimer), // 102 ++ GENXY(__NR_setitimer, sys_setitimer), // 103 ++ // (__NR_kexec_load, sys_kexec_load), // 104 ++ LINX_(__NR_init_module, sys_init_module), // 105 ++ LINX_(__NR_delete_module, sys_delete_module), // 106 ++ LINXY(__NR_timer_create, sys_timer_create), // 107 ++ LINXY(__NR_timer_gettime, sys_timer_gettime), // 108 ++ LINX_(__NR_timer_getoverrun, sys_timer_getoverrun), // 109 ++ LINXY(__NR_timer_settime, sys_timer_settime), // 110 ++ LINX_(__NR_timer_delete, sys_timer_delete), // 111 ++ LINX_(__NR_clock_settime, sys_clock_settime), // 112 ++ LINXY(__NR_clock_gettime, sys_clock_gettime), // 113 ++ LINXY(__NR_clock_getres, sys_clock_getres), // 114 ++ LINXY(__NR_clock_nanosleep, sys_clock_nanosleep), // 115 ++ LINXY(__NR_syslog, sys_syslog), // 116 ++ 
PLAXY(__NR_ptrace, sys_ptrace), // 117 ++ LINXY(__NR_sched_setparam, sys_sched_setparam), // 118 ++ LINX_(__NR_sched_setscheduler, sys_sched_setscheduler), // 119 ++ LINX_(__NR_sched_getscheduler, sys_sched_getscheduler), // 120 ++ LINXY(__NR_sched_getparam, sys_sched_getparam), // 121 ++ LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 122 ++ LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 123 ++ LINX_(__NR_sched_yield, sys_sched_yield), // 124 ++ LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max), // 125 ++ LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min), // 126 ++ LINXY(__NR_sched_rr_get_interval, sys_sched_rr_get_interval), // 127 ++ // (__NR_restart_syscall, sys_restart_syscall), // 128 ++ GENX_(__NR_kill, sys_kill), // 129 ++ LINXY(__NR_tkill, sys_tkill), // 130 ++ LINX_(__NR_tgkill, sys_tgkill), // 131 ++ GENXY(__NR_sigaltstack, sys_sigaltstack), // 132 ++ LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 133 ++ LINXY(__NR_rt_sigaction, sys_rt_sigaction), // 134 ++ LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask), // 135 ++ LINXY(__NR_rt_sigpending, sys_rt_sigpending), // 136 ++ LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait), // 137 ++ LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo), // 138 ++ PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn), // 139 ++ GENX_(__NR_setpriority, sys_setpriority), // 140 ++ GENX_(__NR_getpriority, sys_getpriority), // 141 ++ // (__NR_reboot, sys_reboot), // 142 ++ GENX_(__NR_setregid, sys_setregid), // 143 ++ GENX_(__NR_setgid, sys_setgid), // 144 ++ GENX_(__NR_setreuid, sys_setreuid), // 145 ++ GENX_(__NR_setuid, sys_setuid), // 146 ++ LINX_(__NR_setresuid, sys_setresuid), // 147 ++ LINXY(__NR_getresuid, sys_getresuid), // 148 ++ LINX_(__NR_setresgid, sys_setresgid), // 149 ++ LINXY(__NR_getresgid, sys_getresgid), // 150 ++ LINX_(__NR_setfsuid, sys_setfsuid), // 151 ++ LINX_(__NR_setfsgid, sys_setfsgid), // 152 ++ GENXY(__NR_times, sys_times), // 153 ++ GENX_(__NR_setpgid, sys_setpgid), // 154 ++ GENX_(__NR_getpgid, sys_getpgid), // 155 ++ GENX_(__NR_getsid, sys_getsid), // 156 ++ GENX_(__NR_setsid, sys_setsid), // 157 ++ GENXY(__NR_getgroups, sys_getgroups), // 158 ++ GENX_(__NR_setgroups, sys_setgroups), // 159 ++ GENXY(__NR_uname, sys_newuname), // 160 ++ GENX_(__NR_sethostname, sys_sethostname), // 161 ++ // (__NR_setdomainname, sys_setdomainname), // 162 ++ // (__NR_getrlimit, sys_old_getrlimit), // 163 ++ // (__NR_setrlimit, sys_setrlimit), // 164 ++ GENXY(__NR_getrusage, sys_getrusage), // 165 ++ GENX_(__NR_umask, sys_umask), // 166 ++ LINXY(__NR_prctl, sys_prctl), // 167 ++ LINXY(__NR_getcpu, sys_getcpu), // 168 ++ GENXY(__NR_gettimeofday, sys_gettimeofday), // 169 ++ GENX_(__NR_settimeofday, sys_settimeofday), // 170 ++ LINXY(__NR_adjtimex, sys_adjtimex), // 171 ++ GENX_(__NR_getpid, sys_getpid), // 172 ++ GENX_(__NR_getppid, sys_getppid), // 173 ++ GENX_(__NR_getuid, sys_getuid), // 174 ++ GENX_(__NR_geteuid, sys_geteuid), // 175 ++ GENX_(__NR_getgid, sys_getgid), // 176 ++ GENX_(__NR_getegid, sys_getegid), // 177 ++ LINX_(__NR_gettid, sys_gettid), // 178 ++ LINXY(__NR_sysinfo, sys_sysinfo), // 179 ++ LINXY(__NR_mq_open, sys_mq_open), // 180 ++ LINX_(__NR_mq_unlink, sys_mq_unlink), // 181 ++ LINX_(__NR_mq_timedsend, sys_mq_timedsend), // 182 ++ LINXY(__NR_mq_timedreceive, sys_mq_timedreceive), // 183 ++ LINX_(__NR_mq_notify, sys_mq_notify), // 184 ++ LINXY(__NR_mq_getsetattr, sys_mq_getsetattr), // 185 ++ LINX_(__NR_msgget, sys_msgget), // 186 ++ LINXY(__NR_msgctl, sys_msgctl), // 187 ++ 
LINXY(__NR_msgrcv, sys_msgrcv), // 188 ++ LINX_(__NR_msgsnd, sys_msgsnd), // 189 ++ LINX_(__NR_semget, sys_semget), // 190 ++ LINXY(__NR_semctl, sys_semctl), // 191 ++ LINX_(__NR_semtimedop, sys_semtimedop), // 192 ++ LINX_(__NR_semop, sys_semop), // 193 ++ LINX_(__NR_shmget, sys_shmget), // 194 ++ LINXY(__NR_shmctl, sys_shmctl), // 195 ++ LINXY(__NR_shmat, sys_shmat), // 196 ++ LINXY(__NR_shmdt, sys_shmdt), // 197 ++ LINXY(__NR_socket, sys_socket), // 198 ++ LINXY(__NR_socketpair, sys_socketpair), // 199 ++ LINX_(__NR_bind, sys_bind), // 200 ++ LINX_(__NR_listen, sys_listen), // 201 ++ LINXY(__NR_accept, sys_accept), // 202 ++ LINX_(__NR_connect, sys_connect), // 203 ++ LINXY(__NR_getsockname, sys_getsockname), // 204 ++ LINXY(__NR_getpeername, sys_getpeername), // 205 ++ LINX_(__NR_sendto, sys_sendto), // 206 ++ LINXY(__NR_recvfrom, sys_recvfrom), // 207 ++ LINX_(__NR_setsockopt, sys_setsockopt), // 208 ++ LINXY(__NR_getsockopt, sys_getsockopt), // 209 ++ LINX_(__NR_shutdown, sys_shutdown), // 210 ++ LINX_(__NR_sendmsg, sys_sendmsg), // 211 ++ LINXY(__NR_recvmsg, sys_recvmsg), // 212 ++ LINX_(__NR_readahead, sys_readahead), // 213 ++ GENX_(__NR_brk, sys_brk), // 214 ++ GENXY(__NR_munmap, sys_munmap), // 215 ++ GENX_(__NR_mremap, sys_mremap), // 216 ++ LINX_(__NR_add_key, sys_add_key), // 217 ++ LINX_(__NR_request_key, sys_request_key), // 218 ++ LINXY(__NR_keyctl, sys_keyctl), // 219 ++ LINX_(__NR_clone, sys_clone), // 220 ++ GENX_(__NR_execve, sys_execve), // 221 ++ PLAX_(__NR3264_mmap, sys_mmap), // 222 ++ LINX_(__NR3264_fadvise64, sys_fadvise64), // 223 ++ // (__NR_swapon, sys_swapon), // 224 ++ // (__NR_swapoff, sys_swapoff), // 225 ++ GENXY(__NR_mprotect, sys_mprotect), // 226 ++ GENX_(__NR_msync, sys_msync), // 227 ++ GENX_(__NR_mlock, sys_mlock), // 228 ++ GENX_(__NR_munlock, sys_munlock), // 229 ++ GENX_(__NR_mlockall, sys_mlockall), // 230 ++ LINX_(__NR_munlockall, sys_munlockall), // 231 ++ GENXY(__NR_mincore, sys_mincore), // 232 ++ GENX_(__NR_madvise, sys_madvise), // 233 ++ // (__NR_remap_file_pages, sys_remap_file_pages), // 234 ++ LINX_(__NR_mbind, sys_mbind), // 235 ++ LINXY(__NR_get_mempolicy, sys_get_mempolicy), // 236 ++ LINX_(__NR_set_mempolicy, sys_set_mempolicy), // 237 ++ // (__NR_migrate_pages, sys_migrate_pages), // 238 ++ LINXY(__NR_move_pages, sys_move_pages), // 239 ++ LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo), // 240 ++ LINXY(__NR_perf_event_open, sys_perf_event_open), // 241 ++ LINXY(__NR_accept4, sys_accept4), // 242 ++ LINXY(__NR_recvmmsg, sys_recvmmsg), // 243 ++ ++ GENXY(__NR_wait4, sys_wait4), // 260 ++ LINXY(__NR_prlimit64, sys_prlimit64), // 261 ++ LINXY(__NR_fanotify_init, sys_fanotify_init), // 262 ++ LINX_(__NR_fanotify_mark, sys_fanotify_mark), // 263 ++ LINXY(__NR_name_to_handle_at, sys_name_to_handle_at), // 264 ++ LINXY(__NR_open_by_handle_at, sys_open_by_handle_at), // 265 ++ LINXY(__NR_clock_adjtime, sys_clock_adjtime), // 266 ++ LINX_(__NR_syncfs, sys_syncfs), // 267 ++ LINX_(__NR_setns, sys_setns), // 268 ++ LINXY(__NR_sendmmsg, sys_sendmmsg), // 269 ++ LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 270 ++ LINX_(__NR_process_vm_writev, sys_process_vm_writev), // 271 ++ LINX_(__NR_kcmp, sys_kcmp), // 272 ++ LINX_(__NR_finit_module, sys_finit_module), // 273 ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 274 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 275 ++ LINX_(__NR_renameat2, sys_renameat2), // 276 ++ // (__NR_seccomp, sys_seccomp), // 277 ++ LINXY(__NR_getrandom, sys_getrandom), // 278 ++ 
LINXY(__NR_memfd_create, sys_memfd_create), // 279 ++ LINXY(__NR_bpf, sys_bpf), // 280 ++ LINX_(__NR_execveat, sys_execveat), // 281 ++ // (__NR_userfaultfd, sys_userfaultfd), // 282 ++ LINX_(__NR_membarrier, sys_membarrier), // 283 ++ // (__NR_mlock2, sys_mlock2), // 284 ++ LINX_(__NR_copy_file_range, sys_copy_file_range), // 285 ++ LINXY(__NR_preadv2, sys_preadv2), // 286 ++ LINX_(__NR_pwritev2, sys_pwritev2), // 287 ++ // (__NR_pkey_mprotect, sys_pkey_mprotect), // 288 ++ // (__NR_pkey_alloc, sys_pkey_alloc), // 289 ++ // (__NR_pkey_free, sys_pkey_free), // 290 ++ LINXY(__NR_statx, sys_statx), // 291 ++ // (__NR_io_pgetevents, sys_io_pgetevents), // 292 ++ // (__NR_rseq, sys_rseq), // 293 ++ // (__NR_kexec_file_load, sys_kexec_file_load), // 294 ++ ++ // (__NR_pidfd_send_signal, sys_pidfd_send_signal), // 424 ++ LINXY(__NR_io_uring_setup, sys_io_uring_setup), // 425 ++ LINXY(__NR_io_uring_enter, sys_io_uring_enter), // 426 ++ LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 ++ // (__NR_open_tree, sys_open_tree), // 428 ++ // (__NR_move_mount, sys_move_mount), // 429 ++ // (__NR_fsopen, sys_fsopen), // 430 ++ // (__NR_fsconfig, sys_fsconfig), // 431 ++ // (__NR_fsmount, sys_fsmount), // 432 ++ // (__NR_fspick, sys_fspick), // 433 ++ // (__NR_pidfd_open, sys_pidfd_open), // 434 ++ GENX_(__NR_clone3, sys_ni_syscall), // 435 ++ LINXY(__NR_close_range, sys_close_range), // 436 ++ LINXY(__NR_openat2, sys_openat2), // 437 ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 ++ LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ // (__NR_process_madvise, sys_process_madvise), // 440 ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ // (__NR_mount_setattr, sys_mount_setattr), // 442 ++ // (__NR_quotactl_fd, sys_quotactl_fd), // 443 ++ // (__NR_landlock_create_ruleset, sys_landlock_create_ruleset), // 444 ++ // (__NR_landlock_add_rule, sys_landlock_add_rule), // 445 ++ // (__NR_landlock_restrict_self, sys_landlock_restrict_self), // 446 ++ // (__NR_memfd_secret, sys_memfd_secret), // 447 ++ // (__NR_process_mrelease, sys_process_mrelease), // 448 ++ // (__NR_futex_waitv, sys_futex_waitv) // 449 ++ // (__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) // 450 ++ // (__NR_cachestat, sys_cachestat) // 451 ++ LINX_(__NR_fchmodat2, sys_fchmodat2) // 452 ++}; ++ ++SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) ++{ ++ const UInt syscall_main_table_size ++ = sizeof(syscall_main_table) / sizeof(syscall_main_table[0]); ++ ++ /* Is it in the contiguous initial section of the table? 
*/ ++ if (sysno < syscall_main_table_size) { ++ SyscallTableEntry* sys = &syscall_main_table[sysno]; ++ if (sys->before == NULL) ++ return NULL; /* no entry */ ++ else ++ return sys; ++ } ++ ++ /* Can't find a wrapper */ ++ return NULL; ++} ++ ++#endif /* defined(VGP_loongarch64_linux) */ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end syswrap-loongarch64-linux.c ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c +index 4f8c0fe1c..b4f95e570 100644 +--- a/coregrind/m_syswrap/syswrap-main.c ++++ b/coregrind/m_syswrap/syswrap-main.c +@@ -60,20 +60,21 @@ + /* Useful info which needs to be recorded somewhere: + Use of registers in syscalls is: + +- NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT ++ NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT + LINUX: +- x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) +- amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) +- ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) +- ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) +- arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) +- mips32 v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM) +- mips64 v0 a0 a1 a2 a3 a4 a5 a6 a7 v0 (== NUM) +- arm64 x8 x0 x1 x2 x3 x4 x5 n/a n/a x0 ?? (== ARG1??) ++ x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) ++ amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) ++ ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) ++ ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) ++ arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) ++ mips32 v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM) ++ mips64 v0 a0 a1 a2 a3 a4 a5 a6 a7 v0 (== NUM) ++ arm64 x8 x0 x1 x2 x3 x4 x5 n/a n/a x0 ?? (== ARG1??) ++ loongarch64 r11 r4 r5 r6 r7 r8 r9 n/a n/a r4 (== ARG1) + + FreeBSD: +- x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c +- amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c ++ x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c ++ amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c + + On s390x the svc instruction is used for system calls. The system call + number is encoded in the instruction (8 bit immediate field). 
Since Linux +@@ -703,6 +704,17 @@ void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical, + canonical->arg6 = gst->guest_r9; // a5 + canonical->arg7 = gst->guest_r10; // a6 + canonical->arg8 = gst->guest_r11; // a7 ++ ++#elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ canonical->sysno = gst->guest_R11; // a7 ++ canonical->arg1 = gst->guest_R4; // a0 ++ canonical->arg2 = gst->guest_R5; // a1 ++ canonical->arg3 = gst->guest_R6; // a2 ++ canonical->arg4 = gst->guest_R7; // a3 ++ canonical->arg5 = gst->guest_R8; // a4 ++ canonical->arg6 = gst->guest_R9; // a5 ++ + #elif defined(VGP_x86_darwin) + VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; + UWord *stack = (UWord *)gst->guest_ESP; +@@ -1126,6 +1138,16 @@ void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs* canonical, + gst->guest_r10 = canonical->arg7; + gst->guest_r11 = canonical->arg8; + ++#elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ gst->guest_R11 = canonical->sysno; ++ gst->guest_R4 = canonical->arg1; ++ gst->guest_R5 = canonical->arg2; ++ gst->guest_R6 = canonical->arg3; ++ gst->guest_R7 = canonical->arg4; ++ gst->guest_R8 = canonical->arg5; ++ gst->guest_R9 = canonical->arg6; ++ + #elif defined(VGP_x86_solaris) + VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; + UWord *stack = (UWord *)gst->guest_ESP; +@@ -1240,6 +1262,13 @@ void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus* canonical, + RegWord a0 = gst->guest_r4; // a0 + canonical->sres = VG_(mk_SysRes_nanomips_linux)(a0); + canonical->what = SsComplete; ++ ++# elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ ULong a0 = gst->guest_R4; // a0 ++ canonical->sres = VG_(mk_SysRes_loongarch64_linux)(a0); ++ canonical->what = SsComplete; ++ + # elif defined(VGP_amd64_freebsd) + /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */ + VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; +@@ -1606,6 +1635,20 @@ void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid, + VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, + OFFSET_mips32_r4, sizeof(UWord) ); + ++# elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ vg_assert(canonical->what == SsComplete); ++ if (sr_isError(canonical->sres)) { ++ /* This isn't exactly right, in that really a Failure with res ++ not in the range 1 .. 4095 is unrepresentable in the ++ Linux-loongarch64 scheme. Oh well. 
*/ ++ gst->guest_R4 = - (Long)sr_Err(canonical->sres); ++ } else { ++ gst->guest_R4 = sr_Res(canonical->sres); ++ } ++ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, ++ OFFSET_loongarch64_R4, sizeof(UWord) ); ++ + # elif defined(VGP_x86_solaris) + VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; + SysRes sres = canonical->sres; +@@ -1855,6 +1898,15 @@ void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout ) + layout->s_arg7 = sizeof(UWord) * 1; + layout->s_arg8 = sizeof(UWord) * 2; + ++#elif defined(VGP_loongarch64_linux) ++ layout->o_sysno = OFFSET_loongarch64_R11; ++ layout->o_arg1 = OFFSET_loongarch64_R4; ++ layout->o_arg2 = OFFSET_loongarch64_R5; ++ layout->o_arg3 = OFFSET_loongarch64_R6; ++ layout->o_arg4 = OFFSET_loongarch64_R7; ++ layout->o_arg5 = OFFSET_loongarch64_R8; ++ layout->o_arg6 = OFFSET_loongarch64_R9; ++ + #else + # error "getSyscallLayout: unknown arch" + #endif +@@ -2899,6 +2951,25 @@ void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch ) + arch->vex.guest_PC -= 2; + } + } ++ ++#elif defined(VGP_loongarch64_linux) ++ arch->vex.guest_PC -= 4; // sizeof(loongarch instr) ++ ++ /* Make sure our caller is actually sane, and we're really backing ++ back over a syscall. ++ ++ syscall 0 == 00 2B 00 00 ++ */ ++ { ++ UChar *p = (UChar *)(Addr)(arch->vex.guest_PC); ++ if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x2B || p[3] != 0x00) ++ VG_(message)(Vg_DebugMsg, ++ "?! restarting over syscall at %#llx %02x %02x %02x %02x\n", ++ (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]); ++ ++ vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x2B && p[3] == 0x00); ++ } ++ + #elif defined(VGP_x86_solaris) + arch->vex.guest_EIP -= 2; // sizeof(int $0x91) or sizeof(syscall) + +diff --git a/coregrind/m_syswrap/syswrap-mips32-linux.c b/coregrind/m_syswrap/syswrap-mips32-linux.c +index de27998b3..47d0a2fa3 100644 +--- a/coregrind/m_syswrap/syswrap-mips32-linux.c ++++ b/coregrind/m_syswrap/syswrap-mips32-linux.c +@@ -1140,7 +1140,11 @@ static SyscallTableEntry syscall_main_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_ (__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) +diff --git a/coregrind/m_syswrap/syswrap-mips64-linux.c b/coregrind/m_syswrap/syswrap-mips64-linux.c +index 67e7c2c2f..037e34a79 100644 +--- a/coregrind/m_syswrap/syswrap-mips64-linux.c ++++ b/coregrind/m_syswrap/syswrap-mips64-linux.c +@@ -818,7 +818,10 @@ static SyscallTableEntry syscall_main_table[] = { + LINXY (__NR_pidfd_open, sys_pidfd_open), + GENX_ (__NR_clone3, sys_ni_syscall), + LINXY (__NR_close_range, sys_close_range), ++ LINXY (__NR_pidfd_getfd, sys_pidfd_getfd), + LINX_ (__NR_faccessat2, sys_faccessat2), ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), + }; + + SyscallTableEntry * ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-nanomips-linux.c b/coregrind/m_syswrap/syswrap-nanomips-linux.c +index 9c535c68e..043932716 100644 +--- a/coregrind/m_syswrap/syswrap-nanomips-linux.c ++++ b/coregrind/m_syswrap/syswrap-nanomips-linux.c +@@ -827,7 +827,10 @@ static SyscallTableEntry syscall_main_table[] = { + LINXY (__NR_pidfd_open, sys_pidfd_open), + GENX_ (__NR_clone3, sys_ni_syscall), + LINXY (__NR_close_range, sys_close_range), ++ 
LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), + LINX_ (__NR_faccessat2, sys_faccessat2), ++ LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) +diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c +index 12c073027..81a518fe0 100644 +--- a/coregrind/m_syswrap/syswrap-ppc32-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c +@@ -1060,7 +1060,11 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c +index 3c33d1267..f72e4246b 100644 +--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c +@@ -1029,7 +1029,11 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c +index a377cb731..2c2a438fb 100644 +--- a/coregrind/m_syswrap/syswrap-s390x-linux.c ++++ b/coregrind/m_syswrap/syswrap-s390x-linux.c +@@ -870,7 +870,14 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c +index a9ba15dfe..f57b5395c 100644 +--- a/coregrind/m_syswrap/syswrap-x86-linux.c ++++ b/coregrind/m_syswrap/syswrap-x86-linux.c +@@ -1651,11 +1651,14 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + LINXY(__NR_openat2, sys_openat2), // 437 +- +- ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 + ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S +index da9697232..f02d53f08 100644 +--- a/coregrind/m_trampoline.S ++++ b/coregrind/m_trampoline.S +@@ -1520,6 +1520,53 @@ VG_(trampoline_stuff_end): + # undef UD2_1024 + # undef UD2_PAGE + ++/*------------------- loongarch64-linux -------------------*/ ++#else ++#if defined(VGP_loongarch64_linux) ++ ++.global VG_(trampoline_stuff_start) ++VG_(trampoline_stuff_start): 
++ ++.global VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn) ++VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn): ++ li.w $a7, __NR_rt_sigreturn ++ syscall 0 ++ .long 0 /*illegal insn*/ ++ ++/* There's no particular reason that this needs to be handwritten ++ assembly, but since that's what this file contains, here's a ++ simple strlen() and strchr() implementations. ++*/ ++ ++.global VG_(loongarch64_linux_REDIR_FOR_strlen) ++.type VG_(loongarch64_linux_REDIR_FOR_strlen), @function ++VG_(loongarch64_linux_REDIR_FOR_strlen): ++ move $t0, $a0 ++ strlen_loop: ++ ld.bu $t1, $a0, 0 ++ addi.d $a0, $a0, 1 ++ bne $t1, $zero, strlen_loop ++ sub.d $a0, $a0, $t0 ++ addi.d $a0, $a0, -1 ++ jr $ra ++.size VG_(loongarch64_linux_REDIR_FOR_strlen), .-VG_(loongarch64_linux_REDIR_FOR_strlen) ++ ++.global VG_(loongarch64_linux_REDIR_FOR_strchr) ++.type VG_(loongarch64_linux_REDIR_FOR_strchr), @function ++VG_(loongarch64_linux_REDIR_FOR_strchr): ++ strchr_loop: ++ ld.bu $t0, $a0, 0 ++ beq $t0, $a1, strchr_end ++ addi.d $a0, $a0, 1 ++ bne $t0, $zero, strchr_loop ++ move $a0, $zero ++ strchr_end: ++ jr $ra ++.size VG_(loongarch64_linux_REDIR_FOR_strchr), .-VG_(loongarch64_linux_REDIR_FOR_strchr) ++ ++.global VG_(trampoline_stuff_end) ++VG_(trampoline_stuff_end): ++ + /*---------------- x86-solaris ----------------*/ + #else + #if defined(VGP_x86_solaris) +@@ -1719,6 +1766,7 @@ VG_(trampoline_stuff_end): + #endif + #endif + #endif ++#endif + + /* Let the linker know we don't need an executable stack */ + MARK_STACK_NO_EXEC +diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c +index 8ae06d2a6..8afaf8e0f 100644 +--- a/coregrind/m_translate.c ++++ b/coregrind/m_translate.c +@@ -1750,6 +1750,11 @@ Bool VG_(translate) ( ThreadId tid, + vex_archinfo.arm64_requires_fallback_LLSC; + # endif + ++# if defined(VGP_loongarch64_linux) ++ /* For now, we only use fallback LLSC */ ++ vex_abiinfo.guest__use_fallback_LLSC = True; ++# endif ++ + /* Set up closure args. */ + closure.tid = tid; + closure.nraddr = nraddr; +diff --git a/coregrind/m_vki.c b/coregrind/m_vki.c +index 0cc1882a1..11c5fe316 100644 +--- a/coregrind/m_vki.c ++++ b/coregrind/m_vki.c +@@ -37,13 +37,13 @@ + describing the kernel interface, so this file is nearly empty. */ + + +-/* ppc32/64, arm64 and mips32/64 (linux) determine page size at startup, +- hence m_vki is the logical place to store that info. */ ++/* ppc32/64, arm64, mips32/64 and loongarch64 (linux) determine page size ++ at startup, hence m_vki is the logical place to store that info. 
*/ + + #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \ + || defined(VGP_ppc64le_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + unsigned long VKI_PAGE_SHIFT = 12; + unsigned long VKI_PAGE_SIZE = 1UL << 12; + #endif +diff --git a/coregrind/pub_core_aspacemgr.h b/coregrind/pub_core_aspacemgr.h +index b867108a2..a2b41f374 100644 +--- a/coregrind/pub_core_aspacemgr.h ++++ b/coregrind/pub_core_aspacemgr.h +@@ -335,7 +335,8 @@ extern Bool VG_(am_relocate_nooverlap_client)( /*OUT*/Bool* need_discard, + #if defined(VGP_ppc32_linux) \ + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + # define VG_STACK_GUARD_SZB 65536 // 1 or 16 pages + #else + # define VG_STACK_GUARD_SZB 8192 // 2 pages +diff --git a/coregrind/pub_core_basics.h b/coregrind/pub_core_basics.h +index abc5a066a..132545166 100644 +--- a/coregrind/pub_core_basics.h ++++ b/coregrind/pub_core_basics.h +@@ -55,8 +55,8 @@ + + typedef + struct { +- ULong r_pc; /* x86:EIP, amd64:RIP, ppc:CIA, arm:R15, mips:pc */ +- ULong r_sp; /* x86:ESP, amd64:RSP, ppc:R1, arm:R13, mips:sp */ ++ ULong r_pc; /* x86:EIP, amd64:RIP, ppc:CIA, arm:R15, mips:pc, loongarch64:pc */ ++ ULong r_sp; /* x86:ESP, amd64:RSP, ppc:R1, arm:R13, mips:sp, loongarch64:sp */ + union { + struct { + UInt r_ebp; +@@ -102,6 +102,10 @@ typedef + ULong r31; /* Return address of the last subroutine call */ + ULong r28; + } MIPS64; ++ struct { ++ ULong r_fp; /* Stack frame pointer or static variable */ ++ ULong r_ra; /* Return address of the last subroutine call */ ++ } LOONGARCH64; + } misc; + } + UnwindStartRegs; +diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h +index 938ed00cc..d8c0db545 100644 +--- a/coregrind/pub_core_debuginfo.h ++++ b/coregrind/pub_core_debuginfo.h +@@ -131,6 +131,10 @@ typedef + typedef + struct { Addr pc; Addr sp; Addr fp; Addr ra; } + D3UnwindRegs; ++#elif defined(VGA_loongarch64) ++typedef ++ struct { Addr pc; Addr ra; Addr sp; Addr fp; } ++ D3UnwindRegs; + #else + # error "Unsupported arch" + #endif +diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h +index a9b7dd8b1..4793d599c 100644 +--- a/coregrind/pub_core_machine.h ++++ b/coregrind/pub_core_machine.h +@@ -126,6 +126,11 @@ + # define VG_ELF_MACHINE EM_NANOMIPS + # define VG_ELF_CLASS ELFCLASS32 + # undef VG_PLAT_USES_PPCTOC ++#elif defined(VGP_loongarch64_linux) ++# define VG_ELF_DATA2XXX ELFDATA2LSB ++# define VG_ELF_MACHINE EM_LOONGARCH ++# define VG_ELF_CLASS ELFCLASS64 ++# undef VG_PLAT_USES_PPCTOC + #else + # error Unknown platform + #endif +@@ -163,6 +168,10 @@ + # define VG_INSTR_PTR guest_PC + # define VG_STACK_PTR guest_r29 + # define VG_FRAME_PTR guest_r30 ++#elif defined(VGA_loongarch64) ++# define VG_INSTR_PTR guest_PC ++# define VG_STACK_PTR guest_R3 ++# define VG_FRAME_PTR guest_R22 + #else + # error Unknown arch + #endif +@@ -234,6 +243,10 @@ void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs, + s390x: initially: call VG_(machine_get_hwcaps) + + then safe to use VG_(machine_get_VexArchInfo) ++ ------------- ++ loongarch64: initially: call VG_(machine_get_hwcaps) ++ ++ then safe to use VG_(machine_get_VexArchInfo) + + 
VG_(machine_get_hwcaps) may use signals (although it attempts to + leave signal state unchanged) and therefore should only be +diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h +index b5922ca50..d285caa1a 100644 +--- a/coregrind/pub_core_mallocfree.h ++++ b/coregrind/pub_core_mallocfree.h +@@ -78,6 +78,7 @@ typedef Int ArenaId; + defined(VGP_ppc64le_linux) || \ + defined(VGP_s390x_linux) || \ + (defined(VGP_mips64_linux) && !defined(VGABI_N32)) || \ ++ defined(VGP_loongarch64_linux) || \ + defined(VGP_x86_freebsd) || \ + defined(VGP_amd64_freebsd) || \ + defined(VGP_x86_darwin) || \ +diff --git a/coregrind/pub_core_signals.h b/coregrind/pub_core_signals.h +index ae8555ba8..c53323fbe 100644 +--- a/coregrind/pub_core_signals.h ++++ b/coregrind/pub_core_signals.h +@@ -77,6 +77,7 @@ extern void VG_(synth_sigill) (ThreadId tid, Addr addr); + extern void VG_(synth_sigtrap) (ThreadId tid); + extern void VG_(synth_sigbus) (ThreadId tid); + extern void VG_(synth_sigfpe) (ThreadId tid, UInt code); ++extern void VG_(synth_sigsys) (ThreadId tid); + + /* Extend the stack to cover addr, if possible */ + extern Bool VG_(extend_stack)(ThreadId tid, Addr addr); +diff --git a/coregrind/pub_core_syscall.h b/coregrind/pub_core_syscall.h +index 6c4f82591..5d7ff4435 100644 +--- a/coregrind/pub_core_syscall.h ++++ b/coregrind/pub_core_syscall.h +@@ -105,6 +105,7 @@ extern SysRes VG_(mk_SysRes_mips32_linux)( UWord v0, UWord v1, + extern SysRes VG_(mk_SysRes_mips64_linux)( ULong v0, ULong v1, + ULong a3 ); + extern SysRes VG_(mk_SysRes_nanomips_linux)( UWord a0); ++extern SysRes VG_(mk_SysRes_loongarch64_linux)( UWord a0 ); + extern SysRes VG_(mk_SysRes_x86_solaris) ( Bool isErr, UInt val, UInt val2 ); + extern SysRes VG_(mk_SysRes_amd64_solaris) ( Bool isErr, ULong val, ULong val2 ); + extern SysRes VG_(mk_SysRes_Error) ( UWord val ); +diff --git a/coregrind/pub_core_trampoline.h b/coregrind/pub_core_trampoline.h +index 54c575a72..3700acb1d 100644 +--- a/coregrind/pub_core_trampoline.h ++++ b/coregrind/pub_core_trampoline.h +@@ -171,6 +171,12 @@ extern Char* VG_(nanomips_linux_REDIR_FOR_index)( const Char*, Int ); + extern UInt VG_(nanomips_linux_REDIR_FOR_strlen)( void* ); + #endif + ++#if defined(VGP_loongarch64_linux) ++extern Addr VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn); ++extern UInt VG_(loongarch64_linux_REDIR_FOR_strlen)( void* ); ++extern Char* VG_(loongarch64_linux_REDIR_FOR_strchr)( const Char*, Int ); ++#endif ++ + #if defined(VGP_x86_solaris) + extern SizeT VG_(x86_solaris_REDIR_FOR_strcmp)(const HChar *, const HChar *); + extern SizeT VG_(x86_solaris_REDIR_FOR_strlen)(const HChar *); +diff --git a/coregrind/pub_core_transtab.h b/coregrind/pub_core_transtab.h +index 6cc11f658..fe9392626 100644 +--- a/coregrind/pub_core_transtab.h ++++ b/coregrind/pub_core_transtab.h +@@ -81,7 +81,8 @@ static inline UWord VG_TT_FAST_HASH ( Addr guest ) { + } + + #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ +- || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64) ++ || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64) \ ++ || defined(VGA_loongarch64) + static inline UWord VG_TT_FAST_HASH ( Addr guest ) { + // Instructions are 4-byte aligned. 
+ UWord merged = ((UWord)guest) >> 2; +diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h +index 8b585f17d..e73c89ae3 100644 +--- a/coregrind/pub_core_transtab_asm.h ++++ b/coregrind/pub_core_transtab_asm.h +@@ -83,7 +83,7 @@ + #if defined(VGA_amd64) || defined(VGA_arm64) \ + || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || (defined(VGA_mips64) && defined(VGABI_64)) \ +- || defined(VGA_s390x) ++ || defined(VGA_s390x) || defined(VGA_loongarch64) + // And all other 64-bit hosts + # define VG_FAST_CACHE_SET_BITS 6 + // These FCS_{g,h}{0,1,2,3} are the values of +diff --git a/coregrind/vgdb-invoker-ptrace.c b/coregrind/vgdb-invoker-ptrace.c +index 78a6a168c..798fe5f8e 100644 +--- a/coregrind/vgdb-invoker-ptrace.c ++++ b/coregrind/vgdb-invoker-ptrace.c +@@ -50,9 +50,10 @@ + // Rather we use PTRACE_GETREGS or PTRACE_PEEKUSER. + + // The only platform on which we must use PTRACE_GETREGSET is arm64. ++// We use PTRACE_GETREGSET on loongarch64 as well. + // The resulting vgdb cannot work in a bi-arch setup. + // -1 means we will check that PTRACE_GETREGSET works. +-# if defined(VGA_arm64) ++# if defined(VGA_arm64) || defined(VGA_loongarch64) + #define USE_PTRACE_GETREGSET + # endif + #endif +@@ -529,6 +530,9 @@ static struct user_regs_struct user_save; + # else + static struct user_pt_regs user_save; + # endif ++# elif defined(VGA_loongarch64) ++/* loongarch64 is extra special, glibc only defined user_regs_struct. */ ++static struct user_regs_struct user_save; + # else + static struct user user_save; + # endif +@@ -805,6 +809,9 @@ Bool invoker_invoke_gdbserver (pid_t pid) + # else + struct user_pt_regs user_mod; + # endif ++# elif defined(VGA_loongarch64) ++/* loongarch64 is extra special, glibc only defined user_regs_struct. */ ++ struct user_regs_struct user_mod; + # else + struct user user_mod; + # endif +@@ -874,6 +881,8 @@ Bool invoker_invoke_gdbserver (pid_t pid) + sp = p[29]; + #elif defined(VGA_mips64) + sp = user_mod.regs[29]; ++#elif defined(VGA_loongarch64) ++ sp = user_mod.regs[3]; + #else + I_die_here : (sp) architecture missing in vgdb-invoker-ptrace.c + #endif +@@ -961,6 +970,8 @@ Bool invoker_invoke_gdbserver (pid_t pid) + + #elif defined(VGA_mips64) + assert(0); // cannot vgdb a 32 bits executable with a 64 bits exe ++#elif defined(VGA_loongarch64) ++ assert(0); // cannot vgdb a 32 bits executable with a 64 bits exe + #else + I_die_here : architecture missing in vgdb-invoker-ptrace.c + #endif +@@ -1068,6 +1079,12 @@ Bool invoker_invoke_gdbserver (pid_t pid) + user_mod.regs[31] = bad_return; + user_mod.regs[34] = shared64->invoke_gdbserver; + user_mod.regs[25] = shared64->invoke_gdbserver; ++#elif defined(VGA_loongarch64) ++ /* put check arg in register a0 */ ++ user_mod.regs[4] = check; ++ /* put NULL return address in ra */ ++ user_mod.regs[1] = bad_return; ++ user_mod.csr_era = shared64->invoke_gdbserver; + #else + I_die_here: architecture missing in vgdb-invoker-ptrace.c + #endif +diff --git a/drd/drd_bitmap.h b/drd/drd_bitmap.h +index 3b71d749a..1f11f23c4 100644 +--- a/drd/drd_bitmap.h ++++ b/drd/drd_bitmap.h +@@ -140,7 +140,7 @@ Addr make_address(const UWord a1, const UWord a0) + #define BITS_PER_BITS_PER_UWORD 5 + #elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_s390x) || (defined(VGA_mips64) && !defined(VGABI_N32)) \ +- || defined(VGA_arm64) ++ || defined(VGA_arm64) || defined(VGA_loongarch64) + #define BITS_PER_BITS_PER_UWORD 6 + #else + #error Unknown platform. 
+diff --git a/drd/drd_load_store.c b/drd/drd_load_store.c +index fba1dac71..dda4ea385 100644 +--- a/drd/drd_load_store.c ++++ b/drd/drd_load_store.c +@@ -53,6 +53,8 @@ + #define STACK_POINTER_OFFSET OFFSET_mips32_r29 + #elif defined(VGA_mips64) + #define STACK_POINTER_OFFSET OFFSET_mips64_r29 ++#elif defined(VGA_loongarch64) ++#define STACK_POINTER_OFFSET OFFSET_loongarch64_R3 + #else + #error Unknown architecture. + #endif +@@ -634,6 +636,8 @@ IRSB* DRD_(instrument)(VgCallbackClosure* const closure, + break; /* not interesting to DRD */ + case Imbe_CancelReservation: + break; /* not interesting to DRD */ ++ case Imbe_InsnFence: ++ break; /* not interesting to DRD */ + default: + tl_assert(0); + } +diff --git a/drd/tests/pth_barrier_thr_cr.supp b/drd/tests/pth_barrier_thr_cr.supp +index 653b2d293..34482ccb9 100644 +--- a/drd/tests/pth_barrier_thr_cr.supp ++++ b/drd/tests/pth_barrier_thr_cr.supp +@@ -9,3 +9,14 @@ + fun:pthread_barrier_wait_intercept + fun:pthread_barrier_wait + } ++{ ++ number-of-concurrent-pthead_barrier_wait()-calls-exceeds-barrier-count ++ drd:BarrierErr ++ fun:pthread_barrier_wait@* ++} ++{ ++ number-of-concurrent-pthead_barrier_wait()-calls-exceeds-barrier-count ++ drd:BarrierErr ++ fun:pthread_barrier_wait_intercept ++ fun:pthread_barrier_wait@* ++} +diff --git a/gdbserver_tests/Makefile.am b/gdbserver_tests/Makefile.am +index fbcb6596d..30e17c0b9 100755 +--- a/gdbserver_tests/Makefile.am ++++ b/gdbserver_tests/Makefile.am +@@ -15,6 +15,7 @@ dist_noinst_SCRIPTS = \ + filter_gdb filter_make_empty \ + filter_memcheck_monitor filter_stderr filter_vgdb \ + filter_helgrind_monitor filter_helgrind_monitor_solaris \ ++ filter_helgrind_monitor_loongarch64 \ + filter_passsigalrm \ + send_signal + +diff --git a/gdbserver_tests/filter_helgrind_monitor b/gdbserver_tests/filter_helgrind_monitor +index 4fc2e9af6..21bf6be14 100755 +--- a/gdbserver_tests/filter_helgrind_monitor ++++ b/gdbserver_tests/filter_helgrind_monitor +@@ -14,6 +14,8 @@ if $dir/../tests/os_test solaris; then + $dir/filter_helgrind_monitor_solaris + elif $dir/../tests/os_test freebsd; then + gsed -e '/\(rtld_start.S\|kill.S\|_exit.S\|_select.S\): No such file or directory/d' ++elif $dir/../tests/arch_test loongarch64; then ++ $dir/filter_helgrind_monitor_loongarch64 + else + cat + fi | +diff --git a/gdbserver_tests/filter_helgrind_monitor_loongarch64 b/gdbserver_tests/filter_helgrind_monitor_loongarch64 +new file mode 100755 +index 000000000..cda73e4c2 +--- /dev/null ++++ b/gdbserver_tests/filter_helgrind_monitor_loongarch64 +@@ -0,0 +1,43 @@ ++#!/usr/bin/env perl ++# From gdbserver_tests/filter_helgrind_monitor_solaris ++ ++# ++# Filter out all helgrind information about locks except the one named "mx". ++# One lock record looks like: ++# Lock ga 0x........ { ++# Address 0x........ 
is 2648 bytes inside data symbol "_rtld_local" ++# kind mbRec ++# } ++ ++use strict; ++use warnings; ++ ++my $lock_start_line = undef; ++my $skip_to_closing_line = 0; ++while () { ++ my $line = $_; ++ chomp($line); ++ if ($line =~ /^Lock ga 0x[\.]+\s+{$/) { ++ $lock_start_line = $line; ++ $skip_to_closing_line = 1; ++ } elsif (($lock_start_line) && ++ ($line =~ /\s*Address 0x[\.]+ is \d+ bytes inside data symbol "(\S+)"/)) { ++ if ($1 eq "mx") { ++ print "$lock_start_line\n"; ++ print "$line\n"; ++ $skip_to_closing_line = 0; ++ } ++ } elsif ($line =~ /^}$/) { ++ if ($skip_to_closing_line == 0) { ++ print "$line\n"; ++ } ++ undef($lock_start_line); ++ $skip_to_closing_line = 0; ++ } else { ++ if ($skip_to_closing_line == 0) { ++ print "$line\n"; ++ } ++ } ++} ++ ++exit 0; +diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c +index cebc2bd2a..d2f882936 100644 +--- a/helgrind/hg_main.c ++++ b/helgrind/hg_main.c +@@ -4870,6 +4870,7 @@ IRSB* hg_instrument ( VgCallbackClosure* closure, + switch (st->Ist.MBE.event) { + case Imbe_Fence: + case Imbe_CancelReservation: ++ case Imbe_InsnFence: + break; /* not interesting */ + default: + goto unhandled; +diff --git a/helgrind/tests/annotate_hbefore.c b/helgrind/tests/annotate_hbefore.c +index 259d3b64c..3200c6cd0 100644 +--- a/helgrind/tests/annotate_hbefore.c ++++ b/helgrind/tests/annotate_hbefore.c +@@ -314,6 +314,36 @@ UWord do_acasW ( UWord* addr, UWord expected, UWord nyu ) + return success; + } + ++#elif defined(VGA_loongarch64) ++ ++// loongarch64 ++/* return 1 if success, 0 if failure */ ++UWord do_acasW ( UWord* addr, UWord expected, UWord nyu ) ++{ ++ UWord success; ++ UWord block[3] = { (UWord)addr, nyu, expected }; ++ ++ __asm__ __volatile__( ++ " ld.d $t0, %1, 0 \n\t" ++ " ld.d $t2, %1, 16 \n\t" ++ " ld.d $t3, %1, 8 \n\t" ++ " ll.d $t1, $t0, 0 \n\t" ++ " bne $t1, $t2, 1f \n\t" ++ " sc.d $t3, $t0, 0 \n\t" ++ " move %0, $t3 \n\t" ++ " b 2f \n\t" ++ "1: \n\t" ++ " move %0, $zero \n\t" ++ "2: \n\t" ++ : /*out*/ "=r" (success) ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "memory" ++ ); ++ ++ assert(success == 0 || success == 1); ++ return success; ++} ++ + #endif + + void atomic_incW ( UWord* w ) +diff --git a/helgrind/tests/tc07_hbl1.c b/helgrind/tests/tc07_hbl1.c +index 54297dee6..246d13c0b 100644 +--- a/helgrind/tests/tc07_hbl1.c ++++ b/helgrind/tests/tc07_hbl1.c +@@ -18,6 +18,7 @@ + #undef PLAT_arm64_linux + #undef PLAT_s390x_linux + #undef PLAT_mips32_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -47,6 +48,8 @@ + # define PLAT_mips32_linux 1 + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun__) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun__) && defined(__x86_64__) +@@ -131,6 +134,20 @@ + : /*out*/ : /*in*/ "r"(&(_lval)) \ + : /*trash*/ "$t0", "$t1", "memory" \ + ) ++#elif defined(PLAT_loongarch64_linux) ++# define INC(_lval,_lqual) \ ++ __asm__ __volatile__ ( \ ++ "1: \n" \ ++ " move $t0, %0 \n" \ ++ " ll.w $t1, $t0, 0 \n" \ ++ " addi.w $t1, $t1, 1 \n" \ ++ " sc.w $t1, $t0, 0 \n" \ ++ " li.w $t2, 1 \n" \ ++ " bne $t1, $t2, 1b \n" \ ++ : /*out*/ \ ++ : /*in*/ "r" (&(_lval)) \ ++ : /*trash*/ "$t0", "$t1", "$t2", "memory" \ ++ ) + #else + # error "Fix Me for this platform" + #endif +diff --git a/helgrind/tests/tc08_hbl2.c b/helgrind/tests/tc08_hbl2.c +index 
c3a2ec794..8683168a5 100644 +--- a/helgrind/tests/tc08_hbl2.c ++++ b/helgrind/tests/tc08_hbl2.c +@@ -35,6 +35,7 @@ + #undef PLAT_s390x_linux + #undef PLAT_mips32_linux + #undef PLAT_mips64_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -68,6 +69,8 @@ + #endif + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun__) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun__) && defined(__x86_64__) +@@ -151,6 +154,20 @@ + : /*out*/ : /*in*/ "r"(&(_lval)) \ + : /*trash*/ "$t0", "$t1", "memory" \ + ) ++#elif defined(PLAT_loongarch64_linux) ++# define INC(_lval,_lqual) \ ++ __asm__ __volatile__ ( \ ++ "1: \n" \ ++ " move $t0, %0 \n" \ ++ " ll.w $t1, $t0, 0 \n" \ ++ " addi.w $t1, $t1, 1 \n" \ ++ " sc.w $t1, $t0, 0 \n" \ ++ " li.w $t2, 1 \n" \ ++ " bne $t1, $t2, 1b \n" \ ++ : /*out*/ \ ++ : /*in*/ "r" (&(_lval)) \ ++ : /*trash*/ "$t0", "$t1", "$t2", "memory" \ ++ ) + #else + # error "Fix Me for this platform" + #endif +diff --git a/helgrind/tests/tc11_XCHG.c b/helgrind/tests/tc11_XCHG.c +index f6ff1c984..0d307ac0c 100644 +--- a/helgrind/tests/tc11_XCHG.c ++++ b/helgrind/tests/tc11_XCHG.c +@@ -20,6 +20,7 @@ + #undef PLAT_arm_linux + #undef PLAT_s390x_linux + #undef PLAT_mips32_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -49,6 +50,8 @@ + # define PLAT_mips32_linux 1 + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun__) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun__) && defined(__x86_64__) +@@ -146,6 +149,21 @@ + # define XCHG_M_R_with_redundant_LOCK(_addr,_lval) \ + XCHG_M_R(_addr,_lval) + ++#elif defined(PLAT_loongarch64_linux) ++# define XCHG_M_R(_addr,_lval) \ ++ __asm__ __volatile__( \ ++ "move $t0, %2 \n\t" \ ++ "ll.w $t1, %1 \n\t" \ ++ "sc.w $t0, %1 \n\t" \ ++ "move %0, $t1 \n\t" \ ++ : /*out*/ "=r"(_lval), "+ZC"(_addr) \ ++ : /*in*/ "r"(_lval) \ ++ : "$t0", "$t1", "memory" \ ++ ) ++ ++# define XCHG_M_R_with_redundant_LOCK(_addr,_lval) \ ++ XCHG_M_R(_addr,_lval) ++ + #else + # error "Unsupported architecture" + +diff --git a/include/Makefile.am b/include/Makefile.am +index 972d394b8..abfa2c915 100644 +--- a/include/Makefile.am ++++ b/include/Makefile.am +@@ -63,6 +63,7 @@ nobase_pkginclude_HEADERS = \ + vki/vki-posixtypes-mips32-linux.h \ + vki/vki-posixtypes-mips64-linux.h \ + vki/vki-posixtypes-nanomips-linux.h \ ++ vki/vki-posixtypes-loongarch64-linux.h \ + vki/vki-amd64-linux.h \ + vki/vki-arm64-linux.h \ + vki/vki-ppc32-linux.h \ +@@ -75,6 +76,7 @@ nobase_pkginclude_HEADERS = \ + vki/vki-mips32-linux.h \ + vki/vki-mips64-linux.h \ + vki/vki-nanomips-linux.h \ ++ vki/vki-loongarch64-linux.h \ + vki/vki-scnums-amd64-linux.h \ + vki/vki-scnums-arm64-linux.h \ + vki/vki-scnums-ppc32-linux.h \ +@@ -86,6 +88,7 @@ nobase_pkginclude_HEADERS = \ + vki/vki-scnums-mips32-linux.h \ + vki/vki-scnums-mips64-linux.h \ + vki/vki-scnums-nanomips-linux.h \ ++ vki/vki-scnums-loongarch64-linux.h \ + vki/vki-scnums-darwin.h \ + vki/vki-scnums-solaris.h \ + vki/vki-scnums-shared-linux.h \ +diff --git a/include/pub_tool_basics.h b/include/pub_tool_basics.h +index d22a42523..079196524 100644 +--- a/include/pub_tool_basics.h 
++++ b/include/pub_tool_basics.h +@@ -442,7 +442,8 @@ static inline Bool sr_EQ ( UInt sysno, SysRes sr1, SysRes sr2 ) { + + #if defined(VGA_x86) || defined(VGA_amd64) || defined (VGA_arm) \ + || ((defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)) \ +- && defined (_MIPSEL)) || defined(VGA_arm64) || defined(VGA_ppc64le) ++ && defined (_MIPSEL)) || defined(VGA_arm64) || defined(VGA_ppc64le) \ ++ || defined (VGA_loongarch64) + # define VG_LITTLEENDIAN 1 + #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_s390x) \ + || ((defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)) \ +@@ -490,7 +491,8 @@ static inline Bool sr_EQ ( UInt sysno, SysRes sr1, SysRes sr2 ) { + || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) || defined(VGA_s390x) \ + || defined(VGA_mips32) || defined(VGA_mips64) \ +- || defined(VGA_arm64) || defined(VGA_nanomips) ++ || defined(VGA_arm64) || defined(VGA_nanomips) \ ++ || defined(VGA_loongarch64) + # define VG_REGPARM(n) /* */ + #else + # error Unknown arch +diff --git a/include/pub_tool_guest.h b/include/pub_tool_guest.h +index 08a72efac..87e8cc2bc 100644 +--- a/include/pub_tool_guest.h ++++ b/include/pub_tool_guest.h +@@ -62,6 +62,9 @@ + #elif defined(VGA_mips64) + # include "libvex_guest_mips64.h" + typedef VexGuestMIPS64State VexGuestArchState; ++#elif defined(VGA_loongarch64) ++# include "libvex_guest_loongarch64.h" ++ typedef VexGuestLOONGARCH64State VexGuestArchState; + #else + # error Unknown arch + #endif +diff --git a/include/pub_tool_libcsetjmp.h b/include/pub_tool_libcsetjmp.h +index 6b278d285..86304a4f4 100644 +--- a/include/pub_tool_libcsetjmp.h ++++ b/include/pub_tool_libcsetjmp.h +@@ -126,6 +126,14 @@ UWord VG_MINIMAL_SETJMP(VG_MINIMAL_JMP_BUF(_env)); + __attribute__((noreturn)) + void VG_MINIMAL_LONGJMP(VG_MINIMAL_JMP_BUF(_env)); + ++#elif defined(VGP_loongarch64_linux) ++ ++#define VG_MINIMAL_JMP_BUF(_name) ULong _name [13+8+1] ++__attribute__((returns_twice)) ++UWord VG_MINIMAL_SETJMP(VG_MINIMAL_JMP_BUF(_env)); ++__attribute__((noreturn)) ++void VG_MINIMAL_LONGJMP(VG_MINIMAL_JMP_BUF(_env)); ++ + #else + + /* The default implementation. */ +diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h +index 9bdd4f514..12377f97a 100644 +--- a/include/pub_tool_machine.h ++++ b/include/pub_tool_machine.h +@@ -108,6 +108,12 @@ + # define VG_CLREQ_SZB 20 + # define VG_STACK_REDZONE_SZB 0 + ++#elif defined(VGP_loongarch64_linux) ++# define VG_MIN_INSTR_SZB 4 ++# define VG_MAX_INSTR_SZB 8 ++# define VG_CLREQ_SZB 20 ++# define VG_STACK_REDZONE_SZB 0 ++ + #else + # error Unknown platform + #endif +diff --git a/include/pub_tool_redir.h b/include/pub_tool_redir.h +index f88d3b571..d1bb8cbce 100644 +--- a/include/pub_tool_redir.h ++++ b/include/pub_tool_redir.h +@@ -321,6 +321,8 @@ + + #define VG_U_LD_LINUX_MIPSN8_S0_1 "ld-linux-mipsn8.so.1" + ++#define VG_U_LD_LINUX_LOONGARCH_LP64D_SO_1 "ld-linux-loongarch-lp64d.so.1" ++ + #endif + + /* --- Sonames for FreeBSD ELF linkers, plus unencoded versions. 
--- */ +diff --git a/include/pub_tool_vkiscnums_asm.h b/include/pub_tool_vkiscnums_asm.h +index 14b483c4d..b2222aadf 100644 +--- a/include/pub_tool_vkiscnums_asm.h ++++ b/include/pub_tool_vkiscnums_asm.h +@@ -74,6 +74,10 @@ + # include "vki/vki-scnums-shared-linux.h" + # include "vki/vki-scnums-mips64-linux.h" + ++#elif defined(VGP_loongarch64_linux) ++# include "vki/vki-scnums-shared-linux.h" ++# include "vki/vki-scnums-loongarch64-linux.h" ++ + #elif defined(VGP_x86_freebsd) || defined(VGP_amd64_freebsd) + # include "vki/vki-scnums-freebsd.h" + +diff --git a/include/valgrind.h.in b/include/valgrind.h.in +index aa0b43125..b330497f7 100644 +--- a/include/valgrind.h.in ++++ b/include/valgrind.h.in +@@ -125,6 +125,7 @@ + #undef PLAT_mips32_linux + #undef PLAT_mips64_linux + #undef PLAT_nanomips_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -169,6 +170,8 @@ + # define PLAT_mips32_linux 1 + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun) && defined(__x86_64__) +@@ -1125,7 +1128,75 @@ typedef + ); \ + } while (0) + +-#endif ++#endif /* PLAT_nanomips_linux */ ++ ++/* --------------------- loongarch64-linux --------------------- */ ++#if defined(PLAT_loongarch64_linux) ++ ++typedef ++ struct { ++ unsigned long nraddr; /* where's the code? */ ++ } ++ OrigFn; ++ ++#define __SPECIAL_INSTRUCTION_PREAMBLE \ ++ "srli.d $zero, $zero, 3 \n\t" \ ++ "srli.d $zero, $zero, 13 \n\t" \ ++ "srli.d $zero, $zero, 29 \n\t" \ ++ "srli.d $zero, $zero, 19 \n\t" ++ ++#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ ++ _zzq_default, _zzq_request, \ ++ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ ++ __extension__ \ ++ ({ \ ++ volatile unsigned long int _zzq_args[6]; \ ++ volatile unsigned long int _zzq_result; \ ++ _zzq_args[0] = (unsigned long int)(_zzq_request); \ ++ _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ ++ _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ ++ _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ ++ _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ ++ _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ ++ __asm__ volatile("move $a7, %1 \n\t" /*default*/ \ ++ "move $t0, %2 \n\t" /*ptr*/ \ ++ __SPECIAL_INSTRUCTION_PREAMBLE \ ++ /* $a7 = client_request ( $t0 ) */ \ ++ "or $t1, $t1, $t1 \n\t" \ ++ "move %0, $a7 \n\t" /*result*/ \ ++ : "=r" (_zzq_result) \ ++ : "r" (_zzq_default), "r" (&_zzq_args[0]) \ ++ : "$a7", "$t0", "memory"); \ ++ _zzq_result; \ ++ }) ++ ++#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ ++ { \ ++ volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ ++ volatile unsigned long int __addr; \ ++ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ ++ /* $a7 = guest_NRADDR */ \ ++ "or $t2, $t2, $t2 \n\t" \ ++ "move %0, $a7 \n\t" /*result*/ \ ++ : "=r" (__addr) \ ++ : \ ++ : "$a7"); \ ++ _zzq_orig->nraddr = __addr; \ ++ } ++ ++#define VALGRIND_CALL_NOREDIR_T8 \ ++ __SPECIAL_INSTRUCTION_PREAMBLE \ ++ /* call-noredir $t8 */ \ ++ "or $t3, $t3, $t3 \n\t" ++ ++#define VALGRIND_VEX_INJECT_IR() \ ++ do { \ ++ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ ++ "or $t4, $t4, $t4 \n\t" \ ++ ); \ ++ } while (0) ++ ++#endif /* PLAT_loongarch64_linux */ + /* Insert assembly code for other platforms here... 
*/ + + #endif /* NVALGRIND */ +@@ -6603,6 +6674,457 @@ typedef + + #endif /* PLAT_mips64_linux */ + ++/* --------------------- loongarch64-linux --------------------- */ ++ ++#if defined(PLAT_loongarch64_linux) ++ ++/* These regs are trashed by the hidden call. */ ++#define __CALLER_SAVED_REGS \ ++ "$ra", "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", \ ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", \ ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ ++ "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ ++ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23" ++ ++/* $s0 is callee-saved, so we can use it to save and restore SP around ++ the hidden call. */ ++#define VALGRIND_ALIGN_STACK \ ++ "move $s0, $sp \n\t" \ ++ "bstrins.d $sp, $zero, 3, 0 \n\t" ++#define VALGRIND_RESTORE_STACK \ ++ "move $sp, $s0 \n\t" ++ ++/* These CALL_FN_ macros assume that on loongarch64-linux, ++ sizeof(unsigned long) == 8. */ ++ ++#define CALL_FN_W_v(lval, orig) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[1]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_W(lval, orig, arg1) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[2]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[3]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[4]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" 
\ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[5]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[6]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[7]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[8]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = 
(unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[9]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[10]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -8 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9, arg10) \ ++ do 
{ \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[11]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ _argvec[10] = (unsigned long)(arg10); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -16 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 80 \n\t" \ ++ "st.d $a0, $sp, 8 \n\t" /* arg10 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9, arg10, \ ++ arg11) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[12]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ _argvec[10] = (unsigned long)(arg10); \ ++ _argvec[11] = (unsigned long)(arg11); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -24 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 80 \n\t" \ ++ "st.d $a0, $sp, 8 \n\t" /* arg10 */ \ ++ "ld.d $a0, %1, 88 \n\t" \ ++ "st.d $a0, $sp, 16 \n\t" /* arg11 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9, arg10, \ ++ arg11, arg12) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[13]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = 
(unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ _argvec[10] = (unsigned long)(arg10); \ ++ _argvec[11] = (unsigned long)(arg11); \ ++ _argvec[12] = (unsigned long)(arg12); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -32 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 80 \n\t" \ ++ "st.d $a0, $sp, 8 \n\t" /* arg10 */ \ ++ "ld.d $a0, %1, 88 \n\t" \ ++ "st.d $a0, $sp, 16 \n\t" /* arg11 */ \ ++ "ld.d $a0, %1, 96 \n\t" \ ++ "st.d $a0, $sp, 24 \n\t" /* arg12 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#endif /* PLAT_loongarch64_linux */ ++ + /* ------------------------------------------------------------------ */ + /* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ + /* */ +@@ -7159,6 +7681,7 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + #undef PLAT_mips32_linux + #undef PLAT_mips64_linux + #undef PLAT_nanomips_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h +index be3d76690..0a60c0a09 100644 +--- a/include/vki/vki-linux.h ++++ b/include/vki/vki-linux.h +@@ -97,6 +97,8 @@ + # include "vki-posixtypes-mips64-linux.h" + #elif defined(VGA_nanomips) + # include "vki-posixtypes-nanomips-linux.h" ++#elif defined(VGA_loongarch64) ++# include "vki-posixtypes-loongarch64-linux.h" + #else + # error Unknown platform + #endif +@@ -225,6 +227,8 @@ typedef unsigned int vki_uint; + # include "vki-mips64-linux.h" + #elif defined(VGA_nanomips) + # include "vki-nanomips-linux.h" ++#elif defined(VGA_loongarch64) ++# include "vki-loongarch64-linux.h" + #else + # error Unknown platform + #endif +@@ -531,6 +535,7 @@ typedef struct vki_siginfo { + * Digital reserves positive values for kernel-generated signals. + */ + #define VKI_SI_USER 0 /* sent by kill, sigsend, raise */ ++#define VKI_SI_KERNEL 0x80 /* sent by the kernel from somewhere */ + #define VKI_SI_TKILL -6 /* sent by tkill system call */ + + /* +diff --git a/include/vki/vki-loongarch64-linux.h b/include/vki/vki-loongarch64-linux.h +new file mode 100644 +index 000000000..97d3f66dd +--- /dev/null ++++ b/include/vki/vki-loongarch64-linux.h +@@ -0,0 +1,811 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- loongarch/Linux-specific kernel interface. ---*/ ++/*--- vki-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. 
++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++*/ ++ ++#ifndef __VKI_LOONGARCH64_LINUX_H ++#define __VKI_LOONGARCH64_LINUX_H ++ ++// loongarch64 is little-endian. ++#define VKI_LITTLE_ENDIAN 1 ++ ++//---------------------------------------------------------------------- ++// From linux-5.15.2/include/uapi/asm-generic/int-ll64.h ++//---------------------------------------------------------------------- ++ ++typedef __signed__ char __vki_s8; ++typedef unsigned char __vki_u8; ++ ++typedef __signed__ short __vki_s16; ++typedef unsigned short __vki_u16; ++ ++typedef __signed__ int __vki_s32; ++typedef unsigned int __vki_u32; ++ ++typedef __signed__ long long __vki_s64; ++typedef unsigned long long __vki_u64; ++ ++//---------------------------------------------------------------------- ++// From linux-5.15.2/include/asm-generic/int-ll64.h ++//---------------------------------------------------------------------- ++ ++typedef __vki_s8 vki_s8; ++typedef __vki_u8 vki_u8; ++typedef __vki_s16 vki_s16; ++typedef __vki_u16 vki_u16; ++typedef __vki_s32 vki_s32; ++typedef __vki_u32 vki_u32; ++typedef __vki_s64 vki_s64; ++typedef __vki_u64 vki_u64; ++ ++//---------------------------------------------------------------------- ++// From linux-5.15.2/include/linux/types.h ++//---------------------------------------------------------------------- ++ ++typedef vki_u8 vki_u_int8_t; ++typedef vki_s8 vki_int8_t; ++typedef vki_u16 vki_u_int16_t; ++typedef vki_s16 vki_int16_t; ++typedef vki_u32 vki_u_int32_t; ++typedef vki_s32 vki_int32_t; ++ ++typedef vki_u8 vki_uint8_t; ++typedef vki_u16 vki_uint16_t; ++typedef vki_u32 vki_uint32_t; ++ ++typedef vki_u64 vki_uint64_t; ++typedef vki_u64 vki_u_int64_t; ++typedef vki_s64 vki_int64_t; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/asm/page.h ++//---------------------------------------------------------------------- ++ ++/* loongarch64 uses runtime pagesize detection */ ++extern UWord VKI_PAGE_SHIFT; ++extern UWord VKI_PAGE_SIZE; ++#define VKI_PAGE_MASK (~(PAGE_SIZE - 1)) ++#define VKI_MAX_PAGE_SHIFT 16 ++#define VKI_MAX_PAGE_SIZE (1UL << VKI_MAX_PAGE_SHIFT) ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/asm/shmparam.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SHMLBA 0x00010000 // SZ_64K ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/signal.h ++//---------------------------------------------------------------------- ++ ++#define VKI_MINSIGSTKSZ 4096 ++#define VKI_SIGSTKSZ 16384 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/signal-defs.h 
++//---------------------------------------------------------------------- ++ ++#define VKI_SA_NOCLDSTOP 0x00000001 ++#define VKI_SA_NOCLDWAIT 0x00000002 ++#define VKI_SA_SIGINFO 0x00000004 ++/* 0x00000008 used on alpha, mips, parisc */ ++/* 0x00000010 used on alpha, parisc */ ++/* 0x00000020 used on alpha, parisc, sparc */ ++/* 0x00000040 used on alpha, parisc */ ++/* 0x00000080 used on parisc */ ++/* 0x00000100 used on sparc */ ++/* 0x00000200 used on sparc */ ++#define VKI_SA_UNSUPPORTED 0x00000400 ++#define VKI_SA_EXPOSE_TAGBITS 0x00000800 ++/* 0x00010000 used on mips */ ++/* 0x00800000 used for internal SA_IMMUTABLE */ ++/* 0x01000000 used on x86 */ ++/* 0x02000000 used on x86 */ ++/* ++ * New architectures should not define the obsolete ++ * VKI_SA_RESTORER 0x04000000 ++ */ ++#define VKI_SA_ONSTACK 0x08000000 ++#define VKI_SA_RESTART 0x10000000 ++#define VKI_SA_NODEFER 0x40000000 ++#define VKI_SA_RESETHAND 0x80000000 ++ ++#define VKI_SA_NOMASK VKI_SA_NODEFER ++#define VKI_SA_ONESHOT VKI_SA_RESETHAND ++ ++#define VKI_SIG_BLOCK 0 /* for blocking signals */ ++#define VKI_SIG_UNBLOCK 1 /* for unblocking signals */ ++#define VKI_SIG_SETMASK 2 /* for setting the signal mask */ ++ ++typedef void __vki_signalfn_t(int); ++typedef __vki_signalfn_t __user *__vki_sighandler_t; ++ ++typedef void __vki_restorefn_t(void); ++typedef __vki_restorefn_t __user *__vki_igrestore_t; ++ ++#define VKI_SIG_DFL ((__vki_sighandler_t)0) /* default signal handling */ ++#define VKI_SIG_IGN ((__vki_sighandler_t)1) /* ignore signal */ ++#define VKI_SIG_ERR ((__vki_sighandler_t)-1) /* error return from signal */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/signal.h ++//---------------------------------------------------------------------- ++ ++#define _VKI_NSIG 64 ++#define _VKI_NSIG_BPW 64 // __BITS_PER_LONG == 64 ++#define _VKI_NSIG_WORDS (_VKI_NSIG / _VKI_NSIG_BPW) ++ ++#define VKI_SIGHUP 1 ++#define VKI_SIGINT 2 ++#define VKI_SIGQUIT 3 ++#define VKI_SIGILL 4 ++#define VKI_SIGTRAP 5 ++#define VKI_SIGABRT 6 ++#define VKI_SIGIOT 6 ++#define VKI_SIGBUS 7 ++#define VKI_SIGFPE 8 ++#define VKI_SIGKILL 9 ++#define VKI_SIGUSR1 10 ++#define VKI_SIGSEGV 11 ++#define VKI_SIGUSR2 12 ++#define VKI_SIGPIPE 13 ++#define VKI_SIGALRM 14 ++#define VKI_SIGTERM 15 ++#define VKI_SIGSTKFLT 16 ++#define VKI_SIGCHLD 17 ++#define VKI_SIGCONT 18 ++#define VKI_SIGSTOP 19 ++#define VKI_SIGTSTP 20 ++#define VKI_SIGTTIN 21 ++#define VKI_SIGTTOU 22 ++#define VKI_SIGURG 23 ++#define VKI_SIGXCPU 24 ++#define VKI_SIGXFSZ 25 ++#define VKI_SIGVTALRM 26 ++#define VKI_SIGPROF 27 ++#define VKI_SIGWINCH 28 ++#define VKI_SIGIO 29 ++#define VKI_SIGPOLL VKI_SIGIO ++/* ++#define VKI_SIGLOST 29 ++*/ ++#define VKI_SIGPWR 30 ++#define VKI_SIGSYS 31 ++#define VKI_SIGUNUSED 31 ++ ++#define VKI_SIGRTMIN 32 ++#define VKI_SIGRTMAX _VKI_NSIG ++ ++typedef struct { ++ unsigned long sig[_VKI_NSIG_WORDS]; ++} vki_sigset_t; ++ ++typedef unsigned long vki_old_sigset_t; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/signal.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SS_ONSTACK 1 ++#define VKI_SS_DISABLE 2 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/linux/signal_types.h ++//---------------------------------------------------------------------- ++ ++struct vki_sigaction { ++ __vki_sighandler_t 
sa_handler; ++ unsigned long sa_flags; ++ vki_sigset_t sa_mask; /* mask last for extensibility */ ++}; ++ ++struct vki_sigaction_base { ++ // [[Nb: a 'k' prefix is added to "sa_handler" because ++ // bits/sigaction.h (which gets dragged in somehow via signal.h) ++ // #defines it as something else. Since that is done for glibc's ++ // purposes, which we don't care about here, we use our own name.]] ++ __vki_sighandler_t ksa_handler; ++ unsigned long sa_flags; ++ vki_sigset_t sa_mask; /* mask last for extensibility */ ++}; ++ ++/* On Linux we use the same type for passing sigactions to ++ and from the kernel. Hence: */ ++typedef struct vki_sigaction_base vki_sigaction_toK_t; ++typedef struct vki_sigaction_base vki_sigaction_fromK_t; ++ ++typedef struct vki_sigaltstack { ++ void __user *ss_sp; ++ int ss_flags; ++ __vki_kernel_size_t ss_size; ++} vki_stack_t; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/sigcontext.h ++//---------------------------------------------------------------------- ++ ++struct vki_sigcontext { ++ __vki_u64 sc_pc; ++ __vki_u64 sc_regs[32]; ++ __vki_u32 sc_flags; ++ __vki_u64 sc_extcontext[0] __attribute__((__aligned__(16))); ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/mman.h ++//---------------------------------------------------------------------- ++ ++#define VKI_MAP_SHARED 0x01 /* Share changes */ ++#define VKI_MAP_PRIVATE 0x02 /* Changes are private */ ++#define VKI_MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/mman-common.h ++//---------------------------------------------------------------------- ++ ++#define VKI_PROT_READ 0x1 /* page can be read */ ++#define VKI_PROT_WRITE 0x2 /* page can be written */ ++#define VKI_PROT_EXEC 0x4 /* page can be executed */ ++#define VKI_PROT_SEM 0x8 /* page may be used for atomic ops */ ++/* 0x10 reserved for arch-specific use */ ++/* 0x20 reserved for arch-specific use */ ++#define VKI_PROT_NONE 0x0 /* page can not be accessed */ ++#define VKI_PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ ++#define VKI_PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ ++ ++/* 0x01 - 0x03 are defined in linux/mman.h */ ++#define VKI_MAP_TYPE 0x0f /* Mask for type of mapping */ ++#define VKI_MAP_FIXED 0x10 /* Interpret addr exactly */ ++#define VKI_MAP_ANONYMOUS 0x20 /* don't use a file */ ++ ++/* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ ++#define VKI_MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ ++#define VKI_MAP_NONBLOCK 0x010000 /* do not block on IO */ ++#define VKI_MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ ++#define VKI_MAP_HUGETLB 0x040000 /* create a huge page mapping */ ++#define VKI_MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ ++#define VKI_MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ ++ ++#define VKI_MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/fcntl.h ++//---------------------------------------------------------------------- ++ ++#define 
VKI_O_ACCMODE 00000003 ++#define VKI_O_RDONLY 00000000 ++#define VKI_O_WRONLY 00000001 ++#define VKI_O_RDWR 00000002 ++#define VKI_O_CREAT 00000100 /* not fcntl */ ++#define VKI_O_EXCL 00000200 /* not fcntl */ ++#define VKI_O_NOCTTY 00000400 /* not fcntl */ ++#define VKI_O_TRUNC 00001000 /* not fcntl */ ++#define VKI_O_APPEND 00002000 ++#define VKI_O_NONBLOCK 00004000 ++#define VKI_O_DSYNC 00010000 /* used to be O_SYNC, see below */ ++#define VKI_FASYNC 00020000 /* fcntl, for BSD compatibility */ ++#define VKI_O_DIRECT 00040000 /* direct disk access hint */ ++#define VKI_O_LARGEFILE 00100000 ++ ++#define VKI_F_DUPFD 0 /* dup */ ++#define VKI_F_GETFD 1 /* get close_on_exec */ ++#define VKI_F_SETFD 2 /* set/clear close_on_exec */ ++#define VKI_F_GETFL 3 /* get file->f_flags */ ++#define VKI_F_SETFL 4 /* set file->f_flags */ ++#define VKI_F_GETLK 5 ++#define VKI_F_SETLK 6 ++#define VKI_F_SETLKW 7 ++#define VKI_F_SETOWN 8 /* for sockets. */ ++#define VKI_F_GETOWN 9 /* for sockets. */ ++#define VKI_F_SETSIG 10 /* for sockets. */ ++#define VKI_F_GETSIG 11 /* for sockets. */ ++ ++#define VKI_F_SETOWN_EX 15 ++#define VKI_F_GETOWN_EX 16 ++ ++#define VKI_F_GETOWNER_UIDS 17 ++ ++#define VKI_F_OFD_GETLK 36 ++#define VKI_F_OFD_SETLK 37 ++#define VKI_F_OFD_SETLKW 38 ++ ++#define VKI_F_OWNER_TID 0 ++#define VKI_F_OWNER_PID 1 ++#define VKI_F_OWNER_PGRP 2 ++ ++struct vki_f_owner_ex { ++ int type; ++ __vki_kernel_pid_t pid; ++}; ++ ++#define VKI_FD_CLOEXEC 1 /* actually anything with low bit set goes */ ++ ++#define VKI_F_RDLCK 0 ++#define VKI_F_WRLCK 1 ++#define VKI_F_UNLCK 2 ++ ++#define VKI_F_EXLCK 4 /* or 3 */ ++#define VKI_F_SHLCK 8 /* or 4 */ ++ ++#define VKI_LOCK_SH 1 /* shared lock */ ++#define VKI_LOCK_EX 2 /* exclusive lock */ ++#define VKI_LOCK_NB 4 /* or'd with one of the above to prevent blocking */ ++#define VKI_LOCK_UN 8 /* remove lock */ ++ ++#define VKI_LOCK_MAND 32 /* This is a mandatory flock ... */ ++#define VKI_LOCK_READ 64 /* which allows concurrent read operations */ ++#define VKI_LOCK_WRITE 128 /* which allows concurrent write operations */ ++#define VKI_LOCK_RW 192 /* which allows concurrent read & write ops */ ++ ++#define VKI_F_LINUX_SPECIFIC_BASE 1024 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/fcntl.h ++//---------------------------------------------------------------------- ++ ++#define VKI_AT_FDCWD -100 /* Special value used to indicate ++ openat should use the current ++ working directory. 
*/ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/resource.h ++//---------------------------------------------------------------------- ++ ++#define VKI_RLIMIT_DATA 2 /* max data size */ ++#define VKI_RLIMIT_STACK 3 /* max stack size */ ++#define VKI_RLIMIT_CORE 4 /* max core file size */ ++#define VKI_RLIMIT_NOFILE 7 /* max number of open files */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/socket.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SOL_SOCKET 1 ++#define VKI_SO_TYPE 3 ++ ++#define VKI_SO_ATTACH_FILTER 26 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/sockios.h ++//---------------------------------------------------------------------- ++ ++#define VKI_FIOSETOWN 0x8901 ++#define VKI_SIOCSPGRP 0x8902 ++#define VKI_FIOGETOWN 0x8903 ++#define VKI_SIOCGPGRP 0x8904 ++#define VKI_SIOCATMARK 0x8905 ++#define VKI_SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ ++#define VKI_SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/sockios.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SIOCGSTAMP VKI_SIOCGSTAMP_OLD ++#define VKI_SIOCGSTAMPNS VKI_SIOCGSTAMPNS_OLD ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/stat.h ++//---------------------------------------------------------------------- ++ ++struct vki_stat { ++ unsigned long st_dev; /* Device. */ ++ unsigned long st_ino; /* File serial number. */ ++ unsigned int st_mode; /* File mode. */ ++ unsigned int st_nlink; /* Link count. */ ++ unsigned int st_uid; /* User ID of the file's owner. */ ++ unsigned int st_gid; /* Group ID of the file's group. */ ++ unsigned long st_rdev; /* Device number, if device. */ ++ unsigned long __pad1; ++ long st_size; /* Size of file, in bytes. */ ++ int st_blksize; /* Optimal block size for I/O. */ ++ int __pad2; ++ long st_blocks; /* Number 512-byte blocks allocated. */ ++ long st_atime; /* Time of last access. */ ++ unsigned long st_atime_nsec; ++ long st_mtime; /* Time of last modification. */ ++ unsigned long st_mtime_nsec; ++ long st_ctime; /* Time of last status change. */ ++ unsigned long st_ctime_nsec; ++ unsigned int __unused4; ++ unsigned int __unused5; ++}; ++ ++struct vki_stat64 { ++ unsigned long long st_dev; /* Device. */ ++ unsigned long long st_ino; /* File serial number. */ ++ unsigned int st_mode; /* File mode. */ ++ unsigned int st_nlink; /* Link count. */ ++ unsigned int st_uid; /* User ID of the file's owner. */ ++ unsigned int st_gid; /* Group ID of the file's group. */ ++ unsigned long long st_rdev; /* Device number, if device. */ ++ unsigned long long __pad1; ++ long long st_size; /* Size of file, in bytes. */ ++ int st_blksize; /* Optimal block size for I/O. */ ++ int __pad2; ++ long long st_blocks; /* Number 512-byte blocks allocated. */ ++ int st_atime; /* Time of last access. */ ++ unsigned int st_atime_nsec; ++ int st_mtime; /* Time of last modification. */ ++ unsigned int st_mtime_nsec; ++ int st_ctime; /* Time of last status change. 
*/ ++ unsigned int st_ctime_nsec; ++ unsigned int __unused4; ++ unsigned int __unused5; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/statfs.h ++//---------------------------------------------------------------------- ++ ++#define __vki_statfs_word __vki_kernel_long_t ++ ++struct vki_statfs { ++ __vki_statfs_word f_type; ++ __vki_statfs_word f_bsize; ++ __vki_statfs_word f_blocks; ++ __vki_statfs_word f_bfree; ++ __vki_statfs_word f_bavail; ++ __vki_statfs_word f_files; ++ __vki_statfs_word f_ffree; ++ __vki_kernel_fsid_t f_fsid; ++ __vki_statfs_word f_namelen; ++ __vki_statfs_word f_frsize; ++ __vki_statfs_word f_flags; ++ __vki_statfs_word f_spare[4]; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/termios.h ++//---------------------------------------------------------------------- ++ ++struct vki_winsize { ++ unsigned short ws_row; ++ unsigned short ws_col; ++ unsigned short ws_xpixel; ++ unsigned short ws_ypixel; ++}; ++ ++#define VKI_NCC 8 ++struct vki_termio { ++ unsigned short c_iflag; /* input mode flags */ ++ unsigned short c_oflag; /* output mode flags */ ++ unsigned short c_cflag; /* control mode flags */ ++ unsigned short c_lflag; /* local mode flags */ ++ unsigned char c_line; /* line discipline */ ++ unsigned char c_cc[VKI_NCC]; /* control characters */ ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/termbits.h ++//---------------------------------------------------------------------- ++ ++typedef unsigned char vki_cc_t; ++typedef unsigned int vki_speed_t; ++typedef unsigned int vki_tcflag_t; ++ ++#define VKI_NCCS 19 ++struct vki_termios { ++ vki_tcflag_t c_iflag; /* input mode flags */ ++ vki_tcflag_t c_oflag; /* output mode flags */ ++ vki_tcflag_t c_cflag; /* control mode flags */ ++ vki_tcflag_t c_lflag; /* local mode flags */ ++ vki_cc_t c_line; /* line discipline */ ++ vki_cc_t c_cc[VKI_NCCS]; /* control characters */ ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/ioctl.h ++//---------------------------------------------------------------------- ++ ++#define _VKI_IOC_NRBITS 8 ++#define _VKI_IOC_TYPEBITS 8 ++#define _VKI_IOC_SIZEBITS 14 ++#define _VKI_IOC_DIRBITS 2 ++ ++#define _VKI_IOC_NRMASK ((1 << _VKI_IOC_NRBITS)-1) ++#define _VKI_IOC_TYPEMASK ((1 << _VKI_IOC_TYPEBITS)-1) ++#define _VKI_IOC_SIZEMASK ((1 << _VKI_IOC_SIZEBITS)-1) ++#define _VKI_IOC_DIRMASK ((1 << _VKI_IOC_DIRBITS)-1) ++ ++#define _VKI_IOC_NRSHIFT 0 ++#define _VKI_IOC_TYPESHIFT (_VKI_IOC_NRSHIFT+_VKI_IOC_NRBITS) ++#define _VKI_IOC_SIZESHIFT (_VKI_IOC_TYPESHIFT+_VKI_IOC_TYPEBITS) ++#define _VKI_IOC_DIRSHIFT (_VKI_IOC_SIZESHIFT+_VKI_IOC_SIZEBITS) ++ ++#define _VKI_IOC_NONE 0U ++#define _VKI_IOC_WRITE 1U ++#define _VKI_IOC_READ 2U ++ ++#define _VKI_IOC(dir,type,nr,size) \ ++ (((dir) << _VKI_IOC_DIRSHIFT) | \ ++ ((type) << _VKI_IOC_TYPESHIFT) | \ ++ ((nr) << _VKI_IOC_NRSHIFT) | \ ++ ((size) << _VKI_IOC_SIZESHIFT)) ++ ++#define _VKI_IO(type,nr) _VKI_IOC(_VKI_IOC_NONE,(type),(nr),0) ++#define _VKI_IOR(type,nr,size) _VKI_IOC(_VKI_IOC_READ,(type),(nr),(_VKI_IOC_TYPECHECK(size))) ++#define _VKI_IOW(type,nr,size) _VKI_IOC(_VKI_IOC_WRITE,(type),(nr),(_VKI_IOC_TYPECHECK(size))) ++#define _VKI_IOWR(type,nr,size) 
_VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),(_VKI_IOC_TYPECHECK(size))) ++#define _VKI_IOR_BAD(type,nr,size) _VKI_IOC(_VKI_IOC_READ,(type),(nr),sizeof(size)) ++#define _VKI_IOW_BAD(type,nr,size) _VKI_IOC(_VKI_IOC_WRITE,(type),(nr),sizeof(size)) ++#define _VKI_IOWR_BAD(type,nr,size) _VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),sizeof(size)) ++ ++#define _VKI_IOC_DIR(nr) (((nr) >> _VKI_IOC_DIRSHIFT) & _VKI_IOC_DIRMASK) ++#define _VKI_IOC_TYPE(nr) (((nr) >> _VKI_IOC_TYPESHIFT) & _VKI_IOC_TYPEMASK) ++#define _VKI_IOC_NR(nr) (((nr) >> _VKI_IOC_NRSHIFT) & _VKI_IOC_NRMASK) ++#define _VKI_IOC_SIZE(nr) (((nr) >> _VKI_IOC_SIZESHIFT) & _VKI_IOC_SIZEMASK) ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/ioctls.h ++//---------------------------------------------------------------------- ++ ++#define VKI_TCGETS 0x5401 ++#define VKI_TCSETS 0x5402 ++#define VKI_TCSETSW 0x5403 ++#define VKI_TCSETSF 0x5404 ++#define VKI_TCGETA 0x5405 ++#define VKI_TCSETA 0x5406 ++#define VKI_TCSETAW 0x5407 ++#define VKI_TCSETAF 0x5408 ++#define VKI_TCSBRK 0x5409 ++#define VKI_TCXONC 0x540A ++#define VKI_TCFLSH 0x540B ++#define VKI_TIOCEXCL 0x540C ++#define VKI_TIOCNXCL 0x540D ++#define VKI_TIOCSCTTY 0x540E ++#define VKI_TIOCGPGRP 0x540F ++#define VKI_TIOCSPGRP 0x5410 ++#define VKI_TIOCOUTQ 0x5411 ++#define VKI_TIOCSTI 0x5412 ++#define VKI_TIOCGWINSZ 0x5413 ++#define VKI_TIOCSWINSZ 0x5414 ++#define VKI_TIOCMGET 0x5415 ++#define VKI_TIOCMBIS 0x5416 ++#define VKI_TIOCMBIC 0x5417 ++#define VKI_TIOCMSET 0x5418 ++#define VKI_TIOCGSOFTCAR 0x5419 ++#define VKI_TIOCSSOFTCAR 0x541A ++#define VKI_FIONREAD 0x541B ++#define VKI_TIOCINQ VKI_FIONREAD ++#define VKI_TIOCLINUX 0x541C ++#define VKI_TIOCCONS 0x541D ++#define VKI_TIOCGSERIAL 0x541E ++#define VKI_TIOCSSERIAL 0x541F ++#define VKI_TIOCPKT 0x5420 ++#define VKI_FIONBIO 0x5421 ++#define VKI_TIOCNOTTY 0x5422 ++#define VKI_TIOCSETD 0x5423 ++#define VKI_TIOCGETD 0x5424 ++#define VKI_TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ ++#define VKI_TIOCSBRK 0x5427 /* BSD compatibility */ ++#define VKI_TIOCCBRK 0x5428 /* BSD compatibility */ ++#define VKI_TIOCGSID 0x5429 /* Return the session ID of FD */ ++#define VKI_TCGETS2 _VKI_IOR('T', 0x2A, struct termios2) ++#define VKI_TCSETS2 _VKI_IOW('T', 0x2B, struct termios2) ++#define VKI_TCSETSW2 _VKI_IOW('T', 0x2C, struct termios2) ++#define VKI_TCSETSF2 _VKI_IOW('T', 0x2D, struct termios2) ++#define VKI_TIOCGRS485 0x542E ++#define VKI_TIOCSRS485 0x542F ++#define VKI_TIOCGPTN _VKI_IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ ++#define VKI_TIOCSPTLCK _VKI_IOW('T', 0x31, int) /* Lock/unlock Pty */ ++#define VKI_TIOCGDEV _VKI_IOR('T', 0x32, unsigned int) /* Get primary device node of /dev/console */ ++#define VKI_TCGETX 0x5432 /* SYS5 TCGETX compatibility */ ++#define VKI_TCSETX 0x5433 ++#define VKI_TCSETXF 0x5434 ++#define VKI_TCSETXW 0x5435 ++#define VKI_TIOCSIG _VKI_IOW('T', 0x36, int) /* pty: generate signal */ ++#define VKI_TIOCVHANGUP 0x5437 ++#define VKI_TIOCGPKT _VKI_IOR('T', 0x38, int) /* Get packet mode state */ ++#define VKI_TIOCGPTLCK _VKI_IOR('T', 0x39, int) /* Get Pty lock state */ ++#define VKI_TIOCGEXCL _VKI_IOR('T', 0x40, int) /* Get exclusive mode state */ ++#define VKI_TIOCGPTPEER _VKI_IO('T', 0x41) /* Safely open the slave */ ++#define VKI_TIOCGISO7816 _VKI_IOR('T', 0x42, struct serial_iso7816) ++#define VKI_TIOCSISO7816 _VKI_IOWR('T', 0x43, struct serial_iso7816) ++ ++#define VKI_FIONCLEX 0x5450 ++#define 
VKI_FIOCLEX 0x5451 ++#define VKI_FIOASYNC 0x5452 ++#define VKI_TIOCSERCONFIG 0x5453 ++#define VKI_TIOCSERGWILD 0x5454 ++#define VKI_TIOCSERSWILD 0x5455 ++#define VKI_TIOCGLCKTRMIOS 0x5456 ++#define VKI_TIOCSLCKTRMIOS 0x5457 ++#define VKI_TIOCSERGSTRUCT 0x5458 /* For debugging only */ ++#define VKI_TIOCSERGETLSR 0x5459 /* Get line status register */ ++#define VKI_TIOCSERGETMULTI 0x545A /* Get multiport config */ ++#define VKI_TIOCSERSETMULTI 0x545B /* Set multiport config */ ++ ++#define VKI_TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ ++#define VKI_TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ ++ ++#define VKI_FIOQSIZE 0x5460 ++ ++#define VKI_TIOCPKT_DATA 0 ++#define VKI_TIOCPKT_FLUSHREAD 1 ++#define VKI_TIOCPKT_FLUSHWRITE 2 ++#define VKI_TIOCPKT_STOP 4 ++#define VKI_TIOCPKT_START 8 ++#define VKI_TIOCPKT_NOSTOP 16 ++#define VKI_TIOCPKT_DOSTOP 32 ++#define VKI_TIOCPKT_IOCTL 64 ++ ++#define VKI_TIOCSER_TEMT 0x01 /* Transmitter physically empty */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/poll.h ++//---------------------------------------------------------------------- ++ ++#define VKI_POLLIN 0x0001 ++ ++struct vki_pollfd { ++ int fd; ++ short events; ++ short revents; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/asm/elf.h ++//---------------------------------------------------------------------- ++ ++#define VKI_ELF_NGREG 45 ++#define VKI_ELF_NFPREG 34 ++ ++typedef unsigned long vki_elf_greg_t; ++typedef vki_elf_greg_t vki_elf_gregset_t[VKI_ELF_NGREG]; ++ ++typedef double vki_elf_fpreg_t; ++typedef vki_elf_fpreg_t vki_elf_fpregset_t[VKI_ELF_NFPREG]; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/ucontext.h ++//---------------------------------------------------------------------- ++ ++struct vki_ucontext { ++ unsigned long uc_flags; ++ struct vki_ucontext *uc_link; ++ vki_stack_t uc_stack; ++ vki_sigset_t uc_sigmask; ++ __vki_u8 __unused[1024 / 8 - sizeof(vki_sigset_t)]; ++ struct vki_sigcontext uc_mcontext; ++}; ++ ++typedef char vki_modify_ldt_t; ++ ++ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/ipcbuf.h ++//---------------------------------------------------------------------- ++ ++struct vki_ipc64_perm { ++ __vki_kernel_key_t key; ++ __vki_kernel_uid32_t uid; ++ __vki_kernel_gid32_t gid; ++ __vki_kernel_uid32_t cuid; ++ __vki_kernel_gid32_t cgid; ++ __vki_kernel_mode_t mode; ++ unsigned char __pad1[4 - sizeof(__vki_kernel_mode_t)]; /* pad if mode_t is u16: */ ++ unsigned short seq; ++ unsigned short __pad2; ++ __vki_kernel_ulong_t __unused1; ++ __vki_kernel_ulong_t __unused2; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/sembuf.h ++//---------------------------------------------------------------------- ++struct vki_semid64_ds { ++ struct vki_ipc64_perm sem_perm; /* permissions .. see ipc.h */ ++ long sem_otime; /* last semop time */ ++ long sem_ctime; /* last change time */ ++ unsigned long sem_nsems; /* no. 
of semaphores in array */ ++ unsigned long __unused3; ++ unsigned long __unused4; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/msgbuf.h ++//---------------------------------------------------------------------- ++ ++struct vki_msqid64_ds { ++ struct vki_ipc64_perm msg_perm; ++ long msg_stime; /* last msgsnd time */ ++ long msg_rtime; /* last msgrcv time */ ++ long msg_ctime; /* last change time */ ++ unsigned long msg_cbytes; /* current number of bytes on queue */ ++ unsigned long msg_qnum; /* number of messages in queue */ ++ unsigned long msg_qbytes; /* max number of bytes on queue */ ++ __vki_kernel_pid_t msg_lspid; /* pid of last msgsnd */ ++ __vki_kernel_pid_t msg_lrpid; /* last receive pid */ ++ unsigned long __unused4; ++ unsigned long __unused5; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/shmbuf.h ++//---------------------------------------------------------------------- ++ ++struct vki_shmid64_ds { ++ struct vki_ipc64_perm shm_perm; /* operation perms */ ++ vki_size_t shm_segsz; /* size of segment (bytes) */ ++ long shm_atime; /* last attach time */ ++ long shm_dtime; /* last detach time */ ++ long shm_ctime; /* last change time */ ++ __vki_kernel_pid_t shm_cpid; /* pid of creator */ ++ __vki_kernel_pid_t shm_lpid; /* pid of last operator */ ++ unsigned long shm_nattch; /* no. of current attaches */ ++ unsigned long __unused4; ++ unsigned long __unused5; ++}; ++ ++struct vki_shminfo64 { ++ unsigned long shmmax; ++ unsigned long shmmin; ++ unsigned long shmmni; ++ unsigned long shmseg; ++ unsigned long shmall; ++ unsigned long __unused1; ++ unsigned long __unused2; ++ unsigned long __unused3; ++ unsigned long __unused4; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/ptrace.h ++//---------------------------------------------------------------------- ++ ++struct vki_user_pt_regs { ++ /* Saved main processor registers. */ ++ unsigned long regs[32]; ++ ++ /* Original syscall arg0. */ ++ unsigned long orig_a0; ++ ++ /* Saved special registers. */ ++ unsigned long csr_era; ++ unsigned long csr_badv; ++ unsigned long reserved[10]; ++} __attribute__((aligned(8))); ++ ++#define vki_user_regs_struct vki_user_pt_regs ++ ++struct vki_user_fp_state { ++ vki_uint64_t fpr[32]; ++ vki_uint64_t fcc; ++ vki_uint32_t fcsr; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/errno.h ++//---------------------------------------------------------------------- ++ ++#define VKI_ENOSYS 38 /* Invalid system call number */ ++#define VKI_EOVERFLOW 75 /* Value too large for defined data type */ ++ ++#endif // __VKI_LOONGARCH64_LINUX_H ++ ++/*--------------------------------------------------------------------*/ ++/*--- end vki-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/include/vki/vki-posixtypes-loongarch64-linux.h b/include/vki/vki-posixtypes-loongarch64-linux.h +new file mode 100644 +index 000000000..0282a2a39 +--- /dev/null ++++ b/include/vki/vki-posixtypes-loongarch64-linux.h +@@ -0,0 +1,76 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- loongarch/Linux-specific kernel interface: posix types. 
---*/ ++/*--- vki-posixtypes-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#ifndef __VKI_POSIXTYPES_LOONGARCH64_LINUX_H ++#define __VKI_POSIXTYPES_LOONGARCH64_LINUX_H ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/posix_types.h ++//---------------------------------------------------------------------- ++ ++typedef long __vki_kernel_long_t; ++typedef unsigned long __vki_kernel_ulong_t; ++typedef __vki_kernel_ulong_t __vki_kernel_ino_t; ++typedef unsigned int __vki_kernel_mode_t; ++typedef int __vki_kernel_pid_t; ++typedef int __vki_kernel_ipc_pid_t; ++typedef unsigned int __vki_kernel_uid_t; ++typedef unsigned int __vki_kernel_gid_t; ++typedef __vki_kernel_long_t __vki_kernel_suseconds_t; ++typedef int __vki_kernel_daddr_t; ++typedef unsigned int __vki_kernel_uid32_t; ++typedef unsigned int __vki_kernel_gid32_t; ++typedef __vki_kernel_uid_t __vki_kernel_old_uid_t; ++typedef __vki_kernel_gid_t __vki_kernel_old_gid_t; ++typedef unsigned int __vki_kernel_old_dev_t; ++ ++typedef __vki_kernel_ulong_t __vki_kernel_size_t; ++typedef __vki_kernel_long_t __vki_kernel_ssize_t; ++typedef __vki_kernel_long_t __vki_kernel_ptrdiff_t; ++ ++typedef struct { ++ int val[2]; ++} __vki_kernel_fsid_t; ++ ++typedef __vki_kernel_long_t __vki_kernel_off_t; ++typedef long long __vki_kernel_loff_t; ++typedef __vki_kernel_long_t __vki_kernel_old_time_t; ++typedef __vki_kernel_long_t __vki_kernel_time_t; ++typedef long long __vki_kernel_time64_t; ++typedef __vki_kernel_long_t __vki_kernel_clock_t; ++typedef int __vki_kernel_timer_t; ++typedef int __vki_kernel_clockid_t; ++typedef char * __vki_kernel_caddr_t; ++typedef unsigned short __vki_kernel_uid16_t; ++typedef unsigned short __vki_kernel_gid16_t; ++ ++#endif // __VKI_POSIXTYPES_LOONGARCH64_LINUX_H ++ ++/*--------------------------------------------------------------------*/ ++/*--- end vki-posixtypes-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/include/vki/vki-scnums-loongarch64-linux.h b/include/vki/vki-scnums-loongarch64-linux.h +new file mode 100644 +index 000000000..ed3ef7e43 +--- /dev/null ++++ b/include/vki/vki-scnums-loongarch64-linux.h +@@ -0,0 +1,333 @@ ++/*--------------------------------------------------------------------*/ ++/*--- System call numbers for loongarch-linux. ---*/ ++/*--- vki-scnums-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. 
++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#ifndef __VKI_SCNUMS_LOONGARCH64_LINUX_H ++#define __VKI_SCNUMS_LOONGARCH64_LINUX_H ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/unistd.h ++//---------------------------------------------------------------------- ++ ++#define __NR_io_setup 0 ++#define __NR_io_destroy 1 ++#define __NR_io_submit 2 ++#define __NR_io_cancel 3 ++#define __NR_io_getevents 4 ++#define __NR_setxattr 5 ++#define __NR_lsetxattr 6 ++#define __NR_fsetxattr 7 ++#define __NR_getxattr 8 ++#define __NR_lgetxattr 9 ++#define __NR_fgetxattr 10 ++#define __NR_listxattr 11 ++#define __NR_llistxattr 12 ++#define __NR_flistxattr 13 ++#define __NR_removexattr 14 ++#define __NR_lremovexattr 15 ++#define __NR_fremovexattr 16 ++#define __NR_getcwd 17 ++#define __NR_lookup_dcookie 18 ++#define __NR_eventfd2 19 ++#define __NR_epoll_create1 20 ++#define __NR_epoll_ctl 21 ++#define __NR_epoll_pwait 22 ++#define __NR_dup 23 ++#define __NR_dup3 24 ++#define __NR3264_fcntl 25 ++#define __NR_inotify_init1 26 ++#define __NR_inotify_add_watch 27 ++#define __NR_inotify_rm_watch 28 ++#define __NR_ioctl 29 ++#define __NR_ioprio_set 30 ++#define __NR_ioprio_get 31 ++#define __NR_flock 32 ++#define __NR_mknodat 33 ++#define __NR_mkdirat 34 ++#define __NR_unlinkat 35 ++#define __NR_symlinkat 36 ++#define __NR_linkat 37 ++// #define __NR_renameat 38 ++#define __NR_umount2 39 ++#define __NR_mount 40 ++#define __NR_pivot_root 41 ++#define __NR_nfsservctl 42 ++#define __NR3264_statfs 43 ++#define __NR3264_fstatfs 44 ++#define __NR3264_truncate 45 ++#define __NR3264_ftruncate 46 ++#define __NR_fallocate 47 ++#define __NR_faccessat 48 ++#define __NR_chdir 49 ++#define __NR_fchdir 50 ++#define __NR_chroot 51 ++#define __NR_fchmod 52 ++#define __NR_fchmodat 53 ++#define __NR_fchownat 54 ++#define __NR_fchown 55 ++#define __NR_openat 56 ++#define __NR_close 57 ++#define __NR_vhangup 58 ++#define __NR_pipe2 59 ++#define __NR_quotactl 60 ++#define __NR_getdents64 61 ++#define __NR3264_lseek 62 ++#define __NR_read 63 ++#define __NR_write 64 ++#define __NR_readv 65 ++#define __NR_writev 66 ++#define __NR_pread64 67 ++#define __NR_pwrite64 68 ++#define __NR_preadv 69 ++#define __NR_pwritev 70 ++#define __NR3264_sendfile 71 ++#define __NR_pselect6 72 ++#define __NR_ppoll 73 ++#define __NR_signalfd4 74 ++#define __NR_vmsplice 75 ++#define __NR_splice 76 ++#define __NR_tee 77 ++#define __NR_readlinkat 78 ++// #define __NR3264_fstatat 79 ++// #define __NR3264_fstat 80 ++#define __NR_sync 81 ++#define __NR_fsync 82 ++#define __NR_fdatasync 83 ++#define __NR_sync_file_range 84 ++#define __NR_timerfd_create 85 ++#define __NR_timerfd_settime 86 ++#define __NR_timerfd_gettime 87 ++#define __NR_utimensat 88 ++#define __NR_acct 89 
++#define __NR_capget 90 ++#define __NR_capset 91 ++#define __NR_personality 92 ++#define __NR_exit 93 ++#define __NR_exit_group 94 ++#define __NR_waitid 95 ++#define __NR_set_tid_address 96 ++#define __NR_unshare 97 ++#define __NR_futex 98 ++#define __NR_set_robust_list 99 ++#define __NR_get_robust_list 100 ++#define __NR_nanosleep 101 ++#define __NR_getitimer 102 ++#define __NR_setitimer 103 ++#define __NR_kexec_load 104 ++#define __NR_init_module 105 ++#define __NR_delete_module 106 ++#define __NR_timer_create 107 ++#define __NR_timer_gettime 108 ++#define __NR_timer_getoverrun 109 ++#define __NR_timer_settime 110 ++#define __NR_timer_delete 111 ++#define __NR_clock_settime 112 ++#define __NR_clock_gettime 113 ++#define __NR_clock_getres 114 ++#define __NR_clock_nanosleep 115 ++#define __NR_syslog 116 ++#define __NR_ptrace 117 ++#define __NR_sched_setparam 118 ++#define __NR_sched_setscheduler 119 ++#define __NR_sched_getscheduler 120 ++#define __NR_sched_getparam 121 ++#define __NR_sched_setaffinity 122 ++#define __NR_sched_getaffinity 123 ++#define __NR_sched_yield 124 ++#define __NR_sched_get_priority_max 125 ++#define __NR_sched_get_priority_min 126 ++#define __NR_sched_rr_get_interval 127 ++#define __NR_restart_syscall 128 ++#define __NR_kill 129 ++#define __NR_tkill 130 ++#define __NR_tgkill 131 ++#define __NR_sigaltstack 132 ++#define __NR_rt_sigsuspend 133 ++#define __NR_rt_sigaction 134 ++#define __NR_rt_sigprocmask 135 ++#define __NR_rt_sigpending 136 ++#define __NR_rt_sigtimedwait 137 ++#define __NR_rt_sigqueueinfo 138 ++#define __NR_rt_sigreturn 139 ++#define __NR_setpriority 140 ++#define __NR_getpriority 141 ++#define __NR_reboot 142 ++#define __NR_setregid 143 ++#define __NR_setgid 144 ++#define __NR_setreuid 145 ++#define __NR_setuid 146 ++#define __NR_setresuid 147 ++#define __NR_getresuid 148 ++#define __NR_setresgid 149 ++#define __NR_getresgid 150 ++#define __NR_setfsuid 151 ++#define __NR_setfsgid 152 ++#define __NR_times 153 ++#define __NR_setpgid 154 ++#define __NR_getpgid 155 ++#define __NR_getsid 156 ++#define __NR_setsid 157 ++#define __NR_getgroups 158 ++#define __NR_setgroups 159 ++#define __NR_uname 160 ++#define __NR_sethostname 161 ++#define __NR_setdomainname 162 ++// #define __NR_getrlimit 163 ++// #define __NR_setrlimit 164 ++#define __NR_getrusage 165 ++#define __NR_umask 166 ++#define __NR_prctl 167 ++#define __NR_getcpu 168 ++#define __NR_gettimeofday 169 ++#define __NR_settimeofday 170 ++#define __NR_adjtimex 171 ++#define __NR_getpid 172 ++#define __NR_getppid 173 ++#define __NR_getuid 174 ++#define __NR_geteuid 175 ++#define __NR_getgid 176 ++#define __NR_getegid 177 ++#define __NR_gettid 178 ++#define __NR_sysinfo 179 ++#define __NR_mq_open 180 ++#define __NR_mq_unlink 181 ++#define __NR_mq_timedsend 182 ++#define __NR_mq_timedreceive 183 ++#define __NR_mq_notify 184 ++#define __NR_mq_getsetattr 185 ++#define __NR_msgget 186 ++#define __NR_msgctl 187 ++#define __NR_msgrcv 188 ++#define __NR_msgsnd 189 ++#define __NR_semget 190 ++#define __NR_semctl 191 ++#define __NR_semtimedop 192 ++#define __NR_semop 193 ++#define __NR_shmget 194 ++#define __NR_shmctl 195 ++#define __NR_shmat 196 ++#define __NR_shmdt 197 ++#define __NR_socket 198 ++#define __NR_socketpair 199 ++#define __NR_bind 200 ++#define __NR_listen 201 ++#define __NR_accept 202 ++#define __NR_connect 203 ++#define __NR_getsockname 204 ++#define __NR_getpeername 205 ++#define __NR_sendto 206 ++#define __NR_recvfrom 207 ++#define __NR_setsockopt 208 ++#define __NR_getsockopt 209 ++#define 
__NR_shutdown 210 ++#define __NR_sendmsg 211 ++#define __NR_recvmsg 212 ++#define __NR_readahead 213 ++#define __NR_brk 214 ++#define __NR_munmap 215 ++#define __NR_mremap 216 ++#define __NR_add_key 217 ++#define __NR_request_key 218 ++#define __NR_keyctl 219 ++#define __NR_clone 220 ++#define __NR_execve 221 ++#define __NR3264_mmap 222 ++#define __NR3264_fadvise64 223 ++#define __NR_swapon 224 ++#define __NR_swapoff 225 ++#define __NR_mprotect 226 ++#define __NR_msync 227 ++#define __NR_mlock 228 ++#define __NR_munlock 229 ++#define __NR_mlockall 230 ++#define __NR_munlockall 231 ++#define __NR_mincore 232 ++#define __NR_madvise 233 ++#define __NR_remap_file_pages 234 ++#define __NR_mbind 235 ++#define __NR_get_mempolicy 236 ++#define __NR_set_mempolicy 237 ++#define __NR_migrate_pages 238 ++#define __NR_move_pages 239 ++#define __NR_rt_tgsigqueueinfo 240 ++#define __NR_perf_event_open 241 ++#define __NR_accept4 242 ++#define __NR_recvmmsg 243 ++ ++#define __NR_wait4 260 ++#define __NR_prlimit64 261 ++#define __NR_fanotify_init 262 ++#define __NR_fanotify_mark 263 ++#define __NR_name_to_handle_at 264 ++#define __NR_open_by_handle_at 265 ++#define __NR_clock_adjtime 266 ++#define __NR_syncfs 267 ++#define __NR_setns 268 ++#define __NR_sendmmsg 269 ++#define __NR_process_vm_readv 270 ++#define __NR_process_vm_writev 271 ++#define __NR_kcmp 272 ++#define __NR_finit_module 273 ++#define __NR_sched_setattr 274 ++#define __NR_sched_getattr 275 ++#define __NR_renameat2 276 ++#define __NR_seccomp 277 ++#define __NR_getrandom 278 ++#define __NR_memfd_create 279 ++#define __NR_bpf 280 ++#define __NR_execveat 281 ++#define __NR_userfaultfd 282 ++#define __NR_membarrier 283 ++#define __NR_mlock2 284 ++#define __NR_copy_file_range 285 ++#define __NR_preadv2 286 ++#define __NR_pwritev2 287 ++#define __NR_pkey_mprotect 288 ++#define __NR_pkey_alloc 289 ++#define __NR_pkey_free 290 ++#define __NR_statx 291 ++#define __NR_io_pgetevents 292 ++#define __NR_rseq 293 ++#define __NR_kexec_file_load 294 ++#define __NR_pidfd_getfd 438 ++#define __NR_epoll_pwait2 441 ++#define __NR_fchmodat2 452 ++ ++#define __NR_fcntl __NR3264_fcntl ++#define __NR_statfs __NR3264_statfs ++#define __NR_fstatfs __NR3264_fstatfs ++#define __NR_truncate __NR3264_truncate ++#define __NR_ftruncate __NR3264_ftruncate ++#define __NR_lseek __NR3264_lseek ++#define __NR_sendfile __NR3264_sendfile ++#define __NR_mmap __NR3264_mmap ++#define __NR_fadvise64 __NR3264_fadvise64 ++ ++#endif /* __VKI_SCNUMS_LOONGARCH64_LINUX_H */ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end vki-scnums-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/include/vki/vki-scnums-shared-linux.h b/include/vki/vki-scnums-shared-linux.h +index d90cdd312..068a2cd12 100644 +--- a/include/vki/vki-scnums-shared-linux.h ++++ b/include/vki/vki-scnums-shared-linux.h +@@ -43,9 +43,13 @@ + #define __NR_clone3 435 + #define __NR_close_range 436 + #define __NR_openat2 437 +- ++#define __NR_pidfd_getfd 438 + #define __NR_faccessat2 439 + ++#define __NR_epoll_pwait2 441 ++ + #define __NR_memfd_secret 447 + ++#define __NR_fchmodat2 452 ++ + #endif +diff --git a/massif/tests/Makefile.am b/massif/tests/Makefile.am +index 84c9b1273..2dec57d1e 100644 +--- a/massif/tests/Makefile.am ++++ b/massif/tests/Makefile.am +@@ -11,6 +11,8 @@ EXTRA_DIST = \ + big-alloc.post.exp big-alloc.post.exp-64bit big-alloc.post.exp-ppc64 \ + big-alloc.stderr.exp big-alloc.vgtest \ + 
big-alloc.post.exp-x86-freebsd \ ++ big-alloc.post.exp-loongarch64 \ ++ bug469146.post.exp bug469146.stderr.exp bug469146.vgtest \ + deep-A.post.exp deep-A.stderr.exp deep-A.vgtest \ + deep-B.post.exp deep-B.stderr.exp deep-B.vgtest \ + deep-C.post.exp deep-C.stderr.exp deep-C.vgtest \ +diff --git a/massif/tests/big-alloc.post.exp-loongarch64 b/massif/tests/big-alloc.post.exp-loongarch64 +new file mode 100644 +index 000000000..0dd5671af +--- /dev/null ++++ b/massif/tests/big-alloc.post.exp-loongarch64 +@@ -0,0 +1,54 @@ ++-------------------------------------------------------------------------------- ++Command: ./big-alloc ++Massif arguments: --stacks=no --time-unit=B --massif-out-file=massif.out --ignore-fn=__part_load_locale --ignore-fn=__time_load_locale --ignore-fn=dwarf2_unwind_dyld_add_image_hook --ignore-fn=get_or_create_key_element ++ms_print arguments: massif.out ++-------------------------------------------------------------------------------- ++ ++ ++ MB ++100.2^ : ++ | : ++ | @@@@@@@: ++ | @ : ++ | :::::::@ : ++ | : @ : ++ | :::::::: @ : ++ | : : @ : ++ | :::::::: : @ : ++ | : : : @ : ++ | :::::::: : : @ : ++ | : : : : @ : ++ | ::::::::: : : : @ : ++ | : : : : : @ : ++ | :::::::: : : : : @ : ++ | : : : : : : @ : ++ | :::::::: : : : : : @ : ++ | : : : : : : : @ : ++ | :::::::: : : : : : : @ : ++ | : : : : : : : : @ : ++ 0 +----------------------------------------------------------------------->MB ++ 0 100.2 ++ ++Number of snapshots: 11 ++ Detailed snapshots: [9] ++ ++-------------------------------------------------------------------------------- ++ n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B) ++-------------------------------------------------------------------------------- ++ 0 0 0 0 0 0 ++ 1 10,502,088 10,502,088 10,485,760 16,328 0 ++ 2 21,004,176 21,004,176 20,971,520 32,656 0 ++ 3 31,506,264 31,506,264 31,457,280 48,984 0 ++ 4 42,008,352 42,008,352 41,943,040 65,312 0 ++ 5 52,510,440 52,510,440 52,428,800 81,640 0 ++ 6 63,012,528 63,012,528 62,914,560 97,968 0 ++ 7 73,514,616 73,514,616 73,400,320 114,296 0 ++ 8 84,016,704 84,016,704 83,886,080 130,624 0 ++ 9 94,518,792 94,518,792 94,371,840 146,952 0 ++99.84% (94,371,840B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc. 
++->99.84% (94,371,840B) 0x........: main (big-alloc.c:12) ++ ++-------------------------------------------------------------------------------- ++ n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B) ++-------------------------------------------------------------------------------- ++ 10 105,020,880 105,020,880 104,857,600 163,280 0 +diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c +index 176c8e5cb..49f98948e 100644 +--- a/memcheck/mc_machine.c ++++ b/memcheck/mc_machine.c +@@ -1394,6 +1394,118 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB ) + offset,szB); + tl_assert(0); + # undef GOF ++# undef SZB ++ ++ /* ----------------- loongarch64 ----------------- */ ++ ++# elif defined(VGA_loongarch64) ++ ++# define GOF(_fieldname) \ ++ (offsetof(VexGuestLOONGARCH64State,guest_##_fieldname)) ++# define SZB(_fieldname) \ ++ (sizeof(((VexGuestLOONGARCH64State*)0)->guest_##_fieldname)) ++ ++ Int o = offset; ++ Int sz = szB; ++ Bool is48 = sz == 8 || sz == 4; ++ Bool is1248 = sz == 8 || sz == 4 || sz == 2 || sz == 1; ++ ++ tl_assert(sz > 0); ++ tl_assert(host_is_little_endian()); ++ ++ if (o == GOF(R0) && is1248) return o; ++ if (o == GOF(R1) && is1248) return o; ++ if (o == GOF(R2) && is1248) return o; ++ if (o == GOF(R3) && is1248) return o; ++ if (o == GOF(R4) && is1248) return o; ++ if (o == GOF(R5) && is1248) return o; ++ if (o == GOF(R6) && is1248) return o; ++ if (o == GOF(R7) && is1248) return o; ++ if (o == GOF(R8) && is1248) return o; ++ if (o == GOF(R9) && is1248) return o; ++ if (o == GOF(R10) && is1248) return o; ++ if (o == GOF(R11) && is1248) return o; ++ if (o == GOF(R12) && is1248) return o; ++ if (o == GOF(R13) && is1248) return o; ++ if (o == GOF(R14) && is1248) return o; ++ if (o == GOF(R15) && is1248) return o; ++ if (o == GOF(R16) && is1248) return o; ++ if (o == GOF(R17) && is1248) return o; ++ if (o == GOF(R18) && is1248) return o; ++ if (o == GOF(R19) && is1248) return o; ++ if (o == GOF(R20) && is1248) return o; ++ if (o == GOF(R21) && is1248) return o; ++ if (o == GOF(R22) && is1248) return o; ++ if (o == GOF(R23) && is1248) return o; ++ if (o == GOF(R24) && is1248) return o; ++ if (o == GOF(R25) && is1248) return o; ++ if (o == GOF(R26) && is1248) return o; ++ if (o == GOF(R27) && is1248) return o; ++ if (o == GOF(R28) && is1248) return o; ++ if (o == GOF(R29) && is1248) return o; ++ if (o == GOF(R30) && is1248) return o; ++ if (o == GOF(R31) && is1248) return o; ++ ++ if (o == GOF(PC) && sz == 8) return -1; /* slot unused */ ++ ++ if (o >= GOF(X0) && o + sz <= GOF(X0) + SZB(X0)) return GOF(X0); ++ if (o >= GOF(X1) && o + sz <= GOF(X1) + SZB(X1)) return GOF(X1); ++ if (o >= GOF(X2) && o + sz <= GOF(X2) + SZB(X2)) return GOF(X2); ++ if (o >= GOF(X3) && o + sz <= GOF(X3) + SZB(X3)) return GOF(X3); ++ if (o >= GOF(X4) && o + sz <= GOF(X4) + SZB(X4)) return GOF(X4); ++ if (o >= GOF(X5) && o + sz <= GOF(X5) + SZB(X5)) return GOF(X5); ++ if (o >= GOF(X6) && o + sz <= GOF(X6) + SZB(X6)) return GOF(X6); ++ if (o >= GOF(X7) && o + sz <= GOF(X7) + SZB(X7)) return GOF(X7); ++ if (o >= GOF(X8) && o + sz <= GOF(X8) + SZB(X8)) return GOF(X8); ++ if (o >= GOF(X9) && o + sz <= GOF(X9) + SZB(X9)) return GOF(X9); ++ if (o >= GOF(X10) && o + sz <= GOF(X10) + SZB(X10)) return GOF(X10); ++ if (o >= GOF(X11) && o + sz <= GOF(X11) + SZB(X11)) return GOF(X11); ++ if (o >= GOF(X12) && o + sz <= GOF(X12) + SZB(X12)) return GOF(X12); ++ if (o >= GOF(X13) && o + sz <= GOF(X13) + SZB(X13)) return GOF(X13); ++ if (o >= GOF(X14) && o + sz <= GOF(X14) + SZB(X14)) return 
GOF(X14); ++ if (o >= GOF(X15) && o + sz <= GOF(X15) + SZB(X15)) return GOF(X15); ++ if (o >= GOF(X16) && o + sz <= GOF(X16) + SZB(X16)) return GOF(X16); ++ if (o >= GOF(X17) && o + sz <= GOF(X17) + SZB(X17)) return GOF(X17); ++ if (o >= GOF(X18) && o + sz <= GOF(X18) + SZB(X18)) return GOF(X18); ++ if (o >= GOF(X19) && o + sz <= GOF(X19) + SZB(X19)) return GOF(X19); ++ if (o >= GOF(X20) && o + sz <= GOF(X20) + SZB(X20)) return GOF(X20); ++ if (o >= GOF(X21) && o + sz <= GOF(X21) + SZB(X21)) return GOF(X21); ++ if (o >= GOF(X22) && o + sz <= GOF(X22) + SZB(X22)) return GOF(X22); ++ if (o >= GOF(X23) && o + sz <= GOF(X23) + SZB(X23)) return GOF(X23); ++ if (o >= GOF(X24) && o + sz <= GOF(X24) + SZB(X24)) return GOF(X24); ++ if (o >= GOF(X25) && o + sz <= GOF(X25) + SZB(X25)) return GOF(X25); ++ if (o >= GOF(X26) && o + sz <= GOF(X26) + SZB(X26)) return GOF(X26); ++ if (o >= GOF(X27) && o + sz <= GOF(X27) + SZB(X27)) return GOF(X27); ++ if (o >= GOF(X28) && o + sz <= GOF(X28) + SZB(X28)) return GOF(X28); ++ if (o >= GOF(X29) && o + sz <= GOF(X29) + SZB(X29)) return GOF(X29); ++ if (o >= GOF(X30) && o + sz <= GOF(X30) + SZB(X30)) return GOF(X30); ++ if (o >= GOF(X31) && o + sz <= GOF(X31) + SZB(X31)) return GOF(X31); ++ ++ if (o == GOF(FCC0) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC1) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC2) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC3) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC4) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC5) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC6) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC7) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCSR) && sz == 4) return -1; /* slot unused */ ++ ++ if (o == GOF(EMNOTE) && sz == 4) return -1; /* slot unused */ ++ ++ if (o == GOF(CMSTART) && sz == 8) return -1; /* slot unused */ ++ if (o == GOF(CMLEN) && sz == 8) return -1; /* slot unused */ ++ ++ if (o == GOF(NRADDR) && sz == 8) return -1; /* slot unused */ ++ ++ if (o == GOF(LLSC_SIZE) && sz == 8) return -1; /* slot unused */ ++ if (o == GOF(LLSC_ADDR) && sz == 8) return -1; /* slot unused */ ++ if (o == GOF(LLSC_DATA) && is48) return -1; /* slot unused */ ++ ++ VG_(printf)("MC_(get_otrack_shadow_offset)(loongarch64)(off=%d,sz=%d)\n", ++ offset,szB); ++ tl_assert(0); ++# undef GOF + # undef SZB + + # else +@@ -1517,6 +1629,13 @@ IRType MC_(get_otrack_reg_array_equiv_int_type) ( IRRegArray* arr ) + VG_(printf)("\n"); + tl_assert(0); + ++ /* ----------------- loongarch64 ----------------- */ ++# elif defined(VGA_loongarch64) ++ VG_(printf)("get_reg_array_equiv_int_type(loongarch64): unhandled: "); ++ ppIRRegArray(arr); ++ VG_(printf)("\n"); ++ tl_assert(0); ++ + # else + # error "FIXME: not implemented for this architecture" + # endif +diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c +index 72ccb3c8c..4e441f069 100644 +--- a/memcheck/mc_translate.c ++++ b/memcheck/mc_translate.c +@@ -3385,6 +3385,7 @@ IRAtom* expr2vbits_Triop ( MCEnv* mce, + case Iop_MulD64: + case Iop_MulF64r32: + case Iop_DivF64: ++ case Iop_ScaleBF64: + case Iop_DivD64: + case Iop_DivF64r32: + case Iop_ScaleF64: +@@ -3404,6 +3405,7 @@ IRAtom* expr2vbits_Triop ( MCEnv* mce, + case Iop_SubF32: + case Iop_MulF32: + case Iop_DivF32: ++ case Iop_ScaleBF32: + /* I32(rm) x F32 x F32 -> I32 */ + return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); + case Iop_AddF16: +@@ -4410,6 +4412,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, + case 
Iop_TanF64: + case Iop_2xm1F64: + case Iop_SqrtF64: ++ case Iop_RSqrtF64: ++ case Iop_LogBF64: + case Iop_RecpExpF64: + /* I32(rm) x I64/F64 -> I64/F64 */ + return mkLazy2(mce, Ity_I64, vatom1, vatom2); +@@ -4471,6 +4475,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, + + case Iop_RoundF32toInt: + case Iop_SqrtF32: ++ case Iop_RSqrtF32: ++ case Iop_LogBF32: + case Iop_RecpExpF32: + /* I32(rm) x I32/F32 -> I32/F32 */ + return mkLazy2(mce, Ity_I32, vatom1, vatom2); +@@ -4553,11 +4559,15 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, + + case Iop_MaxNumF32: + case Iop_MinNumF32: ++ case Iop_MaxNumAbsF32: ++ case Iop_MinNumAbsF32: + /* F32 x F32 -> F32 */ + return mkLazy2(mce, Ity_I32, vatom1, vatom2); + + case Iop_MaxNumF64: + case Iop_MinNumF64: ++ case Iop_MaxNumAbsF64: ++ case Iop_MinNumAbsF64: + /* F64 x F64 -> F64 */ + return mkLazy2(mce, Ity_I64, vatom1, vatom2); + +@@ -8648,6 +8658,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, + mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive; + # elif defined(VGA_arm) + mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive; ++# elif defined(VGA_loongarch64) ++ mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive; ++ mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive; + # endif + + /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 9bbbe7bec..3f7931099 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -50,6 +50,9 @@ endif + if VGCONF_PLATFORMS_INCLUDE_ARM64_LINUX + SUBDIRS += arm64-linux + endif ++if VGCONF_PLATFORMS_INCLUDE_LOONGARCH64_LINUX ++SUBDIRS += loongarch64-linux ++endif + if VGCONF_PLATFORMS_INCLUDE_X86_SOLARIS + SUBDIRS += x86-solaris + endif +@@ -65,7 +68,7 @@ endif + + DIST_SUBDIRS = x86 amd64 ppc32 ppc64 s390x linux \ + darwin solaris x86-linux amd64-linux arm64-linux \ +- x86-solaris amd64-solaris mips32 mips64 \ ++ loongarch64-linux x86-solaris amd64-solaris mips32 mips64 \ + freebsd amd64-freebsd x86-freebsd \ + common . 
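
As a reading aid for the next file: the atomic_incs.c hunks below exercise Memcheck against LoongArch's LL/SC primitives. Each helper load-links the current value with ll.d, adds the operand, then tries to commit it with sc.d and loops until the store-conditional succeeds. A minimal standalone sketch of that retry pattern follows (my illustration only, assuming GCC-style inline asm on a loongarch64 target; the functions in the patch itself route everything through a block[] array so the test can also inspect the stored result):

   #include <stdint.h>

   /* Illustrative sketch, not part of the patch: atomically add n to *p.
      sc.d writes back its source register only if the reservation taken
      by ll.d is still held, and leaves 1 in that register on success,
      0 on failure, so the loop retries until the store lands. */
   static void atomic_add_u64(volatile uint64_t *p, uint64_t n)
   {
      uint64_t old, tmp;
      do {
         __asm__ __volatile__(
            "ll.d  %0, %2, 0  \n\t"   /* old = *p, acquire reservation   */
            "add.d %1, %0, %3 \n\t"   /* tmp = old + n                   */
            "sc.d  %1, %2, 0  \n\t"   /* try *p = tmp; %1 = 1 on success */
            : "=&r" (old), "=&r" (tmp)
            : "r" (p), "r" (n)
            : "memory");
      } while (tmp != 1);
   }
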
+ +diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c +index 1c738c530..8c0055082 100644 +--- a/memcheck/tests/atomic_incs.c ++++ b/memcheck/tests/atomic_incs.c +@@ -245,6 +245,29 @@ __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) + ); + } while (block[2] != 1); + #endif ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "andi $t2, $t2, 0xff \n\t" // n = n & 0xff ++ "li.d $s0, 0xff \n\t" ++ "nor $s0, $s0, $zero \n\t" // $s0 = 0xffffff00 ++ "ll.d $t3, $t1, 0 \n\t" // $t3 = old value ++ "and $s0, $s0, $t3 \n\t" // $s0 = $t3 & 0xffffff00 ++ "add.d $t3, $t3, $t2 \n\t" // $t3 = $t3 + n ++ "andi $t3, $t3, 0xff \n\t" // $t3 = $t3 & 0xff ++ "or $t3, $t3, $s0 \n\t" // $t3 = $t3 | $s0 ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" // save result ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "s0", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -461,6 +484,30 @@ __attribute__((noinline)) void atomic_add_16bit ( short* p, int n ) + ); + } while (block[2] != 1); + #endif ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "li.d $s0, 0xffff \n\t" ++ "and $t2, $t2, $s0 \n\t" // n = n & 0xffff ++ "nor $s0, $s0, $zero \n\t" // $s0= 0xffff0000 ++ "ll.d $t3, $t1, 0 \n\t" // $t3 = old value ++ "and $s0, $s0, $t3 \n\t" // $s0 = $t3 & 0xffff0000 ++ "add.d $t3, $t3, $t2 \n\t" // $t3 = $t3 + n ++ "li.d $t2, 0xffff \n\t" ++ "and $t3, $t3, $t2 \n\t" // $t3 = $t3 & 0xffff ++ "or $t3, $t3, $s0 \n\t" // $t3 = $t3 | $s0 ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" // save result ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "s0", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -616,6 +663,23 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n ) + : /*trash*/ "memory", "t0", "t1", "t2", "t3" + ); + } while (block[2] != 1); ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "ll.d $t3, $t1, 0 \n\t" ++ "add.d $t3, $t3, $t2 \n\t" ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -718,6 +782,23 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) + : /*trash*/ "memory", "t0", "t1", "t2", "t3" + ); + } while (block[2] != 1); ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "ll.d $t3, $t1, 0 \n\t" ++ "add.d $t3, $t3, $t2 \n\t" ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", 
"t2", "t3", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -731,7 +812,8 @@ __attribute__((noinline)) void atomic_add_128bit ( MyU128* p, + || defined(VGA_amd64) \ + || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) \ +- || defined(VGA_s390x) ++ || defined(VGA_s390x) \ ++ || defined(VGA_loongarch64) + /* do nothing; is not supported */ + #elif defined(VGA_arm64) + unsigned long long int block[3] +diff --git a/memcheck/tests/leak-segv-jmp.c b/memcheck/tests/leak-segv-jmp.c +index 97bddaf23..4890f5bcf 100644 +--- a/memcheck/tests/leak-segv-jmp.c ++++ b/memcheck/tests/leak-segv-jmp.c +@@ -182,6 +182,19 @@ extern UWord do_syscall_WRK ( + return out; + } + ++#elif defined(VGP_loongarch64_linux) ++extern UWord do_syscall_WRK (UWord a1, UWord a2, UWord a3, /* $a0, $a1, $a2 */ ++ UWord a4, UWord a5, UWord a6, /* $a3, $a4, $a5 */ ++ UWord syscall_no); /* $a6 */ ++asm ( ++ ".text \n\t" ++ ".globl do_syscall_WRK \n\t" ++ "do_syscall_WRK: \n\t" ++ " move $a7, $a6 \n\t" /* a7 = syscall_no */ ++ " syscall 0 \n\t" ++ " jr $ra \n\t" ++); ++ + #elif defined(VGP_x86_solaris) + extern ULong + do_syscall_WRK(UWord a1, UWord a2, UWord a3, +@@ -338,7 +351,7 @@ static void non_simd_mprotect (long tid, void* addr, long len) + &err); + if (err) + mprotect_result = -1; +-#elif defined(VGP_arm64_linux) ++#elif defined(VGP_arm64_linux) || defined (VGP_loongarch64_linux) + mprotect_result = do_syscall_WRK((UWord) addr, len, PROT_NONE, + 0, 0, 0, + __NR_mprotect); +diff --git a/memcheck/tests/leak-segv-jmp.stderr.exp b/memcheck/tests/leak-segv-jmp.stderr.exp +index b30fd76ac..0eea1785c 100644 +--- a/memcheck/tests/leak-segv-jmp.stderr.exp ++++ b/memcheck/tests/leak-segv-jmp.stderr.exp +@@ -14,8 +14,8 @@ To see them, rerun with: --leak-check=full --show-leak-kinds=all + expecting a leak + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +@@ -30,8 +30,8 @@ mprotect result 0 + expecting a leak again + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +@@ -46,8 +46,8 @@ full mprotect result 0 + expecting a leak again after full mprotect + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +@@ -62,13 +62,13 @@ mprotect result 0 + expecting heuristic not to crash after full mprotect + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + 200,000 bytes in 1 blocks are possibly lost in loss record ... of ... 
+ at 0x........: calloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:436) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:449) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +diff --git a/memcheck/tests/leak.h b/memcheck/tests/leak.h +index 79e3cd6ac..df9d2e759 100644 +--- a/memcheck/tests/leak.h ++++ b/memcheck/tests/leak.h +@@ -148,6 +148,27 @@ + do { \ + __asm__ __volatile__ ("movl $0, %ecx\n\t"); \ + } while (0) ++#elif defined (VGA_loongarch64) ++#define CLEAR_CALLER_SAVED_REGS \ ++ do { \ ++ __asm__ __volatile__ ("move $a0, $zero \n\t" \ ++ "move $a1, $zero \n\t" \ ++ "move $a2, $zero \n\t" \ ++ "move $a3, $zero \n\t" \ ++ "move $a4, $zero \n\t" \ ++ "move $a5, $zero \n\t" \ ++ "move $a6, $zero \n\t" \ ++ "move $a7, $zero \n\t" \ ++ "move $t0, $zero \n\t" \ ++ "move $t1, $zero \n\t" \ ++ "move $t2, $zero \n\t" \ ++ "move $t3, $zero \n\t" \ ++ "move $t4, $zero \n\t" \ ++ "move $t5, $zero \n\t" \ ++ "move $t6, $zero \n\t" \ ++ "move $t7, $zero \n\t" \ ++ "move $t8, $zero \n\t"); \ ++ } while (0) + #else + #define CLEAR_CALLER_SAVED_REGS /*nothing*/ + #endif +diff --git a/memcheck/tests/loongarch64-linux/Makefile.am b/memcheck/tests/loongarch64-linux/Makefile.am +new file mode 100644 +index 000000000..5afcaa4ec +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/Makefile.am +@@ -0,0 +1,17 @@ ++ ++include $(top_srcdir)/Makefile.tool-tests.am ++ ++dist_noinst_SCRIPTS = \ ++ filter_stderr ++ ++noinst_HEADERS = scalar.h ++ ++EXTRA_DIST = \ ++ scalar.stderr.exp scalar.vgtest ++ ++check_PROGRAMS = \ ++ scalar ++ ++AM_CFLAGS += @FLAG_M64@ ++AM_CXXFLAGS += @FLAG_M64@ ++AM_CCASFLAGS += @FLAG_M64@ +diff --git a/memcheck/tests/loongarch64-linux/filter_stderr b/memcheck/tests/loongarch64-linux/filter_stderr +new file mode 100755 +index 000000000..a778e971f +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/filter_stderr +@@ -0,0 +1,3 @@ ++#! /bin/sh ++ ++../filter_stderr "$@" +diff --git a/memcheck/tests/loongarch64-linux/scalar.c b/memcheck/tests/loongarch64-linux/scalar.c +new file mode 100644 +index 000000000..e39069213 +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.c +@@ -0,0 +1,1296 @@ ++/* This is the loongarch64 variant of memcheck/tests/x86-linux/scalar.c. ++ Syscalls are in x86 number order to make exp comparison easier. */ ++#define _GNU_SOURCE ++#include "../../memcheck.h" ++#include "scalar.h" ++#include ++#include ++#include ++#include // MREMAP_FIXED ++ ++// Here we are trying to trigger every syscall error (scalar errors and ++// memory errors) for every syscall. We do this by passing a lot of bogus ++// arguments, mostly 0 and 1 (often it's 1 because NULL ptr args often aren't ++// checked for memory errors, or in order to have a non-zero length used ++// with some buffer). So most of the syscalls don't actually succeed and do ++// anything. ++// ++// Occasionally we have to be careful not to cause Valgrind to seg fault in ++// its pre-syscall wrappers; it does so because it can't know in general ++// when memory is unaddressable, and so tries to dereference it when doing ++// PRE_MEM_READ/PRE_MEM_WRITE calls. (Note that Memcheck will ++// always issue an error message immediately before these seg faults occur). ++// ++// The output has numbers like "3s 2m" for each syscall. "s" is short for ++// "scalar", ie. the argument itself is undefined. "m" is short for "memory", ++// ie. the argument points to memory which is unaddressable. 
++int main(void) ++{ ++ // uninitialised, but we know px[0] is 0x0 ++ long* px = malloc(sizeof(long)); ++ long x0 = px[0]; ++ long res; ++ ++ // All __NR_xxx numbers are taken from x86 ++ ++ // __NR_restart_syscall 0 // XXX: not yet handled, perhaps should be... ++ GO(__NR_restart_syscall, "n/a"); ++ //SY(__NR_restart_syscall); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_exit 1 ++ GO(__NR_exit, "below"); ++ // (see below) ++ ++ // __NR_fork 2 ++ //GO(__NR_fork, "other"); ++ // (sse scalar_fork.c) ++ ++ // __NR_read 3 ++ // Nb: here we are also getting an error from the syscall arg itself. ++ GO(__NR_read, "1+3s 1m"); ++ SY(__NR_read+x0, x0, x0, x0+1); FAILx(EFAULT); ++ ++ // __NR_write 4 ++ GO(__NR_write, "3s 1m"); ++ SY(__NR_write, x0, x0, x0+1); FAIL; ++ ++ // __NR_open 5 ++ //GO(__NR_open, "(2-args) 2s 1m"); ++ //SY(__NR_open, x0, x0); FAIL; ++ ++ // Only 1s 0m errors -- the other 2s 1m have been checked in the previous ++ // open test, and if we test them they may be commoned up but they also ++ // may not. ++ //GO(__NR_open, "(3-args) 1s 0m"); ++ //SY(__NR_open, "scalar.c", O_CREAT|O_EXCL, x0); FAIL; ++ ++ // __NR_close 6 ++ GO(__NR_close, "1s 0m"); ++ SY(__NR_close, x0-1); FAIL; ++ ++ // __NR_waitpid 7 ++ //GO(__NR_waitpid, "3s 1m"); ++ //SY(__NR_waitpid, x0, x0+1, x0); FAIL; ++ ++ // __NR_creat 8 ++ //GO(__NR_creat, "2s 1m"); ++ //SY(__NR_creat, x0, x0); FAIL; ++ ++ // __NR_link 9 ++ //GO(__NR_link, "2s 2m"); ++ //SY(__NR_link, x0, x0); FAIL; ++ ++ // __NR_unlink 10 ++ //GO(__NR_unlink, "1s 1m"); ++ //SY(__NR_unlink, x0); FAIL; ++ ++ // __NR_execve 11 ++ GO(__NR_execve, "3s 1m"); ++ SY(__NR_execve, x0 + 1, x0 + 1, x0); FAIL; ++ ++ GO(__NR_execve, "3s 1m"); ++ SY(__NR_execve, x0 + 1, x0, x0 + 1); FAIL; ++ ++ char *argv_envp[] = {(char *) (x0 + 1), NULL}; ++ GO(__NR_execve, "4s 2m"); ++ SY(__NR_execve, x0 + 1, x0 + argv_envp, x0); FAIL; ++ char *argv_ok[] = {"frob", NULL}; ++ GO(__NR_execve, "4s 2m"); ++ SY(__NR_execve, x0 + 1, x0 + argv_ok, x0 + argv_envp); FAIL; ++ ++ // __NR_chdir 12 ++ GO(__NR_chdir, "1s 1m"); ++ SY(__NR_chdir, x0); FAIL; ++ ++ // __NR_time 13 ++ //GO(__NR_time, "1s 1m"); ++ //SY(__NR_time, x0+1); FAIL; ++ ++ // __NR_mknod 14 ++ //O(__NR_mknod, "3s 1m"); ++ //Y(__NR_mknod, x0, x0, x0); FAIL; ++ ++ // __NR_chmod 15 ++ //GO(__NR_chmod, "2s 1m"); ++ //SY(__NR_chmod, x0, x0); FAIL; ++ ++ // __NR_lchown 16 ++ //GO(__NR_lchown, "n/a"); ++ //SY(__NR_lchown); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_break 17 ++ //GO(__NR_break, "ni"); ++ //SY(__NR_break); FAIL; ++ ++ // __NR_oldstat 18 ++ //GO(__NR_oldstat, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_lseek 19 ++ GO(__NR_lseek, "3s 0m"); ++ SY(__NR_lseek, x0-1, x0, x0); FAILx(EBADF); ++ ++ // __NR_getpid 20 ++ GO(__NR_getpid, "0s 0m"); ++ SY(__NR_getpid); SUCC; ++ ++ // __NR_mount 21 ++ GO(__NR_mount, "5s 3m"); ++ SY(__NR_mount, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_umount 22 ++ //GO(__NR_umount, "1s 1m"); ++ //SY(__NR_umount, x0); FAIL; ++ ++ // __NR_setuid 23 ++ GO(__NR_setuid, "1s 0m"); ++ SY(__NR_setuid, x0-1); FAIL; ++ ++ // __NR_getuid 24 ++ GO(__NR_getuid, "0s 0m"); ++ SY(__NR_getuid); SUCC; ++ ++ // __NR_stime 25 ++ //GO(__NR_stime, "n/a"); ++ //SY(__NR_stime); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_ptrace 26 ++ // XXX: memory pointed to be arg3 goes unchecked... 
otherwise would be 2m ++ //GO(__NR_ptrace, "4s 1m"); ++ //SY(__NR_ptrace, x0+PTRACE_GETREGS, x0, x0, x0); FAIL; ++ ++ // __NR_alarm 27 ++ //GO(__NR_alarm, "1s 0m"); ++ //SY(__NR_alarm, x0); SUCC; ++ ++ // __NR_oldfstat 28 ++ //GO(__NR_oldfstat, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_pause 29 ++ //GO(__NR_pause, "ignore"); ++ // (hard to test, and no args so not much to be gained -- don't bother) ++ ++ // __NR_utime 30 ++ //GO(__NR_utime, "2s 2m"); ++ //SY(__NR_utime, x0, x0+1); FAIL; ++ ++ // __NR_stty 31 ++ //GO(__NR_stty, "ni"); ++ //SY(__NR_stty); FAIL; ++ ++ // __NR_gtty 32 ++ //GO(__NR_gtty, "ni"); ++ //SY(__NR_gtty); FAIL; ++ ++ // __NR_access 33 ++ //GO(__NR_access, "2s 1m"); ++ //SY(__NR_access, x0, x0); FAIL; ++ ++ // __NR_nice 34 ++ //GO(__NR_nice, "1s 0m"); ++ //SY(__NR_nice, x0); SUCC; ++ ++ // __NR_ftime 35 ++ //GO(__NR_ftime, "ni"); ++ //SY(__NR_ftime); FAIL; ++ ++ // __NR_sync 36 ++ GO(__NR_sync, "0s 0m"); ++ SY(__NR_sync); SUCC; ++ ++ // __NR_kill 37 ++ GO(__NR_kill, "2s 0m"); ++ SY(__NR_kill, x0, x0); SUCC; ++ ++ // __NR_rename 38 ++ //GO(__NR_rename, "2s 2m"); ++ //SY(__NR_rename, x0, x0); FAIL; ++ ++ // __NR_mkdir 39 ++ //GO(__NR_mkdir, "2s 1m"); ++ //SY(__NR_mkdir, x0, x0); FAIL; ++ ++ // __NR_rmdir 40 ++ //GO(__NR_rmdir, "1s 1m"); ++ //SY(__NR_rmdir, x0); FAIL; ++ ++ // __NR_dup 41 ++ GO(__NR_dup, "1s 0m"); ++ SY(__NR_dup, x0-1); FAIL; ++ ++ // __NR_pipe 42 ++ //GO(__NR_pipe, "1s 1m"); ++ //SY(__NR_pipe, x0); FAIL; ++ ++ // __NR_times 43 ++ GO(__NR_times, "1s 1m"); ++ SY(__NR_times, x0+1); FAIL; ++ ++ // __NR_prof 44 ++ //GO(__NR_prof, "ni"); ++ //SY(__NR_prof); FAIL; ++ ++ // __NR_brk 45 ++ GO(__NR_brk, "1s 0m"); ++ SY(__NR_brk, x0); SUCC; ++ ++ // __NR_setgid 46 ++ GO(__NR_setgid, "1s 0m"); ++ SY(__NR_setgid, x0-1); FAIL; ++ ++ // __NR_getgid 47 ++ GO(__NR_getgid, "0s 0m"); ++ SY(__NR_getgid); SUCC; ++ ++ // __NR_signal 48 ++ //GO(__NR_signal, "n/a"); ++ //SY(__NR_signal); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_geteuid 49 ++ GO(__NR_geteuid, "0s 0m"); ++ SY(__NR_geteuid); SUCC; ++ ++ // __NR_getegid 50 ++ GO(__NR_getegid, "0s 0m"); ++ SY(__NR_getegid); SUCC; ++ ++ // __NR_acct 51 ++ GO(__NR_acct, "1s 1m"); ++ SY(__NR_acct, x0-1); FAIL; ++ ++ // __NR_umount2 52 ++ GO(__NR_umount2, "2s 1m"); ++ SY(__NR_umount2, x0, x0); FAIL; ++ ++ // __NR_lock 53 ++ //GO(__NR_lock, "ni"); ++ //SY(__NR_lock); FAIL; ++ ++ // __NR_ioctl 54 ++#include ++ GO(__NR_ioctl, "3s 1m"); ++ SY(__NR_ioctl, x0, x0+TCSETS, x0); FAIL; ++ ++ // __NR_fcntl 55 ++ // As with sys_open(), the 'fd' error is suppressed for the later ones. ++ // For F_GETFD the 3rd arg is ignored ++ GO(__NR_fcntl, "(GETFD) 2s 0m"); ++ SY(__NR_fcntl, x0-1, x0+F_GETFD, x0); FAILx(EBADF); ++ ++ // For F_DUPFD the 3rd arg is 'arg'. We don't check the 1st two args ++ // because any errors may or may not be commoned up with the ones from ++ // the previous fcntl call. ++ GO(__NR_fcntl, "(DUPFD) 1s 0m"); ++ SY(__NR_fcntl, -1, F_DUPFD, x0); FAILx(EBADF); ++ ++ // For F_GETLK the 3rd arg is 'lock'. On x86, this fails w/EBADF. But ++ // on amd64 in 32-bit mode it fails w/EFAULT. We don't check the 1st two ++ // args for the reason given above. 
++ GO(__NR_fcntl, "(GETLK) 1s 5m"); ++ SY(__NR_fcntl, -1, F_GETLK, x0); FAIL; //FAILx(EBADF); ++ ++ // __NR_mpx 56 ++ //GO(__NR_mpx, "ni"); ++ //SY(__NR_mpx); FAIL; ++ ++ // __NR_setpgid 57 ++ GO(__NR_setpgid, "2s 0m"); ++ SY(__NR_setpgid, x0, x0-1); FAIL; ++ ++ // __NR_ulimit 58 ++ //GO(__NR_ulimit, "ni"); ++ //SY(__NR_ulimit); FAIL; ++ ++ // __NR_oldolduname 59 ++ //GO(__NR_oldolduname, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_umask 60 ++ GO(__NR_umask, "1s 0m"); ++ SY(__NR_umask, x0+022); SUCC; ++ ++ // __NR_chroot 61 ++ GO(__NR_chroot, "1s 1m"); ++ SY(__NR_chroot, x0); FAIL; ++ ++ // __NR_ustat 62 ++ //GO(__NR_ustat, "n/a"); ++ // (deprecated, not handled by Valgrind) ++ ++ // __NR_dup2 63 ++ //GO(__NR_dup2, "2s 0m"); ++ //SY(__NR_dup2, x0-1, x0); FAIL; ++ ++ // __NR_getppid 64 ++ GO(__NR_getppid, "0s 0m"); ++ SY(__NR_getppid); SUCC; ++ ++ // __NR_getpgrp 65 ++ //GO(__NR_getpgrp, "0s 0m"); ++ //SY(__NR_getpgrp); SUCC; ++ ++ // __NR_setsid 66 ++ GO(__NR_setsid, "0s 0m"); ++ SY(__NR_setsid); SUCC_OR_FAIL; ++ ++ // __NR_sigaction 67 ++ //GO(__NR_sigaction, "3s 4m"); ++ //SY(__NR_sigaction, x0, x0+&px[1], x0+&px[1]); FAIL; ++ ++ // __NR_sgetmask 68 sys_sgetmask() ++ //GO(__NR_sgetmask, "n/a"); ++ //SY(__NR_sgetmask); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_ssetmask 69 ++ //GO(__NR_ssetmask, "n/a"); ++ //SY(__NR_ssetmask); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setreuid 70 ++ GO(__NR_setreuid, "2s 0m"); ++ SY(__NR_setreuid, x0-1, x0-1); SUCC; ++ ++ // __NR_setregid 71 ++ GO(__NR_setregid, "2s 0m"); ++ SY(__NR_setregid, x0-1, x0-1); SUCC; ++ ++ // __NR_sigsuspend 72 ++ // XXX: how do you use this function? ++ //GO(__NR_sigsuspend, "ignore"); ++ // (I don't know how to test this...) ++ ++ // __NR_sigpending 73 ++ //GO(__NR_sigpending, "1s 1m"); ++ //SY(__NR_sigpending, x0); FAIL; ++ ++ // __NR_sethostname 74 ++ GO(__NR_sethostname, "n/a"); ++ //SY(__NR_sethostname); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setrlimit 75 ++ //GO(__NR_setrlimit, "2s 1m"); ++ //SY(__NR_setrlimit, x0, x0); FAIL; ++ ++ // __NR_getrlimit 76 ++ //GO(__NR_getrlimit, "2s 1m"); ++ //SY(__NR_getrlimit, x0, x0); FAIL; ++ ++ // __NR_getrusage 77 ++ GO(__NR_getrusage, "2s 1m"); ++ SY(__NR_getrusage, x0, x0); FAIL; ++ ++ // __NR_gettimeofday 78 ++ GO(__NR_gettimeofday, "2s 2m"); ++ SY(__NR_gettimeofday, x0+1, x0+1); FAIL; ++ ++ // __NR_settimeofday 79 ++ GO(__NR_settimeofday, "2s 2m"); ++ SY(__NR_settimeofday, x0+1, x0+1); FAIL; ++ ++ // __NR_getgroups 80 ++ GO(__NR_getgroups, "2s 1m"); ++ SY(__NR_getgroups, x0+1, x0+1); FAIL; ++ ++ // __NR_setgroups 81 ++ GO(__NR_setgroups, "2s 1m"); ++ SY(__NR_setgroups, x0+1, x0+1); FAIL; ++ ++ // __NR_select 82 ++ //{ ++ // long args[5] = { x0+8, x0+0xffffffee, x0+1, x0+1, x0+1 }; ++ // GO(__NR_select, "1s 5m"); ++ // SY(__NR_select, args+x0); FAIL; ++ //} ++ ++ // __NR_symlink 83 ++ //GO(__NR_symlink, "2s 2m"); ++ //SY(__NR_symlink, x0, x0); FAIL; ++ ++ // __NR_oldlstat 84 ++ //GO(__NR_oldlstat, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_readlink 85 ++ //GO(__NR_readlink, "3s 2m"); ++ //SY(__NR_readlink, x0+1, x0+1, x0+1); FAIL; ++ ++ // __NR_uselib 86 ++ //GO(__NR_uselib, "n/a"); ++ //SY(__NR_uselib); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_swapon 87 ++ GO(__NR_swapon, "n/a"); ++ //SY(__NR_swapon); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_reboot 88 ++ GO(__NR_reboot, "n/a"); ++ //SY(__NR_reboot); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_readdir 89 ++ 
//GO(__NR_readdir, "n/a"); ++ // (superseded, not handled by Valgrind) ++ ++ // __NR_mmap 90 ++ { ++ long args[6] = { x0, x0, x0, x0, x0-1, x0 }; ++ GO(__NR_mmap, "1s 1m"); ++ SY(__NR_mmap, args+x0); FAIL; ++ } ++ ++ // __NR_munmap 91 ++ GO(__NR_munmap, "2s 0m"); ++ SY(__NR_munmap, x0, x0); FAIL; ++ ++ // __NR_truncate 92 ++ GO(__NR_truncate, "2s 1m"); ++ SY(__NR_truncate, x0, x0); FAIL; ++ ++ // __NR_ftruncate 93 ++ GO(__NR_ftruncate, "2s 0m"); ++ SY(__NR_ftruncate, x0, x0); FAIL; ++ ++ // __NR_fchmod 94 ++ GO(__NR_fchmod, "2s 0m"); ++ SY(__NR_fchmod, x0-1, x0); FAIL; ++ ++ // __NR_fchown 95 ++ GO(__NR_fchown, "3s 0m"); ++ SY(__NR_fchown, x0-1, x0, x0); FAIL; ++ ++ // __NR_getpriority 96 ++ GO(__NR_getpriority, "2s 0m"); ++ SY(__NR_getpriority, x0-1, x0); FAIL; ++ ++ // __NR_setpriority 97 ++ GO(__NR_setpriority, "3s 0m"); ++ SY(__NR_setpriority, x0-1, x0, x0); FAIL; ++ ++ // __NR_profil 98 ++ //GO(__NR_profil, "ni"); ++ //SY(__NR_profil); FAIL; ++ ++ // __NR_statfs 99 ++ GO(__NR_statfs, "2s 2m"); ++ SY(__NR_statfs, x0, x0); FAIL; ++ ++ // __NR_fstatfs 100 ++ GO(__NR_fstatfs, "2s 1m"); ++ SY(__NR_fstatfs, x0, x0); FAIL; ++ ++ // __NR_ioperm 101 ++ //GO(__NR_ioperm, "3s 0m"); ++ //SY(__NR_ioperm, x0, x0, x0); FAIL; ++ ++ // __NR_socketcall 102 ++ //GO(__NR_socketcall, "XXX"); ++ // (XXX: need to do all sub-cases properly) ++ ++ // __NR_syslog 103 ++ GO(__NR_syslog, "3s 1m"); ++ SY(__NR_syslog, x0+2, x0, x0+1); FAIL; ++ ++ // __NR_setitimer 104 ++ GO(__NR_setitimer, "3s 2m"); ++ SY(__NR_setitimer, x0, x0+1, x0+1); FAIL; ++ ++ // __NR_getitimer 105 ++ GO(__NR_getitimer, "2s 1m"); ++ SY(__NR_getitimer, x0, x0, x0); FAIL; ++ ++ // __NR_stat 106 ++ //GO(__NR_stat, "2s 2m"); ++ //SY(__NR_stat, x0, x0); FAIL; ++ ++ // __NR_lstat 107 ++ //GO(__NR_lstat, "2s 2m"); ++ //SY(__NR_lstat, x0, x0); FAIL; ++ ++ // __NR_fstat 108 ++ //GO(__NR_fstat, "2s 1m"); ++ //SY(__NR_fstat, x0, x0); FAIL; ++ ++ // __NR_olduname 109 ++ //GO(__NR_olduname, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_iopl 110 ++ //GO(__NR_iopl, "1s 0m"); ++ //SY(__NR_iopl, x0+100); FAIL; ++ ++ // __NR_vhangup 111 ++ GO(__NR_vhangup, "0s 0m"); ++ SY(__NR_vhangup); SUCC_OR_FAIL; // Will succeed for superuser ++ ++ // __NR_idle 112 ++ //GO(__NR_idle, "ni"); ++ //SY(__NR_idle); FAIL; ++ ++ // __NR_vm86old 113 ++ //GO(__NR_vm86old, "n/a"); ++ // (will probably never be handled by Valgrind) ++ ++ // __NR_wait4 114 ++ GO(__NR_wait4, "4s 2m"); ++ SY(__NR_wait4, x0, x0+1, x0, x0+1); FAIL; ++ ++ // __NR_swapoff 115 ++ GO(__NR_swapoff, "n/a"); ++ //SY(__NR_swapoff); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_sysinfo 116 ++ GO(__NR_sysinfo, "1s 1m"); ++ SY(__NR_sysinfo, x0); FAIL; ++ ++ // __NR_ipc 117 ++ // XXX: This is simplistic -- need to do all the sub-cases properly. ++ // XXX: Also, should be 6 scalar errors, except glibc's syscall() doesn't ++ // use the 6th one! 
++ //GO(__NR_ipc, "5s 0m"); ++ //SY(__NR_ipc, x0+4, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_fsync 118 ++ GO(__NR_fsync, "1s 0m"); ++ SY(__NR_fsync, x0-1); FAIL; ++ ++ // __NR_sigreturn 119 ++ //GO(__NR_sigreturn, "n/a"); ++ //SY(__NR_sigreturn); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_clone 120 ++#ifndef CLONE_PARENT_SETTID ++#define CLONE_PARENT_SETTID 0x00100000 ++#endif ++ GO(__NR_clone, "5s 3m"); ++ SY(__NR_clone, x0|CLONE_PARENT_SETTID|CLONE_SETTLS|CLONE_CHILD_SETTID|SIGCHLD, x0, x0, x0, x0); FAIL; ++ if (0 == res) { ++ SY(__NR_exit, 0); FAIL; ++ } ++ ++ // __NR_setdomainname 121 ++ GO(__NR_setdomainname, "n/a"); ++ //SY(__NR_setdomainname); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_uname 122 ++ GO(__NR_uname, "1s 1m"); ++ SY(__NR_uname, x0); FAIL; ++ ++ // __NR_modify_ldt 123 ++ //GO(__NR_modify_ldt, "3s 1m"); ++ //SY(__NR_modify_ldt, x0+1, x0, x0+1); FAILx(EINVAL); ++ ++ // __NR_adjtimex 124 ++ // XXX: need to do properly, but deref'ing NULL causing Valgrind to crash... ++ GO(__NR_adjtimex, "XXX"); ++ //SY(__NR_adjtimex, x0); FAIL; ++ ++ // __NR_mprotect 125 ++ GO(__NR_mprotect, "3s 0m"); ++ SY(__NR_mprotect, x0+1, x0, x0); FAILx(EINVAL); ++ ++ // __NR_sigprocmask 126 ++ //GO(__NR_sigprocmask, "3s 2m"); ++ //SY(__NR_sigprocmask, x0, x0+&px[1], x0+&px[1]); SUCC; ++ ++ // __NR_create_module 127 ++ //GO(__NR_create_module, "ni"); ++ //SY(__NR_create_module); FAIL; ++ ++ // __NR_init_module 128 ++ GO(__NR_init_module, "3s 2m"); ++ SY(__NR_init_module, x0, x0+1, x0); FAIL; ++ ++ // __NR_delete_module 129 ++ GO(__NR_delete_module, "n/a"); ++ //SY(__NR_delete_module); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_get_kernel_syms 130 ++ //GO(__NR_get_kernel_syms, "ni"); ++ //SY(__NR_get_kernel_syms); FAIL; ++ ++ // __NR_quotactl 131 ++ GO(__NR_quotactl, "4s 1m"); ++ SY(__NR_quotactl, x0, x0, x0, x0); FAIL; ++ ++ // __NR_getpgid 132 ++ GO(__NR_getpgid, "1s 0m"); ++ SY(__NR_getpgid, x0-1); FAIL; ++ ++ // __NR_fchdir 133 ++ GO(__NR_fchdir, "1s 0m"); ++ SY(__NR_fchdir, x0-1); FAIL; ++ ++ // __NR_bdflush 134 ++ //GO(__NR_bdflush, "n/a"); ++ //SY(__NR_bdflush); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_sysfs 135 ++ //GO(__NR_sysfs, "n/a"); ++ //SY(__NR_sysfs); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_personality 136 ++ GO(__NR_personality, "1s 0m"); ++ SY(__NR_personality, x0+0xffffffff); SUCC; ++ ++ // __NR_afs_syscall 137 ++ //GO(__NR_afs_syscall, "ni"); ++ //SY(__NR_afs_syscall); FAIL; ++ ++ // __NR_setfsuid 138 ++ GO(__NR_setfsuid, "1s 0m"); ++ SY(__NR_setfsuid, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR_setfsgid 139 ++ GO(__NR_setfsgid, "1s 0m"); ++ SY(__NR_setfsgid, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR__llseek 140 ++ //GO(__NR__llseek, "5s 1m"); ++ //SY(__NR__llseek, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_getdents 141 ++ //GO(__NR_getdents, "3s 1m"); ++ //SY(__NR_getdents, x0, x0, x0+1); FAIL; ++ ++ // __NR__newselect 142 ++ //GO(__NR__newselect, "5s 4m"); ++ //SY(__NR__newselect, x0+8, x0+0xffffffff, x0+1, x0+1, x0+1); FAIL; ++ ++ // __NR_flock 143 ++ GO(__NR_flock, "2s 0m"); ++ SY(__NR_flock, x0, x0); FAIL; ++ ++ // __NR_msync 144 ++ GO(__NR_msync, "3s 1m"); ++ SY(__NR_msync, x0, x0+1, x0); FAIL; ++ ++ // __NR_readv 145 ++ GO(__NR_readv, "3s 1m"); ++ SY(__NR_readv, x0, x0, x0+1); FAIL; ++ ++ // __NR_writev 146 ++ GO(__NR_writev, "3s 1m"); ++ SY(__NR_writev, x0, x0, x0+1); FAIL; ++ ++ // __NR_getsid 147 ++ GO(__NR_getsid, "1s 0m"); ++ SY(__NR_getsid, x0-1); FAIL; ++ ++ // 
__NR_fdatasync 148 ++ GO(__NR_fdatasync, "1s 0m"); ++ SY(__NR_fdatasync, x0-1); FAIL; ++ ++ // __NR__sysctl 149 ++ //GO(__NR__sysctl, "1s 1m"); ++ //SY(__NR__sysctl, x0); FAIL; ++ ++ // __NR_mlock 150 ++ GO(__NR_mlock, "2s 0m"); ++ SY(__NR_mlock, x0, x0+1); FAIL; ++ ++ // __NR_munlock 151 ++ GO(__NR_munlock, "2s 0m"); ++ SY(__NR_munlock, x0, x0+1); FAIL; ++ ++ // __NR_mlockall 152 ++ GO(__NR_mlockall, "1s 0m"); ++ SY(__NR_mlockall, x0-1); FAIL; ++ ++ // __NR_munlockall 153 ++ GO(__NR_munlockall, "0s 0m"); ++ SY(__NR_munlockall); SUCC_OR_FAILx(EPERM); ++ ++ // __NR_sched_setparam 154 ++ GO(__NR_sched_setparam, "2s 1m"); ++ SY(__NR_sched_setparam, x0, x0); FAIL; ++ ++ // __NR_sched_getparam 155 ++ GO(__NR_sched_getparam, "2s 1m"); ++ SY(__NR_sched_getparam, x0, x0); FAIL; ++ ++ // __NR_sched_setscheduler 156 ++ GO(__NR_sched_setscheduler, "3s 1m"); ++ SY(__NR_sched_setscheduler, x0-1, x0, x0+1); FAIL; ++ ++ // __NR_sched_getscheduler 157 ++ GO(__NR_sched_getscheduler, "1s 0m"); ++ SY(__NR_sched_getscheduler, x0-1); FAIL; ++ ++ // __NR_sched_yield 158 ++ GO(__NR_sched_yield, "0s 0m"); ++ SY(__NR_sched_yield); SUCC; ++ ++ // __NR_sched_get_priority_max 159 ++ GO(__NR_sched_get_priority_max, "1s 0m"); ++ SY(__NR_sched_get_priority_max, x0-1); FAIL; ++ ++ // __NR_sched_get_priority_min 160 ++ GO(__NR_sched_get_priority_min, "1s 0m"); ++ SY(__NR_sched_get_priority_min, x0-1); FAIL; ++ ++ // __NR_sched_rr_get_interval 161 ++ GO(__NR_sched_rr_get_interval, "n/a"); ++ //SY(__NR_sched_rr_get_interval); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_nanosleep 162 ++ GO(__NR_nanosleep, "2s 2m"); ++ SY(__NR_nanosleep, x0, x0+1); FAIL; ++ ++ // __NR_mremap 163 ++ GO(__NR_mremap, "5s 0m"); ++ SY(__NR_mremap, x0+1, x0, x0, x0+MREMAP_FIXED, x0); FAILx(EINVAL); ++ ++ // __NR_setresuid 164 ++ GO(__NR_setresuid, "3s 0m"); ++ SY(__NR_setresuid, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresuid 165 ++ GO(__NR_getresuid, "3s 3m"); ++ SY(__NR_getresuid, x0, x0, x0); FAIL; ++ ++ // __NR_vm86 166 ++ //GO(__NR_vm86, "n/a"); ++ // (will probably never be handled by Valgrind) ++ ++ // __NR_query_module 167 ++ //GO(__NR_query_module, "ni"); ++ //SY(__NR_query_module); FAIL; ++ ++ // __NR_poll 168 ++ //GO(__NR_poll, "3s 1m"); ++ //SY(__NR_poll, x0, x0+1, x0); FAIL; ++ ++ // __NR_nfsservctl 169 ++ GO(__NR_nfsservctl, "n/a"); ++ //SY(__NR_nfsservctl); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setresgid 170 ++ GO(__NR_setresgid, "3s 0m"); ++ SY(__NR_setresgid, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresgid 171 ++ GO(__NR_getresgid, "3s 3m"); ++ SY(__NR_getresgid, x0, x0, x0); FAIL; ++ ++ // __NR_prctl 172 ++#include ++ GO(__NR_prctl, "5s 0m"); ++ SY(__NR_prctl, x0, x0, x0, x0, x0); FAIL; ++ ++ char buf16[16] = "123456789012345."; ++ buf16[15] = x0; // this will cause 'using unitialised value' ++ GO(__NR_prctl, "2s 0m"); ++ SY(__NR_prctl, x0 + PR_SET_NAME, buf16); SUCC; ++ ++ char buf17[17] = "1234567890123456."; ++ buf17[16] = x0; // this must not cause 'using unitialised value' ++ GO(__NR_prctl, "1s 0m"); ++ SY(__NR_prctl, x0 + PR_SET_NAME, buf17); SUCC; ++ ++ // __NR_rt_sigreturn 173 ++ GO(__NR_rt_sigreturn, "n/a"); ++ //SY(__NR_rt_sigreturn); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_rt_sigaction 174 ++ GO(__NR_rt_sigaction, "4s 4m"); ++ SY(__NR_rt_sigaction, x0, x0+&px[2], x0+&px[2], x0); FAIL; ++ ++ // __NR_rt_sigprocmask 175 ++ GO(__NR_rt_sigprocmask, "4s 2m"); ++ SY(__NR_rt_sigprocmask, x0, x0+1, x0+1, x0); FAIL; ++ ++ // __NR_rt_sigpending 176 ++ GO(__NR_rt_sigpending, "2s 1m"); ++ 
SY(__NR_rt_sigpending, x0, x0+1); FAIL; ++ ++ // __NR_rt_sigtimedwait 177 ++ GO(__NR_rt_sigtimedwait, "4s 3m"); ++ SY(__NR_rt_sigtimedwait, x0+1, x0+1, x0+1, x0); FAIL; ++ ++ // __NR_rt_sigqueueinfo 178 ++ GO(__NR_rt_sigqueueinfo, "3s 1m"); ++ SY(__NR_rt_sigqueueinfo, x0, x0+1, x0); FAIL; ++ ++ // __NR_rt_sigsuspend 179 ++ GO(__NR_rt_sigsuspend, "2s 1m"); ++ SY(__NR_rt_sigsuspend, x0 + 1, x0 + sizeof(sigset_t)); FAILx(EFAULT); ++ ++ // __NR_pread64 180 ++ GO(__NR_pread64, "5s 1m"); ++ SY(__NR_pread64, x0, x0, x0+1, x0, x0); FAIL; ++ ++ // __NR_pwrite64 181 ++ GO(__NR_pwrite64, "5s 1m"); ++ SY(__NR_pwrite64, x0, x0, x0+1, x0, x0); FAIL; ++ ++ // __NR_chown 182 ++ //GO(__NR_chown, "3s 1m"); ++ //SY(__NR_chown, x0, x0, x0); FAIL; ++ ++ // __NR_getcwd 183 ++ GO(__NR_getcwd, "2s 1m"); ++ SY(__NR_getcwd, x0, x0+1); FAIL; ++ ++ // __NR_capget 184 ++ GO(__NR_capget, "2s 2m"); ++ SY(__NR_capget, x0, x0+1); FAIL; ++ ++ // __NR_capset 185 ++ GO(__NR_capset, "2s 2m"); ++ SY(__NR_capset, x0, x0); FAIL; ++ ++ // __NR_sigaltstack 186 ++ { ++ struct our_sigaltstack { ++ void *ss_sp; ++ int ss_flags; ++ size_t ss_size; ++ } ss; ++ ss.ss_sp = NULL; ++ ss.ss_flags = 0; ++ ss.ss_size = 0; ++ VALGRIND_MAKE_MEM_NOACCESS(& ss, sizeof(struct our_sigaltstack)); ++ GO(__NR_sigaltstack, "2s 2m"); ++ SY(__NR_sigaltstack, x0+&ss, x0+&ss); SUCC; ++ } ++ ++ // __NR_sendfile 187 ++ GO(__NR_sendfile, "4s 1m"); ++ SY(__NR_sendfile, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_getpmsg 188 ++ // Could do 5s 4m with more effort, but I can't be bothered for this ++ // crappy non-standard syscall. ++ //GO(__NR_getpmsg, "5s 0m"); ++ //SY(__NR_getpmsg, x0, x0, x0, x0); FAIL; ++ ++ // __NR_putpmsg 189 ++ // Could do 5s 2m with more effort, but I can't be bothered for this ++ // crappy non-standard syscall. 
++ //GO(__NR_putpmsg, "5s 0m"); ++ //SY(__NR_putpmsg, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_vfork 190 ++ //GO(__NR_vfork, "other"); ++ // (sse scalar_vfork.c) ++ ++ // __NR_ugetrlimit 191 ++ //GO(__NR_ugetrlimit, "2s 1m"); ++ //SY(__NR_ugetrlimit, x0, x0); FAIL; ++ ++ // __NR_mmap2 192 ++ //GO(__NR_mmap2, "6s 0m"); ++ //SY(__NR_mmap2, x0, x0, x0, x0, x0-1, x0); FAIL; ++ ++ // __NR_truncate64 193 ++ //GO(__NR_truncate64, "3s 1m"); ++ //SY(__NR_truncate64, x0, x0, x0); FAIL; ++ ++ // __NR_ftruncate64 194 ++ //GO(__NR_ftruncate64, "3s 0m"); ++ //SY(__NR_ftruncate64, x0, x0, x0); FAIL; ++ ++ // __NR_stat64 195 ++ //GO(__NR_stat64, "2s 2m"); ++ //SY(__NR_stat64, x0, x0); FAIL; ++ ++ // __NR_lstat64 196 ++ //GO(__NR_lstat64, "2s 2m"); ++ //SY(__NR_lstat64, x0, x0); FAIL; ++ ++ // __NR_fstat64 197 ++ //GO(__NR_fstat64, "2s 1m"); ++ //SY(__NR_fstat64, x0, x0); FAIL; ++ ++ // __NR_lchown32 198 ++ //GO(__NR_lchown32, "3s 1m"); ++ //SY(__NR_lchown32, x0, x0, x0); FAIL; ++ ++ // __NR_getuid32 199 ++ //GO(__NR_getuid32, "0s 0m"); ++ //SY(__NR_getuid32); SUCC; ++ ++ // __NR_getgid32 200 ++ //GO(__NR_getgid32, "0s 0m"); ++ //SY(__NR_getgid32); SUCC; ++ ++ // __NR_geteuid32 201 ++ //GO(__NR_geteuid32, "0s 0m"); ++ //SY(__NR_geteuid32); SUCC; ++ ++ // __NR_getegid32 202 ++ //GO(__NR_getegid32, "0s 0m"); ++ //SY(__NR_getegid32); SUCC; ++ ++ // __NR_setreuid32 203 ++ //GO(__NR_setreuid32, "2s 0m"); ++ //SY(__NR_setreuid32, x0-1, x0-1); SUCC; ++ ++ // __NR_setregid32 204 ++ //GO(__NR_setregid32, "2s 0m"); ++ //SY(__NR_setregid32, x0-1, x0-1); SUCC; ++ ++ // __NR_getgroups32 205 ++ //GO(__NR_getgroups32, "2s 1m"); ++ //SY(__NR_getgroups32, x0+1, x0+1); FAIL; ++ ++ // __NR_setgroups32 206 ++ //GO(__NR_setgroups32, "2s 1m"); ++ //SY(__NR_setgroups32, x0+1, x0+1); FAIL; ++ ++ // __NR_fchown32 207 ++ //GO(__NR_fchown32, "3s 0m"); ++ //SY(__NR_fchown32, x0-1, x0, x0); FAIL; ++ ++ // __NR_setresuid32 208 ++ //GO(__NR_setresuid32, "3s 0m"); ++ //SY(__NR_setresuid32, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresuid32 209 ++ //GO(__NR_getresuid32, "3s 3m"); ++ //SY(__NR_getresuid32, x0, x0, x0); FAIL; ++ ++ // __NR_setresgid32 210 ++ //GO(__NR_setresgid32, "3s 0m"); ++ //SY(__NR_setresgid32, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresgid32 211 ++ //GO(__NR_getresgid32, "3s 3m"); ++ //SY(__NR_getresgid32, x0, x0, x0); FAIL; ++ ++ // __NR_chown32 212 ++ //GO(__NR_chown32, "3s 1m"); ++ //SY(__NR_chown32, x0, x0, x0); FAIL; ++ ++ // __NR_setuid32 213 ++ //GO(__NR_setuid32, "1s 0m"); ++ //SY(__NR_setuid32, x0-1); FAIL; ++ ++ // __NR_setgid32 214 ++ //GO(__NR_setgid32, "1s 0m"); ++ //SY(__NR_setgid32, x0-1); FAIL; ++ ++ // __NR_setfsuid32 215 ++ //GO(__NR_setfsuid32, "1s 0m"); ++ //SY(__NR_setfsuid32, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR_setfsgid32 216 ++ //GO(__NR_setfsgid32, "1s 0m"); ++ //SY(__NR_setfsgid32, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR_pivot_root 217 ++ GO(__NR_pivot_root, "n/a"); ++ //SY(__NR_pivot_root); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_mincore 218 ++ GO(__NR_mincore, "3s 1m"); ++ SY(__NR_mincore, x0, x0+40960, x0); FAIL; ++ ++ // __NR_madvise 219 ++ GO(__NR_madvise, "3s 0m"); ++ SY(__NR_madvise, x0, x0+1, x0); FAILx(ENOMEM); ++ ++ // __NR_getdents64 220 ++ GO(__NR_getdents64, "3s 1m"); ++ SY(__NR_getdents64, x0, x0, x0+1); FAIL; ++ ++ // __NR_fcntl64 221 ++ // As with sys_open(), we don't trigger errors for the 1st two args for ++ // the later ones. ++ // For F_GETFD the 3rd arg is ignored. 
++ //GO(__NR_fcntl64, "(GETFD) 2s 0m"); ++ //SY(__NR_fcntl64, x0-1, x0+F_GETFD, x0); FAILx(EBADF); ++ ++ // For F_DUPFD the 3rd arg is 'arg' ++ //GO(__NR_fcntl64, "(DUPFD) 1s 0m"); ++ //SY(__NR_fcntl64, -1, F_DUPFD, x0); FAILx(EBADF); ++ ++ // For F_GETLK the 3rd arg is 'lock'. ++ // On x86, this fails w/EBADF. But on amd64 in 32-bit mode it fails ++ // w/EFAULT. ++ //GO(__NR_fcntl64, "(GETLK) 1s 0m"); ++ //SY(__NR_fcntl64, -1, +F_GETLK, x0); FAIL; //FAILx(EBADF); ++ ++ // 222 ++ GO(222, "ni"); ++ SY(222); FAIL; ++ ++ // 223 ++ GO(223, "ni"); ++ SY(223); FAIL; ++ ++ // __NR_gettid 224 ++ GO(__NR_gettid, "n/a"); ++ //SY(__NR_gettid); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_readahead 225 ++ GO(__NR_readahead, "n/a"); ++ //SY(__NR_readahead); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setxattr 226 ++ GO(__NR_setxattr, "5s 3m"); ++ SY(__NR_setxattr, x0, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_lsetxattr 227 ++ GO(__NR_lsetxattr, "5s 3m"); ++ SY(__NR_lsetxattr, x0, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_fsetxattr 228 ++ GO(__NR_fsetxattr, "5s 2m"); ++ SY(__NR_fsetxattr, x0, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_getxattr 229 ++ GO(__NR_getxattr, "4s 3m"); ++ SY(__NR_getxattr, x0, x0, x0, x0+1); FAIL; ++ ++ // __NR_lgetxattr 230 ++ GO(__NR_lgetxattr, "4s 3m"); ++ SY(__NR_lgetxattr, x0, x0, x0, x0+1); FAIL; ++ ++ // __NR_fgetxattr 231 ++ GO(__NR_fgetxattr, "4s 2m"); ++ SY(__NR_fgetxattr, x0, x0, x0, x0+1); FAIL; ++ ++ // __NR_listxattr 232 ++ GO(__NR_listxattr, "3s 2m"); ++ SY(__NR_listxattr, x0, x0, x0+1); FAIL; ++ ++ // __NR_llistxattr 233 ++ GO(__NR_llistxattr, "3s 2m"); ++ SY(__NR_llistxattr, x0, x0, x0+1); FAIL; ++ ++ // __NR_flistxattr 234 ++ GO(__NR_flistxattr, "3s 1m"); ++ SY(__NR_flistxattr, x0-1, x0, x0+1); FAIL; /* kernel returns EBADF, but both seem correct */ ++ ++ // __NR_removexattr 235 ++ GO(__NR_removexattr, "2s 2m"); ++ SY(__NR_removexattr, x0, x0); FAIL; ++ ++ // __NR_lremovexattr 236 ++ GO(__NR_lremovexattr, "2s 2m"); ++ SY(__NR_lremovexattr, x0, x0); FAIL; ++ ++ // __NR_fremovexattr 237 ++ GO(__NR_fremovexattr, "2s 1m"); ++ SY(__NR_fremovexattr, x0, x0); FAIL; ++ ++ // __NR_tkill 238 ++ GO(__NR_tkill, "n/a"); ++ //SY(__NR_tkill); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_sendfile64 239 ++ //GO(__NR_sendfile64, "4s 1m"); ++ //SY(__NR_sendfile64, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_futex 240 ++#ifndef FUTEX_WAIT ++#define FUTEX_WAIT 0 ++#endif ++ // XXX: again, glibc not doing 6th arg means we have only 5s errors ++ GO(__NR_futex, "4s 2m"); ++ SY(__NR_futex, x0+FUTEX_WAIT, x0, x0, x0+1); FAIL; ++ ++ // __NR_sched_setaffinity 241 ++ GO(__NR_sched_setaffinity, "3s 1m"); ++ SY(__NR_sched_setaffinity, x0, x0+1, x0); FAIL; ++ ++ // __NR_sched_getaffinity 242 ++ GO(__NR_sched_getaffinity, "3s 1m"); ++ SY(__NR_sched_getaffinity, x0, x0+1, x0); FAIL; ++ ++ // __NR_set_thread_area 243 ++ //GO(__NR_set_thread_area, "1s 1m"); ++ //SY(__NR_set_thread_area, x0); FAILx(EFAULT); ++ ++ // __NR_get_thread_area 244 ++ //GO(__NR_get_thread_area, "1s 1m"); ++ //SY(__NR_get_thread_area, x0); FAILx(EFAULT); ++ ++ // __NR_io_setup 245 ++ GO(__NR_io_setup, "2s 1m"); ++ SY(__NR_io_setup, x0, x0); FAIL; ++ ++ // __NR_io_destroy 246 ++ { ++ // jump through hoops to prevent the PRE(io_destroy) wrapper crashing. ++ struct fake_aio_ring { ++ unsigned id; /* kernel internal index number */ ++ unsigned nr; /* number of io_events */ ++ // There are more fields in the real aio_ring, but the 'nr' field is ++ // the only one used by the PRE() wrapper. 
++ } ring = { 0, 0 };
++ struct fake_aio_ring* ringptr = &ring;
++ GO(__NR_io_destroy, "1s 0m");
++ SY(__NR_io_destroy, x0+&ringptr); FAIL;
++ }
++
++ // __NR_io_getevents 247
++ GO(__NR_io_getevents, "5s 2m");
++ SY(__NR_io_getevents, x0, x0, x0+1, x0, x0+1); FAIL;
++
++ // __NR_io_submit 248
++ GO(__NR_io_submit, "3s 1m");
++ SY(__NR_io_submit, x0, x0+1, x0); FAIL;
++
++ // __NR_io_cancel 249
++ GO(__NR_io_cancel, "3s 2m");
++ SY(__NR_io_cancel, x0, x0, x0); FAIL;
++
++ // __NR_fadvise64 250
++ GO(__NR_fadvise64, "n/a");
++ //SY(__NR_fadvise64); // (Not yet handled by Valgrind) FAIL;
++
++ // 251
++ GO(251, "ni");
++ SY(251); FAIL;
++
++ // __NR_exit_group 252
++ GO(__NR_exit_group, "other");
++ // (see scalar_exit_group.c)
++
++ // __NR_lookup_dcookie 253
++ GO(__NR_lookup_dcookie, "4s 1m");
++ SY(__NR_lookup_dcookie, x0, x0, x0, x0+1); FAIL;
++
++ // __NR_epoll_create 254
++ //GO(__NR_epoll_create, "1s 0m");
++ //SY(__NR_epoll_create, x0); SUCC_OR_FAIL;
++
++ // __NR_epoll_ctl 255
++ GO(__NR_epoll_ctl, "4s 1m");
++ SY(__NR_epoll_ctl, x0, x0, x0, x0); FAIL;
++
++ // __NR_epoll_wait 256
++ //GO(__NR_epoll_wait, "4s 1m");
++ //SY(__NR_epoll_wait, x0, x0, x0+1, x0); FAIL;
++
++ // __NR_remap_file_pages 257
++ GO(__NR_remap_file_pages, "n/a");
++ //SY(__NR_remap_file_pages); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_set_tid_address 258
++ GO(__NR_set_tid_address, "1s 0m");
++ SY(__NR_set_tid_address, x0); SUCC_OR_FAILx(ENOSYS);
++
++ // __NR_timer_create 259
++ GO(__NR_timer_create, "3s 2m");
++ SY(__NR_timer_create, x0, x0+1, x0); FAIL;
++
++ // __NR_timer_settime (__NR_timer_create+1)
++ GO(__NR_timer_settime, "4s 2m");
++ SY(__NR_timer_settime, x0, x0, x0, x0+1); FAIL;
++
++ // __NR_timer_gettime (__NR_timer_create+2)
++ GO(__NR_timer_gettime, "2s 1m");
++ SY(__NR_timer_gettime, x0, x0); FAIL;
++
++ // __NR_timer_getoverrun (__NR_timer_create+3)
++ GO(__NR_timer_getoverrun, "1s 0m");
++ SY(__NR_timer_getoverrun, x0); FAIL;
++
++ // __NR_timer_delete (__NR_timer_create+4)
++ GO(__NR_timer_delete, "1s 0m");
++ SY(__NR_timer_delete, x0); FAIL;
++
++ // __NR_clock_settime (__NR_timer_create+5)
++ GO(__NR_clock_settime, "2s 1m");
++ SY(__NR_clock_settime, x0, x0); FAIL; FAIL;
++
++ // __NR_clock_gettime (__NR_timer_create+6)
++ GO(__NR_clock_gettime, "2s 1m");
++ SY(__NR_clock_gettime, x0, x0); FAIL;
++
++ // __NR_clock_getres (__NR_timer_create+7)
++ GO(__NR_clock_getres, "2s 1m");
++ SY(__NR_clock_getres, x0+1, x0+1); FAIL; FAIL;
++
++ // __NR_clock_nanosleep (__NR_timer_create+8)
++ GO(__NR_clock_nanosleep, "n/a");
++ //SY(__NR_clock_nanosleep); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_statfs64 268
++ //GO(__NR_statfs64, "3s 2m");
++ //SY(__NR_statfs64, x0, x0+1, x0); FAIL;
++
++ // __NR_fstatfs64 269
++ //GO(__NR_fstatfs64, "3s 1m");
++ //SY(__NR_fstatfs64, x0, x0+1, x0); FAIL;
++
++ // __NR_tgkill 270
++ GO(__NR_tgkill, "n/a");
++ //SY(__NR_tgkill); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_utimes 271
++ //GO(__NR_utimes, "2s 2m");
++ //SY(__NR_utimes, x0, x0+1); FAIL;
++
++ // __NR_fadvise64_64 272
++ //GO(__NR_fadvise64_64, "n/a");
++ //SY(__NR_fadvise64_64); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_vserver 273
++ //GO(__NR_vserver, "ni");
++ //SY(__NR_vserver); FAIL;
++
++ // __NR_mbind 274
++ GO(__NR_mbind, "n/a");
++ //SY(__NR_mbind); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_get_mempolicy 275
++ GO(__NR_get_mempolicy, "n/a");
++ //SY(__NR_get_mempolicy); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_set_mempolicy 276
++
GO(__NR_set_mempolicy, "n/a"); ++ //SY(__NR_set_mempolicy); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_mq_open 277 ++ GO(__NR_mq_open, "4s 3m"); ++ SY(__NR_mq_open, x0, x0+O_CREAT, x0, x0+1); FAIL; ++ ++ // __NR_mq_unlink (__NR_mq_open+1) ++ GO(__NR_mq_unlink, "1s 1m"); ++ SY(__NR_mq_unlink, x0); FAIL; ++ ++ // __NR_mq_timedsend (__NR_mq_open+2) ++ GO(__NR_mq_timedsend, "5s 2m"); ++ SY(__NR_mq_timedsend, x0, x0, x0+1, x0, x0+1); FAIL; ++ ++ // __NR_mq_timedreceive (__NR_mq_open+3) ++ GO(__NR_mq_timedreceive, "5s 3m"); ++ SY(__NR_mq_timedreceive, x0, x0, x0+1, x0+1, x0+1); FAIL; ++ ++ // __NR_mq_notify (__NR_mq_open+4) ++ GO(__NR_mq_notify, "2s 1m"); ++ SY(__NR_mq_notify, x0, x0+1); FAIL; ++ ++ // __NR_mq_getsetattr (__NR_mq_open+5) ++ GO(__NR_mq_getsetattr, "3s 2m"); ++ SY(__NR_mq_getsetattr, x0, x0+1, x0+1); FAIL; ++ ++ // __NR_sys_kexec_load 283 ++ //GO(__NR_sys_kexec_load, "ni"); ++ //SY(__NR_sys_kexec_load); FAIL; ++ ++ // __NR_epoll_create1 329 ++ GO(__NR_epoll_create1, "1s 0m"); ++ SY(__NR_epoll_create1, x0); SUCC_OR_FAIL; ++ ++ // __NR_process_vm_readv 347 ++ GO(__NR_process_vm_readv, "6s 2m"); ++ SY(__NR_process_vm_readv, x0, x0, x0+1, x0, x0+1, x0); FAIL; ++ ++ // __NR_process_vm_writev 348 ++ GO(__NR_process_vm_writev, "6s 2m"); ++ SY(__NR_process_vm_writev, x0, x0, x0+1, x0, x0+1, x0); FAIL; ++ ++ // no such syscall... ++ GO(9999, "1e"); ++ SY(9999); FAIL; ++ ++ // __NR_exit 1 ++ GO(__NR_exit, "1s 0m"); ++ SY(__NR_exit, x0); FAIL; ++ ++ assert(0); ++} +diff --git a/memcheck/tests/loongarch64-linux/scalar.h b/memcheck/tests/loongarch64-linux/scalar.h +new file mode 100644 +index 000000000..4d86d2c0a +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.h +@@ -0,0 +1,62 @@ ++/* This is the loongarch64 variant of memcheck/tests/x86-linux/scalar.h */ ++#include "../../../include/vki/vki-scnums-loongarch64-linux.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++// Since we use vki_unistd.h, we can't include . So we have to ++// declare this ourselves. ++extern long int syscall (long int __sysno, ...) __THROW; ++ ++// Thorough syscall scalar arg checking. Also serves as thorough checking ++// for (very) basic syscall use. Generally not trying to do anything ++// meaningful with the syscalls. 
++ ++#define GO(__NR_xxx, s) \ ++ fprintf(stderr, "-----------------------------------------------------\n" \ ++ "%3d:%20s %s\n" \ ++ "-----------------------------------------------------\n", \ ++ __NR_xxx, #__NR_xxx, s); ++ ++#define SY res = syscall ++ ++#define FAIL assert(-1 == res); ++#define SUCC assert(-1 != res); ++#define SUCC_OR_FAIL /* no test */ ++ ++#define FAILx(E) \ ++ do { \ ++ int myerrno = errno; \ ++ if (-1 == res) { \ ++ if (E == myerrno) { \ ++ /* as expected */ \ ++ } else { \ ++ fprintf(stderr, "Expected error %s (%d), got %d\n", #E, E, myerrno); \ ++ exit(1); \ ++ } \ ++ } else { \ ++ fprintf(stderr, "Expected error %s (%d), got success\n", #E, E); \ ++ exit(1); \ ++ } \ ++ } while (0); ++ ++#define SUCC_OR_FAILx(E) \ ++ do { \ ++ int myerrno = errno; \ ++ if (-1 == res) { \ ++ if (E == myerrno) { \ ++ /* as expected */ \ ++ } else { \ ++ fprintf(stderr, "Expected error %s (%d), got %d\n", #E, E, myerrno); \ ++ exit(1); \ ++ } \ ++ } \ ++ } while (0); +diff --git a/memcheck/tests/loongarch64-linux/scalar.stderr.exp b/memcheck/tests/loongarch64-linux/scalar.stderr.exp +new file mode 100644 +index 000000000..a90b90af5 +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.stderr.exp +@@ -0,0 +1,2716 @@ ++----------------------------------------------------- ++128:__NR_restart_syscall n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 93: __NR_exit below ++----------------------------------------------------- ++----------------------------------------------------- ++ 63: __NR_read 1+3s 1m ++----------------------------------------------------- ++Syscall param (syscallno) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 64: __NR_write 3s 1m ++----------------------------------------------------- ++Syscall param write(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ ++Syscall param write(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ ++Syscall param write(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ ++Syscall param write(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 57: __NR_close 1s 0m ++----------------------------------------------------- ++Syscall param close(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:69) ++ ++----------------------------------------------------- ++221: __NR_execve 3s 1m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:89) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(argv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++221: __NR_execve 3s 1m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(argv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++221: __NR_execve 4s 2m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(argv) points to uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++Syscall param execve(argv[0]) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++221: __NR_execve 4s 2m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(envp) points to uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++Syscall param execve(envp[i]) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 49: __NR_chdir 1s 1m ++----------------------------------------------------- ++Syscall param chdir(path) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:103) ++ ++Syscall param chdir(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:103) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 62: __NR_lseek 3s 0m ++----------------------------------------------------- ++Syscall param lseek(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:131) ++ ++Syscall param lseek(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:131) ++ ++Syscall param lseek(whence) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:131) ++ ++----------------------------------------------------- ++172: __NR_getpid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++ 40: __NR_mount 5s 3m ++----------------------------------------------------- ++Syscall param mount(source) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(target) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(type) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(data) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(target) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mount(type) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++146: __NR_setuid 1s 0m ++----------------------------------------------------- ++Syscall param setuid(uid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:147) ++ ++----------------------------------------------------- ++174: __NR_getuid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++ 81: __NR_sync 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++129: __NR_kill 2s 0m ++----------------------------------------------------- ++Syscall param kill(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:204) ++ ++Syscall param kill(signal) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:204) ++ ++----------------------------------------------------- ++ 23: __NR_dup 1s 0m ++----------------------------------------------------- ++Syscall param dup(oldfd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:220) ++ ++----------------------------------------------------- ++153: __NR_times 1s 1m ++----------------------------------------------------- ++Syscall param times(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:228) ++ ++Syscall param times(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:228) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++214: __NR_brk 1s 0m ++----------------------------------------------------- ++Syscall param brk(end_data_segment) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:236) ++ ++----------------------------------------------------- ++144: __NR_setgid 1s 0m ++----------------------------------------------------- ++Syscall param setgid(gid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:240) ++ ++----------------------------------------------------- ++176: __NR_getgid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++175: __NR_geteuid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++177: __NR_getegid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++ 89: __NR_acct 1s 1m ++----------------------------------------------------- ++Syscall param acct(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:260) ++ ++Syscall param acct(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:260) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 39: __NR_umount2 2s 1m ++----------------------------------------------------- ++Syscall param umount2(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:264) ++ ++Syscall param umount2(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:264) ++ ++Syscall param umount2(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:264) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 29: __NR_ioctl 3s 1m ++----------------------------------------------------- ++Syscall param ioctl(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ ++Syscall param ioctl(request) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ ++Syscall param ioctl(arg) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ ++Syscall param ioctl(TCSET{S,SW,SF}) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 25: __NR_fcntl (GETFD) 2s 0m ++----------------------------------------------------- ++Syscall param fcntl(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:279) ++ ++Syscall param fcntl(cmd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:279) ++ ++----------------------------------------------------- ++ 25: __NR_fcntl (DUPFD) 1s 0m ++----------------------------------------------------- ++Syscall param fcntl(arg) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:285) ++ ++----------------------------------------------------- ++ 25: __NR_fcntl (GETLK) 1s 5m ++----------------------------------------------------- ++Syscall param fcntl(lock) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ ++Syscall param fcntl(lock->l_type) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_whence) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_start) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_len) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_pid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++154: __NR_setpgid 2s 0m ++----------------------------------------------------- ++Syscall param setpgid(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:299) ++ ++Syscall param setpgid(pgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:299) ++ ++----------------------------------------------------- ++166: __NR_umask 1s 0m ++----------------------------------------------------- ++Syscall param umask(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:311) ++ ++----------------------------------------------------- ++ 51: __NR_chroot 1s 1m ++----------------------------------------------------- ++Syscall param chroot(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:315) ++ ++Syscall param chroot(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:315) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++173: __NR_getppid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++157: __NR_setsid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++145: __NR_setreuid 2s 0m ++----------------------------------------------------- ++Syscall param setreuid(ruid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:351) ++ ++Syscall param setreuid(euid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:351) ++ ++----------------------------------------------------- ++143: __NR_setregid 2s 0m ++----------------------------------------------------- ++Syscall param setregid(rgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:355) ++ ++Syscall param setregid(egid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:355) ++ ++----------------------------------------------------- ++161: __NR_sethostname n/a ++----------------------------------------------------- ++----------------------------------------------------- ++165: __NR_getrusage 2s 1m ++----------------------------------------------------- ++Syscall param getrusage(who) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:380) ++ ++Syscall param getrusage(usage) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:380) ++ ++Syscall param getrusage(usage) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:380) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++169: __NR_gettimeofday 2s 2m ++----------------------------------------------------- ++Syscall param gettimeofday(tv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ ++Syscall param gettimeofday(tz) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ ++Syscall param gettimeofday(tv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param gettimeofday(tz) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++170: __NR_settimeofday 2s 2m ++----------------------------------------------------- ++Syscall param settimeofday(tv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ ++Syscall param settimeofday(tz) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ ++Syscall param settimeofday(tv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param settimeofday(tz) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++158: __NR_getgroups 2s 1m ++----------------------------------------------------- ++Syscall param getgroups(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:392) ++ ++Syscall param getgroups(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:392) ++ ++Syscall param getgroups(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:392) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++159: __NR_setgroups 2s 1m ++----------------------------------------------------- ++Syscall param setgroups(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:396) ++ ++Syscall param setgroups(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:396) ++ ++Syscall param setgroups(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:396) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++224: __NR_swapon n/a ++----------------------------------------------------- ++----------------------------------------------------- ++142: __NR_reboot n/a ++----------------------------------------------------- ++----------------------------------------------------- ++222: __NR_mmap 1s 1m ++----------------------------------------------------- ++Syscall param mmap(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:437) ++ ++----------------------------------------------------- ++215: __NR_munmap 2s 0m ++----------------------------------------------------- ++Syscall param munmap(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:442) ++ ++Syscall param munmap(length) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:442) ++ ++----------------------------------------------------- ++ 45: __NR_truncate 2s 1m ++----------------------------------------------------- ++Syscall param truncate(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:446) ++ ++ ++More than 100 errors detected. Subsequent errors ++will still be recorded, but in less detail than before. ++Syscall param truncate(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:446) ++ ++Syscall param truncate(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:446) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 46: __NR_ftruncate 2s 0m ++----------------------------------------------------- ++Syscall param ftruncate(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:450) ++ ++Syscall param ftruncate(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:450) ++ ++----------------------------------------------------- ++ 52: __NR_fchmod 2s 0m ++----------------------------------------------------- ++Syscall param fchmod(fildes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:454) ++ ++Syscall param fchmod(mode) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:454) ++ ++----------------------------------------------------- ++ 55: __NR_fchown 3s 0m ++----------------------------------------------------- ++Syscall param fchown(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:458) ++ ++Syscall param fchown(owner) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:458) ++ ++Syscall param fchown(group) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:458) ++ ++----------------------------------------------------- ++141: __NR_getpriority 2s 0m ++----------------------------------------------------- ++Syscall param getpriority(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:462) ++ ++Syscall param getpriority(who) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:462) ++ ++----------------------------------------------------- ++140: __NR_setpriority 3s 0m ++----------------------------------------------------- ++Syscall param setpriority(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:466) ++ ++Syscall param setpriority(who) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:466) ++ ++Syscall param setpriority(prio) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:466) ++ ++----------------------------------------------------- ++ 43: __NR_statfs 2s 2m ++----------------------------------------------------- ++Syscall param statfs(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ ++Syscall param statfs(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ ++Syscall param statfs(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param statfs(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 44: __NR_fstatfs 2s 1m ++----------------------------------------------------- ++Syscall param fstatfs(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:478) ++ ++Syscall param fstatfs(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:478) ++ ++Syscall param fstatfs(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:478) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++116: __NR_syslog 3s 1m ++----------------------------------------------------- ++Syscall param syslog(type) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ ++Syscall param syslog(bufp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ ++Syscall param syslog(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ ++Syscall param syslog(bufp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++103: __NR_setitimer 3s 2m ++----------------------------------------------------- ++Syscall param setitimer(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ ++Syscall param setitimer(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ ++Syscall param setitimer(ovalue) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ ++Syscall param setitimer(&value->it_interval) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setitimer(&value->it_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setitimer(&ovalue->it_interval) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setitimer(&ovalue->it_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++102: __NR_getitimer 2s 1m ++----------------------------------------------------- ++Syscall param getitimer(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ ++Syscall param getitimer(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ ++Syscall param getitimer(&value->it_interval) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getitimer(&value->it_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 58: __NR_vhangup 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++260: __NR_wait4 4s 2m ++----------------------------------------------------- ++Syscall param wait4(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(status) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(options) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(rusage) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(status) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param wait4(rusage) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++225: __NR_swapoff n/a ++----------------------------------------------------- ++----------------------------------------------------- ++179: __NR_sysinfo 1s 1m ++----------------------------------------------------- ++Syscall param sysinfo(info) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:542) ++ ++Syscall param sysinfo(info) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:542) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 82: __NR_fsync 1s 0m ++----------------------------------------------------- ++Syscall param fsync(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:553) ++ ++----------------------------------------------------- ++220: __NR_clone 5s 3m ++----------------------------------------------------- ++Syscall param clone(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(child_stack) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(parent_tidptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(parent_tidptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param clone(tlsinfo) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(child_tidptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(child_tidptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++162: __NR_setdomainname n/a ++----------------------------------------------------- ++----------------------------------------------------- ++160: __NR_uname 1s 1m ++----------------------------------------------------- ++Syscall param uname(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:575) ++ ++Syscall param uname(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:575) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++171: __NR_adjtimex XXX ++----------------------------------------------------- ++----------------------------------------------------- ++226: __NR_mprotect 3s 0m ++----------------------------------------------------- ++Syscall param mprotect(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:588) ++ ++Syscall param mprotect(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:588) ++ ++Syscall param mprotect(prot) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:588) ++ ++----------------------------------------------------- ++105: __NR_init_module 3s 2m ++----------------------------------------------------- ++Syscall param init_module(umod) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ ++Syscall param init_module(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ ++Syscall param init_module(uargs) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ ++Syscall param init_module(umod) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param init_module(uargs) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++106: __NR_delete_module n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 60: __NR_quotactl 4s 1m ++----------------------------------------------------- ++Syscall param quotactl(cmd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(special) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(special) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++155: __NR_getpgid 1s 0m ++----------------------------------------------------- ++Syscall param getpgid(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:616) ++ ++----------------------------------------------------- ++ 50: __NR_fchdir 1s 0m ++----------------------------------------------------- ++Syscall param fchdir(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:620) ++ ++----------------------------------------------------- ++ 92: __NR_personality 1s 0m ++----------------------------------------------------- ++Syscall param personality(persona) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:632) ++ ++----------------------------------------------------- ++151: __NR_setfsuid 1s 0m ++----------------------------------------------------- ++Syscall param setfsuid(uid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:640) ++ ++----------------------------------------------------- ++152: __NR_setfsgid 1s 0m ++----------------------------------------------------- ++Syscall param setfsgid(gid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:644) ++ ++----------------------------------------------------- ++ 32: __NR_flock 2s 0m ++----------------------------------------------------- ++Syscall param flock(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:660) ++ ++Syscall param flock(operation) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:660) ++ ++----------------------------------------------------- ++227: __NR_msync 3s 1m ++----------------------------------------------------- ++Syscall param msync(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ ++Syscall param msync(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ ++Syscall param msync(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ ++Syscall param msync(start) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 65: __NR_readv 3s 1m ++----------------------------------------------------- ++Syscall param readv(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ ++Syscall param readv(vector) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ ++Syscall param readv(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ ++Syscall param readv(vector) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 66: __NR_writev 3s 1m ++----------------------------------------------------- ++Syscall param writev(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ ++Syscall param writev(vector) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ ++Syscall param writev(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ ++Syscall param writev(vector) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++156: __NR_getsid 1s 0m ++----------------------------------------------------- ++Syscall param getsid(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:676) ++ ++----------------------------------------------------- ++ 83: __NR_fdatasync 1s 0m ++----------------------------------------------------- ++Syscall param fdatasync(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:680) ++ ++----------------------------------------------------- ++228: __NR_mlock 2s 0m ++----------------------------------------------------- ++Syscall param mlock(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:688) ++ ++Syscall param mlock(len) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:688) ++ ++----------------------------------------------------- ++229: __NR_munlock 2s 0m ++----------------------------------------------------- ++Syscall param munlock(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:692) ++ ++Syscall param munlock(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:692) ++ ++----------------------------------------------------- ++230: __NR_mlockall 1s 0m ++----------------------------------------------------- ++Syscall param mlockall(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:696) ++ ++----------------------------------------------------- ++231: __NR_munlockall 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++118: __NR_sched_setparam 2s 1m ++----------------------------------------------------- ++Syscall param sched_setparam(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:704) ++ ++Syscall param sched_setparam(p) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:704) ++ ++Syscall param sched_setparam(p) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:704) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++121: __NR_sched_getparam 2s 1m ++----------------------------------------------------- ++Syscall param sched_getparam(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:708) ++ ++Syscall param sched_getparam(p) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:708) ++ ++Syscall param sched_getparam(p) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:708) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++119:__NR_sched_setscheduler 3s 1m ++----------------------------------------------------- ++Syscall param sched_setscheduler(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ ++Syscall param sched_setscheduler(policy) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ ++Syscall param sched_setscheduler(p) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ ++Syscall param sched_setscheduler(p) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++120:__NR_sched_getscheduler 1s 0m ++----------------------------------------------------- ++Syscall param sched_getscheduler(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:716) ++ ++----------------------------------------------------- ++124: __NR_sched_yield 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++125:__NR_sched_get_priority_max 1s 0m ++----------------------------------------------------- ++Syscall param sched_get_priority_max(policy) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:724) ++ ++----------------------------------------------------- ++126:__NR_sched_get_priority_min 1s 0m ++----------------------------------------------------- ++Syscall param sched_get_priority_min(policy) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:728) ++ ++----------------------------------------------------- ++127:__NR_sched_rr_get_interval n/a ++----------------------------------------------------- ++----------------------------------------------------- ++101: __NR_nanosleep 2s 2m ++----------------------------------------------------- ++Syscall param nanosleep(req) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ ++Syscall param nanosleep(rem) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ ++Syscall param nanosleep(req) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param nanosleep(rem) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++216: __NR_mremap 5s 0m ++----------------------------------------------------- ++Syscall param mremap(old_addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(old_size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(new_size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(new_addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++----------------------------------------------------- ++147: __NR_setresuid 3s 0m ++----------------------------------------------------- ++Syscall param setresuid(ruid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:744) ++ ++Syscall param setresuid(euid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:744) ++ ++Syscall param setresuid(suid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:744) ++ ++----------------------------------------------------- ++148: __NR_getresuid 3s 3m ++----------------------------------------------------- ++Syscall param getresuid(ruid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ ++Syscall param getresuid(euid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ ++Syscall param getresuid(suid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ ++Syscall param getresuid(ruid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresuid(euid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresuid(suid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 42: __NR_nfsservctl n/a ++----------------------------------------------------- ++----------------------------------------------------- ++149: __NR_setresgid 3s 0m ++----------------------------------------------------- ++Syscall param setresgid(rgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:768) ++ ++Syscall param setresgid(egid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:768) ++ ++Syscall param setresgid(sgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:768) ++ ++----------------------------------------------------- ++150: __NR_getresgid 3s 3m ++----------------------------------------------------- ++Syscall param getresgid(rgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ ++Syscall param getresgid(egid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ ++Syscall param getresgid(sgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ ++Syscall param getresgid(rgid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresgid(egid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresgid(sgid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++167: __NR_prctl 5s 0m ++----------------------------------------------------- ++Syscall param prctl(option) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg2) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg3) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg4) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg5) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++----------------------------------------------------- ++167: __NR_prctl 2s 0m ++----------------------------------------------------- ++Syscall param prctl(option) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:782) ++ ++Syscall param prctl(set-name) points to uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:782) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++----------------------------------------------------- ++167: __NR_prctl 1s 0m ++----------------------------------------------------- ++Syscall param prctl(option) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:787) ++ ++----------------------------------------------------- ++139: __NR_rt_sigreturn n/a ++----------------------------------------------------- ++----------------------------------------------------- ++134: __NR_rt_sigaction 4s 4m ++----------------------------------------------------- ++Syscall param rt_sigaction(signum) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(act) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(oldact) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(act->sa_handler) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 8 bytes after a block of size 8 alloc'd ++ at 0x........: malloc (vg_replace_malloc.c:...) 
++ by 0x........: main (scalar.c:30) ++ ++Syscall param rt_sigaction(act->sa_mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 16 bytes after a block of size 16 in arena "client" ++ ++Syscall param rt_sigaction(act->sa_flags) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 16 bytes after a block of size 8 alloc'd ++ at 0x........: malloc (vg_replace_malloc.c:...) ++ by 0x........: main (scalar.c:30) ++ ++Syscall param rt_sigaction(oldact) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 8 bytes after a block of size 8 alloc'd ++ at 0x........: malloc (vg_replace_malloc.c:...) ++ by 0x........: main (scalar.c:30) ++ ++----------------------------------------------------- ++135: __NR_rt_sigprocmask 4s 2m ++----------------------------------------------------- ++Syscall param rt_sigprocmask(how) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(set) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(oldset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(set) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param rt_sigprocmask(oldset) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++136: __NR_rt_sigpending 2s 1m ++----------------------------------------------------- ++Syscall param rt_sigpending(set) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:803) ++ ++Syscall param rt_sigpending(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:803) ++ ++Syscall param rt_sigpending(set) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:803) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++137:__NR_rt_sigtimedwait 4s 3m ++----------------------------------------------------- ++Syscall param rt_sigtimedwait(set) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(info) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(set) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param rt_sigtimedwait(info) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param rt_sigtimedwait(timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++138:__NR_rt_sigqueueinfo 3s 1m ++----------------------------------------------------- ++Syscall param rt_sigqueueinfo(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ ++Syscall param rt_sigqueueinfo(sig) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ ++Syscall param rt_sigqueueinfo(uinfo) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ ++Syscall param rt_sigqueueinfo(uinfo) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++133: __NR_rt_sigsuspend 2s 1m ++----------------------------------------------------- ++Syscall param rt_sigsuspend(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:815) ++ ++Syscall param rt_sigsuspend(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:815) ++ ++Syscall param rt_sigsuspend(mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:815) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 67: __NR_pread64 5s 1m ++----------------------------------------------------- ++Syscall param pread64(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 68: __NR_pwrite64 5s 1m ++----------------------------------------------------- ++Syscall param pwrite64(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 17: __NR_getcwd 2s 1m ++----------------------------------------------------- ++Syscall param getcwd(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:831) ++ ++Syscall param getcwd(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:831) ++ ++Syscall param getcwd(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:831) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 90: __NR_capget 2s 2m ++----------------------------------------------------- ++Syscall param capget(header) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:835) ++ ++Syscall param capget(data) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:835) ++ ++Syscall param capget(header) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:835) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param capget(data) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:835) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 91: __NR_capset 2s 2m ++----------------------------------------------------- ++Syscall param capset(header) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ ++Syscall param capset(data) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ ++Syscall param capset(header) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param capset(data) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++132: __NR_sigaltstack 2s 2m ++----------------------------------------------------- ++Syscall param sigaltstack(ss) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ ++Syscall param sigaltstack(oss) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ ++Syscall param sigaltstack(ss) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++Syscall param sigaltstack(oss) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++----------------------------------------------------- ++ 71: __NR_sendfile 4s 1m ++----------------------------------------------------- ++Syscall param sendfile(out_fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(in_fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(offset) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 41: __NR_pivot_root n/a ++----------------------------------------------------- ++----------------------------------------------------- ++232: __NR_mincore 3s 1m ++----------------------------------------------------- ++Syscall param mincore(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ ++Syscall param mincore(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ ++Syscall param mincore(vec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ ++Syscall param mincore(vec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++233: __NR_madvise 3s 0m ++----------------------------------------------------- ++Syscall param madvise(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:990) ++ ++Syscall param madvise(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:990) ++ ++Syscall param madvise(advice) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:990) ++ ++----------------------------------------------------- ++ 61: __NR_getdents64 3s 1m ++----------------------------------------------------- ++Syscall param getdents64(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ ++Syscall param getdents64(dirp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ ++Syscall param getdents64(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ ++Syscall param getdents64(dirp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++222: 222 ni ++----------------------------------------------------- ++----------------------------------------------------- ++223: 223 ni ++----------------------------------------------------- ++----------------------------------------------------- ++178: __NR_gettid n/a ++----------------------------------------------------- ++----------------------------------------------------- ++213: __NR_readahead n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 5: __NR_setxattr 5s 3m ++----------------------------------------------------- ++Syscall param setxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 6: __NR_lsetxattr 5s 3m ++----------------------------------------------------- ++Syscall param lsetxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(size) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lsetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lsetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 7: __NR_fsetxattr 5s 2m ++----------------------------------------------------- ++Syscall param fsetxattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fsetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 8: __NR_getxattr 4s 3m ++----------------------------------------------------- ++Syscall param getxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 9: __NR_lgetxattr 4s 3m ++----------------------------------------------------- ++Syscall param lgetxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(path) points to unaddressable byte(s) ++ ... 
++ by 0x........: main (scalar.c:1047) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lgetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lgetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 10: __NR_fgetxattr 4s 2m ++----------------------------------------------------- ++Syscall param fgetxattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fgetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 11: __NR_listxattr 3s 2m ++----------------------------------------------------- ++Syscall param listxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ ++Syscall param listxattr(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ ++Syscall param listxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ ++Syscall param listxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param listxattr(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 12: __NR_llistxattr 3s 2m ++----------------------------------------------------- ++Syscall param llistxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ ++Syscall param llistxattr(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ ++Syscall param llistxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ ++Syscall param llistxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param llistxattr(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 13: __NR_flistxattr 3s 1m ++----------------------------------------------------- ++Syscall param flistxattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1063) ++ ++Syscall param flistxattr(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1063) ++ ++Syscall param flistxattr(size) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1063) ++ ++Syscall param flistxattr(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1063) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 14: __NR_removexattr 2s 2m ++----------------------------------------------------- ++Syscall param removexattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ ++Syscall param removexattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ ++Syscall param removexattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param removexattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 15: __NR_lremovexattr 2s 2m ++----------------------------------------------------- ++Syscall param lremovexattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ ++Syscall param lremovexattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ ++Syscall param lremovexattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lremovexattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 16: __NR_fremovexattr 2s 1m ++----------------------------------------------------- ++Syscall param fremovexattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1075) ++ ++Syscall param fremovexattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1075) ++ ++Syscall param fremovexattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1075) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++130: __NR_tkill n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 98: __NR_futex 4s 2m ++----------------------------------------------------- ++Syscall param futex(futex) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(op) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(val) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(utime) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(futex) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param futex(timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++122:__NR_sched_setaffinity 3s 1m ++----------------------------------------------------- ++Syscall param sched_setaffinity(pid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1095) ++ ++Syscall param sched_setaffinity(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1095) ++ ++Syscall param sched_setaffinity(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1095) ++ ++Syscall param sched_setaffinity(mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1095) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++123:__NR_sched_getaffinity 3s 1m ++----------------------------------------------------- ++Syscall param sched_getaffinity(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ ++Syscall param sched_getaffinity(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ ++Syscall param sched_getaffinity(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ ++Syscall param sched_getaffinity(mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 0: __NR_io_setup 2s 1m ++----------------------------------------------------- ++Syscall param io_setup(nr_events) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1111) ++ ++Syscall param io_setup(ctxp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1111) ++ ++Syscall param io_setup(ctxp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1111) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 1: __NR_io_destroy 1s 0m ++----------------------------------------------------- ++Syscall param io_destroy(ctx) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1124) ++ ++----------------------------------------------------- ++ 4: __NR_io_getevents 5s 2m ++----------------------------------------------------- ++Syscall param io_getevents(ctx_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(min_nr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(nr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(events) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(events) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param io_getevents(timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 2: __NR_io_submit 3s 1m ++----------------------------------------------------- ++Syscall param io_submit(ctx_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1133) ++ ++Syscall param io_submit(nr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1133) ++ ++Syscall param io_submit(iocbpp) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1133) ++ ++Syscall param io_submit(iocbpp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1133) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 3: __NR_io_cancel 3s 2m ++----------------------------------------------------- ++Syscall param io_cancel(ctx_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ ++Syscall param io_cancel(iocb) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ ++Syscall param io_cancel(result) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ ++Syscall param io_cancel(iocb) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param io_cancel(result) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++223: __NR_fadvise64 n/a ++----------------------------------------------------- ++----------------------------------------------------- ++251: 251 ni ++----------------------------------------------------- ++WARNING: unhandled loongarch64-linux syscall: 251 ++You may be able to write your own handler. ++Read the file README_MISSING_SYSCALL_OR_IOCTL. ++Nevertheless we consider this a bug. Please report ++it at http://valgrind.org/support/bug_reports.html. ++----------------------------------------------------- ++ 94: __NR_exit_group other ++----------------------------------------------------- ++----------------------------------------------------- ++ 18: __NR_lookup_dcookie 4s 1m ++----------------------------------------------------- ++Syscall param lookup_dcookie(cookie) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1153) ++ ++Syscall param lookup_dcookie(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1153) ++ ++Syscall param lookup_dcookie(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1153) ++ ++----------------------------------------------------- ++ 21: __NR_epoll_ctl 4s 1m ++----------------------------------------------------- ++Syscall param epoll_ctl(epfd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(op) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(event) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(event) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++234:__NR_remap_file_pages n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 96:__NR_set_tid_address 1s 0m ++----------------------------------------------------- ++Syscall param set_tid_address(tidptr) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1173) ++ ++----------------------------------------------------- ++107: __NR_timer_create 3s 2m ++----------------------------------------------------- ++Syscall param timer_create(clockid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ ++Syscall param timer_create(evp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ ++Syscall param timer_create(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ ++Syscall param timer_create(evp.sigev_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_create(evp.sigev_signo) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_create(evp.sigev_notify) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_create(timerid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++110: __NR_timer_settime 4s 2m ++----------------------------------------------------- ++Syscall param timer_settime(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(ovalue) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_settime(ovalue) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++108: __NR_timer_gettime 2s 1m ++----------------------------------------------------- ++Syscall param timer_gettime(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1185) ++ ++Syscall param timer_gettime(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1185) ++ ++Syscall param timer_gettime(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1185) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++109:__NR_timer_getoverrun 1s 0m ++----------------------------------------------------- ++Syscall param timer_getoverrun(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1189) ++ ++----------------------------------------------------- ++111: __NR_timer_delete 1s 0m ++----------------------------------------------------- ++Syscall param timer_delete(timerid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1193) ++ ++----------------------------------------------------- ++112: __NR_clock_settime 2s 1m ++----------------------------------------------------- ++Syscall param clock_settime(clk_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1197) ++ ++Syscall param clock_settime(tp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1197) ++ ++Syscall param clock_settime(tp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1197) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++113: __NR_clock_gettime 2s 1m ++----------------------------------------------------- ++Syscall param clock_gettime(clk_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1201) ++ ++Syscall param clock_gettime(tp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1201) ++ ++Syscall param clock_gettime(tp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1201) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++114: __NR_clock_getres 2s 1m ++----------------------------------------------------- ++Syscall param clock_getres(clk_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1205) ++ ++Syscall param clock_getres(res) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1205) ++ ++Syscall param clock_getres(res) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1205) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++115:__NR_clock_nanosleep n/a ++----------------------------------------------------- ++----------------------------------------------------- ++131: __NR_tgkill n/a ++----------------------------------------------------- ++----------------------------------------------------- ++235: __NR_mbind n/a ++----------------------------------------------------- ++----------------------------------------------------- ++236: __NR_get_mempolicy n/a ++----------------------------------------------------- ++----------------------------------------------------- ++237: __NR_set_mempolicy n/a ++----------------------------------------------------- ++----------------------------------------------------- ++180: __NR_mq_open 4s 3m ++----------------------------------------------------- ++Syscall param mq_open(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(oflag) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(mode) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(attr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_open(attr->mq_maxmsg) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_open(attr->mq_msgsize) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++181: __NR_mq_unlink 1s 1m ++----------------------------------------------------- ++Syscall param mq_unlink(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1253) ++ ++Syscall param mq_unlink(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1253) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++182: __NR_mq_timedsend 5s 2m ++----------------------------------------------------- ++Syscall param mq_timedsend(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_ptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_prio) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(abs_timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_ptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_timedsend(abs_timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++183:__NR_mq_timedreceive 5s 3m ++----------------------------------------------------- ++Syscall param mq_timedreceive(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_ptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_prio) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(abs_timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_ptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_timedreceive(msg_prio) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_timedreceive(abs_timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++184: __NR_mq_notify 2s 1m ++----------------------------------------------------- ++Syscall param mq_notify(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1265) ++ ++Syscall param mq_notify(notification) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1265) ++ ++Syscall param mq_notify(notification) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1265) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++185: __NR_mq_getsetattr 3s 2m ++----------------------------------------------------- ++Syscall param mq_getsetattr(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ ++Syscall param mq_getsetattr(mqstat) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ ++Syscall param mq_getsetattr(omqstat) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ ++Syscall param mq_getsetattr(mqstat->mq_flags) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_getsetattr(omqstat) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 20: __NR_epoll_create1 1s 0m ++----------------------------------------------------- ++Syscall param epoll_create1(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1277) ++ ++----------------------------------------------------- ++270:__NR_process_vm_readv 6s 2m ++----------------------------------------------------- ++Syscall param process_vm_readv(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(lvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(liovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(rvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(riovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(lvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param process_vm_readv(rvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++271:__NR_process_vm_writev 6s 2m ++----------------------------------------------------- ++Syscall param process_vm_writev(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(lvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(liovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(rvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(riovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(lvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param process_vm_writev(rvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++9999: 9999 1e ++----------------------------------------------------- ++WARNING: unhandled loongarch64-linux syscall: 9999 ++You may be able to write your own handler. ++Read the file README_MISSING_SYSCALL_OR_IOCTL. ++Nevertheless we consider this a bug. Please report ++it at http://valgrind.org/support/bug_reports.html. ++----------------------------------------------------- ++ 93: __NR_exit 1s 0m ++----------------------------------------------------- ++Syscall param exit(status) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1293) ++ +diff --git a/memcheck/tests/loongarch64-linux/scalar.vgtest b/memcheck/tests/loongarch64-linux/scalar.vgtest +new file mode 100644 +index 000000000..53e87e8d7 +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.vgtest +@@ -0,0 +1,5 @@ ++prog: scalar ++# Do not run under root ++prereq: [ `id -u` -ne 0 ] ++vgopts: -q --error-limit=no ++args: < scalar.c +diff --git a/memcheck/tests/unit_libcbase.c b/memcheck/tests/unit_libcbase.c +index 0ce65be26..48036033e 100644 +--- a/memcheck/tests/unit_libcbase.c ++++ b/memcheck/tests/unit_libcbase.c +@@ -9,14 +9,14 @@ + #include "pub_tool_vki.h" + #include "m_libcbase.c" + +-/* On PPC, MIPS and ARM64 Linux VKI_PAGE_SIZE is a variable, not a macro. */ ++/* On PPC, MIPS, ARM64 and LOONGARCH64 Linux VKI_PAGE_SIZE is a variable, not a macro. */ + #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \ + || defined(VGP_ppc64le_linux) + unsigned long VKI_PAGE_SIZE = 1UL << 12; + #elif defined(VGP_arm64_linux) + unsigned long VKI_PAGE_SIZE = 1UL << 16; + #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined (VGP_nanomips_linux) ++ || defined (VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + #include + unsigned long VKI_PAGE_SIZE; + #endif +diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c +index a09470905..3ed4a2578 100644 +--- a/memcheck/tests/vbit-test/irops.c ++++ b/memcheck/tests/vbit-test/irops.c +@@ -34,287 +34,297 @@ + That is not necessary but helpful when supporting a new architecture. 
+ */ + static irop_t irops[] = { +- { DEFOP(Iop_Add8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Add16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Add32, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Add64, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_Sub8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sub16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sub32, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sub64, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Mul8, UNDEF_LEFT), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Mul16, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Mul32, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Mul64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Or1, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or8, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or16, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or32, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or64, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_And1, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And8, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And16, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And32, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And64, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Shl8, UNDEF_SHL), .s390x = 1, .amd64 = 
1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Shl16, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Shl32, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Shl64, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_Shr8, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32/64 assert +- { DEFOP(Iop_Shr16, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc32/64 assert +- { DEFOP(Iop_Shr32, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Shr64, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_Sar8, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32/64 assert +- { DEFOP(Iop_Sar16, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc32/64 assert +- { DEFOP(Iop_Sar32, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sar64, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_CmpEQ8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpEQ16, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpEQ32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpEQ64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_CmpNE8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpNE16, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpNE32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpNE64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Not1, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, 
.mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_Add8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Add16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Add32, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Add64, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_Sub8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Sub16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Sub32, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Sub64, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_Mul8, UNDEF_LEFT), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Mul16, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Mul32, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Mul64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // ppc32, mips assert ++ { DEFOP(Iop_Or1, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Or8, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Or16, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Or32, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Or64, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_And1, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_And8, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_And16, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_And32, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 
1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_And64, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Xor8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Xor16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Xor32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Xor64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Shl8, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Shl16, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Shl32, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Shl64, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_Shr8, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Shr16, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Shr32, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Shr64, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_Sar8, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Sar16, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Sar32, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Sar64, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_CmpEQ8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpEQ16, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpEQ32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpEQ64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_CmpNE8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpNE16, UNDEF_CMP_EQ_NE), .s390x = 
1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpNE32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpNE64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_Not1, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Not8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Not16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Not32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Not64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_CasCmpEQ8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpEQ16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpEQ32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_CasCmpEQ64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, + +- { DEFOP(Iop_CasCmpNE8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpNE16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpNE32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpNE64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_CasCmpNE8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpNE16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpNE32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_CasCmpNE64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, + { DEFOP(Iop_ExpCmpNE8, UNDEF_UNKNOWN), }, // exact (expensive) equality + { DEFOP(Iop_ExpCmpNE16, UNDEF_UNKNOWN), }, // exact (expensive) equality + { DEFOP(Iop_ExpCmpNE32, UNDEF_UNKNOWN), }, // exact (expensive) equality + { DEFOP(Iop_ExpCmpNE64, UNDEF_UNKNOWN), }, // exact (expensive) equality +- { DEFOP(Iop_MullS8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MullS16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MullS32, 
UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts ++ { DEFOP(Iop_MullS8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullS16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullS32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts + // s390 has signed multiplication of 64-bit values but the result + // is 64-bit (not 128-bit). So we cannot test this op standalone. +- { DEFOP(Iop_MullS64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_MullU8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_MullU16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_MullU32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_MullU64, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Clz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert +- { DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLE64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert +- { DEFOP(Iop_CmpLT32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLT64U, UNDEF_ALL), .s390x = 1, .amd64 = 1, 
.x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1}, // ppc32, mips assert +- { DEFOP(Iop_CmpLE32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLE64U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_MullS64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_MullU8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullU16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullU32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_MullU64, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_Clz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc, mips assert ++ { DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLE64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc, mips assert ++ { DEFOP(Iop_CmpLT32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLT64U, UNDEF_ALL), .s390x = 1, .amd64 
= 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_CmpLE32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLE64U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 1 }, // ppc32 asserts + { DEFOP(Iop_CmpNEZ8, UNDEF_ALL), }, // not supported by mc_translate + { DEFOP(Iop_CmpNEZ16, UNDEF_ALL), }, // not supported by mc_translate + { DEFOP(Iop_CmpNEZ32, UNDEF_ALL), }, // not supported by mc_translate + { DEFOP(Iop_CmpNEZ64, UNDEF_ALL), }, // not supported by mc_translate +- { DEFOP(Iop_CmpwNEZ32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpwNEZ64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts ++ { DEFOP(Iop_CmpwNEZ32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpwNEZ64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts + { DEFOP(Iop_Left8, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Left16, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Left32, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Left64, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Max32U, UNDEF_UNKNOWN), }, // not supported by mc_translate +- { DEFOP(Iop_CmpORD32U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_CmpORD64U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_CmpORD32S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_CmpORD64S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_DivU32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivS32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivU64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivS64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivU64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivS64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivU32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivS32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_CmpORD32U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, 
.ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_CmpORD64U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_CmpORD32S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_CmpORD64S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_DivU32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivS32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivU64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_DivS64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_DivU64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_DivS64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_DivU32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivS32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, + // On s390 the DivMod operations always appear in a certain context + // So they cannot be tested in isolation on that platform. 
+- { DEFOP(Iop_DivModU64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivModS64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivModU32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_DivModS32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_DivModU128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_DivModS128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_DivModS64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_DivModU64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_8Uto16, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 assert +- { DEFOP(Iop_16Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 assert +- { DEFOP(Iop_32Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_8Sto16, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_16Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_32Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_64to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_32to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_64to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_16to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16HIto8, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 
= 1, .mips64 = 1 }, +- { DEFOP(Iop_8HLto16, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc isel +- { DEFOP(Iop_32to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_32HIto16, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16HLto32, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc isel +- { DEFOP(Iop_64to32, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_64HIto32, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_32HLto64, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_128to64, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_128HIto64, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_64HLto128, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_32to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_64to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_1Uto8, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_1Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_1Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 assert +- { DEFOP(Iop_1Sto8, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, ++ { DEFOP(Iop_DivModU64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivModS64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivModU32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivModS32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivModU128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_DivModS128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_DivModS64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_DivModU64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, 
.mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_8Uto16, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_8Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 assert ++ { DEFOP(Iop_16Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 assert ++ { DEFOP(Iop_32Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_8Sto16, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_16Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_32Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_64to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_32to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_64to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // ppc32, mips assert ++ { DEFOP(Iop_16to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16HIto8, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8HLto16, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc isel ++ { DEFOP(Iop_32to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_32HIto16, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16HLto32, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc isel ++ { DEFOP(Iop_64to32, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 
1 }, // mips asserts ++ { DEFOP(Iop_64HIto32, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_32HLto64, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_128to64, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_128HIto64, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_64HLto128, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_32to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_64to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // ppc32, mips assert ++ { DEFOP(Iop_1Uto8, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_1Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_1Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 assert ++ { DEFOP(Iop_1Sto8, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, + { DEFOP(Iop_1Sto16, UNDEF_ALL), }, // not handled by mc_translate +- { DEFOP(Iop_1Sto32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_1Sto64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AddF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_SubF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_MulF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_DivF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_AddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AddF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_SubF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1,.ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MulF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_DivF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AddF64r32, UNDEF_ALL), 
.s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MulF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AbsF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_NegF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_NegF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AbsF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AbsF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SqrtF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_SqrtF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_SqrtF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_CmpF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI16S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_F64toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_I64StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_I64UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_I64UtoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32UtoF32, UNDEF_ALL), .s390x = 
1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_F32toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_I64StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_F32toF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_F64toF32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_ReinterpF64asI64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_ReinterpI64asF64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_ReinterpF32asI32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, ++ { DEFOP(Iop_1Sto32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_1Sto64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_SubF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_MulF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_DivF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ScaleBF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_SubF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1,.ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MulF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, 
.x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_ScaleBF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MulF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AbsF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_NegF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_NegF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AbsF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AbsF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SqrtF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_RSqrtF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_LogBF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_SqrtF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_RSqrtF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_LogBF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_SqrtF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_CmpF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, 
.ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toI16S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_I64StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_I64UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_I64UtoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32UtoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F32toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_F32toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F32toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F32toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_I64StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F32toF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64toF32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ReinterpF64asI64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ReinterpI64asF64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ReinterpF32asI32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, + // ppc requires this op to show up in a specific 
context. So it cannot be + // tested standalone on that platform. +- { DEFOP(Iop_ReinterpI32asF32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_F64HLtoF128, UNDEF_CONCAT), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128HItoF64, UNDEF_UPPER), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128LOtoF64, UNDEF_TRUNC), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AddF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SubF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MulF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegMAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegMSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AbsF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SqrtF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I64StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I64UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI128S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, 
.mips64 = 0 }, +- { DEFOP(Iop_F128toF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RndF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI32S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI32U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI64U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI64S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AtanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Yl2xF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Yl2xp1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRemF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRemC3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRem1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRem1C3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_ScaleF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SinF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CosF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_2xm1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RoundF128toInt, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RoundF64toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF32toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_MAddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MSubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MAddF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MSubF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MAddF64r32, UNDEF_ALL), 
.s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MSubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RSqrtEst5GoodF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RoundF64toF64_NEAREST, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF64toF64_NegINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF64toF64_PosINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF64toF64_ZERO, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_TruncF64asF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_RoundF64toF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_ReinterpI32asF32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64HLtoF128, UNDEF_CONCAT), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128HItoF64, UNDEF_UPPER), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128LOtoF64, UNDEF_TRUNC), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AddF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SubF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MulF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegMAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegMSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AbsF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SqrtF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { 
DEFOP(Iop_I32StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I64StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I64UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F32toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI128S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RndF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI32S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI32U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI64U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI64S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AtanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Yl2xF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Yl2xp1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRemF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRemC3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRem1F64, UNDEF_ALL), 
.s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRem1C3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_ScaleF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SinF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CosF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_2xm1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF128toInt, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_RoundF32toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MAddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MSubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MAddF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MSubF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MAddF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MSubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RSqrtEst5GoodF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_NEAREST, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_NegINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_PosINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_ZERO, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF64asF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_RoundF64toF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, + { 
DEFOP(Iop_RecpExpF64, UNDEF_UNKNOWN), }, + { DEFOP(Iop_RecpExpF32, UNDEF_UNKNOWN), }, + + /* --------- Possibly required by IEEE 754-2008. --------- */ +- { DEFOP(Iop_MaxNumF64, UNDEF_ALL), .arm = 1 }, +- { DEFOP(Iop_MinNumF64, UNDEF_ALL), .arm = 1 }, +- { DEFOP(Iop_MaxNumF32, UNDEF_ALL), .arm = 1 }, +- { DEFOP(Iop_MinNumF32, UNDEF_ALL), .arm = 1 }, ++ { DEFOP(Iop_MaxNumF64, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumF64, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MaxNumAbsF64, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumAbsF64, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_MaxNumF32, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumF32, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MaxNumAbsF32, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumAbsF32, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, + + /* ------------------ 16-bit scalar FP ------------------ */ + { DEFOP(Iop_F16toF64, UNDEF_ALL), .arm64 = 1 }, +diff --git a/memcheck/tests/vbit-test/vtest.h b/memcheck/tests/vbit-test/vtest.h +index c724f4142..fe17f67da 100644 +--- a/memcheck/tests/vbit-test/vtest.h ++++ b/memcheck/tests/vbit-test/vtest.h +@@ -179,15 +179,16 @@ typedef struct { + unsigned immediate_type; + + // Indicate whether IROp can be tested on a particular architecture +- unsigned s390x : 1; +- unsigned amd64 : 1; +- unsigned ppc32 : 1; +- unsigned ppc64 : 1; +- unsigned arm : 1; +- unsigned arm64 : 1; +- unsigned x86 : 1; +- unsigned mips32 : 1; +- unsigned mips64 : 1; ++ unsigned s390x : 1; ++ unsigned amd64 : 1; ++ unsigned ppc32 : 1; ++ unsigned ppc64 : 1; ++ unsigned arm : 1; ++ unsigned arm64 : 1; ++ unsigned x86 : 1; ++ unsigned mips32 : 1; ++ unsigned mips64 : 1; ++ unsigned loongarch64: 1; + } irop_t; + + +diff --git a/nightly/conf/loongarch64.conf b/nightly/conf/loongarch64.conf +new file mode 100644 +index 000000000..8603671db +--- /dev/null ++++ b/nightly/conf/loongarch64.conf +@@ -0,0 +1,3 @@ ++export ABT_DETAILS=`uname -mrs` ++export ABT_JOBS=4 ++export ABT_PERF="--vg=../valgrind-new --vg=../valgrind-old" +diff --git a/nightly/conf/loongarch64.sendemail b/nightly/conf/loongarch64.sendemail +new file mode 100644 +index 000000000..dd806040c +--- /dev/null ++++ b/nightly/conf/loongarch64.sendemail +@@ -0,0 +1,7 @@ ++#!/bin/sh ++ ++subject=$1 ++body=$2 ++file=$3 ++ ++(cat "$body" "$file") | mail -s "$subject" valgrind-testresults@lists.sourceforge.net -f "Feiyang Chen " +diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am +index c0dd7c21d..532f46035 100644 +--- a/none/tests/Makefile.am ++++ b/none/tests/Makefile.am +@@ -35,7 +35,9 @@ endif + if VGCONF_ARCHS_INCLUDE_NANOMIPS + SUBDIRS += nanomips + endif +- ++if VGCONF_ARCHS_INCLUDE_LOONGARCH64 ++SUBDIRS += loongarch64 ++endif + + # OS-specific tests + if VGCONF_OS_IS_LINUX +@@ -75,8 +77,9 @@ SUBDIRS += x86-freebsd + endif + + DIST_SUBDIRS = x86 amd64 ppc32 ppc64 arm arm64 s390x mips32 mips64 nanomips \ +- linux darwin solaris freebsd amd64-linux x86-linux amd64-darwin \ +- x86-darwin amd64-solaris x86-solaris x86-freebsd scripts . ++ loongarch64 linux darwin solaris freebsd amd64-linux x86-linux \ ++ amd64-darwin x86-darwin amd64-solaris x86-solaris x86-freebsd \ ++ scripts . 
+ + dist_noinst_SCRIPTS = \ + filter_cmdline0 \ +diff --git a/none/tests/allexec_prepare_prereq b/none/tests/allexec_prepare_prereq +index a541f4299..49c45c7cc 100755 +--- a/none/tests/allexec_prepare_prereq ++++ b/none/tests/allexec_prepare_prereq +@@ -28,11 +28,12 @@ pair() + } + + +-pair x86 amd64 +-pair ppc32 ppc64 +-pair s390x_unexisting_in_32bits s390x +-pair arm arm64 +-pair mips32 mips64 +-pair nanomips nanoMIPS_unexisting_in_64bits ++pair x86 amd64 ++pair ppc32 ppc64 ++pair s390x_unexisting_in_32bits s390x ++pair arm arm64 ++pair mips32 mips64 ++pair nanomips nanoMIPS_unexisting_in_64bits ++pair loongarch_unexisting_in_32bits loongarch64 + + exit 0 +diff --git a/none/tests/libvex_test.c b/none/tests/libvex_test.c +index 5b57a4c2e..3080ce667 100644 +--- a/none/tests/libvex_test.c ++++ b/none/tests/libvex_test.c +@@ -76,6 +76,8 @@ __attribute__((noinline)) static void get_guest_arch(VexArch *ga) + *ga = VexArchMIPS64; + #elif defined(VGA_nanomips) + *ga = VexArchNANOMIPS; ++#elif defined(VGA_loongarch64) ++ *ga = VexArchLOONGARCH64; + #else + missing arch; + #endif +@@ -113,6 +115,7 @@ static VexEndness arch_endness (VexArch va) { + else + return VexEndnessBE; + } ++ case VexArchLOONGARCH64: return VexEndnessLE; + default: failure_exit(); + } + } +@@ -139,6 +142,7 @@ static UInt arch_hwcaps (VexArch va) { + case VexArchMIPS64: return VEX_PRID_COMP_MIPS | VEX_MIPS_HOST_FR; + #endif + case VexArchNANOMIPS: return 0; ++ case VexArchLOONGARCH64: return VEX_HWCAPS_LOONGARCH_ISA_64BIT; + default: failure_exit(); + } + } +@@ -156,6 +160,7 @@ static Bool mode64 (VexArch va) { + case VexArchMIPS32: return False; + case VexArchMIPS64: return True; + case VexArchNANOMIPS: return False; ++ case VexArchLOONGARCH64: return True; + default: failure_exit(); + } + } +@@ -275,7 +280,7 @@ int main(int argc, char **argv) + // explicitly via command line arguments. 
+ if (multiarch) { + VexArch va; +- for (va = VexArchX86; va <= VexArchNANOMIPS; va++) { ++ for (va = VexArchX86; va <= VexArchLOONGARCH64; va++) { + vta.arch_host = va; + vta.archinfo_host.endness = arch_endness (vta.arch_host); + vta.archinfo_host.hwcaps = arch_hwcaps (vta.arch_host); +diff --git a/none/tests/loongarch64/Makefile.am b/none/tests/loongarch64/Makefile.am +new file mode 100644 +index 000000000..c8e5b5123 +--- /dev/null ++++ b/none/tests/loongarch64/Makefile.am +@@ -0,0 +1,39 @@ ++ ++include $(top_srcdir)/Makefile.tool-tests.am ++ ++dist_noinst_SCRIPTS = filter_stderr ++ ++EXTRA_DIST = \ ++ atomic.stdout.exp atomic.stderr.exp atomic.vgtest \ ++ branch.stdout.exp branch.stderr.exp branch.vgtest \ ++ cpucfg.stdout.exp cpucfg.stderr.exp cpucfg.vgtest \ ++ fault.stdout.exp fault.stderr.exp fault.vgtest \ ++ fault_fp.stdout.exp fault_fp.stderr.exp fault_fp.vgtest \ ++ float.stdout.exp float.stderr.exp float.vgtest \ ++ integer.stdout.exp integer.stderr.exp integer.vgtest \ ++ llsc.stdout.exp llsc.stderr.exp llsc.vgtest \ ++ memory.stdout.exp memory.stderr.exp memory.vgtest \ ++ move.stdout.exp move.stderr.exp move.vgtest \ ++ pc.stdout.exp pc.stderr.exp pc.vgtest \ ++ special.stdout.exp special.stderr.exp special.vgtest ++ ++check_PROGRAMS = \ ++ allexec \ ++ atomic \ ++ branch \ ++ cpucfg \ ++ fault \ ++ fault_fp \ ++ float \ ++ integer \ ++ llsc \ ++ memory \ ++ move \ ++ pc \ ++ special ++ ++AM_CFLAGS += @FLAG_M64@ ++AM_CXXFLAGS += @FLAG_M64@ ++AM_CCASFLAGS += @FLAG_M64@ ++ ++allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@ +diff --git a/none/tests/loongarch64/allexec.c b/none/tests/loongarch64/allexec.c +new file mode 120000 +index 000000000..6d6a9cf28 +--- /dev/null ++++ b/none/tests/loongarch64/allexec.c +@@ -0,0 +1 @@ ++../allexec.c +\ No newline at end of file +diff --git a/none/tests/loongarch64/atomic.c b/none/tests/loongarch64/atomic.c +new file mode 100644 +index 000000000..916d5f787 +--- /dev/null ++++ b/none/tests/loongarch64/atomic.c +@@ -0,0 +1,75 @@ ++#include ++ ++#define TESTINST_AM(insn, res, val, addr) \ ++ { \ ++ __asm__ __volatile__( \ ++ "move $t1, %1 \n\t" \ ++ "move $t2, %2 \n\t" \ ++ insn " $t0, $t1, $t2 \n\t" \ ++ "move %0, $t0 \n\t" \ ++ : "=r" (res) \ ++ : "r" (val), "r" (addr) \ ++ : "$t0", "$t1", "$t2", "memory"); \ ++ } ++ ++#define TESTINST_AM_4(insn, v) \ ++ { \ ++ printf(#insn ".w ::\n"); \ ++ TESTINST_AM(#insn ".w", res_i, v, &val_i); \ ++ printf("old: %d new: %d\n", res_i, val_i); \ ++ \ ++ printf(#insn "_db.w ::\n"); \ ++ TESTINST_AM(#insn "_db.w", res_i, v, &val_i); \ ++ printf("old: %d new: %d\n", res_i, val_i); \ ++ \ ++ printf(#insn ".d ::\n"); \ ++ TESTINST_AM(#insn ".d", res_l, v, &val_l); \ ++ printf("old: %ld new: %ld\n", res_l, val_l); \ ++ \ ++ printf(#insn "_db.d ::\n"); \ ++ TESTINST_AM(#insn "_db.d", res_l, v, &val_l); \ ++ printf("old: %ld new: %ld\n", res_l, val_l); \ ++ } ++ ++#define TESTINST_AM_U_4(insn, v) \ ++ { \ ++ printf(#insn ".wu ::\n"); \ ++ TESTINST_AM(#insn ".wu", res_i, v, &val_i); \ ++ printf("old: %u new: %u\n", res_i, val_i); \ ++ \ ++ printf(#insn "_db.wu ::\n"); \ ++ TESTINST_AM(#insn "_db.wu", res_i, v, &val_i); \ ++ printf("old: %u new: %u\n", res_i, val_i); \ ++ \ ++ printf(#insn ".du ::\n"); \ ++ TESTINST_AM(#insn ".du", res_l, v, &val_l); \ ++ printf("old: %lu new: %lu\n", res_l, val_l); \ ++ \ ++ printf(#insn "_db.du ::\n"); \ ++ TESTINST_AM(#insn "_db.du", res_l, v, &val_l); \ ++ printf("old: %lu new: %lu\n", res_l, val_l); \ ++ } ++ ++void test(void) ++{ ++ int res_i; ++ long res_l; ++ int val_i = 
1; ++ long val_l = 1; ++ ++ TESTINST_AM_4(amswap, 2); ++ TESTINST_AM_4(amadd, 5); ++ TESTINST_AM_4(amand, 3); ++ TESTINST_AM_4(amor, 8); ++ TESTINST_AM_4(amxor, 4); ++ TESTINST_AM_4(ammax, 16); ++ TESTINST_AM_4(ammin, -1); ++ TESTINST_AM_U_4(ammax, 9); ++ TESTINST_AM_U_4(ammin, 6); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/atomic.stderr.exp b/none/tests/loongarch64/atomic.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/atomic.stdout.exp b/none/tests/loongarch64/atomic.stdout.exp +new file mode 100644 +index 000000000..7eab9ebd0 +--- /dev/null ++++ b/none/tests/loongarch64/atomic.stdout.exp +@@ -0,0 +1,72 @@ ++amswap.w :: ++old: 1 new: 2 ++amswap_db.w :: ++old: 2 new: 2 ++amswap.d :: ++old: 1 new: 2 ++amswap_db.d :: ++old: 2 new: 2 ++amadd.w :: ++old: 2 new: 7 ++amadd_db.w :: ++old: 7 new: 12 ++amadd.d :: ++old: 2 new: 7 ++amadd_db.d :: ++old: 7 new: 12 ++amand.w :: ++old: 12 new: 0 ++amand_db.w :: ++old: 0 new: 0 ++amand.d :: ++old: 12 new: 0 ++amand_db.d :: ++old: 0 new: 0 ++amor.w :: ++old: 0 new: 8 ++amor_db.w :: ++old: 8 new: 8 ++amor.d :: ++old: 0 new: 8 ++amor_db.d :: ++old: 8 new: 8 ++amxor.w :: ++old: 8 new: 12 ++amxor_db.w :: ++old: 12 new: 8 ++amxor.d :: ++old: 8 new: 12 ++amxor_db.d :: ++old: 12 new: 8 ++ammax.w :: ++old: 8 new: 16 ++ammax_db.w :: ++old: 16 new: 16 ++ammax.d :: ++old: 8 new: 16 ++ammax_db.d :: ++old: 16 new: 16 ++ammin.w :: ++old: 16 new: -1 ++ammin_db.w :: ++old: -1 new: -1 ++ammin.d :: ++old: 16 new: -1 ++ammin_db.d :: ++old: -1 new: -1 ++ammax.wu :: ++old: 4294967295 new: 4294967295 ++ammax_db.wu :: ++old: 4294967295 new: 4294967295 ++ammax.du :: ++old: 18446744073709551615 new: 18446744073709551615 ++ammax_db.du :: ++old: 18446744073709551615 new: 18446744073709551615 ++ammin.wu :: ++old: 4294967295 new: 6 ++ammin_db.wu :: ++old: 6 new: 6 ++ammin.du :: ++old: 18446744073709551615 new: 6 ++ammin_db.du :: ++old: 6 new: 6 +diff --git a/none/tests/loongarch64/atomic.vgtest b/none/tests/loongarch64/atomic.vgtest +new file mode 100644 +index 000000000..8fe5ce5f3 +--- /dev/null ++++ b/none/tests/loongarch64/atomic.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features lam ++prog: atomic ++vgopts: -q +diff --git a/none/tests/loongarch64/branch.c b/none/tests/loongarch64/branch.c +new file mode 100644 +index 000000000..e702d3fa1 +--- /dev/null ++++ b/none/tests/loongarch64/branch.c +@@ -0,0 +1,148 @@ ++#include ++ ++#define TESTINST_B_RR(insn, val1, val2) \ ++ { \ ++ int res; \ ++ unsigned long v1 = (unsigned long)val1; \ ++ unsigned long v2 = (unsigned long)val2; \ ++ __asm__ __volatile__( \ ++ insn " %1, %2, 1f \n\t" \ ++ " move %0, $zero \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, $zero, 1 \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : "r" (v1), "r" (v2) \ ++ : "memory"); \ ++ printf("%s::\n", insn); \ ++ printf("input: %#lx %#lx\n", v1, v2); \ ++ printf("output: %d\n", res); \ ++ } ++ ++#define TESTINST_B_R(insn, val) \ ++ { \ ++ int res; \ ++ unsigned long v = (unsigned long)val; \ ++ __asm__ __volatile__( \ ++ insn " %1, 1f \n\t" \ ++ " move %0, $zero \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, $zero, 1 \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : "r" (v) \ ++ : "memory"); \ ++ printf("%s::\n", insn); \ ++ printf("input: %#lx\n", v); \ ++ printf("output: %d\n", res); \ ++ } ++ ++#define TESTINST_B_C(insn, val) \ ++ { \ ++ int res; \ ++ unsigned long v = (unsigned long)val; \ ++ __asm__ __volatile__( \ ++ " movgr2cf $fcc0, 
%1 \n\t" \ ++ insn " $fcc0, 1f \n\t" \ ++ " move %0, $zero \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, $zero, 1 \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : "r" (v) \ ++ : "$fcc0", "memory"); \ ++ printf("%s::\n", insn); \ ++ printf("input: %#lx\n", v); \ ++ printf("output: %d\n", res); \ ++ } ++ ++#define TESTINST_BL() \ ++ { \ ++ int res; \ ++ __asm__ __volatile__( \ ++ " move %0, $zero \n\t" \ ++ " bl 1f \n\t" \ ++ " addi.w %0, %0, 1 \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, %0, 1 \n\t" \ ++ " jr $ra \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : \ ++ : "$ra", "memory"); \ ++ printf("bl::\n"); \ ++ printf("res: %d\n", res); \ ++ } ++ ++#define TESTINST_JIRL(insn) \ ++ { \ ++ unsigned long addr1, addr2; \ ++ __asm__ __volatile__( \ ++ " pcaddi $t0, 2 \n\t" \ ++ " jirl %0, $t0, 0 \n\t" \ ++ " pcaddi %1, 0 \n\t" \ ++ : "=r" (addr1), "=r" (addr2) \ ++ : \ ++ : "$t0", "memory"); \ ++ printf("jirl::\n"); \ ++ printf("res: %d\n", addr1 == addr2); \ ++ } ++ ++void test(void) ++{ ++ /* ---------------- beq rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("beq", 1, 2); ++ TESTINST_B_RR("beq", 1, 1); ++ ++ /* ---------------- bne rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bne", 1, 2); ++ TESTINST_B_RR("bne", 1, 1); ++ ++ /* ---------------- blt rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("blt", 1, 2); ++ TESTINST_B_RR("blt", 1, 0); ++ ++ /* ---------------- bge rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bge", 1, 2); ++ TESTINST_B_RR("bge", 0, 0); ++ ++ /* ---------------- bltu rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bltu", -1, 2); ++ TESTINST_B_RR("bltu", 0, 1); ++ ++ /* ---------------- bgeu rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bgeu", -1, 2); ++ TESTINST_B_RR("bgeu", 0, 1); ++ ++ /* ---------------- beqz rj, offs21 ---------------- */ ++ TESTINST_B_R("beqz", 0); ++ TESTINST_B_R("beqz", -1); ++ ++ /* ---------------- bnez rj, offs21 ---------------- */ ++ TESTINST_B_R("bnez", 0); ++ TESTINST_B_R("bnez", -1); ++ ++ /* ---------------- bceqz cj, offs21 ---------------- */ ++ TESTINST_B_C("bceqz", 0); ++ TESTINST_B_C("bceqz", 1); ++ ++ /* ---------------- bcnez cj, offs21 ---------------- */ ++ TESTINST_B_C("bcnez", 0); ++ TESTINST_B_C("bcnez", 1); ++ ++ /* ---------------- bl offs26 ---------------- */ ++ TESTINST_BL(); ++ ++ /* ---------------- jirl rd, rj, offs16 ---------------- */ ++ TESTINST_JIRL(); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/branch.stderr.exp b/none/tests/loongarch64/branch.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/branch.stdout.exp b/none/tests/loongarch64/branch.stdout.exp +new file mode 100644 +index 000000000..0a32d6ad7 +--- /dev/null ++++ b/none/tests/loongarch64/branch.stdout.exp +@@ -0,0 +1,64 @@ ++beq:: ++input: 0x1 0x2 ++output: 0 ++beq:: ++input: 0x1 0x1 ++output: 1 ++bne:: ++input: 0x1 0x2 ++output: 1 ++bne:: ++input: 0x1 0x1 ++output: 0 ++blt:: ++input: 0x1 0x2 ++output: 1 ++blt:: ++input: 0x1 0 ++output: 0 ++bge:: ++input: 0x1 0x2 ++output: 0 ++bge:: ++input: 0 0 ++output: 1 ++bltu:: ++input: 0xffffffffffffffff 0x2 ++output: 0 ++bltu:: ++input: 0 0x1 ++output: 1 ++bgeu:: ++input: 0xffffffffffffffff 0x2 ++output: 1 ++bgeu:: ++input: 0 0x1 ++output: 0 ++beqz:: ++input: 0 ++output: 1 ++beqz:: ++input: 0xffffffffffffffff ++output: 0 ++bnez:: ++input: 0 ++output: 0 ++bnez:: ++input: 0xffffffffffffffff ++output: 1 ++bceqz:: ++input: 0 ++output: 1 ++bceqz:: ++input: 0x1 ++output: 0 
++bcnez:: ++input: 0 ++output: 0 ++bcnez:: ++input: 0x1 ++output: 1 ++bl:: ++res: 2 ++jirl:: ++res: 1 +diff --git a/none/tests/loongarch64/branch.vgtest b/none/tests/loongarch64/branch.vgtest +new file mode 100644 +index 000000000..535c05590 +--- /dev/null ++++ b/none/tests/loongarch64/branch.vgtest +@@ -0,0 +1,2 @@ ++prog: branch ++vgopts: -q +diff --git a/none/tests/loongarch64/cpucfg.c b/none/tests/loongarch64/cpucfg.c +new file mode 100644 +index 000000000..f5d0570eb +--- /dev/null ++++ b/none/tests/loongarch64/cpucfg.c +@@ -0,0 +1,24 @@ ++#include ++ ++void test(int reg) ++{ ++ int res; ++ __asm__ __volatile__( ++ "cpucfg %0, %1 \n\t" ++ : "=r" (res) ++ : "r" (reg) ++ : "memory"); ++ printf("cpucfg ::\n"); ++ printf("input: %x\n", (unsigned)reg); ++ printf("output: %x\n", (unsigned)res); ++} ++ ++int main(void) ++{ ++ int i; ++ ++ for (i = 0; i < 24; i++) ++ test(i); ++ ++ return 0; ++} +diff --git a/none/tests/loongarch64/cpucfg.stderr.exp b/none/tests/loongarch64/cpucfg.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/cpucfg.stdout.exp b/none/tests/loongarch64/cpucfg.stdout.exp +new file mode 100644 +index 000000000..49e0ba7b1 +--- /dev/null ++++ b/none/tests/loongarch64/cpucfg.stdout.exp +@@ -0,0 +1,72 @@ ++cpucfg :: ++input: 0 ++output: 14c010 ++cpucfg :: ++input: 1 ++output: 3f2f2fe ++cpucfg :: ++input: 2 ++output: 7ccfc7 ++cpucfg :: ++input: 3 ++output: fcff ++cpucfg :: ++input: 4 ++output: 5f5e100 ++cpucfg :: ++input: 5 ++output: 10001 ++cpucfg :: ++input: 6 ++output: 7f33 ++cpucfg :: ++input: 7 ++output: 0 ++cpucfg :: ++input: 8 ++output: 0 ++cpucfg :: ++input: 9 ++output: 0 ++cpucfg :: ++input: a ++output: 0 ++cpucfg :: ++input: b ++output: 0 ++cpucfg :: ++input: c ++output: 0 ++cpucfg :: ++input: d ++output: 0 ++cpucfg :: ++input: e ++output: 0 ++cpucfg :: ++input: f ++output: 0 ++cpucfg :: ++input: 10 ++output: 2c3d ++cpucfg :: ++input: 11 ++output: 6080003 ++cpucfg :: ++input: 12 ++output: 6080003 ++cpucfg :: ++input: 13 ++output: 608000f ++cpucfg :: ++input: 14 ++output: 60e000f ++cpucfg :: ++input: 15 ++output: 0 ++cpucfg :: ++input: 16 ++output: 0 ++cpucfg :: ++input: 17 ++output: 0 +diff --git a/none/tests/loongarch64/cpucfg.vgtest b/none/tests/loongarch64/cpucfg.vgtest +new file mode 100644 +index 000000000..fea964445 +--- /dev/null ++++ b/none/tests/loongarch64/cpucfg.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features cpucfg ++prog: cpucfg ++vgopts: -q +diff --git a/none/tests/loongarch64/fault.c b/none/tests/loongarch64/fault.c +new file mode 100644 +index 000000000..294176857 +--- /dev/null ++++ b/none/tests/loongarch64/fault.c +@@ -0,0 +1,234 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#define NUM 24 ++ ++unsigned long mem[NUM] = { ++ 0x121f1e1f0000e680, 0x0000000000010700, 0x000000030000e7dc, ++ 0xffffffff0000b0d0, 0x232f2e2f2ab05fd0, 0x242c2b2b0000b6a0, ++ 0x252a2e2b0000be80, 0x262d2d2a0000de10, 0x3f343f3e0000df20, ++ 0x3e353d3c2ab05fe0, 0x363a3c3b0000dfd0, 0x3b373b3a00010300, ++ 0x0000e680121f1e1f, 0x0001070000000000, 0x0000e7dc00000003, ++ 0x0000b0d0ffffffff, 0x2ab05fd0232f2e2f, 0x0000b6a0242c2b2b, ++ 0x0000be80252a2e2b, 0x0000de10262d2d2a, 0x0000df203f343f3e, ++ 0x2ab05fe03e353d3c, 0x0000dfd0363a3c3b, 0x000103003b373b3a ++}; ++ ++long val1 = 0; ++long val2 = 0xfdecba9087654321UL; ++char *p = (char *)mem; ++ ++#define TESTINST_LOAD_RRR(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ __asm__ 
__volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=r" (val1) \ ++ : "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ printf("output: %ld\n", val1); \ ++ } ++ ++#define TESTINST_STORE_RRR(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", val2); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "r" (val2), "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_RR(n, insn, v1, v2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld %ld\n", (long)v1, (long)v2); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1 \n\t" \ ++ : \ ++ : "r" (v1), "r" (v2) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_I(n, insn, imm) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %d\n", imm); \ ++ __asm__ __volatile__( \ ++ insn " " #imm " \n\t" \ ++ : \ ++ : \ ++ : "memory"); \ ++ } ++ ++static sigjmp_buf escape; ++ ++static void handler(int sig, siginfo_t *si, void *uc) ++{ ++ fprintf(stderr, "signal: %d\n", sig); ++ fprintf(stderr, "code: %d\n", si->si_code); ++ siglongjmp(escape, 1); ++} ++ ++static inline void show(void) ++{ ++ int i; ++ printf("memory block:\n"); ++ for (i = 0; i < NUM; i++) ++ printf("0x%lx:\t%#018lx\n", i * sizeof(unsigned long), mem[i]); ++} ++ ++TESTINST_LOAD_RRR(1, "ldgt.b", &p[0], &p[64]); ++TESTINST_LOAD_RRR(2, "ldgt.b", &p[1], &p[0] ); ++TESTINST_LOAD_RRR(3, "ldgt.h", &p[1], &p[0] ); ++TESTINST_LOAD_RRR(4, "ldgt.h", &p[2], &p[64]); ++TESTINST_LOAD_RRR(5, "ldgt.h", &p[4], &p[0] ); ++TESTINST_LOAD_RRR(6, "ldgt.w", &p[2], &p[0] ); ++TESTINST_LOAD_RRR(7, "ldgt.w", &p[8], &p[64]); ++TESTINST_LOAD_RRR(8, "ldgt.w", &p[12], &p[0] ); ++TESTINST_LOAD_RRR(9, "ldgt.d", &p[4], &p[0] ); ++TESTINST_LOAD_RRR(10, "ldgt.d", &p[16], &p[64]); ++TESTINST_LOAD_RRR(11, "ldgt.d", &p[32], &p[0] ); ++ ++TESTINST_LOAD_RRR(12, "ldle.b", &p[64], &p[0] ); ++TESTINST_LOAD_RRR(13, "ldle.b", &p[65], &p[96]); ++TESTINST_LOAD_RRR(14, "ldle.h", &p[65], &p[0] ); ++TESTINST_LOAD_RRR(15, "ldle.h", &p[66], &p[0] ); ++TESTINST_LOAD_RRR(16, "ldle.h", &p[68], &p[96]); ++TESTINST_LOAD_RRR(17, "ldle.w", &p[66], &p[0] ); ++TESTINST_LOAD_RRR(18, "ldle.w", &p[72], &p[0] ); ++TESTINST_LOAD_RRR(19, "ldle.w", &p[76], &p[96]); ++TESTINST_LOAD_RRR(20, "ldle.d", &p[68], &p[0] ); ++TESTINST_LOAD_RRR(21, "ldle.d", &p[80], &p[0] ); ++TESTINST_LOAD_RRR(22, "ldle.d", &p[88], &p[96]); ++ ++TESTINST_STORE_RRR(23, "ldgt.b", &p[0], &p[64]); ++TESTINST_STORE_RRR(24, "ldgt.b", &p[1], &p[0] ); ++TESTINST_STORE_RRR(25, "ldgt.h", &p[1], &p[0] ); ++TESTINST_STORE_RRR(26, "ldgt.h", &p[2], &p[64]); ++TESTINST_STORE_RRR(27, "ldgt.h", &p[4], &p[0] ); ++TESTINST_STORE_RRR(28, "ldgt.w", &p[2], &p[0] ); ++TESTINST_STORE_RRR(29, "ldgt.w", &p[8], &p[64]); ++TESTINST_STORE_RRR(30, "ldgt.w", &p[12], &p[0] ); ++TESTINST_STORE_RRR(31, "ldgt.d", &p[4], &p[0] ); ++TESTINST_STORE_RRR(32, "ldgt.d", &p[16], &p[64]); ++TESTINST_STORE_RRR(33, "ldgt.d", &p[32], &p[0] ); ++ ++TESTINST_STORE_RRR(34, "ldle.b", &p[64], &p[0] ); ++TESTINST_STORE_RRR(35, "ldle.b", &p[65], &p[96]); ++TESTINST_STORE_RRR(36, "ldle.h", &p[65], &p[0] ); ++TESTINST_STORE_RRR(37, "ldle.h", &p[66], &p[0] ); ++TESTINST_STORE_RRR(38, "ldle.h", &p[68], &p[96]); ++TESTINST_STORE_RRR(39, "ldle.w", &p[66], &p[0] ); ++TESTINST_STORE_RRR(40, "ldle.w", &p[72], &p[0] ); ++TESTINST_STORE_RRR(41, "ldle.w", &p[76], &p[96]); ++TESTINST_STORE_RRR(42, "ldle.d", 
&p[68], &p[0] ); ++TESTINST_STORE_RRR(43, "ldle.d", &p[80], &p[0] ); ++TESTINST_STORE_RRR(44, "ldle.d", &p[88], &p[96]); ++ ++TESTINST_RR(45, "asrtle.d", 123, 456); ++TESTINST_RR(46, "asrtle.d", 789, 0); ++TESTINST_RR(47, "asrtgt.d", 123, 456); ++TESTINST_RR(48, "asrtgt.d", 789, 0); ++ ++TESTINST_I(49, "break", 0); ++TESTINST_I(50, "break", 6); ++TESTINST_I(51, "break", 7); ++TESTINST_I(52, "break", 100); ++ ++struct test { ++ void (*func)(void); ++ bool show; ++} tests[] = { ++ { test1, false }, ++ { test2, false }, ++ { test3, false }, ++ { test4, false }, ++ { test5, false }, ++ { test6, false }, ++ { test7, false }, ++ { test8, false }, ++ { test9, false }, ++ { test10, false }, ++ { test11, true }, ++ { test12, false }, ++ { test13, false }, ++ { test14, false }, ++ { test15, false }, ++ { test16, false }, ++ { test17, false }, ++ { test18, false }, ++ { test19, false }, ++ { test20, false }, ++ { test21, false }, ++ { test22, true }, ++ { test23, false }, ++ { test24, false }, ++ { test25, false }, ++ { test26, false }, ++ { test27, false }, ++ { test28, false }, ++ { test29, false }, ++ { test30, false }, ++ { test31, false }, ++ { test32, false }, ++ { test33, true }, ++ { test34, false }, ++ { test35, false }, ++ { test36, false }, ++ { test37, false }, ++ { test38, false }, ++ { test39, false }, ++ { test40, false }, ++ { test41, false }, ++ { test42, false }, ++ { test43, false }, ++ { test44, true }, ++ { test45, false }, ++ { test46, false }, ++ { test47, false }, ++ { test48, false }, ++ { test49, false }, ++ { test50, false }, ++ { test51, false }, ++ { test52, false } ++}; ++ ++int main(void) ++{ ++ int i; ++ struct sigaction sa; ++ int sigs[] = { SIGSYS, SIGBUS, SIGFPE, SIGTRAP }; ++ ++ sa.sa_sigaction = handler; ++ sa.sa_flags = SA_SIGINFO; ++ sigfillset(&sa.sa_mask); ++ ++ for(i = 0; i < sizeof(sigs) / sizeof(sigs[0]); i++) ++ sigaction(sigs[i], &sa, NULL); ++ ++ show(); ++ for(i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { ++ if (sigsetjmp(escape, 1) == 0) { ++ fprintf(stderr, "test %d\n", i + 1); ++ tests[i].func(); ++ if (tests[i].show) ++ show(); ++ fprintf(stderr, "no fault\n"); ++ } ++ } ++ ++ return 0; ++} +diff --git a/none/tests/loongarch64/fault.stderr.exp b/none/tests/loongarch64/fault.stderr.exp +new file mode 100644 +index 000000000..3f566684c +--- /dev/null ++++ b/none/tests/loongarch64/fault.stderr.exp +@@ -0,0 +1,138 @@ ++test 1 ++signal: 31 ++code: 128 ++test 2 ++no fault ++test 3 ++signal: 7 ++code: 1 ++test 4 ++signal: 31 ++code: 128 ++test 5 ++no fault ++test 6 ++signal: 7 ++code: 1 ++test 7 ++signal: 31 ++code: 128 ++test 8 ++no fault ++test 9 ++signal: 7 ++code: 1 ++test 10 ++signal: 31 ++code: 128 ++test 11 ++no fault ++test 12 ++signal: 31 ++code: 128 ++test 13 ++no fault ++test 14 ++signal: 7 ++code: 1 ++test 15 ++signal: 31 ++code: 128 ++test 16 ++no fault ++test 17 ++signal: 7 ++code: 1 ++test 18 ++signal: 31 ++code: 128 ++test 19 ++no fault ++test 20 ++signal: 7 ++code: 1 ++test 21 ++signal: 31 ++code: 128 ++test 22 ++no fault ++test 23 ++signal: 31 ++code: 128 ++test 24 ++no fault ++test 25 ++signal: 7 ++code: 1 ++test 26 ++signal: 31 ++code: 128 ++test 27 ++no fault ++test 28 ++signal: 7 ++code: 1 ++test 29 ++signal: 31 ++code: 128 ++test 30 ++no fault ++test 31 ++signal: 7 ++code: 1 ++test 32 ++signal: 31 ++code: 128 ++test 33 ++no fault ++test 34 ++signal: 31 ++code: 128 ++test 35 ++no fault ++test 36 ++signal: 7 ++code: 1 ++test 37 ++signal: 31 ++code: 128 ++test 38 ++no fault ++test 39 ++signal: 7 ++code: 1 ++test 40 ++signal: 31 
++code: 128 ++test 41 ++no fault ++test 42 ++signal: 7 ++code: 1 ++test 43 ++signal: 31 ++code: 128 ++test 44 ++no fault ++test 45 ++no fault ++test 46 ++signal: 31 ++code: 128 ++test 47 ++signal: 31 ++code: 128 ++test 48 ++no fault ++test 49 ++signal: 5 ++code: 1 ++test 50 ++signal: 8 ++code: 2 ++test 51 ++signal: 8 ++code: 1 ++test 52 ++signal: 5 ++code: 1 +diff --git a/none/tests/loongarch64/fault.stdout.exp b/none/tests/loongarch64/fault.stdout.exp +new file mode 100644 +index 000000000..d2e342df8 +--- /dev/null ++++ b/none/tests/loongarch64/fault.stdout.exp +@@ -0,0 +1,267 @@ ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 1 ++ldgt.b :: ++test 2 ++ldgt.b :: ++output: -26 ++test 3 ++ldgt.h :: ++test 4 ++ldgt.h :: ++test 5 ++ldgt.h :: ++output: 7711 ++test 6 ++ldgt.w :: ++test 7 ++ldgt.w :: ++test 8 ++ldgt.w :: ++output: 0 ++test 9 ++ldgt.d :: ++test 10 ++ldgt.d :: ++test 11 ++ldgt.d :: ++output: 2535295895347421136 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 12 ++ldle.b :: ++test 13 ++ldle.b :: ++output: -33 ++test 14 ++ldle.h :: ++test 15 ++ldle.h :: ++test 16 ++ldle.h :: ++output: 16190 ++test 17 ++ldle.w :: ++test 18 ++ldle.w :: ++test 19 ++ldle.w :: ++output: 1043676476 ++test 20 ++ldle.d :: ++test 21 ++ldle.d :: ++test 22 ++ldle.d :: ++output: 4266944292251042560 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 23 ++ldgt.b :: ++input: -149539557700451551 ++test 24 ++ldgt.b :: ++input: -149539557700451551 ++test 25 ++ldgt.h :: ++input: -149539557700451551 ++test 26 ++ldgt.h :: ++input: -149539557700451551 ++test 27 ++ldgt.h :: ++input: -149539557700451551 ++test 28 ++ldgt.w :: ++input: -149539557700451551 ++test 29 ++ldgt.w :: ++input: 
-149539557700451551 ++test 30 ++ldgt.w :: ++input: -149539557700451551 ++test 31 ++ldgt.d :: ++input: -149539557700451551 ++test 32 ++ldgt.d :: ++input: -149539557700451551 ++test 33 ++ldgt.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 34 ++ldle.b :: ++input: -149539557700451551 ++test 35 ++ldle.b :: ++input: -149539557700451551 ++test 36 ++ldle.h :: ++input: -149539557700451551 ++test 37 ++ldle.h :: ++input: -149539557700451551 ++test 38 ++ldle.h :: ++input: -149539557700451551 ++test 39 ++ldle.w :: ++input: -149539557700451551 ++test 40 ++ldle.w :: ++input: -149539557700451551 ++test 41 ++ldle.w :: ++input: -149539557700451551 ++test 42 ++ldle.d :: ++input: -149539557700451551 ++test 43 ++ldle.d :: ++input: -149539557700451551 ++test 44 ++ldle.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 45 ++asrtle.d :: ++input: 123 456 ++test 46 ++asrtle.d :: ++input: 789 0 ++test 47 ++asrtgt.d :: ++input: 123 456 ++test 48 ++asrtgt.d :: ++input: 789 0 ++test 49 ++break :: ++input: 0 ++test 50 ++break :: ++input: 6 ++test 51 ++break :: ++input: 7 ++test 52 ++break :: ++input: 100 +diff --git a/none/tests/loongarch64/fault.vgtest b/none/tests/loongarch64/fault.vgtest +new file mode 100644 +index 000000000..24bf21afe +--- /dev/null ++++ b/none/tests/loongarch64/fault.vgtest +@@ -0,0 +1,2 @@ ++prog: fault ++vgopts: -q +diff --git a/none/tests/loongarch64/fault_fp.c b/none/tests/loongarch64/fault_fp.c +new file mode 100644 +index 000000000..0d5862dcc +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.c +@@ -0,0 +1,163 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#define NUM 24 ++ ++unsigned long mem[NUM] = { ++ 0x121f1e1f0000e680, 0x0000000000010700, 0x000000030000e7dc, ++ 0xffffffff0000b0d0, 0x232f2e2f2ab05fd0, 0x242c2b2b0000b6a0, ++ 0x252a2e2b0000be80, 0x262d2d2a0000de10, 0x3f343f3e0000df20, ++ 0x3e353d3c2ab05fe0, 0x363a3c3b0000dfd0, 0x3b373b3a00010300, ++ 0x0000e680121f1e1f, 0x0001070000000000, 0x0000e7dc00000003, ++ 0x0000b0d0ffffffff, 0x2ab05fd0232f2e2f, 0x0000b6a0242c2b2b, ++ 0x0000be80252a2e2b, 0x0000de10262d2d2a, 0x0000df203f343f3e, ++ 0x2ab05fe03e353d3c, 0x0000dfd0363a3c3b, 0x000103003b373b3a ++}; ++ ++long val1 = 0; ++long val2 = 0xfdecba9087654321UL; ++char *p = (char *)mem; ++ 
++#define TESTINST_LOAD_FRR_S(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=f" (val1) \ ++ : "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ printf("output: %d\n", (int)val1); \ ++ } ++ ++#define TESTINST_LOAD_FRR_D(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=f" (val1) \ ++ : "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ printf("output: %ld\n", val1); \ ++ } ++ ++#define TESTINST_STORE_FRR(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", val2); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "f" (val2), "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ } ++ ++static sigjmp_buf escape; ++ ++static void handler(int sig, siginfo_t *si, void *uc) ++{ ++ fprintf(stderr, "signal: %d\n", sig); ++ fprintf(stderr, "code: %d\n", si->si_code); ++ siglongjmp(escape, 1); ++} ++ ++static inline void show(void) ++{ ++ int i; ++ printf("memory block:\n"); ++ for (i = 0; i < NUM; i++) ++ printf("0x%lx:\t%#018lx\n", i * sizeof(unsigned long), mem[i]); ++} ++ ++TESTINST_LOAD_FRR_S(1, "fldgt.s", &p[2], &p[0] ); ++TESTINST_LOAD_FRR_S(2, "fldgt.s", &p[8], &p[64]); ++TESTINST_LOAD_FRR_S(3, "fldgt.s", &p[12], &p[0] ); ++TESTINST_LOAD_FRR_D(4, "fldgt.d", &p[4], &p[0] ); ++TESTINST_LOAD_FRR_D(5, "fldgt.d", &p[16], &p[64]); ++TESTINST_LOAD_FRR_D(6, "fldgt.d", &p[32], &p[0] ); ++ ++TESTINST_LOAD_FRR_S(7, "fldle.s", &p[66], &p[0] ); ++TESTINST_LOAD_FRR_S(8, "fldle.s", &p[72], &p[0] ); ++TESTINST_LOAD_FRR_S(9, "fldle.s", &p[76], &p[96]); ++TESTINST_LOAD_FRR_D(10, "fldle.d", &p[68], &p[0] ); ++TESTINST_LOAD_FRR_D(11, "fldle.d", &p[80], &p[0] ); ++TESTINST_LOAD_FRR_D(12, "fldle.d", &p[88], &p[96]); ++ ++TESTINST_STORE_FRR(13, "fstgt.s", &p[2], &p[0] ); ++TESTINST_STORE_FRR(14, "fstgt.s", &p[8], &p[64]); ++TESTINST_STORE_FRR(15, "fstgt.s", &p[12], &p[0] ); ++TESTINST_STORE_FRR(16, "fstgt.d", &p[4], &p[0] ); ++TESTINST_STORE_FRR(17, "fstgt.d", &p[16], &p[64]); ++TESTINST_STORE_FRR(18, "fstgt.d", &p[32], &p[0] ); ++ ++TESTINST_STORE_FRR(19, "fstle.s", &p[66], &p[0] ); ++TESTINST_STORE_FRR(20, "fstle.s", &p[72], &p[0] ); ++TESTINST_STORE_FRR(21, "fstle.s", &p[76], &p[96]); ++TESTINST_STORE_FRR(22, "fstle.d", &p[68], &p[0] ); ++TESTINST_STORE_FRR(23, "fstle.d", &p[80], &p[0] ); ++TESTINST_STORE_FRR(24, "fstle.d", &p[88], &p[96]); ++ ++struct test { ++ void (*func)(void); ++ bool show; ++} tests[] = { ++ { test1, false }, ++ { test2, false }, ++ { test3, false }, ++ { test4, false }, ++ { test5, false }, ++ { test6, true }, ++ { test7, false }, ++ { test8, false }, ++ { test9, false }, ++ { test10, false }, ++ { test11, false }, ++ { test12, true }, ++ { test13, false }, ++ { test14, false }, ++ { test15, false }, ++ { test16, false }, ++ { test17, false }, ++ { test18, true }, ++ { test19, false }, ++ { test20, false }, ++ { test21, false }, ++ { test22, false }, ++ { test23, false }, ++ { test24, true } ++}; ++ ++int main(void) ++{ ++ int i; ++ struct sigaction sa; ++ int sigs[] = { SIGSYS, SIGBUS }; ++ ++ sa.sa_sigaction = handler; ++ sa.sa_flags = SA_SIGINFO; ++ sigfillset(&sa.sa_mask); ++ ++ for(i = 0; i < sizeof(sigs) / sizeof(sigs[0]); i++) ++ sigaction(sigs[i], &sa, NULL); ++ ++ show(); ++ for(i = 0; i < sizeof(tests) / sizeof(tests[0]); 
i++) { ++ if (sigsetjmp(escape, 1) == 0) { ++ fprintf(stderr, "test %d\n", i + 1); ++ tests[i].func(); ++ if (tests[i].show) ++ show(); ++ fprintf(stderr, "no fault\n"); ++ } ++ } ++ ++ return 0; ++} +diff --git a/none/tests/loongarch64/fault_fp.stderr.exp b/none/tests/loongarch64/fault_fp.stderr.exp +new file mode 100644 +index 000000000..a983dead4 +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.stderr.exp +@@ -0,0 +1,64 @@ ++test 1 ++signal: 7 ++code: 1 ++test 2 ++signal: 31 ++code: 128 ++test 3 ++no fault ++test 4 ++signal: 7 ++code: 1 ++test 5 ++signal: 31 ++code: 128 ++test 6 ++no fault ++test 7 ++signal: 7 ++code: 1 ++test 8 ++signal: 31 ++code: 128 ++test 9 ++no fault ++test 10 ++signal: 7 ++code: 1 ++test 11 ++signal: 31 ++code: 128 ++test 12 ++no fault ++test 13 ++signal: 7 ++code: 1 ++test 14 ++signal: 31 ++code: 128 ++test 15 ++no fault ++test 16 ++signal: 7 ++code: 1 ++test 17 ++signal: 31 ++code: 128 ++test 18 ++no fault ++test 19 ++signal: 7 ++code: 1 ++test 20 ++signal: 31 ++code: 128 ++test 21 ++no fault ++test 22 ++signal: 7 ++code: 1 ++test 23 ++signal: 31 ++code: 128 ++test 24 ++no fault +diff --git a/none/tests/loongarch64/fault_fp.stdout.exp b/none/tests/loongarch64/fault_fp.stdout.exp +new file mode 100644 +index 000000000..254a12353 +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.stdout.exp +@@ -0,0 +1,189 @@ ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 1 ++fldgt.s :: ++test 2 ++fldgt.s :: ++test 3 ++fldgt.s :: ++output: 0 ++test 4 ++fldgt.d :: ++test 5 ++fldgt.d :: ++test 6 ++fldgt.d :: ++output: 2535295895347421136 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 7 ++fldle.s :: ++test 8 ++fldle.s :: ++test 9 ++fldle.s :: ++output: 1043676476 ++test 10 ++fldle.d :: ++test 11 ++fldle.d :: ++test 12 ++fldle.d :: ++output: 4266944292251042560 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b 
++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 13 ++fstgt.s :: ++input: -149539557700451551 ++test 14 ++fstgt.s :: ++input: -149539557700451551 ++test 15 ++fstgt.s :: ++input: -149539557700451551 ++test 16 ++fstgt.d :: ++input: -149539557700451551 ++test 17 ++fstgt.d :: ++input: -149539557700451551 ++test 18 ++fstgt.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x8765432100010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0xfdecba9087654321 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 19 ++fstle.s :: ++input: -149539557700451551 ++test 20 ++fstle.s :: ++input: -149539557700451551 ++test 21 ++fstle.s :: ++input: -149539557700451551 ++test 22 ++fstle.d :: ++input: -149539557700451551 ++test 23 ++fstle.d :: ++input: -149539557700451551 ++test 24 ++fstle.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x8765432100010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0xfdecba9087654321 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x876543212ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0xfdecba9087654321 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a +diff --git a/none/tests/loongarch64/fault_fp.vgtest b/none/tests/loongarch64/fault_fp.vgtest +new file mode 100644 +index 000000000..b750af8ea +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features fpu ++prog: fault_fp ++vgopts: -q +diff --git a/none/tests/loongarch64/filter_stderr b/none/tests/loongarch64/filter_stderr +new file mode 100755 +index 000000000..0ae9313a9 +--- /dev/null ++++ b/none/tests/loongarch64/filter_stderr +@@ -0,0 +1,3 @@ ++#! 
/bin/sh ++ ++../filter_stderr +diff --git a/none/tests/loongarch64/float.c b/none/tests/loongarch64/float.c +new file mode 100644 +index 000000000..7aa1b6c67 +--- /dev/null ++++ b/none/tests/loongarch64/float.c +@@ -0,0 +1,804 @@ ++#include ++#include ++ ++#define NUM 24 ++ ++const float fj_s[NUM] = { ++ 0, 456.25, 3, -1, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75, ++ -45786.5, 456, 34.03125, 45786.75, ++ 1752065, 107, -45667.25, -7, ++ -347856.5, 356047.5, -1.0, 23.0625 ++}; ++ ++const double fj_d[NUM] = { ++ 0, 456.25, 3, -1, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75, ++ -45786.5, 456, 34.03125, 45786.75, ++ 1752065, 107, -45667.25, -7, ++ -347856.5, 356047.5, -1.0, 23.0625 ++}; ++ ++const float fk_s[NUM] = { ++ -4578.5, 456.25, 34.03125, 4578.75, ++ 175, 107, -456.25, -7.25, ++ -3478.5, 356.5, -1.0, 23.0625, ++ 0, 456.25, 3, -1, ++ 1384.5, -7, 100, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75 ++}; ++ ++const double fk_d[NUM] = { ++ -45786.5, 456.25, 34.03125, 45786.75, ++ 1752065, 107, -45667.25, -7.25, ++ -347856.5, 356047.5, -1.0, 23.0625, ++ 0, 456.25, 3, -1, ++ 1384.5, -7, 1000000000, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75 ++}; ++ ++const float fa_s[NUM] = { ++ -347856.5, 356047.5, -1.0, 23.0625, ++ 1752, 0.015625, 0.03125, -248562.75, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ -347856.75, 356047.75, -1.0, 23.03125, ++ 0, 456.25, 3, -1, ++ -45786.5, 456, 34.03125, 45786.03125 ++}; ++ ++const double fa_d[NUM] = { ++ -347856.5, 356047.5, -1.0, 23.0625, ++ 1752, 0.015625, 0.03125, -248562.75, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ -347856.75, 356047.75, -1.0, 23.03125, ++ 0, 456.25, 3, -1, ++ -45786.5, 456, 34.03125, 45786.03125 ++}; ++ ++const int fj_w[NUM] = { ++ 0, 456, 3, -1, ++ 0xffffffff, 356, 1000000000, -5786, ++ 1752, 24575, 10, -248562, ++ -45786, 456, 34, 45786, ++ 1752065, 107, -45667, -7, ++ -347856, 0x80000000, 0xfffffff, 23 ++}; ++ ++const long fj_l[NUM] = { ++ 18, 25, 3, -1, ++ 0xffffffff, 356, 1000000, -5786, ++ -1, 24575, 10, -125458, ++ -486, 456, 34, 45786, ++ 0, 1700000, -45667, -7, ++ -347856, 0x80000000, 0xfffffff, 23 ++}; ++ ++const int cf[NUM] = { ++ 0, 1, 0, 1, ++ 1, 0, 1, 0, ++ 0, 0, 1, 1, ++ 1, 1, 0, 0, ++ 0, 0, 0, 0, ++ 1, 1, 1, 1 ++}; ++ ++typedef enum { ++ TO_NEAREST = 0, ++ TO_ZERO, ++ TO_PLUS_INFINITY, ++ TO_MINUS_INFINITY ++} round_mode_t; ++ ++typedef enum { ++ FADD_S, FADD_D, FSUB_S, FSUB_D, ++ FMUL_S, FMUL_D, FDIV_S, FDIV_D, ++ FMADD_S, FMADD_D, FMSUB_S, FMSUB_D, ++ FNMADD_S, FNMADD_D, FNMSUB_S, FNMSUB_D, ++ FMAX_S, FMAX_D, FMIN_S, FMIN_D, ++ FMAXA_S, FMAXA_D, FMINA_S, FMINA_D, ++ FABS_S, FABS_D, FNEG_S, FNEG_D, ++ FSQRT_S, FSQRT_D, ++ FRECIP_S, FRECIP_D, ++ FRSQRT_S, FRSQRT_D, ++ FSCALEB_S, FSCALEB_D, ++ FLOGB_S, FLOGB_D, ++ FCVT_S_D, FCVT_D_S, ++ FTINTRM_W_S, FTINTRM_W_D, FTINTRM_L_S, FTINTRM_L_D, ++ FTINTRP_W_S, FTINTRP_W_D, FTINTRP_L_S, FTINTRP_L_D, ++ FTINTRZ_W_S, FTINTRZ_W_D, FTINTRZ_L_S, FTINTRZ_L_D, ++ FTINTRNE_W_S, FTINTRNE_W_D, FTINTRNE_L_S, FTINTRNE_L_D, ++ FTINT_W_S, FTINT_W_D, FTINT_L_S, FTINT_L_D, ++ FFINT_S_W, FFINT_S_L, FFINT_D_W, FFINT_D_L, ++ FRINT_S, FRINT_D, ++ FCMP_CAF_S, FCMP_CAF_D, FCMP_SAF_S, FCMP_SAF_D, ++ FCMP_CLT_S, FCMP_CLT_D, FCMP_SLT_S, FCMP_SLT_D, ++ FCMP_CEQ_S, FCMP_CEQ_D, FCMP_SEQ_S, FCMP_SEQ_D, ++ FCMP_CLE_S, FCMP_CLE_D, FCMP_SLE_S, FCMP_SLE_D, ++ FCMP_CUN_S, FCMP_CUN_D, FCMP_SUN_S, FCMP_SUN_D, ++ FCMP_CULT_S, FCMP_CULT_D, FCMP_SULT_S, FCMP_SULT_D, ++ FCMP_CUEQ_S, FCMP_CUEQ_D, FCMP_SUEQ_S, FCMP_SUEQ_D, ++ FCMP_CULE_S, FCMP_CULE_D, 
FCMP_SULE_S, FCMP_SULE_D, ++ FCMP_CNE_S, FCMP_CNE_D, FCMP_SNE_S, FCMP_SNE_D, ++ FCMP_COR_S, FCMP_COR_D, FCMP_SOR_S, FCMP_SOR_D, ++ FCMP_CUNE_S, FCMP_CUNE_D, FCMP_SUNE_S, FCMP_SUNE_D, ++ FSEL, FMOV_S, FMOV_D ++} op_t; ++ ++static inline void set_fcsr(round_mode_t mode) ++{ ++ __asm__ __volatile__("movgr2fcsr $r0, %0" : : "r" (mode << 8)); ++ ++ const char *round_mode_name[] = { "near", "zero", "+inf", "-inf" }; ++ printf("roundig mode: %s\n", round_mode_name[mode]); ++} ++ ++#define TESTINST_FF_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFF_S(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f %.6f\n", v1, v2); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFF_D(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f\n", v1, v2); \ ++ printf("output: %.15f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFFF_S(insn, v1, v2, v3) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3, %4 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2), "f" (v3) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f %.6f %.6f\n", v1, v2, v3); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFFF_D(insn, v1, v2, v3) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3, %4 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2), "f" (v3) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f %.15f\n", v1, v2, v3); \ ++ printf("output: %.15f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_S_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", 
insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_W_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ int fd_w; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_w), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %d\n", fd_w); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_W_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ int fd_w; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_w), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %d\n", fd_w); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++#define TESTINST_FF_L_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ long fd_l; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_l), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %ld\n", fd_l); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_L_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ long fd_l; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_l), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %ld\n", fd_l); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_S_W(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %d\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_S_L(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D_W(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %d\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D_L(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFC_S(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ int fcc; \ ++ __asm__ __volatile__( \ ++ insn " $fcc0, %2, %3 \n\t" \ ++ "movcf2gr %0, $fcc0 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=r" (fcc), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "$fcc0", "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f %.6f\n", v1, v2); \ ++ printf("output: %d\n", fcc); \ ++ printf("fcsr: %#x\n", 
fcsr); \ ++ } ++ ++#define TESTINST_FFC_D(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ int fcc; \ ++ __asm__ __volatile__( \ ++ insn " $fcc0, %2, %3 \n\t" \ ++ "movcf2gr %0, $fcc0 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=r" (fcc), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "$fcc0", "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f\n", v1, v2); \ ++ printf("output: %d\n", fcc); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFFC(insn, v1, v2, v3) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_s; \ ++ __asm__ __volatile__( \ ++ "movgr2cf $fcc0, %4 \n\t" \ ++ insn " %0, %2, %3, $fcc0 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2), "r" (v3) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f %d\n", v1, v2, v3); \ ++ printf("output: %.15f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++void test(op_t op) ++{ ++ int i; ++ round_mode_t mode; ++ for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) { ++ for (i = 0; i < NUM; i++) { ++ set_fcsr(mode); ++ switch (op) { ++ case FADD_S: ++ TESTINST_FFF_S("fadd.s", fj_s[i], fk_s[i]); ++ break; ++ case FADD_D: ++ TESTINST_FFF_D("fadd.d", fj_d[i], fk_d[i]); ++ break; ++ case FSUB_S: ++ TESTINST_FFF_S("fsub.s", fj_s[i], fk_s[i]); ++ break; ++ case FSUB_D: ++ TESTINST_FFF_D("fsub.d", fj_d[i], fk_d[i]); ++ break; ++ case FMUL_S: ++ TESTINST_FFF_S("fmul.s", fj_s[i], fk_s[i]); ++ break; ++ case FMUL_D: ++ TESTINST_FFF_D("fmul.d", fj_d[i], fk_d[i]); ++ break; ++ case FDIV_S: ++ TESTINST_FFF_S("fdiv.s", fj_s[i], fk_s[i]); ++ break; ++ case FDIV_D: ++ TESTINST_FFF_D("fdiv.d", fj_d[i], fk_d[i]); ++ break; ++ case FMADD_S: ++ TESTINST_FFFF_S("fmadd.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FMADD_D: ++ TESTINST_FFFF_D("fmadd.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FMSUB_S: ++ TESTINST_FFFF_S("fmsub.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FMSUB_D: ++ TESTINST_FFFF_D("fmsub.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FNMADD_S: ++ TESTINST_FFFF_S("fnmadd.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FNMADD_D: ++ TESTINST_FFFF_D("fnmadd.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FNMSUB_S: ++ TESTINST_FFFF_S("fnmsub.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FNMSUB_D: ++ TESTINST_FFFF_D("fnmsub.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FMAX_S: ++ TESTINST_FFF_S("fmax.s", fj_s[i], fk_s[i]); ++ break; ++ case FMAX_D: ++ TESTINST_FFF_D("fmax.d", fj_d[i], fk_d[i]); ++ break; ++ case FMIN_S: ++ TESTINST_FFF_S("fmin.s", fj_s[i], fk_s[i]); ++ break; ++ case FMIN_D: ++ TESTINST_FFF_D("fmin.d", fj_d[i], fk_d[i]); ++ break; ++ case FMAXA_S: ++ TESTINST_FFF_S("fmaxa.s", fj_s[i], fk_s[i]); ++ break; ++ case FMAXA_D: ++ TESTINST_FFF_D("fmaxa.d", fj_d[i], fk_d[i]); ++ break; ++ case FMINA_S: ++ TESTINST_FFF_S("fmina.s", fj_s[i], fk_s[i]); ++ break; ++ case FMINA_D: ++ TESTINST_FFF_D("fmina.d", fj_d[i], fk_d[i]); ++ break; ++ case FABS_S: ++ TESTINST_FF_S("fabs.s", fj_s[i]); ++ break; ++ case FABS_D: ++ TESTINST_FF_D("fabs.d", fj_d[i]); ++ break; ++ case FNEG_S: ++ TESTINST_FF_S("fneg.s", fj_s[i]); ++ break; ++ case FNEG_D: ++ TESTINST_FF_D("fneg.d", fj_d[i]); ++ break; ++ case FSQRT_S: ++ TESTINST_FF_S("fsqrt.s", fj_s[i]); ++ break; ++ case FSQRT_D: ++ TESTINST_FF_D("fsqrt.d", fj_d[i]); ++ break; ++ case FRECIP_S: ++ TESTINST_FF_S("frecip.s", fj_s[i]); ++ break; ++ case FRECIP_D: ++ TESTINST_FF_D("frecip.d", fj_d[i]); ++ break; ++ case FRSQRT_S: ++ TESTINST_FF_S("frsqrt.s", fj_s[i]); ++ 
break; ++ case FRSQRT_D: ++ TESTINST_FF_D("frsqrt.d", fj_d[i]); ++ break; ++ case FSCALEB_S: ++ TESTINST_FFF_S("fscaleb.s", fj_s[i], fk_s[i]); ++ break; ++ case FSCALEB_D: ++ TESTINST_FFF_D("fscaleb.d", fj_d[i], fk_d[i]); ++ break; ++ case FLOGB_S: ++ TESTINST_FF_S("flogb.s", fj_s[i]); ++ break; ++ case FLOGB_D: ++ TESTINST_FF_D("flogb.d", fj_d[i]); ++ break; ++ case FCVT_S_D: ++ TESTINST_FF_S_D("fcvt.s.d", fj_d[i]); ++ break; ++ case FCVT_D_S: ++ TESTINST_FF_D_S("fcvt.d.s", fj_s[i]); ++ break; ++ case FTINTRM_W_S: ++ TESTINST_FF_W_S("ftintrm.w.s", fj_s[i]); ++ break; ++ case FTINTRM_W_D: ++ TESTINST_FF_W_D("ftintrm.w.d", fj_d[i]); ++ break; ++ case FTINTRM_L_S: ++ TESTINST_FF_L_S("ftintrm.l.s", fj_s[i]); ++ break; ++ case FTINTRM_L_D: ++ TESTINST_FF_L_D("ftintrm.l.d", fj_d[i]); ++ break; ++ case FTINTRP_W_S: ++ TESTINST_FF_W_S("ftintrp.w.s", fj_s[i]); ++ break; ++ case FTINTRP_W_D: ++ TESTINST_FF_W_D("ftintrp.w.d", fj_d[i]); ++ break; ++ case FTINTRP_L_S: ++ TESTINST_FF_L_S("ftintrp.l.s", fj_s[i]); ++ break; ++ case FTINTRP_L_D: ++ TESTINST_FF_L_D("ftintrp.l.d", fj_d[i]); ++ break; ++ case FTINTRZ_W_S: ++ TESTINST_FF_W_S("ftintrz.w.s", fj_s[i]); ++ break; ++ case FTINTRZ_W_D: ++ TESTINST_FF_W_D("ftintrz.w.d", fj_d[i]); ++ break; ++ case FTINTRZ_L_S: ++ TESTINST_FF_L_S("ftintrz.l.s", fj_s[i]); ++ break; ++ case FTINTRZ_L_D: ++ TESTINST_FF_L_D("ftintrz.l.d", fj_d[i]); ++ break; ++ case FTINTRNE_W_S: ++ TESTINST_FF_W_S("ftintrne.w.s", fj_s[i]); ++ break; ++ case FTINTRNE_W_D: ++ TESTINST_FF_W_D("ftintrne.w.d", fj_d[i]); ++ break; ++ case FTINTRNE_L_S: ++ TESTINST_FF_L_S("ftintrne.l.s", fj_s[i]); ++ break; ++ case FTINTRNE_L_D: ++ TESTINST_FF_L_D("ftintrne.l.d", fj_d[i]); ++ break; ++ case FTINT_W_S: ++ TESTINST_FF_W_S("ftint.w.s", fj_s[i]); ++ break; ++ case FTINT_W_D: ++ TESTINST_FF_W_D("ftint.w.d", fj_d[i]); ++ break; ++ case FTINT_L_S: ++ TESTINST_FF_L_S("ftint.l.s", fj_s[i]); ++ break; ++ case FTINT_L_D: ++ TESTINST_FF_L_D("ftint.l.d", fj_d[i]); ++ break; ++ case FFINT_S_W: ++ TESTINST_FF_S_W("ffint.s.w", fj_w[i]); ++ break; ++ case FFINT_S_L: ++ TESTINST_FF_S_L("ffint.s.l", fj_l[i]); ++ break; ++ case FFINT_D_W: ++ TESTINST_FF_D_W("ffint.d.w", fj_w[i]); ++ break; ++ case FFINT_D_L: ++ TESTINST_FF_D_L("ffint.d.l", fj_l[i]); ++ break; ++ case FRINT_S: ++ TESTINST_FF_S("frint.s", fj_s[i]); ++ break; ++ case FRINT_D: ++ TESTINST_FF_D("frint.d", fj_d[i]); ++ break; ++ case FCMP_CAF_S: ++ TESTINST_FFC_S("fcmp.caf.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CAF_D: ++ TESTINST_FFC_D("fcmp.caf.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SAF_S: ++ TESTINST_FFC_S("fcmp.saf.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SAF_D: ++ TESTINST_FFC_D("fcmp.saf.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CLT_S: ++ TESTINST_FFC_S("fcmp.clt.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CLT_D: ++ TESTINST_FFC_D("fcmp.clt.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SLT_S: ++ TESTINST_FFC_S("fcmp.slt.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SLT_D: ++ TESTINST_FFC_D("fcmp.slt.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CEQ_S: ++ TESTINST_FFC_S("fcmp.ceq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CEQ_D: ++ TESTINST_FFC_D("fcmp.ceq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SEQ_S: ++ TESTINST_FFC_S("fcmp.seq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SEQ_D: ++ TESTINST_FFC_D("fcmp.seq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CLE_S: ++ TESTINST_FFC_S("fcmp.cle.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CLE_D: ++ TESTINST_FFC_D("fcmp.cle.d", fj_d[i], fk_d[i]); ++ break; ++ case 
FCMP_SLE_S: ++ TESTINST_FFC_S("fcmp.sle.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SLE_D: ++ TESTINST_FFC_D("fcmp.sle.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CUN_S: ++ TESTINST_FFC_S("fcmp.cun.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CUN_D: ++ TESTINST_FFC_D("fcmp.cun.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SUN_S: ++ TESTINST_FFC_S("fcmp.sun.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SUN_D: ++ TESTINST_FFC_D("fcmp.sun.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CULT_S: ++ TESTINST_FFC_S("fcmp.cult.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CULT_D: ++ TESTINST_FFC_D("fcmp.cult.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SULT_S: ++ TESTINST_FFC_S("fcmp.sult.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SULT_D: ++ TESTINST_FFC_D("fcmp.sult.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CUEQ_S: ++ TESTINST_FFC_S("fcmp.cueq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CUEQ_D: ++ TESTINST_FFC_D("fcmp.cueq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SUEQ_S: ++ TESTINST_FFC_S("fcmp.sueq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SUEQ_D: ++ TESTINST_FFC_D("fcmp.sueq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CULE_S: ++ TESTINST_FFC_S("fcmp.cule.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CULE_D: ++ TESTINST_FFC_D("fcmp.cule.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SULE_S: ++ TESTINST_FFC_S("fcmp.sule.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SULE_D: ++ TESTINST_FFC_D("fcmp.sule.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CNE_S: ++ TESTINST_FFC_S("fcmp.cne.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CNE_D: ++ TESTINST_FFC_D("fcmp.cne.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SNE_S: ++ TESTINST_FFC_S("fcmp.sne.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SNE_D: ++ TESTINST_FFC_D("fcmp.sne.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_COR_S: ++ TESTINST_FFC_S("fcmp.cor.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_COR_D: ++ TESTINST_FFC_D("fcmp.cor.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SOR_S: ++ TESTINST_FFC_S("fcmp.sor.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SOR_D: ++ TESTINST_FFC_D("fcmp.sor.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CUNE_S: ++ TESTINST_FFC_S("fcmp.cune.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CUNE_D: ++ TESTINST_FFC_D("fcmp.cune.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SUNE_S: ++ TESTINST_FFC_S("fcmp.sune.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SUNE_D: ++ TESTINST_FFC_D("fcmp.sune.d", fj_d[i], fk_d[i]); ++ break; ++ case FSEL: ++ TESTINST_FFFC("fsel", fj_d[i], fk_d[i], cf[i]); ++ break; ++ case FMOV_S: ++ TESTINST_FF_S("fmov.s", fj_s[i]); ++ break; ++ case FMOV_D: ++ TESTINST_FF_D("fmov.d", fj_d[i]); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ } ++ } ++} ++ ++int main(void) ++{ ++ op_t op; ++ for (op = FADD_S; op <= FMOV_D; op++) ++ test(op); ++ return 0; ++} +diff --git a/none/tests/loongarch64/float.stderr.exp b/none/tests/loongarch64/float.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/float.stdout.exp b/none/tests/loongarch64/float.stdout.exp +new file mode 100644 +index 000000000..f4914ff0c +--- /dev/null ++++ b/none/tests/loongarch64/float.stdout.exp +@@ -0,0 +1,54240 @@ ++roundig mode: near ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 
4577.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999552.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010000 ++roundig mode: near ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0 ++roundig mode: zero ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 4577.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999488.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 
45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0x100 ++roundig mode: +inf ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 4577.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999552.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.531250 ++fcsr: 0x1010200 ++roundig mode: +inf ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0x200 ++roundig mode: -inf ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0x300 ++roundig mode: -inf 
++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 4577.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999488.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0x300 ++roundig mode: near ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 0.031250000000000 
-1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 100.000000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0 ++roundig mode: zero ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 
34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 100.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 
100.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 100.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0x300 
++roundig mode: -inf ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0x300 ++roundig mode: near ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 456.250000 456.250000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000448.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010000 ++roundig mode: near ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0 ++roundig mode: zero ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 456.250000 456.250000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000448.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0x100 
++roundig mode: zero ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.468750 ++fcsr: 0x1010100 ++roundig mode: zero ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0x100 ++roundig mode: +inf ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 456.250000 456.250000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000512.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0x200 
++roundig mode: +inf ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0x200 ++roundig mode: -inf ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 456.250000 456.250000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000448.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.468750 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0x300 ++roundig mode: near ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 
-1750680.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248585.812500000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0 ++roundig mode: zero ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: -1750680.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1752.000000000000000 
-347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248585.812500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: -1750680.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 
-248585.812500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: -1750680.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248585.812500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf 
++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0x300 ++roundig mode: near ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570312 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.500000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: -609444608.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242188 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.500000 ++fcsr: 
0x1010000 ++roundig mode: zero ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570312 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: -609444544.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242187 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570313 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 
0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: -609444544.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242188 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570312 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.500000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: 
-609444608.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242187 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0 ++roundig mode: zero ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: 
++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1384.500000000000000 
1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -5786.500000000000000 
-7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0x300 ++roundig mode: near ++fdiv.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088154 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000218 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911428 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067757 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191780.750000 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137939 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503665 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000044 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: -248562.750000 23.062500 ++output: -10777.789062 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999452 
++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485718 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285714 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672485 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001210 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548233 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000093 ++fcsr: 0x1010000 ++roundig mode: zero ++fdiv.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088154 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000218 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911428 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067757 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191780.750000 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137878 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503665 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000043 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: -248562.750000 23.062500 ++output: -10777.788085 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999452 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485717 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285714 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672485 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001209 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548217 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000092 ++fcsr: 0x1010100 ++roundig mode: +inf ++fdiv.s :: ++input: 0.000000 
-4578.500000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088155 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000218 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911429 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067757 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191780.750000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137940 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503665 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000044 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: -248562.750000 23.062500 ++output: -10777.788085 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999453 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485840 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285714 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672485 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001210 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548217 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000092 ++fcsr: 0x1010200 ++roundig mode: -inf ++fdiv.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088154 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000219 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911428 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067758 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191781.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137878 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503666 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000043 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s 
:: ++input: -248562.750000 23.062500 ++output: -10777.789063 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999452 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485717 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285716 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672516 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001209 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548234 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000093 ++fcsr: 0x1010300 ++roundig mode: near ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972452 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379586 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408860 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345794 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997186992 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482790 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615819 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884594 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886179054 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794521 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354595 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 107.000000000000000 
-7.000000000000000 ++output: -15.285714285714286 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667250000 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261298 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -198.548230593607315 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411835 ++fcsr: 0x1010000 ++roundig mode: zero ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972451 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379585 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408860 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345794 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997183354 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482676 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615818 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884594 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886177235 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794520 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354595 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -15.285714285714284 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667249999 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261297 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 
-198.548230593607286 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411834 ++fcsr: 0x1010100 ++roundig mode: +inf ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972452 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379585 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408861 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345794 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997183354 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482790 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615818 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884595 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886177235 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794521 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354823 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -15.285714285714284 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667249999 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261298 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -198.548230593607286 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411834 ++fcsr: 0x1010200 ++roundig mode: 
-inf ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972451 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379586 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408860 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345795 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997186993 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482676 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615819 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884594 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886179055 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794520 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354595 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -15.285714285714287 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667250001 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261297 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -198.548230593607315 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411835 ++fcsr: 0x1010300 ++roundig mode: near ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0 ++roundig mode: 
near ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679688 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738265.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490368.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242188 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5686692.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679687 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 999999936.000000 ++fcsr: 
0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738264.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490368.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242187 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5686692.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679687 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738264.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf 
++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490368.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242188 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5686692.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679688 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 999999936.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738265.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490432.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242187 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 23.062500 -248562.750000 
45786.031250 ++output: -5686692.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 23.062500000000000 
-248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0 ++roundig mode: zero ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -1.000000000000000 
0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 356047.500000000000000 
0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 
-45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0x300 ++roundig mode: near ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6095716.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820312 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726692.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398784.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242188 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 3.000000 
34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6095716.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820312 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726691.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398784.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242187 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 
-6095716.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820313 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726691.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398784.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242188 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6095716.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820312 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000064.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726692.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0x300 ++roundig 
mode: -inf ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398848.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242187 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 
2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0 ++roundig mode: zero ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0x100 ++roundig mode: 
zero ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: 
++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 
34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0x300 ++roundig mode: near ++fnmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679688 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738265.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0 ++roundig 
mode: near ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490368.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242188 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fnmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679687 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -999999936.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738264.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490368.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242187 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fnmadd.s :: ++input: 
0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679688 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738264.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490368.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242187 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fnmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 
++output: 456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679687 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -999999936.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738265.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490432.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242188 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 
++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0 ++roundig mode: zero ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 
0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 
1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 
-5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0x300 ++roundig mode: near ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1752.000000 
-3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820312 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726692.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398784.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242188 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820312 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726691.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 
0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398784.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242187 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820312 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726691.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398784.000000 ++fcsr: 0x1010200 
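
Not part of the patch, but as a reading aid: these expected-output records exercise the fused multiply-add family (and, further down, fmax) under all four rounding modes, and are consistent with fmsub computing a*b - c, fnmadd computing -(a*b + c) and fnmsub computing -(a*b - c), each with a single rounding. The fcsr values 0, 0x100, 0x200 and 0x300 track the FCSR0 rounding-mode field for near/zero/+inf/-inf, and the larger values such as 0x1010000 appear to carry the inexact flag/cause bits as well; the "roundig" spelling is the test program's own output format, reproduced verbatim. Below is a minimal host-side C sketch (an illustration only, assuming C99 fma()/fmaf() reproduce the same single-rounding results on an IEEE-754 host) that checks a few of the listed records under round-to-nearest:

  #include <fenv.h>
  #include <math.h>
  #include <stdio.h>

  static void check(const char *op, double got, double want)
  {
      printf("%-9s got %.9f  expected %.9f  %s\n",
             op, got, want, got == want ? "ok" : "MISMATCH");
  }

  int main(void)
  {
      fesetround(FE_TONEAREST);   /* the "roundig mode: near" records */

      /* fmsub.d a,b,c = a*b - c, single rounding: fma(a, b, -c) */
      check("fmsub.d", fma(1752065.0, 1384.5, -0.0), 2425733992.5);

      /* fnmadd.s a,b,c = -(a*b + c): -fmaf(a, b, c) */
      check("fnmadd.s", -fmaf(3.0f, 34.03125f, -1.0f), -101.09375);

      /* fnmsub.s a,b,c = -(a*b - c): -fmaf(a, b, -c) */
      check("fnmsub.s", -fmaf(456.25f, 456.25f, -356047.5f), 147883.4375);

      return 0;
  }

Built with something like "cc -std=c11 check.c -lm" (file name hypothetical), all three lines should report "ok", matching the corresponding near-mode records in this file.
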
++roundig mode: +inf ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242187 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820313 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000064.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726692.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398848.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242188 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 3.000000000000000 
34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 0 ++roundig mode: zero ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 
0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 
0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 
0x200 ++roundig mode: -inf ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 
0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 0x300 ++roundig mode: near ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 
++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf 
++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 
-7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 
0x100 ++roundig mode: zero ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 
++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf 
++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: zero ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: 
++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: +inf ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 107.000000 -7.000000 
++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: -inf ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: near ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig 
mode: near ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: zero ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: 
++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 
++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 45786.750000000000000 
-1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: near ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: zero ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 
++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: +inf ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -248562.750000 
23.062500 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: -inf ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x300 ++roundig 
mode: -inf ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: near ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: zero ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 
3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -7.250000000000000 
107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1752.000000000000000 
-347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: near ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 
++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -1.000000 4578.750000 ++output: 
-1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s 
:: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 
356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d 
:: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 
++roundig mode: -inf ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0 ++roundig 
mode: near ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 
-7.250000 ++output: 7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 
++roundig mode: -inf ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -5786.500000000000000 
++output: 5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1752065.000000000000000 ++output: 
1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0 ++roundig mode: near 
++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0 ++roundig mode: zero ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -45667.250000 
++output: 45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0x300 ++roundig mode: -inf 
++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0x300 ++roundig mode: near ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0 ++roundig 
mode: near ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf 
++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig 
mode: -inf ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fsqrt.s :: ++input: 456.250000 ++output: 21.360010 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 3.000000 ++output: 1.732051 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208870 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.777344 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856899 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0 ++roundig mode: near ++fsqrt.s :: ++input: 0.031250 ++output: 0.176777 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 456.000000 ++output: 21.354156 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 34.031250 ++output: 5.833631 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978394 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.655884 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 107.000000 ++output: 10.344080 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697144 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 23.062500 ++output: 4.802343 ++fcsr: 0x1010000 ++roundig mode: zero ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.s :: ++input: 456.250000 ++output: 21.360008 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 3.000000 ++output: 1.732050 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208866 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 
0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.775390 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856895 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.s :: ++input: 0.031250 ++output: 0.176776 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 456.000000 ++output: 21.354156 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 34.031250 ++output: 5.833630 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978378 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.655883 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 107.000000 ++output: 10.344079 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697143 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 23.062500 ++output: 4.802342 ++fcsr: 0x1010100 ++roundig mode: +inf ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.s :: ++input: 456.250000 ++output: 21.360011 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 3.000000 ++output: 1.732051 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208870 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.777344 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856900 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.s :: ++input: 0.031250 ++output: 0.176777 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 456.000000 ++output: 21.354159 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 34.031250 ++output: 5.833632 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978394 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.656006 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 107.000000 ++output: 10.344081 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 
0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697205 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 23.062500 ++output: 4.802344 ++fcsr: 0x1010200 ++roundig mode: -inf ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.s :: ++input: 456.250000 ++output: 21.360008 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 3.000000 ++output: 1.732050 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208866 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.775390 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856895 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.s :: ++input: 0.031250 ++output: 0.176776 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 456.000000 ++output: 21.354156 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 34.031250 ++output: 5.833630 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978378 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.655883 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 107.000000 ++output: 10.344079 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697143 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 23.062500 ++output: 4.802342 ++fcsr: 0x1010300 ++roundig mode: near ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293827 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568877 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278111 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683792251 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912697 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0 ++roundig mode: near ++fsqrt.d :: ++input: 
0.031250000000000 ++output: 0.176776695296637 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062622 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789017 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036965 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296937 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788601 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809583 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074636 ++fcsr: 0x1010000 ++roundig mode: zero ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293826 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568877 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278110 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683792250 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912696 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.d :: ++input: 0.031250000000000 ++output: 0.176776695296636 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062618 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789016 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036936 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296710 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788599 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig 
mode: zero ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809583 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074636 ++fcsr: 0x1010100 ++roundig mode: +inf ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293831 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568878 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278118 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683795889 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912704 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.d :: ++input: 0.031250000000000 ++output: 0.176776695296637 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062623 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789018 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036966 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296938 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788602 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809697 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074638 ++fcsr: 0x1010200 ++roundig mode: -inf ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293826 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568877 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278110 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100300 
++roundig mode: -inf ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683792250 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912696 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.d :: ++input: 0.031250000000000 ++output: 0.176776695296636 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062618 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789016 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036936 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296710 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788599 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809583 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074636 ++fcsr: 0x1010300 ++roundig mode: near ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frecip.s :: ++input: 456.250000 ++output: 0.002192 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 3.000000 ++output: 0.333333 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: 1384.500000 ++output: 0.000722 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -7.250000 ++output: -0.137931 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -5786.500000 ++output: -0.000173 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 1752.000000 ++output: 0.000571 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: -248562.750000 ++output: -0.000004 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -45786.500000 ++output: -0.000022 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 456.000000 ++output: 0.002193 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 34.031250 ++output: 0.029385 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 45786.750000 ++output: 0.000022 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 1752065.000000 ++output: 0.000001 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 107.000000 ++output: 0.009346 ++fcsr: 0x1010000 
++roundig mode: near ++frecip.s :: ++input: -45667.250000 ++output: -0.000022 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -7.000000 ++output: -0.142857 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -347856.500000 ++output: -0.000003 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 356047.500000 ++output: 0.000003 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: 23.062500 ++output: 0.043360 ++fcsr: 0x1010000 ++roundig mode: zero ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frecip.s :: ++input: 456.250000 ++output: 0.002191 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 3.000000 ++output: 0.333333 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: 1384.500000 ++output: 0.000722 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -7.250000 ++output: -0.137931 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -5786.500000 ++output: -0.000172 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 1752.000000 ++output: 0.000570 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: -248562.750000 ++output: -0.000004 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -45786.500000 ++output: -0.000021 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 456.000000 ++output: 0.002192 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 34.031250 ++output: 0.029384 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 45786.750000 ++output: 0.000021 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 1752065.000000 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 107.000000 ++output: 0.009345 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -45667.250000 ++output: -0.000021 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -7.000000 ++output: -0.142857 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -347856.500000 ++output: -0.000002 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 356047.500000 ++output: 0.000002 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: 23.062500 ++output: 0.043360 ++fcsr: 0x1010100 ++roundig mode: +inf ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frecip.s :: ++input: 456.250000 ++output: 0.002192 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 3.000000 ++output: 0.333334 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: 1384.500000 ++output: 0.000723 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -7.250000 ++output: -0.137931 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000001 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -5786.500000 ++output: -0.000172 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 
1752.000000 ++output: 0.000571 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: -248562.750000 ++output: -0.000004 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -45786.500000 ++output: -0.000021 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 456.000000 ++output: 0.002193 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 34.031250 ++output: 0.029385 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 45786.750000 ++output: 0.000022 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 1752065.000000 ++output: 0.000001 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 107.000000 ++output: 0.009346 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -45667.250000 ++output: -0.000021 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -7.000000 ++output: -0.142857 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -347856.500000 ++output: -0.000002 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 356047.500000 ++output: 0.000003 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: 23.062500 ++output: 0.043361 ++fcsr: 0x1010200 ++roundig mode: -inf ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frecip.s :: ++input: 456.250000 ++output: 0.002191 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 3.000000 ++output: 0.333333 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: 1384.500000 ++output: 0.000722 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -7.250000 ++output: -0.137932 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -5786.500000 ++output: -0.000173 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 1752.000000 ++output: 0.000570 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: -248562.750000 ++output: -0.000005 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -45786.500000 ++output: -0.000022 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 456.000000 ++output: 0.002192 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 34.031250 ++output: 0.029384 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 45786.750000 ++output: 0.000021 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 1752065.000000 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 107.000000 ++output: 0.009345 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -45667.250000 ++output: -0.000022 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -7.000000 ++output: -0.142858 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -347856.500000 ++output: -0.000003 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 356047.500000 ++output: 0.000002 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 
++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: 23.062500 ++output: 0.043360 ++fcsr: 0x1010300 ++roundig mode: near ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821918 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333333 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412423 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482759 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000001000000 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -5786.500000000000000 ++output: -0.000172816037328 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255708 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128968 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498837 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456140 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657484 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379586 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755080 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392523 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530506 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857143 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748639 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614019 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604336 ++fcsr: 0x1010000 ++roundig mode: zero ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821917 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333333 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412423 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482758 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000000999999 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -5786.500000000000000 
++output: -0.000172816037328 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255707 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128968 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498836 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456140 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657483 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379585 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755080 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392523 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530505 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857142 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748639 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614019 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604336 ++fcsr: 0x1010100 ++roundig mode: +inf ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821918 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333334 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412424 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482758 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000001000001 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -5786.500000000000000 ++output: -0.000172816037328 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255708 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128968 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498836 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456141 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657484 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379586 
++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755081 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392524 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530505 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857142 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748639 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614020 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604337 ++fcsr: 0x1010200 ++roundig mode: -inf ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821917 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333333 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412423 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482759 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000000999999 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -5786.500000000000000 ++output: -0.000172816037329 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255707 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128969 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498837 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456140 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657483 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379585 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755080 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392523 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530506 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857143 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748640 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614019 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604336 ++fcsr: 0x1010300 
++roundig mode: near ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frsqrt.s :: ++input: 456.250000 ++output: 0.046816 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 3.000000 ++output: 0.577350 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026875 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000032 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023891 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0 ++roundig mode: near ++frsqrt.s :: ++input: 0.031250 ++output: 5.656854 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 456.000000 ++output: 0.046829 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 34.031250 ++output: 0.171420 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004673 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000755 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 107.000000 ++output: 0.096674 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001676 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 23.062500 ++output: 0.208232 ++fcsr: 0x1010000 ++roundig mode: zero ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frsqrt.s :: ++input: 456.250000 ++output: 0.046816 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 3.000000 ++output: 0.577350 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026875 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000031 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023890 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0x100 ++roundig mode: zero ++frsqrt.s :: ++input: 0.031250 ++output: 5.656854 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 456.000000 ++output: 0.046829 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 34.031250 ++output: 0.171419 ++fcsr: 0x1010100 ++roundig mode: 
zero ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004673 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000755 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 107.000000 ++output: 0.096673 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001675 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 23.062500 ++output: 0.208231 ++fcsr: 0x1010100 ++roundig mode: +inf ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frsqrt.s :: ++input: 456.250000 ++output: 0.046817 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 3.000000 ++output: 0.577351 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026876 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000032 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023891 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frsqrt.s :: ++input: 0.031250 ++output: 5.656855 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 456.000000 ++output: 0.046830 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 34.031250 ++output: 0.171420 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004674 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000756 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 107.000000 ++output: 0.096674 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001676 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 23.062500 ++output: 0.208232 ++fcsr: 0x1010200 ++roundig mode: -inf ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frsqrt.s :: ++input: 456.250000 ++output: 0.046816 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 3.000000 ++output: 0.577350 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026875 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf 
++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000031 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023890 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frsqrt.s :: ++input: 0.031250 ++output: 5.656854 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 456.000000 ++output: 0.046829 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 34.031250 ++output: 0.171419 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004673 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000755 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 107.000000 ++output: 0.096673 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001675 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 23.062500 ++output: 0.208231 ++fcsr: 0x1010300 ++roundig mode: near ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878452 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189626 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322339 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776602 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128375 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0 ++roundig mode: near ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492380 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579085 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742193 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189974 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342261 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904566 ++fcsr: 0x1010000 ++roundig mode: near 
++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007021 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518141 ++fcsr: 0x1010000 ++roundig mode: zero ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878452 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189625 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322338 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776601 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128374 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492380 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579084 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742193 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189973 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342260 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904566 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007021 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518141 ++fcsr: 0x1010100 ++roundig mode: +inf ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878453 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189626 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf 
++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322339 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776602 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128375 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492381 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579085 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742194 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189974 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342261 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904567 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007022 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518142 ++fcsr: 0x1010200 ++roundig mode: -inf ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878452 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189625 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322338 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776601 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128374 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492380 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579084 ++fcsr: 
0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742193 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189973 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342260 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904566 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007021 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518141 ++fcsr: 0x1010300 ++roundig mode: near ++fscaleb.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: zero ++fscaleb.s :: 
++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: +inf ++fscaleb.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: inf ++fcsr: 0x5050200 
++roundig mode: +inf ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: -inf ++fscaleb.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -inf ++fcsr: 
0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: near ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: 
++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: zero ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 
0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 
-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: +inf ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 
-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: -inf ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 
179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 
179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: near ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 45786.750000 ++output: 15.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near 
++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0 ++roundig mode: zero ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 45786.750000 ++output: 15.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0x100 ++roundig mode: +inf ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 45786.750000 
++output: 15.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0x200 ++roundig mode: -inf ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 45786.750000 ++output: 15.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0x300 ++roundig mode: near ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 1384.500000000000000 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0 
++roundig mode: near ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0 ++roundig mode: zero ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 1384.500000000000000 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 
++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 1384.500000000000000 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 1384.500000000000000 ++output: 
10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0x300 ++roundig mode: near ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near 
++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 
++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0x300 ++roundig 
mode: -inf ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 34.031250 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: 
++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 34.031250 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 34.031250 
++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 34.031250 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig 
mode: near ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 
0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: 
++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 
++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 
++roundig mode: +inf ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 
-1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: 
++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 
++roundig mode: -inf ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 
-7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 
-5786.500000000000000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf 
++ftintrm.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 
++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: 
-inf ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near 
++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: 
++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig 
mode: -inf ++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero 
++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: 
++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 
0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 
++roundig mode: +inf ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 
++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 
++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 
++roundig mode: +inf ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 0.015625000000000 ++output: 0 
++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 356047.500000000000000 
++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 0.015625000000000 
++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 
++roundig mode: zero ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 
45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 
1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 
-45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 
1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: 
near ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s 
:: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: 
near ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 
++roundig mode: zero ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 
456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 
34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 
0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -347856.500000 
++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 
0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: 
++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near 
++ftint.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 456.250000 ++output: 457 
++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig 
mode: -inf ++ftint.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -5786.500000000000000 
++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf 
++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 
++roundig mode: near ++ftint.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 
3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -45667.250000 ++output: -45668 
++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: 
++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 
0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: 
++input: -248562 ++output: -248562.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010000 ++roundig mode: near ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0 ++roundig mode: zero ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -248562 ++output: -248562.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -1 ++output: 
-1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -248562 ++output: -248562.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -248562 ++output: -248562.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: 
++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0x300 ++roundig mode: near ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 4294967295 ++output: 4294967296.000000 ++fcsr: 0x1010000 ++roundig mode: near ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010000 ++roundig mode: near ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0 ++roundig mode: zero ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 4294967295 ++output: 4294967040.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0x100 ++roundig mode: 
zero ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 4294967295 ++output: 4294967296.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 4294967295 ++output: 4294967040.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -5786 ++output: 
-5786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0x300 ++roundig mode: near ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0 
++roundig mode: near ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0 ++roundig mode: zero ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf 
++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x300 ++roundig mode: near ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0 ++roundig 
mode: near ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0 ++roundig mode: zero ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero 
++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l 
:: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x300 ++roundig mode: near ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 456.250000 ++output: 456.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 1384.500000 ++output: 1384.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -7.250000 ++output: -7.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: -5786.500000 ++output: -5786.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 0.015625 ++output: 0.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 0.031250 ++output: 0.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -248562.750000 ++output: -248563.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -45786.500000 ++output: -45786.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 34.031250 ++output: 34.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 45786.750000 ++output: 45787.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig 
mode: near ++frint.s :: ++input: -45667.250000 ++output: -45667.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: -347856.500000 ++output: -347856.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 356047.500000 ++output: 356048.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 23.062500 ++output: 23.000000 ++fcsr: 0x1010000 ++roundig mode: zero ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 456.250000 ++output: 456.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 1384.500000 ++output: 1384.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -7.250000 ++output: -7.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -5786.500000 ++output: -5786.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 0.015625 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 0.031250 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -248562.750000 ++output: -248562.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -45786.500000 ++output: -45786.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 34.031250 ++output: 34.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 45786.750000 ++output: 45786.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -45667.250000 ++output: -45667.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -347856.500000 ++output: -347856.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 356047.500000 ++output: 356047.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 23.062500 ++output: 23.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 456.250000 ++output: 457.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 1384.500000 ++output: 1385.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -7.250000 ++output: -7.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -5786.500000 ++output: -5786.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: 
++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 0.015625 ++output: 1.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 0.031250 ++output: 1.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -248562.750000 ++output: -248562.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -45786.500000 ++output: -45786.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 34.031250 ++output: 35.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 45786.750000 ++output: 45787.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -45667.250000 ++output: -45667.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -347856.500000 ++output: -347856.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 356047.500000 ++output: 356048.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 23.062500 ++output: 24.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 456.250000 ++output: 456.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 1384.500000 ++output: 1384.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -7.250000 ++output: -8.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -5786.500000 ++output: -5787.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 0.015625 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 0.031250 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -248562.750000 ++output: -248563.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -45786.500000 ++output: -45787.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 34.031250 ++output: 34.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 45786.750000 ++output: 45786.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -45667.250000 ++output: -45668.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -347856.500000 ++output: -347857.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 356047.500000 ++output: 356047.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: 
++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 23.062500 ++output: 23.000000 ++fcsr: 0x1010300 ++roundig mode: near ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 1384.500000000000000 ++output: 1384.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -7.250000000000000 ++output: -7.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -5786.500000000000000 ++output: -5786.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 0.015625000000000 ++output: 0.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 0.031250000000000 ++output: 0.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -248562.750000000000000 ++output: -248563.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -45786.500000000000000 ++output: -45786.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 45786.750000000000000 ++output: 45787.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -45667.250000000000000 ++output: -45667.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -347856.500000000000000 ++output: -347856.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 356047.500000000000000 ++output: 356048.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 23.062500000000000 ++output: 23.000000000000000 ++fcsr: 0x1010000 ++roundig mode: zero ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 1384.500000000000000 ++output: 1384.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -7.250000000000000 ++output: -7.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero 
++frint.d :: ++input: -5786.500000000000000 ++output: -5786.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 0.015625000000000 ++output: 0.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 0.031250000000000 ++output: 0.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -248562.750000000000000 ++output: -248562.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -45786.500000000000000 ++output: -45786.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 45786.750000000000000 ++output: 45786.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: -45667.250000000000000 ++output: -45667.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: -347856.500000000000000 ++output: -347856.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 356047.500000000000000 ++output: 356047.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 23.062500000000000 ++output: 23.000000000000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 456.250000000000000 ++output: 457.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 1384.500000000000000 ++output: 1385.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -7.250000000000000 ++output: -7.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -5786.500000000000000 ++output: -5786.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 0.015625000000000 ++output: 1.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 0.031250000000000 ++output: 1.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -248562.750000000000000 ++output: -248562.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -45786.500000000000000 ++output: -45786.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 34.031250000000000 ++output: 35.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf 
++frint.d :: ++input: 45786.750000000000000 ++output: 45787.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -45667.250000000000000 ++output: -45667.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -347856.500000000000000 ++output: -347856.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 356047.500000000000000 ++output: 356048.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 23.062500000000000 ++output: 24.000000000000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 1384.500000000000000 ++output: 1384.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -7.250000000000000 ++output: -8.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -5786.500000000000000 ++output: -5787.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 0.015625000000000 ++output: 0.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 0.031250000000000 ++output: 0.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -248562.750000000000000 ++output: -248563.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -45786.500000000000000 ++output: -45787.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 45786.750000000000000 ++output: 45786.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -45667.250000000000000 ++output: -45668.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -347856.500000000000000 ++output: -347857.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 356047.500000000000000 ++output: 356047.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig 
mode: -inf ++frint.d :: ++input: 23.062500000000000 ++output: 23.000000000000000 ++fcsr: 0x1010300 ++roundig mode: near ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 
456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 
++roundig mode: -inf ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 356047.500000000000000 
0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 
0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 
++roundig mode: +inf ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: 
++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 
-1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 0.031250000000000 
-1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 
++roundig mode: zero ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 
-347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 
1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d 
:: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 
23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near 
++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s 
:: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 
++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 
-45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.ceq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 0.000000 
-4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig 
mode: +inf ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig 
mode: near ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 
++roundig mode: zero ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: 
++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 
++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s 
:: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1752065.000000000000000 
1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig 
mode: +inf ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: 
++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 
++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig 
mode: +inf ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: 
++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig 
mode: zero ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 23.062500 
-248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -248562.750000 23.062500 
++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 456.250000 456.250000 
++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cun.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cun.d :: ++input: 0.000000000000000 
-45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: -5786.500000000000000 -7.250000000000000 
[Expected-output listing elided: machine-generated test records, one per case, each giving the
rounding mode (near / zero / +inf / -inf), the instruction under test (fcmp.cun, fcmp.sun,
fcmp.cult, fcmp.sult, fcmp.cueq, in .s and .d precision), the input operand pair, the resulting
condition bit (0 or 1), and the FCSR value (0, 0x100, 0x200, 0x300 for the four rounding modes).]
near ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1752065.000000000000000 
1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 
23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 
++roundig mode: near ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 
++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -347856.500000000000000 
1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 
++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 0.031250000000000 
-1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 
++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig 
mode: +inf ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig 
mode: near ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 
0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 
0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 
-248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 
0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -248562.750000000000000 
23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 
++roundig mode: zero ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 
++roundig mode: -inf ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s 
:: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 34.031250 3.000000 
++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cne.d 
:: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 
-5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 34.031250000000000 
3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero 
++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1752065.000000 
1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near 
++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig 
mode: zero ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: 
++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 
1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -45667.250000 100.000000 
++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: 
-inf ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -45667.250000000000000 
1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 
++roundig mode: +inf ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -347856.500000 
1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 23.062500 
-248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -5786.500000000000000 
-7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 
0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig 
mode: -inf ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 
++roundig mode: near ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 
-7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 
3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -248562.750000 23.062500 
++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 23.062500000000000 
-248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1000000000.000000000000000 
-45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 456.000000000000000 
456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0 
++roundig mode: near ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: zero ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 
0x100 ++roundig mode: zero ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 
3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: near ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 0.015625 
++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 456.250000 ++output: 456.250000 
++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: 
++input: 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 
1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: 
++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf 
++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 +diff --git a/none/tests/loongarch64/float.vgtest b/none/tests/loongarch64/float.vgtest +new file mode 100644 +index 000000000..e65d9699b +--- /dev/null ++++ b/none/tests/loongarch64/float.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features fpu ++prog: float ++vgopts: -q +diff --git a/none/tests/loongarch64/integer.c b/none/tests/loongarch64/integer.c +new file mode 100644 +index 000000000..369d6285d +--- /dev/null ++++ b/none/tests/loongarch64/integer.c +@@ -0,0 +1,1311 @@ ++#include <assert.h> ++#include <stdio.h> ++ ++typedef enum { ++ SA2, SA2_1 /* for alsl */, SA3, ++ MSBW, LSBW, MSBD, LSBD, ++ UI5, UI6, UI12, ++ SI12, SI14, SI16, SI20 ++} imm_t; ++ ++static inline void showImm (unsigned int i, imm_t ty) ++{ ++ switch (ty) { ++ case SA2: ++ assert(i < (1 << 2)); ++ break; ++ case SA2_1: ++ assert(i < (1 << 3)); ++ break; ++ case SA3: ++ assert(i < (1 << 3)); ++ break; ++ case MSBW: ++ assert(i < (1 << 5)); ++ break; ++ case LSBW: ++ assert(i < (1 << 5)); ++ break; ++ case MSBD: ++ assert(i < (1 << 6)); ++ break; ++ case LSBD: ++ assert(i < (1 << 6)); ++ break; ++ case UI5: ++ assert(i < (1 << 5)); ++ break; ++ case UI6: ++ assert(i < (1 << 6)); ++ break; ++ case UI12: ++ assert(i < (1 << 12)); ++ break; ++ case SI12: ++ assert(i < (1 << 12) || (i >> 12) == 0xfffff); ++ break; ++ case SI14: ++ assert(i < (1 << 14) || (i >> 14) == 0x3ffff); ++ break; ++ case SI16: ++ assert(i < (1 << 16) || (i >> 16) == 0xffff); ++ break; ++ case SI20: ++ assert(i < (1 << 20) || (i >> 20) == 0xfff); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ printf("%d", i); ++} ++ ++#define TESTINST_RR(insn, rd, rj, v1, v2) \ ++ { \ ++ unsigned long res1, res2; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %2 \n\t" \ ++ "move " rj ", %3 \n\t" \ ++ insn " " rd ", " rj " \n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ : "=r" (res1), "=r" (res2) \ ++ : "r" (val1), "r" (val2) \ ++ : rd, rj, "memory"); \ ++ printf("%s %s, %s ::\n", insn, rd, rj); \ ++ printf("before: %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2); \ ++ printf("after: %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2); \ ++ } ++ ++#define TESTINST_RI(insn, rd, type, v1, imm) \ ++ { \ ++ unsigned long res1; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %1 \n\t" \ ++ insn " " rd ", " #imm "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ : "=r" (res1) \ ++ : "r" (val1) \ ++ : rd, "memory"); \ ++ printf("%s %s, ", insn, rd); \ ++ showImm(imm, type); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx\n", rd, val1); \ ++ printf("after: %s=%#018lx\n", rd, res1); \ ++ } ++ ++#define TESTINST_RRRI(insn, rd, rj, rk, type, v1, v2, v3, imm) \ ++ { \ ++ unsigned long res1, res2, res3; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ unsigned long val3 = (unsigned long)v3; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %3 \n\t" \ ++ "move " rj ", %4 \n\t" \ ++ "move " rk ", %5 \n\t" \ ++ insn " " rd ", " rj ", " rk ", " #imm "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ "move %2, " rk " \n\t" \ ++ : "=r" (res1), "=r" (res2), "=r" (res3) \ ++ : "r" (val1), "r" (val2), "r" (val3) \ ++ : rd, rj, rk, "memory"); \ ++ printf("%s %s, %s, %s, ", insn, rd, rj, rk); \ ++ 
showImm(imm, type); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2, rk, val3); \ ++ printf("after: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2, rk, res3); \ ++ } ++ ++#define TESTINST_RRR(insn, rd, rj, rk, v1, v2, v3) \ ++ { \ ++ unsigned long res1, res2, res3; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ unsigned long val3 = (unsigned long)v3; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %3 \n\t" \ ++ "move " rj ", %4 \n\t" \ ++ "move " rk ", %5 \n\t" \ ++ insn " " rd ", " rj ", " rk "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ "move %2, " rk " \n\t" \ ++ : "=r" (res1), "=r" (res2), "=r" (res3) \ ++ : "r" (val1), "r" (val2), "r" (val3) \ ++ : rd, rj, rk, "memory"); \ ++ printf("%s %s, %s, %s ::\n", insn, rd, rj, rk); \ ++ printf("before: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2, rk, val3); \ ++ printf("after: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2, rk, res3); \ ++ } ++ ++#define TESTINST_RRI(insn, rd, rj, type, v1, v2, imm) \ ++ { \ ++ unsigned long res1, res2; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %2 \n\t" \ ++ "move " rj ", %3 \n\t" \ ++ insn " " rd ", " rj ", " #imm "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ : "=r" (res1), "=r" (res2) \ ++ : "r" (val1), "r" (val2) \ ++ : rd, rj, "memory"); \ ++ printf("%s %s, %s, ", insn, rd, rj); \ ++ showImm(imm, type); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2); \ ++ printf("after: %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2); \ ++ } ++ ++#define TESTINST_RRII(insn, rd, rj, type1, type2, v1, v2, imm1, imm2) \ ++ { \ ++ unsigned long res1, res2; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %2 \n\t" \ ++ "move " rj ", %3 \n\t" \ ++ insn " " rd ", " rj ", " #imm1 ", " #imm2 "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ : "=r" (res1), "=r" (res2) \ ++ : "r" (val1), "r" (val2) \ ++ : rd, rj, "memory"); \ ++ printf("%s %s, %s, ", insn, rd, rj); \ ++ showImm(imm1, type1); \ ++ printf(", "); \ ++ showImm(imm2, type2); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2); \ ++ printf("after: %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2); \ ++ } ++ ++void test(void) ++{ ++ /* ---------------- add.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("add.w", "$r19", "$r20", "$r25", 0xf7f01ffbc9696094UL, 0xb664b1ce21c8c7fcUL, 0xd0a02b79ace85cfUL); ++ TESTINST_RRR("add.w", "$r29", "$r9", "$r12", 0x5418cd6f6b640953UL, 0x6465907ca2dac58cUL, 0xefea76d0d526df3aUL); ++ TESTINST_RRR("add.w", "$r23", "$r15", "$r28", 0x6ae34fbc6f2f7a9aUL, 0xbf21c48ab5c2edccUL, 0x24824ebd458ed20eUL); ++ TESTINST_RRR("add.w", "$r27", "$r14", "$r26", 0x9f33e38db05616ccUL, 0xf12ee0c276c52c78UL, 0xc3054d65ecec3fe6UL); ++ TESTINST_RRR("add.w", "$r14", "$r23", "$r27", 0x17eaa07c4607901fUL, 0xa5fa9d0c8472848eUL, 0xa34301227bb57f76UL); ++ TESTINST_RRR("add.w", "$r19", "$r19", "$r4", 0xd2e0644d9532b5eaUL, 0x2957c6f0638238bcUL, 0xf01566d0031ee917UL); ++ TESTINST_RRR("add.w", "$r19", "$r26", "$r13", 0x7b39b3f2ccbdaf79UL, 0xee877221beef9d45UL, 0x4a743034eefe075dUL); ++ TESTINST_RRR("add.w", "$r29", "$r18", "$r14", 0x95214c4de7e6d3baUL, 0x26502eb481799cd1UL, 
0x34d57b775083fb91UL); ++ TESTINST_RRR("add.w", "$r16", "$r26", "$r8", 0xb66b18865bbb3036UL, 0x8881ccbe1e31aa8dUL, 0xffe0d2dde8325edcUL); ++ TESTINST_RRR("add.w", "$r26", "$r5", "$r8", 0xc367af71c905540cUL, 0xcdcbe4860d983fe3UL, 0x6687aa19ee1fc503UL); ++ ++ /* ---------------- add.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("add.d", "$r16", "$r18", "$r8", 0xbe5505b409ce995cUL, 0x561a85fd57e87226UL, 0x923f3293987edab0UL); ++ TESTINST_RRR("add.d", "$r12", "$r7", "$r29", 0xff2682151edc3476UL, 0x90beb037eacfe3dbUL, 0xa4017082880f1151UL); ++ TESTINST_RRR("add.d", "$r31", "$r31", "$r5", 0x81e38385e39d9f16UL, 0xedb2ffa50c0c8b5fUL, 0x8776f30d75fc97c2UL); ++ TESTINST_RRR("add.d", "$r31", "$r6", "$r26", 0x64ff385d97b60dc2UL, 0x80f903f206f08f60UL, 0x4f5b589532e85398UL); ++ TESTINST_RRR("add.d", "$r25", "$r10", "$r20", 0xdd8973d6b99634caUL, 0x34c0fe5a72dd43d9UL, 0x2494af03cf5878e7UL); ++ TESTINST_RRR("add.d", "$r5", "$r10", "$r4", 0x94b272b05ffe39c8UL, 0x152d15efbbc54c04UL, 0x25afc06cf151ab29UL); ++ TESTINST_RRR("add.d", "$r19", "$r30", "$r18", 0xa6e14d42459cadf6UL, 0x558620ff616141b1UL, 0x1978905697120747UL); ++ TESTINST_RRR("add.d", "$r7", "$r8", "$r20", 0x2ea6f88031a29aeUL, 0x6a08c12301e00d49UL, 0xdd533acf17f59142UL); ++ TESTINST_RRR("add.d", "$r24", "$r14", "$r26", 0xb88df6b8315eb7a6UL, 0x137d04f7f6fe285UL, 0x2ccb253ff7ea93d6UL); ++ TESTINST_RRR("add.d", "$r7", "$r19", "$r23", 0xad464722c0967f28UL, 0x30295c1fd85ae029UL, 0x2c69edb227e01d94UL); ++ ++ /* ---------------- sub.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("sub.w", "$r16", "$r28", "$r17", 0x8b0ba4ef20207fddUL, 0x90493cb39ff734a2UL, 0x519842bab5cc1208UL); ++ TESTINST_RRR("sub.w", "$r6", "$r13", "$r15", 0x13af983aafc53691UL, 0x27bc6a037865e47fUL, 0xe20df003930575d5UL); ++ TESTINST_RRR("sub.w", "$r8", "$r19", "$r23", 0x4177aec74585d42dUL, 0xba89b6aa9b7728acUL, 0xe6a089b8eaf43feUL); ++ TESTINST_RRR("sub.w", "$r7", "$r10", "$r23", 0xca1b83a7ab88912UL, 0xd5e2759ea82c2c80UL, 0x76e9d6f88c2624ffUL); ++ TESTINST_RRR("sub.w", "$r19", "$r24", "$r24", 0x99d63505ea0474b3UL, 0x1b53c4c34957af8eUL, 0x6146da47b731d3edUL); ++ TESTINST_RRR("sub.w", "$r26", "$r31", "$r7", 0x8eca560d8234ff55UL, 0x5beb18985c3f451eUL, 0x9c9634dfaa7b9313UL); ++ TESTINST_RRR("sub.w", "$r29", "$r16", "$r6", 0x229544d2cb1d5a64UL, 0xd23751d515597128UL, 0xa09dd29330aa8d15UL); ++ TESTINST_RRR("sub.w", "$r12", "$r16", "$r4", 0x229f5aefe9fb7fb7UL, 0x740ed49b5e95faeUL, 0xbc6304a0df442807UL); ++ TESTINST_RRR("sub.w", "$r30", "$r29", "$r26", 0x94f3a67d188df281UL, 0x48e066cdad20ac2UL, 0x1e032e60568554a7UL); ++ TESTINST_RRR("sub.w", "$r18", "$r23", "$r25", 0xedb4f44fb338ba4fUL, 0xf06e698cd08c8e7bUL, 0xa22b91e88b77d4d8UL); ++ ++ /* ---------------- sub.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("sub.d", "$r18", "$r10", "$r27", 0x68647aa06a23c8f9UL, 0xd001cb46cb78fc4fUL, 0x460cc8702b1761f9UL); ++ TESTINST_RRR("sub.d", "$r7", "$r24", "$r18", 0x8d18e952fb747f43UL, 0x1e7d1a019fb96490UL, 0xb466fb9891e8c151UL); ++ TESTINST_RRR("sub.d", "$r4", "$r16", "$r27", 0x5f6647277ca4c99dUL, 0xa1156b863ec98e1dUL, 0xc15612f3ce819d64UL); ++ TESTINST_RRR("sub.d", "$r4", "$r25", "$r9", 0xe67b33778df480b4UL, 0xc24b2711be7e4ef1UL, 0xd940ca25b956100fUL); ++ TESTINST_RRR("sub.d", "$r5", "$r12", "$r18", 0x258ae461ef798ce7UL, 0x3f4984ea3f5692deUL, 0x99fa673f30e69019UL); ++ TESTINST_RRR("sub.d", "$r13", "$r10", "$r9", 0xdafb48debea5211eUL, 0xeac1d3b25f6bf8dbUL, 0x297d671b1c96e48fUL); ++ TESTINST_RRR("sub.d", "$r7", "$r15", "$r23", 0xc6b03274ff37baf6UL, 0x5b37ffc2c84aec9UL, 0x74d62a52cbaaec15UL); ++ 
TESTINST_RRR("sub.d", "$r26", "$r18", "$r26", 0x35c71e0956ffcd43UL, 0xad703a4e8078070bUL, 0x634924e8a9fdbb9eUL); ++ TESTINST_RRR("sub.d", "$r16", "$r29", "$r5", 0x18bf961cba922928UL, 0x54ed9198405f8983UL, 0x977f5b65e5f86b4aUL); ++ TESTINST_RRR("sub.d", "$r31", "$r28", "$r14", 0xa38a1e8cb3c7ba00UL, 0xd220d1ef3cf8f3f7UL, 0xc972df2ace170d61UL); ++ ++ /* ---------------- slt rd, rj, rk ---------------- */ ++ TESTINST_RRR("slt", "$r12", "$r17", "$r18", 0xd7a0e65c279e1082UL, 0x819edf00a849ba44UL, 0x41a0b2fe37d44db2UL); ++ TESTINST_RRR("slt", "$r31", "$r13", "$r18", 0x2ef00a5cfd100f71UL, 0x4792cd9f9abf36d3UL, 0x2c117902110ef9a8UL); ++ TESTINST_RRR("slt", "$r4", "$r30", "$r29", 0x6d8be2fb73e2c006UL, 0xf76ce97d7658995eUL, 0x3856e09bfe39df6eUL); ++ TESTINST_RRR("slt", "$r4", "$r18", "$r10", 0xeddcb9dcf092c3f5UL, 0xe57b7c25d13dea8UL, 0x761d86b48cb5ce21UL); ++ TESTINST_RRR("slt", "$r16", "$r18", "$r16", 0xcddd92e2340cd593UL, 0xc9a30f4707743f80UL, 0x3ff7d36f17396d3aUL); ++ TESTINST_RRR("slt", "$r6", "$r14", "$r10", 0xa9e71c6376093499UL, 0x26bb3955b588461fUL, 0xfae7e7a950447826UL); ++ TESTINST_RRR("slt", "$r19", "$r4", "$r17", 0x35bb27f64ebd7d62UL, 0x4a7d3941ebf88bc1UL, 0xcda32e4b1c1d5c4UL); ++ TESTINST_RRR("slt", "$r19", "$r28", "$r15", 0x29419b8261e40b99UL, 0xe7e9b059033afa7dUL, 0x1ea916293b1cc3ddUL); ++ TESTINST_RRR("slt", "$r31", "$r16", "$r16", 0xe0fb75047bc62c9aUL, 0xa634f6174dcced7dUL, 0xcca5a9d25b670e70UL); ++ TESTINST_RRR("slt", "$r4", "$r4", "$r10", 0x724ee03fb3fcdec8UL, 0xae2587f097065e2cUL, 0x65c69548f83dd0dfUL); ++ ++ /* ---------------- sltu rd, rj, rk ---------------- */ ++ TESTINST_RRR("sltu", "$r14", "$r10", "$r13", 0x1956e5498db3fb6eUL, 0x2d909abfec4490bdUL, 0xa7d554ebe591d5ccUL); ++ TESTINST_RRR("sltu", "$r6", "$r5", "$r18", 0xc34214447a064eb8UL, 0xad4413e45f0a226aUL, 0x4b09aab500b04bffUL); ++ TESTINST_RRR("sltu", "$r31", "$r17", "$r17", 0x86e16a1618a639c4UL, 0x87917b281cef8df0UL, 0xd543115a56dee48UL); ++ TESTINST_RRR("sltu", "$r20", "$r6", "$r25", 0x164fff47b8b23752UL, 0x9ad830d46b1660f6UL, 0xc5d72c146f4aba72UL); ++ TESTINST_RRR("sltu", "$r6", "$r26", "$r7", 0x1428360430b7c9b5UL, 0xc2052dc6eea5a53cUL, 0xda1a8e35dd060adfUL); ++ TESTINST_RRR("sltu", "$r19", "$r15", "$r26", 0xdfc9984966167604UL, 0xa9ea12b5a37dd492UL, 0x7a24be9fcf349afcUL); ++ TESTINST_RRR("sltu", "$r29", "$r26", "$r29", 0x5a3822db2cc26fc5UL, 0x5985f02e77511d80UL, 0x370f15cc98f2a6c1UL); ++ TESTINST_RRR("sltu", "$r7", "$r28", "$r16", 0xe4594ee2cc8c6d7UL, 0x177ac0014f5dd20UL, 0xde1724c7590a4908UL); ++ TESTINST_RRR("sltu", "$r8", "$r12", "$r4", 0x1df979e50aa0ed18UL, 0x5b410cd0985fce18UL, 0x9d3c39d61e29025dUL); ++ TESTINST_RRR("sltu", "$r30", "$r23", "$r25", 0x1cba022788d49d13UL, 0xd2b40941478ee865UL, 0xa503a74e41535830UL); ++ ++ /* ---------------- slti rd, rj, si12 ---------------- */ ++ TESTINST_RRI("slti", "$r15", "$r27", SI12, 0xe24c4ca567d1d5f4UL, 0xfef05a88adf4b892UL, 1913); ++ TESTINST_RRI("slti", "$r8", "$r31", SI12, 0xfba7284a8ab83b2dUL, 0xff63b80173f1e368UL, -738); ++ TESTINST_RRI("slti", "$r31", "$r31", SI12, 0xb4599a9fa734365aUL, 0x4327139de75dde1eUL, -1544); ++ TESTINST_RRI("slti", "$r5", "$r4", SI12, 0xa5572272e0c04a20UL, 0x87657c1b1699936bUL, 1529); ++ TESTINST_RRI("slti", "$r10", "$r28", SI12, 0x1260731618214410UL, 0xd0de0dfbafb7960aUL, 557); ++ TESTINST_RRI("slti", "$r5", "$r12", SI12, 0x4c6317772a4b06b0UL, 0x7a1d4eeb507d649bUL, -222); ++ TESTINST_RRI("slti", "$r4", "$r31", SI12, 0x23b4d62a21994afbUL, 0x85304cc393f6506bUL, 717); ++ TESTINST_RRI("slti", "$r18", "$r26", SI12, 0x67b6f5dbf6a0c55dUL, 
0x451013f9a2337f9fUL, 730); ++ TESTINST_RRI("slti", "$r25", "$r8", SI12, 0xdb278cca57f1ad7bUL, 0x7371a60f5af6334bUL, 1193); ++ TESTINST_RRI("slti", "$r17", "$r24", SI12, 0xffa3ed31f9ea3a29UL, 0x1138e06e1a45c4f3UL, 329); ++ ++ /* ---------------- sltui rd, rj, si12 ---------------- */ ++ TESTINST_RRI("sltui", "$r13", "$r26", SI12, 0x62677116040aebffUL, 0xeedd6ccd0e5e2771UL, -462); ++ TESTINST_RRI("sltui", "$r24", "$r28", SI12, 0xef9500b68a87984aUL, 0xaf5922683f40599dUL, 1890); ++ TESTINST_RRI("sltui", "$r9", "$r6", SI12, 0x9996aa21d2b51922UL, 0xd5214fb275e738dcUL, -1538); ++ TESTINST_RRI("sltui", "$r19", "$r26", SI12, 0x3eb2777655f0f1c5UL, 0x98ed915860f0eb26UL, -215); ++ TESTINST_RRI("sltui", "$r8", "$r19", SI12, 0x5c44b5807c43724cUL, 0x63a068026b529b03UL, -780); ++ TESTINST_RRI("sltui", "$r19", "$r17", SI12, 0xf6926016cdbfacc1UL, 0xec04a9bcc8d1192aUL, -1041); ++ TESTINST_RRI("sltui", "$r26", "$r14", SI12, 0x542f05c795aa07c2UL, 0xb634bf537df4c4ceUL, 1653); ++ TESTINST_RRI("sltui", "$r8", "$r5", SI12, 0x371daf74e330ee8bUL, 0xedb0321c888ae22eUL, 441); ++ TESTINST_RRI("sltui", "$r25", "$r4", SI12, 0xba813c7acc8f5621UL, 0x8d5ce4750fe7603bUL, 678); ++ TESTINST_RRI("sltui", "$r17", "$r15", SI12, 0x199b641cefe0a0e2UL, 0x7ea0508a3fed3453UL, 2019); ++ ++ /* ---------------- nor rd, rj, rk ---------------- */ ++ TESTINST_RRR("nor", "$r14", "$r28", "$r9", 0xccf23cf02a48844dUL, 0x2608ea0069c4e9ddUL, 0x1c7a04255a2d13f8UL); ++ TESTINST_RRR("nor", "$r6", "$r30", "$r4", 0xbfcc3de6da2483beUL, 0xd24e9abca28d6cb5UL, 0xbb01b508523673c6UL); ++ TESTINST_RRR("nor", "$r6", "$r28", "$r13", 0x28dacd828d5736d7UL, 0xb365ff31474f736cUL, 0x593621c0f82b445cUL); ++ TESTINST_RRR("nor", "$r24", "$r16", "$r31", 0x5898010a4c6cf1bbUL, 0xecac6e093ba6146aUL, 0x50e6093f19b1194UL); ++ TESTINST_RRR("nor", "$r15", "$r7", "$r20", 0x2ddb1dea334fd92aUL, 0x401d7a663be0b31aUL, 0xb6c008973a85f779UL); ++ TESTINST_RRR("nor", "$r18", "$r31", "$r29", 0xc987982e1d91684UL, 0x181f20f581ed38f4UL, 0xefaa786e00a2e5b9UL); ++ TESTINST_RRR("nor", "$r19", "$r31", "$r13", 0x39e476d555cd20bcUL, 0xfb8fab5d35576d50UL, 0x71a92a8377c0f729UL); ++ TESTINST_RRR("nor", "$r25", "$r7", "$r5", 0x7f36d0c6d173e8c8UL, 0x181763a9f9350680UL, 0x5ec5099605d7d418UL); ++ TESTINST_RRR("nor", "$r30", "$r23", "$r23", 0x688e1d04976ac8dbUL, 0xd37b6d6a1c510287UL, 0x8670301ee2a715dfUL); ++ TESTINST_RRR("nor", "$r5", "$r23", "$r14", 0x71c4a211dd9262f4UL, 0xcb8a4aebc2c6c4f2UL, 0x84d79a5254447c9UL); ++ ++ /* ---------------- and rd, rj, rk ---------------- */ ++ TESTINST_RRR("and", "$r8", "$r14", "$r31", 0xbddf22c4109e20b5UL, 0xb2d25973efd1a8ffUL, 0x28b78b59dfe641e9UL); ++ TESTINST_RRR("and", "$r19", "$r23", "$r17", 0xb25e185c549f6661UL, 0xb6ccc215c2f17718UL, 0xf20669c51aee8ffeUL); ++ TESTINST_RRR("and", "$r30", "$r27", "$r23", 0xa7f4ad796393e12bUL, 0xefbcf405df3e7affUL, 0x548a0141e9fe1700UL); ++ TESTINST_RRR("and", "$r18", "$r31", "$r29", 0xa399c7f46c61d974UL, 0xe0fe8cca1cbab773UL, 0x49e680ddee7f666bUL); ++ TESTINST_RRR("and", "$r5", "$r26", "$r25", 0x1682ca17c11f90acUL, 0x4e9706cb2c885742UL, 0x250ff6304dd87d57UL); ++ TESTINST_RRR("and", "$r28", "$r14", "$r8", 0xcacf15e6ffad256fUL, 0x99527f4fa2aa8fb1UL, 0xcff546a883b63cfbUL); ++ TESTINST_RRR("and", "$r28", "$r9", "$r28", 0xc60423b9cf70d112UL, 0x2fb0db47f1d8f166UL, 0x1e9cec9d13e85210UL); ++ TESTINST_RRR("and", "$r18", "$r28", "$r5", 0x5059c37ee38d2f25UL, 0x74bf57d85d90af3aUL, 0x35479df0ebec9209UL); ++ TESTINST_RRR("and", "$r23", "$r25", "$r12", 0x18742ef4c73416beUL, 0x8b93e775860ef52bUL, 0xa909915f60a546d2UL); ++ TESTINST_RRR("and", 
"$r18", "$r17", "$r24", 0xadb2cc6aec909946UL, 0x3068f8b21d583e4cUL, 0xcf8aae1918f3a88eUL); ++ ++ /* ---------------- or rd, rj, rk ---------------- */ ++ TESTINST_RRR("or", "$r19", "$r28", "$r25", 0x46819825f87044c2UL, 0x65cb2cc7e5f5a720UL, 0x1fc0130146f13f76UL); ++ TESTINST_RRR("or", "$r8", "$r25", "$r4", 0x45083dd59c60e6feUL, 0x936ecfaeb4d51c95UL, 0xdc37c27c69024f6eUL); ++ TESTINST_RRR("or", "$r15", "$r16", "$r8", 0x516659e51cf19b26UL, 0x7589da0802d59510UL, 0x6b713c60390f3fbfUL); ++ TESTINST_RRR("or", "$r9", "$r15", "$r6", 0x1646568625c40022UL, 0xa68db9141a88850cUL, 0x756d912fbefef973UL); ++ TESTINST_RRR("or", "$r24", "$r9", "$r25", 0xda34c24d14fce443UL, 0x6ad9bf24481630b0UL, 0x2aefcdfa652395bUL); ++ TESTINST_RRR("or", "$r13", "$r9", "$r14", 0x900358ad1e848728UL, 0xa0e361b5b891a62eUL, 0xddfa0c1377ce01acUL); ++ TESTINST_RRR("or", "$r23", "$r16", "$r15", 0x27a55515d39aded9UL, 0xd0daf17f9cb0bf5aUL, 0xf44c4372982c4d74UL); ++ TESTINST_RRR("or", "$r20", "$r16", "$r16", 0x7045887bb8325d6fUL, 0xbac771cbb78dae04UL, 0x23f4928023125a5cUL); ++ TESTINST_RRR("or", "$r30", "$r5", "$r7", 0xcf609aa2057d1b98UL, 0x379641544fd1cd48UL, 0x5275ef34f265f01aUL); ++ TESTINST_RRR("or", "$r23", "$r4", "$r30", 0xc43fc1c750887406UL, 0x44a3229c33d1cd65UL, 0xceaa00084fc04912UL); ++ ++ /* ---------------- xor rd, rj, rk ---------------- */ ++ TESTINST_RRR("xor", "$r6", "$r19", "$r31", 0x18522418b59bf8aUL, 0x270a2ec823f26e39UL, 0x99ef76e6d4495ae3UL); ++ TESTINST_RRR("xor", "$r28", "$r20", "$r27", 0x57de83cac9dade15UL, 0xd39fdecdfd4ccb08UL, 0xc97b854adacdb4UL); ++ TESTINST_RRR("xor", "$r4", "$r29", "$r5", 0x9f7356fff2445f77UL, 0xc3c3a34d2c226b5aUL, 0x51abdd266816b94fUL); ++ TESTINST_RRR("xor", "$r14", "$r6", "$r28", 0xdd5ca0b5c6c45804UL, 0xa0ba047990ec0798UL, 0x89e6efd43651c28UL); ++ TESTINST_RRR("xor", "$r8", "$r19", "$r23", 0xc3e35cd44af166faUL, 0x6affcfe12104ccc7UL, 0x4adbb3601a07a1d9UL); ++ TESTINST_RRR("xor", "$r16", "$r5", "$r18", 0x685cdc5ca969c8e1UL, 0xd88d0e2a9900b8ebUL, 0xdd4dfbba723cde28UL); ++ TESTINST_RRR("xor", "$r20", "$r18", "$r24", 0x2362838018fa39beUL, 0xbbc8d438b24c037aUL, 0xe020a8456a45b667UL); ++ TESTINST_RRR("xor", "$r19", "$r23", "$r19", 0x637cae50fc0a1c95UL, 0x514b81a7227dd07eUL, 0x59a27a7f9c8481c3UL); ++ TESTINST_RRR("xor", "$r20", "$r16", "$r18", 0xb728dd7a443bcc8fUL, 0xe2de9bf67cdbdc0cUL, 0x26687435fbe4dbf6UL); ++ TESTINST_RRR("xor", "$r23", "$r14", "$r6", 0x744915919b52e27eUL, 0x16863c1d3e1cded7UL, 0x40ce8607349c380UL); ++ ++ /* ---------------- orn rd, rj, rk ---------------- */ ++ TESTINST_RRR("orn", "$r24", "$r9", "$r15", 0x39320ce9aa25fb73UL, 0xaaec06dc1b47cf43UL, 0x5fa36a558c884a69UL); ++ TESTINST_RRR("orn", "$r12", "$r4", "$r26", 0xa9c2abcbc14e3f3cUL, 0x7c87d633528d97b0UL, 0xe383c14e72ab8677UL); ++ TESTINST_RRR("orn", "$r20", "$r24", "$r28", 0xb117d8b0280738a2UL, 0x318fd949c3ba430fUL, 0xc9edab5116dc1582UL); ++ TESTINST_RRR("orn", "$r8", "$r25", "$r25", 0xb140441a36f8ededUL, 0xa26782a5e34d7addUL, 0x61bdd5b78d019958UL); ++ TESTINST_RRR("orn", "$r16", "$r18", "$r25", 0xcda0e2c1bce1eeecUL, 0xa4486eefd2c444d9UL, 0xbd007605c829cadcUL); ++ TESTINST_RRR("orn", "$r5", "$r28", "$r19", 0x8196fca50795a2aaUL, 0xec7f689a0d676560UL, 0xb4450418c4e1b333UL); ++ TESTINST_RRR("orn", "$r15", "$r14", "$r8", 0xaf1e2a9fe35ba4edUL, 0xd2207f86d89b890aUL, 0xfb31b9e37313a94dUL); ++ TESTINST_RRR("orn", "$r27", "$r14", "$r14", 0x1f24566bfa353160UL, 0xc4e17319c4766becUL, 0x29a3bbaaf6b49218UL); ++ TESTINST_RRR("orn", "$r17", "$r12", "$r31", 0xf5195a72c175fed7UL, 0x7aa8d4840359cbf6UL, 0xa1a42af83c82215bUL); ++ 
TESTINST_RRR("orn", "$r16", "$r20", "$r20", 0x76bb09b5b50705e2UL, 0x613fdcbd8c1eba2aUL, 0xfb1e04641f5da4ffUL); ++ ++ /* ---------------- andn rd, rj, rk ---------------- */ ++ TESTINST_RRR("andn", "$r19", "$r31", "$r17", 0xbcc81a9b2e349626UL, 0x5a38a8ef9c7e30e4UL, 0xcb490976d0652986UL); ++ TESTINST_RRR("andn", "$r10", "$r4", "$r10", 0x9acfa0cd6ea107fdUL, 0x1d9b572e8f6bedb7UL, 0x768fe778d2a543eaUL); ++ TESTINST_RRR("andn", "$r6", "$r12", "$r26", 0x949e36cff3b5decbUL, 0x56723f7285834fc9UL, 0xf6fa544d6cd57fa8UL); ++ TESTINST_RRR("andn", "$r16", "$r6", "$r4", 0x44a39d85132d6513UL, 0x3ca7f972b865b7ceUL, 0xf18819e4740308bcUL); ++ TESTINST_RRR("andn", "$r19", "$r26", "$r15", 0x856d1e3162c8fa2dUL, 0xc1ef79456be3885UL, 0x3c089064e60da1dUL); ++ TESTINST_RRR("andn", "$r17", "$r28", "$r9", 0x512a518c554f4b0aUL, 0x43454425b8b7755UL, 0xdc5dca386b49bdd7UL); ++ TESTINST_RRR("andn", "$r16", "$r16", "$r14", 0xa9c14796fec54f89UL, 0xe31928f90d2723a4UL, 0xcf2deaf4af11410aUL); ++ TESTINST_RRR("andn", "$r9", "$r4", "$r20", 0x51d79964a699ec8dUL, 0xe82135537ca93e7fUL, 0xcbadcb1dc4dd0ed0UL); ++ TESTINST_RRR("andn", "$r18", "$r25", "$r25", 0xeb546ce75bcba3f5UL, 0x953d86e2bd6b136dUL, 0x4914dbeee506d8adUL); ++ TESTINST_RRR("andn", "$r27", "$r15", "$r14", 0xc8b599a43b0b4683UL, 0x509638630676b88UL, 0x3d278ed22a112a89UL); ++ ++ /* ---------------- mul.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mul.w", "$r28", "$r12", "$r10", 0xf6fcce3e1c5b1598UL, 0xef2747013f911fe8UL, 0x14a216fd69537967UL); ++ TESTINST_RRR("mul.w", "$r13", "$r18", "$r24", 0x5e8a32c1e1e12aa4UL, 0x30e007bb8dd185faUL, 0x1a74dd893af9fb5aUL); ++ TESTINST_RRR("mul.w", "$r10", "$r20", "$r4", 0xf06f4af61b0e0c24UL, 0x1b3624a77f26275fUL, 0x653052ae3a1347dfUL); ++ TESTINST_RRR("mul.w", "$r23", "$r19", "$r10", 0xccb5485ae4605cddUL, 0x67c67c647eaf9e6cUL, 0xfb9b6c7b49ec10cfUL); ++ TESTINST_RRR("mul.w", "$r12", "$r30", "$r7", 0xc1f45aaf98ffcb39UL, 0x906f0c08c0bae02eUL, 0xdf6cf5c05b5f2d34UL); ++ TESTINST_RRR("mul.w", "$r27", "$r12", "$r12", 0x9545c6d9f812c0d9UL, 0xacd016cb69e028b3UL, 0x2b68e3a280d9c0b6UL); ++ TESTINST_RRR("mul.w", "$r28", "$r7", "$r19", 0x4cf68a9590da3da5UL, 0x70ed8b9b03a6325dUL, 0x1125383d12dad118UL); ++ TESTINST_RRR("mul.w", "$r20", "$r12", "$r20", 0x10683d31408fb4c5UL, 0x9ef4ea79672ce58dUL, 0x960a13776923d3e4UL); ++ TESTINST_RRR("mul.w", "$r26", "$r19", "$r28", 0xbf8a20b69fa4357bUL, 0xf3e9b53a654e3cbfUL, 0x20afdeb5a4b4e1c9UL); ++ TESTINST_RRR("mul.w", "$r13", "$r26", "$r25", 0x78f637d350c666bfUL, 0xff742d96dc73e9e9UL, 0x94a3289b55744707UL); ++ ++ /* ---------------- mulh.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.w", "$r18", "$r25", "$r14", 0xa988161162710d96UL, 0x37443c6f5d0625eaUL, 0x94da379219de8576UL); ++ TESTINST_RRR("mulh.w", "$r13", "$r16", "$r18", 0x246298a54a25030aUL, 0x33643ceed35cff64UL, 0xc25702631b42c849UL); ++ TESTINST_RRR("mulh.w", "$r20", "$r5", "$r15", 0x3b606ea986dcf13eUL, 0x269dcd16567786d2UL, 0x96c0983df45d5c03UL); ++ TESTINST_RRR("mulh.w", "$r19", "$r19", "$r25", 0xab8fc1c922ba3e7aUL, 0xdec5bddca513d198UL, 0xf05e814d67d43f5aUL); ++ TESTINST_RRR("mulh.w", "$r15", "$r28", "$r16", 0x82fcfa24449231baUL, 0xf37548fee13133f3UL, 0x256188ef96bb3d23UL); ++ TESTINST_RRR("mulh.w", "$r24", "$r9", "$r27", 0x858ddeb68e948058UL, 0xffb64d62e202462UL, 0xe07a6dae07f46c11UL); ++ TESTINST_RRR("mulh.w", "$r23", "$r20", "$r14", 0x7713930e419350ffUL, 0xd5d72e6efb86e428UL, 0x49f87e78ddcc8400UL); ++ TESTINST_RRR("mulh.w", "$r28", "$r20", "$r25", 0x552a9b7f3fa0c48aUL, 0xd616afd20f193287UL, 0xbcd2ae680b131cd2UL); ++ TESTINST_RRR("mulh.w", 
"$r16", "$r19", "$r12", 0x94b154fc890497c3UL, 0xd8217f47e4257a7cUL, 0xb47bb0e4cff83cbfUL); ++ TESTINST_RRR("mulh.w", "$r23", "$r23", "$r6", 0xafb7fddb344318fUL, 0xaafee418c4267e18UL, 0x1763f686cd41d46eUL); ++ ++ /* ---------------- mulh.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.wu", "$r18", "$r17", "$r8", 0xa92fa2817b19786cUL, 0xaf23e3d2092f080cUL, 0x771c36ac19259f2aUL); ++ TESTINST_RRR("mulh.wu", "$r16", "$r13", "$r8", 0xf4a7b7abe5f3831aUL, 0xe8beff7f8f4330cdUL, 0x38cebbe3d1af354dUL); ++ TESTINST_RRR("mulh.wu", "$r8", "$r23", "$r29", 0x6ca8c7d8ec316750UL, 0xc3a59754c752c3a5UL, 0x4b77e251de7f45f1UL); ++ TESTINST_RRR("mulh.wu", "$r20", "$r25", "$r30", 0x6faa5d1372250132UL, 0x68734123142c820aUL, 0xf7b4bdf342e2017UL); ++ TESTINST_RRR("mulh.wu", "$r31", "$r18", "$r19", 0x8cfa67422c1c5d5UL, 0xb48ac9531206cef2UL, 0x9f9f5d925c5cf738UL); ++ TESTINST_RRR("mulh.wu", "$r25", "$r7", "$r27", 0x85aa17ff1b3699baUL, 0x9a7aeabb800edb53UL, 0x4eb1ec754c7cdb59UL); ++ TESTINST_RRR("mulh.wu", "$r19", "$r4", "$r28", 0x821038d7fb43149cUL, 0x44cd20261f5ae87eUL, 0xf9d8916e8eb4ecb1UL); ++ TESTINST_RRR("mulh.wu", "$r30", "$r23", "$r28", 0xef34433557594fb3UL, 0x2f9401c8064c8ca0UL, 0x5de6287c2a56e507UL); ++ TESTINST_RRR("mulh.wu", "$r13", "$r6", "$r17", 0xd6b38c427ad5f669UL, 0xbe04ea8987b20188UL, 0x52cee1d144e3c134UL); ++ TESTINST_RRR("mulh.wu", "$r26", "$r19", "$r17", 0x2ea15eee9429b8a0UL, 0x43598be92000d9f7UL, 0x6364cfeb707aba6cUL); ++ ++ /* ---------------- mul.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("mul.d", "$r19", "$r4", "$r10", 0xf0235819cf1bab1fUL, 0xdc7a0086353cfddfUL, 0x6f18aec465b5af87UL); ++ TESTINST_RRR("mul.d", "$r19", "$r31", "$r20", 0x24d7526c5e4669e3UL, 0xaab7dd46e5af2493UL, 0xd5df6eea42205e25UL); ++ TESTINST_RRR("mul.d", "$r15", "$r20", "$r4", 0x3740ba48d64cc478UL, 0xcfeffb7c35a98382UL, 0xeab050fc9bdb3c52UL); ++ TESTINST_RRR("mul.d", "$r29", "$r7", "$r25", 0xe8858552c0e8eac8UL, 0xb65ed231c27efb70UL, 0xbb753de59e4ca3d1UL); ++ TESTINST_RRR("mul.d", "$r5", "$r30", "$r4", 0xc4f17df5c983317dUL, 0xb2af9e86d443d8ceUL, 0xf9e3c6d18372d0d3UL); ++ TESTINST_RRR("mul.d", "$r25", "$r17", "$r29", 0xa09d11d50056b350UL, 0x6609b14ca65f9affUL, 0x692def5a14a3278cUL); ++ TESTINST_RRR("mul.d", "$r13", "$r15", "$r26", 0xd528ed047af75775UL, 0x896658fe826a0817UL, 0xa456f53d5f2760b1UL); ++ TESTINST_RRR("mul.d", "$r23", "$r9", "$r7", 0x5d33f63ce8637a69UL, 0xad38922264c721ffUL, 0xe0514fea4ee52acaUL); ++ TESTINST_RRR("mul.d", "$r25", "$r23", "$r30", 0x5d74125f059662f3UL, 0xa708100731e88710UL, 0x739e4de71fec92e0UL); ++ TESTINST_RRR("mul.d", "$r26", "$r18", "$r30", 0x110a94ffa2e12f32UL, 0x1b770d6c423d4f8UL, 0x38bf04d66f91531aUL); ++ ++ /* ---------------- mulh.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.d", "$r5", "$r15", "$r12", 0xd72f46d42ca4db6bUL, 0xe1771af0e69e49a6UL, 0xd796f52fbd01a4bbUL); ++ TESTINST_RRR("mulh.d", "$r28", "$r18", "$r14", 0x904e699bcbe32b08UL, 0x9b5b69b4d817779cUL, 0xa02ca97cc4e37f13UL); ++ TESTINST_RRR("mulh.d", "$r6", "$r12", "$r7", 0xc75e1065b8dbcd34UL, 0xec7d8ae6a65f2fd3UL, 0xb7e32b52f40bc8efUL); ++ TESTINST_RRR("mulh.d", "$r5", "$r25", "$r19", 0x7b2e04c0c2f95e4fUL, 0x9a5037ff200e982aUL, 0xf862c0c6425ff2bcUL); ++ TESTINST_RRR("mulh.d", "$r14", "$r8", "$r23", 0x5fd7ae31ad151daaUL, 0x444243172f499ec0UL, 0x9003c8aeabc39884UL); ++ TESTINST_RRR("mulh.d", "$r7", "$r23", "$r13", 0xbc21ca397041a2bUL, 0xe886455c8737b2caUL, 0xd5ccec2f631a1d60UL); ++ TESTINST_RRR("mulh.d", "$r26", "$r16", "$r13", 0xd3894783f187ee9cUL, 0xa7a6c4abeda9a22cUL, 0x4375f7e49ed91384UL); ++ 
TESTINST_RRR("mulh.d", "$r17", "$r31", "$r16", 0xa93bd0cf9137745eUL, 0x3a1b2b922b7645f1UL, 0x7e33f64c19972ae3UL); ++ TESTINST_RRR("mulh.d", "$r20", "$r19", "$r8", 0xda9224c9ab488939UL, 0xb7f5978bf509641dUL, 0xf6fcd615333c30c0UL); ++ TESTINST_RRR("mulh.d", "$r12", "$r17", "$r20", 0xcdbd51e35d5c1df3UL, 0x254bd8eaadc946feUL, 0x9de163435088598bUL); ++ ++ /* ---------------- mulh.du rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.du", "$r25", "$r28", "$r29", 0xf7ef0dbf1bf7938aUL, 0xd267d11ae422f604UL, 0x89d6fd68226e13dUL); ++ TESTINST_RRR("mulh.du", "$r7", "$r28", "$r24", 0xe568cf4a6d6bc199UL, 0x6efedad6fbe95f2aUL, 0xdf55853ed22d024eUL); ++ TESTINST_RRR("mulh.du", "$r25", "$r8", "$r9", 0xbf7c0226b0c2072UL, 0x794fd44a65c65ebbUL, 0xa0391c3fa3cf1e5cUL); ++ TESTINST_RRR("mulh.du", "$r30", "$r16", "$r7", 0x3df3f3b3ff17f61aUL, 0xcadd1f7e7150ad7bUL, 0xbdc63d3f762cf02dUL); ++ TESTINST_RRR("mulh.du", "$r6", "$r10", "$r19", 0x6601e05fc5f801cbUL, 0xbc10a70104969251UL, 0x2f50a00036fb7821UL); ++ TESTINST_RRR("mulh.du", "$r17", "$r9", "$r5", 0xffabc0cbdc8aa7b0UL, 0x5288bc60da558afbUL, 0x2795644a58b2668fUL); ++ TESTINST_RRR("mulh.du", "$r26", "$r8", "$r15", 0x68b64c997f561b59UL, 0xe2ed2375e64b1bf3UL, 0xe1033e583092ad96UL); ++ TESTINST_RRR("mulh.du", "$r10", "$r13", "$r30", 0x6450ec488eb4753bUL, 0x4287b82860366cf8UL, 0x1c15ed3f051fe8cUL); ++ TESTINST_RRR("mulh.du", "$r24", "$r13", "$r15", 0x1169fa9dd6f8273dUL, 0x6fd2cdb39e5d1fa3UL, 0xff0526e206880684UL); ++ TESTINST_RRR("mulh.du", "$r8", "$r9", "$r10", 0xe9cb6416a1492fbfUL, 0xaf89960e18913df0UL, 0x76b4251409ff9830UL); ++ ++ /* ---------------- mulw.d.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulw.d.w", "$r6", "$r31", "$r7", 0x50ce021eb3b3f3a4UL, 0xb859e7514e4c4d7cUL, 0x372cb1e2b3200f36UL); ++ TESTINST_RRR("mulw.d.w", "$r31", "$r7", "$r28", 0x925642fa7e2de9abUL, 0x61404b6550238cebUL, 0x75ed502242ed0430UL); ++ TESTINST_RRR("mulw.d.w", "$r19", "$r16", "$r10", 0xef82de697f7239fUL, 0xdf1c56dfe5c0e48dUL, 0xbc7e740fe1b1dc25UL); ++ TESTINST_RRR("mulw.d.w", "$r29", "$r12", "$r27", 0xc104a400fa0d1dbfUL, 0x2aa34e8a5fad6c6fUL, 0x7f8e4d23644b0d4dUL); ++ TESTINST_RRR("mulw.d.w", "$r25", "$r16", "$r25", 0x5b8ff9172c849fb9UL, 0x843f90380af6f2afUL, 0x12f7f8780cb8bfe0UL); ++ TESTINST_RRR("mulw.d.w", "$r13", "$r13", "$r7", 0x6bba79a88056d891UL, 0x6757a43d403285abUL, 0x2d2ea385888c2664UL); ++ TESTINST_RRR("mulw.d.w", "$r12", "$r8", "$r23", 0x5c96927dcf1fb14eUL, 0x2b3767b9e9029d4bUL, 0x252bbcc66b5d834bUL); ++ TESTINST_RRR("mulw.d.w", "$r6", "$r13", "$r10", 0x5fa5a8b36e8ec3e0UL, 0xcbca4b4d518b9466UL, 0xabdf2ec674f70c5bUL); ++ TESTINST_RRR("mulw.d.w", "$r16", "$r15", "$r23", 0x5b94eeb9c3c9fa01UL, 0x5c4ebef486f83b43UL, 0x73f3781c3a1e9216UL); ++ TESTINST_RRR("mulw.d.w", "$r6", "$r31", "$r7", 0xbc263312a123caedUL, 0xe9aa8545d3a99a97UL, 0x71b5dbacf4f7f2b8UL); ++ ++ /* ---------------- mulw.d.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulw.d.wu", "$r14", "$r17", "$r30", 0x94452e0d7eb407b7UL, 0x629b1902a484a77dUL, 0x474359ca7f7165edUL); ++ TESTINST_RRR("mulw.d.wu", "$r26", "$r7", "$r5", 0xae9771f0d59319b3UL, 0x1bcb563dea8f3a3fUL, 0x759334cc2d543103UL); ++ TESTINST_RRR("mulw.d.wu", "$r25", "$r28", "$r27", 0x27ca0bf2d6cd2699UL, 0x5a015da9b52ffc64UL, 0x482a4fa5b5625914UL); ++ TESTINST_RRR("mulw.d.wu", "$r8", "$r4", "$r16", 0x22f61239dad7bc92UL, 0xe8c9964b31b0e199UL, 0x99fdef421aa22322UL); ++ TESTINST_RRR("mulw.d.wu", "$r29", "$r17", "$r15", 0xcc5eec6e4f2b5fdbUL, 0x2d08ada074c2ac37UL, 0x8967ce1cd4c2362eUL); ++ TESTINST_RRR("mulw.d.wu", "$r27", "$r23", "$r16", 
0x2d057e2ead214d6cUL, 0x987e7a10a0f3ee5dUL, 0xd515e2a2f06be633UL); ++ TESTINST_RRR("mulw.d.wu", "$r15", "$r19", "$r12", 0xce24943d6fe20263UL, 0xd6bbdcb20d76de15UL, 0xcc277905bc41da62UL); ++ TESTINST_RRR("mulw.d.wu", "$r4", "$r4", "$r19", 0xe37942a26dc0e882UL, 0x6a30fb04c3b5431fUL, 0x4c937bed67cb6c73UL); ++ TESTINST_RRR("mulw.d.wu", "$r7", "$r12", "$r9", 0xbdebe7a7b19b7dc0UL, 0x3f6e790fb24d19f1UL, 0x7a19c4fdd0d29f3eUL); ++ TESTINST_RRR("mulw.d.wu", "$r31", "$r30", "$r28", 0x690687056e169108UL, 0xa8abab5bf1d42538UL, 0x636a31884ca1e99UL); ++ ++ /* ---------------- div.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.w", "$r13", "$r28", "$r23", 0x16546290UL, 0x627aa138UL, 0x534168cUL); ++ TESTINST_RRR("div.w", "$r28", "$r19", "$r9", 0xffffffffbe03930dUL, 0x223d0ec7UL, 0xffffffff8404aa67UL); ++ TESTINST_RRR("div.w", "$r18", "$r19", "$r30", 0xffffffffac214649UL, 0xffffffff8019c3b7UL, 0xffffffff871cbf90UL); ++ TESTINST_RRR("div.w", "$r24", "$r25", "$r7", 0xffffffffa144ed80UL, 0x1c4370c7UL, 0x4695aa29UL); ++ TESTINST_RRR("div.w", "$r9", "$r27", "$r4", 0x3ae8b7c7UL, 0xfffffffff3a6ebb2UL, 0x181d816aUL); ++ TESTINST_RRR("div.w", "$r28", "$r15", "$r7", 0xffffffff956a7de4UL, 0xffffffff9aab217bUL, 0x3b061b78UL); ++ TESTINST_RRR("div.w", "$r25", "$r24", "$r12", 0x3c6167d4UL, 0x2673145eUL, 0x1d5e391UL); ++ TESTINST_RRR("div.w", "$r23", "$r15", "$r4", 0x3e0820eeUL, 0x42793c51UL, 0x286cdb51UL); ++ TESTINST_RRR("div.w", "$r28", "$r16", "$r30", 0xffffffffcf8fd242UL, 0x2a76141eUL, 0x2429a52UL); ++ TESTINST_RRR("div.w", "$r29", "$r8", "$r18", 0x74991388UL, 0xffffffffd594ef43UL, 0x6d3f9603UL); ++ ++ /* ---------------- mod.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.w", "$r8", "$r13", "$r14", 0x5cc9e6dbUL, 0xfffffffff7327c6dUL, 0x23eef833UL); ++ TESTINST_RRR("mod.w", "$r25", "$r24", "$r25", 0x539195e4UL, 0xffffffffd94f10c8UL, 0x2c5786d9UL); ++ TESTINST_RRR("mod.w", "$r10", "$r16", "$r23", 0xffffffff9b15f725UL, 0x448a831dUL, 0xffffffffd5d7d92bUL); ++ TESTINST_RRR("mod.w", "$r6", "$r5", "$r29", 0x1794d969UL, 0x2fba86b0UL, 0x40e6ab6bUL); ++ TESTINST_RRR("mod.w", "$r16", "$r14", "$r29", 0x6a503328UL, 0xffffffffdf0b2ad2UL, 0xffffffff90dc29c6UL); ++ TESTINST_RRR("mod.w", "$r30", "$r14", "$r18", 0xffffffffc7670acdUL, 0x53f3b34fUL, 0xffffffff84b62159UL); ++ TESTINST_RRR("mod.w", "$r31", "$r6", "$r18", 0xffffffff98334c95UL, 0xfffffffff241ffd8UL, 0xffffffffa73314aaUL); ++ TESTINST_RRR("mod.w", "$r12", "$r8", "$r4", 0xffffffffd9f19db4UL, 0xffffffffc89f9796UL, 0xffffffffaa8e2a3bUL); ++ TESTINST_RRR("mod.w", "$r23", "$r12", "$r4", 0xffffffff94e93220UL, 0xfffffffffea1587aUL, 0xffffffffb88b2b87UL); ++ TESTINST_RRR("mod.w", "$r13", "$r9", "$r18", 0xf718c0UL, 0xffffffffe264a3a5UL, 0x2f29ef3UL); ++ ++ /* ---------------- div.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.wu", "$r24", "$r5", "$r16", 0xddf57c5UL, 0x6b1a808cUL, 0x576fe70UL); ++ TESTINST_RRR("div.wu", "$r26", "$r7", "$r9", 0x665e82ffUL, 0x344d887fUL, 0x7fd6d6d8UL); ++ TESTINST_RRR("div.wu", "$r13", "$r18", "$r15", 0xffffffffe82e2cf8UL, 0x7c66b628UL, 0x305c899UL); ++ TESTINST_RRR("div.wu", "$r15", "$r14", "$r7", 0xb06b1fUL, 0x56016282UL, 0x95a8701UL); ++ TESTINST_RRR("div.wu", "$r19", "$r12", "$r31", 0xffffffffb3a487d1UL, 0xffffffffbe2fe16eUL, 0xffffffff8dc0ff7fUL); ++ TESTINST_RRR("div.wu", "$r6", "$r10", "$r20", 0x1bb491e9UL, 0x64e382eUL, 0x5977f9f1UL); ++ TESTINST_RRR("div.wu", "$r9", "$r29", "$r28", 0x498c3349UL, 0x14cbb257UL, 0xffffffff95165a4aUL); ++ TESTINST_RRR("div.wu", "$r10", "$r29", "$r15", 0xffffffffbb3f9c5dUL, 0x2755057dUL, 
0x14039cc4UL); ++ TESTINST_RRR("div.wu", "$r24", "$r31", "$r7", 0xffffffffe5a9a3cdUL, 0xffffffffa1f84b49UL, 0xffffffffe45bd3b9UL); ++ TESTINST_RRR("div.wu", "$r23", "$r18", "$r6", 0x54e07e9fUL, 0xffffffffaccbdd8cUL, 0xfffffffff3729b57UL); ++ ++ /* ---------------- mod.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.wu", "$r5", "$r20", "$r18", 0xffffffffa1ce2e4eUL, 0xffffffffdbeb0e2dUL, 0x70157135UL); ++ TESTINST_RRR("mod.wu", "$r14", "$r30", "$r17", 0x10e75d07UL, 0x39c3080UL, 0x1658d87bUL); ++ TESTINST_RRR("mod.wu", "$r28", "$r7", "$r4", 0x6df194dbUL, 0x55fae7c9UL, 0xffffffff9a87c1efUL); ++ TESTINST_RRR("mod.wu", "$r6", "$r14", "$r10", 0xffffffff8feb78ccUL, 0xffffffffe5032316UL, 0x18ab441eUL); ++ TESTINST_RRR("mod.wu", "$r13", "$r15", "$r9", 0xffffffffbb28952cUL, 0x2d43f57dUL, 0x2dfbf584UL); ++ TESTINST_RRR("mod.wu", "$r7", "$r30", "$r5", 0x9bfb2cfUL, 0x6595d7b3UL, 0xfffffffffffd1025UL); ++ TESTINST_RRR("mod.wu", "$r10", "$r9", "$r16", 0x342671c6UL, 0xfffffffff1ff8be3UL, 0xfffffffffaea052bUL); ++ TESTINST_RRR("mod.wu", "$r16", "$r16", "$r23", 0xffffffffc0356055UL, 0x2ac1f414UL, 0x4a75c890UL); ++ TESTINST_RRR("mod.wu", "$r19", "$r8", "$r7", 0xfffffffff8ed6580UL, 0x5fef460eUL, 0x68eedef2UL); ++ TESTINST_RRR("mod.wu", "$r29", "$r25", "$r25", 0xffffffff9ea76eb0UL, 0xffffffff818904b9UL, 0xffffffffe92f4f30UL); ++ ++ /* ---------------- div.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.d", "$r7", "$r17", "$r7", 0xc8f25fb958f2d668UL, 0x74a14cbaa00fdeaUL, 0xcf95f3de82ceb015UL); ++ TESTINST_RRR("div.d", "$r10", "$r19", "$r12", 0x9ead8a6f6ea63534UL, 0xaf80d344d48e6cd5UL, 0xe1f40f759cbfe0e7UL); ++ TESTINST_RRR("div.d", "$r23", "$r28", "$r28", 0x35481a5285093e04UL, 0xfd79e3c19b697fa8UL, 0x6ffab603b9e1b7fbUL); ++ TESTINST_RRR("div.d", "$r30", "$r25", "$r4", 0x3eacf1d695a34b95UL, 0xfbff957ab051d494UL, 0x670724b8930d53fUL); ++ TESTINST_RRR("div.d", "$r31", "$r29", "$r6", 0xce8d3df48871d655UL, 0xf351f7f35927e83dUL, 0x93a3085686f4101fUL); ++ TESTINST_RRR("div.d", "$r17", "$r23", "$r8", 0xfc913f8b14dda5a5UL, 0x1f938af81988deUL, 0x9d021a9f06b46953UL); ++ TESTINST_RRR("div.d", "$r7", "$r29", "$r15", 0x4593da2923f2ac5bUL, 0x11fc5a958b182a55UL, 0x2edafaf2857c6697UL); ++ TESTINST_RRR("div.d", "$r13", "$r31", "$r27", 0x97236145608dd8c3UL, 0x1f0ee96afd23910bUL, 0xe35e4d5efd2204d3UL); ++ TESTINST_RRR("div.d", "$r13", "$r26", "$r14", 0x2c057bd222f216dfUL, 0x1e006853720971c3UL, 0x81e35a993e6a15b5UL); ++ TESTINST_RRR("div.d", "$r5", "$r9", "$r4", 0x93c0d85c66f2c5abUL, 0x774fbe894b2ed067UL, 0x2c46387d55732742UL); ++ ++ /* ---------------- mod.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.d", "$r19", "$r26", "$r16", 0x63304d2181f4a4daUL, 0x9ed948849ddee475UL, 0x18a360d3ab980398UL); ++ TESTINST_RRR("mod.d", "$r27", "$r23", "$r13", 0xf7156e74db7a8d92UL, 0x324e7001287ce2a8UL, 0x3cc7524686bed31cUL); ++ TESTINST_RRR("mod.d", "$r8", "$r26", "$r19", 0x7bda37a222135803UL, 0x1daf8fd66ff987edUL, 0x334631279104fc3bUL); ++ TESTINST_RRR("mod.d", "$r25", "$r15", "$r7", 0xd1a0f45d5b463d53UL, 0x9c4cd7bef3bf0712UL, 0x420a5c702006f3ccUL); ++ TESTINST_RRR("mod.d", "$r25", "$r18", "$r7", 0x93487a905cb08a75UL, 0x8c79cafa8bebf0a8UL, 0x1478409d192c144bUL); ++ TESTINST_RRR("mod.d", "$r8", "$r27", "$r27", 0x8756a1690dd7896dUL, 0x35273279ea76319fUL, 0xc5292f2331abc6ddUL); ++ TESTINST_RRR("mod.d", "$r15", "$r10", "$r24", 0xf8c476adbc930802UL, 0x8b5832bcd0f6c87eUL, 0x6cba54a72da38702UL); ++ TESTINST_RRR("mod.d", "$r27", "$r7", "$r6", 0x2387015bddb2c076UL, 0x231e30de7a72ad90UL, 0x81f1285973e8dc11UL); ++ TESTINST_RRR("mod.d", 
"$r16", "$r9", "$r12", 0x3388d23c07feb1daUL, 0xe8c01f744b310474UL, 0xa29071d702959009UL); ++ TESTINST_RRR("mod.d", "$r13", "$r10", "$r20", 0xbd45a261f8de4fe4UL, 0x6fb0a8c9a2681a8eUL, 0x2f1b7055cf2409ecUL); ++ ++ /* ---------------- div.du rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.du", "$r17", "$r10", "$r24", 0x4d363fd48a626fdaUL, 0x7ccdeeaa6c24885fUL, 0xfcc68e72f59750aeUL); ++ TESTINST_RRR("div.du", "$r20", "$r20", "$r10", 0x808fa5cb6a75fd6fUL, 0xf3f712970031005UL, 0x1709a8adab2fa578UL); ++ TESTINST_RRR("div.du", "$r15", "$r14", "$r19", 0xcd3107423486c8feUL, 0xf6bc56277282cd14UL, 0x961ac833f00f3e3UL); ++ TESTINST_RRR("div.du", "$r4", "$r29", "$r18", 0xa0bfc2fc5b35fa79UL, 0x2b28c09aa5f12845UL, 0xed44da2fdf5dce00UL); ++ TESTINST_RRR("div.du", "$r4", "$r6", "$r25", 0x1fc6e23fd0f09ed0UL, 0xeaa71d9fb42223caUL, 0x45689545e60381cUL); ++ TESTINST_RRR("div.du", "$r10", "$r8", "$r12", 0xa3710c512d4c006cUL, 0xc011778733c50a6eUL, 0xb44475ee048d8167UL); ++ TESTINST_RRR("div.du", "$r29", "$r4", "$r29", 0x46d27abff0da1972UL, 0x17a4e863a182dcd0UL, 0x59a7b82980ac6a6dUL); ++ TESTINST_RRR("div.du", "$r15", "$r8", "$r30", 0x68120919dbbd9b19UL, 0x4c296c89a6f7a6dfUL, 0x9d9166c1cd0eecfaUL); ++ TESTINST_RRR("div.du", "$r7", "$r18", "$r17", 0xd2389cb7af92be89UL, 0x9a1f65b2c59cfda3UL, 0xe316cf92f8f0574fUL); ++ TESTINST_RRR("div.du", "$r15", "$r25", "$r17", 0x49651d72d87da955UL, 0xd22c499c27908743UL, 0x8d824b01058ecb8UL); ++ ++ /* ---------------- mod.du rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.du", "$r26", "$r8", "$r23", 0xb0bd66f10c34fe23UL, 0x5eb9b775d83b4893UL, 0x8867d4b638f2622UL); ++ TESTINST_RRR("mod.du", "$r8", "$r10", "$r25", 0xe236349cd47eeb11UL, 0x119102fd7b236a81UL, 0x8fd72a09e4fb45fUL); ++ TESTINST_RRR("mod.du", "$r25", "$r4", "$r5", 0x1b669725a0c3a970UL, 0x175359099c87b83UL, 0xcad295c79f1d835aUL); ++ TESTINST_RRR("mod.du", "$r7", "$r28", "$r20", 0x7117e70798869df4UL, 0xe35b93aa0c37fe97UL, 0x741084dead7970d0UL); ++ TESTINST_RRR("mod.du", "$r30", "$r24", "$r9", 0xc4d432a8ce91f693UL, 0x77c03aceb2ea6b45UL, 0xb8cd7773fb72b7caUL); ++ TESTINST_RRR("mod.du", "$r23", "$r9", "$r28", 0x13f1f3e1891b6b73UL, 0x9811699becce53a9UL, 0xed15e264f0c39b88UL); ++ TESTINST_RRR("mod.du", "$r13", "$r12", "$r14", 0xb8b22bcb0cb970e8UL, 0x16cdecd7c0091cd2UL, 0x4fcab819ebadbdfdUL); ++ TESTINST_RRR("mod.du", "$r30", "$r17", "$r12", 0xbf96226d2de1240dUL, 0x9fe4b2c7557d6b9aUL, 0x3668e581a5de6efdUL); ++ TESTINST_RRR("mod.du", "$r14", "$r4", "$r6", 0x9bc8f8a69a7f55c2UL, 0x530a9c5a21769babUL, 0x2805bef72d33cbd5UL); ++ TESTINST_RRR("mod.du", "$r23", "$r28", "$r12", 0x82a854f86e642cbaUL, 0xdd0fd63485d6c3dUL, 0x56b21f15cb9d2bf2UL); ++ ++ /* ---------------- alsl.w rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("alsl.w", "$r18", "$r10", "$r15", SA2_1, 0xafb40df16156827bUL, 0x9b0b86116a0d89cbUL, 0x80086c066ea6842bUL, 2); ++ TESTINST_RRRI("alsl.w", "$r24", "$r5", "$r4", SA2_1, 0xb8b63b8205a919dfUL, 0x7319260322fa2d6dUL, 0x1efce6644a51ebf9UL, 2); ++ TESTINST_RRRI("alsl.w", "$r24", "$r5", "$r27", SA2_1, 0xb4f0fd355869e078UL, 0x26abeea20b7d1ac1UL, 0x4108f7f27e321c8fUL, 2); ++ TESTINST_RRRI("alsl.w", "$r24", "$r29", "$r10", SA2_1, 0x4b948e9a0b82df22UL, 0x11893c9dd43d0112UL, 0x51a030165671a055UL, 1); ++ TESTINST_RRRI("alsl.w", "$r5", "$r10", "$r18", SA2_1, 0xfc253ac9e2b55590UL, 0x2682507563a85b07UL, 0xa467083f66457d1dUL, 1); ++ TESTINST_RRRI("alsl.w", "$r20", "$r13", "$r10", SA2_1, 0x76e8c346a721cdabUL, 0x548f2762bfb1bc01UL, 0xa6e0d27e62dcc594UL, 3); ++ TESTINST_RRRI("alsl.w", "$r16", "$r6", "$r24", SA2_1, 
0x39f77b88fc3b663UL, 0x281818bf4a36a7e5UL, 0x86cd2a06ef475a61UL, 3); ++ TESTINST_RRRI("alsl.w", "$r14", "$r18", "$r9", SA2_1, 0x8a58ea94346ff16UL, 0x4ff191f91397adeaUL, 0x4cda359b03c97a53UL, 4); ++ TESTINST_RRRI("alsl.w", "$r8", "$r6", "$r29", SA2_1, 0xae0bfa182556c725UL, 0xda179bc2f41d03d3UL, 0x1d23e4da08af7978UL, 1); ++ TESTINST_RRRI("alsl.w", "$r31", "$r26", "$r30", SA2_1, 0xd6af9fcd7ffd8e75UL, 0x3e88bb77d6665633UL, 0x23a0414c69b804c1UL, 1); ++ ++ /* ---------------- alsl.wu rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("alsl.wu", "$r20", "$r24", "$r18", SA2_1, 0xc714872ff3c39370UL, 0xcaea31ddabb275f9UL, 0xedbfc2cedca8eb7aUL, 2); ++ TESTINST_RRRI("alsl.wu", "$r13", "$r26", "$r15", SA2_1, 0xe1a0ba1adcb75aa4UL, 0x8adbed432acf321aUL, 0xeae447eaa60bb142UL, 3); ++ TESTINST_RRRI("alsl.wu", "$r4", "$r17", "$r27", SA2_1, 0xb153f9ecea23068cUL, 0xd2066b089c9499a3UL, 0x36ed3c96ac4751aaUL, 3); ++ TESTINST_RRRI("alsl.wu", "$r20", "$r10", "$r4", SA2_1, 0x8fb2705357e98d66UL, 0xd353329585fc71ddUL, 0x739237ed6a677f00UL, 4); ++ TESTINST_RRRI("alsl.wu", "$r31", "$r12", "$r23", SA2_1, 0x6caac60acd9bc6f4UL, 0xc87131b9171530dfUL, 0x39c8e321a6e131c0UL, 2); ++ TESTINST_RRRI("alsl.wu", "$r13", "$r14", "$r19", SA2_1, 0xd2c7072036f54e45UL, 0x35ea1627556f8f98UL, 0x97054728433042d3UL, 2); ++ TESTINST_RRRI("alsl.wu", "$r7", "$r14", "$r5", SA2_1, 0x5a0f1fae80105d64UL, 0xd300b74879e33a53UL, 0x3a1e7389d0669d4cUL, 1); ++ TESTINST_RRRI("alsl.wu", "$r28", "$r4", "$r9", SA2_1, 0xcd7fd8389b4f4062UL, 0xad1830d644c205e7UL, 0xced1c031d73f9087UL, 1); ++ TESTINST_RRRI("alsl.wu", "$r13", "$r9", "$r29", SA2_1, 0x81601560f53b081UL, 0xd3ee3c45f08cd218UL, 0xa7d5a43a1df2aa1dUL, 4); ++ TESTINST_RRRI("alsl.wu", "$r30", "$r29", "$r31", SA2_1, 0xf383bd5bfae7e46dUL, 0x67862a0151c65567UL, 0x9cdcbf604f46c48aUL, 2); ++ ++ /* ---------------- alsl.d rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("alsl.d", "$r18", "$r28", "$r16", SA2_1, 0x53e533e973dfa49cUL, 0x6665a9d32abaaf55UL, 0xf70490874fb75e6eUL, 4); ++ TESTINST_RRRI("alsl.d", "$r10", "$r30", "$r18", SA2_1, 0xfb14c3e6acd722c3UL, 0xcae19862ab088fccUL, 0x87c434d85259d923UL, 2); ++ TESTINST_RRRI("alsl.d", "$r17", "$r25", "$r26", SA2_1, 0x95e79a567c313ec7UL, 0x83a0e706c2c4c534UL, 0x2f49f1e9d5b91fc9UL, 1); ++ TESTINST_RRRI("alsl.d", "$r7", "$r24", "$r24", SA2_1, 0x35b966d0db9f681cUL, 0xc0bc97593f1054fcUL, 0x7e564928b0a53ac6UL, 2); ++ TESTINST_RRRI("alsl.d", "$r6", "$r30", "$r24", SA2_1, 0x38ad1fb21e071421UL, 0xb959c439b0436d6dUL, 0x647c742c9ce02fc5UL, 3); ++ TESTINST_RRRI("alsl.d", "$r18", "$r28", "$r10", SA2_1, 0x1bde2962dc5bb68bUL, 0x67c403d00c9389bdUL, 0x8fc18921f225d05aUL, 2); ++ TESTINST_RRRI("alsl.d", "$r8", "$r27", "$r15", SA2_1, 0x5b8de9d8b393fa06UL, 0x393ec1c28e89e9d8UL, 0x1a59f9d852c3f8baUL, 3); ++ TESTINST_RRRI("alsl.d", "$r27", "$r24", "$r6", SA2_1, 0x72195c1ca51cc4dbUL, 0x4ee5b51e1e161ab2UL, 0x8a10acb4b625fefUL, 4); ++ TESTINST_RRRI("alsl.d", "$r29", "$r4", "$r18", SA2_1, 0xf3ed9e39d83d3decUL, 0xa3816509b9a6c23dUL, 0x6949e8e534450dd5UL, 2); ++ TESTINST_RRRI("alsl.d", "$r16", "$r13", "$r8", SA2_1, 0x588f388f25a342dfUL, 0xde33a74109c7be30UL, 0x8b02cf06997a065aUL, 1); ++ ++ /* ---------------- lu12i.w rd, si20 ---------------- */ ++ TESTINST_RI("lu12i.w", "$r9", SI20, 0xdf45bd002ccf48e1UL, 94146); ++ TESTINST_RI("lu12i.w", "$r10", SI20, 0xa5138a37d09ada8aUL, 129014); ++ TESTINST_RI("lu12i.w", "$r18", SI20, 0xefe46a52b8b3e5eUL, -130138); ++ TESTINST_RI("lu12i.w", "$r7", SI20, 0x29084adf6d033a88UL, -467080); ++ TESTINST_RI("lu12i.w", "$r10", SI20, 0xe9072e7fec2a5d1cUL, 
360675); ++ TESTINST_RI("lu12i.w", "$r28", SI20, 0x2f7d41c7bd959cd5UL, 205272); ++ TESTINST_RI("lu12i.w", "$r16", SI20, 0xcb48200d89b48566UL, -266298); ++ TESTINST_RI("lu12i.w", "$r12", SI20, 0xd605223c244f4a50UL, -186346); ++ TESTINST_RI("lu12i.w", "$r15", SI20, 0x22c035c8c90016beUL, 247864); ++ TESTINST_RI("lu12i.w", "$r20", SI20, 0x6b2fd1aa0b603fecUL, -511005); ++ ++ /* ---------------- lu32i.d rd, si20 ---------------- */ ++ TESTINST_RI("lu32i.d", "$r8", SI20, 0xb331616751ed8877UL, -310956); ++ TESTINST_RI("lu32i.d", "$r17", SI20, 0xe49bab8d80e1dd7UL, 35590); ++ TESTINST_RI("lu32i.d", "$r4", SI20, 0x842cdc9ac0a0adf6UL, 500474); ++ TESTINST_RI("lu32i.d", "$r23", SI20, 0xc9ca69b8e5ab079eUL, -447277); ++ TESTINST_RI("lu32i.d", "$r12", SI20, 0x27d83e1c77dec50aUL, -503028); ++ TESTINST_RI("lu32i.d", "$r26", SI20, 0xc00dcc918a89f350UL, -355708); ++ TESTINST_RI("lu32i.d", "$r16", SI20, 0xd180188cdc073491UL, -231989); ++ TESTINST_RI("lu32i.d", "$r26", SI20, 0x4efae034432bbb3bUL, 250642); ++ TESTINST_RI("lu32i.d", "$r15", SI20, 0x7bf2141e673e336fUL, 237105); ++ TESTINST_RI("lu32i.d", "$r4", SI20, 0x187c50bfc5eb8f32UL, -312071); ++ ++ /* ---------------- lu52i.d rd, rj, si12 ---------------- */ ++ TESTINST_RRI("lu52i.d", "$r8", "$r25", SI12, 0x1da74dfcb33d471aUL, 0x453ae9f1200f4d41UL, 1920); ++ TESTINST_RRI("lu52i.d", "$r14", "$r25", SI12, 0x5e954055ebaec78fUL, 0xb7637f9119e12e31UL, -2008); ++ TESTINST_RRI("lu52i.d", "$r26", "$r24", SI12, 0xead69e40b96b23bfUL, 0x779862b03d1ab575UL, -1803); ++ TESTINST_RRI("lu52i.d", "$r5", "$r25", SI12, 0x452236306da7c667UL, 0x9f16a6e48cca3a7bUL, -1406); ++ TESTINST_RRI("lu52i.d", "$r26", "$r23", SI12, 0x5604b9744291e45aUL, 0x70eecb3116b1795cUL, -667); ++ TESTINST_RRI("lu52i.d", "$r14", "$r27", SI12, 0x6d9a8cfe459c1c48UL, 0x85452bdd40205e0dUL, -1221); ++ TESTINST_RRI("lu52i.d", "$r25", "$r8", SI12, 0x1a8d72e42f68a33dUL, 0x7089b6fe4c1f7a70UL, 423); ++ TESTINST_RRI("lu52i.d", "$r30", "$r10", SI12, 0x7c4fe646acac7ac0UL, 0xe7d222ba1fd5cae2UL, -177); ++ TESTINST_RRI("lu52i.d", "$r6", "$r13", SI12, 0xdb3d6a615a9e492fUL, 0xaa9303648ff489f2UL, -1438); ++ TESTINST_RRI("lu52i.d", "$r25", "$r4", SI12, 0x8b41b813d85b8ee8UL, 0xe4d31961e42e713cUL, -634); ++ ++ /* ---------------- addi.w rd, rj, si12 ---------------- */ ++ TESTINST_RRI("addi.w", "$r6", "$r27", SI12, 0x12845f036198fa6fUL, 0xda77c63c764655daUL, 1727); ++ TESTINST_RRI("addi.w", "$r9", "$r8", SI12, 0x21a7e3cfa2649a4fUL, 0xc64c73b3bd4c1dcbUL, -381); ++ TESTINST_RRI("addi.w", "$r16", "$r6", SI12, 0x6c47b02ef52a3502UL, 0x24ca1a646dac5cc3UL, -186); ++ TESTINST_RRI("addi.w", "$r20", "$r31", SI12, 0xb6144d8f9513c78eUL, 0xc4b808764e894e6cUL, 1503); ++ TESTINST_RRI("addi.w", "$r19", "$r17", SI12, 0xcf97c9215c961121UL, 0x9b714c4cb899399bUL, -1918); ++ TESTINST_RRI("addi.w", "$r14", "$r8", SI12, 0xe1abf22f6c3c82ecUL, 0x4110e9c1b5f59ef6UL, -1781); ++ TESTINST_RRI("addi.w", "$r29", "$r18", SI12, 0x4b64427195dda12dUL, 0xadf5af70b7b3f37bUL, 2047); ++ TESTINST_RRI("addi.w", "$r4", "$r30", SI12, 0xfc785d46f5bbdff4UL, 0x1e061e9d51362d9cUL, 244); ++ TESTINST_RRI("addi.w", "$r7", "$r23", SI12, 0xe037576d82c12e8dUL, 0xa77c8da72af708f1UL, -376); ++ TESTINST_RRI("addi.w", "$r23", "$r17", SI12, 0xa10df57c4103efUL, 0x26d2628746ad0a3eUL, 1924); ++ ++ /* ---------------- addi.d rd, rj, si12 ---------------- */ ++ TESTINST_RRI("addi.d", "$r14", "$r14", SI12, 0x61b497fb58a816d9UL, 0x29eb218dd65d9d6cUL, 152); ++ TESTINST_RRI("addi.d", "$r20", "$r13", SI12, 0xd80db8387a8cdd93UL, 0x5e23e4b01f2bbd6dUL, -640); ++ TESTINST_RRI("addi.d", "$r13", 
"$r25", SI12, 0x5dfea060c6e8f587UL, 0x95f49b783954f9f9UL, -743); ++ TESTINST_RRI("addi.d", "$r4", "$r30", SI12, 0xd72f370f6ce7bc4cUL, 0x148550b0f97ce601UL, 676); ++ TESTINST_RRI("addi.d", "$r26", "$r8", SI12, 0xa4120a67f8d6df1aUL, 0xa83f4bbcaf5bc52eUL, 1630); ++ TESTINST_RRI("addi.d", "$r20", "$r29", SI12, 0xa8f9c82780ac16d5UL, 0x7ab169a5751642bcUL, -1971); ++ TESTINST_RRI("addi.d", "$r8", "$r8", SI12, 0x6f22bdb480c14540UL, 0x94e1253c331b17f2UL, 1160); ++ TESTINST_RRI("addi.d", "$r15", "$r27", SI12, 0x312473547bcfe03UL, 0x7a786cbc8149d818UL, 844); ++ TESTINST_RRI("addi.d", "$r8", "$r26", SI12, 0xee2b1be852671bc3UL, 0x6a36d61dfee3a6fbUL, -1185); ++ TESTINST_RRI("addi.d", "$r17", "$r27", SI12, 0x70e068b54ed72e20UL, 0x922681ab8837027bUL, -2046); ++ ++ /* ---------------- addu16i.d rd, rj, si16 ---------------- */ ++ TESTINST_RRI("addu16i.d", "$r20", "$r29", SI16, 0x8232770e3472bdc3UL, 0x4d28c5567787c26eUL, -14564); ++ TESTINST_RRI("addu16i.d", "$r29", "$r4", SI16, 0x9076403ed2f0fdf4UL, 0x471cafb4183a389fUL, -3511); ++ TESTINST_RRI("addu16i.d", "$r26", "$r15", SI16, 0xdec118b1eb13234UL, 0x6ff5ce56111b301UL, 25897); ++ TESTINST_RRI("addu16i.d", "$r9", "$r5", SI16, 0x73209239d98fb81aUL, 0x1dc8f0ba4710eba3UL, -21829); ++ TESTINST_RRI("addu16i.d", "$r28", "$r25", SI16, 0xa39ba8429a9c13a6UL, 0x4fffb32851c13ff2UL, -23832); ++ TESTINST_RRI("addu16i.d", "$r23", "$r30", SI16, 0x8abd919f5ea43b1UL, 0x40078826f7336f0eUL, -32189); ++ TESTINST_RRI("addu16i.d", "$r28", "$r24", SI16, 0x695e543e25e7d3e4UL, 0x30279db606efa8ecUL, 16372); ++ TESTINST_RRI("addu16i.d", "$r4", "$r18", SI16, 0xa125cadb71209757UL, 0xff287b5e7fb2a2baUL, -28041); ++ TESTINST_RRI("addu16i.d", "$r5", "$r17", SI16, 0xd5d3e6da7c594ca9UL, 0x2bc9be0ef252584cUL, -11268); ++ TESTINST_RRI("addu16i.d", "$r29", "$r28", SI16, 0xee0391151007613UL, 0xae616c39d87c4b6eUL, -15645); ++ ++ /* ---------------- andi rd, rj, ui12 ---------------- */ ++ TESTINST_RRI("andi", "$r28", "$r18", UI12, 0xd62f833fbbd483b3UL, 0xa2f268cdcf18dd00UL, 1288); ++ TESTINST_RRI("andi", "$r12", "$r13", UI12, 0xc40efc9a74a3a13bUL, 0xfd609200795f877cUL, 153); ++ TESTINST_RRI("andi", "$r6", "$r18", UI12, 0x79ee7ee7a7865b79UL, 0x644bec92dca1ad7fUL, 3633); ++ TESTINST_RRI("andi", "$r5", "$r31", UI12, 0x2d64be0e5c2ec0f6UL, 0x87253b6589f182c7UL, 3299); ++ TESTINST_RRI("andi", "$r28", "$r5", UI12, 0xf2e4ed85d98a1860UL, 0x9f58e4edd98b60d1UL, 3189); ++ TESTINST_RRI("andi", "$r18", "$r29", UI12, 0x3c067920d48cf0d2UL, 0x2bf35e68c503ecfeUL, 4031); ++ TESTINST_RRI("andi", "$r20", "$r24", UI12, 0xe1d95be05fd57a64UL, 0xd33e771521b24bd3UL, 3252); ++ TESTINST_RRI("andi", "$r6", "$r23", UI12, 0x23341b2d86d02365UL, 0x16de10f2b4a45064UL, 1665); ++ TESTINST_RRI("andi", "$r27", "$r14", UI12, 0xd7db9d77aea4dcf5UL, 0x142272b737435eb7UL, 325); ++ TESTINST_RRI("andi", "$r23", "$r16", UI12, 0x57fee53581b09718UL, 0x2ace25d9e2ddbaaUL, 1056); ++ ++ /* ---------------- ori rd, rj, ui12 ---------------- */ ++ TESTINST_RRI("ori", "$r26", "$r13", UI12, 0x6d47cf7e5bb5c13eUL, 0x93aed4996805ba3bUL, 3251); ++ TESTINST_RRI("ori", "$r10", "$r25", UI12, 0x42f0332098f938afUL, 0xd7916fe8d569567bUL, 568); ++ TESTINST_RRI("ori", "$r12", "$r17", UI12, 0xc507d4150a742b76UL, 0x2b9a102a5b5b15f7UL, 1798); ++ TESTINST_RRI("ori", "$r15", "$r15", UI12, 0xa54ad5ecc0e72adbUL, 0x37c18ad4ec6e678cUL, 1781); ++ TESTINST_RRI("ori", "$r5", "$r4", UI12, 0x1f388b2a2b18004dUL, 0xb5fa23fbb02eeedbUL, 682); ++ TESTINST_RRI("ori", "$r27", "$r24", UI12, 0x73b086f8a8b4d7b5UL, 0xd23e30ab1e45470aUL, 1931); ++ TESTINST_RRI("ori", "$r28", "$r6", 
UI12, 0x972967beac695928UL, 0x2c701d0bc28816c5UL, 3593); ++ TESTINST_RRI("ori", "$r27", "$r4", UI12, 0x54fecbbf0a06e5a6UL, 0xf0b6d846464a3331UL, 3679); ++ TESTINST_RRI("ori", "$r9", "$r16", UI12, 0x71f3cd001c729062UL, 0xc5720758095e4592UL, 905); ++ TESTINST_RRI("ori", "$r26", "$r7", UI12, 0xd7ce86800c3c0f4bUL, 0xc4a58f787cdf5bb2UL, 3473); ++ ++ /* ---------------- xori rd, rj, ui12 ---------------- */ ++ TESTINST_RRI("xori", "$r27", "$r31", UI12, 0xe6d49c2dc629fbc7UL, 0x91832665d1a898e2UL, 2690); ++ TESTINST_RRI("xori", "$r15", "$r5", UI12, 0xada49c0d48beffc5UL, 0xe3cf426f1be4766UL, 697); ++ TESTINST_RRI("xori", "$r9", "$r20", UI12, 0x174a71d6d3757e3eUL, 0x25ed4678037622beUL, 2268); ++ TESTINST_RRI("xori", "$r31", "$r15", UI12, 0x1fac1694b40fbf2eUL, 0x4fe4fb2e0b660ca2UL, 3817); ++ TESTINST_RRI("xori", "$r17", "$r14", UI12, 0x2dc443400df4e153UL, 0x1db25e602ef8ece5UL, 3929); ++ TESTINST_RRI("xori", "$r4", "$r28", UI12, 0x5fb5ad5a84e97835UL, 0xc52da11293641639UL, 2735); ++ TESTINST_RRI("xori", "$r5", "$r13", UI12, 0x5c5fc4ba45da005fUL, 0xe46f853b7d602b84UL, 1153); ++ TESTINST_RRI("xori", "$r30", "$r26", UI12, 0x1419915b6f92678bUL, 0xa984612f1266da94UL, 3867); ++ TESTINST_RRI("xori", "$r13", "$r13", UI12, 0xc2b8fd036ba6314bUL, 0x4cf49604f644713cUL, 3426); ++ TESTINST_RRI("xori", "$r25", "$r23", UI12, 0xde46e3673c9a75dcUL, 0xfa1177a89f08c81eUL, 2669); ++ ++ /* ---------------- sll.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("sll.w", "$r13", "$r8", "$r12", 0x26131fa72f4b76f1UL, 0xf34f7108538078d0UL, 0x10bbd12a8e087501UL); ++ TESTINST_RRR("sll.w", "$r29", "$r8", "$r15", 0xb6f529da4017d0d9UL, 0x49fbfb11ef643171UL, 0x9d0425e747d11bdeUL); ++ TESTINST_RRR("sll.w", "$r30", "$r31", "$r12", 0xcfc5236f5c070644UL, 0xba8301a1087b3a96UL, 0xff7589561824e1beUL); ++ TESTINST_RRR("sll.w", "$r28", "$r10", "$r7", 0x37fa51674df87149UL, 0x39212605c5d0cf7dUL, 0x18a8e323326ce5aaUL); ++ TESTINST_RRR("sll.w", "$r8", "$r9", "$r14", 0x707a9e0ece8abe40UL, 0x94b7b20a80c16c7bUL, 0x6887c46efb4cc181UL); ++ TESTINST_RRR("sll.w", "$r8", "$r4", "$r24", 0xd718a01b03a53964UL, 0x8ebd8bfeec304e2aUL, 0x6b4a83a6838b5d1UL); ++ TESTINST_RRR("sll.w", "$r23", "$r31", "$r27", 0xf50cab824a06d30eUL, 0xa8ee12cbd8dec935UL, 0x118002b3f0cecbabUL); ++ TESTINST_RRR("sll.w", "$r8", "$r25", "$r26", 0x8163368243faadeeUL, 0x3a04f47bf19a4cc8UL, 0x6a58cd3a57b4eeb4UL); ++ TESTINST_RRR("sll.w", "$r25", "$r13", "$r12", 0x3d6831e1afab1b1aUL, 0x9ee672580cb39777UL, 0x9084acd2bc7404caUL); ++ TESTINST_RRR("sll.w", "$r20", "$r5", "$r29", 0x90f7ee3ff75817a6UL, 0xe4ae07989d6148d7UL, 0x3e208bfcf046fffdUL); ++ ++ /* ---------------- srl.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("srl.w", "$r20", "$r29", "$r30", 0xff3f6b79b5e2b56dUL, 0x1195aa09fa92d26bUL, 0xa93a8fd11ad5ae99UL); ++ TESTINST_RRR("srl.w", "$r8", "$r15", "$r4", 0x5d2fb7cd04ecd00cUL, 0x47bf914b6eca2852UL, 0x1bc63138cc45a75cUL); ++ TESTINST_RRR("srl.w", "$r20", "$r12", "$r18", 0x61fa22abda7c7b02UL, 0x9341cf09aa2e106eUL, 0x2dea831e9e121355UL); ++ TESTINST_RRR("srl.w", "$r30", "$r20", "$r26", 0x43e0249584da52dbUL, 0x482a209e436cda53UL, 0xb323a7f463f80660UL); ++ TESTINST_RRR("srl.w", "$r31", "$r16", "$r28", 0x4b10d05d93bf7288UL, 0x6d0330e88122d7c1UL, 0xc531cf8c92d53d03UL); ++ TESTINST_RRR("srl.w", "$r31", "$r15", "$r31", 0xd4654233c7648c3aUL, 0x12e6fc2a04cbf809UL, 0xcfe1c1b558a94808UL); ++ TESTINST_RRR("srl.w", "$r10", "$r30", "$r19", 0x602dee9c45a3b99bUL, 0x3ce0a6ac2acf19faUL, 0xdb5fab4bc2f82e7aUL); ++ TESTINST_RRR("srl.w", "$r17", "$r9", "$r23", 0x45106f11d4a57641UL, 0x5354795b675edacUL, 
0xc67578c28ed7b6c7UL); ++ TESTINST_RRR("srl.w", "$r25", "$r26", "$r29", 0x1dc3b8477fba650cUL, 0x814377a71768e75UL, 0x60276c0e316db833UL); ++ TESTINST_RRR("srl.w", "$r31", "$r7", "$r30", 0x360fc92a085c2e14UL, 0x1b44ec96def89449UL, 0x56d6c5d85a81ed1fUL); ++ ++ /* ---------------- sra.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("sra.w", "$r10", "$r17", "$r19", 0x576f2bfc771641b8UL, 0xfb1fb20b98a54405UL, 0xb20e9dae5a212078UL); ++ TESTINST_RRR("sra.w", "$r12", "$r16", "$r31", 0xbfdbb9a90ccc08a0UL, 0xb5d3c7f3b1a800a6UL, 0x57c3ff79f3b4198bUL); ++ TESTINST_RRR("sra.w", "$r18", "$r16", "$r5", 0xadcb6c153538b6b1UL, 0x99e245813e90b5e9UL, 0x7adff58363d5ebd2UL); ++ TESTINST_RRR("sra.w", "$r17", "$r28", "$r25", 0x7faea6a29686caf9UL, 0x801d40ea40b19beeUL, 0xf5174f678600d3fUL); ++ TESTINST_RRR("sra.w", "$r8", "$r27", "$r13", 0x86e5534832150e05UL, 0x47bb53d1cdc3560fUL, 0x917e2b49633a0f44UL); ++ TESTINST_RRR("sra.w", "$r26", "$r18", "$r20", 0xbfb83a0d762c171aUL, 0xbf67ed78d934d37cUL, 0x9f377995293fcc6bUL); ++ TESTINST_RRR("sra.w", "$r5", "$r25", "$r19", 0x266703af59334b0fUL, 0x4ed92cdab9f641c9UL, 0x5da1d0b8846d1a3dUL); ++ TESTINST_RRR("sra.w", "$r19", "$r27", "$r24", 0x72557561b3b40007UL, 0xd5db278ea099b3b5UL, 0x50b4a888b898610fUL); ++ TESTINST_RRR("sra.w", "$r16", "$r10", "$r4", 0xb349f888f1809ba3UL, 0x23d60a1fc100d89eUL, 0xc2846cc882dbc8e2UL); ++ TESTINST_RRR("sra.w", "$r23", "$r10", "$r31", 0xd7bdeddd344bb5afUL, 0xa015a07c13ff2234UL, 0x7c0fe410ce063a85UL); ++ ++ /* ---------------- sll.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("sll.d", "$r28", "$r17", "$r10", 0x167adf26efd66416UL, 0xb861ba6e0aadf304UL, 0xa19e21ba0f406c33UL); ++ TESTINST_RRR("sll.d", "$r18", "$r29", "$r13", 0x3e8ea4dc3a9d9b44UL, 0x28ccf5dfa9cdc3b2UL, 0x33ef837a5a476bdcUL); ++ TESTINST_RRR("sll.d", "$r23", "$r27", "$r29", 0x23e29c76deed70caUL, 0x9e2265d8422e78dUL, 0xe9cc62bfd8a7c913UL); ++ TESTINST_RRR("sll.d", "$r16", "$r17", "$r17", 0xf5e858c7445fceddUL, 0x6735e4cf2fcb78fbUL, 0x726dd10e13b62663UL); ++ TESTINST_RRR("sll.d", "$r17", "$r15", "$r29", 0xfc1dbfc0551f8813UL, 0xec45100b21a74025UL, 0x186d3b737cbfd39aUL); ++ TESTINST_RRR("sll.d", "$r19", "$r15", "$r9", 0xbb01afe39a1e17b6UL, 0x3e66dd1100acc44aUL, 0xa9c74257f6e39cdfUL); ++ TESTINST_RRR("sll.d", "$r23", "$r9", "$r31", 0x945b101751c38d12UL, 0x262d14baae546199UL, 0x7ccdd8a7840948dfUL); ++ TESTINST_RRR("sll.d", "$r5", "$r31", "$r28", 0xa88eaecc1405995bUL, 0xd96ed500aff4596bUL, 0x6994841a196c562eUL); ++ TESTINST_RRR("sll.d", "$r27", "$r10", "$r25", 0x1e9540fa8237a849UL, 0x9aad6101b2470a60UL, 0x90c95628696f752fUL); ++ TESTINST_RRR("sll.d", "$r4", "$r26", "$r18", 0xb4dc3cdeab2e8454UL, 0xd27a92db3b2906cUL, 0x2bc7647c40c0b375UL); ++ ++ /* ---------------- srl.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("srl.d", "$r6", "$r27", "$r13", 0x66ebeca9a7fad574UL, 0xdc837ce646ea6b51UL, 0xa57259e1758c564bUL); ++ TESTINST_RRR("srl.d", "$r6", "$r20", "$r5", 0x91794316e6c5e65UL, 0xdc7c47d39d64a16UL, 0x35f029b9942e11c8UL); ++ TESTINST_RRR("srl.d", "$r15", "$r5", "$r4", 0xbc963842b3ebc906UL, 0x42ea773b0bd19807UL, 0xd05cd2c4b01ea630UL); ++ TESTINST_RRR("srl.d", "$r18", "$r25", "$r28", 0x30d908baaa31230eUL, 0x779272ae228746a5UL, 0xf7b665809a3f303bUL); ++ TESTINST_RRR("srl.d", "$r5", "$r28", "$r27", 0x1f1d414f1d0f1feUL, 0x647277d3759d74bfUL, 0xa5c5fce39b4a1810UL); ++ TESTINST_RRR("srl.d", "$r24", "$r9", "$r26", 0x5fa44419162fc2c8UL, 0x9d2a589e6f6b3440UL, 0x810a615115238d8dUL); ++ TESTINST_RRR("srl.d", "$r31", "$r23", "$r30", 0xfa1a7ad64758b758UL, 0xe3d69d99e87b4297UL, 0x87fd8dc0a78e86bbUL); 
++ TESTINST_RRR("srl.d", "$r26", "$r10", "$r24", 0x540888639a787231UL, 0x168791cefeb1660aUL, 0xd02b158115db9cdfUL); ++ TESTINST_RRR("srl.d", "$r23", "$r15", "$r12", 0xff3e950565409999UL, 0xe15a01fa0e34ea3bUL, 0x237aba34fe552f8eUL); ++ TESTINST_RRR("srl.d", "$r8", "$r16", "$r4", 0x825bafd36cc0d32eUL, 0x321677304d1b1406UL, 0xca68c6c83dfa5837UL); ++ ++ /* ---------------- sra.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("sra.d", "$r23", "$r19", "$r16", 0x4cab63abd8f64774UL, 0x2c007c3ac68d7c80UL, 0xd8f4ac963a8b2c01UL); ++ TESTINST_RRR("sra.d", "$r18", "$r30", "$r25", 0x531de73fca30361aUL, 0x2857ba730cd281ffUL, 0xacab0fe400e4c113UL); ++ TESTINST_RRR("sra.d", "$r31", "$r13", "$r10", 0x3184416bc93a5e26UL, 0xad5864bc4022de96UL, 0xf7007bdbf1f728abUL); ++ TESTINST_RRR("sra.d", "$r6", "$r25", "$r23", 0x9184d2df291f3402UL, 0x7c0b117dcad80c03UL, 0x35b29b0dde1a94bdUL); ++ TESTINST_RRR("sra.d", "$r16", "$r6", "$r29", 0x2849e543d35dff5fUL, 0x9f13f36a632a3fUL, 0xf31f881e12072fe2UL); ++ TESTINST_RRR("sra.d", "$r7", "$r29", "$r10", 0x25c763f8366139ddUL, 0xfd77fd6e69e371c6UL, 0xcaa2ec6ad4f3b996UL); ++ TESTINST_RRR("sra.d", "$r24", "$r25", "$r26", 0x472602300b4f04c9UL, 0x54ceea832a5677e9UL, 0x5f63e9d9d6eb4af0UL); ++ TESTINST_RRR("sra.d", "$r23", "$r4", "$r27", 0xe8b449325a0ed51eUL, 0xd96928476f8441a5UL, 0x7e1ae8fd9c849dceUL); ++ TESTINST_RRR("sra.d", "$r15", "$r9", "$r12", 0x71601a1a2b155f51UL, 0xbcbb1d162563240UL, 0x5a906ad2f4abb4c7UL); ++ TESTINST_RRR("sra.d", "$r16", "$r29", "$r23", 0x1686886f27d397fbUL, 0x851328b2655e5689UL, 0x1634457590cd4033UL); ++ ++ /* ---------------- rotr.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("rotr.w", "$r8", "$r5", "$r18", 0xc4394aae4c13908bUL, 0xa0c5728d1211b595UL, 0x3d562746b3943f3bUL); ++ TESTINST_RRR("rotr.w", "$r19", "$r18", "$r10", 0x284b501639de116bUL, 0x4248ad6cc0107902UL, 0xb41907b756bf8004UL); ++ TESTINST_RRR("rotr.w", "$r29", "$r8", "$r4", 0x2656b50c7d689f19UL, 0x7b5d21fdce9bcb73UL, 0x5b212fbe9e6b8522UL); ++ TESTINST_RRR("rotr.w", "$r25", "$r6", "$r30", 0x4c79ed7a1695fc25UL, 0x6bac1698a978f50fUL, 0xf1d58570dfb10203UL); ++ TESTINST_RRR("rotr.w", "$r14", "$r18", "$r6", 0xe894476b4ebbff23UL, 0x1398b65ae1e91c98UL, 0xebb6c3f5f689d2d8UL); ++ TESTINST_RRR("rotr.w", "$r19", "$r29", "$r26", 0x2595423cc93ecd7cUL, 0x6c462c2d29d8f908UL, 0x19142efd8e0b48b8UL); ++ TESTINST_RRR("rotr.w", "$r23", "$r10", "$r25", 0x68b4d913b267a3a2UL, 0x69afb673907e4506UL, 0xbd09ff2ed890862dUL); ++ TESTINST_RRR("rotr.w", "$r9", "$r14", "$r27", 0x17a45b8cbdebd6efUL, 0x33effef864846356UL, 0x3f52e437f2d5da62UL); ++ TESTINST_RRR("rotr.w", "$r5", "$r12", "$r23", 0x2d191b1a9707cf26UL, 0x86fa75433dac3d39UL, 0x21136a02424e5da4UL); ++ TESTINST_RRR("rotr.w", "$r29", "$r18", "$r27", 0x7d989f74f9944f8dUL, 0x50fe5829a153e6UL, 0x926776f9140b06fcUL); ++ ++ /* ---------------- rotr.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("rotr.d", "$r29", "$r19", "$r13", 0x1e02c0c28ec3f9b1UL, 0xf2e79e6ff240b188UL, 0x60f500663eddf444UL); ++ TESTINST_RRR("rotr.d", "$r30", "$r4", "$r14", 0x97f6be8229e2e822UL, 0xf79aaeb2c03a2113UL, 0xbbdb2cb642605ed7UL); ++ TESTINST_RRR("rotr.d", "$r6", "$r19", "$r7", 0x1611806010ce99d8UL, 0xcb64270e0fc5b4c7UL, 0xeda6972c46af03cUL); ++ TESTINST_RRR("rotr.d", "$r4", "$r15", "$r30", 0xe63084e97bd0efb3UL, 0x6e1aa322e38e9b66UL, 0xa7df0f1d92106e2dUL); ++ TESTINST_RRR("rotr.d", "$r16", "$r27", "$r10", 0x1ff92fbb0f10ff9aUL, 0x15c2eb91c9ae124UL, 0x8b4c97ee7f9bc2faUL); ++ TESTINST_RRR("rotr.d", "$r28", "$r7", "$r25", 0xbd766a63bbead21cUL, 0xd97b509610db5e7UL, 0x3151203010315af5UL); ++ 
TESTINST_RRR("rotr.d", "$r9", "$r20", "$r23", 0x8a2bb5eacea50d68UL, 0x947ec1930151adb9UL, 0xc2f39e045d278b7bUL); ++ TESTINST_RRR("rotr.d", "$r25", "$r13", "$r23", 0xcaddb8ea7bd492c7UL, 0x416a1b790dbf45cbUL, 0x44c59965e1c6af25UL); ++ TESTINST_RRR("rotr.d", "$r14", "$r7", "$r31", 0x8ca18b58047c8b5aUL, 0x93a6cdc3585b5446UL, 0x70cd84ec07e33cefUL); ++ TESTINST_RRR("rotr.d", "$r14", "$r9", "$r4", 0x48bd5c133004f490UL, 0xad095be0915fe20bUL, 0xc1fff6ff603a47b3UL); ++ ++ /* ---------------- slli.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("slli.w", "$r18", "$r8", UI5, 0xe7f8823a2989c395UL, 0xf0ccc85519ad1e0aUL, 10); ++ TESTINST_RRI("slli.w", "$r27", "$r17", UI5, 0x2e66b550a3bb071dUL, 0x20943aa3eaa4024eUL, 30); ++ TESTINST_RRI("slli.w", "$r27", "$r23", UI5, 0x70daa2bee8209243UL, 0x2e9160afd2e28a64UL, 31); ++ TESTINST_RRI("slli.w", "$r10", "$r13", UI5, 0x701c424632b5dc29UL, 0x591054db6afe1725UL, 12); ++ TESTINST_RRI("slli.w", "$r7", "$r15", UI5, 0xdd1d7fe3ae579499UL, 0x2e077f689088c0c7UL, 19); ++ TESTINST_RRI("slli.w", "$r6", "$r8", UI5, 0xff732113ddaab79bUL, 0x9cacf8e6d9e37f97UL, 12); ++ TESTINST_RRI("slli.w", "$r5", "$r19", UI5, 0xcef75ddd2adc5853UL, 0xcc24ed9167fd06eaUL, 22); ++ TESTINST_RRI("slli.w", "$r17", "$r8", UI5, 0x3c8788fed3e8a049UL, 0xccf9b2d2c2e80251UL, 7); ++ TESTINST_RRI("slli.w", "$r14", "$r29", UI5, 0xe1b0b077db4f08eUL, 0x76aea4b9ae43cdfbUL, 10); ++ TESTINST_RRI("slli.w", "$r23", "$r30", UI5, 0x13d8514aeb0dc12bUL, 0x9c8352804e7e8ccbUL, 26); ++ ++ /* ---------------- slli.d rd, rj, ui6 ---------------- */ ++ TESTINST_RRI("slli.d", "$r27", "$r28", UI6, 0x689a2c4141835926UL, 0x1b6ff38e611d1e4dUL, 5); ++ TESTINST_RRI("slli.d", "$r5", "$r20", UI6, 0xff3391c2323defa6UL, 0xe99a134a0c1a2574UL, 1); ++ TESTINST_RRI("slli.d", "$r27", "$r7", UI6, 0xc32d8fb319ba47e6UL, 0xc6530e0e601d3631UL, 61); ++ TESTINST_RRI("slli.d", "$r5", "$r26", UI6, 0x979553ff112cdf52UL, 0x931e420364fdcacaUL, 45); ++ TESTINST_RRI("slli.d", "$r27", "$r5", UI6, 0xa7f70b048a4087b0UL, 0xc1b829210c3cd5a9UL, 60); ++ TESTINST_RRI("slli.d", "$r23", "$r10", UI6, 0xcd547af78ac66ca7UL, 0xa2c0802de6c82645UL, 59); ++ TESTINST_RRI("slli.d", "$r13", "$r30", UI6, 0x410b8f25e1234eeUL, 0xdbaacfe884cda24dUL, 56); ++ TESTINST_RRI("slli.d", "$r16", "$r4", UI6, 0x44a2ff35045ec37cUL, 0xee2240010629a8eeUL, 20); ++ TESTINST_RRI("slli.d", "$r19", "$r20", UI6, 0x8617d88408d75cacUL, 0xba15483820d66ae7UL, 25); ++ TESTINST_RRI("slli.d", "$r24", "$r27", UI6, 0x669e0e9b99d5b604UL, 0xf5d1ffc374e53c7dUL, 13); ++ ++ /* ---------------- srli.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("srli.w", "$r20", "$r16", UI5, 0x7f5310ac5eaa9924UL, 0xea8b69613d183eeUL, 10); ++ TESTINST_RRI("srli.w", "$r13", "$r15", UI5, 0x5f4d9313f9224389UL, 0xd544272206f4e814UL, 0); ++ TESTINST_RRI("srli.w", "$r17", "$r18", UI5, 0xd9b2c942f996cc8aUL, 0x704cd1d89de5c2b4UL, 7); ++ TESTINST_RRI("srli.w", "$r27", "$r28", UI5, 0xa3eef8efc97e0d4fUL, 0x8c449e6236daa7a2UL, 18); ++ TESTINST_RRI("srli.w", "$r9", "$r10", UI5, 0x6c044927152e5fc9UL, 0x592a1607944e0109UL, 29); ++ TESTINST_RRI("srli.w", "$r8", "$r24", UI5, 0xcaa01b37d49db675UL, 0x5e35848bbc958164UL, 31); ++ TESTINST_RRI("srli.w", "$r6", "$r16", UI5, 0xe2fbe1accb343769UL, 0x85f5e17c7d785222UL, 18); ++ TESTINST_RRI("srli.w", "$r18", "$r25", UI5, 0x4653c07e0627825fUL, 0x44fffa524ffd0417UL, 31); ++ TESTINST_RRI("srli.w", "$r5", "$r26", UI5, 0x817ebd7154c8ed46UL, 0xc7399a9899fc5958UL, 22); ++ TESTINST_RRI("srli.w", "$r27", "$r4", UI5, 0x3e4b17b34f2b08d0UL, 0x5bedb97aefd697f4UL, 27); ++ ++ /* ---------------- srli.d rd, rj, 
ui6 ---------------- */ ++ TESTINST_RRI("srli.d", "$r31", "$r9", UI6, 0x8fc21da189af52edUL, 0x235bf33e3e612a15UL, 51); ++ TESTINST_RRI("srli.d", "$r26", "$r7", UI6, 0xcd1eaac4df2531ddUL, 0xe87216fce9c75788UL, 36); ++ TESTINST_RRI("srli.d", "$r6", "$r31", UI6, 0xc0282beeb7dc6618UL, 0x8b58604d6be3e8e0UL, 29); ++ TESTINST_RRI("srli.d", "$r20", "$r6", UI6, 0x1546fdd9fc133e39UL, 0x74067840bb05a992UL, 18); ++ TESTINST_RRI("srli.d", "$r28", "$r20", UI6, 0xaa1f88b09e13e4c6UL, 0x6e153faa5221e893UL, 28); ++ TESTINST_RRI("srli.d", "$r26", "$r4", UI6, 0x2ba2151c80dbea7aUL, 0x21246f3c7063edf9UL, 55); ++ TESTINST_RRI("srli.d", "$r28", "$r29", UI6, 0xcd72eff1b5aa0877UL, 0x5d9488c1d61a1544UL, 34); ++ TESTINST_RRI("srli.d", "$r13", "$r7", UI6, 0x5953b78fbd8109a9UL, 0x862731652b653859UL, 62); ++ TESTINST_RRI("srli.d", "$r29", "$r18", UI6, 0xab821449d149a976UL, 0xcb73553146cc4bdcUL, 25); ++ TESTINST_RRI("srli.d", "$r28", "$r7", UI6, 0x31272fa88123357dUL, 0xe9359f7a9f92ec5UL, 2); ++ ++ /* ---------------- srai.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("srai.w", "$r26", "$r23", UI5, 0xe73a55c2b7005c01UL, 0xfcd659254f4b3fe7UL, 2); ++ TESTINST_RRI("srai.w", "$r31", "$r10", UI5, 0x2e0c4330fae0890aUL, 0xa76ca364a204c82bUL, 0); ++ TESTINST_RRI("srai.w", "$r31", "$r8", UI5, 0x64790bb6e8674f68UL, 0xce5594f964c4a026UL, 0); ++ TESTINST_RRI("srai.w", "$r15", "$r31", UI5, 0xccfb53c708026acdUL, 0xce185873627515b5UL, 27); ++ TESTINST_RRI("srai.w", "$r16", "$r28", UI5, 0x994c4d22e90185a2UL, 0x49995d51019e1050UL, 1); ++ TESTINST_RRI("srai.w", "$r13", "$r16", UI5, 0x484408b57b3ab89UL, 0x437401347e23c399UL, 16); ++ TESTINST_RRI("srai.w", "$r4", "$r9", UI5, 0xd1d936105b7cca3UL, 0xd49c3c65e292b942UL, 7); ++ TESTINST_RRI("srai.w", "$r24", "$r15", UI5, 0xaa9377005232ec93UL, 0xde29d0172b40f03dUL, 10); ++ TESTINST_RRI("srai.w", "$r19", "$r14", UI5, 0xa49c65a4c2cde36dUL, 0x782e0d4b8a7a28d0UL, 24); ++ TESTINST_RRI("srai.w", "$r24", "$r27", UI5, 0x404f816ff696bbc8UL, 0x1b6900e15f252315UL, 24); ++ ++ /* ---------------- srai.d rd, rj, ui6 ---------------- */ ++ TESTINST_RRI("srai.d", "$r24", "$r4", UI6, 0x96250384fede78c7UL, 0x6c501d9ec5e9e731UL, 22); ++ TESTINST_RRI("srai.d", "$r30", "$r19", UI6, 0xcfc52d7caaf7bf47UL, 0x82499a30d50f8b83UL, 17); ++ TESTINST_RRI("srai.d", "$r12", "$r12", UI6, 0x628a1a46bbe30c16UL, 0xaba392c50d63ea53UL, 5); ++ TESTINST_RRI("srai.d", "$r24", "$r9", UI6, 0x21c1bb01f0253d8UL, 0xb35e31d92548a2feUL, 2); ++ TESTINST_RRI("srai.d", "$r28", "$r7", UI6, 0x2a5ac0a983332ec3UL, 0x2297ae499a473c6dUL, 62); ++ TESTINST_RRI("srai.d", "$r8", "$r17", UI6, 0xa27cf36651750e09UL, 0x1984e046b042d0cfUL, 31); ++ TESTINST_RRI("srai.d", "$r25", "$r16", UI6, 0x7df3822fb20b8dedUL, 0xb4e464563029fac8UL, 37); ++ TESTINST_RRI("srai.d", "$r14", "$r5", UI6, 0xe8c1939c13a2e6caUL, 0x6a22077c63497a9aUL, 57); ++ TESTINST_RRI("srai.d", "$r25", "$r15", UI6, 0xf2df68e25cccf72eUL, 0xe0af648201f919fcUL, 10); ++ TESTINST_RRI("srai.d", "$r6", "$r15", UI6, 0xa24591b35142aa9cUL, 0x12b20ac67de77b8dUL, 49); ++ ++ /* ---------------- rotri.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("rotri.w", "$r18", "$r6", UI5, 0xf0c65b137926ba00UL, 0x95e0f5f057a212c5UL, 20); ++ TESTINST_RRI("rotri.w", "$r9", "$r16", UI5, 0xe36356471d2a7e18UL, 0xb8af3071021bd869UL, 27); ++ TESTINST_RRI("rotri.w", "$r5", "$r31", UI5, 0x5992fc9cfce2ebe9UL, 0x6c427c821603d01aUL, 1); ++ TESTINST_RRI("rotri.w", "$r27", "$r13", UI5, 0x239c57dca2ab060UL, 0xed54e28825b25471UL, 23); ++ TESTINST_RRI("rotri.w", "$r18", "$r18", UI5, 0xb84df2305a710936UL, 0x8aae5248c6d4973cUL, 7); ++ 
TESTINST_RRI("rotri.w", "$r4", "$r27", UI5, 0x730e1701570ac9fcUL, 0xd55b9d54232536e7UL, 29); ++ TESTINST_RRI("rotri.w", "$r19", "$r18", UI5, 0x36dbceffa501d8dcUL, 0x8415238fa1dd314fUL, 0); ++ TESTINST_RRI("rotri.w", "$r13", "$r24", UI5, 0xc1ac428ddf5193UL, 0x3b588028fcfbb0a8UL, 21); ++ TESTINST_RRI("rotri.w", "$r14", "$r25", UI5, 0x733414543ca8145eUL, 0xded24831de35be08UL, 29); ++ TESTINST_RRI("rotri.w", "$r27", "$r5", UI5, 0x60afaebb36d22ba0UL, 0xfd31a16f03582b5UL, 8); ++ ++ /* ---------------- rotri.d rd, rj, ui6 ---------------- */ ++ TESTINST_RRI("rotri.d", "$r20", "$r7", UI6, 0xe112a6d47c0444c1UL, 0xbd9bbb91bdc381c5UL, 53); ++ TESTINST_RRI("rotri.d", "$r27", "$r16", UI6, 0xf254a827c1ef7351UL, 0x3de084650f757cebUL, 62); ++ TESTINST_RRI("rotri.d", "$r30", "$r17", UI6, 0x31c36a8c83999eb2UL, 0x107098a9863e85d5UL, 10); ++ TESTINST_RRI("rotri.d", "$r29", "$r8", UI6, 0xf2e7a25c121af3c3UL, 0xb177c110c3dd3225UL, 46); ++ TESTINST_RRI("rotri.d", "$r4", "$r26", UI6, 0xdd94ff60f2e1abffUL, 0xb76d3e4a0af02e4dUL, 45); ++ TESTINST_RRI("rotri.d", "$r10", "$r9", UI6, 0x6064d48d901beca7UL, 0xea20b33360134ab2UL, 42); ++ TESTINST_RRI("rotri.d", "$r4", "$r26", UI6, 0x27f1e63c8f7f71cfUL, 0xf4c5c8a69f37a1bdUL, 27); ++ TESTINST_RRI("rotri.d", "$r9", "$r16", UI6, 0x7d4cb07a3ab72944UL, 0xd5ee210421c6080eUL, 20); ++ TESTINST_RRI("rotri.d", "$r24", "$r26", UI6, 0x1ce66a79f3e45e6fUL, 0x6e1767144ffa6e2dUL, 4); ++ TESTINST_RRI("rotri.d", "$r4", "$r18", UI6, 0x4173f8102b03399UL, 0xde7066568917d899UL, 46); ++ ++ /* ---------------- ext.w.h rd, rj ---------------- */ ++ TESTINST_RR("ext.w.h", "$r17", "$r14", 0x58af862c6fc4208dUL, 0x6235b0cfe4eed6edUL); ++ TESTINST_RR("ext.w.h", "$r31", "$r20", 0x425af3dcd83fa9fdUL, 0x6e59403101a538f1UL); ++ TESTINST_RR("ext.w.h", "$r18", "$r27", 0xcb140226bf788367UL, 0x58a5430ee4e1616eUL); ++ TESTINST_RR("ext.w.h", "$r15", "$r10", 0xd3debaf05f7d909fUL, 0x6f7083340247fb12UL); ++ TESTINST_RR("ext.w.h", "$r12", "$r15", 0x5dc6f7191af80bcfUL, 0xb1f1c8f4b11c03d9UL); ++ TESTINST_RR("ext.w.h", "$r7", "$r15", 0x5ffe304a5c9dc9d2UL, 0x102fb4fa33193103UL); ++ TESTINST_RR("ext.w.h", "$r16", "$r16", 0x533616e37505799fUL, 0xf988c7255086f4f5UL); ++ TESTINST_RR("ext.w.h", "$r13", "$r25", 0x805a406557ed3facUL, 0xdc6ce0f2993b219bUL); ++ TESTINST_RR("ext.w.h", "$r19", "$r20", 0xcc49c20125c4755dUL, 0xde7b765222a9703aUL); ++ TESTINST_RR("ext.w.h", "$r18", "$r7", 0xe0dd9155cbe168c6UL, 0xc1063421eae07663UL); ++ ++ /* ---------------- ext.w.b rd, rj ---------------- */ ++ TESTINST_RR("ext.w.b", "$r16", "$r23", 0x21666e814555aa02UL, 0x926b8d68b5c40592UL); ++ TESTINST_RR("ext.w.b", "$r8", "$r20", 0xf68ae0a0ac497dedUL, 0xbfb5d489716d0c5UL); ++ TESTINST_RR("ext.w.b", "$r24", "$r15", 0xbc84e54c82fd6e51UL, 0x7d814b11e5eb07f6UL); ++ TESTINST_RR("ext.w.b", "$r31", "$r17", 0x14e575a8dda1f0d3UL, 0x6a111e663a52244cUL); ++ TESTINST_RR("ext.w.b", "$r16", "$r8", 0x911acc218fcf640bUL, 0xac1405ad05b23e43UL); ++ TESTINST_RR("ext.w.b", "$r28", "$r8", 0x77fb13eaa8995607UL, 0x5c97a81f12da7d3UL); ++ TESTINST_RR("ext.w.b", "$r9", "$r23", 0xb88cfdb98683e15eUL, 0x74893b34973e16cbUL); ++ TESTINST_RR("ext.w.b", "$r31", "$r4", 0xc7168cb4f7d079e4UL, 0xf4fc215bc2c5273eUL); ++ TESTINST_RR("ext.w.b", "$r4", "$r18", 0xe2e5dca4727b373UL, 0xa1b97136f32e452bUL); ++ TESTINST_RR("ext.w.b", "$r8", "$r29", 0x625eb5236f483daaUL, 0x3ceca34ee347e7c8UL); ++ ++ /* ---------------- clo.w rd, rj ---------------- */ ++ TESTINST_RR("clo.w", "$r4", "$r13", 0xbcca747f77aca28UL, 0x8df71972c1a17096UL); ++ TESTINST_RR("clo.w", "$r27", "$r5", 0x98a9e6d99d8e84cbUL, 
0xdc59d3c8fc1540e4UL); ++ TESTINST_RR("clo.w", "$r9", "$r14", 0xe8e78b162c95ed66UL, 0xdfad6854bbf442e6UL); ++ TESTINST_RR("clo.w", "$r13", "$r26", 0xa3db2cf80f9112cdUL, 0x7676463dd6f13f80UL); ++ TESTINST_RR("clo.w", "$r7", "$r16", 0xb5213ab31b574031UL, 0x478c19ebdeaa74c0UL); ++ TESTINST_RR("clo.w", "$r13", "$r12", 0xd68d9661284fb9d7UL, 0x702bf24fddd8bfe0UL); ++ TESTINST_RR("clo.w", "$r18", "$r20", 0x510cd4002aff4c6cUL, 0x4fc898e8b83669eeUL); ++ TESTINST_RR("clo.w", "$r5", "$r9", 0x53c0de96f709208dUL, 0xe56d87b898438b5UL); ++ TESTINST_RR("clo.w", "$r20", "$r5", 0x96187854fcce4fd1UL, 0xf1248bea6ed8be30UL); ++ TESTINST_RR("clo.w", "$r20", "$r31", 0xb1abb4795d411683UL, 0x1025f914a9225e6UL); ++ ++ /* ---------------- clz.w rd, rj ---------------- */ ++ TESTINST_RR("clz.w", "$r19", "$r8", 0x374348642747a8dcUL, 0xd8ec1d547d95ada5UL); ++ TESTINST_RR("clz.w", "$r26", "$r4", 0x741ab4d14b9ee1f8UL, 0x99e2ef840817cfffUL); ++ TESTINST_RR("clz.w", "$r17", "$r4", 0x45c9ce7217f501b3UL, 0xa387a194cd03bcf1UL); ++ TESTINST_RR("clz.w", "$r13", "$r26", 0x69707656f354d758UL, 0xd4a8f8ab02b876b0UL); ++ TESTINST_RR("clz.w", "$r25", "$r13", 0x103ce6ee41e094c3UL, 0xd7a85bf4006e655aUL); ++ TESTINST_RR("clz.w", "$r5", "$r13", 0x3910578929e7cd4aUL, 0x93c87b02b7b1b603UL); ++ TESTINST_RR("clz.w", "$r18", "$r29", 0x10639f8979feefe5UL, 0x9d8b4b8f8493f844UL); ++ TESTINST_RR("clz.w", "$r25", "$r16", 0x7b35b3e995b3b44dUL, 0xad953d0ae0b3e870UL); ++ TESTINST_RR("clz.w", "$r6", "$r25", 0xda6cbd19f10ef86fUL, 0x1d6665db1162cfb4UL); ++ TESTINST_RR("clz.w", "$r5", "$r12", 0x8a6f4d6ec8d7c00dUL, 0x19b40cb8dd8d1679UL); ++ ++ /* ---------------- cto.w rd, rj ---------------- */ ++ TESTINST_RR("cto.w", "$r7", "$r15", 0x7285e9c364562d11UL, 0x963655c7f58de520UL); ++ TESTINST_RR("cto.w", "$r4", "$r15", 0x105dceebc6d7e641UL, 0xfc01c17baaca9c46UL); ++ TESTINST_RR("cto.w", "$r31", "$r28", 0xdeff9742b93f0591UL, 0x2cf98074b0151f33UL); ++ TESTINST_RR("cto.w", "$r13", "$r8", 0xeee665743cd218ffUL, 0xbdd700b2535aa3b7UL); ++ TESTINST_RR("cto.w", "$r23", "$r13", 0x1cc22cfd7c0c869cUL, 0x5b848b64decbee8fUL); ++ TESTINST_RR("cto.w", "$r12", "$r18", 0x5c32b3db803e5988UL, 0x2d5d1ebf93b79dd0UL); ++ TESTINST_RR("cto.w", "$r17", "$r9", 0xc11d806786501f0eUL, 0xd175fe2ca41bda38UL); ++ TESTINST_RR("cto.w", "$r24", "$r16", 0x504f9b43af62e2adUL, 0xfce545d98e2361daUL); ++ TESTINST_RR("cto.w", "$r24", "$r8", 0xc13ac5668538f5a4UL, 0x3096912e575d64dbUL); ++ TESTINST_RR("cto.w", "$r27", "$r17", 0xd27f68629dd8d4fbUL, 0x15ac43632e175a8bUL); ++ ++ /* ---------------- ctz.w rd, rj ---------------- */ ++ TESTINST_RR("ctz.w", "$r8", "$r12", 0xfc9bd3736a3c08bdUL, 0xaebba33c2e268daaUL); ++ TESTINST_RR("ctz.w", "$r5", "$r27", 0x5dc8af7bac7db01aUL, 0xabce2f0e113597aaUL); ++ TESTINST_RR("ctz.w", "$r18", "$r6", 0xe4ac5b59d8442dfeUL, 0x935d1b694e96bd04UL); ++ TESTINST_RR("ctz.w", "$r9", "$r15", 0x9b760f465efbb52eUL, 0x834c9974dba65d99UL); ++ TESTINST_RR("ctz.w", "$r13", "$r7", 0x95b5748f5f8bfb38UL, 0x75dd7a9890cdf2d9UL); ++ TESTINST_RR("ctz.w", "$r29", "$r17", 0xa25119fd892d1b20UL, 0x38c12e795dc52acfUL); ++ TESTINST_RR("ctz.w", "$r15", "$r12", 0x95c2ce0f0446807cUL, 0x623a5915ac8164b2UL); ++ TESTINST_RR("ctz.w", "$r6", "$r17", 0xd9034892a300dca8UL, 0x5911fea4e6ce1df3UL); ++ TESTINST_RR("ctz.w", "$r10", "$r25", 0xda1e0d0eb34884abUL, 0x8d70d49a10ba8968UL); ++ TESTINST_RR("ctz.w", "$r14", "$r13", 0x207d275c076e5247UL, 0xd243debc9b557922UL); ++ ++ /* ---------------- clo.d rd, rj ---------------- */ ++ TESTINST_RR("clo.d", "$r7", "$r16", 0x9432ccd773e86812UL, 0x9f921ea959c97c2bUL); ++ 
TESTINST_RR("clo.d", "$r7", "$r12", 0xaf19ef0b422b09bfUL, 0x8773ec5c72444fe2UL); ++ TESTINST_RR("clo.d", "$r5", "$r10", 0xa2912bc0ca36fa58UL, 0x2c93a7506a8979b7UL); ++ TESTINST_RR("clo.d", "$r7", "$r28", 0x69dd3f71121c7380UL, 0x1784b7c2c7558b4aUL); ++ TESTINST_RR("clo.d", "$r15", "$r9", 0x95b40b42f113ceccUL, 0xf0cdb7b9c17bb9e1UL); ++ TESTINST_RR("clo.d", "$r9", "$r27", 0x1961ee1499945d08UL, 0x23c7a2252c1cbc78UL); ++ TESTINST_RR("clo.d", "$r30", "$r19", 0xda0aa8b04f719a51UL, 0x8f93c7a1b3cc9f12UL); ++ TESTINST_RR("clo.d", "$r26", "$r20", 0xdd4f62bfe1237a28UL, 0xd61c7bfe05165d04UL); ++ TESTINST_RR("clo.d", "$r26", "$r6", 0x44a1378e22d6ec81UL, 0x1b21543ee9abd103UL); ++ TESTINST_RR("clo.d", "$r24", "$r16", 0x51efcf6ef8eb9917UL, 0x602cbdf020ee6da8UL); ++ ++ /* ---------------- clz.d rd, rj ---------------- */ ++ TESTINST_RR("clz.d", "$r27", "$r7", 0x91df318f7b476077UL, 0x6ca0b9cf9bb84c4aUL); ++ TESTINST_RR("clz.d", "$r19", "$r30", 0x435d7fb412d9c12cUL, 0xc926e58bdb46104eUL); ++ TESTINST_RR("clz.d", "$r12", "$r30", 0x906b06441b2ef62bUL, 0x4b9b91966077ef0UL); ++ TESTINST_RR("clz.d", "$r28", "$r6", 0x28bb3e3324f33e14UL, 0x7628cd8752be6223UL); ++ TESTINST_RR("clz.d", "$r14", "$r15", 0xb7a5ae04bf2e60c0UL, 0x41a328a79afda305UL); ++ TESTINST_RR("clz.d", "$r4", "$r23", 0x5fd8327a265b1a3bUL, 0x66b92d8b5b842d4aUL); ++ TESTINST_RR("clz.d", "$r18", "$r29", 0x73df6808e38c72adUL, 0x6b91b11261dd26b6UL); ++ TESTINST_RR("clz.d", "$r13", "$r8", 0xd8d2dbd71d1783adUL, 0xdc50b7586ccab6a1UL); ++ TESTINST_RR("clz.d", "$r17", "$r10", 0xee6f842bb7686b8dUL, 0xdf52e003cd95f02fUL); ++ TESTINST_RR("clz.d", "$r13", "$r8", 0x91e717aef96cc046UL, 0x5dd0743ed560ba78UL); ++ ++ /* ---------------- cto.d rd, rj ---------------- */ ++ TESTINST_RR("cto.d", "$r31", "$r5", 0xf361d5d1fb232769UL, 0x1530b67240d804cfUL); ++ TESTINST_RR("cto.d", "$r5", "$r26", 0xbedb393d17f69d40UL, 0xcef56269ef7aecdaUL); ++ TESTINST_RR("cto.d", "$r5", "$r31", 0xadd75db878cdbf84UL, 0x8e08acc65c97f0b2UL); ++ TESTINST_RR("cto.d", "$r31", "$r31", 0x6a8a89827e4929f9UL, 0x7df0f59d97924bb3UL); ++ TESTINST_RR("cto.d", "$r14", "$r30", 0xefb0874ef3600b6dUL, 0x97a4b45ab971a548UL); ++ TESTINST_RR("cto.d", "$r5", "$r17", 0x144271fb49c8d2d8UL, 0x787e6dbb4fec4d21UL); ++ TESTINST_RR("cto.d", "$r28", "$r20", 0xd6d0953d2a12c998UL, 0xafd578caad0dfa09UL); ++ TESTINST_RR("cto.d", "$r16", "$r18", 0xde650be54a7990cUL, 0x3ea8f45e10441829UL); ++ TESTINST_RR("cto.d", "$r15", "$r16", 0xbbd328743f49a86UL, 0x5cafc638b6b509beUL); ++ TESTINST_RR("cto.d", "$r6", "$r20", 0x598ee27859cf8d0eUL, 0x4bce530e537ad762UL); ++ ++ /* ---------------- ctz.d rd, rj ---------------- */ ++ TESTINST_RR("ctz.d", "$r14", "$r28", 0xf2e4d886a8fd3fe3UL, 0xdafbabdfefac692UL); ++ TESTINST_RR("ctz.d", "$r6", "$r27", 0xe005a6a20d44fbcaUL, 0xe000ac4f4cfb2ce2UL); ++ TESTINST_RR("ctz.d", "$r15", "$r26", 0x871c2ccd50ec0784UL, 0xa82b0d96dd72f11cUL); ++ TESTINST_RR("ctz.d", "$r17", "$r20", 0xebe7d9f4ec5055d5UL, 0x65575957936d1d6eUL); ++ TESTINST_RR("ctz.d", "$r19", "$r8", 0x394effa243e5f14cUL, 0xf6852349a7b00561UL); ++ TESTINST_RR("ctz.d", "$r5", "$r9", 0x3c67392fc408e9dbUL, 0xeff4bf8e886d7cc3UL); ++ TESTINST_RR("ctz.d", "$r31", "$r15", 0xbf5435775bd0435bUL, 0x19760246c8d1d680UL); ++ TESTINST_RR("ctz.d", "$r9", "$r5", 0xccde230362ce06aUL, 0x7590c6e73077c2bcUL); ++ TESTINST_RR("ctz.d", "$r28", "$r25", 0x2518777b06d608a0UL, 0xb87647dad481ba32UL); ++ TESTINST_RR("ctz.d", "$r23", "$r19", 0xbe232a9fe2090e75UL, 0x2dceda5cdc990d2eUL); ++ ++ /* ---------------- revb.2h rd, rj ---------------- */ ++ TESTINST_RR("revb.2h", 
"$r29", "$r30", 0x75397084990a0745UL, 0xd4c83f5966c1c17UL); ++ TESTINST_RR("revb.2h", "$r17", "$r23", 0xecfbee2a69bbe344UL, 0x5a42dc5dc5705f68UL); ++ TESTINST_RR("revb.2h", "$r6", "$r14", 0xbfeffdbd68845522UL, 0x3490af5b50fd56bfUL); ++ TESTINST_RR("revb.2h", "$r13", "$r6", 0x58e1821d319a1598UL, 0x4c6711d021a72be6UL); ++ TESTINST_RR("revb.2h", "$r18", "$r8", 0x6e14994d4e16ff86UL, 0x9fda01513ab5ceb8UL); ++ TESTINST_RR("revb.2h", "$r7", "$r30", 0x9979d3a3fcfc9323UL, 0x504c708535bc136fUL); ++ TESTINST_RR("revb.2h", "$r28", "$r19", 0x9daf4aa3a33eec5fUL, 0xaa376fc54f4be6f5UL); ++ TESTINST_RR("revb.2h", "$r30", "$r8", 0x2e0bba43ec83e59eUL, 0xaee8b8acd436f6daUL); ++ TESTINST_RR("revb.2h", "$r14", "$r7", 0x9634787c9be10863UL, 0xe9da521d42716c0aUL); ++ TESTINST_RR("revb.2h", "$r23", "$r14", 0x687b89225667081aUL, 0x9089e36a4f12f9c6UL); ++ ++ /* ---------------- revb.4h rd, rj ---------------- */ ++ TESTINST_RR("revb.4h", "$r4", "$r25", 0xc42859bd06b669d2UL, 0x782e4ae6ab812191UL); ++ TESTINST_RR("revb.4h", "$r18", "$r19", 0x45ca4499d789fe5bUL, 0x6e558c98b95d346dUL); ++ TESTINST_RR("revb.4h", "$r24", "$r10", 0x2d04871fd753c43fUL, 0xbeab033e2b5a979eUL); ++ TESTINST_RR("revb.4h", "$r24", "$r8", 0xbc4deb39fb2ffe2eUL, 0x5e3e50b8025e77f3UL); ++ TESTINST_RR("revb.4h", "$r7", "$r14", 0xf44a6ea6f42e0918UL, 0x9f617a848e4ad8f2UL); ++ TESTINST_RR("revb.4h", "$r13", "$r12", 0xda815ff8648e92b9UL, 0xa401e74c4dd88e12UL); ++ TESTINST_RR("revb.4h", "$r31", "$r19", 0x7964d861d2ecb8d5UL, 0xe402e87f73fb4c68UL); ++ TESTINST_RR("revb.4h", "$r29", "$r25", 0x6beff3fa6167cdccUL, 0x11e350b71aee0229UL); ++ TESTINST_RR("revb.4h", "$r4", "$r8", 0x357a56e8ae275376UL, 0xdf8ebc175f4be7e3UL); ++ TESTINST_RR("revb.4h", "$r15", "$r27", 0xeb11b29acfe397d6UL, 0x42d231083cd97aa0UL); ++ ++ /* ---------------- revb.2w rd, rj ---------------- */ ++ TESTINST_RR("revb.2w", "$r27", "$r31", 0x978f867dd7f0cb8UL, 0x19eec2d357cd6a06UL); ++ TESTINST_RR("revb.2w", "$r10", "$r10", 0x7897a40c4fda96d5UL, 0xcb849783a18de892UL); ++ TESTINST_RR("revb.2w", "$r23", "$r14", 0x18338c734be53a1UL, 0x6258664ec1bb96b8UL); ++ TESTINST_RR("revb.2w", "$r12", "$r19", 0x7417ec4fef3451ccUL, 0x216ad32ee149542bUL); ++ TESTINST_RR("revb.2w", "$r31", "$r30", 0x8132835b9905b650UL, 0x6fac007fbefdecf2UL); ++ TESTINST_RR("revb.2w", "$r25", "$r10", 0x7336ebe375c83bedUL, 0x643f76ac3010a6bbUL); ++ TESTINST_RR("revb.2w", "$r31", "$r29", 0x5d99f79f18e805b8UL, 0xe65e70ca4cf299faUL); ++ TESTINST_RR("revb.2w", "$r30", "$r19", 0xec10dd6d7249c5faUL, 0x3f6bb22d66caf299UL); ++ TESTINST_RR("revb.2w", "$r6", "$r30", 0x2c394783817c0870UL, 0xd823cff07efd78dbUL); ++ TESTINST_RR("revb.2w", "$r4", "$r15", 0xc5acf61f075cd4e4UL, 0xc154dd7479b90c6cUL); ++ ++ /* ---------------- revb.d rd, rj ---------------- */ ++ TESTINST_RR("revb.d", "$r6", "$r23", 0xe6e05a0dafda37ceUL, 0x2ac7d047f197f6fbUL); ++ TESTINST_RR("revb.d", "$r19", "$r4", 0xc07a757bea6011ffUL, 0xcef6cef3e0f941ffUL); ++ TESTINST_RR("revb.d", "$r6", "$r15", 0x711bb31e18fcb2f3UL, 0x522068042cf5be1aUL); ++ TESTINST_RR("revb.d", "$r9", "$r7", 0xf9654c655c67392eUL, 0xa1b065742110e3f4UL); ++ TESTINST_RR("revb.d", "$r29", "$r4", 0x70c0dcad23609060UL, 0x5d04b7b2ece6f6bbUL); ++ TESTINST_RR("revb.d", "$r15", "$r4", 0x809930516f3136ebUL, 0xda33327a8d42ef55UL); ++ TESTINST_RR("revb.d", "$r10", "$r4", 0x1a7ee04b354f6af5UL, 0xcda6c6943e46fed7UL); ++ TESTINST_RR("revb.d", "$r20", "$r4", 0x315f95452d748459UL, 0xa001e934745758e0UL); ++ TESTINST_RR("revb.d", "$r6", "$r8", 0xabbd06000374627aUL, 0x85441006689de89bUL); ++ TESTINST_RR("revb.d", "$r27", "$r24", 
0x2d404e69f54afa48UL, 0x46f47b822772f3cdUL); ++ ++ /* ---------------- revh.2w rd, rj ---------------- */ ++ TESTINST_RR("revh.2w", "$r6", "$r15", 0x5b764c7bfb1999ebUL, 0x86603fc3f96843edUL); ++ TESTINST_RR("revh.2w", "$r19", "$r10", 0xf39f8e6b43dd63ceUL, 0x141d294d06276941UL); ++ TESTINST_RR("revh.2w", "$r5", "$r20", 0x3ff54e5c35d83e69UL, 0xd677d6a21384278aUL); ++ TESTINST_RR("revh.2w", "$r4", "$r31", 0xce463b02a2f840ccUL, 0x6f87c9636f9cfca6UL); ++ TESTINST_RR("revh.2w", "$r19", "$r26", 0x34abc96ddde64e27UL, 0x723ec7ce92720502UL); ++ TESTINST_RR("revh.2w", "$r8", "$r18", 0x1454a1ee8739c235UL, 0xd890efa373a6dfb0UL); ++ TESTINST_RR("revh.2w", "$r12", "$r31", 0xf0c8b856751cae70UL, 0xb675dff2568e6ebfUL); ++ TESTINST_RR("revh.2w", "$r24", "$r9", 0xb36984e3a7a3eaeaUL, 0xa169cfa9f35f6a8aUL); ++ TESTINST_RR("revh.2w", "$r25", "$r27", 0x640b3e6b41180473UL, 0x9bc307f0a2ef368fUL); ++ TESTINST_RR("revh.2w", "$r7", "$r9", 0x897e1406a0eb2dc9UL, 0x1921bcf657fecdccUL); ++ ++ /* ---------------- revh.d rd, rj ---------------- */ ++ TESTINST_RR("revh.d", "$r14", "$r25", 0xec3573411ea025e5UL, 0x6976d4371b08f1abUL); ++ TESTINST_RR("revh.d", "$r24", "$r31", 0x9ef9e5cb1375d42aUL, 0x9ce130c8a579e11dUL); ++ TESTINST_RR("revh.d", "$r9", "$r28", 0x3c8cd0055a5e7031UL, 0xf05f9381753ded16UL); ++ TESTINST_RR("revh.d", "$r24", "$r26", 0x6a4e5797f19041f6UL, 0xd26a5ae65e21041cUL); ++ TESTINST_RR("revh.d", "$r14", "$r24", 0xe2cb9a83aee22d97UL, 0x6405d71e0bb63321UL); ++ TESTINST_RR("revh.d", "$r19", "$r23", 0x91cdf3bcd9afe76dUL, 0x171953826107396aUL); ++ TESTINST_RR("revh.d", "$r23", "$r14", 0x93ed49255d084e12UL, 0x374bd76990198b43UL); ++ TESTINST_RR("revh.d", "$r31", "$r12", 0x8e54a908f04882bUL, 0xf7e8756491b9d346UL); ++ TESTINST_RR("revh.d", "$r31", "$r20", 0xbb7cd34502fdf01fUL, 0x906b7289a6957d3fUL); ++ TESTINST_RR("revh.d", "$r27", "$r30", 0xacbca1aacdd9dd3fUL, 0x3072d9c69004d4b5UL); ++ ++ /* ---------------- bitrev.4b rd, rj ---------------- */ ++ TESTINST_RR("bitrev.4b", "$r23", "$r19", 0xb422f2854b491d92UL, 0x7649084cec69098aUL); ++ TESTINST_RR("bitrev.4b", "$r27", "$r16", 0xd14736328d74b448UL, 0x1abee3a271c71db9UL); ++ TESTINST_RR("bitrev.4b", "$r15", "$r23", 0xf17c0f0ccfbb2c38UL, 0x490107ff4155bd17UL); ++ TESTINST_RR("bitrev.4b", "$r5", "$r18", 0x8408d6a30523619dUL, 0x625d5aedf0add9fbUL); ++ TESTINST_RR("bitrev.4b", "$r8", "$r15", 0xc41a2fdb60ba75a6UL, 0xe2562eab3b333a00UL); ++ TESTINST_RR("bitrev.4b", "$r17", "$r18", 0x6a409394f364c02aUL, 0xea970d90edb343ccUL); ++ TESTINST_RR("bitrev.4b", "$r25", "$r29", 0xd8d1c9b8dcff266dUL, 0xacca47ac7597ca65UL); ++ TESTINST_RR("bitrev.4b", "$r26", "$r24", 0xe2a0d11df8c5055bUL, 0xc57559d03e3e216dUL); ++ TESTINST_RR("bitrev.4b", "$r8", "$r27", 0xb6a5815170d657f0UL, 0x9f60901eefa1347aUL); ++ TESTINST_RR("bitrev.4b", "$r20", "$r16", 0x432a2fbf2b073732UL, 0x604b8d7ecb5e86dcUL); ++ ++ /* ---------------- bitrev.8b rd, rj ---------------- */ ++ TESTINST_RR("bitrev.8b", "$r25", "$r7", 0x22b2e6007f742fd1UL, 0xe8c23886def1bbc9UL); ++ TESTINST_RR("bitrev.8b", "$r28", "$r30", 0xf985d7779c5ca157UL, 0x285cbdc0f47395d1UL); ++ TESTINST_RR("bitrev.8b", "$r29", "$r13", 0xd9b8364a793bc50cUL, 0xded35d7c7ba73d29UL); ++ TESTINST_RR("bitrev.8b", "$r12", "$r28", 0x18d7769bc1147dc5UL, 0xfb6cda8c7f12313aUL); ++ TESTINST_RR("bitrev.8b", "$r23", "$r6", 0xeff84dc134b3acbeUL, 0xee7c4e89e333eda8UL); ++ TESTINST_RR("bitrev.8b", "$r24", "$r20", 0xad65748f0bc46e9fUL, 0xd0d88137a6284eacUL); ++ TESTINST_RR("bitrev.8b", "$r10", "$r5", 0xe0e1c1e262352e89UL, 0x9c43ebc4f7c65dc1UL); ++ TESTINST_RR("bitrev.8b", 
"$r27", "$r13", 0x444a53aa65d317dcUL, 0x473eea7ea5691da7UL); ++ TESTINST_RR("bitrev.8b", "$r13", "$r9", 0xfc48d0fdf4c7a6e5UL, 0x5dcad407df3401a5UL); ++ TESTINST_RR("bitrev.8b", "$r12", "$r5", 0xebef32fcbd91e9aUL, 0xe1eeea527816355eUL); ++ ++ /* ---------------- bitrev.w rd, rj ---------------- */ ++ TESTINST_RR("bitrev.w", "$r18", "$r15", 0x2028b0c8691a767UL, 0x5822df2950c9c2d3UL); ++ TESTINST_RR("bitrev.w", "$r30", "$r27", 0x2a2d48209d9f377bUL, 0xde9d59b836df41fcUL); ++ TESTINST_RR("bitrev.w", "$r17", "$r4", 0xe6fb8b07c90464e6UL, 0x65976cb5c6c6a5b0UL); ++ TESTINST_RR("bitrev.w", "$r9", "$r31", 0x1b95159ec5c37644UL, 0x62c549b741c2adadUL); ++ TESTINST_RR("bitrev.w", "$r17", "$r14", 0x8b414dfa7156f0ceUL, 0x9642d0186f420e7cUL); ++ TESTINST_RR("bitrev.w", "$r15", "$r8", 0x2722ecb374b4d5e3UL, 0xeaf151a286bbc4cfUL); ++ TESTINST_RR("bitrev.w", "$r27", "$r19", 0x58ec913c63634a5UL, 0xe723c39df96a4fd2UL); ++ TESTINST_RR("bitrev.w", "$r7", "$r26", 0xa245e7dd80a324a2UL, 0xe7d6c2b2683291eUL); ++ TESTINST_RR("bitrev.w", "$r31", "$r6", 0x114292ed02ba1255UL, 0x13cd62afac5ac3d4UL); ++ TESTINST_RR("bitrev.w", "$r7", "$r25", 0xbd46d88fc8d2933bUL, 0x69ce9ccb487dadd1UL); ++ ++ /* ---------------- bitrev.d rd, rj ---------------- */ ++ TESTINST_RR("bitrev.d", "$r4", "$r29", 0xeaacaeb60b227eabUL, 0x799f36da44887e2cUL); ++ TESTINST_RR("bitrev.d", "$r29", "$r6", 0xcfbb055ab1ebf7faUL, 0x2924f63fec744b02UL); ++ TESTINST_RR("bitrev.d", "$r28", "$r31", 0xaac74a398d76900dUL, 0xf6c75e45e33b4cb7UL); ++ TESTINST_RR("bitrev.d", "$r24", "$r12", 0xfc8bc33fb4a8d023UL, 0xcccd98e9d53aa26aUL); ++ TESTINST_RR("bitrev.d", "$r8", "$r7", 0x7502cd68289f4c3aUL, 0x746ddfd3c3a512b1UL); ++ TESTINST_RR("bitrev.d", "$r6", "$r16", 0xe8b94bfe615774aeUL, 0x518770bbee53d619UL); ++ TESTINST_RR("bitrev.d", "$r24", "$r4", 0x6318c17dbae816c3UL, 0x9ab684e129b57f07UL); ++ TESTINST_RR("bitrev.d", "$r27", "$r23", 0x8a22909b005a86b8UL, 0x69337e8c3b1fc2bbUL); ++ TESTINST_RR("bitrev.d", "$r20", "$r9", 0x9f43885d40caf0UL, 0x193cbf609dbc33d4UL); ++ TESTINST_RR("bitrev.d", "$r30", "$r19", 0x30fa02e0fc390ac9UL, 0x21686c931c6260daUL); ++ ++ /* ---------------- bytepick.w rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("bytepick.w", "$r26", "$r15", "$r19", SA2, 0x1b0b980dd3271273UL, 0x8737ca6c8106ceeeUL, 0x2807e0dcb47d6efUL, 1); ++ TESTINST_RRRI("bytepick.w", "$r15", "$r17", "$r7", SA2, 0x3d2e3fbcbd032001UL, 0x5eced8cf3da8b205UL, 0xb8155b41321e09c0UL, 0); ++ TESTINST_RRRI("bytepick.w", "$r12", "$r15", "$r17", SA2, 0x2670c80f12a87520UL, 0x29ab42125e3ea5c8UL, 0x32a39ac435460f2fUL, 3); ++ TESTINST_RRRI("bytepick.w", "$r4", "$r20", "$r18", SA2, 0x5a64271926277c04UL, 0xcbde225cc736e5d5UL, 0x18abacc874db47e9UL, 3); ++ TESTINST_RRRI("bytepick.w", "$r8", "$r5", "$r24", SA2, 0xdb41606ce3f9df94UL, 0xc3f6ce370d754a3fUL, 0x34ad5a423a5c42e3UL, 3); ++ TESTINST_RRRI("bytepick.w", "$r5", "$r30", "$r14", SA2, 0xedb3aad221050d0bUL, 0x46f5823389f2581aUL, 0xf766f1e75349809eUL, 2); ++ TESTINST_RRRI("bytepick.w", "$r4", "$r19", "$r18", SA2, 0xf92ed0231f25c991UL, 0xba59df0352ed6b3eUL, 0x58d6fbce4e4325e8UL, 0); ++ TESTINST_RRRI("bytepick.w", "$r18", "$r28", "$r24", SA2, 0x177dcaf8fcd30180UL, 0xbdc04b3b8f707462UL, 0x6102168606deb3edUL, 3); ++ TESTINST_RRRI("bytepick.w", "$r13", "$r27", "$r29", SA2, 0x383d82c5d717259bUL, 0x495e30e5e680d7fcUL, 0x1c17f315ebb3bec3UL, 2); ++ TESTINST_RRRI("bytepick.w", "$r5", "$r29", "$r4", SA2, 0x26a0fb212ab80a3aUL, 0x78b167aecd81f869UL, 0x6daab499f228fef4UL, 1); ++ ++ /* ---------------- bytepick.d rd, rj, rk, sa3 ---------------- */ ++ 
TESTINST_RRRI("bytepick.d", "$r28", "$r4", "$r28", SA3, 0x794fa22d52f7e834UL, 0x2f084db071d3bcceUL, 0xa0cf51d7020f10c1UL, 7); ++ TESTINST_RRRI("bytepick.d", "$r10", "$r18", "$r4", SA3, 0x9fd7a6b378604833UL, 0x37da15f8a7154cabUL, 0xaedd64328d27a0a8UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r7", "$r6", "$r24", SA3, 0xdee49920d429d3c2UL, 0x15e3f61f2f82a2d1UL, 0xdeba03c7761e4678UL, 3); ++ TESTINST_RRRI("bytepick.d", "$r19", "$r16", "$r5", SA3, 0x53bda4d18e61fc44UL, 0xc79bd94439006673UL, 0xa8024ab452a2bd52UL, 4); ++ TESTINST_RRRI("bytepick.d", "$r26", "$r19", "$r25", SA3, 0xc8aae5136d925592UL, 0xea109dd2837d3acfUL, 0x30e93a75e695666aUL, 7); ++ TESTINST_RRRI("bytepick.d", "$r8", "$r14", "$r8", SA3, 0xa03db273c845b37fUL, 0xa7fd0053a136769fUL, 0x6ab932903229b035UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r9", "$r14", "$r23", SA3, 0x2f160a0d147b300fUL, 0xdae9d5d15bb8f5b5UL, 0xc4fdfbb29d49dfe4UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r20", "$r18", "$r15", SA3, 0x30cefdebc30b841aUL, 0xbfd016fb0312277cUL, 0x44269b95d496912fUL, 5); ++ TESTINST_RRRI("bytepick.d", "$r12", "$r17", "$r5", SA3, 0xde32bc5d3471eed2UL, 0xdb807610c6e762e4UL, 0xb2148e34e649d1b8UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r5", "$r24", "$r28", SA3, 0x9ab1be6a0faa61a8UL, 0x97d4a12579967739UL, 0xaa592ef1fd606badUL, 3); ++ ++ /* ---------------- maskeqz rd, rj, rk ---------------- */ ++ TESTINST_RRR("maskeqz", "$r14", "$r28", "$r25", 0xc263b6b8f3404c8dUL, 0x90ef733c88c88866UL, 0xd256888d94e8d21aUL); ++ TESTINST_RRR("maskeqz", "$r13", "$r9", "$r15", 0x5bdd86b962c61db4UL, 0x8a78f7b88a728d92UL, 0x69e707acb2c26a83UL); ++ TESTINST_RRR("maskeqz", "$r7", "$r7", "$r13", 0xea86abdbdea660cbUL, 0xfb778deef0a5b893UL, 0xad10e23c971d1a9fUL); ++ TESTINST_RRR("maskeqz", "$r8", "$r7", "$r19", 0xf64df33b6146939fUL, 0xe7376d3da44f4dfdUL, 0x7987e122af2505abUL); ++ TESTINST_RRR("maskeqz", "$r10", "$r27", "$r29", 0x404a261c069b488bUL, 0x81886c523ec2658cUL, 0x3236dc83d0a27cc1UL); ++ TESTINST_RRR("maskeqz", "$r23", "$r16", "$r25", 0x8671050519b7bda0UL, 0x26fa2567b106d73aUL, 0xd884011e0d767feUL); ++ TESTINST_RRR("maskeqz", "$r5", "$r19", "$r18", 0xbd8d4cef53122132UL, 0x4976c047c57ec148UL, 0x602312f372049a5eUL); ++ TESTINST_RRR("maskeqz", "$r29", "$r24", "$r23", 0x7f390b695d8b12eUL, 0x70043e7666a24a34UL, 0xfee8f8f90ab3ac9bUL); ++ TESTINST_RRR("maskeqz", "$r25", "$r4", "$r18", 0x7eaffcb6dac1b5bUL, 0x4b12f8c6738216a2UL, 0x409acb80b7391511UL); ++ TESTINST_RRR("maskeqz", "$r30", "$r6", "$r24", 0x14d829636b628dc9UL, 0xdb88a366a2271c2cUL, 0xea0d5998835940aUL); ++ ++ /* ---------------- masknez rd, rj, rk ---------------- */ ++ TESTINST_RRR("masknez", "$r14", "$r24", "$r5", 0x46b15bbb9507bd79UL, 0xc92af628c880a454UL, 0x846a586db0af0965UL); ++ TESTINST_RRR("masknez", "$r30", "$r8", "$r8", 0x43cd20b5234db4e8UL, 0x7aeee6ab6b10561fUL, 0x45ab4fdb4ca8b325UL); ++ TESTINST_RRR("masknez", "$r24", "$r19", "$r15", 0xd3d50bbb34b528e2UL, 0xdd71746b0beedae3UL, 0xa34d82fc50174094UL); ++ TESTINST_RRR("masknez", "$r29", "$r26", "$r26", 0x576cb2da15b1462dUL, 0x6c669f0195b50b7aUL, 0xec1609ef36aa938fUL); ++ TESTINST_RRR("masknez", "$r4", "$r29", "$r10", 0xaa220f67a02617dbUL, 0xffcd18e3016e10fUL, 0x4cf9bdd8dca7f88fUL); ++ TESTINST_RRR("masknez", "$r23", "$r9", "$r29", 0x774e1c840428fbdeUL, 0x391268694388d2a7UL, 0xf06192a4e5780c53UL); ++ TESTINST_RRR("masknez", "$r7", "$r25", "$r28", 0x7b75099f16135faaUL, 0xf95af681c18bf31cUL, 0x2f6122581dfdef74UL); ++ TESTINST_RRR("masknez", "$r26", "$r10", "$r16", 0xe6006c9bd6bae204UL, 0x7e84e5db1181249dUL, 0x6ab2371059cdc875UL); ++ TESTINST_RRR("masknez", 
"$r26", "$r15", "$r28", 0xb4c9c784ef74245fUL, 0x20cc1c4c169ca02cUL, 0x606eeb8ce6278d16UL); ++ TESTINST_RRR("masknez", "$r19", "$r16", "$r16", 0x75a721553f7c7054UL, 0x7b63b7b7b3f5bd5fUL, 0xf8c7933e92e155eeUL); ++ ++ /* ---------------- bstrins.w rd, rj, msbw, lsbw ---------------- */ ++ TESTINST_RRII("bstrins.w", "$r27", "$r16", MSBW, LSBW, 0x431055863e78b187UL, 0xe18dda9620a50e9dUL, 31, 8); ++ TESTINST_RRII("bstrins.w", "$r26", "$r27", MSBW, LSBW, 0x19f800eab7e1ab51UL, 0x61e7d86005d21d29UL, 30, 27); ++ TESTINST_RRII("bstrins.w", "$r15", "$r4", MSBW, LSBW, 0xb141d462e777528dUL, 0xb7aebff9bcca1643UL, 17, 14); ++ TESTINST_RRII("bstrins.w", "$r30", "$r17", MSBW, LSBW, 0xfac48083375844feUL, 0x6d3283ba14cc27ebUL, 24, 6); ++ TESTINST_RRII("bstrins.w", "$r12", "$r12", MSBW, LSBW, 0x9b7629774f19f64aUL, 0x84ee8d65b2842686UL, 30, 25); ++ TESTINST_RRII("bstrins.w", "$r15", "$r10", MSBW, LSBW, 0x290172844863090fUL, 0x85ea298976069fcdUL, 26, 1); ++ TESTINST_RRII("bstrins.w", "$r10", "$r13", MSBW, LSBW, 0x66942ba1c15e85aaUL, 0xddb2dfa7474a4370UL, 23, 8); ++ TESTINST_RRII("bstrins.w", "$r5", "$r20", MSBW, LSBW, 0x3dcfecca80bf0d79UL, 0x5044b246f2d3f890UL, 18, 16); ++ TESTINST_RRII("bstrins.w", "$r23", "$r5", MSBW, LSBW, 0xa11723142f1472a7UL, 0xcbaaa9a23d119663UL, 25, 21); ++ TESTINST_RRII("bstrins.w", "$r20", "$r31", MSBW, LSBW, 0x6a1110240ba884b8UL, 0x45cadf0ffe08cc25UL, 13, 12); ++ ++ /* ---------------- bstrpick.w rd, rj, msbw, lsbw ---------------- */ ++ TESTINST_RRII("bstrpick.w", "$r5", "$r23", MSBW, LSBW, 0x6885eaa89f691954UL, 0x94f8458597294f2eUL, 23, 11); ++ TESTINST_RRII("bstrpick.w", "$r25", "$r8", MSBW, LSBW, 0x11be9b9923ebee96UL, 0x23deda120a49df15UL, 18, 11); ++ TESTINST_RRII("bstrpick.w", "$r6", "$r6", MSBW, LSBW, 0x3546d655181289bcUL, 0x7ee84a41c952b690UL, 10, 3); ++ TESTINST_RRII("bstrpick.w", "$r25", "$r5", MSBW, LSBW, 0xb2eec884ea77f548UL, 0x23992bc40919416fUL, 15, 9); ++ TESTINST_RRII("bstrpick.w", "$r26", "$r14", MSBW, LSBW, 0x8e591161730ac582UL, 0xf45f4435cc1cb138UL, 21, 8); ++ TESTINST_RRII("bstrpick.w", "$r9", "$r14", MSBW, LSBW, 0x1ac92d930e8361f9UL, 0xcc11dd56e96c6256UL, 7, 3); ++ TESTINST_RRII("bstrpick.w", "$r19", "$r9", MSBW, LSBW, 0xd15fd80fafe60a58UL, 0xb1426a8c680d628cUL, 8, 8); ++ TESTINST_RRII("bstrpick.w", "$r17", "$r13", MSBW, LSBW, 0xfa48c3cd091d2b5eUL, 0x3a2827a58a014a72UL, 30, 12); ++ TESTINST_RRII("bstrpick.w", "$r6", "$r31", MSBW, LSBW, 0xca10a858ebfa78a1UL, 0x202a38722f270884UL, 16, 7); ++ TESTINST_RRII("bstrpick.w", "$r20", "$r10", MSBW, LSBW, 0xc010deb269ae6ba2UL, 0x98f1d297734f9f4cUL, 31, 15); ++ ++ /* ---------------- bstrins.d rd, rj, msbd, lsbd ---------------- */ ++ TESTINST_RRII("bstrins.d", "$r29", "$r17", MSBD, LSBD, 0x7cf4a9ec79307e59UL, 0xb1b5afc00eef90a3UL, 60, 25); ++ TESTINST_RRII("bstrins.d", "$r10", "$r27", MSBD, LSBD, 0xc708602dee32579fUL, 0x199d90a711e94375UL, 31, 22); ++ TESTINST_RRII("bstrins.d", "$r4", "$r24", MSBD, LSBD, 0x4e5ce98e217a4b59UL, 0xaf25b5661daefdeaUL, 58, 58); ++ TESTINST_RRII("bstrins.d", "$r12", "$r30", MSBD, LSBD, 0x9505d862c56b1708UL, 0x7f3f0c983ce27863UL, 16, 6); ++ TESTINST_RRII("bstrins.d", "$r29", "$r5", MSBD, LSBD, 0x248f295ef3afe5aaUL, 0x9469277db61227b7UL, 43, 0); ++ TESTINST_RRII("bstrins.d", "$r31", "$r31", MSBD, LSBD, 0xbc5f0c47c3a63a94UL, 0x4aacc1c77ad0c09aUL, 49, 23); ++ TESTINST_RRII("bstrins.d", "$r6", "$r24", MSBD, LSBD, 0x79110235b8c34188UL, 0x75e3e311aef2bef9UL, 12, 2); ++ TESTINST_RRII("bstrins.d", "$r6", "$r16", MSBD, LSBD, 0xaa6e63ffd80b76c5UL, 0xb1ea7dcb3af0881dUL, 43, 13); ++ TESTINST_RRII("bstrins.d", 
"$r15", "$r25", MSBD, LSBD, 0x5b68a802f26a1804UL, 0xb4f651115b84591bUL, 53, 29); ++ TESTINST_RRII("bstrins.d", "$r9", "$r9", MSBD, LSBD, 0x3394218c965d5f1aUL, 0xf3d30b5d4d4089b4UL, 61, 40); ++ ++ /* ---------------- bstrpick.d rd, rj, msbd, lsbd ---------------- */ ++ TESTINST_RRII("bstrpick.d", "$r27", "$r27", MSBD, LSBD, 0x503c8fae2d6d7b58UL, 0x9fd9869ca812de0cUL, 63, 33); ++ TESTINST_RRII("bstrpick.d", "$r14", "$r5", MSBD, LSBD, 0x65f05eaa5e13856aUL, 0xd52c72fbeccc39f5UL, 52, 40); ++ TESTINST_RRII("bstrpick.d", "$r13", "$r20", MSBD, LSBD, 0x9cea777df4d2eae0UL, 0x6326727a36499800UL, 48, 14); ++ TESTINST_RRII("bstrpick.d", "$r10", "$r17", MSBD, LSBD, 0xf30a073a4a56604bUL, 0xc12d112f6a0c8f1UL, 43, 20); ++ TESTINST_RRII("bstrpick.d", "$r13", "$r25", MSBD, LSBD, 0xe559d975e0d9ac85UL, 0xcf41f30cc4a46713UL, 55, 37); ++ TESTINST_RRII("bstrpick.d", "$r29", "$r4", MSBD, LSBD, 0x41843db6c2a206cbUL, 0x343f795d45fcff8cUL, 34, 20); ++ TESTINST_RRII("bstrpick.d", "$r27", "$r28", MSBD, LSBD, 0xb359821297377feeUL, 0x4fc51c5773e64f69UL, 27, 10); ++ TESTINST_RRII("bstrpick.d", "$r24", "$r24", MSBD, LSBD, 0xed3cb5d1e8f0e55eUL, 0x9cdbb70a8b8d3945UL, 63, 20); ++ TESTINST_RRII("bstrpick.d", "$r7", "$r30", MSBD, LSBD, 0x11b7344343be1ccfUL, 0xa3422c671803480fUL, 34, 30); ++ TESTINST_RRII("bstrpick.d", "$r15", "$r4", MSBD, LSBD, 0x3670c6b869f28085UL, 0x2caa9d9c1351e402UL, 55, 4); ++ ++ /* ---------------- crc.w.b.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crc.w.h.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crc.w.w.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 
0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crc.w.d.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.b.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.h.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.w.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 
0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.d.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/integer.stderr.exp b/none/tests/loongarch64/integer.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/integer.stdout.exp b/none/tests/loongarch64/integer.stdout.exp +new file mode 100644 +index 000000000..a4592b797 +--- /dev/null ++++ b/none/tests/loongarch64/integer.stdout.exp +@@ -0,0 +1,2748 @@ ++add.w $r19, $r20, $r25 :: ++before: $r19=0xf7f01ffbc9696094, $r20=0xb664b1ce21c8c7fc, $r25=0x0d0a02b79ace85cf ++after: $r19=0xffffffffbc974dcb, $r20=0xb664b1ce21c8c7fc, $r25=0x0d0a02b79ace85cf ++add.w $r29, $r9, $r12 :: ++before: $r29=0x5418cd6f6b640953, $r9=0x6465907ca2dac58c, $r12=0xefea76d0d526df3a ++after: $r29=0x000000007801a4c6, $r9=0x6465907ca2dac58c, $r12=0xefea76d0d526df3a ++add.w $r23, $r15, $r28 :: ++before: $r23=0x6ae34fbc6f2f7a9a, $r15=0xbf21c48ab5c2edcc, $r28=0x24824ebd458ed20e ++after: $r23=0xfffffffffb51bfda, $r15=0xbf21c48ab5c2edcc, $r28=0x24824ebd458ed20e ++add.w $r27, $r14, $r26 :: ++before: $r27=0x9f33e38db05616cc, $r14=0xf12ee0c276c52c78, $r26=0xc3054d65ecec3fe6 ++after: $r27=0x0000000063b16c5e, $r14=0xf12ee0c276c52c78, $r26=0xc3054d65ecec3fe6 ++add.w $r14, $r23, $r27 :: ++before: $r14=0x17eaa07c4607901f, $r23=0xa5fa9d0c8472848e, $r27=0xa34301227bb57f76 ++after: $r14=0x0000000000280404, $r23=0xa5fa9d0c8472848e, $r27=0xa34301227bb57f76 ++add.w $r19, $r19, $r4 :: ++before: $r19=0xd2e0644d9532b5ea, $r19=0x2957c6f0638238bc, $r4=0xf01566d0031ee917 ++after: $r19=0x0000000066a121d3, $r19=0x0000000066a121d3, $r4=0xf01566d0031ee917 ++add.w $r19, $r26, $r13 :: ++before: $r19=0x7b39b3f2ccbdaf79, $r26=0xee877221beef9d45, $r13=0x4a743034eefe075d ++after: $r19=0xffffffffadeda4a2, $r26=0xee877221beef9d45, $r13=0x4a743034eefe075d ++add.w $r29, $r18, $r14 :: ++before: $r29=0x95214c4de7e6d3ba, $r18=0x26502eb481799cd1, $r14=0x34d57b775083fb91 ++after: $r29=0xffffffffd1fd9862, $r18=0x26502eb481799cd1, $r14=0x34d57b775083fb91 ++add.w $r16, $r26, $r8 :: 
++before: $r16=0xb66b18865bbb3036, $r26=0x8881ccbe1e31aa8d, $r8=0xffe0d2dde8325edc ++after: $r16=0x0000000006640969, $r26=0x8881ccbe1e31aa8d, $r8=0xffe0d2dde8325edc ++add.w $r26, $r5, $r8 :: ++before: $r26=0xc367af71c905540c, $r5=0xcdcbe4860d983fe3, $r8=0x6687aa19ee1fc503 ++after: $r26=0xfffffffffbb804e6, $r5=0xcdcbe4860d983fe3, $r8=0x6687aa19ee1fc503 ++add.d $r16, $r18, $r8 :: ++before: $r16=0xbe5505b409ce995c, $r18=0x561a85fd57e87226, $r8=0x923f3293987edab0 ++after: $r16=0xe859b890f0674cd6, $r18=0x561a85fd57e87226, $r8=0x923f3293987edab0 ++add.d $r12, $r7, $r29 :: ++before: $r12=0xff2682151edc3476, $r7=0x90beb037eacfe3db, $r29=0xa4017082880f1151 ++after: $r12=0x34c020ba72def52c, $r7=0x90beb037eacfe3db, $r29=0xa4017082880f1151 ++add.d $r31, $r31, $r5 :: ++before: $r31=0x81e38385e39d9f16, $r31=0xedb2ffa50c0c8b5f, $r5=0x8776f30d75fc97c2 ++after: $r31=0x7529f2b282092321, $r31=0x7529f2b282092321, $r5=0x8776f30d75fc97c2 ++add.d $r31, $r6, $r26 :: ++before: $r31=0x64ff385d97b60dc2, $r6=0x80f903f206f08f60, $r26=0x4f5b589532e85398 ++after: $r31=0xd0545c8739d8e2f8, $r6=0x80f903f206f08f60, $r26=0x4f5b589532e85398 ++add.d $r25, $r10, $r20 :: ++before: $r25=0xdd8973d6b99634ca, $r10=0x34c0fe5a72dd43d9, $r20=0x2494af03cf5878e7 ++after: $r25=0x5955ad5e4235bcc0, $r10=0x34c0fe5a72dd43d9, $r20=0x2494af03cf5878e7 ++add.d $r5, $r10, $r4 :: ++before: $r5=0x94b272b05ffe39c8, $r10=0x152d15efbbc54c04, $r4=0x25afc06cf151ab29 ++after: $r5=0x3adcd65cad16f72d, $r10=0x152d15efbbc54c04, $r4=0x25afc06cf151ab29 ++add.d $r19, $r30, $r18 :: ++before: $r19=0xa6e14d42459cadf6, $r30=0x558620ff616141b1, $r18=0x1978905697120747 ++after: $r19=0x6efeb155f87348f8, $r30=0x558620ff616141b1, $r18=0x1978905697120747 ++add.d $r7, $r8, $r20 :: ++before: $r7=0x02ea6f88031a29ae, $r8=0x6a08c12301e00d49, $r20=0xdd533acf17f59142 ++after: $r7=0x475bfbf219d59e8b, $r8=0x6a08c12301e00d49, $r20=0xdd533acf17f59142 ++add.d $r24, $r14, $r26 :: ++before: $r24=0xb88df6b8315eb7a6, $r14=0x0137d04f7f6fe285, $r26=0x2ccb253ff7ea93d6 ++after: $r24=0x2e02f58f775a765b, $r14=0x0137d04f7f6fe285, $r26=0x2ccb253ff7ea93d6 ++add.d $r7, $r19, $r23 :: ++before: $r7=0xad464722c0967f28, $r19=0x30295c1fd85ae029, $r23=0x2c69edb227e01d94 ++after: $r7=0x5c9349d2003afdbd, $r19=0x30295c1fd85ae029, $r23=0x2c69edb227e01d94 ++sub.w $r16, $r28, $r17 :: ++before: $r16=0x8b0ba4ef20207fdd, $r28=0x90493cb39ff734a2, $r17=0x519842bab5cc1208 ++after: $r16=0xffffffffea2b229a, $r28=0x90493cb39ff734a2, $r17=0x519842bab5cc1208 ++sub.w $r6, $r13, $r15 :: ++before: $r6=0x13af983aafc53691, $r13=0x27bc6a037865e47f, $r15=0xe20df003930575d5 ++after: $r6=0xffffffffe5606eaa, $r13=0x27bc6a037865e47f, $r15=0xe20df003930575d5 ++sub.w $r8, $r19, $r23 :: ++before: $r8=0x4177aec74585d42d, $r19=0xba89b6aa9b7728ac, $r23=0x0e6a089b8eaf43fe ++after: $r8=0x000000000cc7e4ae, $r19=0xba89b6aa9b7728ac, $r23=0x0e6a089b8eaf43fe ++sub.w $r7, $r10, $r23 :: ++before: $r7=0x0ca1b83a7ab88912, $r10=0xd5e2759ea82c2c80, $r23=0x76e9d6f88c2624ff ++after: $r7=0x000000001c060781, $r10=0xd5e2759ea82c2c80, $r23=0x76e9d6f88c2624ff ++sub.w $r19, $r24, $r24 :: ++before: $r19=0x99d63505ea0474b3, $r24=0x1b53c4c34957af8e, $r24=0x6146da47b731d3ed ++after: $r19=000000000000000000, $r24=0x6146da47b731d3ed, $r24=0x6146da47b731d3ed ++sub.w $r26, $r31, $r7 :: ++before: $r26=0x8eca560d8234ff55, $r31=0x5beb18985c3f451e, $r7=0x9c9634dfaa7b9313 ++after: $r26=0xffffffffb1c3b20b, $r31=0x5beb18985c3f451e, $r7=0x9c9634dfaa7b9313 ++sub.w $r29, $r16, $r6 :: ++before: $r29=0x229544d2cb1d5a64, $r16=0xd23751d515597128, $r6=0xa09dd29330aa8d15 ++after: 
$r29=0xffffffffe4aee413, $r16=0xd23751d515597128, $r6=0xa09dd29330aa8d15 ++sub.w $r12, $r16, $r4 :: ++before: $r12=0x229f5aefe9fb7fb7, $r16=0x0740ed49b5e95fae, $r4=0xbc6304a0df442807 ++after: $r12=0xffffffffd6a537a7, $r16=0x0740ed49b5e95fae, $r4=0xbc6304a0df442807 ++sub.w $r30, $r29, $r26 :: ++before: $r30=0x94f3a67d188df281, $r29=0x048e066cdad20ac2, $r26=0x1e032e60568554a7 ++after: $r30=0xffffffff844cb61b, $r29=0x048e066cdad20ac2, $r26=0x1e032e60568554a7 ++sub.w $r18, $r23, $r25 :: ++before: $r18=0xedb4f44fb338ba4f, $r23=0xf06e698cd08c8e7b, $r25=0xa22b91e88b77d4d8 ++after: $r18=0x000000004514b9a3, $r23=0xf06e698cd08c8e7b, $r25=0xa22b91e88b77d4d8 ++sub.d $r18, $r10, $r27 :: ++before: $r18=0x68647aa06a23c8f9, $r10=0xd001cb46cb78fc4f, $r27=0x460cc8702b1761f9 ++after: $r18=0x89f502d6a0619a56, $r10=0xd001cb46cb78fc4f, $r27=0x460cc8702b1761f9 ++sub.d $r7, $r24, $r18 :: ++before: $r7=0x8d18e952fb747f43, $r24=0x1e7d1a019fb96490, $r18=0xb466fb9891e8c151 ++after: $r7=0x6a161e690dd0a33f, $r24=0x1e7d1a019fb96490, $r18=0xb466fb9891e8c151 ++sub.d $r4, $r16, $r27 :: ++before: $r4=0x5f6647277ca4c99d, $r16=0xa1156b863ec98e1d, $r27=0xc15612f3ce819d64 ++after: $r4=0xdfbf58927047f0b9, $r16=0xa1156b863ec98e1d, $r27=0xc15612f3ce819d64 ++sub.d $r4, $r25, $r9 :: ++before: $r4=0xe67b33778df480b4, $r25=0xc24b2711be7e4ef1, $r9=0xd940ca25b956100f ++after: $r4=0xe90a5cec05283ee2, $r25=0xc24b2711be7e4ef1, $r9=0xd940ca25b956100f ++sub.d $r5, $r12, $r18 :: ++before: $r5=0x258ae461ef798ce7, $r12=0x3f4984ea3f5692de, $r18=0x99fa673f30e69019 ++after: $r5=0xa54f1dab0e7002c5, $r12=0x3f4984ea3f5692de, $r18=0x99fa673f30e69019 ++sub.d $r13, $r10, $r9 :: ++before: $r13=0xdafb48debea5211e, $r10=0xeac1d3b25f6bf8db, $r9=0x297d671b1c96e48f ++after: $r13=0xc1446c9742d5144c, $r10=0xeac1d3b25f6bf8db, $r9=0x297d671b1c96e48f ++sub.d $r7, $r15, $r23 :: ++before: $r7=0xc6b03274ff37baf6, $r15=0x05b37ffc2c84aec9, $r23=0x74d62a52cbaaec15 ++after: $r7=0x90dd55a960d9c2b4, $r15=0x05b37ffc2c84aec9, $r23=0x74d62a52cbaaec15 ++sub.d $r26, $r18, $r26 :: ++before: $r26=0x35c71e0956ffcd43, $r18=0xad703a4e8078070b, $r26=0x634924e8a9fdbb9e ++after: $r26=0x4a271565d67a4b6d, $r18=0xad703a4e8078070b, $r26=0x4a271565d67a4b6d ++sub.d $r16, $r29, $r5 :: ++before: $r16=0x18bf961cba922928, $r29=0x54ed9198405f8983, $r5=0x977f5b65e5f86b4a ++after: $r16=0xbd6e36325a671e39, $r29=0x54ed9198405f8983, $r5=0x977f5b65e5f86b4a ++sub.d $r31, $r28, $r14 :: ++before: $r31=0xa38a1e8cb3c7ba00, $r28=0xd220d1ef3cf8f3f7, $r14=0xc972df2ace170d61 ++after: $r31=0x08adf2c46ee1e696, $r28=0xd220d1ef3cf8f3f7, $r14=0xc972df2ace170d61 ++slt $r12, $r17, $r18 :: ++before: $r12=0xd7a0e65c279e1082, $r17=0x819edf00a849ba44, $r18=0x41a0b2fe37d44db2 ++after: $r12=0x0000000000000001, $r17=0x819edf00a849ba44, $r18=0x41a0b2fe37d44db2 ++slt $r31, $r13, $r18 :: ++before: $r31=0x2ef00a5cfd100f71, $r13=0x4792cd9f9abf36d3, $r18=0x2c117902110ef9a8 ++after: $r31=000000000000000000, $r13=0x4792cd9f9abf36d3, $r18=0x2c117902110ef9a8 ++slt $r4, $r30, $r29 :: ++before: $r4=0x6d8be2fb73e2c006, $r30=0xf76ce97d7658995e, $r29=0x3856e09bfe39df6e ++after: $r4=0x0000000000000001, $r30=0xf76ce97d7658995e, $r29=0x3856e09bfe39df6e ++slt $r4, $r18, $r10 :: ++before: $r4=0xeddcb9dcf092c3f5, $r18=0x0e57b7c25d13dea8, $r10=0x761d86b48cb5ce21 ++after: $r4=0x0000000000000001, $r18=0x0e57b7c25d13dea8, $r10=0x761d86b48cb5ce21 ++slt $r16, $r18, $r16 :: ++before: $r16=0xcddd92e2340cd593, $r18=0xc9a30f4707743f80, $r16=0x3ff7d36f17396d3a ++after: $r16=0x0000000000000001, $r18=0xc9a30f4707743f80, $r16=0x0000000000000001 ++slt $r6, 
$r14, $r10 :: ++before: $r6=0xa9e71c6376093499, $r14=0x26bb3955b588461f, $r10=0xfae7e7a950447826 ++after: $r6=000000000000000000, $r14=0x26bb3955b588461f, $r10=0xfae7e7a950447826 ++slt $r19, $r4, $r17 :: ++before: $r19=0x35bb27f64ebd7d62, $r4=0x4a7d3941ebf88bc1, $r17=0x0cda32e4b1c1d5c4 ++after: $r19=000000000000000000, $r4=0x4a7d3941ebf88bc1, $r17=0x0cda32e4b1c1d5c4 ++slt $r19, $r28, $r15 :: ++before: $r19=0x29419b8261e40b99, $r28=0xe7e9b059033afa7d, $r15=0x1ea916293b1cc3dd ++after: $r19=0x0000000000000001, $r28=0xe7e9b059033afa7d, $r15=0x1ea916293b1cc3dd ++slt $r31, $r16, $r16 :: ++before: $r31=0xe0fb75047bc62c9a, $r16=0xa634f6174dcced7d, $r16=0xcca5a9d25b670e70 ++after: $r31=000000000000000000, $r16=0xcca5a9d25b670e70, $r16=0xcca5a9d25b670e70 ++slt $r4, $r4, $r10 :: ++before: $r4=0x724ee03fb3fcdec8, $r4=0xae2587f097065e2c, $r10=0x65c69548f83dd0df ++after: $r4=0x0000000000000001, $r4=0x0000000000000001, $r10=0x65c69548f83dd0df ++sltu $r14, $r10, $r13 :: ++before: $r14=0x1956e5498db3fb6e, $r10=0x2d909abfec4490bd, $r13=0xa7d554ebe591d5cc ++after: $r14=0x0000000000000001, $r10=0x2d909abfec4490bd, $r13=0xa7d554ebe591d5cc ++sltu $r6, $r5, $r18 :: ++before: $r6=0xc34214447a064eb8, $r5=0xad4413e45f0a226a, $r18=0x4b09aab500b04bff ++after: $r6=000000000000000000, $r5=0xad4413e45f0a226a, $r18=0x4b09aab500b04bff ++sltu $r31, $r17, $r17 :: ++before: $r31=0x86e16a1618a639c4, $r17=0x87917b281cef8df0, $r17=0x0d543115a56dee48 ++after: $r31=000000000000000000, $r17=0x0d543115a56dee48, $r17=0x0d543115a56dee48 ++sltu $r20, $r6, $r25 :: ++before: $r20=0x164fff47b8b23752, $r6=0x9ad830d46b1660f6, $r25=0xc5d72c146f4aba72 ++after: $r20=0x0000000000000001, $r6=0x9ad830d46b1660f6, $r25=0xc5d72c146f4aba72 ++sltu $r6, $r26, $r7 :: ++before: $r6=0x1428360430b7c9b5, $r26=0xc2052dc6eea5a53c, $r7=0xda1a8e35dd060adf ++after: $r6=0x0000000000000001, $r26=0xc2052dc6eea5a53c, $r7=0xda1a8e35dd060adf ++sltu $r19, $r15, $r26 :: ++before: $r19=0xdfc9984966167604, $r15=0xa9ea12b5a37dd492, $r26=0x7a24be9fcf349afc ++after: $r19=000000000000000000, $r15=0xa9ea12b5a37dd492, $r26=0x7a24be9fcf349afc ++sltu $r29, $r26, $r29 :: ++before: $r29=0x5a3822db2cc26fc5, $r26=0x5985f02e77511d80, $r29=0x370f15cc98f2a6c1 ++after: $r29=000000000000000000, $r26=0x5985f02e77511d80, $r29=000000000000000000 ++sltu $r7, $r28, $r16 :: ++before: $r7=0x0e4594ee2cc8c6d7, $r28=0x0177ac0014f5dd20, $r16=0xde1724c7590a4908 ++after: $r7=0x0000000000000001, $r28=0x0177ac0014f5dd20, $r16=0xde1724c7590a4908 ++sltu $r8, $r12, $r4 :: ++before: $r8=0x1df979e50aa0ed18, $r12=0x5b410cd0985fce18, $r4=0x9d3c39d61e29025d ++after: $r8=0x0000000000000001, $r12=0x5b410cd0985fce18, $r4=0x9d3c39d61e29025d ++sltu $r30, $r23, $r25 :: ++before: $r30=0x1cba022788d49d13, $r23=0xd2b40941478ee865, $r25=0xa503a74e41535830 ++after: $r30=000000000000000000, $r23=0xd2b40941478ee865, $r25=0xa503a74e41535830 ++slti $r15, $r27, 1913 :: ++before: $r15=0xe24c4ca567d1d5f4, $r27=0xfef05a88adf4b892 ++after: $r15=0x0000000000000001, $r27=0xfef05a88adf4b892 ++slti $r8, $r31, -738 :: ++before: $r8=0xfba7284a8ab83b2d, $r31=0xff63b80173f1e368 ++after: $r8=0x0000000000000001, $r31=0xff63b80173f1e368 ++slti $r31, $r31, -1544 :: ++before: $r31=0xb4599a9fa734365a, $r31=0x4327139de75dde1e ++after: $r31=000000000000000000, $r31=000000000000000000 ++slti $r5, $r4, 1529 :: ++before: $r5=0xa5572272e0c04a20, $r4=0x87657c1b1699936b ++after: $r5=0x0000000000000001, $r4=0x87657c1b1699936b ++slti $r10, $r28, 557 :: ++before: $r10=0x1260731618214410, $r28=0xd0de0dfbafb7960a ++after: $r10=0x0000000000000001, 
$r28=0xd0de0dfbafb7960a ++slti $r5, $r12, -222 :: ++before: $r5=0x4c6317772a4b06b0, $r12=0x7a1d4eeb507d649b ++after: $r5=000000000000000000, $r12=0x7a1d4eeb507d649b ++slti $r4, $r31, 717 :: ++before: $r4=0x23b4d62a21994afb, $r31=0x85304cc393f6506b ++after: $r4=0x0000000000000001, $r31=0x85304cc393f6506b ++slti $r18, $r26, 730 :: ++before: $r18=0x67b6f5dbf6a0c55d, $r26=0x451013f9a2337f9f ++after: $r18=000000000000000000, $r26=0x451013f9a2337f9f ++slti $r25, $r8, 1193 :: ++before: $r25=0xdb278cca57f1ad7b, $r8=0x7371a60f5af6334b ++after: $r25=000000000000000000, $r8=0x7371a60f5af6334b ++slti $r17, $r24, 329 :: ++before: $r17=0xffa3ed31f9ea3a29, $r24=0x1138e06e1a45c4f3 ++after: $r17=000000000000000000, $r24=0x1138e06e1a45c4f3 ++sltui $r13, $r26, -462 :: ++before: $r13=0x62677116040aebff, $r26=0xeedd6ccd0e5e2771 ++after: $r13=0x0000000000000001, $r26=0xeedd6ccd0e5e2771 ++sltui $r24, $r28, 1890 :: ++before: $r24=0xef9500b68a87984a, $r28=0xaf5922683f40599d ++after: $r24=000000000000000000, $r28=0xaf5922683f40599d ++sltui $r9, $r6, -1538 :: ++before: $r9=0x9996aa21d2b51922, $r6=0xd5214fb275e738dc ++after: $r9=0x0000000000000001, $r6=0xd5214fb275e738dc ++sltui $r19, $r26, -215 :: ++before: $r19=0x3eb2777655f0f1c5, $r26=0x98ed915860f0eb26 ++after: $r19=0x0000000000000001, $r26=0x98ed915860f0eb26 ++sltui $r8, $r19, -780 :: ++before: $r8=0x5c44b5807c43724c, $r19=0x63a068026b529b03 ++after: $r8=0x0000000000000001, $r19=0x63a068026b529b03 ++sltui $r19, $r17, -1041 :: ++before: $r19=0xf6926016cdbfacc1, $r17=0xec04a9bcc8d1192a ++after: $r19=0x0000000000000001, $r17=0xec04a9bcc8d1192a ++sltui $r26, $r14, 1653 :: ++before: $r26=0x542f05c795aa07c2, $r14=0xb634bf537df4c4ce ++after: $r26=000000000000000000, $r14=0xb634bf537df4c4ce ++sltui $r8, $r5, 441 :: ++before: $r8=0x371daf74e330ee8b, $r5=0xedb0321c888ae22e ++after: $r8=000000000000000000, $r5=0xedb0321c888ae22e ++sltui $r25, $r4, 678 :: ++before: $r25=0xba813c7acc8f5621, $r4=0x8d5ce4750fe7603b ++after: $r25=000000000000000000, $r4=0x8d5ce4750fe7603b ++sltui $r17, $r15, 2019 :: ++before: $r17=0x199b641cefe0a0e2, $r15=0x7ea0508a3fed3453 ++after: $r17=000000000000000000, $r15=0x7ea0508a3fed3453 ++nor $r14, $r28, $r9 :: ++before: $r14=0xccf23cf02a48844d, $r28=0x2608ea0069c4e9dd, $r9=0x1c7a04255a2d13f8 ++after: $r14=0xc18511da84120402, $r28=0x2608ea0069c4e9dd, $r9=0x1c7a04255a2d13f8 ++nor $r6, $r30, $r4 :: ++before: $r6=0xbfcc3de6da2483be, $r30=0xd24e9abca28d6cb5, $r4=0xbb01b508523673c6 ++after: $r6=0x04b040430d408008, $r30=0xd24e9abca28d6cb5, $r4=0xbb01b508523673c6 ++nor $r6, $r28, $r13 :: ++before: $r6=0x28dacd828d5736d7, $r28=0xb365ff31474f736c, $r13=0x593621c0f82b445c ++after: $r6=0x0488000e00908883, $r28=0xb365ff31474f736c, $r13=0x593621c0f82b445c ++nor $r24, $r16, $r31 :: ++before: $r24=0x5898010a4c6cf1bb, $r16=0xecac6e093ba6146a, $r31=0x050e6093f19b1194 ++after: $r24=0x125191640440ea01, $r16=0xecac6e093ba6146a, $r31=0x050e6093f19b1194 ++nor $r15, $r7, $r20 :: ++before: $r15=0x2ddb1dea334fd92a, $r7=0x401d7a663be0b31a, $r20=0xb6c008973a85f779 ++after: $r15=0x09228508c41a0884, $r7=0x401d7a663be0b31a, $r20=0xb6c008973a85f779 ++nor $r18, $r31, $r29 :: ++before: $r18=0x0c987982e1d91684, $r31=0x181f20f581ed38f4, $r29=0xefaa786e00a2e5b9 ++after: $r18=0x004087007e100202, $r31=0x181f20f581ed38f4, $r29=0xefaa786e00a2e5b9 ++nor $r19, $r31, $r13 :: ++before: $r19=0x39e476d555cd20bc, $r31=0xfb8fab5d35576d50, $r13=0x71a92a8377c0f729 ++after: $r19=0x0450542088280086, $r31=0xfb8fab5d35576d50, $r13=0x71a92a8377c0f729 ++nor $r25, $r7, $r5 :: ++before: 
$r25=0x7f36d0c6d173e8c8, $r7=0x181763a9f9350680, $r5=0x5ec5099605d7d418 ++after: $r25=0xa128944002082967, $r7=0x181763a9f9350680, $r5=0x5ec5099605d7d418 ++nor $r30, $r23, $r23 :: ++before: $r30=0x688e1d04976ac8db, $r23=0xd37b6d6a1c510287, $r23=0x8670301ee2a715df ++after: $r30=0x798fcfe11d58ea20, $r23=0x8670301ee2a715df, $r23=0x8670301ee2a715df ++nor $r5, $r23, $r14 :: ++before: $r5=0x71c4a211dd9262f4, $r23=0xcb8a4aebc2c6c4f2, $r14=0x084d79a5254447c9 ++after: $r5=0x3430841018393804, $r23=0xcb8a4aebc2c6c4f2, $r14=0x084d79a5254447c9 ++and $r8, $r14, $r31 :: ++before: $r8=0xbddf22c4109e20b5, $r14=0xb2d25973efd1a8ff, $r31=0x28b78b59dfe641e9 ++after: $r8=0x20920951cfc000e9, $r14=0xb2d25973efd1a8ff, $r31=0x28b78b59dfe641e9 ++and $r19, $r23, $r17 :: ++before: $r19=0xb25e185c549f6661, $r23=0xb6ccc215c2f17718, $r17=0xf20669c51aee8ffe ++after: $r19=0xb204400502e00718, $r23=0xb6ccc215c2f17718, $r17=0xf20669c51aee8ffe ++and $r30, $r27, $r23 :: ++before: $r30=0xa7f4ad796393e12b, $r27=0xefbcf405df3e7aff, $r23=0x548a0141e9fe1700 ++after: $r30=0x44880001c93e1200, $r27=0xefbcf405df3e7aff, $r23=0x548a0141e9fe1700 ++and $r18, $r31, $r29 :: ++before: $r18=0xa399c7f46c61d974, $r31=0xe0fe8cca1cbab773, $r29=0x49e680ddee7f666b ++after: $r18=0x40e680c80c3a2663, $r31=0xe0fe8cca1cbab773, $r29=0x49e680ddee7f666b ++and $r5, $r26, $r25 :: ++before: $r5=0x1682ca17c11f90ac, $r26=0x4e9706cb2c885742, $r25=0x250ff6304dd87d57 ++after: $r5=0x040706000c885542, $r26=0x4e9706cb2c885742, $r25=0x250ff6304dd87d57 ++and $r28, $r14, $r8 :: ++before: $r28=0xcacf15e6ffad256f, $r14=0x99527f4fa2aa8fb1, $r8=0xcff546a883b63cfb ++after: $r28=0x8950460882a20cb1, $r14=0x99527f4fa2aa8fb1, $r8=0xcff546a883b63cfb ++and $r28, $r9, $r28 :: ++before: $r28=0xc60423b9cf70d112, $r9=0x2fb0db47f1d8f166, $r28=0x1e9cec9d13e85210 ++after: $r28=0x0e90c80511c85000, $r9=0x2fb0db47f1d8f166, $r28=0x0e90c80511c85000 ++and $r18, $r28, $r5 :: ++before: $r18=0x5059c37ee38d2f25, $r28=0x74bf57d85d90af3a, $r5=0x35479df0ebec9209 ++after: $r18=0x340715d049808208, $r28=0x74bf57d85d90af3a, $r5=0x35479df0ebec9209 ++and $r23, $r25, $r12 :: ++before: $r23=0x18742ef4c73416be, $r25=0x8b93e775860ef52b, $r12=0xa909915f60a546d2 ++after: $r23=0x8901815500044402, $r25=0x8b93e775860ef52b, $r12=0xa909915f60a546d2 ++and $r18, $r17, $r24 :: ++before: $r18=0xadb2cc6aec909946, $r17=0x3068f8b21d583e4c, $r24=0xcf8aae1918f3a88e ++after: $r18=0x0008a8101850280c, $r17=0x3068f8b21d583e4c, $r24=0xcf8aae1918f3a88e ++or $r19, $r28, $r25 :: ++before: $r19=0x46819825f87044c2, $r28=0x65cb2cc7e5f5a720, $r25=0x1fc0130146f13f76 ++after: $r19=0x7fcb3fc7e7f5bf76, $r28=0x65cb2cc7e5f5a720, $r25=0x1fc0130146f13f76 ++or $r8, $r25, $r4 :: ++before: $r8=0x45083dd59c60e6fe, $r25=0x936ecfaeb4d51c95, $r4=0xdc37c27c69024f6e ++after: $r8=0xdf7fcffefdd75fff, $r25=0x936ecfaeb4d51c95, $r4=0xdc37c27c69024f6e ++or $r15, $r16, $r8 :: ++before: $r15=0x516659e51cf19b26, $r16=0x7589da0802d59510, $r8=0x6b713c60390f3fbf ++after: $r15=0x7ff9fe683bdfbfbf, $r16=0x7589da0802d59510, $r8=0x6b713c60390f3fbf ++or $r9, $r15, $r6 :: ++before: $r9=0x1646568625c40022, $r15=0xa68db9141a88850c, $r6=0x756d912fbefef973 ++after: $r9=0xf7edb93fbefefd7f, $r15=0xa68db9141a88850c, $r6=0x756d912fbefef973 ++or $r24, $r9, $r25 :: ++before: $r24=0xda34c24d14fce443, $r9=0x6ad9bf24481630b0, $r25=0x02aefcdfa652395b ++after: $r24=0x6affffffee5639fb, $r9=0x6ad9bf24481630b0, $r25=0x02aefcdfa652395b ++or $r13, $r9, $r14 :: ++before: $r13=0x900358ad1e848728, $r9=0xa0e361b5b891a62e, $r14=0xddfa0c1377ce01ac ++after: $r13=0xfdfb6db7ffdfa7ae, 
$r9=0xa0e361b5b891a62e, $r14=0xddfa0c1377ce01ac ++or $r23, $r16, $r15 :: ++before: $r23=0x27a55515d39aded9, $r16=0xd0daf17f9cb0bf5a, $r15=0xf44c4372982c4d74 ++after: $r23=0xf4def37f9cbcff7e, $r16=0xd0daf17f9cb0bf5a, $r15=0xf44c4372982c4d74 ++or $r20, $r16, $r16 :: ++before: $r20=0x7045887bb8325d6f, $r16=0xbac771cbb78dae04, $r16=0x23f4928023125a5c ++after: $r20=0x23f4928023125a5c, $r16=0x23f4928023125a5c, $r16=0x23f4928023125a5c ++or $r30, $r5, $r7 :: ++before: $r30=0xcf609aa2057d1b98, $r5=0x379641544fd1cd48, $r7=0x5275ef34f265f01a ++after: $r30=0x77f7ef74fff5fd5a, $r5=0x379641544fd1cd48, $r7=0x5275ef34f265f01a ++or $r23, $r4, $r30 :: ++before: $r23=0xc43fc1c750887406, $r4=0x44a3229c33d1cd65, $r30=0xceaa00084fc04912 ++after: $r23=0xceab229c7fd1cd77, $r4=0x44a3229c33d1cd65, $r30=0xceaa00084fc04912 ++xor $r6, $r19, $r31 :: ++before: $r6=0x018522418b59bf8a, $r19=0x270a2ec823f26e39, $r31=0x99ef76e6d4495ae3 ++after: $r6=0xbee5582ef7bb34da, $r19=0x270a2ec823f26e39, $r31=0x99ef76e6d4495ae3 ++xor $r28, $r20, $r27 :: ++before: $r28=0x57de83cac9dade15, $r20=0xd39fdecdfd4ccb08, $r27=0x00c97b854adacdb4 ++after: $r28=0xd356a548b79606bc, $r20=0xd39fdecdfd4ccb08, $r27=0x00c97b854adacdb4 ++xor $r4, $r29, $r5 :: ++before: $r4=0x9f7356fff2445f77, $r29=0xc3c3a34d2c226b5a, $r5=0x51abdd266816b94f ++after: $r4=0x92687e6b4434d215, $r29=0xc3c3a34d2c226b5a, $r5=0x51abdd266816b94f ++xor $r14, $r6, $r28 :: ++before: $r14=0xdd5ca0b5c6c45804, $r6=0xa0ba047990ec0798, $r28=0x089e6efd43651c28 ++after: $r14=0xa8246a84d3891bb0, $r6=0xa0ba047990ec0798, $r28=0x089e6efd43651c28 ++xor $r8, $r19, $r23 :: ++before: $r8=0xc3e35cd44af166fa, $r19=0x6affcfe12104ccc7, $r23=0x4adbb3601a07a1d9 ++after: $r8=0x20247c813b036d1e, $r19=0x6affcfe12104ccc7, $r23=0x4adbb3601a07a1d9 ++xor $r16, $r5, $r18 :: ++before: $r16=0x685cdc5ca969c8e1, $r5=0xd88d0e2a9900b8eb, $r18=0xdd4dfbba723cde28 ++after: $r16=0x05c0f590eb3c66c3, $r5=0xd88d0e2a9900b8eb, $r18=0xdd4dfbba723cde28 ++xor $r20, $r18, $r24 :: ++before: $r20=0x2362838018fa39be, $r18=0xbbc8d438b24c037a, $r24=0xe020a8456a45b667 ++after: $r20=0x5be87c7dd809b51d, $r18=0xbbc8d438b24c037a, $r24=0xe020a8456a45b667 ++xor $r19, $r23, $r19 :: ++before: $r19=0x637cae50fc0a1c95, $r23=0x514b81a7227dd07e, $r19=0x59a27a7f9c8481c3 ++after: $r19=0x08e9fbd8bef951bd, $r23=0x514b81a7227dd07e, $r19=0x08e9fbd8bef951bd ++xor $r20, $r16, $r18 :: ++before: $r20=0xb728dd7a443bcc8f, $r16=0xe2de9bf67cdbdc0c, $r18=0x26687435fbe4dbf6 ++after: $r20=0xc4b6efc3873f07fa, $r16=0xe2de9bf67cdbdc0c, $r18=0x26687435fbe4dbf6 ++xor $r23, $r14, $r6 :: ++before: $r23=0x744915919b52e27e, $r14=0x16863c1d3e1cded7, $r6=0x040ce8607349c380 ++after: $r23=0x128ad47d4d551d57, $r14=0x16863c1d3e1cded7, $r6=0x040ce8607349c380 ++orn $r24, $r9, $r15 :: ++before: $r24=0x39320ce9aa25fb73, $r9=0xaaec06dc1b47cf43, $r15=0x5fa36a558c884a69 ++after: $r24=0xaafc97fe7b77ffd7, $r9=0xaaec06dc1b47cf43, $r15=0x5fa36a558c884a69 ++orn $r12, $r4, $r26 :: ++before: $r12=0xa9c2abcbc14e3f3c, $r4=0x7c87d633528d97b0, $r26=0xe383c14e72ab8677 ++after: $r12=0x7cfffeb3dfddffb8, $r4=0x7c87d633528d97b0, $r26=0xe383c14e72ab8677 ++orn $r20, $r24, $r28 :: ++before: $r20=0xb117d8b0280738a2, $r24=0x318fd949c3ba430f, $r28=0xc9edab5116dc1582 ++after: $r20=0x379fddefebbbeb7f, $r24=0x318fd949c3ba430f, $r28=0xc9edab5116dc1582 ++orn $r8, $r25, $r25 :: ++before: $r8=0xb140441a36f8eded, $r25=0xa26782a5e34d7add, $r25=0x61bdd5b78d019958 ++after: $r8=0xffffffffffffffff, $r25=0x61bdd5b78d019958, $r25=0x61bdd5b78d019958 ++orn $r16, $r18, $r25 :: ++before: $r16=0xcda0e2c1bce1eeec, 
$r18=0xa4486eefd2c444d9, $r25=0xbd007605c829cadc ++after: $r16=0xe6ffeffff7d675fb, $r18=0xa4486eefd2c444d9, $r25=0xbd007605c829cadc ++orn $r5, $r28, $r19 :: ++before: $r5=0x8196fca50795a2aa, $r28=0xec7f689a0d676560, $r19=0xb4450418c4e1b333 ++after: $r5=0xeffffbff3f7f6dec, $r28=0xec7f689a0d676560, $r19=0xb4450418c4e1b333 ++orn $r15, $r14, $r8 :: ++before: $r15=0xaf1e2a9fe35ba4ed, $r14=0xd2207f86d89b890a, $r8=0xfb31b9e37313a94d ++after: $r15=0xd6ee7f9edcffdfba, $r14=0xd2207f86d89b890a, $r8=0xfb31b9e37313a94d ++orn $r27, $r14, $r14 :: ++before: $r27=0x1f24566bfa353160, $r14=0xc4e17319c4766bec, $r14=0x29a3bbaaf6b49218 ++after: $r27=0xffffffffffffffff, $r14=0x29a3bbaaf6b49218, $r14=0x29a3bbaaf6b49218 ++orn $r17, $r12, $r31 :: ++before: $r17=0xf5195a72c175fed7, $r12=0x7aa8d4840359cbf6, $r31=0xa1a42af83c82215b ++after: $r17=0x7efbd587c37ddff6, $r12=0x7aa8d4840359cbf6, $r31=0xa1a42af83c82215b ++orn $r16, $r20, $r20 :: ++before: $r16=0x76bb09b5b50705e2, $r20=0x613fdcbd8c1eba2a, $r20=0xfb1e04641f5da4ff ++after: $r16=0xffffffffffffffff, $r20=0xfb1e04641f5da4ff, $r20=0xfb1e04641f5da4ff ++andn $r19, $r31, $r17 :: ++before: $r19=0xbcc81a9b2e349626, $r31=0x5a38a8ef9c7e30e4, $r17=0xcb490976d0652986 ++after: $r19=0x1030a0890c1a1060, $r31=0x5a38a8ef9c7e30e4, $r17=0xcb490976d0652986 ++andn $r10, $r4, $r10 :: ++before: $r10=0x9acfa0cd6ea107fd, $r4=0x1d9b572e8f6bedb7, $r10=0x768fe778d2a543ea ++after: $r10=0x091010060d4aac15, $r4=0x1d9b572e8f6bedb7, $r10=0x091010060d4aac15 ++andn $r6, $r12, $r26 :: ++before: $r6=0x949e36cff3b5decb, $r12=0x56723f7285834fc9, $r26=0xf6fa544d6cd57fa8 ++after: $r6=0x00002b3281020041, $r12=0x56723f7285834fc9, $r26=0xf6fa544d6cd57fa8 ++andn $r16, $r6, $r4 :: ++before: $r16=0x44a39d85132d6513, $r6=0x3ca7f972b865b7ce, $r4=0xf18819e4740308bc ++after: $r16=0x0c27e0128864b742, $r6=0x3ca7f972b865b7ce, $r4=0xf18819e4740308bc ++andn $r19, $r26, $r15 :: ++before: $r19=0x856d1e3162c8fa2d, $r26=0x0c1ef79456be3885, $r15=0x03c089064e60da1d ++after: $r19=0x0c1e7690109e2080, $r26=0x0c1ef79456be3885, $r15=0x03c089064e60da1d ++andn $r17, $r28, $r9 :: ++before: $r17=0x512a518c554f4b0a, $r28=0x043454425b8b7755, $r9=0xdc5dca386b49bdd7 ++after: $r17=0x0020144210824200, $r28=0x043454425b8b7755, $r9=0xdc5dca386b49bdd7 ++andn $r16, $r16, $r14 :: ++before: $r16=0xa9c14796fec54f89, $r16=0xe31928f90d2723a4, $r14=0xcf2deaf4af11410a ++after: $r16=0x20100009002622a4, $r16=0x20100009002622a4, $r14=0xcf2deaf4af11410a ++andn $r9, $r4, $r20 :: ++before: $r9=0x51d79964a699ec8d, $r4=0xe82135537ca93e7f, $r20=0xcbadcb1dc4dd0ed0 ++after: $r9=0x200034423820302f, $r4=0xe82135537ca93e7f, $r20=0xcbadcb1dc4dd0ed0 ++andn $r18, $r25, $r25 :: ++before: $r18=0xeb546ce75bcba3f5, $r25=0x953d86e2bd6b136d, $r25=0x4914dbeee506d8ad ++after: $r18=000000000000000000, $r25=0x4914dbeee506d8ad, $r25=0x4914dbeee506d8ad ++andn $r27, $r15, $r14 :: ++before: $r27=0xc8b599a43b0b4683, $r15=0x0509638630676b88, $r14=0x3d278ed22a112a89 ++after: $r27=0x0008610410664100, $r15=0x0509638630676b88, $r14=0x3d278ed22a112a89 ++mul.w $r28, $r12, $r10 :: ++before: $r28=0xf6fcce3e1c5b1598, $r12=0xef2747013f911fe8, $r10=0x14a216fd69537967 ++after: $r28=0xffffffffabb07e58, $r12=0xef2747013f911fe8, $r10=0x14a216fd69537967 ++mul.w $r13, $r18, $r24 :: ++before: $r13=0x5e8a32c1e1e12aa4, $r18=0x30e007bb8dd185fa, $r24=0x1a74dd893af9fb5a ++after: $r13=0x000000003e2f37e4, $r18=0x30e007bb8dd185fa, $r24=0x1a74dd893af9fb5a ++mul.w $r10, $r20, $r4 :: ++before: $r10=0xf06f4af61b0e0c24, $r20=0x1b3624a77f26275f, $r4=0x653052ae3a1347df ++after: $r10=0xffffffffc934a4c1, 
$r20=0x1b3624a77f26275f, $r4=0x653052ae3a1347df ++mul.w $r23, $r19, $r10 :: ++before: $r23=0xccb5485ae4605cdd, $r19=0x67c67c647eaf9e6c, $r10=0xfb9b6c7b49ec10cf ++after: $r23=0x000000004177d954, $r19=0x67c67c647eaf9e6c, $r10=0xfb9b6c7b49ec10cf ++mul.w $r12, $r30, $r7 :: ++before: $r12=0xc1f45aaf98ffcb39, $r30=0x906f0c08c0bae02e, $r7=0xdf6cf5c05b5f2d34 ++after: $r12=0xffffffff8a6f9f58, $r30=0x906f0c08c0bae02e, $r7=0xdf6cf5c05b5f2d34 ++mul.w $r27, $r12, $r12 :: ++before: $r27=0x9545c6d9f812c0d9, $r12=0xacd016cb69e028b3, $r12=0x2b68e3a280d9c0b6 ++after: $r27=0x00000000459d8164, $r12=0x2b68e3a280d9c0b6, $r12=0x2b68e3a280d9c0b6 ++mul.w $r28, $r7, $r19 :: ++before: $r28=0x4cf68a9590da3da5, $r7=0x70ed8b9b03a6325d, $r19=0x1125383d12dad118 ++after: $r28=0x0000000073e4a5b8, $r7=0x70ed8b9b03a6325d, $r19=0x1125383d12dad118 ++mul.w $r20, $r12, $r20 :: ++before: $r20=0x10683d31408fb4c5, $r12=0x9ef4ea79672ce58d, $r20=0x960a13776923d3e4 ++after: $r20=0x000000001c76a894, $r12=0x9ef4ea79672ce58d, $r20=0x000000001c76a894 ++mul.w $r26, $r19, $r28 :: ++before: $r26=0xbf8a20b69fa4357b, $r19=0xf3e9b53a654e3cbf, $r28=0x20afdeb5a4b4e1c9 ++after: $r26=0x00000000601d90f7, $r19=0xf3e9b53a654e3cbf, $r28=0x20afdeb5a4b4e1c9 ++mul.w $r13, $r26, $r25 :: ++before: $r13=0x78f637d350c666bf, $r26=0xff742d96dc73e9e9, $r25=0x94a3289b55744707 ++after: $r13=0xffffffff879f045f, $r26=0xff742d96dc73e9e9, $r25=0x94a3289b55744707 ++mulh.w $r18, $r25, $r14 :: ++before: $r18=0xa988161162710d96, $r25=0x37443c6f5d0625ea, $r14=0x94da379219de8576 ++after: $r18=0x0000000009667587, $r25=0x37443c6f5d0625ea, $r14=0x94da379219de8576 ++mulh.w $r13, $r16, $r18 :: ++before: $r13=0x246298a54a25030a, $r16=0x33643ceed35cff64, $r18=0xc25702631b42c849 ++after: $r13=0xfffffffffb3f29fd, $r16=0x33643ceed35cff64, $r18=0xc25702631b42c849 ++mulh.w $r20, $r5, $r15 :: ++before: $r20=0x3b606ea986dcf13e, $r5=0x269dcd16567786d2, $r15=0x96c0983df45d5c03 ++after: $r20=0xfffffffffc11ee2e, $r5=0x269dcd16567786d2, $r15=0x96c0983df45d5c03 ++mulh.w $r19, $r19, $r25 :: ++before: $r19=0xab8fc1c922ba3e7a, $r19=0xdec5bddca513d198, $r25=0xf05e814d67d43f5a ++after: $r19=0xffffffffdb1f973d, $r19=0xffffffffdb1f973d, $r25=0xf05e814d67d43f5a ++mulh.w $r15, $r28, $r16 :: ++before: $r15=0x82fcfa24449231ba, $r28=0xf37548fee13133f3, $r16=0x256188ef96bb3d23 ++after: $r15=0x000000000cab1812, $r28=0xf37548fee13133f3, $r16=0x256188ef96bb3d23 ++mulh.w $r24, $r9, $r27 :: ++before: $r24=0x858ddeb68e948058, $r9=0x0ffb64d62e202462, $r27=0xe07a6dae07f46c11 ++after: $r24=0x00000000016eeb19, $r9=0x0ffb64d62e202462, $r27=0xe07a6dae07f46c11 ++mulh.w $r23, $r20, $r14 :: ++before: $r23=0x7713930e419350ff, $r20=0xd5d72e6efb86e428, $r14=0x49f87e78ddcc8400 ++after: $r23=0x000000000098fbfd, $r20=0xd5d72e6efb86e428, $r14=0x49f87e78ddcc8400 ++mulh.w $r28, $r20, $r25 :: ++before: $r28=0x552a9b7f3fa0c48a, $r20=0xd616afd20f193287, $r25=0xbcd2ae680b131cd2 ++after: $r28=0x0000000000a735bd, $r20=0xd616afd20f193287, $r25=0xbcd2ae680b131cd2 ++mulh.w $r16, $r19, $r12 :: ++before: $r16=0x94b154fc890497c3, $r19=0xd8217f47e4257a7c, $r12=0xb47bb0e4cff83cbf ++after: $r16=0x000000000539d140, $r19=0xd8217f47e4257a7c, $r12=0xb47bb0e4cff83cbf ++mulh.w $r23, $r23, $r6 :: ++before: $r23=0x0afb7fddb344318f, $r23=0xaafee418c4267e18, $r6=0x1763f686cd41d46e ++after: $r23=0x000000000bdcf0fd, $r23=0x000000000bdcf0fd, $r6=0x1763f686cd41d46e ++mulh.wu $r18, $r17, $r8 :: ++before: $r18=0xa92fa2817b19786c, $r17=0xaf23e3d2092f080c, $r8=0x771c36ac19259f2a ++after: $r18=0x0000000000e6f14b, $r17=0xaf23e3d2092f080c, $r8=0x771c36ac19259f2a 
++mulh.wu $r16, $r13, $r8 :: ++before: $r16=0xf4a7b7abe5f3831a, $r13=0xe8beff7f8f4330cd, $r8=0x38cebbe3d1af354d ++after: $r16=0x000000007557e799, $r13=0xe8beff7f8f4330cd, $r8=0x38cebbe3d1af354d ++mulh.wu $r8, $r23, $r29 :: ++before: $r8=0x6ca8c7d8ec316750, $r23=0xc3a59754c752c3a5, $r29=0x4b77e251de7f45f1 ++after: $r8=0xffffffffad3cde2d, $r23=0xc3a59754c752c3a5, $r29=0x4b77e251de7f45f1 ++mulh.wu $r20, $r25, $r30 :: ++before: $r20=0x6faa5d1372250132, $r25=0x68734123142c820a, $r30=0x0f7b4bdf342e2017 ++after: $r20=0x00000000041cacf0, $r25=0x68734123142c820a, $r30=0x0f7b4bdf342e2017 ++mulh.wu $r31, $r18, $r19 :: ++before: $r31=0x08cfa67422c1c5d5, $r18=0xb48ac9531206cef2, $r19=0x9f9f5d925c5cf738 ++after: $r31=0x000000000680fe39, $r18=0xb48ac9531206cef2, $r19=0x9f9f5d925c5cf738 ++mulh.wu $r25, $r7, $r27 :: ++before: $r25=0x85aa17ff1b3699ba, $r7=0x9a7aeabb800edb53, $r27=0x4eb1ec754c7cdb59 ++after: $r25=0x000000002642de08, $r7=0x9a7aeabb800edb53, $r27=0x4eb1ec754c7cdb59 ++mulh.wu $r19, $r4, $r28 :: ++before: $r19=0x821038d7fb43149c, $r4=0x44cd20261f5ae87e, $r28=0xf9d8916e8eb4ecb1 ++after: $r19=0x00000000117a95de, $r4=0x44cd20261f5ae87e, $r28=0xf9d8916e8eb4ecb1 ++mulh.wu $r30, $r23, $r28 :: ++before: $r30=0xef34433557594fb3, $r23=0x2f9401c8064c8ca0, $r28=0x5de6287c2a56e507 ++after: $r30=0x00000000010ab26c, $r23=0x2f9401c8064c8ca0, $r28=0x5de6287c2a56e507 ++mulh.wu $r13, $r6, $r17 :: ++before: $r13=0xd6b38c427ad5f669, $r6=0xbe04ea8987b20188, $r17=0x52cee1d144e3c134 ++after: $r13=0x00000000248401a8, $r6=0xbe04ea8987b20188, $r17=0x52cee1d144e3c134 ++mulh.wu $r26, $r19, $r17 :: ++before: $r26=0x2ea15eee9429b8a0, $r19=0x43598be92000d9f7, $r17=0x6364cfeb707aba6c ++after: $r26=0x000000000e0fb712, $r19=0x43598be92000d9f7, $r17=0x6364cfeb707aba6c ++mul.d $r19, $r4, $r10 :: ++before: $r19=0xf0235819cf1bab1f, $r4=0xdc7a0086353cfddf, $r10=0x6f18aec465b5af87 ++after: $r19=0xb1beaa2f3e605199, $r4=0xdc7a0086353cfddf, $r10=0x6f18aec465b5af87 ++mul.d $r19, $r31, $r20 :: ++before: $r19=0x24d7526c5e4669e3, $r31=0xaab7dd46e5af2493, $r20=0xd5df6eea42205e25 ++after: $r19=0x7ec27945fa1e433f, $r31=0xaab7dd46e5af2493, $r20=0xd5df6eea42205e25 ++mul.d $r15, $r20, $r4 :: ++before: $r15=0x3740ba48d64cc478, $r20=0xcfeffb7c35a98382, $r4=0xeab050fc9bdb3c52 ++after: $r15=0x3ae548f8215497a4, $r20=0xcfeffb7c35a98382, $r4=0xeab050fc9bdb3c52 ++mul.d $r29, $r7, $r25 :: ++before: $r29=0xe8858552c0e8eac8, $r7=0xb65ed231c27efb70, $r25=0xbb753de59e4ca3d1 ++after: $r29=0x57c0018869039670, $r7=0xb65ed231c27efb70, $r25=0xbb753de59e4ca3d1 ++mul.d $r5, $r30, $r4 :: ++before: $r5=0xc4f17df5c983317d, $r30=0xb2af9e86d443d8ce, $r4=0xf9e3c6d18372d0d3 ++after: $r5=0xeff3c7ee09cf11ca, $r30=0xb2af9e86d443d8ce, $r4=0xf9e3c6d18372d0d3 ++mul.d $r25, $r17, $r29 :: ++before: $r25=0xa09d11d50056b350, $r17=0x6609b14ca65f9aff, $r29=0x692def5a14a3278c ++after: $r25=0xbd4098e529429c74, $r17=0x6609b14ca65f9aff, $r29=0x692def5a14a3278c ++mul.d $r13, $r15, $r26 :: ++before: $r13=0xd528ed047af75775, $r15=0x896658fe826a0817, $r26=0xa456f53d5f2760b1 ++after: $r13=0x630a2082b2d937e7, $r15=0x896658fe826a0817, $r26=0xa456f53d5f2760b1 ++mul.d $r23, $r9, $r7 :: ++before: $r23=0x5d33f63ce8637a69, $r9=0xad38922264c721ff, $r7=0xe0514fea4ee52aca ++after: $r23=0x9d478a3f4bcfa936, $r9=0xad38922264c721ff, $r7=0xe0514fea4ee52aca ++mul.d $r25, $r23, $r30 :: ++before: $r25=0x5d74125f059662f3, $r23=0xa708100731e88710, $r30=0x739e4de71fec92e0 ++after: $r25=0xeb30fae9bb3d4e00, $r23=0xa708100731e88710, $r30=0x739e4de71fec92e0 ++mul.d $r26, $r18, $r30 :: ++before: $r26=0x110a94ffa2e12f32, 
$r18=0x01b770d6c423d4f8, $r30=0x38bf04d66f91531a ++after: $r26=0x5937a05ab2280930, $r18=0x01b770d6c423d4f8, $r30=0x38bf04d66f91531a ++mulh.d $r5, $r15, $r12 :: ++before: $r5=0xd72f46d42ca4db6b, $r15=0xe1771af0e69e49a6, $r12=0xd796f52fbd01a4bb ++after: $r5=0x04d1eb3a7f530298, $r15=0xe1771af0e69e49a6, $r12=0xd796f52fbd01a4bb ++mulh.d $r28, $r18, $r14 :: ++before: $r28=0x904e699bcbe32b08, $r18=0x9b5b69b4d817779c, $r14=0xa02ca97cc4e37f13 ++after: $r28=0x25ac2970a5b47a76, $r18=0x9b5b69b4d817779c, $r14=0xa02ca97cc4e37f13 ++mulh.d $r6, $r12, $r7 :: ++before: $r6=0xc75e1065b8dbcd34, $r12=0xec7d8ae6a65f2fd3, $r7=0xb7e32b52f40bc8ef ++after: $r6=0x057ee36929066010, $r12=0xec7d8ae6a65f2fd3, $r7=0xb7e32b52f40bc8ef ++mulh.d $r5, $r25, $r19 :: ++before: $r5=0x7b2e04c0c2f95e4f, $r25=0x9a5037ff200e982a, $r19=0xf862c0c6425ff2bc ++after: $r5=0x03064462e05709fc, $r25=0x9a5037ff200e982a, $r19=0xf862c0c6425ff2bc ++mulh.d $r14, $r8, $r23 :: ++before: $r14=0x5fd7ae31ad151daa, $r8=0x444243172f499ec0, $r23=0x9003c8aeabc39884 ++after: $r14=0xe22404eefbd57910, $r8=0x444243172f499ec0, $r23=0x9003c8aeabc39884 ++mulh.d $r7, $r23, $r13 :: ++before: $r7=0x0bc21ca397041a2b, $r23=0xe886455c8737b2ca, $r13=0xd5ccec2f631a1d60 ++after: $r7=0x03dea7b02a86f5c2, $r23=0xe886455c8737b2ca, $r13=0xd5ccec2f631a1d60 ++mulh.d $r26, $r16, $r13 :: ++before: $r26=0xd3894783f187ee9c, $r16=0xa7a6c4abeda9a22c, $r13=0x4375f7e49ed91384 ++after: $r26=0xe8b7ef23e33e1269, $r16=0xa7a6c4abeda9a22c, $r13=0x4375f7e49ed91384 ++mulh.d $r17, $r31, $r16 :: ++before: $r17=0xa93bd0cf9137745e, $r31=0x3a1b2b922b7645f1, $r16=0x7e33f64c19972ae3 ++after: $r17=0x1ca52ac301413b29, $r31=0x3a1b2b922b7645f1, $r16=0x7e33f64c19972ae3 ++mulh.d $r20, $r19, $r8 :: ++before: $r20=0xda9224c9ab488939, $r19=0xb7f5978bf509641d, $r8=0xf6fcd615333c30c0 ++after: $r20=0x028941970b9c41ae, $r19=0xb7f5978bf509641d, $r8=0xf6fcd615333c30c0 ++mulh.d $r12, $r17, $r20 :: ++before: $r12=0xcdbd51e35d5c1df3, $r17=0x254bd8eaadc946fe, $r20=0x9de163435088598b ++after: $r12=0xf1b4813d0885ff33, $r17=0x254bd8eaadc946fe, $r20=0x9de163435088598b ++mulh.du $r25, $r28, $r29 :: ++before: $r25=0xf7ef0dbf1bf7938a, $r28=0xd267d11ae422f604, $r29=0x089d6fd68226e13d ++after: $r25=0x0714a41f660b233e, $r28=0xd267d11ae422f604, $r29=0x089d6fd68226e13d ++mulh.du $r7, $r28, $r24 :: ++before: $r7=0xe568cf4a6d6bc199, $r28=0x6efedad6fbe95f2a, $r24=0xdf55853ed22d024e ++after: $r7=0x60d51505935d32a4, $r28=0x6efedad6fbe95f2a, $r24=0xdf55853ed22d024e ++mulh.du $r25, $r8, $r9 :: ++before: $r25=0x0bf7c0226b0c2072, $r8=0x794fd44a65c65ebb, $r9=0xa0391c3fa3cf1e5c ++after: $r25=0x4becf4d7a7a9ffeb, $r8=0x794fd44a65c65ebb, $r9=0xa0391c3fa3cf1e5c ++mulh.du $r30, $r16, $r7 :: ++before: $r30=0x3df3f3b3ff17f61a, $r16=0xcadd1f7e7150ad7b, $r7=0xbdc63d3f762cf02d ++after: $r30=0x966257cfb0059e70, $r16=0xcadd1f7e7150ad7b, $r7=0xbdc63d3f762cf02d ++mulh.du $r6, $r10, $r19 :: ++before: $r6=0x6601e05fc5f801cb, $r10=0xbc10a70104969251, $r19=0x2f50a00036fb7821 ++after: $r6=0x22c24967f0edf696, $r10=0xbc10a70104969251, $r19=0x2f50a00036fb7821 ++mulh.du $r17, $r9, $r5 :: ++before: $r17=0xffabc0cbdc8aa7b0, $r9=0x5288bc60da558afb, $r5=0x2795644a58b2668f ++after: $r17=0x0cc2fe9dc756ea23, $r9=0x5288bc60da558afb, $r5=0x2795644a58b2668f ++mulh.du $r26, $r8, $r15 :: ++before: $r26=0x68b64c997f561b59, $r8=0xe2ed2375e64b1bf3, $r15=0xe1033e583092ad96 ++after: $r26=0xc7754c35a4f2f082, $r8=0xe2ed2375e64b1bf3, $r15=0xe1033e583092ad96 ++mulh.du $r10, $r13, $r30 :: ++before: $r10=0x6450ec488eb4753b, $r13=0x4287b82860366cf8, $r30=0x01c15ed3f051fe8c ++after: 
$r10=0x0074c8aee8c0e7ce, $r13=0x4287b82860366cf8, $r30=0x01c15ed3f051fe8c ++mulh.du $r24, $r13, $r15 :: ++before: $r24=0x1169fa9dd6f8273d, $r13=0x6fd2cdb39e5d1fa3, $r15=0xff0526e206880684 ++after: $r24=0x6f653afff4bd7810, $r13=0x6fd2cdb39e5d1fa3, $r15=0xff0526e206880684 ++mulh.du $r8, $r9, $r10 :: ++before: $r8=0xe9cb6416a1492fbf, $r9=0xaf89960e18913df0, $r10=0x76b4251409ff9830 ++after: $r8=0x5164f154a1871400, $r9=0xaf89960e18913df0, $r10=0x76b4251409ff9830 ++mulw.d.w $r6, $r31, $r7 :: ++before: $r6=0x50ce021eb3b3f3a4, $r31=0xb859e7514e4c4d7c, $r7=0x372cb1e2b3200f36 ++after: $r6=0xe87cdae260229c28, $r31=0xb859e7514e4c4d7c, $r7=0x372cb1e2b3200f36 ++mulw.d.w $r31, $r7, $r28 :: ++before: $r31=0x925642fa7e2de9ab, $r7=0x61404b6550238ceb, $r28=0x75ed502242ed0430 ++after: $r31=0x14f35c8da06d1810, $r7=0x61404b6550238ceb, $r28=0x75ed502242ed0430 ++mulw.d.w $r19, $r16, $r10 :: ++before: $r19=0x0ef82de697f7239f, $r16=0xdf1c56dfe5c0e48d, $r10=0xbc7e740fe1b1dc25 ++after: $r19=0x031b681eebc73461, $r16=0xdf1c56dfe5c0e48d, $r10=0xbc7e740fe1b1dc25 ++mulw.d.w $r29, $r12, $r27 :: ++before: $r29=0xc104a400fa0d1dbf, $r12=0x2aa34e8a5fad6c6f, $r27=0x7f8e4d23644b0d4d ++after: $r29=0x257bcb22b6304063, $r12=0x2aa34e8a5fad6c6f, $r27=0x7f8e4d23644b0d4d ++mulw.d.w $r25, $r16, $r25 :: ++before: $r25=0x5b8ff9172c849fb9, $r16=0x843f90380af6f2af, $r25=0x12f7f8780cb8bfe0 ++after: $r25=0x008b7d1678ecea20, $r16=0x843f90380af6f2af, $r25=0x008b7d1678ecea20 ++mulw.d.w $r13, $r13, $r7 :: ++before: $r13=0x6bba79a88056d891, $r13=0x6757a43d403285ab, $r7=0x2d2ea385888c2664 ++after: $r13=0xe20b7699851798cc, $r13=0xe20b7699851798cc, $r7=0x2d2ea385888c2664 ++mulw.d.w $r12, $r8, $r23 :: ++before: $r12=0x5c96927dcf1fb14e, $r8=0x2b3767b9e9029d4b, $r23=0x252bbcc66b5d834b ++after: $r12=0xf65bb1e7178075f9, $r8=0x2b3767b9e9029d4b, $r23=0x252bbcc66b5d834b ++mulw.d.w $r6, $r13, $r10 :: ++before: $r6=0x5fa5a8b36e8ec3e0, $r13=0xcbca4b4d518b9466, $r10=0xabdf2ec674f70c5b ++after: $r6=0x2541f0d9edfc8842, $r13=0xcbca4b4d518b9466, $r10=0xabdf2ec674f70c5b ++mulw.d.w $r16, $r15, $r23 :: ++before: $r16=0x5b94eeb9c3c9fa01, $r15=0x5c4ebef486f83b43, $r23=0x73f3781c3a1e9216 ++after: $r16=0xe485c9734afb4dc2, $r15=0x5c4ebef486f83b43, $r23=0x73f3781c3a1e9216 ++mulw.d.w $r6, $r31, $r7 :: ++before: $r6=0xbc263312a123caed, $r31=0xe9aa8545d3a99a97, $r7=0x71b5dbacf4f7f2b8 ++after: $r6=0x01e91b5b89bada88, $r31=0xe9aa8545d3a99a97, $r7=0x71b5dbacf4f7f2b8 ++mulw.d.wu $r14, $r17, $r30 :: ++before: $r14=0x94452e0d7eb407b7, $r17=0x629b1902a484a77d, $r30=0x474359ca7f7165ed ++after: $r14=0x51e6af2596105fb9, $r17=0x629b1902a484a77d, $r30=0x474359ca7f7165ed ++mulw.d.wu $r26, $r7, $r5 :: ++before: $r26=0xae9771f0d59319b3, $r7=0x1bcb563dea8f3a3f, $r5=0x759334cc2d543103 ++after: $r26=0x29885124597fbdbd, $r7=0x1bcb563dea8f3a3f, $r5=0x759334cc2d543103 ++mulw.d.wu $r25, $r28, $r27 :: ++before: $r25=0x27ca0bf2d6cd2699, $r28=0x5a015da9b52ffc64, $r27=0x482a4fa5b5625914 ++after: $r25=0x806088dd28c67bd0, $r28=0x5a015da9b52ffc64, $r27=0x482a4fa5b5625914 ++mulw.d.wu $r8, $r4, $r16 :: ++before: $r8=0x22f61239dad7bc92, $r4=0xe8c9964b31b0e199, $r16=0x99fdef421aa22322 ++after: $r8=0x052b6faa1527e152, $r4=0xe8c9964b31b0e199, $r16=0x99fdef421aa22322 ++mulw.d.wu $r29, $r17, $r15 :: ++before: $r29=0xcc5eec6e4f2b5fdb, $r17=0x2d08ada074c2ac37, $r15=0x8967ce1cd4c2362e ++after: $r29=0x6109cada18fc8be2, $r17=0x2d08ada074c2ac37, $r15=0x8967ce1cd4c2362e ++mulw.d.wu $r27, $r23, $r16 :: ++before: $r27=0x2d057e2ead214d6c, $r23=0x987e7a10a0f3ee5d, $r16=0xd515e2a2f06be633 ++after: $r27=0x97288627099f0a87, 
$r23=0x987e7a10a0f3ee5d, $r16=0xd515e2a2f06be633 ++mulw.d.wu $r15, $r19, $r12 :: ++before: $r15=0xce24943d6fe20263, $r19=0xd6bbdcb20d76de15, $r12=0xcc277905bc41da62 ++after: $r15=0x09e6c1c22ff3e60a, $r19=0xd6bbdcb20d76de15, $r12=0xcc277905bc41da62 ++mulw.d.wu $r4, $r4, $r19 :: ++before: $r4=0xe37942a26dc0e882, $r4=0x6a30fb04c3b5431f, $r19=0x4c937bed67cb6c73 ++after: $r4=0x4f5971a615533aed, $r4=0x4f5971a615533aed, $r19=0x4c937bed67cb6c73 ++mulw.d.wu $r7, $r12, $r9 :: ++before: $r7=0xbdebe7a7b19b7dc0, $r12=0x3f6e790fb24d19f1, $r9=0x7a19c4fdd0d29f3e ++after: $r7=0x9171573c297af75e, $r12=0x3f6e790fb24d19f1, $r9=0x7a19c4fdd0d29f3e ++mulw.d.wu $r31, $r30, $r28 :: ++before: $r31=0x690687056e169108, $r30=0xa8abab5bf1d42538, $r28=0x0636a31884ca1e99 ++after: $r31=0x7d70517da256ce78, $r30=0xa8abab5bf1d42538, $r28=0x0636a31884ca1e99 ++div.w $r13, $r28, $r23 :: ++before: $r13=0x0000000016546290, $r28=0x00000000627aa138, $r23=0x000000000534168c ++after: $r13=0x0000000000000012, $r28=0x00000000627aa138, $r23=0x000000000534168c ++div.w $r28, $r19, $r9 :: ++before: $r28=0xffffffffbe03930d, $r19=0x00000000223d0ec7, $r9=0xffffffff8404aa67 ++after: $r28=000000000000000000, $r19=0x00000000223d0ec7, $r9=0xffffffff8404aa67 ++div.w $r18, $r19, $r30 :: ++before: $r18=0xffffffffac214649, $r19=0xffffffff8019c3b7, $r30=0xffffffff871cbf90 ++after: $r18=0x0000000000000001, $r19=0xffffffff8019c3b7, $r30=0xffffffff871cbf90 ++div.w $r24, $r25, $r7 :: ++before: $r24=0xffffffffa144ed80, $r25=0x000000001c4370c7, $r7=0x000000004695aa29 ++after: $r24=000000000000000000, $r25=0x000000001c4370c7, $r7=0x000000004695aa29 ++div.w $r9, $r27, $r4 :: ++before: $r9=0x000000003ae8b7c7, $r27=0xfffffffff3a6ebb2, $r4=0x00000000181d816a ++after: $r9=000000000000000000, $r27=0xfffffffff3a6ebb2, $r4=0x00000000181d816a ++div.w $r28, $r15, $r7 :: ++before: $r28=0xffffffff956a7de4, $r15=0xffffffff9aab217b, $r7=0x000000003b061b78 ++after: $r28=0xffffffffffffffff, $r15=0xffffffff9aab217b, $r7=0x000000003b061b78 ++div.w $r25, $r24, $r12 :: ++before: $r25=0x000000003c6167d4, $r24=0x000000002673145e, $r12=0x0000000001d5e391 ++after: $r25=0x0000000000000014, $r24=0x000000002673145e, $r12=0x0000000001d5e391 ++div.w $r23, $r15, $r4 :: ++before: $r23=0x000000003e0820ee, $r15=0x0000000042793c51, $r4=0x00000000286cdb51 ++after: $r23=0x0000000000000001, $r15=0x0000000042793c51, $r4=0x00000000286cdb51 ++div.w $r28, $r16, $r30 :: ++before: $r28=0xffffffffcf8fd242, $r16=0x000000002a76141e, $r30=0x0000000002429a52 ++after: $r28=0x0000000000000012, $r16=0x000000002a76141e, $r30=0x0000000002429a52 ++div.w $r29, $r8, $r18 :: ++before: $r29=0x0000000074991388, $r8=0xffffffffd594ef43, $r18=0x000000006d3f9603 ++after: $r29=000000000000000000, $r8=0xffffffffd594ef43, $r18=0x000000006d3f9603 ++mod.w $r8, $r13, $r14 :: ++before: $r8=0x000000005cc9e6db, $r13=0xfffffffff7327c6d, $r14=0x0000000023eef833 ++after: $r8=0xfffffffff7327c6d, $r13=0xfffffffff7327c6d, $r14=0x0000000023eef833 ++mod.w $r25, $r24, $r25 :: ++before: $r25=0x00000000539195e4, $r24=0xffffffffd94f10c8, $r25=0x000000002c5786d9 ++after: $r25=0xffffffffd94f10c8, $r24=0xffffffffd94f10c8, $r25=0xffffffffd94f10c8 ++mod.w $r10, $r16, $r23 :: ++before: $r10=0xffffffff9b15f725, $r16=0x00000000448a831d, $r23=0xffffffffd5d7d92b ++after: $r10=0x000000001a625c48, $r16=0x00000000448a831d, $r23=0xffffffffd5d7d92b ++mod.w $r6, $r5, $r29 :: ++before: $r6=0x000000001794d969, $r5=0x000000002fba86b0, $r29=0x0000000040e6ab6b ++after: $r6=0x000000002fba86b0, $r5=0x000000002fba86b0, $r29=0x0000000040e6ab6b ++mod.w $r16, $r14, 
$r29 :: ++before: $r16=0x000000006a503328, $r14=0xffffffffdf0b2ad2, $r29=0xffffffff90dc29c6 ++after: $r16=0xffffffffdf0b2ad2, $r14=0xffffffffdf0b2ad2, $r29=0xffffffff90dc29c6 ++mod.w $r30, $r14, $r18 :: ++before: $r30=0xffffffffc7670acd, $r14=0x0000000053f3b34f, $r18=0xffffffff84b62159 ++after: $r30=0x0000000053f3b34f, $r14=0x0000000053f3b34f, $r18=0xffffffff84b62159 ++mod.w $r31, $r6, $r18 :: ++before: $r31=0xffffffff98334c95, $r6=0xfffffffff241ffd8, $r18=0xffffffffa73314aa ++after: $r31=0xfffffffff241ffd8, $r6=0xfffffffff241ffd8, $r18=0xffffffffa73314aa ++mod.w $r12, $r8, $r4 :: ++before: $r12=0xffffffffd9f19db4, $r8=0xffffffffc89f9796, $r4=0xffffffffaa8e2a3b ++after: $r12=0xffffffffc89f9796, $r8=0xffffffffc89f9796, $r4=0xffffffffaa8e2a3b ++mod.w $r23, $r12, $r4 :: ++before: $r23=0xffffffff94e93220, $r12=0xfffffffffea1587a, $r4=0xffffffffb88b2b87 ++after: $r23=0xfffffffffea1587a, $r12=0xfffffffffea1587a, $r4=0xffffffffb88b2b87 ++mod.w $r13, $r9, $r18 :: ++before: $r13=0x0000000000f718c0, $r9=0xffffffffe264a3a5, $r18=0x0000000002f29ef3 ++after: $r13=0xffffffffffded923, $r9=0xffffffffe264a3a5, $r18=0x0000000002f29ef3 ++div.wu $r24, $r5, $r16 :: ++before: $r24=0x000000000ddf57c5, $r5=0x000000006b1a808c, $r16=0x000000000576fe70 ++after: $r24=0x0000000000000013, $r5=0x000000006b1a808c, $r16=0x000000000576fe70 ++div.wu $r26, $r7, $r9 :: ++before: $r26=0x00000000665e82ff, $r7=0x00000000344d887f, $r9=0x000000007fd6d6d8 ++after: $r26=000000000000000000, $r7=0x00000000344d887f, $r9=0x000000007fd6d6d8 ++div.wu $r13, $r18, $r15 :: ++before: $r13=0xffffffffe82e2cf8, $r18=0x000000007c66b628, $r15=0x000000000305c899 ++after: $r13=0x0000000000000029, $r18=0x000000007c66b628, $r15=0x000000000305c899 ++div.wu $r15, $r14, $r7 :: ++before: $r15=0x0000000000b06b1f, $r14=0x0000000056016282, $r7=0x00000000095a8701 ++after: $r15=0x0000000000000009, $r14=0x0000000056016282, $r7=0x00000000095a8701 ++div.wu $r19, $r12, $r31 :: ++before: $r19=0xffffffffb3a487d1, $r12=0xffffffffbe2fe16e, $r31=0xffffffff8dc0ff7f ++after: $r19=0x0000000000000001, $r12=0xffffffffbe2fe16e, $r31=0xffffffff8dc0ff7f ++div.wu $r6, $r10, $r20 :: ++before: $r6=0x000000001bb491e9, $r10=0x00000000064e382e, $r20=0x000000005977f9f1 ++after: $r6=000000000000000000, $r10=0x00000000064e382e, $r20=0x000000005977f9f1 ++div.wu $r9, $r29, $r28 :: ++before: $r9=0x00000000498c3349, $r29=0x0000000014cbb257, $r28=0xffffffff95165a4a ++after: $r9=000000000000000000, $r29=0x0000000014cbb257, $r28=0xffffffff95165a4a ++div.wu $r10, $r29, $r15 :: ++before: $r10=0xffffffffbb3f9c5d, $r29=0x000000002755057d, $r15=0x0000000014039cc4 ++after: $r10=0x0000000000000001, $r29=0x000000002755057d, $r15=0x0000000014039cc4 ++div.wu $r24, $r31, $r7 :: ++before: $r24=0xffffffffe5a9a3cd, $r31=0xffffffffa1f84b49, $r7=0xffffffffe45bd3b9 ++after: $r24=000000000000000000, $r31=0xffffffffa1f84b49, $r7=0xffffffffe45bd3b9 ++div.wu $r23, $r18, $r6 :: ++before: $r23=0x0000000054e07e9f, $r18=0xffffffffaccbdd8c, $r6=0xfffffffff3729b57 ++after: $r23=000000000000000000, $r18=0xffffffffaccbdd8c, $r6=0xfffffffff3729b57 ++mod.wu $r5, $r20, $r18 :: ++before: $r5=0xffffffffa1ce2e4e, $r20=0xffffffffdbeb0e2d, $r18=0x0000000070157135 ++after: $r5=0x000000006bd59cf8, $r20=0xffffffffdbeb0e2d, $r18=0x0000000070157135 ++mod.wu $r14, $r30, $r17 :: ++before: $r14=0x0000000010e75d07, $r30=0x00000000039c3080, $r17=0x000000001658d87b ++after: $r14=0x00000000039c3080, $r30=0x00000000039c3080, $r17=0x000000001658d87b ++mod.wu $r28, $r7, $r4 :: ++before: $r28=0x000000006df194db, $r7=0x0000000055fae7c9, 
$r4=0xffffffff9a87c1ef ++after: $r28=0x0000000055fae7c9, $r7=0x0000000055fae7c9, $r4=0xffffffff9a87c1ef ++mod.wu $r6, $r14, $r10 :: ++before: $r6=0xffffffff8feb78cc, $r14=0xffffffffe5032316, $r10=0x0000000018ab441e ++after: $r6=0x0000000006fdbe08, $r14=0xffffffffe5032316, $r10=0x0000000018ab441e ++mod.wu $r13, $r15, $r9 :: ++before: $r13=0xffffffffbb28952c, $r15=0x000000002d43f57d, $r9=0x000000002dfbf584 ++after: $r13=0x000000002d43f57d, $r15=0x000000002d43f57d, $r9=0x000000002dfbf584 ++mod.wu $r7, $r30, $r5 :: ++before: $r7=0x0000000009bfb2cf, $r30=0x000000006595d7b3, $r5=0xfffffffffffd1025 ++after: $r7=0x000000006595d7b3, $r30=0x000000006595d7b3, $r5=0xfffffffffffd1025 ++mod.wu $r10, $r9, $r16 :: ++before: $r10=0x00000000342671c6, $r9=0xfffffffff1ff8be3, $r16=0xfffffffffaea052b ++after: $r10=0xfffffffff1ff8be3, $r9=0xfffffffff1ff8be3, $r16=0xfffffffffaea052b ++mod.wu $r16, $r16, $r23 :: ++before: $r16=0xffffffffc0356055, $r16=0x000000002ac1f414, $r23=0x000000004a75c890 ++after: $r16=0x000000002ac1f414, $r16=0x000000002ac1f414, $r23=0x000000004a75c890 ++mod.wu $r19, $r8, $r7 :: ++before: $r19=0xfffffffff8ed6580, $r8=0x000000005fef460e, $r7=0x0000000068eedef2 ++after: $r19=0x000000005fef460e, $r8=0x000000005fef460e, $r7=0x0000000068eedef2 ++mod.wu $r29, $r25, $r25 :: ++before: $r29=0xffffffff9ea76eb0, $r25=0xffffffff818904b9, $r25=0xffffffffe92f4f30 ++after: $r29=000000000000000000, $r25=0xffffffffe92f4f30, $r25=0xffffffffe92f4f30 ++div.d $r7, $r17, $r7 :: ++before: $r7=0xc8f25fb958f2d668, $r17=0x074a14cbaa00fdea, $r7=0xcf95f3de82ceb015 ++after: $r7=000000000000000000, $r17=0x074a14cbaa00fdea, $r7=000000000000000000 ++div.d $r10, $r19, $r12 :: ++before: $r10=0x9ead8a6f6ea63534, $r19=0xaf80d344d48e6cd5, $r12=0xe1f40f759cbfe0e7 ++after: $r10=0x0000000000000002, $r19=0xaf80d344d48e6cd5, $r12=0xe1f40f759cbfe0e7 ++div.d $r23, $r28, $r28 :: ++before: $r23=0x35481a5285093e04, $r28=0xfd79e3c19b697fa8, $r28=0x6ffab603b9e1b7fb ++after: $r23=0x0000000000000001, $r28=0x6ffab603b9e1b7fb, $r28=0x6ffab603b9e1b7fb ++div.d $r30, $r25, $r4 :: ++before: $r30=0x3eacf1d695a34b95, $r25=0xfbff957ab051d494, $r4=0x0670724b8930d53f ++after: $r30=000000000000000000, $r25=0xfbff957ab051d494, $r4=0x0670724b8930d53f ++div.d $r31, $r29, $r6 :: ++before: $r31=0xce8d3df48871d655, $r29=0xf351f7f35927e83d, $r6=0x93a3085686f4101f ++after: $r31=000000000000000000, $r29=0xf351f7f35927e83d, $r6=0x93a3085686f4101f ++div.d $r17, $r23, $r8 :: ++before: $r17=0xfc913f8b14dda5a5, $r23=0x001f938af81988de, $r8=0x9d021a9f06b46953 ++after: $r17=000000000000000000, $r23=0x001f938af81988de, $r8=0x9d021a9f06b46953 ++div.d $r7, $r29, $r15 :: ++before: $r7=0x4593da2923f2ac5b, $r29=0x11fc5a958b182a55, $r15=0x2edafaf2857c6697 ++after: $r7=000000000000000000, $r29=0x11fc5a958b182a55, $r15=0x2edafaf2857c6697 ++div.d $r13, $r31, $r27 :: ++before: $r13=0x97236145608dd8c3, $r31=0x1f0ee96afd23910b, $r27=0xe35e4d5efd2204d3 ++after: $r13=0xffffffffffffffff, $r31=0x1f0ee96afd23910b, $r27=0xe35e4d5efd2204d3 ++div.d $r13, $r26, $r14 :: ++before: $r13=0x2c057bd222f216df, $r26=0x1e006853720971c3, $r14=0x81e35a993e6a15b5 ++after: $r13=000000000000000000, $r26=0x1e006853720971c3, $r14=0x81e35a993e6a15b5 ++div.d $r5, $r9, $r4 :: ++before: $r5=0x93c0d85c66f2c5ab, $r9=0x774fbe894b2ed067, $r4=0x2c46387d55732742 ++after: $r5=0x0000000000000002, $r9=0x774fbe894b2ed067, $r4=0x2c46387d55732742 ++mod.d $r19, $r26, $r16 :: ++before: $r19=0x63304d2181f4a4da, $r26=0x9ed948849ddee475, $r16=0x18a360d3ab980398 ++after: $r19=0xe8c36affa0a6ef3d, $r26=0x9ed948849ddee475, 
$r16=0x18a360d3ab980398 ++mod.d $r27, $r23, $r13 :: ++before: $r27=0xf7156e74db7a8d92, $r23=0x324e7001287ce2a8, $r13=0x3cc7524686bed31c ++after: $r27=0x324e7001287ce2a8, $r23=0x324e7001287ce2a8, $r13=0x3cc7524686bed31c ++mod.d $r8, $r26, $r19 :: ++before: $r8=0x7bda37a222135803, $r26=0x1daf8fd66ff987ed, $r19=0x334631279104fc3b ++after: $r8=0x1daf8fd66ff987ed, $r26=0x1daf8fd66ff987ed, $r19=0x334631279104fc3b ++mod.d $r25, $r15, $r7 :: ++before: $r25=0xd1a0f45d5b463d53, $r15=0x9c4cd7bef3bf0712, $r7=0x420a5c702006f3cc ++after: $r25=0xde57342f13c5fade, $r15=0x9c4cd7bef3bf0712, $r7=0x420a5c702006f3cc ++mod.d $r25, $r18, $r7 :: ++before: $r25=0x93487a905cb08a75, $r18=0x8c79cafa8bebf0a8, $r7=0x1478409d192c144b ++after: $r25=0xf2d30e0c09c8561f, $r18=0x8c79cafa8bebf0a8, $r7=0x1478409d192c144b ++mod.d $r8, $r27, $r27 :: ++before: $r8=0x8756a1690dd7896d, $r27=0x35273279ea76319f, $r27=0xc5292f2331abc6dd ++after: $r8=000000000000000000, $r27=0xc5292f2331abc6dd, $r27=0xc5292f2331abc6dd ++mod.d $r15, $r10, $r24 :: ++before: $r15=0xf8c476adbc930802, $r10=0x8b5832bcd0f6c87e, $r24=0x6cba54a72da38702 ++after: $r15=0xf8128763fe9a4f80, $r10=0x8b5832bcd0f6c87e, $r24=0x6cba54a72da38702 ++mod.d $r27, $r7, $r6 :: ++before: $r27=0x2387015bddb2c076, $r7=0x231e30de7a72ad90, $r6=0x81f1285973e8dc11 ++after: $r27=0x231e30de7a72ad90, $r7=0x231e30de7a72ad90, $r6=0x81f1285973e8dc11 ++mod.d $r16, $r9, $r12 :: ++before: $r16=0x3388d23c07feb1da, $r9=0xe8c01f744b310474, $r12=0xa29071d702959009 ++after: $r16=0xe8c01f744b310474, $r9=0xe8c01f744b310474, $r12=0xa29071d702959009 ++mod.d $r13, $r10, $r20 :: ++before: $r13=0xbd45a261f8de4fe4, $r10=0x6fb0a8c9a2681a8e, $r20=0x2f1b7055cf2409ec ++after: $r13=0x1179c81e042006b6, $r10=0x6fb0a8c9a2681a8e, $r20=0x2f1b7055cf2409ec ++div.du $r17, $r10, $r24 :: ++before: $r17=0x4d363fd48a626fda, $r10=0x7ccdeeaa6c24885f, $r24=0xfcc68e72f59750ae ++after: $r17=000000000000000000, $r10=0x7ccdeeaa6c24885f, $r24=0xfcc68e72f59750ae ++div.du $r20, $r20, $r10 :: ++before: $r20=0x808fa5cb6a75fd6f, $r20=0x0f3f712970031005, $r10=0x1709a8adab2fa578 ++after: $r20=000000000000000000, $r20=000000000000000000, $r10=0x1709a8adab2fa578 ++div.du $r15, $r14, $r19 :: ++before: $r15=0xcd3107423486c8fe, $r14=0xf6bc56277282cd14, $r19=0x0961ac833f00f3e3 ++after: $r15=0x000000000000001a, $r14=0xf6bc56277282cd14, $r19=0x0961ac833f00f3e3 ++div.du $r4, $r29, $r18 :: ++before: $r4=0xa0bfc2fc5b35fa79, $r29=0x2b28c09aa5f12845, $r18=0xed44da2fdf5dce00 ++after: $r4=000000000000000000, $r29=0x2b28c09aa5f12845, $r18=0xed44da2fdf5dce00 ++div.du $r4, $r6, $r25 :: ++before: $r4=0x1fc6e23fd0f09ed0, $r6=0xeaa71d9fb42223ca, $r25=0x045689545e60381c ++after: $r4=0x0000000000000036, $r6=0xeaa71d9fb42223ca, $r25=0x045689545e60381c ++div.du $r10, $r8, $r12 :: ++before: $r10=0xa3710c512d4c006c, $r8=0xc011778733c50a6e, $r12=0xb44475ee048d8167 ++after: $r10=0x0000000000000001, $r8=0xc011778733c50a6e, $r12=0xb44475ee048d8167 ++div.du $r29, $r4, $r29 :: ++before: $r29=0x46d27abff0da1972, $r4=0x17a4e863a182dcd0, $r29=0x59a7b82980ac6a6d ++after: $r29=000000000000000000, $r4=0x17a4e863a182dcd0, $r29=000000000000000000 ++div.du $r15, $r8, $r30 :: ++before: $r15=0x68120919dbbd9b19, $r8=0x4c296c89a6f7a6df, $r30=0x9d9166c1cd0eecfa ++after: $r15=000000000000000000, $r8=0x4c296c89a6f7a6df, $r30=0x9d9166c1cd0eecfa ++div.du $r7, $r18, $r17 :: ++before: $r7=0xd2389cb7af92be89, $r18=0x9a1f65b2c59cfda3, $r17=0xe316cf92f8f0574f ++after: $r7=000000000000000000, $r18=0x9a1f65b2c59cfda3, $r17=0xe316cf92f8f0574f ++div.du $r15, $r25, $r17 :: ++before: 
$r15=0x49651d72d87da955, $r25=0xd22c499c27908743, $r17=0x08d824b01058ecb8 ++after: $r15=0x0000000000000017, $r25=0xd22c499c27908743, $r17=0x08d824b01058ecb8 ++mod.du $r26, $r8, $r23 :: ++before: $r26=0xb0bd66f10c34fe23, $r8=0x5eb9b775d83b4893, $r23=0x08867d4b638f2622 ++after: $r26=0x00f255389114a51d, $r8=0x5eb9b775d83b4893, $r23=0x08867d4b638f2622 ++mod.du $r8, $r10, $r25 :: ++before: $r8=0xe236349cd47eeb11, $r10=0x119102fd7b236a81, $r25=0x08fd72a09e4fb45f ++after: $r8=0x0893905cdcd3b622, $r10=0x119102fd7b236a81, $r25=0x08fd72a09e4fb45f ++mod.du $r25, $r4, $r5 :: ++before: $r25=0x1b669725a0c3a970, $r4=0x0175359099c87b83, $r5=0xcad295c79f1d835a ++after: $r25=0x0175359099c87b83, $r4=0x0175359099c87b83, $r5=0xcad295c79f1d835a ++mod.du $r7, $r28, $r20 :: ++before: $r7=0x7117e70798869df4, $r28=0xe35b93aa0c37fe97, $r20=0x741084dead7970d0 ++after: $r7=0x6f4b0ecb5ebe8dc7, $r28=0xe35b93aa0c37fe97, $r20=0x741084dead7970d0 ++mod.du $r30, $r24, $r9 :: ++before: $r30=0xc4d432a8ce91f693, $r24=0x77c03aceb2ea6b45, $r9=0xb8cd7773fb72b7ca ++after: $r30=0x77c03aceb2ea6b45, $r24=0x77c03aceb2ea6b45, $r9=0xb8cd7773fb72b7ca ++mod.du $r23, $r9, $r28 :: ++before: $r23=0x13f1f3e1891b6b73, $r9=0x9811699becce53a9, $r28=0xed15e264f0c39b88 ++after: $r23=0x9811699becce53a9, $r9=0x9811699becce53a9, $r28=0xed15e264f0c39b88 ++mod.du $r13, $r12, $r14 :: ++before: $r13=0xb8b22bcb0cb970e8, $r12=0x16cdecd7c0091cd2, $r14=0x4fcab819ebadbdfd ++after: $r13=0x16cdecd7c0091cd2, $r12=0x16cdecd7c0091cd2, $r14=0x4fcab819ebadbdfd ++mod.du $r30, $r17, $r12 :: ++before: $r30=0xbf96226d2de1240d, $r17=0x9fe4b2c7557d6b9a, $r12=0x3668e581a5de6efd ++after: $r30=0x3312e7c409c08da0, $r17=0x9fe4b2c7557d6b9a, $r12=0x3668e581a5de6efd ++mod.du $r14, $r4, $r6 :: ++before: $r14=0x9bc8f8a69a7f55c2, $r4=0x530a9c5a21769bab, $r6=0x2805bef72d33cbd5 ++after: $r14=0x02ff1e6bc70f0401, $r4=0x530a9c5a21769bab, $r6=0x2805bef72d33cbd5 ++mod.du $r23, $r28, $r12 :: ++before: $r23=0x82a854f86e642cba, $r28=0x0dd0fd63485d6c3d, $r12=0x56b21f15cb9d2bf2 ++after: $r23=0x0dd0fd63485d6c3d, $r28=0x0dd0fd63485d6c3d, $r12=0x56b21f15cb9d2bf2 ++alsl.w $r18, $r10, $r15, 2 :: ++before: $r18=0xafb40df16156827b, $r10=0x9b0b86116a0d89cb, $r15=0x80086c066ea6842b ++after: $r18=0x0000000016dcab57, $r10=0x9b0b86116a0d89cb, $r15=0x80086c066ea6842b ++alsl.w $r24, $r5, $r4, 2 :: ++before: $r24=0xb8b63b8205a919df, $r5=0x7319260322fa2d6d, $r4=0x1efce6644a51ebf9 ++after: $r24=0xffffffffd63aa1ad, $r5=0x7319260322fa2d6d, $r4=0x1efce6644a51ebf9 ++alsl.w $r24, $r5, $r27, 2 :: ++before: $r24=0xb4f0fd355869e078, $r5=0x26abeea20b7d1ac1, $r27=0x4108f7f27e321c8f ++after: $r24=0xffffffffac268793, $r5=0x26abeea20b7d1ac1, $r27=0x4108f7f27e321c8f ++alsl.w $r24, $r29, $r10, 1 :: ++before: $r24=0x4b948e9a0b82df22, $r29=0x11893c9dd43d0112, $r10=0x51a030165671a055 ++after: $r24=0xfffffffffeeba279, $r29=0x11893c9dd43d0112, $r10=0x51a030165671a055 ++alsl.w $r5, $r10, $r18, 1 :: ++before: $r5=0xfc253ac9e2b55590, $r10=0x2682507563a85b07, $r18=0xa467083f66457d1d ++after: $r5=0x000000002d96332b, $r10=0x2682507563a85b07, $r18=0xa467083f66457d1d ++alsl.w $r20, $r13, $r10, 3 :: ++before: $r20=0x76e8c346a721cdab, $r13=0x548f2762bfb1bc01, $r10=0xa6e0d27e62dcc594 ++after: $r20=0x00000000606aa59c, $r13=0x548f2762bfb1bc01, $r10=0xa6e0d27e62dcc594 ++alsl.w $r16, $r6, $r24, 3 :: ++before: $r16=0x039f77b88fc3b663, $r6=0x281818bf4a36a7e5, $r24=0x86cd2a06ef475a61 ++after: $r16=0x0000000040fc9989, $r6=0x281818bf4a36a7e5, $r24=0x86cd2a06ef475a61 ++alsl.w $r14, $r18, $r9, 4 :: ++before: $r14=0x08a58ea94346ff16, 
$r18=0x4ff191f91397adea, $r9=0x4cda359b03c97a53 ++after: $r14=0x000000003d4458f3, $r18=0x4ff191f91397adea, $r9=0x4cda359b03c97a53 ++alsl.w $r8, $r6, $r29, 1 :: ++before: $r8=0xae0bfa182556c725, $r6=0xda179bc2f41d03d3, $r29=0x1d23e4da08af7978 ++after: $r8=0xfffffffff0e9811e, $r6=0xda179bc2f41d03d3, $r29=0x1d23e4da08af7978 ++alsl.w $r31, $r26, $r30, 1 :: ++before: $r31=0xd6af9fcd7ffd8e75, $r26=0x3e88bb77d6665633, $r30=0x23a0414c69b804c1 ++after: $r31=0x000000001684b127, $r26=0x3e88bb77d6665633, $r30=0x23a0414c69b804c1 ++alsl.wu $r20, $r24, $r18, 2 :: ++before: $r20=0xc714872ff3c39370, $r24=0xcaea31ddabb275f9, $r18=0xedbfc2cedca8eb7a ++after: $r20=0x000000008b72c35e, $r24=0xcaea31ddabb275f9, $r18=0xedbfc2cedca8eb7a ++alsl.wu $r13, $r26, $r15, 3 :: ++before: $r13=0xe1a0ba1adcb75aa4, $r26=0x8adbed432acf321a, $r15=0xeae447eaa60bb142 ++after: $r13=0x00000000fc854212, $r26=0x8adbed432acf321a, $r15=0xeae447eaa60bb142 ++alsl.wu $r4, $r17, $r27, 3 :: ++before: $r4=0xb153f9ecea23068c, $r17=0xd2066b089c9499a3, $r27=0x36ed3c96ac4751aa ++after: $r4=0x0000000090ec1ec2, $r17=0xd2066b089c9499a3, $r27=0x36ed3c96ac4751aa ++alsl.wu $r20, $r10, $r4, 4 :: ++before: $r20=0x8fb2705357e98d66, $r10=0xd353329585fc71dd, $r4=0x739237ed6a677f00 ++after: $r20=0x00000000ca2e9cd0, $r10=0xd353329585fc71dd, $r4=0x739237ed6a677f00 ++alsl.wu $r31, $r12, $r23, 2 :: ++before: $r31=0x6caac60acd9bc6f4, $r12=0xc87131b9171530df, $r23=0x39c8e321a6e131c0 ++after: $r31=0x000000000335f53c, $r12=0xc87131b9171530df, $r23=0x39c8e321a6e131c0 ++alsl.wu $r13, $r14, $r19, 2 :: ++before: $r13=0xd2c7072036f54e45, $r14=0x35ea1627556f8f98, $r19=0x97054728433042d3 ++after: $r13=0x0000000098ee8133, $r14=0x35ea1627556f8f98, $r19=0x97054728433042d3 ++alsl.wu $r7, $r14, $r5, 1 :: ++before: $r7=0x5a0f1fae80105d64, $r14=0xd300b74879e33a53, $r5=0x3a1e7389d0669d4c ++after: $r7=0x00000000c42d11f2, $r14=0xd300b74879e33a53, $r5=0x3a1e7389d0669d4c ++alsl.wu $r28, $r4, $r9, 1 :: ++before: $r28=0xcd7fd8389b4f4062, $r4=0xad1830d644c205e7, $r9=0xced1c031d73f9087 ++after: $r28=0x0000000060c39c55, $r4=0xad1830d644c205e7, $r9=0xced1c031d73f9087 ++alsl.wu $r13, $r9, $r29, 4 :: ++before: $r13=0x081601560f53b081, $r9=0xd3ee3c45f08cd218, $r29=0xa7d5a43a1df2aa1d ++after: $r13=0x0000000026bfcb9d, $r9=0xd3ee3c45f08cd218, $r29=0xa7d5a43a1df2aa1d ++alsl.wu $r30, $r29, $r31, 2 :: ++before: $r30=0xf383bd5bfae7e46d, $r29=0x67862a0151c65567, $r31=0x9cdcbf604f46c48a ++after: $r30=0x0000000096601a26, $r29=0x67862a0151c65567, $r31=0x9cdcbf604f46c48a ++alsl.d $r18, $r28, $r16, 4 :: ++before: $r18=0x53e533e973dfa49c, $r28=0x6665a9d32abaaf55, $r16=0xf70490874fb75e6e ++after: $r18=0x5d5f2db9fb6253be, $r28=0x6665a9d32abaaf55, $r16=0xf70490874fb75e6e ++alsl.d $r10, $r30, $r18, 2 :: ++before: $r10=0xfb14c3e6acd722c3, $r30=0xcae19862ab088fcc, $r18=0x87c434d85259d923 ++after: $r10=0xb34a9662fe7c1853, $r30=0xcae19862ab088fcc, $r18=0x87c434d85259d923 ++alsl.d $r17, $r25, $r26, 1 :: ++before: $r17=0x95e79a567c313ec7, $r25=0x83a0e706c2c4c534, $r26=0x2f49f1e9d5b91fc9 ++after: $r17=0x368bbff75b42aa31, $r25=0x83a0e706c2c4c534, $r26=0x2f49f1e9d5b91fc9 ++alsl.d $r7, $r24, $r24, 2 :: ++before: $r7=0x35b966d0db9f681c, $r24=0xc0bc97593f1054fc, $r24=0x7e564928b0a53ac6 ++after: $r7=0x77af6dcb733a25de, $r24=0x7e564928b0a53ac6, $r24=0x7e564928b0a53ac6 ++alsl.d $r6, $r30, $r24, 3 :: ++before: $r6=0x38ad1fb21e071421, $r30=0xb959c439b0436d6d, $r24=0x647c742c9ce02fc5 ++after: $r6=0x2f4a95fa1efb9b2d, $r30=0xb959c439b0436d6d, $r24=0x647c742c9ce02fc5 ++alsl.d $r18, $r28, $r10, 2 :: ++before: 
$r18=0x1bde2962dc5bb68b, $r28=0x67c403d00c9389bd, $r10=0x8fc18921f225d05a ++after: $r18=0x2ed198622473f74e, $r28=0x67c403d00c9389bd, $r10=0x8fc18921f225d05a ++alsl.d $r8, $r27, $r15, 3 :: ++before: $r8=0x5b8de9d8b393fa06, $r27=0x393ec1c28e89e9d8, $r15=0x1a59f9d852c3f8ba ++after: $r8=0xe45007ecc713477a, $r27=0x393ec1c28e89e9d8, $r15=0x1a59f9d852c3f8ba ++alsl.d $r27, $r24, $r6, 4 :: ++before: $r27=0x72195c1ca51cc4db, $r24=0x4ee5b51e1e161ab2, $r6=0x08a10acb4b625fef ++after: $r27=0xf6fc5cad2cc40b0f, $r24=0x4ee5b51e1e161ab2, $r6=0x08a10acb4b625fef ++alsl.d $r29, $r4, $r18, 2 :: ++before: $r29=0xf3ed9e39d83d3dec, $r4=0xa3816509b9a6c23d, $r18=0x6949e8e534450dd5 ++after: $r29=0xf74f7d0c1ae016c9, $r4=0xa3816509b9a6c23d, $r18=0x6949e8e534450dd5 ++alsl.d $r16, $r13, $r8, 1 :: ++before: $r16=0x588f388f25a342df, $r13=0xde33a74109c7be30, $r8=0x8b02cf06997a065a ++after: $r16=0x476a1d88ad0982ba, $r13=0xde33a74109c7be30, $r8=0x8b02cf06997a065a ++lu12i.w $r9, 94146 :: ++before: $r9=0xdf45bd002ccf48e1 ++after: $r9=0x0000000016fc2000 ++lu12i.w $r10, 129014 :: ++before: $r10=0xa5138a37d09ada8a ++after: $r10=0x000000001f7f6000 ++lu12i.w $r18, -130138 :: ++before: $r18=0x0efe46a52b8b3e5e ++after: $r18=0xffffffffe03a6000 ++lu12i.w $r7, -467080 :: ++before: $r7=0x29084adf6d033a88 ++after: $r7=0xffffffff8df78000 ++lu12i.w $r10, 360675 :: ++before: $r10=0xe9072e7fec2a5d1c ++after: $r10=0x00000000580e3000 ++lu12i.w $r28, 205272 :: ++before: $r28=0x2f7d41c7bd959cd5 ++after: $r28=0x00000000321d8000 ++lu12i.w $r16, -266298 :: ++before: $r16=0xcb48200d89b48566 ++after: $r16=0xffffffffbefc6000 ++lu12i.w $r12, -186346 :: ++before: $r12=0xd605223c244f4a50 ++after: $r12=0xffffffffd2816000 ++lu12i.w $r15, 247864 :: ++before: $r15=0x22c035c8c90016be ++after: $r15=0x000000003c838000 ++lu12i.w $r20, -511005 :: ++before: $r20=0x6b2fd1aa0b603fec ++after: $r20=0xffffffff833e3000 ++lu32i.d $r8, -310956 :: ++before: $r8=0xb331616751ed8877 ++after: $r8=0xfffb415451ed8877 ++lu32i.d $r17, 35590 :: ++before: $r17=0x0e49bab8d80e1dd7 ++after: $r17=0x00008b06d80e1dd7 ++lu32i.d $r4, 500474 :: ++before: $r4=0x842cdc9ac0a0adf6 ++after: $r4=0x0007a2fac0a0adf6 ++lu32i.d $r23, -447277 :: ++before: $r23=0xc9ca69b8e5ab079e ++after: $r23=0xfff92cd3e5ab079e ++lu32i.d $r12, -503028 :: ++before: $r12=0x27d83e1c77dec50a ++after: $r12=0xfff8530c77dec50a ++lu32i.d $r26, -355708 :: ++before: $r26=0xc00dcc918a89f350 ++after: $r26=0xfffa92848a89f350 ++lu32i.d $r16, -231989 :: ++before: $r16=0xd180188cdc073491 ++after: $r16=0xfffc75cbdc073491 ++lu32i.d $r26, 250642 :: ++before: $r26=0x4efae034432bbb3b ++after: $r26=0x0003d312432bbb3b ++lu32i.d $r15, 237105 :: ++before: $r15=0x7bf2141e673e336f ++after: $r15=0x00039e31673e336f ++lu32i.d $r4, -312071 :: ++before: $r4=0x187c50bfc5eb8f32 ++after: $r4=0xfffb3cf9c5eb8f32 ++lu52i.d $r8, $r25, 1920 :: ++before: $r8=0x1da74dfcb33d471a, $r25=0x453ae9f1200f4d41 ++after: $r8=0x780ae9f1200f4d41, $r25=0x453ae9f1200f4d41 ++lu52i.d $r14, $r25, -2008 :: ++before: $r14=0x5e954055ebaec78f, $r25=0xb7637f9119e12e31 ++after: $r14=0x82837f9119e12e31, $r25=0xb7637f9119e12e31 ++lu52i.d $r26, $r24, -1803 :: ++before: $r26=0xead69e40b96b23bf, $r24=0x779862b03d1ab575 ++after: $r26=0x8f5862b03d1ab575, $r24=0x779862b03d1ab575 ++lu52i.d $r5, $r25, -1406 :: ++before: $r5=0x452236306da7c667, $r25=0x9f16a6e48cca3a7b ++after: $r5=0xa826a6e48cca3a7b, $r25=0x9f16a6e48cca3a7b ++lu52i.d $r26, $r23, -667 :: ++before: $r26=0x5604b9744291e45a, $r23=0x70eecb3116b1795c ++after: $r26=0xd65ecb3116b1795c, $r23=0x70eecb3116b1795c ++lu52i.d $r14, $r27, -1221 
:: ++before: $r14=0x6d9a8cfe459c1c48, $r27=0x85452bdd40205e0d ++after: $r14=0xb3b52bdd40205e0d, $r27=0x85452bdd40205e0d ++lu52i.d $r25, $r8, 423 :: ++before: $r25=0x1a8d72e42f68a33d, $r8=0x7089b6fe4c1f7a70 ++after: $r25=0x1a79b6fe4c1f7a70, $r8=0x7089b6fe4c1f7a70 ++lu52i.d $r30, $r10, -177 :: ++before: $r30=0x7c4fe646acac7ac0, $r10=0xe7d222ba1fd5cae2 ++after: $r30=0xf4f222ba1fd5cae2, $r10=0xe7d222ba1fd5cae2 ++lu52i.d $r6, $r13, -1438 :: ++before: $r6=0xdb3d6a615a9e492f, $r13=0xaa9303648ff489f2 ++after: $r6=0xa62303648ff489f2, $r13=0xaa9303648ff489f2 ++lu52i.d $r25, $r4, -634 :: ++before: $r25=0x8b41b813d85b8ee8, $r4=0xe4d31961e42e713c ++after: $r25=0xd8631961e42e713c, $r4=0xe4d31961e42e713c ++addi.w $r6, $r27, 1727 :: ++before: $r6=0x12845f036198fa6f, $r27=0xda77c63c764655da ++after: $r6=0x0000000076465c99, $r27=0xda77c63c764655da ++addi.w $r9, $r8, -381 :: ++before: $r9=0x21a7e3cfa2649a4f, $r8=0xc64c73b3bd4c1dcb ++after: $r9=0xffffffffbd4c1c4e, $r8=0xc64c73b3bd4c1dcb ++addi.w $r16, $r6, -186 :: ++before: $r16=0x6c47b02ef52a3502, $r6=0x24ca1a646dac5cc3 ++after: $r16=0x000000006dac5c09, $r6=0x24ca1a646dac5cc3 ++addi.w $r20, $r31, 1503 :: ++before: $r20=0xb6144d8f9513c78e, $r31=0xc4b808764e894e6c ++after: $r20=0x000000004e89544b, $r31=0xc4b808764e894e6c ++addi.w $r19, $r17, -1918 :: ++before: $r19=0xcf97c9215c961121, $r17=0x9b714c4cb899399b ++after: $r19=0xffffffffb899321d, $r17=0x9b714c4cb899399b ++addi.w $r14, $r8, -1781 :: ++before: $r14=0xe1abf22f6c3c82ec, $r8=0x4110e9c1b5f59ef6 ++after: $r14=0xffffffffb5f59801, $r8=0x4110e9c1b5f59ef6 ++addi.w $r29, $r18, 2047 :: ++before: $r29=0x4b64427195dda12d, $r18=0xadf5af70b7b3f37b ++after: $r29=0xffffffffb7b3fb7a, $r18=0xadf5af70b7b3f37b ++addi.w $r4, $r30, 244 :: ++before: $r4=0xfc785d46f5bbdff4, $r30=0x1e061e9d51362d9c ++after: $r4=0x0000000051362e90, $r30=0x1e061e9d51362d9c ++addi.w $r7, $r23, -376 :: ++before: $r7=0xe037576d82c12e8d, $r23=0xa77c8da72af708f1 ++after: $r7=0x000000002af70779, $r23=0xa77c8da72af708f1 ++addi.w $r23, $r17, 1924 :: ++before: $r23=0x00a10df57c4103ef, $r17=0x26d2628746ad0a3e ++after: $r23=0x0000000046ad11c2, $r17=0x26d2628746ad0a3e ++addi.d $r14, $r14, 152 :: ++before: $r14=0x61b497fb58a816d9, $r14=0x29eb218dd65d9d6c ++after: $r14=0x29eb218dd65d9e04, $r14=0x29eb218dd65d9e04 ++addi.d $r20, $r13, -640 :: ++before: $r20=0xd80db8387a8cdd93, $r13=0x5e23e4b01f2bbd6d ++after: $r20=0x5e23e4b01f2bbaed, $r13=0x5e23e4b01f2bbd6d ++addi.d $r13, $r25, -743 :: ++before: $r13=0x5dfea060c6e8f587, $r25=0x95f49b783954f9f9 ++after: $r13=0x95f49b783954f712, $r25=0x95f49b783954f9f9 ++addi.d $r4, $r30, 676 :: ++before: $r4=0xd72f370f6ce7bc4c, $r30=0x148550b0f97ce601 ++after: $r4=0x148550b0f97ce8a5, $r30=0x148550b0f97ce601 ++addi.d $r26, $r8, 1630 :: ++before: $r26=0xa4120a67f8d6df1a, $r8=0xa83f4bbcaf5bc52e ++after: $r26=0xa83f4bbcaf5bcb8c, $r8=0xa83f4bbcaf5bc52e ++addi.d $r20, $r29, -1971 :: ++before: $r20=0xa8f9c82780ac16d5, $r29=0x7ab169a5751642bc ++after: $r20=0x7ab169a575163b09, $r29=0x7ab169a5751642bc ++addi.d $r8, $r8, 1160 :: ++before: $r8=0x6f22bdb480c14540, $r8=0x94e1253c331b17f2 ++after: $r8=0x94e1253c331b1c7a, $r8=0x94e1253c331b1c7a ++addi.d $r15, $r27, 844 :: ++before: $r15=0x0312473547bcfe03, $r27=0x7a786cbc8149d818 ++after: $r15=0x7a786cbc8149db64, $r27=0x7a786cbc8149d818 ++addi.d $r8, $r26, -1185 :: ++before: $r8=0xee2b1be852671bc3, $r26=0x6a36d61dfee3a6fb ++after: $r8=0x6a36d61dfee3a25a, $r26=0x6a36d61dfee3a6fb ++addi.d $r17, $r27, -2046 :: ++before: $r17=0x70e068b54ed72e20, $r27=0x922681ab8837027b ++after: 
$r17=0x922681ab8836fa7d, $r27=0x922681ab8837027b ++addu16i.d $r20, $r29, -14564 :: ++before: $r20=0x8232770e3472bdc3, $r29=0x4d28c5567787c26e ++after: $r20=0x4d28c5563ea3c26e, $r29=0x4d28c5567787c26e ++addu16i.d $r29, $r4, -3511 :: ++before: $r29=0x9076403ed2f0fdf4, $r4=0x471cafb4183a389f ++after: $r29=0x471cafb40a83389f, $r4=0x471cafb4183a389f ++addu16i.d $r26, $r15, 25897 :: ++before: $r26=0x0dec118b1eb13234, $r15=0x06ff5ce56111b301 ++after: $r26=0x06ff5ce5c63ab301, $r15=0x06ff5ce56111b301 ++addu16i.d $r9, $r5, -21829 :: ++before: $r9=0x73209239d98fb81a, $r5=0x1dc8f0ba4710eba3 ++after: $r9=0x1dc8f0b9f1cbeba3, $r5=0x1dc8f0ba4710eba3 ++addu16i.d $r28, $r25, -23832 :: ++before: $r28=0xa39ba8429a9c13a6, $r25=0x4fffb32851c13ff2 ++after: $r28=0x4fffb327f4a93ff2, $r25=0x4fffb32851c13ff2 ++addu16i.d $r23, $r30, -32189 :: ++before: $r23=0x08abd919f5ea43b1, $r30=0x40078826f7336f0e ++after: $r23=0x4007882679766f0e, $r30=0x40078826f7336f0e ++addu16i.d $r28, $r24, 16372 :: ++before: $r28=0x695e543e25e7d3e4, $r24=0x30279db606efa8ec ++after: $r28=0x30279db646e3a8ec, $r24=0x30279db606efa8ec ++addu16i.d $r4, $r18, -28041 :: ++before: $r4=0xa125cadb71209757, $r18=0xff287b5e7fb2a2ba ++after: $r4=0xff287b5e1229a2ba, $r18=0xff287b5e7fb2a2ba ++addu16i.d $r5, $r17, -11268 :: ++before: $r5=0xd5d3e6da7c594ca9, $r17=0x2bc9be0ef252584c ++after: $r5=0x2bc9be0ec64e584c, $r17=0x2bc9be0ef252584c ++addu16i.d $r29, $r28, -15645 :: ++before: $r29=0x0ee0391151007613, $r28=0xae616c39d87c4b6e ++after: $r29=0xae616c399b5f4b6e, $r28=0xae616c39d87c4b6e ++andi $r28, $r18, 1288 :: ++before: $r28=0xd62f833fbbd483b3, $r18=0xa2f268cdcf18dd00 ++after: $r28=0x0000000000000500, $r18=0xa2f268cdcf18dd00 ++andi $r12, $r13, 153 :: ++before: $r12=0xc40efc9a74a3a13b, $r13=0xfd609200795f877c ++after: $r12=0x0000000000000018, $r13=0xfd609200795f877c ++andi $r6, $r18, 3633 :: ++before: $r6=0x79ee7ee7a7865b79, $r18=0x644bec92dca1ad7f ++after: $r6=0x0000000000000c31, $r18=0x644bec92dca1ad7f ++andi $r5, $r31, 3299 :: ++before: $r5=0x2d64be0e5c2ec0f6, $r31=0x87253b6589f182c7 ++after: $r5=0x00000000000000c3, $r31=0x87253b6589f182c7 ++andi $r28, $r5, 3189 :: ++before: $r28=0xf2e4ed85d98a1860, $r5=0x9f58e4edd98b60d1 ++after: $r28=0x0000000000000051, $r5=0x9f58e4edd98b60d1 ++andi $r18, $r29, 4031 :: ++before: $r18=0x3c067920d48cf0d2, $r29=0x2bf35e68c503ecfe ++after: $r18=0x0000000000000cbe, $r29=0x2bf35e68c503ecfe ++andi $r20, $r24, 3252 :: ++before: $r20=0xe1d95be05fd57a64, $r24=0xd33e771521b24bd3 ++after: $r20=0x0000000000000890, $r24=0xd33e771521b24bd3 ++andi $r6, $r23, 1665 :: ++before: $r6=0x23341b2d86d02365, $r23=0x16de10f2b4a45064 ++after: $r6=000000000000000000, $r23=0x16de10f2b4a45064 ++andi $r27, $r14, 325 :: ++before: $r27=0xd7db9d77aea4dcf5, $r14=0x142272b737435eb7 ++after: $r27=0x0000000000000005, $r14=0x142272b737435eb7 ++andi $r23, $r16, 1056 :: ++before: $r23=0x57fee53581b09718, $r16=0x02ace25d9e2ddbaa ++after: $r23=0x0000000000000020, $r16=0x02ace25d9e2ddbaa ++ori $r26, $r13, 3251 :: ++before: $r26=0x6d47cf7e5bb5c13e, $r13=0x93aed4996805ba3b ++after: $r26=0x93aed4996805bebb, $r13=0x93aed4996805ba3b ++ori $r10, $r25, 568 :: ++before: $r10=0x42f0332098f938af, $r25=0xd7916fe8d569567b ++after: $r10=0xd7916fe8d569567b, $r25=0xd7916fe8d569567b ++ori $r12, $r17, 1798 :: ++before: $r12=0xc507d4150a742b76, $r17=0x2b9a102a5b5b15f7 ++after: $r12=0x2b9a102a5b5b17f7, $r17=0x2b9a102a5b5b15f7 ++ori $r15, $r15, 1781 :: ++before: $r15=0xa54ad5ecc0e72adb, $r15=0x37c18ad4ec6e678c ++after: $r15=0x37c18ad4ec6e67fd, $r15=0x37c18ad4ec6e67fd ++ori $r5, $r4, 
682 :: ++before: $r5=0x1f388b2a2b18004d, $r4=0xb5fa23fbb02eeedb ++after: $r5=0xb5fa23fbb02eeefb, $r4=0xb5fa23fbb02eeedb ++ori $r27, $r24, 1931 :: ++before: $r27=0x73b086f8a8b4d7b5, $r24=0xd23e30ab1e45470a ++after: $r27=0xd23e30ab1e45478b, $r24=0xd23e30ab1e45470a ++ori $r28, $r6, 3593 :: ++before: $r28=0x972967beac695928, $r6=0x2c701d0bc28816c5 ++after: $r28=0x2c701d0bc2881ecd, $r6=0x2c701d0bc28816c5 ++ori $r27, $r4, 3679 :: ++before: $r27=0x54fecbbf0a06e5a6, $r4=0xf0b6d846464a3331 ++after: $r27=0xf0b6d846464a3f7f, $r4=0xf0b6d846464a3331 ++ori $r9, $r16, 905 :: ++before: $r9=0x71f3cd001c729062, $r16=0xc5720758095e4592 ++after: $r9=0xc5720758095e479b, $r16=0xc5720758095e4592 ++ori $r26, $r7, 3473 :: ++before: $r26=0xd7ce86800c3c0f4b, $r7=0xc4a58f787cdf5bb2 ++after: $r26=0xc4a58f787cdf5fb3, $r7=0xc4a58f787cdf5bb2 ++xori $r27, $r31, 2690 :: ++before: $r27=0xe6d49c2dc629fbc7, $r31=0x91832665d1a898e2 ++after: $r27=0x91832665d1a89260, $r31=0x91832665d1a898e2 ++xori $r15, $r5, 697 :: ++before: $r15=0xada49c0d48beffc5, $r5=0x0e3cf426f1be4766 ++after: $r15=0x0e3cf426f1be45df, $r5=0x0e3cf426f1be4766 ++xori $r9, $r20, 2268 :: ++before: $r9=0x174a71d6d3757e3e, $r20=0x25ed4678037622be ++after: $r9=0x25ed467803762a62, $r20=0x25ed4678037622be ++xori $r31, $r15, 3817 :: ++before: $r31=0x1fac1694b40fbf2e, $r15=0x4fe4fb2e0b660ca2 ++after: $r31=0x4fe4fb2e0b66024b, $r15=0x4fe4fb2e0b660ca2 ++xori $r17, $r14, 3929 :: ++before: $r17=0x2dc443400df4e153, $r14=0x1db25e602ef8ece5 ++after: $r17=0x1db25e602ef8e3bc, $r14=0x1db25e602ef8ece5 ++xori $r4, $r28, 2735 :: ++before: $r4=0x5fb5ad5a84e97835, $r28=0xc52da11293641639 ++after: $r4=0xc52da11293641c96, $r28=0xc52da11293641639 ++xori $r5, $r13, 1153 :: ++before: $r5=0x5c5fc4ba45da005f, $r13=0xe46f853b7d602b84 ++after: $r5=0xe46f853b7d602f05, $r13=0xe46f853b7d602b84 ++xori $r30, $r26, 3867 :: ++before: $r30=0x1419915b6f92678b, $r26=0xa984612f1266da94 ++after: $r30=0xa984612f1266d58f, $r26=0xa984612f1266da94 ++xori $r13, $r13, 3426 :: ++before: $r13=0xc2b8fd036ba6314b, $r13=0x4cf49604f644713c ++after: $r13=0x4cf49604f6447c5e, $r13=0x4cf49604f6447c5e ++xori $r25, $r23, 2669 :: ++before: $r25=0xde46e3673c9a75dc, $r23=0xfa1177a89f08c81e ++after: $r25=0xfa1177a89f08c273, $r23=0xfa1177a89f08c81e ++sll.w $r13, $r8, $r12 :: ++before: $r13=0x26131fa72f4b76f1, $r8=0xf34f7108538078d0, $r12=0x10bbd12a8e087501 ++after: $r13=0xffffffffa700f1a0, $r8=0xf34f7108538078d0, $r12=0x10bbd12a8e087501 ++sll.w $r29, $r8, $r15 :: ++before: $r29=0xb6f529da4017d0d9, $r8=0x49fbfb11ef643171, $r15=0x9d0425e747d11bde ++after: $r29=0x0000000040000000, $r8=0x49fbfb11ef643171, $r15=0x9d0425e747d11bde ++sll.w $r30, $r31, $r12 :: ++before: $r30=0xcfc5236f5c070644, $r31=0xba8301a1087b3a96, $r12=0xff7589561824e1be ++after: $r30=0xffffffff80000000, $r31=0xba8301a1087b3a96, $r12=0xff7589561824e1be ++sll.w $r28, $r10, $r7 :: ++before: $r28=0x37fa51674df87149, $r10=0x39212605c5d0cf7d, $r7=0x18a8e323326ce5aa ++after: $r28=0x00000000433df400, $r10=0x39212605c5d0cf7d, $r7=0x18a8e323326ce5aa ++sll.w $r8, $r9, $r14 :: ++before: $r8=0x707a9e0ece8abe40, $r9=0x94b7b20a80c16c7b, $r14=0x6887c46efb4cc181 ++after: $r8=0x000000000182d8f6, $r9=0x94b7b20a80c16c7b, $r14=0x6887c46efb4cc181 ++sll.w $r8, $r4, $r24 :: ++before: $r8=0xd718a01b03a53964, $r4=0x8ebd8bfeec304e2a, $r24=0x06b4a83a6838b5d1 ++after: $r8=0xffffffff9c540000, $r4=0x8ebd8bfeec304e2a, $r24=0x06b4a83a6838b5d1 ++sll.w $r23, $r31, $r27 :: ++before: $r23=0xf50cab824a06d30e, $r31=0xa8ee12cbd8dec935, $r27=0x118002b3f0cecbab ++after: $r23=0xfffffffff649a800, 
$r31=0xa8ee12cbd8dec935, $r27=0x118002b3f0cecbab ++sll.w $r8, $r25, $r26 :: ++before: $r8=0x8163368243faadee, $r25=0x3a04f47bf19a4cc8, $r26=0x6a58cd3a57b4eeb4 ++after: $r8=0xffffffffcc800000, $r25=0x3a04f47bf19a4cc8, $r26=0x6a58cd3a57b4eeb4 ++sll.w $r25, $r13, $r12 :: ++before: $r25=0x3d6831e1afab1b1a, $r13=0x9ee672580cb39777, $r12=0x9084acd2bc7404ca ++after: $r25=0xffffffffce5ddc00, $r13=0x9ee672580cb39777, $r12=0x9084acd2bc7404ca ++sll.w $r20, $r5, $r29 :: ++before: $r20=0x90f7ee3ff75817a6, $r5=0xe4ae07989d6148d7, $r29=0x3e208bfcf046fffd ++after: $r20=0xffffffffe0000000, $r5=0xe4ae07989d6148d7, $r29=0x3e208bfcf046fffd ++srl.w $r20, $r29, $r30 :: ++before: $r20=0xff3f6b79b5e2b56d, $r29=0x1195aa09fa92d26b, $r30=0xa93a8fd11ad5ae99 ++after: $r20=0x000000000000007d, $r29=0x1195aa09fa92d26b, $r30=0xa93a8fd11ad5ae99 ++srl.w $r8, $r15, $r4 :: ++before: $r8=0x5d2fb7cd04ecd00c, $r15=0x47bf914b6eca2852, $r4=0x1bc63138cc45a75c ++after: $r8=0x0000000000000006, $r15=0x47bf914b6eca2852, $r4=0x1bc63138cc45a75c ++srl.w $r20, $r12, $r18 :: ++before: $r20=0x61fa22abda7c7b02, $r12=0x9341cf09aa2e106e, $r18=0x2dea831e9e121355 ++after: $r20=0x0000000000000551, $r12=0x9341cf09aa2e106e, $r18=0x2dea831e9e121355 ++srl.w $r30, $r20, $r26 :: ++before: $r30=0x43e0249584da52db, $r20=0x482a209e436cda53, $r26=0xb323a7f463f80660 ++after: $r30=0x00000000436cda53, $r20=0x482a209e436cda53, $r26=0xb323a7f463f80660 ++srl.w $r31, $r16, $r28 :: ++before: $r31=0x4b10d05d93bf7288, $r16=0x6d0330e88122d7c1, $r28=0xc531cf8c92d53d03 ++after: $r31=0x0000000010245af8, $r16=0x6d0330e88122d7c1, $r28=0xc531cf8c92d53d03 ++srl.w $r31, $r15, $r31 :: ++before: $r31=0xd4654233c7648c3a, $r15=0x12e6fc2a04cbf809, $r31=0xcfe1c1b558a94808 ++after: $r31=0x000000000004cbf8, $r15=0x12e6fc2a04cbf809, $r31=0x000000000004cbf8 ++srl.w $r10, $r30, $r19 :: ++before: $r10=0x602dee9c45a3b99b, $r30=0x3ce0a6ac2acf19fa, $r19=0xdb5fab4bc2f82e7a ++after: $r10=0x000000000000000a, $r30=0x3ce0a6ac2acf19fa, $r19=0xdb5fab4bc2f82e7a ++srl.w $r17, $r9, $r23 :: ++before: $r17=0x45106f11d4a57641, $r9=0x05354795b675edac, $r23=0xc67578c28ed7b6c7 ++after: $r17=0x00000000016cebdb, $r9=0x05354795b675edac, $r23=0xc67578c28ed7b6c7 ++srl.w $r25, $r26, $r29 :: ++before: $r25=0x1dc3b8477fba650c, $r26=0x0814377a71768e75, $r29=0x60276c0e316db833 ++after: $r25=0x0000000000000e2e, $r26=0x0814377a71768e75, $r29=0x60276c0e316db833 ++srl.w $r31, $r7, $r30 :: ++before: $r31=0x360fc92a085c2e14, $r7=0x1b44ec96def89449, $r30=0x56d6c5d85a81ed1f ++after: $r31=0x0000000000000001, $r7=0x1b44ec96def89449, $r30=0x56d6c5d85a81ed1f ++sra.w $r10, $r17, $r19 :: ++before: $r10=0x576f2bfc771641b8, $r17=0xfb1fb20b98a54405, $r19=0xb20e9dae5a212078 ++after: $r10=0xffffffffffffff98, $r17=0xfb1fb20b98a54405, $r19=0xb20e9dae5a212078 ++sra.w $r12, $r16, $r31 :: ++before: $r12=0xbfdbb9a90ccc08a0, $r16=0xb5d3c7f3b1a800a6, $r31=0x57c3ff79f3b4198b ++after: $r12=0xfffffffffff63500, $r16=0xb5d3c7f3b1a800a6, $r31=0x57c3ff79f3b4198b ++sra.w $r18, $r16, $r5 :: ++before: $r18=0xadcb6c153538b6b1, $r16=0x99e245813e90b5e9, $r5=0x7adff58363d5ebd2 ++after: $r18=0x0000000000000fa4, $r16=0x99e245813e90b5e9, $r5=0x7adff58363d5ebd2 ++sra.w $r17, $r28, $r25 :: ++before: $r17=0x7faea6a29686caf9, $r28=0x801d40ea40b19bee, $r25=0x0f5174f678600d3f ++after: $r17=000000000000000000, $r28=0x801d40ea40b19bee, $r25=0x0f5174f678600d3f ++sra.w $r8, $r27, $r13 :: ++before: $r8=0x86e5534832150e05, $r27=0x47bb53d1cdc3560f, $r13=0x917e2b49633a0f44 ++after: $r8=0xfffffffffcdc3560, $r27=0x47bb53d1cdc3560f, $r13=0x917e2b49633a0f44 ++sra.w $r26, $r18, 
$r20 :: ++before: $r26=0xbfb83a0d762c171a, $r18=0xbf67ed78d934d37c, $r20=0x9f377995293fcc6b ++after: $r26=0xfffffffffffb269a, $r18=0xbf67ed78d934d37c, $r20=0x9f377995293fcc6b ++sra.w $r5, $r25, $r19 :: ++before: $r5=0x266703af59334b0f, $r25=0x4ed92cdab9f641c9, $r19=0x5da1d0b8846d1a3d ++after: $r5=0xfffffffffffffffd, $r25=0x4ed92cdab9f641c9, $r19=0x5da1d0b8846d1a3d ++sra.w $r19, $r27, $r24 :: ++before: $r19=0x72557561b3b40007, $r27=0xd5db278ea099b3b5, $r24=0x50b4a888b898610f ++after: $r19=0xffffffffffff4133, $r27=0xd5db278ea099b3b5, $r24=0x50b4a888b898610f ++sra.w $r16, $r10, $r4 :: ++before: $r16=0xb349f888f1809ba3, $r10=0x23d60a1fc100d89e, $r4=0xc2846cc882dbc8e2 ++after: $r16=0xfffffffff0403627, $r10=0x23d60a1fc100d89e, $r4=0xc2846cc882dbc8e2 ++sra.w $r23, $r10, $r31 :: ++before: $r23=0xd7bdeddd344bb5af, $r10=0xa015a07c13ff2234, $r31=0x7c0fe410ce063a85 ++after: $r23=0x00000000009ff911, $r10=0xa015a07c13ff2234, $r31=0x7c0fe410ce063a85 ++sll.d $r28, $r17, $r10 :: ++before: $r28=0x167adf26efd66416, $r17=0xb861ba6e0aadf304, $r10=0xa19e21ba0f406c33 ++after: $r28=0x9820000000000000, $r17=0xb861ba6e0aadf304, $r10=0xa19e21ba0f406c33 ++sll.d $r18, $r29, $r13 :: ++before: $r18=0x3e8ea4dc3a9d9b44, $r29=0x28ccf5dfa9cdc3b2, $r13=0x33ef837a5a476bdc ++after: $r18=0xfa9cdc3b20000000, $r29=0x28ccf5dfa9cdc3b2, $r13=0x33ef837a5a476bdc ++sll.d $r23, $r27, $r29 :: ++before: $r23=0x23e29c76deed70ca, $r27=0x09e2265d8422e78d, $r29=0xe9cc62bfd8a7c913 ++after: $r23=0x32ec21173c680000, $r27=0x09e2265d8422e78d, $r29=0xe9cc62bfd8a7c913 ++sll.d $r16, $r17, $r17 :: ++before: $r16=0xf5e858c7445fcedd, $r17=0x6735e4cf2fcb78fb, $r17=0x726dd10e13b62663 ++after: $r16=0x9db1331800000000, $r17=0x726dd10e13b62663, $r17=0x726dd10e13b62663 ++sll.d $r17, $r15, $r29 :: ++before: $r17=0xfc1dbfc0551f8813, $r15=0xec45100b21a74025, $r29=0x186d3b737cbfd39a ++after: $r17=0x2c869d0094000000, $r15=0xec45100b21a74025, $r29=0x186d3b737cbfd39a ++sll.d $r19, $r15, $r9 :: ++before: $r19=0xbb01afe39a1e17b6, $r15=0x3e66dd1100acc44a, $r9=0xa9c74257f6e39cdf ++after: $r19=0x8056622500000000, $r15=0x3e66dd1100acc44a, $r9=0xa9c74257f6e39cdf ++sll.d $r23, $r9, $r31 :: ++before: $r23=0x945b101751c38d12, $r9=0x262d14baae546199, $r31=0x7ccdd8a7840948df ++after: $r23=0x572a30cc80000000, $r9=0x262d14baae546199, $r31=0x7ccdd8a7840948df ++sll.d $r5, $r31, $r28 :: ++before: $r5=0xa88eaecc1405995b, $r31=0xd96ed500aff4596b, $r28=0x6994841a196c562e ++after: $r5=0x165ac00000000000, $r31=0xd96ed500aff4596b, $r28=0x6994841a196c562e ++sll.d $r27, $r10, $r25 :: ++before: $r27=0x1e9540fa8237a849, $r10=0x9aad6101b2470a60, $r25=0x90c95628696f752f ++after: $r27=0x8530000000000000, $r10=0x9aad6101b2470a60, $r25=0x90c95628696f752f ++sll.d $r4, $r26, $r18 :: ++before: $r4=0xb4dc3cdeab2e8454, $r26=0x0d27a92db3b2906c, $r18=0x2bc7647c40c0b375 ++after: $r4=0x0d80000000000000, $r26=0x0d27a92db3b2906c, $r18=0x2bc7647c40c0b375 ++srl.d $r6, $r27, $r13 :: ++before: $r6=0x66ebeca9a7fad574, $r27=0xdc837ce646ea6b51, $r13=0xa57259e1758c564b ++after: $r6=0x001b906f9cc8dd4d, $r27=0xdc837ce646ea6b51, $r13=0xa57259e1758c564b ++srl.d $r6, $r20, $r5 :: ++before: $r6=0x091794316e6c5e65, $r20=0x0dc7c47d39d64a16, $r5=0x35f029b9942e11c8 ++after: $r6=0x000dc7c47d39d64a, $r20=0x0dc7c47d39d64a16, $r5=0x35f029b9942e11c8 ++srl.d $r15, $r5, $r4 :: ++before: $r15=0xbc963842b3ebc906, $r5=0x42ea773b0bd19807, $r4=0xd05cd2c4b01ea630 ++after: $r15=0x00000000000042ea, $r5=0x42ea773b0bd19807, $r4=0xd05cd2c4b01ea630 ++srl.d $r18, $r25, $r28 :: ++before: $r18=0x30d908baaa31230e, $r25=0x779272ae228746a5, 
$r28=0xf7b665809a3f303b ++after: $r18=0x000000000000000e, $r25=0x779272ae228746a5, $r28=0xf7b665809a3f303b ++srl.d $r5, $r28, $r27 :: ++before: $r5=0x01f1d414f1d0f1fe, $r28=0x647277d3759d74bf, $r27=0xa5c5fce39b4a1810 ++after: $r5=0x0000647277d3759d, $r28=0x647277d3759d74bf, $r27=0xa5c5fce39b4a1810 ++srl.d $r24, $r9, $r26 :: ++before: $r24=0x5fa44419162fc2c8, $r9=0x9d2a589e6f6b3440, $r26=0x810a615115238d8d ++after: $r24=0x0004e952c4f37b59, $r9=0x9d2a589e6f6b3440, $r26=0x810a615115238d8d ++srl.d $r31, $r23, $r30 :: ++before: $r31=0xfa1a7ad64758b758, $r23=0xe3d69d99e87b4297, $r30=0x87fd8dc0a78e86bb ++after: $r31=0x000000000000001c, $r23=0xe3d69d99e87b4297, $r30=0x87fd8dc0a78e86bb ++srl.d $r26, $r10, $r24 :: ++before: $r26=0x540888639a787231, $r10=0x168791cefeb1660a, $r24=0xd02b158115db9cdf ++after: $r26=0x000000002d0f239d, $r10=0x168791cefeb1660a, $r24=0xd02b158115db9cdf ++srl.d $r23, $r15, $r12 :: ++before: $r23=0xff3e950565409999, $r15=0xe15a01fa0e34ea3b, $r12=0x237aba34fe552f8e ++after: $r23=0x0003856807e838d3, $r15=0xe15a01fa0e34ea3b, $r12=0x237aba34fe552f8e ++srl.d $r8, $r16, $r4 :: ++before: $r8=0x825bafd36cc0d32e, $r16=0x321677304d1b1406, $r4=0xca68c6c83dfa5837 ++after: $r8=0x0000000000000064, $r16=0x321677304d1b1406, $r4=0xca68c6c83dfa5837 ++sra.d $r23, $r19, $r16 :: ++before: $r23=0x4cab63abd8f64774, $r19=0x2c007c3ac68d7c80, $r16=0xd8f4ac963a8b2c01 ++after: $r23=0x16003e1d6346be40, $r19=0x2c007c3ac68d7c80, $r16=0xd8f4ac963a8b2c01 ++sra.d $r18, $r30, $r25 :: ++before: $r18=0x531de73fca30361a, $r30=0x2857ba730cd281ff, $r25=0xacab0fe400e4c113 ++after: $r18=0x0000050af74e619a, $r30=0x2857ba730cd281ff, $r25=0xacab0fe400e4c113 ++sra.d $r31, $r13, $r10 :: ++before: $r31=0x3184416bc93a5e26, $r13=0xad5864bc4022de96, $r10=0xf7007bdbf1f728ab ++after: $r31=0xfffffffffff5ab0c, $r13=0xad5864bc4022de96, $r10=0xf7007bdbf1f728ab ++sra.d $r6, $r25, $r23 :: ++before: $r6=0x9184d2df291f3402, $r25=0x7c0b117dcad80c03, $r23=0x35b29b0dde1a94bd ++after: $r6=0x0000000000000003, $r25=0x7c0b117dcad80c03, $r23=0x35b29b0dde1a94bd ++sra.d $r16, $r6, $r29 :: ++before: $r16=0x2849e543d35dff5f, $r6=0x009f13f36a632a3f, $r29=0xf31f881e12072fe2 ++after: $r16=0x000000000027c4fc, $r6=0x009f13f36a632a3f, $r29=0xf31f881e12072fe2 ++sra.d $r7, $r29, $r10 :: ++before: $r7=0x25c763f8366139dd, $r29=0xfd77fd6e69e371c6, $r10=0xcaa2ec6ad4f3b996 ++after: $r7=0xfffffff5dff5b9a7, $r29=0xfd77fd6e69e371c6, $r10=0xcaa2ec6ad4f3b996 ++sra.d $r24, $r25, $r26 :: ++before: $r24=0x472602300b4f04c9, $r25=0x54ceea832a5677e9, $r26=0x5f63e9d9d6eb4af0 ++after: $r24=0x00000000000054ce, $r25=0x54ceea832a5677e9, $r26=0x5f63e9d9d6eb4af0 ++sra.d $r23, $r4, $r27 :: ++before: $r23=0xe8b449325a0ed51e, $r4=0xd96928476f8441a5, $r27=0x7e1ae8fd9c849dce ++after: $r23=0xffff65a4a11dbe11, $r4=0xd96928476f8441a5, $r27=0x7e1ae8fd9c849dce ++sra.d $r15, $r9, $r12 :: ++before: $r15=0x71601a1a2b155f51, $r9=0x0bcbb1d162563240, $r12=0x5a906ad2f4abb4c7 ++after: $r15=0x00179763a2c4ac64, $r9=0x0bcbb1d162563240, $r12=0x5a906ad2f4abb4c7 ++sra.d $r16, $r29, $r23 :: ++before: $r16=0x1686886f27d397fb, $r29=0x851328b2655e5689, $r23=0x1634457590cd4033 ++after: $r16=0xfffffffffffff0a2, $r29=0x851328b2655e5689, $r23=0x1634457590cd4033 ++rotr.w $r8, $r5, $r18 :: ++before: $r8=0xc4394aae4c13908b, $r5=0xa0c5728d1211b595, $r18=0x3d562746b3943f3b ++after: $r8=0x000000004236b2a2, $r5=0xa0c5728d1211b595, $r18=0x3d562746b3943f3b ++rotr.w $r19, $r18, $r10 :: ++before: $r19=0x284b501639de116b, $r18=0x4248ad6cc0107902, $r10=0xb41907b756bf8004 ++after: $r19=0x000000002c010790, 
$r18=0x4248ad6cc0107902, $r10=0xb41907b756bf8004 ++rotr.w $r29, $r8, $r4 :: ++before: $r29=0x2656b50c7d689f19, $r8=0x7b5d21fdce9bcb73, $r4=0x5b212fbe9e6b8522 ++after: $r29=0xfffffffff3a6f2dc, $r8=0x7b5d21fdce9bcb73, $r4=0x5b212fbe9e6b8522 ++rotr.w $r25, $r6, $r30 :: ++before: $r25=0x4c79ed7a1695fc25, $r6=0x6bac1698a978f50f, $r30=0xf1d58570dfb10203 ++after: $r25=0xfffffffff52f1ea1, $r6=0x6bac1698a978f50f, $r30=0xf1d58570dfb10203 ++rotr.w $r14, $r18, $r6 :: ++before: $r14=0xe894476b4ebbff23, $r18=0x1398b65ae1e91c98, $r6=0xebb6c3f5f689d2d8 ++after: $r14=0xffffffffe91c98e1, $r18=0x1398b65ae1e91c98, $r6=0xebb6c3f5f689d2d8 ++rotr.w $r19, $r29, $r26 :: ++before: $r19=0x2595423cc93ecd7c, $r29=0x6c462c2d29d8f908, $r26=0x19142efd8e0b48b8 ++after: $r19=0xffffffffd8f90829, $r29=0x6c462c2d29d8f908, $r26=0x19142efd8e0b48b8 ++rotr.w $r23, $r10, $r25 :: ++before: $r23=0x68b4d913b267a3a2, $r10=0x69afb673907e4506, $r25=0xbd09ff2ed890862d ++after: $r23=0x00000000283483f2, $r10=0x69afb673907e4506, $r25=0xbd09ff2ed890862d ++rotr.w $r9, $r14, $r27 :: ++before: $r9=0x17a45b8cbdebd6ef, $r14=0x33effef864846356, $r27=0x3f52e437f2d5da62 ++after: $r9=0xffffffff992118d5, $r14=0x33effef864846356, $r27=0x3f52e437f2d5da62 ++rotr.w $r5, $r12, $r23 :: ++before: $r5=0x2d191b1a9707cf26, $r12=0x86fa75433dac3d39, $r23=0x21136a02424e5da4 ++after: $r5=0xffffffff93dac3d3, $r12=0x86fa75433dac3d39, $r23=0x21136a02424e5da4 ++rotr.w $r29, $r18, $r27 :: ++before: $r29=0x7d989f74f9944f8d, $r18=0x0050fe5829a153e6, $r27=0x926776f9140b06fc ++after: $r29=0xffffffff9a153e62, $r18=0x0050fe5829a153e6, $r27=0x926776f9140b06fc ++rotr.d $r29, $r19, $r13 :: ++before: $r29=0x1e02c0c28ec3f9b1, $r19=0xf2e79e6ff240b188, $r13=0x60f500663eddf444 ++after: $r29=0x8f2e79e6ff240b18, $r19=0xf2e79e6ff240b188, $r13=0x60f500663eddf444 ++rotr.d $r30, $r4, $r14 :: ++before: $r30=0x97f6be8229e2e822, $r4=0xf79aaeb2c03a2113, $r14=0xbbdb2cb642605ed7 ++after: $r30=0x744227ef355d6580, $r4=0xf79aaeb2c03a2113, $r14=0xbbdb2cb642605ed7 ++rotr.d $r6, $r19, $r7 :: ++before: $r6=0x1611806010ce99d8, $r19=0xcb64270e0fc5b4c7, $r7=0x0eda6972c46af03c ++after: $r6=0xb64270e0fc5b4c7c, $r19=0xcb64270e0fc5b4c7, $r7=0x0eda6972c46af03c ++rotr.d $r4, $r15, $r30 :: ++before: $r4=0xe63084e97bd0efb3, $r15=0x6e1aa322e38e9b66, $r30=0xa7df0f1d92106e2d ++after: $r4=0x19171c74db3370d5, $r15=0x6e1aa322e38e9b66, $r30=0xa7df0f1d92106e2d ++rotr.d $r16, $r27, $r10 :: ++before: $r16=0x1ff92fbb0f10ff9a, $r27=0x015c2eb91c9ae124, $r10=0x8b4c97ee7f9bc2fa ++after: $r16=0x570bae4726b84900, $r27=0x015c2eb91c9ae124, $r10=0x8b4c97ee7f9bc2fa ++rotr.d $r28, $r7, $r25 :: ++before: $r28=0xbd766a63bbead21c, $r7=0x0d97b509610db5e7, $r25=0x3151203010315af5 ++after: $r28=0xbda84b086daf386c, $r7=0x0d97b509610db5e7, $r25=0x3151203010315af5 ++rotr.d $r9, $r20, $r23 :: ++before: $r9=0x8a2bb5eacea50d68, $r20=0x947ec1930151adb9, $r23=0xc2f39e045d278b7b ++after: $r9=0x8fd832602a35b732, $r20=0x947ec1930151adb9, $r23=0xc2f39e045d278b7b ++rotr.d $r25, $r13, $r23 :: ++before: $r25=0xcaddb8ea7bd492c7, $r13=0x416a1b790dbf45cb, $r23=0x44c59965e1c6af25 ++after: $r25=0xc86dfa2e5a0b50db, $r13=0x416a1b790dbf45cb, $r23=0x44c59965e1c6af25 ++rotr.d $r14, $r7, $r31 :: ++before: $r14=0x8ca18b58047c8b5a, $r7=0x93a6cdc3585b5446, $r31=0x70cd84ec07e33cef ++after: $r14=0x9b86b0b6a88d274d, $r7=0x93a6cdc3585b5446, $r31=0x70cd84ec07e33cef ++rotr.d $r14, $r9, $r4 :: ++before: $r14=0x48bd5c133004f490, $r9=0xad095be0915fe20b, $r4=0xc1fff6ff603a47b3 ++after: $r14=0x2b7c122bfc4175a1, $r9=0xad095be0915fe20b, $r4=0xc1fff6ff603a47b3 ++slli.w $r18, $r8, 10 
:: ++before: $r18=0xe7f8823a2989c395, $r8=0xf0ccc85519ad1e0a ++after: $r18=0xffffffffb4782800, $r8=0xf0ccc85519ad1e0a ++slli.w $r27, $r17, 30 :: ++before: $r27=0x2e66b550a3bb071d, $r17=0x20943aa3eaa4024e ++after: $r27=0xffffffff80000000, $r17=0x20943aa3eaa4024e ++slli.w $r27, $r23, 31 :: ++before: $r27=0x70daa2bee8209243, $r23=0x2e9160afd2e28a64 ++after: $r27=000000000000000000, $r23=0x2e9160afd2e28a64 ++slli.w $r10, $r13, 12 :: ++before: $r10=0x701c424632b5dc29, $r13=0x591054db6afe1725 ++after: $r10=0xffffffffe1725000, $r13=0x591054db6afe1725 ++slli.w $r7, $r15, 19 :: ++before: $r7=0xdd1d7fe3ae579499, $r15=0x2e077f689088c0c7 ++after: $r7=0x0000000006380000, $r15=0x2e077f689088c0c7 ++slli.w $r6, $r8, 12 :: ++before: $r6=0xff732113ddaab79b, $r8=0x9cacf8e6d9e37f97 ++after: $r6=0x0000000037f97000, $r8=0x9cacf8e6d9e37f97 ++slli.w $r5, $r19, 22 :: ++before: $r5=0xcef75ddd2adc5853, $r19=0xcc24ed9167fd06ea ++after: $r5=0xffffffffba800000, $r19=0xcc24ed9167fd06ea ++slli.w $r17, $r8, 7 :: ++before: $r17=0x3c8788fed3e8a049, $r8=0xccf9b2d2c2e80251 ++after: $r17=0x0000000074012880, $r8=0xccf9b2d2c2e80251 ++slli.w $r14, $r29, 10 :: ++before: $r14=0x0e1b0b077db4f08e, $r29=0x76aea4b9ae43cdfb ++after: $r14=0x000000000f37ec00, $r29=0x76aea4b9ae43cdfb ++slli.w $r23, $r30, 26 :: ++before: $r23=0x13d8514aeb0dc12b, $r30=0x9c8352804e7e8ccb ++after: $r23=0x000000002c000000, $r30=0x9c8352804e7e8ccb ++slli.d $r27, $r28, 5 :: ++before: $r27=0x689a2c4141835926, $r28=0x1b6ff38e611d1e4d ++after: $r27=0x6dfe71cc23a3c9a0, $r28=0x1b6ff38e611d1e4d ++slli.d $r5, $r20, 1 :: ++before: $r5=0xff3391c2323defa6, $r20=0xe99a134a0c1a2574 ++after: $r5=0xd334269418344ae8, $r20=0xe99a134a0c1a2574 ++slli.d $r27, $r7, 61 :: ++before: $r27=0xc32d8fb319ba47e6, $r7=0xc6530e0e601d3631 ++after: $r27=0x2000000000000000, $r7=0xc6530e0e601d3631 ++slli.d $r5, $r26, 45 :: ++before: $r5=0x979553ff112cdf52, $r26=0x931e420364fdcaca ++after: $r5=0xb959400000000000, $r26=0x931e420364fdcaca ++slli.d $r27, $r5, 60 :: ++before: $r27=0xa7f70b048a4087b0, $r5=0xc1b829210c3cd5a9 ++after: $r27=0x9000000000000000, $r5=0xc1b829210c3cd5a9 ++slli.d $r23, $r10, 59 :: ++before: $r23=0xcd547af78ac66ca7, $r10=0xa2c0802de6c82645 ++after: $r23=0x2800000000000000, $r10=0xa2c0802de6c82645 ++slli.d $r13, $r30, 56 :: ++before: $r13=0x0410b8f25e1234ee, $r30=0xdbaacfe884cda24d ++after: $r13=0x4d00000000000000, $r30=0xdbaacfe884cda24d ++slli.d $r16, $r4, 20 :: ++before: $r16=0x44a2ff35045ec37c, $r4=0xee2240010629a8ee ++after: $r16=0x0010629a8ee00000, $r4=0xee2240010629a8ee ++slli.d $r19, $r20, 25 :: ++before: $r19=0x8617d88408d75cac, $r20=0xba15483820d66ae7 ++after: $r19=0x7041acd5ce000000, $r20=0xba15483820d66ae7 ++slli.d $r24, $r27, 13 :: ++before: $r24=0x669e0e9b99d5b604, $r27=0xf5d1ffc374e53c7d ++after: $r24=0x3ff86e9ca78fa000, $r27=0xf5d1ffc374e53c7d ++srli.w $r20, $r16, 10 :: ++before: $r20=0x7f5310ac5eaa9924, $r16=0x0ea8b69613d183ee ++after: $r20=0x000000000004f460, $r16=0x0ea8b69613d183ee ++srli.w $r13, $r15, 0 :: ++before: $r13=0x5f4d9313f9224389, $r15=0xd544272206f4e814 ++after: $r13=0x0000000006f4e814, $r15=0xd544272206f4e814 ++srli.w $r17, $r18, 7 :: ++before: $r17=0xd9b2c942f996cc8a, $r18=0x704cd1d89de5c2b4 ++after: $r17=0x00000000013bcb85, $r18=0x704cd1d89de5c2b4 ++srli.w $r27, $r28, 18 :: ++before: $r27=0xa3eef8efc97e0d4f, $r28=0x8c449e6236daa7a2 ++after: $r27=0x0000000000000db6, $r28=0x8c449e6236daa7a2 ++srli.w $r9, $r10, 29 :: ++before: $r9=0x6c044927152e5fc9, $r10=0x592a1607944e0109 ++after: $r9=0x0000000000000004, $r10=0x592a1607944e0109 ++srli.w $r8, 
$r24, 31 :: ++before: $r8=0xcaa01b37d49db675, $r24=0x5e35848bbc958164 ++after: $r8=0x0000000000000001, $r24=0x5e35848bbc958164 ++srli.w $r6, $r16, 18 :: ++before: $r6=0xe2fbe1accb343769, $r16=0x85f5e17c7d785222 ++after: $r6=0x0000000000001f5e, $r16=0x85f5e17c7d785222 ++srli.w $r18, $r25, 31 :: ++before: $r18=0x4653c07e0627825f, $r25=0x44fffa524ffd0417 ++after: $r18=000000000000000000, $r25=0x44fffa524ffd0417 ++srli.w $r5, $r26, 22 :: ++before: $r5=0x817ebd7154c8ed46, $r26=0xc7399a9899fc5958 ++after: $r5=0x0000000000000267, $r26=0xc7399a9899fc5958 ++srli.w $r27, $r4, 27 :: ++before: $r27=0x3e4b17b34f2b08d0, $r4=0x5bedb97aefd697f4 ++after: $r27=0x000000000000001d, $r4=0x5bedb97aefd697f4 ++srli.d $r31, $r9, 51 :: ++before: $r31=0x8fc21da189af52ed, $r9=0x235bf33e3e612a15 ++after: $r31=0x000000000000046b, $r9=0x235bf33e3e612a15 ++srli.d $r26, $r7, 36 :: ++before: $r26=0xcd1eaac4df2531dd, $r7=0xe87216fce9c75788 ++after: $r26=0x000000000e87216f, $r7=0xe87216fce9c75788 ++srli.d $r6, $r31, 29 :: ++before: $r6=0xc0282beeb7dc6618, $r31=0x8b58604d6be3e8e0 ++after: $r6=0x000000045ac3026b, $r31=0x8b58604d6be3e8e0 ++srli.d $r20, $r6, 18 :: ++before: $r20=0x1546fdd9fc133e39, $r6=0x74067840bb05a992 ++after: $r20=0x00001d019e102ec1, $r6=0x74067840bb05a992 ++srli.d $r28, $r20, 28 :: ++before: $r28=0xaa1f88b09e13e4c6, $r20=0x6e153faa5221e893 ++after: $r28=0x00000006e153faa5, $r20=0x6e153faa5221e893 ++srli.d $r26, $r4, 55 :: ++before: $r26=0x2ba2151c80dbea7a, $r4=0x21246f3c7063edf9 ++after: $r26=0x0000000000000042, $r4=0x21246f3c7063edf9 ++srli.d $r28, $r29, 34 :: ++before: $r28=0xcd72eff1b5aa0877, $r29=0x5d9488c1d61a1544 ++after: $r28=0x0000000017652230, $r29=0x5d9488c1d61a1544 ++srli.d $r13, $r7, 62 :: ++before: $r13=0x5953b78fbd8109a9, $r7=0x862731652b653859 ++after: $r13=0x0000000000000002, $r7=0x862731652b653859 ++srli.d $r29, $r18, 25 :: ++before: $r29=0xab821449d149a976, $r18=0xcb73553146cc4bdc ++after: $r29=0x00000065b9aa98a3, $r18=0xcb73553146cc4bdc ++srli.d $r28, $r7, 2 :: ++before: $r28=0x31272fa88123357d, $r7=0x0e9359f7a9f92ec5 ++after: $r28=0x03a4d67dea7e4bb1, $r7=0x0e9359f7a9f92ec5 ++srai.w $r26, $r23, 2 :: ++before: $r26=0xe73a55c2b7005c01, $r23=0xfcd659254f4b3fe7 ++after: $r26=0x0000000013d2cff9, $r23=0xfcd659254f4b3fe7 ++srai.w $r31, $r10, 0 :: ++before: $r31=0x2e0c4330fae0890a, $r10=0xa76ca364a204c82b ++after: $r31=0xffffffffa204c82b, $r10=0xa76ca364a204c82b ++srai.w $r31, $r8, 0 :: ++before: $r31=0x64790bb6e8674f68, $r8=0xce5594f964c4a026 ++after: $r31=0x0000000064c4a026, $r8=0xce5594f964c4a026 ++srai.w $r15, $r31, 27 :: ++before: $r15=0xccfb53c708026acd, $r31=0xce185873627515b5 ++after: $r15=0x000000000000000c, $r31=0xce185873627515b5 ++srai.w $r16, $r28, 1 :: ++before: $r16=0x994c4d22e90185a2, $r28=0x49995d51019e1050 ++after: $r16=0x0000000000cf0828, $r28=0x49995d51019e1050 ++srai.w $r13, $r16, 16 :: ++before: $r13=0x0484408b57b3ab89, $r16=0x437401347e23c399 ++after: $r13=0x0000000000007e23, $r16=0x437401347e23c399 ++srai.w $r4, $r9, 7 :: ++before: $r4=0x0d1d936105b7cca3, $r9=0xd49c3c65e292b942 ++after: $r4=0xffffffffffc52572, $r9=0xd49c3c65e292b942 ++srai.w $r24, $r15, 10 :: ++before: $r24=0xaa9377005232ec93, $r15=0xde29d0172b40f03d ++after: $r24=0x00000000000ad03c, $r15=0xde29d0172b40f03d ++srai.w $r19, $r14, 24 :: ++before: $r19=0xa49c65a4c2cde36d, $r14=0x782e0d4b8a7a28d0 ++after: $r19=0xffffffffffffff8a, $r14=0x782e0d4b8a7a28d0 ++srai.w $r24, $r27, 24 :: ++before: $r24=0x404f816ff696bbc8, $r27=0x1b6900e15f252315 ++after: $r24=0x000000000000005f, $r27=0x1b6900e15f252315 ++srai.d $r24, 
$r4, 22 :: ++before: $r24=0x96250384fede78c7, $r4=0x6c501d9ec5e9e731 ++after: $r24=0x000001b140767b17, $r4=0x6c501d9ec5e9e731 ++srai.d $r30, $r19, 17 :: ++before: $r30=0xcfc52d7caaf7bf47, $r19=0x82499a30d50f8b83 ++after: $r30=0xffffc124cd186a87, $r19=0x82499a30d50f8b83 ++srai.d $r12, $r12, 5 :: ++before: $r12=0x628a1a46bbe30c16, $r12=0xaba392c50d63ea53 ++after: $r12=0xfd5d1c96286b1f52, $r12=0xfd5d1c96286b1f52 ++srai.d $r24, $r9, 2 :: ++before: $r24=0x021c1bb01f0253d8, $r9=0xb35e31d92548a2fe ++after: $r24=0xecd78c76495228bf, $r9=0xb35e31d92548a2fe ++srai.d $r28, $r7, 62 :: ++before: $r28=0x2a5ac0a983332ec3, $r7=0x2297ae499a473c6d ++after: $r28=000000000000000000, $r7=0x2297ae499a473c6d ++srai.d $r8, $r17, 31 :: ++before: $r8=0xa27cf36651750e09, $r17=0x1984e046b042d0cf ++after: $r8=0x000000003309c08d, $r17=0x1984e046b042d0cf ++srai.d $r25, $r16, 37 :: ++before: $r25=0x7df3822fb20b8ded, $r16=0xb4e464563029fac8 ++after: $r25=0xfffffffffda72322, $r16=0xb4e464563029fac8 ++srai.d $r14, $r5, 57 :: ++before: $r14=0xe8c1939c13a2e6ca, $r5=0x6a22077c63497a9a ++after: $r14=0x0000000000000035, $r5=0x6a22077c63497a9a ++srai.d $r25, $r15, 10 :: ++before: $r25=0xf2df68e25cccf72e, $r15=0xe0af648201f919fc ++after: $r25=0xfff82bd920807e46, $r15=0xe0af648201f919fc ++srai.d $r6, $r15, 49 :: ++before: $r6=0xa24591b35142aa9c, $r15=0x12b20ac67de77b8d ++after: $r6=0x0000000000000959, $r15=0x12b20ac67de77b8d ++rotri.w $r18, $r6, 20 :: ++before: $r18=0xf0c65b137926ba00, $r6=0x95e0f5f057a212c5 ++after: $r18=0x00000000212c557a, $r6=0x95e0f5f057a212c5 ++rotri.w $r9, $r16, 27 :: ++before: $r9=0xe36356471d2a7e18, $r16=0xb8af3071021bd869 ++after: $r9=0x00000000437b0d20, $r16=0xb8af3071021bd869 ++rotri.w $r5, $r31, 1 :: ++before: $r5=0x5992fc9cfce2ebe9, $r31=0x6c427c821603d01a ++after: $r5=0x000000000b01e80d, $r31=0x6c427c821603d01a ++rotri.w $r27, $r13, 23 :: ++before: $r27=0x0239c57dca2ab060, $r13=0xed54e28825b25471 ++after: $r27=0x0000000064a8e24b, $r13=0xed54e28825b25471 ++rotri.w $r18, $r18, 7 :: ++before: $r18=0xb84df2305a710936, $r18=0x8aae5248c6d4973c ++after: $r18=0x00000000798da92e, $r18=0x00000000798da92e ++rotri.w $r4, $r27, 29 :: ++before: $r4=0x730e1701570ac9fc, $r27=0xd55b9d54232536e7 ++after: $r4=0x000000001929b739, $r27=0xd55b9d54232536e7 ++rotri.w $r19, $r18, 0 :: ++before: $r19=0x36dbceffa501d8dc, $r18=0x8415238fa1dd314f ++after: $r19=0xffffffffa1dd314f, $r18=0x8415238fa1dd314f ++rotri.w $r13, $r24, 21 :: ++before: $r13=0x00c1ac428ddf5193, $r24=0x3b588028fcfbb0a8 ++after: $r13=0xffffffffdd8547e7, $r24=0x3b588028fcfbb0a8 ++rotri.w $r14, $r25, 29 :: ++before: $r14=0x733414543ca8145e, $r25=0xded24831de35be08 ++after: $r14=0xfffffffff1adf046, $r25=0xded24831de35be08 ++rotri.w $r27, $r5, 8 :: ++before: $r27=0x60afaebb36d22ba0, $r5=0x0fd31a16f03582b5 ++after: $r27=0xffffffffb5f03582, $r5=0x0fd31a16f03582b5 ++rotri.d $r20, $r7, 53 :: ++before: $r20=0xe112a6d47c0444c1, $r7=0xbd9bbb91bdc381c5 ++after: $r20=0xdddc8dee1c0e2dec, $r7=0xbd9bbb91bdc381c5 ++rotri.d $r27, $r16, 62 :: ++before: $r27=0xf254a827c1ef7351, $r16=0x3de084650f757ceb ++after: $r27=0xf78211943dd5f3ac, $r16=0x3de084650f757ceb ++rotri.d $r30, $r17, 10 :: ++before: $r30=0x31c36a8c83999eb2, $r17=0x107098a9863e85d5 ++after: $r30=0x75441c262a618fa1, $r17=0x107098a9863e85d5 ++rotri.d $r29, $r8, 46 :: ++before: $r29=0xf2e7a25c121af3c3, $r8=0xb177c110c3dd3225 ++after: $r29=0x04430f74c896c5df, $r8=0xb177c110c3dd3225 ++rotri.d $r4, $r26, 45 :: ++before: $r4=0xdd94ff60f2e1abff, $r26=0xb76d3e4a0af02e4d ++after: $r4=0xf2505781726dbb69, $r26=0xb76d3e4a0af02e4d 
++rotri.d $r10, $r9, 42 :: ++before: $r10=0x6064d48d901beca7, $r9=0xea20b33360134ab2 ++after: $r10=0xccd804d2acba882c, $r9=0xea20b33360134ab2 ++rotri.d $r4, $r26, 27 :: ++before: $r4=0x27f1e63c8f7f71cf, $r26=0xf4c5c8a69f37a1bd ++after: $r4=0xe6f437be98b914d3, $r26=0xf4c5c8a69f37a1bd ++rotri.d $r9, $r16, 20 :: ++before: $r9=0x7d4cb07a3ab72944, $r16=0xd5ee210421c6080e ++after: $r9=0x6080ed5ee210421c, $r16=0xd5ee210421c6080e ++rotri.d $r24, $r26, 4 :: ++before: $r24=0x1ce66a79f3e45e6f, $r26=0x6e1767144ffa6e2d ++after: $r24=0xd6e1767144ffa6e2, $r26=0x6e1767144ffa6e2d ++rotri.d $r4, $r18, 46 :: ++before: $r4=0x04173f8102b03399, $r18=0xde7066568917d899 ++after: $r4=0x995a245f626779c1, $r18=0xde7066568917d899 ++ext.w.h $r17, $r14 :: ++before: $r17=0x58af862c6fc4208d, $r14=0x6235b0cfe4eed6ed ++after: $r17=0xffffffffffffd6ed, $r14=0x6235b0cfe4eed6ed ++ext.w.h $r31, $r20 :: ++before: $r31=0x425af3dcd83fa9fd, $r20=0x6e59403101a538f1 ++after: $r31=0x00000000000038f1, $r20=0x6e59403101a538f1 ++ext.w.h $r18, $r27 :: ++before: $r18=0xcb140226bf788367, $r27=0x58a5430ee4e1616e ++after: $r18=0x000000000000616e, $r27=0x58a5430ee4e1616e ++ext.w.h $r15, $r10 :: ++before: $r15=0xd3debaf05f7d909f, $r10=0x6f7083340247fb12 ++after: $r15=0xfffffffffffffb12, $r10=0x6f7083340247fb12 ++ext.w.h $r12, $r15 :: ++before: $r12=0x5dc6f7191af80bcf, $r15=0xb1f1c8f4b11c03d9 ++after: $r12=0x00000000000003d9, $r15=0xb1f1c8f4b11c03d9 ++ext.w.h $r7, $r15 :: ++before: $r7=0x5ffe304a5c9dc9d2, $r15=0x102fb4fa33193103 ++after: $r7=0x0000000000003103, $r15=0x102fb4fa33193103 ++ext.w.h $r16, $r16 :: ++before: $r16=0x533616e37505799f, $r16=0xf988c7255086f4f5 ++after: $r16=0xfffffffffffff4f5, $r16=0xfffffffffffff4f5 ++ext.w.h $r13, $r25 :: ++before: $r13=0x805a406557ed3fac, $r25=0xdc6ce0f2993b219b ++after: $r13=0x000000000000219b, $r25=0xdc6ce0f2993b219b ++ext.w.h $r19, $r20 :: ++before: $r19=0xcc49c20125c4755d, $r20=0xde7b765222a9703a ++after: $r19=0x000000000000703a, $r20=0xde7b765222a9703a ++ext.w.h $r18, $r7 :: ++before: $r18=0xe0dd9155cbe168c6, $r7=0xc1063421eae07663 ++after: $r18=0x0000000000007663, $r7=0xc1063421eae07663 ++ext.w.b $r16, $r23 :: ++before: $r16=0x21666e814555aa02, $r23=0x926b8d68b5c40592 ++after: $r16=0xffffffffffffff92, $r23=0x926b8d68b5c40592 ++ext.w.b $r8, $r20 :: ++before: $r8=0xf68ae0a0ac497ded, $r20=0x0bfb5d489716d0c5 ++after: $r8=0xffffffffffffffc5, $r20=0x0bfb5d489716d0c5 ++ext.w.b $r24, $r15 :: ++before: $r24=0xbc84e54c82fd6e51, $r15=0x7d814b11e5eb07f6 ++after: $r24=0xfffffffffffffff6, $r15=0x7d814b11e5eb07f6 ++ext.w.b $r31, $r17 :: ++before: $r31=0x14e575a8dda1f0d3, $r17=0x6a111e663a52244c ++after: $r31=0x000000000000004c, $r17=0x6a111e663a52244c ++ext.w.b $r16, $r8 :: ++before: $r16=0x911acc218fcf640b, $r8=0xac1405ad05b23e43 ++after: $r16=0x0000000000000043, $r8=0xac1405ad05b23e43 ++ext.w.b $r28, $r8 :: ++before: $r28=0x77fb13eaa8995607, $r8=0x05c97a81f12da7d3 ++after: $r28=0xffffffffffffffd3, $r8=0x05c97a81f12da7d3 ++ext.w.b $r9, $r23 :: ++before: $r9=0xb88cfdb98683e15e, $r23=0x74893b34973e16cb ++after: $r9=0xffffffffffffffcb, $r23=0x74893b34973e16cb ++ext.w.b $r31, $r4 :: ++before: $r31=0xc7168cb4f7d079e4, $r4=0xf4fc215bc2c5273e ++after: $r31=0x000000000000003e, $r4=0xf4fc215bc2c5273e ++ext.w.b $r4, $r18 :: ++before: $r4=0x0e2e5dca4727b373, $r18=0xa1b97136f32e452b ++after: $r4=0x000000000000002b, $r18=0xa1b97136f32e452b ++ext.w.b $r8, $r29 :: ++before: $r8=0x625eb5236f483daa, $r29=0x3ceca34ee347e7c8 ++after: $r8=0xffffffffffffffc8, $r29=0x3ceca34ee347e7c8 ++clo.w $r4, $r13 :: ++before: 
$r4=0x0bcca747f77aca28, $r13=0x8df71972c1a17096 ++after: $r4=0x0000000000000002, $r13=0x8df71972c1a17096 ++clo.w $r27, $r5 :: ++before: $r27=0x98a9e6d99d8e84cb, $r5=0xdc59d3c8fc1540e4 ++after: $r27=0x0000000000000006, $r5=0xdc59d3c8fc1540e4 ++clo.w $r9, $r14 :: ++before: $r9=0xe8e78b162c95ed66, $r14=0xdfad6854bbf442e6 ++after: $r9=0x0000000000000001, $r14=0xdfad6854bbf442e6 ++clo.w $r13, $r26 :: ++before: $r13=0xa3db2cf80f9112cd, $r26=0x7676463dd6f13f80 ++after: $r13=0x0000000000000002, $r26=0x7676463dd6f13f80 ++clo.w $r7, $r16 :: ++before: $r7=0xb5213ab31b574031, $r16=0x478c19ebdeaa74c0 ++after: $r7=0x0000000000000002, $r16=0x478c19ebdeaa74c0 ++clo.w $r13, $r12 :: ++before: $r13=0xd68d9661284fb9d7, $r12=0x702bf24fddd8bfe0 ++after: $r13=0x0000000000000002, $r12=0x702bf24fddd8bfe0 ++clo.w $r18, $r20 :: ++before: $r18=0x510cd4002aff4c6c, $r20=0x4fc898e8b83669ee ++after: $r18=0x0000000000000001, $r20=0x4fc898e8b83669ee ++clo.w $r5, $r9 :: ++before: $r5=0x53c0de96f709208d, $r9=0x0e56d87b898438b5 ++after: $r5=0x0000000000000001, $r9=0x0e56d87b898438b5 ++clo.w $r20, $r5 :: ++before: $r20=0x96187854fcce4fd1, $r5=0xf1248bea6ed8be30 ++after: $r20=000000000000000000, $r5=0xf1248bea6ed8be30 ++clo.w $r20, $r31 :: ++before: $r20=0xb1abb4795d411683, $r31=0x01025f914a9225e6 ++after: $r20=000000000000000000, $r31=0x01025f914a9225e6 ++clz.w $r19, $r8 :: ++before: $r19=0x374348642747a8dc, $r8=0xd8ec1d547d95ada5 ++after: $r19=0x0000000000000001, $r8=0xd8ec1d547d95ada5 ++clz.w $r26, $r4 :: ++before: $r26=0x741ab4d14b9ee1f8, $r4=0x99e2ef840817cfff ++after: $r26=0x0000000000000004, $r4=0x99e2ef840817cfff ++clz.w $r17, $r4 :: ++before: $r17=0x45c9ce7217f501b3, $r4=0xa387a194cd03bcf1 ++after: $r17=000000000000000000, $r4=0xa387a194cd03bcf1 ++clz.w $r13, $r26 :: ++before: $r13=0x69707656f354d758, $r26=0xd4a8f8ab02b876b0 ++after: $r13=0x0000000000000006, $r26=0xd4a8f8ab02b876b0 ++clz.w $r25, $r13 :: ++before: $r25=0x103ce6ee41e094c3, $r13=0xd7a85bf4006e655a ++after: $r25=0x0000000000000009, $r13=0xd7a85bf4006e655a ++clz.w $r5, $r13 :: ++before: $r5=0x3910578929e7cd4a, $r13=0x93c87b02b7b1b603 ++after: $r5=000000000000000000, $r13=0x93c87b02b7b1b603 ++clz.w $r18, $r29 :: ++before: $r18=0x10639f8979feefe5, $r29=0x9d8b4b8f8493f844 ++after: $r18=000000000000000000, $r29=0x9d8b4b8f8493f844 ++clz.w $r25, $r16 :: ++before: $r25=0x7b35b3e995b3b44d, $r16=0xad953d0ae0b3e870 ++after: $r25=000000000000000000, $r16=0xad953d0ae0b3e870 ++clz.w $r6, $r25 :: ++before: $r6=0xda6cbd19f10ef86f, $r25=0x1d6665db1162cfb4 ++after: $r6=0x0000000000000003, $r25=0x1d6665db1162cfb4 ++clz.w $r5, $r12 :: ++before: $r5=0x8a6f4d6ec8d7c00d, $r12=0x19b40cb8dd8d1679 ++after: $r5=000000000000000000, $r12=0x19b40cb8dd8d1679 ++cto.w $r7, $r15 :: ++before: $r7=0x7285e9c364562d11, $r15=0x963655c7f58de520 ++after: $r7=000000000000000000, $r15=0x963655c7f58de520 ++cto.w $r4, $r15 :: ++before: $r4=0x105dceebc6d7e641, $r15=0xfc01c17baaca9c46 ++after: $r4=000000000000000000, $r15=0xfc01c17baaca9c46 ++cto.w $r31, $r28 :: ++before: $r31=0xdeff9742b93f0591, $r28=0x2cf98074b0151f33 ++after: $r31=0x0000000000000002, $r28=0x2cf98074b0151f33 ++cto.w $r13, $r8 :: ++before: $r13=0xeee665743cd218ff, $r8=0xbdd700b2535aa3b7 ++after: $r13=0x0000000000000003, $r8=0xbdd700b2535aa3b7 ++cto.w $r23, $r13 :: ++before: $r23=0x1cc22cfd7c0c869c, $r13=0x5b848b64decbee8f ++after: $r23=0x0000000000000004, $r13=0x5b848b64decbee8f ++cto.w $r12, $r18 :: ++before: $r12=0x5c32b3db803e5988, $r18=0x2d5d1ebf93b79dd0 ++after: $r12=000000000000000000, $r18=0x2d5d1ebf93b79dd0 ++cto.w $r17, $r9 
:: ++before: $r17=0xc11d806786501f0e, $r9=0xd175fe2ca41bda38 ++after: $r17=000000000000000000, $r9=0xd175fe2ca41bda38 ++cto.w $r24, $r16 :: ++before: $r24=0x504f9b43af62e2ad, $r16=0xfce545d98e2361da ++after: $r24=000000000000000000, $r16=0xfce545d98e2361da ++cto.w $r24, $r8 :: ++before: $r24=0xc13ac5668538f5a4, $r8=0x3096912e575d64db ++after: $r24=0x0000000000000002, $r8=0x3096912e575d64db ++cto.w $r27, $r17 :: ++before: $r27=0xd27f68629dd8d4fb, $r17=0x15ac43632e175a8b ++after: $r27=0x0000000000000002, $r17=0x15ac43632e175a8b ++ctz.w $r8, $r12 :: ++before: $r8=0xfc9bd3736a3c08bd, $r12=0xaebba33c2e268daa ++after: $r8=0x0000000000000001, $r12=0xaebba33c2e268daa ++ctz.w $r5, $r27 :: ++before: $r5=0x5dc8af7bac7db01a, $r27=0xabce2f0e113597aa ++after: $r5=0x0000000000000001, $r27=0xabce2f0e113597aa ++ctz.w $r18, $r6 :: ++before: $r18=0xe4ac5b59d8442dfe, $r6=0x935d1b694e96bd04 ++after: $r18=0x0000000000000002, $r6=0x935d1b694e96bd04 ++ctz.w $r9, $r15 :: ++before: $r9=0x9b760f465efbb52e, $r15=0x834c9974dba65d99 ++after: $r9=000000000000000000, $r15=0x834c9974dba65d99 ++ctz.w $r13, $r7 :: ++before: $r13=0x95b5748f5f8bfb38, $r7=0x75dd7a9890cdf2d9 ++after: $r13=000000000000000000, $r7=0x75dd7a9890cdf2d9 ++ctz.w $r29, $r17 :: ++before: $r29=0xa25119fd892d1b20, $r17=0x38c12e795dc52acf ++after: $r29=000000000000000000, $r17=0x38c12e795dc52acf ++ctz.w $r15, $r12 :: ++before: $r15=0x95c2ce0f0446807c, $r12=0x623a5915ac8164b2 ++after: $r15=0x0000000000000001, $r12=0x623a5915ac8164b2 ++ctz.w $r6, $r17 :: ++before: $r6=0xd9034892a300dca8, $r17=0x5911fea4e6ce1df3 ++after: $r6=000000000000000000, $r17=0x5911fea4e6ce1df3 ++ctz.w $r10, $r25 :: ++before: $r10=0xda1e0d0eb34884ab, $r25=0x8d70d49a10ba8968 ++after: $r10=0x0000000000000003, $r25=0x8d70d49a10ba8968 ++ctz.w $r14, $r13 :: ++before: $r14=0x207d275c076e5247, $r13=0xd243debc9b557922 ++after: $r14=0x0000000000000001, $r13=0xd243debc9b557922 ++clo.d $r7, $r16 :: ++before: $r7=0x9432ccd773e86812, $r16=0x9f921ea959c97c2b ++after: $r7=0x0000000000000001, $r16=0x9f921ea959c97c2b ++clo.d $r7, $r12 :: ++before: $r7=0xaf19ef0b422b09bf, $r12=0x8773ec5c72444fe2 ++after: $r7=0x0000000000000001, $r12=0x8773ec5c72444fe2 ++clo.d $r5, $r10 :: ++before: $r5=0xa2912bc0ca36fa58, $r10=0x2c93a7506a8979b7 ++after: $r5=000000000000000000, $r10=0x2c93a7506a8979b7 ++clo.d $r7, $r28 :: ++before: $r7=0x69dd3f71121c7380, $r28=0x1784b7c2c7558b4a ++after: $r7=000000000000000000, $r28=0x1784b7c2c7558b4a ++clo.d $r15, $r9 :: ++before: $r15=0x95b40b42f113cecc, $r9=0xf0cdb7b9c17bb9e1 ++after: $r15=0x0000000000000004, $r9=0xf0cdb7b9c17bb9e1 ++clo.d $r9, $r27 :: ++before: $r9=0x1961ee1499945d08, $r27=0x23c7a2252c1cbc78 ++after: $r9=000000000000000000, $r27=0x23c7a2252c1cbc78 ++clo.d $r30, $r19 :: ++before: $r30=0xda0aa8b04f719a51, $r19=0x8f93c7a1b3cc9f12 ++after: $r30=0x0000000000000001, $r19=0x8f93c7a1b3cc9f12 ++clo.d $r26, $r20 :: ++before: $r26=0xdd4f62bfe1237a28, $r20=0xd61c7bfe05165d04 ++after: $r26=0x0000000000000002, $r20=0xd61c7bfe05165d04 ++clo.d $r26, $r6 :: ++before: $r26=0x44a1378e22d6ec81, $r6=0x1b21543ee9abd103 ++after: $r26=000000000000000000, $r6=0x1b21543ee9abd103 ++clo.d $r24, $r16 :: ++before: $r24=0x51efcf6ef8eb9917, $r16=0x602cbdf020ee6da8 ++after: $r24=000000000000000000, $r16=0x602cbdf020ee6da8 ++clz.d $r27, $r7 :: ++before: $r27=0x91df318f7b476077, $r7=0x6ca0b9cf9bb84c4a ++after: $r27=0x0000000000000001, $r7=0x6ca0b9cf9bb84c4a ++clz.d $r19, $r30 :: ++before: $r19=0x435d7fb412d9c12c, $r30=0xc926e58bdb46104e ++after: $r19=000000000000000000, $r30=0xc926e58bdb46104e 
++clz.d $r12, $r30 :: ++before: $r12=0x906b06441b2ef62b, $r30=0x04b9b91966077ef0 ++after: $r12=0x0000000000000005, $r30=0x04b9b91966077ef0 ++clz.d $r28, $r6 :: ++before: $r28=0x28bb3e3324f33e14, $r6=0x7628cd8752be6223 ++after: $r28=0x0000000000000001, $r6=0x7628cd8752be6223 ++clz.d $r14, $r15 :: ++before: $r14=0xb7a5ae04bf2e60c0, $r15=0x41a328a79afda305 ++after: $r14=0x0000000000000001, $r15=0x41a328a79afda305 ++clz.d $r4, $r23 :: ++before: $r4=0x5fd8327a265b1a3b, $r23=0x66b92d8b5b842d4a ++after: $r4=0x0000000000000001, $r23=0x66b92d8b5b842d4a ++clz.d $r18, $r29 :: ++before: $r18=0x73df6808e38c72ad, $r29=0x6b91b11261dd26b6 ++after: $r18=0x0000000000000001, $r29=0x6b91b11261dd26b6 ++clz.d $r13, $r8 :: ++before: $r13=0xd8d2dbd71d1783ad, $r8=0xdc50b7586ccab6a1 ++after: $r13=000000000000000000, $r8=0xdc50b7586ccab6a1 ++clz.d $r17, $r10 :: ++before: $r17=0xee6f842bb7686b8d, $r10=0xdf52e003cd95f02f ++after: $r17=000000000000000000, $r10=0xdf52e003cd95f02f ++clz.d $r13, $r8 :: ++before: $r13=0x91e717aef96cc046, $r8=0x5dd0743ed560ba78 ++after: $r13=0x0000000000000001, $r8=0x5dd0743ed560ba78 ++cto.d $r31, $r5 :: ++before: $r31=0xf361d5d1fb232769, $r5=0x1530b67240d804cf ++after: $r31=0x0000000000000004, $r5=0x1530b67240d804cf ++cto.d $r5, $r26 :: ++before: $r5=0xbedb393d17f69d40, $r26=0xcef56269ef7aecda ++after: $r5=000000000000000000, $r26=0xcef56269ef7aecda ++cto.d $r5, $r31 :: ++before: $r5=0xadd75db878cdbf84, $r31=0x8e08acc65c97f0b2 ++after: $r5=000000000000000000, $r31=0x8e08acc65c97f0b2 ++cto.d $r31, $r31 :: ++before: $r31=0x6a8a89827e4929f9, $r31=0x7df0f59d97924bb3 ++after: $r31=0x0000000000000002, $r31=0x0000000000000002 ++cto.d $r14, $r30 :: ++before: $r14=0xefb0874ef3600b6d, $r30=0x97a4b45ab971a548 ++after: $r14=000000000000000000, $r30=0x97a4b45ab971a548 ++cto.d $r5, $r17 :: ++before: $r5=0x144271fb49c8d2d8, $r17=0x787e6dbb4fec4d21 ++after: $r5=0x0000000000000001, $r17=0x787e6dbb4fec4d21 ++cto.d $r28, $r20 :: ++before: $r28=0xd6d0953d2a12c998, $r20=0xafd578caad0dfa09 ++after: $r28=0x0000000000000001, $r20=0xafd578caad0dfa09 ++cto.d $r16, $r18 :: ++before: $r16=0x0de650be54a7990c, $r18=0x3ea8f45e10441829 ++after: $r16=0x0000000000000001, $r18=0x3ea8f45e10441829 ++cto.d $r15, $r16 :: ++before: $r15=0x0bbd328743f49a86, $r16=0x5cafc638b6b509be ++after: $r15=000000000000000000, $r16=0x5cafc638b6b509be ++cto.d $r6, $r20 :: ++before: $r6=0x598ee27859cf8d0e, $r20=0x4bce530e537ad762 ++after: $r6=000000000000000000, $r20=0x4bce530e537ad762 ++ctz.d $r14, $r28 :: ++before: $r14=0xf2e4d886a8fd3fe3, $r28=0x0dafbabdfefac692 ++after: $r14=0x0000000000000001, $r28=0x0dafbabdfefac692 ++ctz.d $r6, $r27 :: ++before: $r6=0xe005a6a20d44fbca, $r27=0xe000ac4f4cfb2ce2 ++after: $r6=0x0000000000000001, $r27=0xe000ac4f4cfb2ce2 ++ctz.d $r15, $r26 :: ++before: $r15=0x871c2ccd50ec0784, $r26=0xa82b0d96dd72f11c ++after: $r15=0x0000000000000002, $r26=0xa82b0d96dd72f11c ++ctz.d $r17, $r20 :: ++before: $r17=0xebe7d9f4ec5055d5, $r20=0x65575957936d1d6e ++after: $r17=0x0000000000000001, $r20=0x65575957936d1d6e ++ctz.d $r19, $r8 :: ++before: $r19=0x394effa243e5f14c, $r8=0xf6852349a7b00561 ++after: $r19=000000000000000000, $r8=0xf6852349a7b00561 ++ctz.d $r5, $r9 :: ++before: $r5=0x3c67392fc408e9db, $r9=0xeff4bf8e886d7cc3 ++after: $r5=000000000000000000, $r9=0xeff4bf8e886d7cc3 ++ctz.d $r31, $r15 :: ++before: $r31=0xbf5435775bd0435b, $r15=0x19760246c8d1d680 ++after: $r31=0x0000000000000007, $r15=0x19760246c8d1d680 ++ctz.d $r9, $r5 :: ++before: $r9=0x0ccde230362ce06a, $r5=0x7590c6e73077c2bc ++after: $r9=0x0000000000000002, 
$r5=0x7590c6e73077c2bc ++ctz.d $r28, $r25 :: ++before: $r28=0x2518777b06d608a0, $r25=0xb87647dad481ba32 ++after: $r28=0x0000000000000001, $r25=0xb87647dad481ba32 ++ctz.d $r23, $r19 :: ++before: $r23=0xbe232a9fe2090e75, $r19=0x2dceda5cdc990d2e ++after: $r23=0x0000000000000001, $r19=0x2dceda5cdc990d2e ++revb.2h $r29, $r30 :: ++before: $r29=0x75397084990a0745, $r30=0x0d4c83f5966c1c17 ++after: $r29=0x000000006c96171c, $r30=0x0d4c83f5966c1c17 ++revb.2h $r17, $r23 :: ++before: $r17=0xecfbee2a69bbe344, $r23=0x5a42dc5dc5705f68 ++after: $r17=0x0000000070c5685f, $r23=0x5a42dc5dc5705f68 ++revb.2h $r6, $r14 :: ++before: $r6=0xbfeffdbd68845522, $r14=0x3490af5b50fd56bf ++after: $r6=0xfffffffffd50bf56, $r14=0x3490af5b50fd56bf ++revb.2h $r13, $r6 :: ++before: $r13=0x58e1821d319a1598, $r6=0x4c6711d021a72be6 ++after: $r13=0xffffffffa721e62b, $r6=0x4c6711d021a72be6 ++revb.2h $r18, $r8 :: ++before: $r18=0x6e14994d4e16ff86, $r8=0x9fda01513ab5ceb8 ++after: $r18=0xffffffffb53ab8ce, $r8=0x9fda01513ab5ceb8 ++revb.2h $r7, $r30 :: ++before: $r7=0x9979d3a3fcfc9323, $r30=0x504c708535bc136f ++after: $r7=0xffffffffbc356f13, $r30=0x504c708535bc136f ++revb.2h $r28, $r19 :: ++before: $r28=0x9daf4aa3a33eec5f, $r19=0xaa376fc54f4be6f5 ++after: $r28=0x000000004b4ff5e6, $r19=0xaa376fc54f4be6f5 ++revb.2h $r30, $r8 :: ++before: $r30=0x2e0bba43ec83e59e, $r8=0xaee8b8acd436f6da ++after: $r30=0x0000000036d4daf6, $r8=0xaee8b8acd436f6da ++revb.2h $r14, $r7 :: ++before: $r14=0x9634787c9be10863, $r7=0xe9da521d42716c0a ++after: $r14=0x0000000071420a6c, $r7=0xe9da521d42716c0a ++revb.2h $r23, $r14 :: ++before: $r23=0x687b89225667081a, $r14=0x9089e36a4f12f9c6 ++after: $r23=0x00000000124fc6f9, $r14=0x9089e36a4f12f9c6 ++revb.4h $r4, $r25 :: ++before: $r4=0xc42859bd06b669d2, $r25=0x782e4ae6ab812191 ++after: $r4=0x2e78e64a81ab9121, $r25=0x782e4ae6ab812191 ++revb.4h $r18, $r19 :: ++before: $r18=0x45ca4499d789fe5b, $r19=0x6e558c98b95d346d ++after: $r18=0x556e988c5db96d34, $r19=0x6e558c98b95d346d ++revb.4h $r24, $r10 :: ++before: $r24=0x2d04871fd753c43f, $r10=0xbeab033e2b5a979e ++after: $r24=0xabbe3e035a2b9e97, $r10=0xbeab033e2b5a979e ++revb.4h $r24, $r8 :: ++before: $r24=0xbc4deb39fb2ffe2e, $r8=0x5e3e50b8025e77f3 ++after: $r24=0x3e5eb8505e02f377, $r8=0x5e3e50b8025e77f3 ++revb.4h $r7, $r14 :: ++before: $r7=0xf44a6ea6f42e0918, $r14=0x9f617a848e4ad8f2 ++after: $r7=0x619f847a4a8ef2d8, $r14=0x9f617a848e4ad8f2 ++revb.4h $r13, $r12 :: ++before: $r13=0xda815ff8648e92b9, $r12=0xa401e74c4dd88e12 ++after: $r13=0x01a44ce7d84d128e, $r12=0xa401e74c4dd88e12 ++revb.4h $r31, $r19 :: ++before: $r31=0x7964d861d2ecb8d5, $r19=0xe402e87f73fb4c68 ++after: $r31=0x02e47fe8fb73684c, $r19=0xe402e87f73fb4c68 ++revb.4h $r29, $r25 :: ++before: $r29=0x6beff3fa6167cdcc, $r25=0x11e350b71aee0229 ++after: $r29=0xe311b750ee1a2902, $r25=0x11e350b71aee0229 ++revb.4h $r4, $r8 :: ++before: $r4=0x357a56e8ae275376, $r8=0xdf8ebc175f4be7e3 ++after: $r4=0x8edf17bc4b5fe3e7, $r8=0xdf8ebc175f4be7e3 ++revb.4h $r15, $r27 :: ++before: $r15=0xeb11b29acfe397d6, $r27=0x42d231083cd97aa0 ++after: $r15=0xd2420831d93ca07a, $r27=0x42d231083cd97aa0 ++revb.2w $r27, $r31 :: ++before: $r27=0x0978f867dd7f0cb8, $r31=0x19eec2d357cd6a06 ++after: $r27=0xd3c2ee19066acd57, $r31=0x19eec2d357cd6a06 ++revb.2w $r10, $r10 :: ++before: $r10=0x7897a40c4fda96d5, $r10=0xcb849783a18de892 ++after: $r10=0x839784cb92e88da1, $r10=0x839784cb92e88da1 ++revb.2w $r23, $r14 :: ++before: $r23=0x018338c734be53a1, $r14=0x6258664ec1bb96b8 ++after: $r23=0x4e665862b896bbc1, $r14=0x6258664ec1bb96b8 ++revb.2w $r12, $r19 :: ++before: 
$r12=0x7417ec4fef3451cc, $r19=0x216ad32ee149542b ++after: $r12=0x2ed36a212b5449e1, $r19=0x216ad32ee149542b ++revb.2w $r31, $r30 :: ++before: $r31=0x8132835b9905b650, $r30=0x6fac007fbefdecf2 ++after: $r31=0x7f00ac6ff2ecfdbe, $r30=0x6fac007fbefdecf2 ++revb.2w $r25, $r10 :: ++before: $r25=0x7336ebe375c83bed, $r10=0x643f76ac3010a6bb ++after: $r25=0xac763f64bba61030, $r10=0x643f76ac3010a6bb ++revb.2w $r31, $r29 :: ++before: $r31=0x5d99f79f18e805b8, $r29=0xe65e70ca4cf299fa ++after: $r31=0xca705ee6fa99f24c, $r29=0xe65e70ca4cf299fa ++revb.2w $r30, $r19 :: ++before: $r30=0xec10dd6d7249c5fa, $r19=0x3f6bb22d66caf299 ++after: $r30=0x2db26b3f99f2ca66, $r19=0x3f6bb22d66caf299 ++revb.2w $r6, $r30 :: ++before: $r6=0x2c394783817c0870, $r30=0xd823cff07efd78db ++after: $r6=0xf0cf23d8db78fd7e, $r30=0xd823cff07efd78db ++revb.2w $r4, $r15 :: ++before: $r4=0xc5acf61f075cd4e4, $r15=0xc154dd7479b90c6c ++after: $r4=0x74dd54c16c0cb979, $r15=0xc154dd7479b90c6c ++revb.d $r6, $r23 :: ++before: $r6=0xe6e05a0dafda37ce, $r23=0x2ac7d047f197f6fb ++after: $r6=0xfbf697f147d0c72a, $r23=0x2ac7d047f197f6fb ++revb.d $r19, $r4 :: ++before: $r19=0xc07a757bea6011ff, $r4=0xcef6cef3e0f941ff ++after: $r19=0xff41f9e0f3cef6ce, $r4=0xcef6cef3e0f941ff ++revb.d $r6, $r15 :: ++before: $r6=0x711bb31e18fcb2f3, $r15=0x522068042cf5be1a ++after: $r6=0x1abef52c04682052, $r15=0x522068042cf5be1a ++revb.d $r9, $r7 :: ++before: $r9=0xf9654c655c67392e, $r7=0xa1b065742110e3f4 ++after: $r9=0xf4e310217465b0a1, $r7=0xa1b065742110e3f4 ++revb.d $r29, $r4 :: ++before: $r29=0x70c0dcad23609060, $r4=0x5d04b7b2ece6f6bb ++after: $r29=0xbbf6e6ecb2b7045d, $r4=0x5d04b7b2ece6f6bb ++revb.d $r15, $r4 :: ++before: $r15=0x809930516f3136eb, $r4=0xda33327a8d42ef55 ++after: $r15=0x55ef428d7a3233da, $r4=0xda33327a8d42ef55 ++revb.d $r10, $r4 :: ++before: $r10=0x1a7ee04b354f6af5, $r4=0xcda6c6943e46fed7 ++after: $r10=0xd7fe463e94c6a6cd, $r4=0xcda6c6943e46fed7 ++revb.d $r20, $r4 :: ++before: $r20=0x315f95452d748459, $r4=0xa001e934745758e0 ++after: $r20=0xe058577434e901a0, $r4=0xa001e934745758e0 ++revb.d $r6, $r8 :: ++before: $r6=0xabbd06000374627a, $r8=0x85441006689de89b ++after: $r6=0x9be89d6806104485, $r8=0x85441006689de89b ++revb.d $r27, $r24 :: ++before: $r27=0x2d404e69f54afa48, $r24=0x46f47b822772f3cd ++after: $r27=0xcdf37227827bf446, $r24=0x46f47b822772f3cd ++revh.2w $r6, $r15 :: ++before: $r6=0x5b764c7bfb1999eb, $r15=0x86603fc3f96843ed ++after: $r6=0x3fc3866043edf968, $r15=0x86603fc3f96843ed ++revh.2w $r19, $r10 :: ++before: $r19=0xf39f8e6b43dd63ce, $r10=0x141d294d06276941 ++after: $r19=0x294d141d69410627, $r10=0x141d294d06276941 ++revh.2w $r5, $r20 :: ++before: $r5=0x3ff54e5c35d83e69, $r20=0xd677d6a21384278a ++after: $r5=0xd6a2d677278a1384, $r20=0xd677d6a21384278a ++revh.2w $r4, $r31 :: ++before: $r4=0xce463b02a2f840cc, $r31=0x6f87c9636f9cfca6 ++after: $r4=0xc9636f87fca66f9c, $r31=0x6f87c9636f9cfca6 ++revh.2w $r19, $r26 :: ++before: $r19=0x34abc96ddde64e27, $r26=0x723ec7ce92720502 ++after: $r19=0xc7ce723e05029272, $r26=0x723ec7ce92720502 ++revh.2w $r8, $r18 :: ++before: $r8=0x1454a1ee8739c235, $r18=0xd890efa373a6dfb0 ++after: $r8=0xefa3d890dfb073a6, $r18=0xd890efa373a6dfb0 ++revh.2w $r12, $r31 :: ++before: $r12=0xf0c8b856751cae70, $r31=0xb675dff2568e6ebf ++after: $r12=0xdff2b6756ebf568e, $r31=0xb675dff2568e6ebf ++revh.2w $r24, $r9 :: ++before: $r24=0xb36984e3a7a3eaea, $r9=0xa169cfa9f35f6a8a ++after: $r24=0xcfa9a1696a8af35f, $r9=0xa169cfa9f35f6a8a ++revh.2w $r25, $r27 :: ++before: $r25=0x640b3e6b41180473, $r27=0x9bc307f0a2ef368f ++after: $r25=0x07f09bc3368fa2ef, 
$r27=0x9bc307f0a2ef368f ++revh.2w $r7, $r9 :: ++before: $r7=0x897e1406a0eb2dc9, $r9=0x1921bcf657fecdcc ++after: $r7=0xbcf61921cdcc57fe, $r9=0x1921bcf657fecdcc ++revh.d $r14, $r25 :: ++before: $r14=0xec3573411ea025e5, $r25=0x6976d4371b08f1ab ++after: $r14=0xf1ab1b08d4376976, $r25=0x6976d4371b08f1ab ++revh.d $r24, $r31 :: ++before: $r24=0x9ef9e5cb1375d42a, $r31=0x9ce130c8a579e11d ++after: $r24=0xe11da57930c89ce1, $r31=0x9ce130c8a579e11d ++revh.d $r9, $r28 :: ++before: $r9=0x3c8cd0055a5e7031, $r28=0xf05f9381753ded16 ++after: $r9=0xed16753d9381f05f, $r28=0xf05f9381753ded16 ++revh.d $r24, $r26 :: ++before: $r24=0x6a4e5797f19041f6, $r26=0xd26a5ae65e21041c ++after: $r24=0x041c5e215ae6d26a, $r26=0xd26a5ae65e21041c ++revh.d $r14, $r24 :: ++before: $r14=0xe2cb9a83aee22d97, $r24=0x6405d71e0bb63321 ++after: $r14=0x33210bb6d71e6405, $r24=0x6405d71e0bb63321 ++revh.d $r19, $r23 :: ++before: $r19=0x91cdf3bcd9afe76d, $r23=0x171953826107396a ++after: $r19=0x396a610753821719, $r23=0x171953826107396a ++revh.d $r23, $r14 :: ++before: $r23=0x93ed49255d084e12, $r14=0x374bd76990198b43 ++after: $r23=0x8b439019d769374b, $r14=0x374bd76990198b43 ++revh.d $r31, $r12 :: ++before: $r31=0x08e54a908f04882b, $r12=0xf7e8756491b9d346 ++after: $r31=0xd34691b97564f7e8, $r12=0xf7e8756491b9d346 ++revh.d $r31, $r20 :: ++before: $r31=0xbb7cd34502fdf01f, $r20=0x906b7289a6957d3f ++after: $r31=0x7d3fa6957289906b, $r20=0x906b7289a6957d3f ++revh.d $r27, $r30 :: ++before: $r27=0xacbca1aacdd9dd3f, $r30=0x3072d9c69004d4b5 ++after: $r27=0xd4b59004d9c63072, $r30=0x3072d9c69004d4b5 ++bitrev.4b $r23, $r19 :: ++before: $r23=0xb422f2854b491d92, $r19=0x7649084cec69098a ++after: $r23=0x0000000037969051, $r19=0x7649084cec69098a ++bitrev.4b $r27, $r16 :: ++before: $r27=0xd14736328d74b448, $r16=0x1abee3a271c71db9 ++after: $r27=0xffffffff8ee3b89d, $r16=0x1abee3a271c71db9 ++bitrev.4b $r15, $r23 :: ++before: $r15=0xf17c0f0ccfbb2c38, $r23=0x490107ff4155bd17 ++after: $r15=0xffffffff82aabde8, $r23=0x490107ff4155bd17 ++bitrev.4b $r5, $r18 :: ++before: $r5=0x8408d6a30523619d, $r18=0x625d5aedf0add9fb ++after: $r5=0x000000000fb59bdf, $r18=0x625d5aedf0add9fb ++bitrev.4b $r8, $r15 :: ++before: $r8=0xc41a2fdb60ba75a6, $r15=0xe2562eab3b333a00 ++after: $r8=0xffffffffdccc5c00, $r15=0xe2562eab3b333a00 ++bitrev.4b $r17, $r18 :: ++before: $r17=0x6a409394f364c02a, $r18=0xea970d90edb343cc ++after: $r17=0xffffffffb7cdc233, $r18=0xea970d90edb343cc ++bitrev.4b $r25, $r29 :: ++before: $r25=0xd8d1c9b8dcff266d, $r29=0xacca47ac7597ca65 ++after: $r25=0xffffffffaee953a6, $r29=0xacca47ac7597ca65 ++bitrev.4b $r26, $r24 :: ++before: $r26=0xe2a0d11df8c5055b, $r24=0xc57559d03e3e216d ++after: $r26=0x000000007c7c84b6, $r24=0xc57559d03e3e216d ++bitrev.4b $r8, $r27 :: ++before: $r8=0xb6a5815170d657f0, $r27=0x9f60901eefa1347a ++after: $r8=0xfffffffff7852c5e, $r27=0x9f60901eefa1347a ++bitrev.4b $r20, $r16 :: ++before: $r20=0x432a2fbf2b073732, $r16=0x604b8d7ecb5e86dc ++after: $r20=0xffffffffd37a613b, $r16=0x604b8d7ecb5e86dc ++bitrev.8b $r25, $r7 :: ++before: $r25=0x22b2e6007f742fd1, $r7=0xe8c23886def1bbc9 ++after: $r25=0x17431c617b8fdd93, $r7=0xe8c23886def1bbc9 ++bitrev.8b $r28, $r30 :: ++before: $r28=0xf985d7779c5ca157, $r30=0x285cbdc0f47395d1 ++after: $r28=0x143abd032fcea98b, $r30=0x285cbdc0f47395d1 ++bitrev.8b $r29, $r13 :: ++before: $r29=0xd9b8364a793bc50c, $r13=0xded35d7c7ba73d29 ++after: $r29=0x7bcbba3edee5bc94, $r13=0xded35d7c7ba73d29 ++bitrev.8b $r12, $r28 :: ++before: $r12=0x18d7769bc1147dc5, $r28=0xfb6cda8c7f12313a ++after: $r12=0xdf365b31fe488c5c, $r28=0xfb6cda8c7f12313a 
++bitrev.8b $r23, $r6 :: ++before: $r23=0xeff84dc134b3acbe, $r6=0xee7c4e89e333eda8 ++after: $r23=0x773e7291c7ccb715, $r6=0xee7c4e89e333eda8 ++bitrev.8b $r24, $r20 :: ++before: $r24=0xad65748f0bc46e9f, $r20=0xd0d88137a6284eac ++after: $r24=0x0b1b81ec65147235, $r20=0xd0d88137a6284eac ++bitrev.8b $r10, $r5 :: ++before: $r10=0xe0e1c1e262352e89, $r5=0x9c43ebc4f7c65dc1 ++after: $r10=0x39c2d723ef63ba83, $r5=0x9c43ebc4f7c65dc1 ++bitrev.8b $r27, $r13 :: ++before: $r27=0x444a53aa65d317dc, $r13=0x473eea7ea5691da7 ++after: $r27=0xe27c577ea596b8e5, $r13=0x473eea7ea5691da7 ++bitrev.8b $r13, $r9 :: ++before: $r13=0xfc48d0fdf4c7a6e5, $r9=0x5dcad407df3401a5 ++after: $r13=0xba532be0fb2c80a5, $r9=0x5dcad407df3401a5 ++bitrev.8b $r12, $r5 :: ++before: $r12=0x0ebef32fcbd91e9a, $r5=0xe1eeea527816355e ++after: $r12=0x8777574a1e68ac7a, $r5=0xe1eeea527816355e ++bitrev.w $r18, $r15 :: ++before: $r18=0x02028b0c8691a767, $r15=0x5822df2950c9c2d3 ++after: $r18=0xffffffffcb43930a, $r15=0x5822df2950c9c2d3 ++bitrev.w $r30, $r27 :: ++before: $r30=0x2a2d48209d9f377b, $r27=0xde9d59b836df41fc ++after: $r30=0x000000003f82fb6c, $r27=0xde9d59b836df41fc ++bitrev.w $r17, $r4 :: ++before: $r17=0xe6fb8b07c90464e6, $r4=0x65976cb5c6c6a5b0 ++after: $r17=0x000000000da56363, $r4=0x65976cb5c6c6a5b0 ++bitrev.w $r9, $r31 :: ++before: $r9=0x1b95159ec5c37644, $r31=0x62c549b741c2adad ++after: $r9=0xffffffffb5b54382, $r31=0x62c549b741c2adad ++bitrev.w $r17, $r14 :: ++before: $r17=0x8b414dfa7156f0ce, $r14=0x9642d0186f420e7c ++after: $r17=0x000000003e7042f6, $r14=0x9642d0186f420e7c ++bitrev.w $r15, $r8 :: ++before: $r15=0x2722ecb374b4d5e3, $r8=0xeaf151a286bbc4cf ++after: $r15=0xfffffffff323dd61, $r8=0xeaf151a286bbc4cf ++bitrev.w $r27, $r19 :: ++before: $r27=0x058ec913c63634a5, $r19=0xe723c39df96a4fd2 ++after: $r27=0x000000004bf2569f, $r19=0xe723c39df96a4fd2 ++bitrev.w $r7, $r26 :: ++before: $r7=0xa245e7dd80a324a2, $r26=0x0e7d6c2b2683291e ++after: $r7=0x000000007894c164, $r26=0x0e7d6c2b2683291e ++bitrev.w $r31, $r6 :: ++before: $r31=0x114292ed02ba1255, $r6=0x13cd62afac5ac3d4 ++after: $r31=0x000000002bc35a35, $r6=0x13cd62afac5ac3d4 ++bitrev.w $r7, $r25 :: ++before: $r7=0xbd46d88fc8d2933b, $r25=0x69ce9ccb487dadd1 ++after: $r7=0xffffffff8bb5be12, $r25=0x69ce9ccb487dadd1 ++bitrev.d $r4, $r29 :: ++before: $r4=0xeaacaeb60b227eab, $r29=0x799f36da44887e2c ++after: $r4=0x347e11225b6cf99e, $r29=0x799f36da44887e2c ++bitrev.d $r29, $r6 :: ++before: $r29=0xcfbb055ab1ebf7fa, $r6=0x2924f63fec744b02 ++after: $r29=0x40d22e37fc6f2494, $r6=0x2924f63fec744b02 ++bitrev.d $r28, $r31 :: ++before: $r28=0xaac74a398d76900d, $r31=0xf6c75e45e33b4cb7 ++after: $r28=0xed32dcc7a27ae36f, $r31=0xf6c75e45e33b4cb7 ++bitrev.d $r24, $r12 :: ++before: $r24=0xfc8bc33fb4a8d023, $r12=0xcccd98e9d53aa26a ++after: $r24=0x56455cab9719b333, $r12=0xcccd98e9d53aa26a ++bitrev.d $r8, $r7 :: ++before: $r8=0x7502cd68289f4c3a, $r7=0x746ddfd3c3a512b1 ++after: $r8=0x8d48a5c3cbfbb62e, $r7=0x746ddfd3c3a512b1 ++bitrev.d $r6, $r16 :: ++before: $r6=0xe8b94bfe615774ae, $r16=0x518770bbee53d619 ++after: $r6=0x986bca77dd0ee18a, $r16=0x518770bbee53d619 ++bitrev.d $r24, $r4 :: ++before: $r24=0x6318c17dbae816c3, $r4=0x9ab684e129b57f07 ++after: $r24=0xe0fead9487216d59, $r4=0x9ab684e129b57f07 ++bitrev.d $r27, $r23 :: ++before: $r27=0x8a22909b005a86b8, $r23=0x69337e8c3b1fc2bb ++after: $r27=0xdd43f8dc317ecc96, $r23=0x69337e8c3b1fc2bb ++bitrev.d $r20, $r9 :: ++before: $r20=0x009f43885d40caf0, $r9=0x193cbf609dbc33d4 ++after: $r20=0x2bcc3db906fd3c98, $r9=0x193cbf609dbc33d4 ++bitrev.d $r30, $r19 :: ++before: 
$r30=0x30fa02e0fc390ac9, $r19=0x21686c931c6260da ++after: $r30=0x5b064638c9361684, $r19=0x21686c931c6260da ++bytepick.w $r26, $r15, $r19, 1 :: ++before: $r26=0x1b0b980dd3271273, $r15=0x8737ca6c8106ceee, $r19=0x02807e0dcb47d6ef ++after: $r26=0x0000000047d6ef81, $r15=0x8737ca6c8106ceee, $r19=0x02807e0dcb47d6ef ++bytepick.w $r15, $r17, $r7, 0 :: ++before: $r15=0x3d2e3fbcbd032001, $r17=0x5eced8cf3da8b205, $r7=0xb8155b41321e09c0 ++after: $r15=0x00000000321e09c0, $r17=0x5eced8cf3da8b205, $r7=0xb8155b41321e09c0 ++bytepick.w $r12, $r15, $r17, 3 :: ++before: $r12=0x2670c80f12a87520, $r15=0x29ab42125e3ea5c8, $r17=0x32a39ac435460f2f ++after: $r12=0x000000002f5e3ea5, $r15=0x29ab42125e3ea5c8, $r17=0x32a39ac435460f2f ++bytepick.w $r4, $r20, $r18, 3 :: ++before: $r4=0x5a64271926277c04, $r20=0xcbde225cc736e5d5, $r18=0x18abacc874db47e9 ++after: $r4=0xffffffffe9c736e5, $r20=0xcbde225cc736e5d5, $r18=0x18abacc874db47e9 ++bytepick.w $r8, $r5, $r24, 3 :: ++before: $r8=0xdb41606ce3f9df94, $r5=0xc3f6ce370d754a3f, $r24=0x34ad5a423a5c42e3 ++after: $r8=0xffffffffe30d754a, $r5=0xc3f6ce370d754a3f, $r24=0x34ad5a423a5c42e3 ++bytepick.w $r5, $r30, $r14, 2 :: ++before: $r5=0xedb3aad221050d0b, $r30=0x46f5823389f2581a, $r14=0xf766f1e75349809e ++after: $r5=0xffffffff809e89f2, $r30=0x46f5823389f2581a, $r14=0xf766f1e75349809e ++bytepick.w $r4, $r19, $r18, 0 :: ++before: $r4=0xf92ed0231f25c991, $r19=0xba59df0352ed6b3e, $r18=0x58d6fbce4e4325e8 ++after: $r4=0x000000004e4325e8, $r19=0xba59df0352ed6b3e, $r18=0x58d6fbce4e4325e8 ++bytepick.w $r18, $r28, $r24, 3 :: ++before: $r18=0x177dcaf8fcd30180, $r28=0xbdc04b3b8f707462, $r24=0x6102168606deb3ed ++after: $r18=0xffffffffed8f7074, $r28=0xbdc04b3b8f707462, $r24=0x6102168606deb3ed ++bytepick.w $r13, $r27, $r29, 2 :: ++before: $r13=0x383d82c5d717259b, $r27=0x495e30e5e680d7fc, $r29=0x1c17f315ebb3bec3 ++after: $r13=0xffffffffbec3e680, $r27=0x495e30e5e680d7fc, $r29=0x1c17f315ebb3bec3 ++bytepick.w $r5, $r29, $r4, 1 :: ++before: $r5=0x26a0fb212ab80a3a, $r29=0x78b167aecd81f869, $r4=0x6daab499f228fef4 ++after: $r5=0x0000000028fef4cd, $r29=0x78b167aecd81f869, $r4=0x6daab499f228fef4 ++bytepick.d $r28, $r4, $r28, 7 :: ++before: $r28=0x794fa22d52f7e834, $r4=0x2f084db071d3bcce, $r28=0xa0cf51d7020f10c1 ++after: $r28=0xc12f084db071d3bc, $r4=0x2f084db071d3bcce, $r28=0xc12f084db071d3bc ++bytepick.d $r10, $r18, $r4, 2 :: ++before: $r10=0x9fd7a6b378604833, $r18=0x37da15f8a7154cab, $r4=0xaedd64328d27a0a8 ++after: $r10=0x64328d27a0a837da, $r18=0x37da15f8a7154cab, $r4=0xaedd64328d27a0a8 ++bytepick.d $r7, $r6, $r24, 3 :: ++before: $r7=0xdee49920d429d3c2, $r6=0x15e3f61f2f82a2d1, $r24=0xdeba03c7761e4678 ++after: $r7=0xc7761e467815e3f6, $r6=0x15e3f61f2f82a2d1, $r24=0xdeba03c7761e4678 ++bytepick.d $r19, $r16, $r5, 4 :: ++before: $r19=0x53bda4d18e61fc44, $r16=0xc79bd94439006673, $r5=0xa8024ab452a2bd52 ++after: $r19=0x52a2bd52c79bd944, $r16=0xc79bd94439006673, $r5=0xa8024ab452a2bd52 ++bytepick.d $r26, $r19, $r25, 7 :: ++before: $r26=0xc8aae5136d925592, $r19=0xea109dd2837d3acf, $r25=0x30e93a75e695666a ++after: $r26=0x6aea109dd2837d3a, $r19=0xea109dd2837d3acf, $r25=0x30e93a75e695666a ++bytepick.d $r8, $r14, $r8, 2 :: ++before: $r8=0xa03db273c845b37f, $r14=0xa7fd0053a136769f, $r8=0x6ab932903229b035 ++after: $r8=0x32903229b035a7fd, $r14=0xa7fd0053a136769f, $r8=0x32903229b035a7fd ++bytepick.d $r9, $r14, $r23, 2 :: ++before: $r9=0x2f160a0d147b300f, $r14=0xdae9d5d15bb8f5b5, $r23=0xc4fdfbb29d49dfe4 ++after: $r9=0xfbb29d49dfe4dae9, $r14=0xdae9d5d15bb8f5b5, $r23=0xc4fdfbb29d49dfe4 ++bytepick.d $r20, $r18, $r15, 5 :: 
++before: $r20=0x30cefdebc30b841a, $r18=0xbfd016fb0312277c, $r15=0x44269b95d496912f ++after: $r20=0x96912fbfd016fb03, $r18=0xbfd016fb0312277c, $r15=0x44269b95d496912f ++bytepick.d $r12, $r17, $r5, 2 :: ++before: $r12=0xde32bc5d3471eed2, $r17=0xdb807610c6e762e4, $r5=0xb2148e34e649d1b8 ++after: $r12=0x8e34e649d1b8db80, $r17=0xdb807610c6e762e4, $r5=0xb2148e34e649d1b8 ++bytepick.d $r5, $r24, $r28, 3 :: ++before: $r5=0x9ab1be6a0faa61a8, $r24=0x97d4a12579967739, $r28=0xaa592ef1fd606bad ++after: $r5=0xf1fd606bad97d4a1, $r24=0x97d4a12579967739, $r28=0xaa592ef1fd606bad ++maskeqz $r14, $r28, $r25 :: ++before: $r14=0xc263b6b8f3404c8d, $r28=0x90ef733c88c88866, $r25=0xd256888d94e8d21a ++after: $r14=0x90ef733c88c88866, $r28=0x90ef733c88c88866, $r25=0xd256888d94e8d21a ++maskeqz $r13, $r9, $r15 :: ++before: $r13=0x5bdd86b962c61db4, $r9=0x8a78f7b88a728d92, $r15=0x69e707acb2c26a83 ++after: $r13=0x8a78f7b88a728d92, $r9=0x8a78f7b88a728d92, $r15=0x69e707acb2c26a83 ++maskeqz $r7, $r7, $r13 :: ++before: $r7=0xea86abdbdea660cb, $r7=0xfb778deef0a5b893, $r13=0xad10e23c971d1a9f ++after: $r7=0xfb778deef0a5b893, $r7=0xfb778deef0a5b893, $r13=0xad10e23c971d1a9f ++maskeqz $r8, $r7, $r19 :: ++before: $r8=0xf64df33b6146939f, $r7=0xe7376d3da44f4dfd, $r19=0x7987e122af2505ab ++after: $r8=0xe7376d3da44f4dfd, $r7=0xe7376d3da44f4dfd, $r19=0x7987e122af2505ab ++maskeqz $r10, $r27, $r29 :: ++before: $r10=0x404a261c069b488b, $r27=0x81886c523ec2658c, $r29=0x3236dc83d0a27cc1 ++after: $r10=0x81886c523ec2658c, $r27=0x81886c523ec2658c, $r29=0x3236dc83d0a27cc1 ++maskeqz $r23, $r16, $r25 :: ++before: $r23=0x8671050519b7bda0, $r16=0x26fa2567b106d73a, $r25=0x0d884011e0d767fe ++after: $r23=0x26fa2567b106d73a, $r16=0x26fa2567b106d73a, $r25=0x0d884011e0d767fe ++maskeqz $r5, $r19, $r18 :: ++before: $r5=0xbd8d4cef53122132, $r19=0x4976c047c57ec148, $r18=0x602312f372049a5e ++after: $r5=0x4976c047c57ec148, $r19=0x4976c047c57ec148, $r18=0x602312f372049a5e ++maskeqz $r29, $r24, $r23 :: ++before: $r29=0x07f390b695d8b12e, $r24=0x70043e7666a24a34, $r23=0xfee8f8f90ab3ac9b ++after: $r29=0x70043e7666a24a34, $r24=0x70043e7666a24a34, $r23=0xfee8f8f90ab3ac9b ++maskeqz $r25, $r4, $r18 :: ++before: $r25=0x07eaffcb6dac1b5b, $r4=0x4b12f8c6738216a2, $r18=0x409acb80b7391511 ++after: $r25=0x4b12f8c6738216a2, $r4=0x4b12f8c6738216a2, $r18=0x409acb80b7391511 ++maskeqz $r30, $r6, $r24 :: ++before: $r30=0x14d829636b628dc9, $r6=0xdb88a366a2271c2c, $r24=0x0ea0d5998835940a ++after: $r30=0xdb88a366a2271c2c, $r6=0xdb88a366a2271c2c, $r24=0x0ea0d5998835940a ++masknez $r14, $r24, $r5 :: ++before: $r14=0x46b15bbb9507bd79, $r24=0xc92af628c880a454, $r5=0x846a586db0af0965 ++after: $r14=000000000000000000, $r24=0xc92af628c880a454, $r5=0x846a586db0af0965 ++masknez $r30, $r8, $r8 :: ++before: $r30=0x43cd20b5234db4e8, $r8=0x7aeee6ab6b10561f, $r8=0x45ab4fdb4ca8b325 ++after: $r30=000000000000000000, $r8=0x45ab4fdb4ca8b325, $r8=0x45ab4fdb4ca8b325 ++masknez $r24, $r19, $r15 :: ++before: $r24=0xd3d50bbb34b528e2, $r19=0xdd71746b0beedae3, $r15=0xa34d82fc50174094 ++after: $r24=000000000000000000, $r19=0xdd71746b0beedae3, $r15=0xa34d82fc50174094 ++masknez $r29, $r26, $r26 :: ++before: $r29=0x576cb2da15b1462d, $r26=0x6c669f0195b50b7a, $r26=0xec1609ef36aa938f ++after: $r29=000000000000000000, $r26=0xec1609ef36aa938f, $r26=0xec1609ef36aa938f ++masknez $r4, $r29, $r10 :: ++before: $r4=0xaa220f67a02617db, $r29=0x0ffcd18e3016e10f, $r10=0x4cf9bdd8dca7f88f ++after: $r4=000000000000000000, $r29=0x0ffcd18e3016e10f, $r10=0x4cf9bdd8dca7f88f ++masknez $r23, $r9, $r29 :: ++before: $r23=0x774e1c840428fbde, 
$r9=0x391268694388d2a7, $r29=0xf06192a4e5780c53 ++after: $r23=000000000000000000, $r9=0x391268694388d2a7, $r29=0xf06192a4e5780c53 ++masknez $r7, $r25, $r28 :: ++before: $r7=0x7b75099f16135faa, $r25=0xf95af681c18bf31c, $r28=0x2f6122581dfdef74 ++after: $r7=000000000000000000, $r25=0xf95af681c18bf31c, $r28=0x2f6122581dfdef74 ++masknez $r26, $r10, $r16 :: ++before: $r26=0xe6006c9bd6bae204, $r10=0x7e84e5db1181249d, $r16=0x6ab2371059cdc875 ++after: $r26=000000000000000000, $r10=0x7e84e5db1181249d, $r16=0x6ab2371059cdc875 ++masknez $r26, $r15, $r28 :: ++before: $r26=0xb4c9c784ef74245f, $r15=0x20cc1c4c169ca02c, $r28=0x606eeb8ce6278d16 ++after: $r26=000000000000000000, $r15=0x20cc1c4c169ca02c, $r28=0x606eeb8ce6278d16 ++masknez $r19, $r16, $r16 :: ++before: $r19=0x75a721553f7c7054, $r16=0x7b63b7b7b3f5bd5f, $r16=0xf8c7933e92e155ee ++after: $r19=000000000000000000, $r16=0xf8c7933e92e155ee, $r16=0xf8c7933e92e155ee ++bstrins.w $r27, $r16, 31, 8 :: ++before: $r27=0x431055863e78b187, $r16=0xe18dda9620a50e9d ++after: $r27=0xffffffffa50e9d87, $r16=0xe18dda9620a50e9d ++bstrins.w $r26, $r27, 30, 27 :: ++before: $r26=0x19f800eab7e1ab51, $r27=0x61e7d86005d21d29 ++after: $r26=0xffffffffcfe1ab51, $r27=0x61e7d86005d21d29 ++bstrins.w $r15, $r4, 17, 14 :: ++before: $r15=0xb141d462e777528d, $r4=0xb7aebff9bcca1643 ++after: $r15=0xffffffffe774d28d, $r4=0xb7aebff9bcca1643 ++bstrins.w $r30, $r17, 24, 6 :: ++before: $r30=0xfac48083375844fe, $r17=0x6d3283ba14cc27eb ++after: $r30=0x000000003709fafe, $r17=0x6d3283ba14cc27eb ++bstrins.w $r12, $r12, 30, 25 :: ++before: $r12=0x9b7629774f19f64a, $r12=0x84ee8d65b2842686 ++after: $r12=0xffffffff8c842686, $r12=0xffffffff8c842686 ++bstrins.w $r15, $r10, 26, 1 :: ++before: $r15=0x290172844863090f, $r10=0x85ea298976069fcd ++after: $r15=0x000000004c0d3f9b, $r10=0x85ea298976069fcd ++bstrins.w $r10, $r13, 23, 8 :: ++before: $r10=0x66942ba1c15e85aa, $r13=0xddb2dfa7474a4370 ++after: $r10=0xffffffffc14370aa, $r13=0xddb2dfa7474a4370 ++bstrins.w $r5, $r20, 18, 16 :: ++before: $r5=0x3dcfecca80bf0d79, $r20=0x5044b246f2d3f890 ++after: $r5=0xffffffff80b80d79, $r20=0x5044b246f2d3f890 ++bstrins.w $r23, $r5, 25, 21 :: ++before: $r23=0xa11723142f1472a7, $r5=0xcbaaa9a23d119663 ++after: $r23=0x000000002c7472a7, $r5=0xcbaaa9a23d119663 ++bstrins.w $r20, $r31, 13, 12 :: ++before: $r20=0x6a1110240ba884b8, $r31=0x45cadf0ffe08cc25 ++after: $r20=0x000000000ba894b8, $r31=0x45cadf0ffe08cc25 ++bstrpick.w $r5, $r23, 23, 11 :: ++before: $r5=0x6885eaa89f691954, $r23=0x94f8458597294f2e ++after: $r5=0x0000000000000529, $r23=0x94f8458597294f2e ++bstrpick.w $r25, $r8, 18, 11 :: ++before: $r25=0x11be9b9923ebee96, $r8=0x23deda120a49df15 ++after: $r25=0x000000000000003b, $r8=0x23deda120a49df15 ++bstrpick.w $r6, $r6, 10, 3 :: ++before: $r6=0x3546d655181289bc, $r6=0x7ee84a41c952b690 ++after: $r6=0x00000000000000d2, $r6=0x00000000000000d2 ++bstrpick.w $r25, $r5, 15, 9 :: ++before: $r25=0xb2eec884ea77f548, $r5=0x23992bc40919416f ++after: $r25=0x0000000000000020, $r5=0x23992bc40919416f ++bstrpick.w $r26, $r14, 21, 8 :: ++before: $r26=0x8e591161730ac582, $r14=0xf45f4435cc1cb138 ++after: $r26=0x0000000000001cb1, $r14=0xf45f4435cc1cb138 ++bstrpick.w $r9, $r14, 7, 3 :: ++before: $r9=0x1ac92d930e8361f9, $r14=0xcc11dd56e96c6256 ++after: $r9=0x000000000000000a, $r14=0xcc11dd56e96c6256 ++bstrpick.w $r19, $r9, 8, 8 :: ++before: $r19=0xd15fd80fafe60a58, $r9=0xb1426a8c680d628c ++after: $r19=000000000000000000, $r9=0xb1426a8c680d628c ++bstrpick.w $r17, $r13, 30, 12 :: ++before: $r17=0xfa48c3cd091d2b5e, $r13=0x3a2827a58a014a72 ++after: 
$r17=0x000000000000a014, $r13=0x3a2827a58a014a72 ++bstrpick.w $r6, $r31, 16, 7 :: ++before: $r6=0xca10a858ebfa78a1, $r31=0x202a38722f270884 ++after: $r6=0x0000000000000211, $r31=0x202a38722f270884 ++bstrpick.w $r20, $r10, 31, 15 :: ++before: $r20=0xc010deb269ae6ba2, $r10=0x98f1d297734f9f4c ++after: $r20=0x000000000000e69f, $r10=0x98f1d297734f9f4c ++bstrins.d $r29, $r17, 60, 25 :: ++before: $r29=0x7cf4a9ec79307e59, $r17=0xb1b5afc00eef90a3 ++after: $r29=0x601ddf2147307e59, $r17=0xb1b5afc00eef90a3 ++bstrins.d $r10, $r27, 31, 22 :: ++before: $r10=0xc708602dee32579f, $r27=0x199d90a711e94375 ++after: $r10=0xc708602ddd72579f, $r27=0x199d90a711e94375 ++bstrins.d $r4, $r24, 58, 58 :: ++before: $r4=0x4e5ce98e217a4b59, $r24=0xaf25b5661daefdea ++after: $r4=0x4a5ce98e217a4b59, $r24=0xaf25b5661daefdea ++bstrins.d $r12, $r30, 16, 6 :: ++before: $r12=0x9505d862c56b1708, $r30=0x7f3f0c983ce27863 ++after: $r12=0x9505d862c56a18c8, $r30=0x7f3f0c983ce27863 ++bstrins.d $r29, $r5, 43, 0 :: ++before: $r29=0x248f295ef3afe5aa, $r5=0x9469277db61227b7 ++after: $r29=0x248f277db61227b7, $r5=0x9469277db61227b7 ++bstrins.d $r31, $r31, 49, 23 :: ++before: $r31=0xbc5f0c47c3a63a94, $r31=0x4aacc1c77ad0c09a ++after: $r31=0x4aad68604d50c09a, $r31=0x4aad68604d50c09a ++bstrins.d $r6, $r24, 12, 2 :: ++before: $r6=0x79110235b8c34188, $r24=0x75e3e311aef2bef9 ++after: $r6=0x79110235b8c35be4, $r24=0x75e3e311aef2bef9 ++bstrins.d $r6, $r16, 43, 13 :: ++before: $r6=0xaa6e63ffd80b76c5, $r16=0xb1ea7dcb3af0881d ++after: $r6=0xaa6e675e1103b6c5, $r16=0xb1ea7dcb3af0881d ++bstrins.d $r15, $r25, 53, 29 :: ++before: $r15=0x5b68a802f26a1804, $r25=0xb4f651115b84591b ++after: $r15=0x5b708b23726a1804, $r25=0xb4f651115b84591b ++bstrins.d $r9, $r9, 61, 40 :: ++before: $r9=0x3394218c965d5f1a, $r9=0xf3d30b5d4d4089b4 ++after: $r9=0xc089b45d4d4089b4, $r9=0xc089b45d4d4089b4 ++bstrpick.d $r27, $r27, 63, 33 :: ++before: $r27=0x503c8fae2d6d7b58, $r27=0x9fd9869ca812de0c ++after: $r27=0x000000004fecc34e, $r27=0x000000004fecc34e ++bstrpick.d $r14, $r5, 52, 40 :: ++before: $r14=0x65f05eaa5e13856a, $r5=0xd52c72fbeccc39f5 ++after: $r14=0x0000000000000c72, $r5=0xd52c72fbeccc39f5 ++bstrpick.d $r13, $r20, 48, 14 :: ++before: $r13=0x9cea777df4d2eae0, $r20=0x6326727a36499800 ++after: $r13=0x00000001c9e8d926, $r20=0x6326727a36499800 ++bstrpick.d $r10, $r17, 43, 20 :: ++before: $r10=0xf30a073a4a56604b, $r17=0x0c12d112f6a0c8f1 ++after: $r10=0x0000000000112f6a, $r17=0x0c12d112f6a0c8f1 ++bstrpick.d $r13, $r25, 55, 37 :: ++before: $r13=0xe559d975e0d9ac85, $r25=0xcf41f30cc4a46713 ++after: $r13=0x0000000000020f98, $r25=0xcf41f30cc4a46713 ++bstrpick.d $r29, $r4, 34, 20 :: ++before: $r29=0x41843db6c2a206cb, $r4=0x343f795d45fcff8c ++after: $r29=0x000000000000545f, $r4=0x343f795d45fcff8c ++bstrpick.d $r27, $r28, 27, 10 :: ++before: $r27=0xb359821297377fee, $r28=0x4fc51c5773e64f69 ++after: $r27=0x000000000000f993, $r28=0x4fc51c5773e64f69 ++bstrpick.d $r24, $r24, 63, 20 :: ++before: $r24=0xed3cb5d1e8f0e55e, $r24=0x9cdbb70a8b8d3945 ++after: $r24=0x000009cdbb70a8b8, $r24=0x000009cdbb70a8b8 ++bstrpick.d $r7, $r30, 34, 30 :: ++before: $r7=0x11b7344343be1ccf, $r30=0xa3422c671803480f ++after: $r7=0x000000000000001c, $r30=0xa3422c671803480f ++bstrpick.d $r15, $r4, 55, 4 :: ++before: $r15=0x3670c6b869f28085, $r4=0x2caa9d9c1351e402 ++after: $r15=0x000aa9d9c1351e40, $r4=0x2caa9d9c1351e402 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xfffffffff8cd11f5, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 
++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0xfffffffff896edce, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffef4aad58, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffa0b9747b, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffff96afcb8f, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0x000000005b57dc92, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffff8fa18d00, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffff862b1fc5, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0xffffffff9a47255b, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffa7886ccc, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffb6c69449, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffffb04637e9, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xfffffffffb8f1bb9, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffc7580939, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffffb24959b6, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x00000000532cb693, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffffe2757b, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffc3c8592d, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffffe44ccdd5, 
$r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0x0000000004826ea7, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0x00000000784b67ea, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffffbaef431f, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0xffffffffe7361109, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xfffffffff9af6423, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0x000000006d4f1805, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffffb22e077e, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xffffffff8cb8356f, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffbe1261f9, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0x000000006c5412e4, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x000000006c0feedf, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0x0000000052b4533a, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffe3833e19, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0x000000002a3f5685, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xffffffffec8f3c62, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffff9e543d84, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0x00000000318af1d5, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, 
$r14=0xf4509311f443a7ce ++after: $r12=0xffffffff92c4f3f9, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffa40568c3, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffeeee153e, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0x0000000071b26b5b, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xfffffffffcb406be, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffade3076c, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0x000000004f6e8750, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x000000004548949c, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0x0000000050fc77a7, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0x000000000b0f3746, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffff92a3acf2, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xffffffffd91fb7ba, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0x000000006b548718, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffff8547ffea, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x000000001a265977, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0x000000000e1737b7, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0x000000007c13f4c5, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffff9d9455e3, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: 
$r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0x000000006df1745f, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffb7862239, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 +diff --git a/none/tests/loongarch64/integer.vgtest b/none/tests/loongarch64/integer.vgtest +new file mode 100644 +index 000000000..daa059178 +--- /dev/null ++++ b/none/tests/loongarch64/integer.vgtest +@@ -0,0 +1,2 @@ ++prog: integer ++vgopts: -q +diff --git a/none/tests/loongarch64/llsc.c b/none/tests/loongarch64/llsc.c +new file mode 100644 +index 000000000..fcb7e3cb3 +--- /dev/null ++++ b/none/tests/loongarch64/llsc.c +@@ -0,0 +1,69 @@ ++#include ++ ++#define TESTINST_LLSC_W(insn, res, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ "move $t1, %1 \n\t" \ ++ "ll.w $t0, $t1, %2 \n\t" \ ++ insn " \n\t" \ ++ "sc.w $t0, $t1, %2 \n\t" \ ++ "move %0, $t0 \n\t" \ ++ : "=r" (res) \ ++ : "r" (addr), "i" (offs) \ ++ : "$t0", "$t1", "memory"); \ ++ } ++ ++#define TESTINST_LLSC_D(insn, res, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ "move $t1, %1 \n\t" \ ++ "ll.d $t0, $t1, %2 \n\t" \ ++ insn " \n\t" \ ++ "sc.d $t0, $t1, %2 \n\t" \ ++ "move %0, $t0 \n\t" \ ++ : "=r" (res) \ ++ : "r" (addr), "i" (offs) \ ++ : "$t0", "$t1", "memory"); \ ++ } ++ ++void test(void) ++{ ++ int res_i; ++ long res_l; ++ int val_i[2] = { 6, 10 }; ++ long val_l[2] = { 6, 10 }; ++ ++ /* ---------------- ll.w rd, rj, si14 ---------------- */ ++ /* ---------------- sc.w rd, rj, si14 ---------------- */ ++ printf("ll.w sc.w ::\n"); ++ ++ do { ++ TESTINST_LLSC_W("addi.w $t0, $t0, 1", res_i, val_i, 0); ++ } while (res_i != 1); ++ printf("res: %d val: %d\n", res_i, val_i[0]); ++ ++ do { ++ TESTINST_LLSC_W("sub.w $t0, $zero, $t0", res_i, val_i, 4); ++ } while (res_i != 1); ++ printf("res: %d val: %d\n", res_i, val_i[1]); ++ ++ /* ---------------- ll.d rd, rj, si14 ---------------- */ ++ /* ---------------- sc.d rd, rj, si14 ---------------- */ ++ printf("ll.d sc.d ::\n"); ++ ++ do { ++ TESTINST_LLSC_D("addi.d $t0, $t0, 1", res_l, val_l, 0); ++ } while (res_l != 1); ++ printf("res: %ld val: %ld\n", res_l, val_l[0]); ++ ++ do { ++ TESTINST_LLSC_D("sub.d $t0, $zero, $t0", res_l, val_l, 8); ++ } while (res_l != 1); ++ printf("res: %ld val: %ld\n", res_l, val_l[1]); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/llsc.stderr.exp b/none/tests/loongarch64/llsc.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/llsc.stdout.exp b/none/tests/loongarch64/llsc.stdout.exp +new file mode 100644 +index 000000000..dd1925156 +--- /dev/null ++++ b/none/tests/loongarch64/llsc.stdout.exp +@@ -0,0 +1,6 @@ ++ll.w sc.w :: ++res: 1 val: 7 ++res: 1 val: -10 ++ll.d sc.d :: ++res: 1 val: 7 ++res: 1 val: -10 +diff --git a/none/tests/loongarch64/llsc.vgtest b/none/tests/loongarch64/llsc.vgtest +new file mode 100644 +index 000000000..685c27911 +--- /dev/null ++++ b/none/tests/loongarch64/llsc.vgtest +@@ -0,0 +1,2 @@ ++prog: llsc ++vgopts: -q +diff --git a/none/tests/loongarch64/memory.c b/none/tests/loongarch64/memory.c +new file mode 100644 +index 000000000..385efc02c +--- /dev/null ++++ b/none/tests/loongarch64/memory.c +@@ -0,0 +1,345 @@ ++#include ++ ++#define NUM 24 ++ ++unsigned long mem[NUM] = { ++ 0x121f1e1f0000e680, 0x0000000000010700, 0x000000030000e7dc, ++ 0xffffffff0000b0d0, 0x232f2e2f2ab05fd0, 
0x242c2b2b0000b6a0, ++ 0x252a2e2b0000be80, 0x262d2d2a0000de10, 0x3f343f3e0000df20, ++ 0x3e353d3c2ab05fe0, 0x363a3c3b0000dfd0, 0x3b373b3a00010300, ++ 0x0000e680121f1e1f, 0x0001070000000000, 0x0000e7dc00000003, ++ 0x0000b0d0ffffffff, 0x2ab05fd0232f2e2f, 0x0000b6a0242c2b2b, ++ 0x0000be80252a2e2b, 0x0000de10262d2d2a, 0x0000df203f343f3e, ++ 0x2ab05fe03e353d3c, 0x0000dfd0363a3c3b, 0x000103003b373b3a ++}; ++ ++#define TESTINST_LOAD_RRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : "=r" (val) \ ++ : "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_LOAD_RRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=r" (val) \ ++ : "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_LOAD_FRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : "=f" (val) \ ++ : "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_LOAD_FRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=f" (val) \ ++ : "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_RRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : \ ++ : "r" (val), "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_RRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "r" (val), "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_FRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : \ ++ : "f" (val), "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_FRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "f" (val), "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++static inline void show(void) ++{ ++ int i; ++ printf("memory block:\n"); ++ for (i = 0; i < NUM; i++) ++ printf("0x%lx:\t%#018lx\n", i * sizeof(unsigned long), mem[i]); ++} ++ ++void test(void) ++{ ++ char s8; ++ unsigned char u8; ++ short s16; ++ unsigned short u16; ++ int s32; ++ unsigned int u32; ++ long s64; ++ unsigned long u64; ++ ++ show(); ++ ++ /* ---------------- ld.b rd, rj, si12 ---------------- */ ++ printf("test ld.b: "); ++ TESTINST_LOAD_RRI("ld.b", s8, mem, 0); ++ printf("%d ", (int)s8); ++ TESTINST_LOAD_RRI("ld.b", s8, mem, 24); ++ printf("%d\n", (int)s8); ++ ++ /* ---------------- ld.bu rd, rj, si12 ---------------- */ ++ printf("test ld.bu: "); ++ TESTINST_LOAD_RRI("ld.b", u8, mem, 0); ++ printf("%u ", (unsigned)s8); ++ TESTINST_LOAD_RRI("ld.b", u8, mem, 24); ++ printf("%u\n", (unsigned)s8); ++ ++ /* ---------------- ld.h rd, rj, si12 ---------------- */ ++ printf("test ld.h: "); ++ TESTINST_LOAD_RRI("ld.h", s16, mem, 0); ++ printf("%hd ", s16); ++ TESTINST_LOAD_RRI("ld.h", s16, mem, 24); ++ printf("%hd\n", s16); ++ ++ /* ---------------- ld.hu rd, rj, si12 ---------------- */ ++ printf("test ld.hu: "); ++ TESTINST_LOAD_RRI("ld.hu", u16, mem, 0); ++ printf("%hu ", u16); ++ TESTINST_LOAD_RRI("ld.hu", u16, mem, 24); ++ printf("%hu\n", u16); ++ ++ /* ---------------- ld.w rd, rj, si12 ---------------- */ ++ printf("test ld.w: "); ++ TESTINST_LOAD_RRI("ld.w", s32, mem, 0); ++ printf("%d ", s32); ++ TESTINST_LOAD_RRI("ld.w", s32, mem, 24); ++ printf("%d\n", s32); ++ ++ /* ---------------- ld.wu rd, rj, si12 ---------------- */ ++ printf("test ld.wu: "); ++ TESTINST_LOAD_RRI("ld.wu", u32, mem, 0); ++ 
printf("%u ", u32); ++ TESTINST_LOAD_RRI("ld.wu", u32, mem, 24); ++ printf("%u\n", u32); ++ ++ /* ---------------- ld.d rd, rj, si12 ---------------- */ ++ printf("test ld.d: "); ++ TESTINST_LOAD_RRI("ld.d", s64, mem, 0); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRI("ld.d", s64, mem, 24); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRI("ld.d", u64, mem, 0); ++ printf("%lu ", u64); ++ TESTINST_LOAD_RRI("ld.d", u64, mem, 24); ++ printf("%lu\n", u64); ++ ++ /* ---------------- ldx.b rd, rj, rk ---------------- */ ++ printf("test ldx.b: "); ++ TESTINST_LOAD_RRR("ldx.b", s8, mem, 0); ++ printf("%d ", (int)s8); ++ TESTINST_LOAD_RRR("ldx.b", s8, mem, 24); ++ printf("%d\n", (int)s8); ++ ++ /* ---------------- ldx.bu rd, rj, rk ---------------- */ ++ printf("test ldx.bu: "); ++ TESTINST_LOAD_RRR("ldx.b", u8, mem, 0); ++ printf("%u ", (unsigned)s8); ++ TESTINST_LOAD_RRR("ldx.b", u8, mem, 24); ++ printf("%u\n", (unsigned)s8); ++ ++ /* ---------------- ldx.h rd, rj, rk ---------------- */ ++ printf("test ldx.h: "); ++ TESTINST_LOAD_RRR("ldx.h", s16, mem, 0); ++ printf("%hd ", s16); ++ TESTINST_LOAD_RRR("ldx.h", s16, mem, 24); ++ printf("%hd\n", s16); ++ ++ /* ---------------- ldx.hu rd, rj, rk ---------------- */ ++ printf("test ld.hu: "); ++ TESTINST_LOAD_RRR("ldx.hu", u16, mem, 0); ++ printf("%hu ", u16); ++ TESTINST_LOAD_RRR("ldx.hu", u16, mem, 24); ++ printf("%hu\n", u16); ++ ++ /* ---------------- ldx.w rd, rj, rk ---------------- */ ++ printf("test ldx.w: "); ++ TESTINST_LOAD_RRR("ldx.w", s32, mem, 0); ++ printf("%d ", s32); ++ TESTINST_LOAD_RRR("ldx.w", s32, mem, 24); ++ printf("%d\n", s32); ++ ++ /* ---------------- ldx.wu rd, rj, rk ---------------- */ ++ printf("test ldx.wu: "); ++ TESTINST_LOAD_RRR("ldx.wu", u32, mem, 0); ++ printf("%u ", u32); ++ TESTINST_LOAD_RRR("ldx.wu", u32, mem, 24); ++ printf("%u\n", u32); ++ ++ /* ---------------- ldx.d rd, rj, rk ---------------- */ ++ printf("test ldx.d: "); ++ TESTINST_LOAD_RRR("ldx.d", s64, mem, 0); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRR("ldx.d", s64, mem, 24); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRR("ldx.d", u64, mem, 0); ++ printf("%lu ", u64); ++ TESTINST_LOAD_RRR("ldx.d", u64, mem, 24); ++ printf("%lu\n", u64); ++ ++ /* ---------------- ldptr.w rd, rj, si14 ---------------- */ ++ printf("test ldptr.w: "); ++ TESTINST_LOAD_RRI("ldptr.w", s32, mem, 0); ++ printf("%d ", s32); ++ TESTINST_LOAD_RRI("ldptr.w", s32, mem, 24); ++ printf("%d\n", s32); ++ ++ /* ---------------- ldptr.d rd, rj, si14 ---------------- */ ++ printf("test ldptr.d: "); ++ TESTINST_LOAD_RRI("ldptr.d", s64, mem, 0); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRI("ldptr.d", s64, mem, 24); ++ printf("%ld\n", s64); ++ ++ /* ---------------- fld.s fd, rj, si12 ---------------- */ ++ printf("test fld.s: "); ++ TESTINST_LOAD_FRI("fld.s", u32, mem, 0); ++ printf("%#x ", u32); ++ TESTINST_LOAD_FRI("fld.s", u32, mem, 24); ++ printf("%#x\n", u32); ++ ++ /* ---------------- fld.d fd, rj, si12 ---------------- */ ++ printf("test fld.d: "); ++ TESTINST_LOAD_FRI("fld.d", u64, mem, 0); ++ printf("%#lx ", u64); ++ TESTINST_LOAD_FRI("fld.d", u64, mem, 24); ++ printf("%#lx\n", u64); ++ ++ /* ---------------- fldx.s fd, rj, rk ---------------- */ ++ printf("test fldx.s: "); ++ TESTINST_LOAD_FRR("fldx.s", u32, mem, 0); ++ printf("%#x ", u32); ++ TESTINST_LOAD_FRR("fldx.s", u32, mem, 24); ++ printf("%#x\n", u32); ++ ++ /* ---------------- fldx.d fd, rj, rk ---------------- */ ++ printf("test fldx.d: "); ++ TESTINST_LOAD_FRR("fldx.d", u64, mem, 0); ++ printf("%#lx ", u64); ++ 
TESTINST_LOAD_FRR("fldx.d", u64, mem, 24); ++ printf("%#lx\n", u64); ++ ++ show(); ++ ++ u8 = 0xfe; ++ s8 = (char)u8; ++ u16 = 0xfedc; ++ s16 = (short)u16; ++ u32 = 0xfedcba98; ++ s32 = (int)u32; ++ u64 = 0xfedcba9876543210; ++ s64 = (long)u64; ++ ++ /* ---------------- st.b rd, rj, si12 ---------------- */ ++ printf("test st.b\n"); ++ TESTINST_STORE_RRI("st.b", s8, mem, 0); ++ TESTINST_STORE_RRI("st.b", u8, mem, 1); ++ ++ /* ---------------- st.h rd, rj, si12 ---------------- */ ++ printf("test st.h\n"); ++ TESTINST_STORE_RRI("st.h", s16, mem, 2); ++ TESTINST_STORE_RRI("st.h", u16, mem, 4); ++ ++ /* ---------------- st.w rd, rj, si12 ---------------- */ ++ printf("test st.w\n"); ++ TESTINST_STORE_RRI("st.w", s32, mem, 8); ++ TESTINST_STORE_RRI("st.w", u32, mem, 12); ++ ++ /* ---------------- st.d rd, rj, si12 ---------------- */ ++ printf("test st.d\n"); ++ TESTINST_STORE_RRI("st.d", s64, mem, 16); ++ TESTINST_STORE_RRI("st.d", u64, mem, 24); ++ ++ /* ---------------- stx.b rd, rj, rk ---------------- */ ++ printf("test stx.b\n"); ++ TESTINST_STORE_RRR("stx.b", s8, mem, 32); ++ TESTINST_STORE_RRR("stx.b", u8, mem, 33); ++ ++ /* ---------------- stx.h rd, rj, rk ---------------- */ ++ printf("test stx.h\n"); ++ TESTINST_STORE_RRR("stx.h", s16, mem, 34); ++ TESTINST_STORE_RRR("stx.h", u16, mem, 36); ++ ++ /* ---------------- stx.w rd, rj, rk ---------------- */ ++ printf("test stx.w\n"); ++ TESTINST_STORE_RRR("stx.w", s32, mem, 40); ++ TESTINST_STORE_RRR("stx.w", u32, mem, 44); ++ ++ /* ---------------- stx.d rd, rj, rk ---------------- */ ++ printf("test stx.d\n"); ++ TESTINST_STORE_RRR("stx.d", s64, mem, 48); ++ TESTINST_STORE_RRR("stx.d", u64, mem, 56); ++ ++ /* ---------------- stptr.w rd, rj, si14 ---------------- */ ++ printf("test stptr.w\n"); ++ TESTINST_STORE_RRI("stptr.w", s64, mem, 64); ++ TESTINST_STORE_RRI("stptr.w", u64, mem, 68); ++ ++ /* ---------------- stptr.d rd, rj, si14 ---------------- */ ++ printf("test stptr.d\n"); ++ TESTINST_STORE_RRI("stptr.d", s64, mem, 72); ++ TESTINST_STORE_RRI("stptr.d", u64, mem, 80); ++ ++ /* ---------------- fst.s rd, rj, si12 ---------------- */ ++ printf("test fst.w\n"); ++ TESTINST_STORE_FRI("fst.s", u32, mem, 84); ++ TESTINST_STORE_FRI("fst.s", u32, mem, 88); ++ ++ /* ---------------- fst.d rd, rj, si12 ---------------- */ ++ printf("test fst.d\n"); ++ TESTINST_STORE_FRI("fst.d", u64, mem, 96); ++ TESTINST_STORE_FRI("fst.d", u64, mem, 104); ++ ++ /* ---------------- fstx.s rd, rj, rk ---------------- */ ++ printf("test fstx.w\n"); ++ TESTINST_STORE_FRR("fstx.s", u32, mem, 108); ++ TESTINST_STORE_FRR("fstx.s", u32, mem, 112); ++ ++ /* ---------------- fstx.d rd, rj, rk ---------------- */ ++ printf("test fstx.d\n"); ++ TESTINST_STORE_FRR("fstx.d", u64, mem, 120); ++ TESTINST_STORE_FRR("fstx.d", u64, mem, 128); ++ ++ show(); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/memory.stderr.exp b/none/tests/loongarch64/memory.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/memory.stdout.exp b/none/tests/loongarch64/memory.stdout.exp +new file mode 100644 +index 000000000..928961df0 +--- /dev/null ++++ b/none/tests/loongarch64/memory.stdout.exp +@@ -0,0 +1,109 @@ ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 
0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test ld.b: -128 -48 ++test ld.bu: 4294967248 4294967248 ++test ld.h: -6528 -20272 ++test ld.hu: 59008 45264 ++test ld.w: 59008 45264 ++test ld.wu: 59008 45264 ++test ld.d: 1305795535453611648 -4294922032 1305795535453611648 18446744069414629584 ++test ldx.b: -128 -48 ++test ldx.bu: 4294967248 4294967248 ++test ldx.h: -6528 -20272 ++test ld.hu: 59008 45264 ++test ldx.w: 59008 45264 ++test ldx.wu: 59008 45264 ++test ldx.d: 1305795535453611648 -4294922032 1305795535453611648 18446744069414629584 ++test ldptr.w: 59008 45264 ++test ldptr.d: 1305795535453611648 -4294922032 ++test fld.s: 0xe680 0xb0d0 ++test fld.d: 0x121f1e1f0000e680 0xffffffff0000b0d0 ++test fldx.s: 0xe680 0xb0d0 ++test fldx.d: 0x121f1e1f0000e680 0xffffffff0000b0d0 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test st.b ++test st.h ++test st.w ++test st.d ++test stx.b ++test stx.h ++test stx.w ++test stx.d ++test stptr.w ++test stptr.d ++test fst.w ++test fst.d ++test fstx.w ++test fstx.d ++memory block: ++0x0: 0x121ffedcfedcfefe ++0x8: 0xfedcba98fedcba98 ++0x10: 0xfedcba9876543210 ++0x18: 0xfedcba9876543210 ++0x20: 0x232ffedcfedcfefe ++0x28: 0xfedcba98fedcba98 ++0x30: 0xfedcba9876543210 ++0x38: 0xfedcba9876543210 ++0x40: 0x7654321076543210 ++0x48: 0xfedcba9876543210 ++0x50: 0xfedcba9876543210 ++0x58: 0x3b373b3afedcba98 ++0x60: 0xfedcba9876543210 ++0x68: 0xfedcba9876543210 ++0x70: 0x0000e7dcfedcba98 ++0x78: 0xfedcba9876543210 ++0x80: 0xfedcba9876543210 ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a +diff --git a/none/tests/loongarch64/memory.vgtest b/none/tests/loongarch64/memory.vgtest +new file mode 100644 +index 000000000..be6895e8f +--- /dev/null ++++ b/none/tests/loongarch64/memory.vgtest +@@ -0,0 +1,2 @@ ++prog: memory ++vgopts: -q +diff --git a/none/tests/loongarch64/move.c b/none/tests/loongarch64/move.c +new file mode 100644 +index 000000000..3b7f46dd9 +--- /dev/null ++++ b/none/tests/loongarch64/move.c +@@ -0,0 +1,112 @@ ++#include ++ ++#define TESTINST_MOV(v1, v2, v3, v4, v5, v6, val) \ ++ { \ ++ unsigned long res1 = (unsigned long)v1; \ ++ unsigned long res2 = (unsigned long)v2; \ ++ unsigned long res3 = (unsigned long)v3; \ ++ unsigned long res4 = (unsigned long)v4; \ ++ unsigned long res5 = (unsigned long)v5; \ ++ unsigned long res6 = (unsigned long)v6; \ ++ __asm__ __volatile__( \ ++ "movgr2fr.w %0, %6 \n\t" \ ++ "movgr2fr.d %1, %6 \n\t" \ ++ "movgr2frh.w %2, %6 \n\t" \ ++ "movfr2gr.s %3, %7 \n\t" \ ++ 
"movfrh2gr.s %4, %7 \n\t" \ ++ "movfr2gr.d %5, %7 \n\t" \ ++ : "+f" (res1), "+f" (res2), "+f" (res3), \ ++ "+r" (res4), "+r" (res5), "+r" (res6) \ ++ : "r" (val), "f" (val) \ ++ : "memory"); \ ++ printf("movgr2fr.w ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v1, val); \ ++ printf("output: %#018lx\n", res1); \ ++ printf("movgr2fr.d ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v2, val); \ ++ printf("output: %#018lx\n", res2); \ ++ printf("movgr2frh.w ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v3, val); \ ++ printf("output: %#018lx\n", res3); \ ++ printf("movfr2gr.s ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v4, val); \ ++ printf("output: %#018lx\n", res4); \ ++ printf("movfrh2gr.s ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v5, val); \ ++ printf("output: %#018lx\n", res5); \ ++ printf("movfr2gr.d ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v6, val); \ ++ printf("output: %#018lx\n", res6); \ ++ } ++ ++#define TESTINST_FSCR(fcsr, val) \ ++ { \ ++ unsigned long res; \ ++ __asm__ __volatile__( \ ++ "movgr2fcsr " fcsr ", %1 \n\t" \ ++ "movfcsr2gr %0, " fcsr " \n\t" \ ++ : "=r" (res) \ ++ : "r" (val) \ ++ : "memory"); \ ++ printf("movgr2fcsr movfcsr2gr ::\n"); \ ++ printf("input: %#018lx\n", val); \ ++ printf("output: %#018lx\n", res); \ ++ } ++ ++#define TESTINST_CF(fcc, v1, v2, val) \ ++ { \ ++ unsigned long res1 = (unsigned long)v1; \ ++ unsigned long res2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "movfr2cf " fcc ", %2 \n\t" \ ++ "movcf2fr %0, " fcc " \n\t" \ ++ "movgr2cf " fcc ", %3 \n\t" \ ++ "movcf2gr %1, " fcc " \n\t" \ ++ : "+f" (res1), "+r" (res2) \ ++ : "f" (val), "r" (val) \ ++ : "memory"); \ ++ printf("movfr2cf movcf2fr ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v1, val); \ ++ printf("output: %lx\n", res1); \ ++ printf("movgr2cf movcf2gr ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v2, val); \ ++ printf("output: %lx\n", res2); \ ++ } ++ ++void test(void) ++{ ++ TESTINST_MOV(0x1234123412341234UL, 0x5678567856785678UL, 0x9abc9abc9abc9abcUL, 0xdef0def0def0def0UL, 0x2468246824682468UL, 0x3579357935793579UL, 0x0123456789abcdefUL); ++ TESTINST_MOV(0x1234123412341234UL, 0x5678567856785678UL, 0x9abc9abc9abc9abcUL, 0xdef0def0def0def0UL, 0x2468246824682468UL, 0x3579357935793579UL, 0xfedcba9876543210UL); ++ ++ TESTINST_FSCR("$r0", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r0", 0xfedcba9876543210UL); ++ TESTINST_FSCR("$r1", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r1", 0xfedcba9876543210UL); ++ TESTINST_FSCR("$r2", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r2", 0xfedcba9876543210UL); ++ TESTINST_FSCR("$r3", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r3", 0xfedcba9876543210UL); ++ ++ TESTINST_CF("$fcc0", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc0", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc1", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc1", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc2", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc2", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc3", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc3", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc4", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc4", 0x1234123412341234UL, 0x5678567856785678UL, 
0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc5", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc5", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc6", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc6", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc7", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc7", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/move.stderr.exp b/none/tests/loongarch64/move.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/move.stdout.exp b/none/tests/loongarch64/move.stdout.exp +new file mode 100644 +index 000000000..12baf3888 +--- /dev/null ++++ b/none/tests/loongarch64/move.stdout.exp +@@ -0,0 +1,156 @@ ++movgr2fr.w :: ++input: 0x1234123412341234 0x0123456789abcdef ++output: 0x0123456789abcdef ++movgr2fr.d :: ++input: 0x5678567856785678 0x0123456789abcdef ++output: 0x0123456789abcdef ++movgr2frh.w :: ++input: 0x9abc9abc9abc9abc 0x0123456789abcdef ++output: 0x89abcdef9abc9abc ++movfr2gr.s :: ++input: 0xdef0def0def0def0 0x0123456789abcdef ++output: 0xffffffff89abcdef ++movfrh2gr.s :: ++input: 0x2468246824682468 0x0123456789abcdef ++output: 0x0000000001234567 ++movfr2gr.d :: ++input: 0x3579357935793579 0x0123456789abcdef ++output: 0x0123456789abcdef ++movgr2fr.w :: ++input: 0x1234123412341234 0xfedcba9876543210 ++output: 0xfedcba9876543210 ++movgr2fr.d :: ++input: 0x5678567856785678 0xfedcba9876543210 ++output: 0xfedcba9876543210 ++movgr2frh.w :: ++input: 0x9abc9abc9abc9abc 0xfedcba9876543210 ++output: 0x765432109abc9abc ++movfr2gr.s :: ++input: 0xdef0def0def0def0 0xfedcba9876543210 ++output: 0x0000000076543210 ++movfrh2gr.s :: ++input: 0x2468246824682468 0xfedcba9876543210 ++output: 0xfffffffffedcba98 ++movfr2gr.d :: ++input: 0x3579357935793579 0xfedcba9876543210 ++output: 0xfedcba9876543210 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x00000000090b01cf ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000016140210 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x000000000000008f ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000000000010 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x00000000090b0000 ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000016140000 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x0000000000000100 ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000000000200 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 
0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 +diff --git a/none/tests/loongarch64/move.vgtest b/none/tests/loongarch64/move.vgtest +new file mode 100644 +index 000000000..358d44b7a +--- /dev/null ++++ b/none/tests/loongarch64/move.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features fpu ++prog: move ++vgopts: -q +diff --git a/none/tests/loongarch64/pc.c b/none/tests/loongarch64/pc.c +new file mode 100644 +index 000000000..a4938463a +--- /dev/null ++++ b/none/tests/loongarch64/pc.c +@@ -0,0 +1,66 @@ ++#include ++#include ++ ++#define TESTINST_RI(insn, imm, offs, clear) \ ++ { \ ++ unsigned long res, exp; \ ++ __asm__ __volatile__( \ ++ " la.local $t0, 1f \n\t" \ ++ " jirl %0, $t0, 0 \n\t" \ ++ "1: \n\t" \ ++ insn " %1," #imm " \n\t" \ ++ : "=r" (exp), "=r" (res) \ ++ : \ ++ : "$t0", "memory"); \ ++ printf("test %s\n", insn); \ ++ exp += (long)imm << 40 >> (40 - offs); \ ++ if (clear) \ ++ exp &= 0xfffffffffffff000UL; \ ++ if (res != exp) \ ++ printf("res: %#lx, exp: %#lx\n", res, exp); \ ++ } ++ ++void test(void) ++{ ++ /* ---------------- pcaddi rd, si20 ---------------- */ ++ TESTINST_RI("pcaddi", 0, 2, false); ++ TESTINST_RI("pcaddi", 1, 2, false); ++ TESTINST_RI("pcaddi", 100, 2, false); ++ TESTINST_RI("pcaddi", 12345, 2, false); ++ TESTINST_RI("pcaddi", -12345, 2, false); ++ TESTINST_RI("pcaddi", 524287, 2, false); ++ TESTINST_RI("pcaddi", -524288, 2, false); ++ ++ /* ---------------- pcaddu12i rd, si20 ---------------- */ ++ TESTINST_RI("pcaddu12i", 0, 12, false); ++ TESTINST_RI("pcaddu12i", 1, 12, false); ++ TESTINST_RI("pcaddu12i", 100, 12, false); ++ TESTINST_RI("pcaddu12i", 12345, 12, false); ++ TESTINST_RI("pcaddu12i", -12345, 12, false); ++ TESTINST_RI("pcaddu12i", 524287, 12, false); ++ 
TESTINST_RI("pcaddu12i", -524288, 12, false); ++ ++ /* ---------------- pcaddu18i rd, si20 ---------------- */ ++ TESTINST_RI("pcaddu18i", 0, 18, false); ++ TESTINST_RI("pcaddu18i", 1, 18, false); ++ TESTINST_RI("pcaddu18i", 100, 18, false); ++ TESTINST_RI("pcaddu18i", 12345, 18, false); ++ TESTINST_RI("pcaddu18i", -12345, 18, false); ++ TESTINST_RI("pcaddu18i", 524287, 18, false); ++ TESTINST_RI("pcaddu18i", -524288, 18, false); ++ ++ /* ---------------- pcalau12i rd, si20 ---------------- */ ++ TESTINST_RI("pcalau12i", 0, 12, true); ++ TESTINST_RI("pcalau12i", 1, 12, true); ++ TESTINST_RI("pcalau12i", 100, 12, true); ++ TESTINST_RI("pcalau12i", 12345, 12, true); ++ TESTINST_RI("pcalau12i", -12345, 12, true); ++ TESTINST_RI("pcalau12i", 524287, 12, true); ++ TESTINST_RI("pcalau12i", -524288, 12, true); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/pc.stderr.exp b/none/tests/loongarch64/pc.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/pc.stdout.exp b/none/tests/loongarch64/pc.stdout.exp +new file mode 100644 +index 000000000..422ea404b +--- /dev/null ++++ b/none/tests/loongarch64/pc.stdout.exp +@@ -0,0 +1,28 @@ ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i +diff --git a/none/tests/loongarch64/pc.vgtest b/none/tests/loongarch64/pc.vgtest +new file mode 100644 +index 000000000..468226df2 +--- /dev/null ++++ b/none/tests/loongarch64/pc.vgtest +@@ -0,0 +1,2 @@ ++prog: pc ++vgopts: -q +diff --git a/none/tests/loongarch64/special.c b/none/tests/loongarch64/special.c +new file mode 100644 +index 000000000..e1e8c9430 +--- /dev/null ++++ b/none/tests/loongarch64/special.c +@@ -0,0 +1,112 @@ ++#include ++ ++#define TESTINST_HRI(insn, hint, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " " #hint ", %0, " #offs " \n\t" \ ++ : \ ++ : "r" (addr) \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ } ++ ++#define TESTINST_HRR(insn, hint, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, \n\t" \ ++ : \ ++ : "r" (addr), "r" (offs) \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ } ++ ++#define TESTINST_CODE(insn, code) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " " #code " \n\t" \ ++ : \ ++ : \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ } ++ ++#define TESTINST_RR(insn, id) \ ++ { \ ++ unsigned long res = 0; \ ++ __asm__ __volatile__( \ ++ insn " %0, %1 \n\t" \ ++ : "+r" (res) \ ++ : "r" (id) \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ printf("res: %ld\n", res); \ ++ } ++ ++unsigned long mem[8]; ++ ++void test(void) ++{ ++ /* ---------------- preld hint, rj, si12 ---------------- */ ++ TESTINST_HRI("preld", 0, mem, 0); ++ TESTINST_HRI("preld", 1, mem, 1); ++ TESTINST_HRI("preld", 2, mem, 2); ++ TESTINST_HRI("preld", 3, mem, 3); ++ TESTINST_HRI("preld", 4, mem, 4); ++ TESTINST_HRI("preld", 5, mem, 5); ++ TESTINST_HRI("preld", 6, mem, 6); ++ TESTINST_HRI("preld", 7, mem, 7); ++ TESTINST_HRI("preld", 8, mem, 8); ++ TESTINST_HRI("preld", 9, mem, 9); ++ ++ /* ---------------- preldx hint, rj, rk ---------------- */ ++ 
TESTINST_HRI("preld", 31, mem, 10); ++ TESTINST_HRI("preld", 30, mem, 12); ++ TESTINST_HRI("preld", 29, mem, 14); ++ TESTINST_HRI("preld", 28, mem, 16); ++ TESTINST_HRI("preld", 27, mem, 18); ++ TESTINST_HRI("preld", 26, mem, 20); ++ TESTINST_HRI("preld", 25, mem, 22); ++ TESTINST_HRI("preld", 24, mem, 24); ++ TESTINST_HRI("preld", 23, mem, 26); ++ TESTINST_HRI("preld", 22, mem, 28); ++ ++ /* ---------------- dbar code ---------------- */ ++ TESTINST_CODE("dbar", 0); ++ TESTINST_CODE("dbar", 2); ++ TESTINST_CODE("dbar", 4); ++ TESTINST_CODE("dbar", 6); ++ TESTINST_CODE("dbar", 8); ++ ++ /* ---------------- ibar code ---------------- */ ++ TESTINST_CODE("ibar", 9); ++ TESTINST_CODE("ibar", 7); ++ TESTINST_CODE("ibar", 5); ++ TESTINST_CODE("ibar", 3); ++ TESTINST_CODE("ibar", 1); ++ ++ /* ---------------- rdtimel.w rd, rj ---------------- */ ++ TESTINST_RR("rdtimel.w", 0); ++ TESTINST_RR("rdtimel.w", 1); ++ TESTINST_RR("rdtimel.w", 2); ++ ++ /* ---------------- rdtimeh.w rd, rj ---------------- */ ++ TESTINST_RR("rdtimeh.w", 0); ++ TESTINST_RR("rdtimeh.w", 1); ++ TESTINST_RR("rdtimeh.w", 2); ++ ++ ++ /* ---------------- rdtime.d rd, rj ---------------- */ ++ TESTINST_RR("rdtime.d", 0); ++ TESTINST_RR("rdtime.d", 1); ++ TESTINST_RR("rdtime.d", 2); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/special.stderr.exp b/none/tests/loongarch64/special.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/special.stdout.exp b/none/tests/loongarch64/special.stdout.exp +new file mode 100644 +index 000000000..7bd523e10 +--- /dev/null ++++ b/none/tests/loongarch64/special.stdout.exp +@@ -0,0 +1,48 @@ ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test dbar ++test dbar ++test dbar ++test dbar ++test dbar ++test ibar ++test ibar ++test ibar ++test ibar ++test ibar ++test rdtimel.w ++res: 0 ++test rdtimel.w ++res: 0 ++test rdtimel.w ++res: 0 ++test rdtimeh.w ++res: 0 ++test rdtimeh.w ++res: 0 ++test rdtimeh.w ++res: 0 ++test rdtime.d ++res: 0 ++test rdtime.d ++res: 0 ++test rdtime.d ++res: 0 +diff --git a/none/tests/loongarch64/special.vgtest b/none/tests/loongarch64/special.vgtest +new file mode 100644 +index 000000000..b2f2ae952 +--- /dev/null ++++ b/none/tests/loongarch64/special.vgtest +@@ -0,0 +1,2 @@ ++prog: special ++vgopts: -q +diff --git a/tests/Makefile.am b/tests/Makefile.am +index 916e5085d..e21f68bf5 100644 +--- a/tests/Makefile.am ++++ b/tests/Makefile.am +@@ -52,7 +52,8 @@ check_PROGRAMS = \ + power_insn_available \ + is_ppc64_BE \ + min_power_isa \ +- arm64_features ++ arm64_features \ ++ loongarch64_features + + + AM_CFLAGS += $(AM_FLAG_M3264_PRI) +diff --git a/tests/arch_test.c b/tests/arch_test.c +index 37cc1bc76..97b6bc7c8 100644 +--- a/tests/arch_test.c ++++ b/tests/arch_test.c +@@ -34,6 +34,7 @@ char* all_archs[] = { + "mips32", + "mips64", + "nanomips", ++ "loongarch64", + NULL + }; + +@@ -79,6 +80,10 @@ static Bool go(char* arch) + + #elif defined(VGP_nanomips_linux) + if ( 0 == strcmp( arch, "nanomips" ) ) return True; ++ ++#elif defined(VGP_loongarch64_linux) ++ if ( 0 == strcmp( arch, "loongarch64" ) ) return True; ++ + #else + # error Unknown platform + #endif // VGP_* +diff --git a/tests/loongarch64_features.c b/tests/loongarch64_features.c +new file mode 100644 +index 
000000000..45ba2d1c0 +--- /dev/null ++++ b/tests/loongarch64_features.c +@@ -0,0 +1,81 @@ ++#include ++#include ++#include ++ ++// This file determines loongarch64 features a processor supports. ++// For now, we only support loongarch64-linux. ++// ++// We return: ++// - 0 if the machine has the asked-for feature. ++// - 1 if the machine doesn't have the asked-for feature. ++// - 2 if the asked-for feature isn't recognised (this will always be the case ++// for any feature if run on a non-loongarch64 machine). ++// - 3 if there was a usage error (it also prints an error message). ++#define FEATURE_PRESENT 0 ++#define FEATURE_NOT_PRESENT 1 ++#define UNRECOGNISED_FEATURE 2 ++#define USAGE_ERROR 3 ++ ++#if defined(VGA_loongarch64) ++ ++static int go(const char* feature_name) ++{ ++ int i, len, found; ++ FILE* fp; ++ char buf[256]; ++ const char* features[] = { ++ "cpucfg", "lam", "ual", "fpu", ++ "lsx", "lasx", "complex", "crypto", ++ "lvz", "lbt_x86", "lbt_arm", "lbt_mips" ++ }; ++ ++ found = 0; ++ len = sizeof(features) / sizeof(features[0]); ++ for (i = 0; i < len; i++) { ++ if (strcmp(feature_name, features[i]) == 0) { ++ found = 1; ++ break; ++ } ++ } ++ ++ if (!found) ++ return UNRECOGNISED_FEATURE; ++ ++ fp = fopen("/proc/cpuinfo", "r"); ++ if(fp == NULL) ++ return UNRECOGNISED_FEATURE; ++ ++ while (fgets(buf, sizeof(buf), fp) != NULL) { ++ if (strstr(buf, feature_name) != NULL) { ++ fclose(fp); ++ return FEATURE_PRESENT; ++ } ++ } ++ ++ fclose(fp); ++ return FEATURE_NOT_PRESENT; ++} ++ ++#else ++ ++static int go(const char* feature_name) ++{ ++ // Feature not recognised (non-loongarch64 machine!) ++ return UNRECOGNISED_FEATURE; ++} ++ ++#endif // defined(VGA_loongarch64) ++ ++ ++//--------------------------------------------------------------------------- ++// main ++//--------------------------------------------------------------------------- ++int main(int argc, char **argv) ++{ ++ if (argc != 2) { ++ fprintf(stderr, "usage: loongarch64_features \n"); ++ exit(USAGE_ERROR); ++ } ++ ++ return go(argv[1]); ++} +diff --git a/tests/platform_test b/tests/platform_test +index c23a4f645..a1eaf8f1c 100644 +--- a/tests/platform_test ++++ b/tests/platform_test +@@ -14,6 +14,7 @@ all_platforms= + all_platforms="$all_platforms x86-linux amd64-linux ppc32-linux ppc64-linux" + all_platforms="$all_platforms arm-linux arm64-linux" + all_platforms="$all_platforms s390x-linux mips32-linux mips64-linux" ++all_platforms="$all_platforms loongarch64-linux" + all_platforms="$all_platforms x86-darwin amd64-darwin" + all_platforms="$all_platforms x86-solaris amd64-solaris" + all_platforms="$all_platforms x86-freebsd amd64-freebsd" diff --git a/vaultwarden/PKGBUILD b/vaultwarden/PKGBUILD index 037bc72ce2..aa52fe163a 100644 --- a/vaultwarden/PKGBUILD +++ b/vaultwarden/PKGBUILD @@ -54,7 +54,7 @@ prepare() { /^# ROCKET_TLS/a ROCKET_LIMITS={json=10485760}" .env.template # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/virt-manager/600.patch b/virt-manager/600.patch new file mode 100644 index 0000000000..17a3e4b0cb --- /dev/null +++ b/virt-manager/600.patch @@ -0,0 +1,186 @@ +From 564b110d66843a36604bacd4107ef773ac0e8933 Mon Sep 17 00:00:00 2001 +From: li weigang +Date: Mon, 30 Oct 2023 14:47:47 +0800 +Subject: [PATCH] add loongarch support + +--- + virtManager/createvm.py | 5 ++++- + virtinst/devices/disk.py | 2 ++ + virtinst/devices/video.py | 2 ++ + virtinst/domain/cpu.py | 5 +++++ + 
virtinst/domain/os.py | 3 +++ + virtinst/domcapabilities.py | 6 +++++- + virtinst/guest.py | 16 ++++++++++++---- + 7 files changed, 33 insertions(+), 6 deletions(-) + +diff --git a/virtManager/createvm.py b/virtManager/createvm.py +index 95aff71b2..df6dbb789 100644 +--- a/virtManager/createvm.py ++++ b/virtManager/createvm.py +@@ -476,7 +476,8 @@ def _set_caps_state(self): + + installable_arch = bool(guest.os.is_x86() or + guest.os.is_ppc64() or +- guest.os.is_s390x()) ++ guest.os.is_s390x() or ++ guest.os.is_loongarch()) + + default_efi = ( + self.config.get_default_firmware_setting() == "uefi" and +@@ -857,6 +858,8 @@ def _populate_machine(self): + machines.sort() + + defmachine = None ++ if self._capsinfo.arch in ["loongarch64"]: ++ defmachine = "loongson7a" + prios = [] + recommended_machine = virtinst.Guest.get_recommended_machine( + self._capsinfo) +diff --git a/virtinst/devices/disk.py b/virtinst/devices/disk.py +index 9609ebacf..1e2c56f8a 100644 +--- a/virtinst/devices/disk.py ++++ b/virtinst/devices/disk.py +@@ -982,6 +982,8 @@ def _default_bus(self, guest): + if self.conn.is_bhyve(): + # IDE bus is not supported by bhyve + return "sata" ++ if self.is_cdrom() and guest.os.is_loongarch(): ++ return "scsi" + return "ide" + + def set_defaults(self, guest): +diff --git a/virtinst/devices/video.py b/virtinst/devices/video.py +index 70067a72c..d10fd7aa4 100644 +--- a/virtinst/devices/video.py ++++ b/virtinst/devices/video.py +@@ -27,6 +27,8 @@ class DeviceVideo(Device): + + @staticmethod + def default_model(guest): ++ if guest.os.is_loongarch(): ++ return "virtio" + if not guest.os.is_hvm(): + return None + if guest.os.is_pseries(): +diff --git a/virtinst/domain/cpu.py b/virtinst/domain/cpu.py +index c635932ed..2c66b9dfc 100644 +--- a/virtinst/domain/cpu.py ++++ b/virtinst/domain/cpu.py +@@ -462,5 +462,10 @@ def set_defaults(self, guest): + # -M virt defaults to a 32bit CPU, even if using aarch64 + self.set_model(guest, "cortex-a57") + ++ elif guest.os.is_loongarch() and guest.type == "kvm": ++ if guest.os.arch != self.conn.caps.host.cpu.arch: ++ return ++ self.set_special_mode(guest, guest.loongarch_cpu_default) ++ + elif guest.os.is_x86() and guest.type == "kvm": + self._set_cpu_x86_kvm_default(guest) +diff --git a/virtinst/domain/os.py b/virtinst/domain/os.py +index 4310e6238..ae2cd97b1 100644 +--- a/virtinst/domain/os.py ++++ b/virtinst/domain/os.py +@@ -78,6 +78,9 @@ def is_riscv(self): + def is_riscv_virt(self): + return self.is_riscv() and str(self.machine).startswith("virt") + ++ def is_loongarch(self): ++ return self.arch == "loongarch64" ++ + ################## + # XML properties # + ################## +diff --git a/virtinst/domcapabilities.py b/virtinst/domcapabilities.py +index db08bf65f..8694cbd3a 100644 +--- a/virtinst/domcapabilities.py ++++ b/virtinst/domcapabilities.py +@@ -291,6 +291,10 @@ def build_from_guest(guest): + r".*arm/QEMU_EFI.*", # fedora, gerd's firmware repo + r".*edk2-arm-code\.fd" # upstream qemu + ], ++ "loongarch64": [ ++ ".*loongarch_bios.bin", # loongarch ++ ".*loongarch_bios.bin", # gerd's firmware repo ++ ], + } + + def find_uefi_path_for_arch(self): +@@ -446,7 +450,7 @@ def supports_graphics_spice(self): + # support. 
Use our pre-existing logic + if not self.conn.is_qemu() and not self.conn.is_test(): + return False +- return self.conn.caps.host.cpu.arch in ["i686", "x86_64"] ++ return self.conn.caps.host.cpu.arch in ["i686", "x86_64", "loongarch64"] + + return self.devices.graphics.get_enum("type").has_value("spice") + +diff --git a/virtinst/guest.py b/virtinst/guest.py +index babe3de66..7bcccd817 100644 +--- a/virtinst/guest.py ++++ b/virtinst/guest.py +@@ -213,6 +213,7 @@ def __init__(self, *args, **kwargs): + self.skip_default_tpm = False + self.have_default_tpm = False + self.x86_cpu_default = self.cpu.SPECIAL_MODE_APP_DEFAULT ++ self.loongarch_cpu_default = self.cpu.SPECIAL_MODE_HOST_MODEL_ONLY + + # qemu 6.1, fairly new when we added this option, has an unfortunate + # bug with >= 15 root ports, so we choose 14 instead of our original 16 +@@ -353,7 +354,8 @@ def _supports_virtio(self, os_support): + if (self.os.is_arm_machvirt() or + self.os.is_riscv_virt() or + self.os.is_s390x() or +- self.os.is_pseries()): ++ self.os.is_pseries() or ++ self.os.is_loongarch()): + return True + + if not os_support: +@@ -542,7 +544,7 @@ def prefers_uefi(self): + # and doesn't break QEMU internal snapshots + prefer_efi = self.osinfo.requires_firmware_efi(self.os.arch) + else: +- prefer_efi = self.os.is_arm_machvirt() or self.conn.is_bhyve() ++ prefer_efi = self.os.is_arm_machvirt() or self.conn.is_bhyve() or self.os.is_loongarch() + + log.debug("Prefer EFI => %s", prefer_efi) + return prefer_efi +@@ -559,6 +561,8 @@ def set_uefi_path(self, path): + """ + self.os.loader_ro = True + self.os.loader_type = "pflash" ++ if (self.os.is_loongarch()): ++ self.os.loader_type = "rom" + self.os.loader = path + + # If the firmware name contains "secboot" it is probably build +@@ -908,7 +912,8 @@ def _add_default_input_device(self): + usb_tablet = True + if (self.os.is_arm_machvirt() or + self.os.is_riscv_virt() or +- self.os.is_pseries()): ++ self.os.is_pseries() or ++ self.os.is_loongarch()): + usb_tablet = True + usb_keyboard = True + +@@ -1022,7 +1027,8 @@ def _add_default_graphics(self): + if self.os.is_container() and not self.conn.is_vz(): + return + if (not self.os.is_x86() and +- not self.os.is_pseries()): ++ not self.os.is_pseries() and ++ not self.os.is_loongarch()): + return + self.add_device(DeviceGraphics(self.conn)) + +@@ -1164,6 +1170,8 @@ def _add_spice_sound(self): + self.add_device(dev) + + def _add_spice_usbredir(self): ++ if self.os.is_loongarch(): ++ return + if not self.lookup_domcaps().supports_redirdev_usb(): + return # pragma: no cover + if self.skip_default_usbredir: diff --git a/virt-manager/PKGBUILD b/virt-manager/PKGBUILD index dbe1d98d12..891f4997ec 100644 --- a/virt-manager/PKGBUILD +++ b/virt-manager/PKGBUILD @@ -7,15 +7,22 @@ pkgbase=virt-manager pkgname=(virt-install virt-manager) pkgver=4.1.0 -pkgrel=2 +pkgrel=3 arch=('any') url='https://virt-manager.org/' license=('GPL') makedepends=('python-docutils' 'python-setuptools') checkdepends=('python-pytest' 'libosinfo' 'libvirt-python' 'python-gobject' 'python-requests' 'cpio' 'cdrtools') optdepends=('x11-ssh-askpass: provide password for remote machines connected via ssh tunnel') -source=("https://releases.pagure.org/virt-manager/virt-manager-${pkgver}.tar.gz") -b2sums=('1b4203be81bd7b82251225d691a4d9068f268e610f049bcadb96de5b539e964ca0b001f22f06ddd8266b58b079f60046f6d11942a1e4eadbc43f0607c46bbddd') +source=("https://releases.pagure.org/virt-manager/virt-manager-${pkgver}.tar.gz" + 600.patch) 
+b2sums=('1b4203be81bd7b82251225d691a4d9068f268e610f049bcadb96de5b539e964ca0b001f22f06ddd8266b58b079f60046f6d11942a1e4eadbc43f0607c46bbddd' + 'b97145d2f550e15c7a3bb2c93b4b82b20e33c098b51cf38c76314c58da665ead76b891cb7ceb0bceaf18f4128209d0bade0148c0ebbdc9c876fff7ae89a6e129') + +prepare() { + cd ${pkgbase}-${pkgver} + patch -p1 -i $srcdir/600.patch +} build() { cd ${pkgbase}-${pkgver} diff --git a/virtiofsd/PKGBUILD b/virtiofsd/PKGBUILD index e947b36182..eaaa95f478 100644 --- a/virtiofsd/PKGBUILD +++ b/virtiofsd/PKGBUILD @@ -27,7 +27,7 @@ prepare() { # use /usr/lib instead of /usr/libexec: https://gitlab.com/virtio-fs/virtiofsd/-/issues/86 sed 's/libexec/lib/' -i 50-$pkgname.json - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked } build() { diff --git a/virtualbox-host-modules-arch/PKGBUILD b/virtualbox-host-modules-arch/PKGBUILD index f37df363f4..825c62299b 100644 --- a/virtualbox-host-modules-arch/PKGBUILD +++ b/virtualbox-host-modules-arch/PKGBUILD @@ -26,7 +26,7 @@ package(){ _kernver="$( '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -104,6 +110,8 @@ prepare() { # https://bugs.archlinux.org/task/79783 # https://github.com/WebKit/WebKit/pull/18614 patch -Np1 -i ../GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + patch -p1 -i $srcdir/webkit2-gtk-fix-build.patch + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -127,7 +135,12 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow and is too much to package for debuginfod diff --git a/webkit2gtk-4.1/webkit2-gtk-fix-build.patch b/webkit2gtk-4.1/webkit2-gtk-fix-build.patch new file mode 100644 index 0000000000..3e03c66611 --- /dev/null +++ b/webkit2gtk-4.1/webkit2-gtk-fix-build.patch @@ -0,0 +1,10 @@ +--- webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2022-08-31 15:59:51.894493300 +0800 ++++ webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2023-03-09 13:32:01.655350948 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + // This type is defined here to simplify ANGLE's integration with glslang for SPIR-V. 
+ using ShCompileOptions = uint64_t; diff --git a/webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch b/webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/webkit2gtk/PKGBUILD b/webkit2gtk/PKGBUILD index 1df216e118..975bbe0b43 100644 --- a/webkit2gtk/PKGBUILD +++ b/webkit2gtk/PKGBUILD @@ -80,15 +80,21 @@ source=( $url/releases/webkitgtk-$pkgver.tar.xz{,.asc} GTK-MiniBrowser-should-hide-the-toolbar-when-using-full-screen.patch GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + webkit2-gtk-fix-build.patch + webkit2gtk-fix-cmake-build.patch ) sha256sums=('52288b30bda22373442cecb86f9c9a569ad8d4769a1f97b352290ed92a67ed86' 'SKIP' 'a921d6be1303e9f23474971f381886fd291ec5bb1a7ff1e85acede8cfb88bef2' - '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206') + '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206' + '08917be7a1af4bb371c9919117912f1acffc9bc8fe9434693e3b0184ac352bc0' + '2ce442337d9f9871170f7face452a965d724a418ecb752be4222cc867b9c0e5e') b2sums=('3a8cd0818e0d989ab778cda63dd873d7e185ec20fbfe609b9da70041fe38ac30351046516600cb8eb86089e43136487d81c922690468daa70ed2a436561c2401' 'SKIP' 'd440d82c769f1b35caf5464dc850cdf1c896224205c90c17d8b0a44aee62e4b1383e11306936aaca067fde8836770d346d5122d7b05c91a5c7c1741c89c65e2f' - 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d') + 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d' + '22602b8f9836d666ca8db8200e4c965c0560263786d713f041db9814f5c93233da76c5f29e8540079e6bf97097ab063aa88938de39a32f547d954de5b8669acf' + '6fa62e8d83678ea8938b1aef0425422354ce5597bad455adca89e7509677edcc7c1a263b0c38057e20ae404a7a2cc3dffe9f411ff9fc29baa5a59ea585d998b6') validpgpkeys=( 'D7FCF61CF9A2DEAB31D81BD3F3D322D0EC4582C3' # Carlos Garcia Campos '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -104,6 +110,8 @@ prepare() { # https://bugs.archlinux.org/task/79783 # https://github.com/WebKit/WebKit/pull/18614 patch -Np1 -i ../GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + patch -p1 -i $srcdir/webkit2-gtk-fix-build.patch + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -127,7 +135,14 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} + CFLAGS=${CFLAGS/-Wa,-mno-relax/} + CXXFLAGS=${CXXFLAGS/-Wa,-mno-relax/} # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow 
and is too much to package for debuginfod diff --git a/webkit2gtk/webkit2-gtk-fix-build.patch b/webkit2gtk/webkit2-gtk-fix-build.patch new file mode 100644 index 0000000000..3e03c66611 --- /dev/null +++ b/webkit2gtk/webkit2-gtk-fix-build.patch @@ -0,0 +1,10 @@ +--- webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2022-08-31 15:59:51.894493300 +0800 ++++ webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2023-03-09 13:32:01.655350948 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + // This type is defined here to simplify ANGLE's integration with glslang for SPIR-V. + using ShCompileOptions = uint64_t; diff --git a/webkit2gtk/webkit2gtk-fix-cmake-build.patch b/webkit2gtk/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/webkit2gtk/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/webkitgtk-6.0/PKGBUILD b/webkitgtk-6.0/PKGBUILD index 7364ac6d90..71791cd56a 100644 --- a/webkitgtk-6.0/PKGBUILD +++ b/webkitgtk-6.0/PKGBUILD @@ -80,15 +80,18 @@ source=( $url/releases/webkitgtk-$pkgver.tar.xz{,.asc} GTK-MiniBrowser-should-hide-the-toolbar-when-using-full-screen.patch GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + webkit2gtk-fix-cmake-build.patch ) sha256sums=('52288b30bda22373442cecb86f9c9a569ad8d4769a1f97b352290ed92a67ed86' 'SKIP' 'a921d6be1303e9f23474971f381886fd291ec5bb1a7ff1e85acede8cfb88bef2' - '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206') + '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206' + '2ce442337d9f9871170f7face452a965d724a418ecb752be4222cc867b9c0e5e') b2sums=('3a8cd0818e0d989ab778cda63dd873d7e185ec20fbfe609b9da70041fe38ac30351046516600cb8eb86089e43136487d81c922690468daa70ed2a436561c2401' 'SKIP' 'd440d82c769f1b35caf5464dc850cdf1c896224205c90c17d8b0a44aee62e4b1383e11306936aaca067fde8836770d346d5122d7b05c91a5c7c1741c89c65e2f' - 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d') + 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d' + '6fa62e8d83678ea8938b1aef0425422354ce5597bad455adca89e7509677edcc7c1a263b0c38057e20ae404a7a2cc3dffe9f411ff9fc29baa5a59ea585d998b6') validpgpkeys=( 'D7FCF61CF9A2DEAB31D81BD3F3D322D0EC4582C3' # Carlos Garcia Campos '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -104,6 +107,7 @@ prepare() { # https://bugs.archlinux.org/task/79783 # https://github.com/WebKit/WebKit/pull/18614 patch -Np1 -i ../GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -127,7 +131,12 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# 
LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow and is too much to package for debuginfod diff --git a/webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch b/webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/webrtc-audio-processing-1/PKGBUILD b/webrtc-audio-processing-1/PKGBUILD index a0e08bd2d6..0ce2b728f3 100644 --- a/webrtc-audio-processing-1/PKGBUILD +++ b/webrtc-audio-processing-1/PKGBUILD @@ -17,8 +17,10 @@ makedepends=( ) provides=(libwebrtc-audio-{coding,processing}-${pkgver%%.*}.so) _commit=8e258a1933d405073c9e6465628a69ac7d2a1f13 # tags/v1.3^0 -source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit") -b2sums=('SKIP') +source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit" + webrtc-audio-processing-la64.patch) +b2sums=('SKIP' + '82645a6da3e482209975fd04eec41bf4b94781e2fd0ed9df433c959738cba26c41e6d7833fadc5427a37feaa8a12a71ee5600b9531de6a6651f1b60ccd4983ed') pkgver() { cd webrtc-audio-processing @@ -27,6 +29,7 @@ pkgver() { prepare() { cd webrtc-audio-processing + patch -p1 -i $srcdir/webrtc-audio-processing-la64.patch } build() { diff --git a/webrtc-audio-processing-1/webrtc-audio-processing-la64.patch b/webrtc-audio-processing-1/webrtc-audio-processing-la64.patch new file mode 100644 index 0000000000..5bb38adf9a --- /dev/null +++ b/webrtc-audio-processing-1/webrtc-audio-processing-la64.patch @@ -0,0 +1,12 @@ +--- webrtc-audio-processing/webrtc/rtc_base/system/arch.h 2023-11-10 21:50:34.000000000 +0800 ++++ webrtc-audio-processing/webrtc/rtc_base/system/arch.h 2023-11-10 21:54:23.409972954 +0800 +@@ -57,6 +57,9 @@ + #elif defined(__EMSCRIPTEN__) + #define WEBRTC_ARCH_32_BITS + #define WEBRTC_ARCH_LITTLE_ENDIAN ++#elif defined(__loongarch_lp64) ++#define WEBRTC_ARCH_64_BITS ++#define WEBRTC_ARCH_LITTLE_ENDIAN + #else + #error Please add support for your architecture in rtc_base/system/arch.h + #endif diff --git a/webrtc-audio-processing/PKGBUILD b/webrtc-audio-processing/PKGBUILD index efae882ce0..d858285951 100644 --- a/webrtc-audio-processing/PKGBUILD +++ b/webrtc-audio-processing/PKGBUILD @@ -11,7 +11,8 @@ depends=(gcc-libs) makedepends=(git) provides=(libwebrtc_audio_processing.so) _commit=e882a5442ac22c93648e12837248d651d18b9247 # tags/v0.3.1^0 -source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit") +source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit" + webrtc-audio-processing-la64.patch) b2sums=('SKIP') pkgver() { @@ -22,6 +23,7 @@ pkgver() { prepare() { cd 
$pkgname NOCONFIGURE=1 ./autogen.sh + patch -p1 -i $srcdir/webrtc-audio-processing-la64.patch } build() { diff --git a/webrtc-audio-processing/webrtc-audio-processing-la64.patch b/webrtc-audio-processing/webrtc-audio-processing-la64.patch new file mode 100644 index 0000000000..caa03863a4 --- /dev/null +++ b/webrtc-audio-processing/webrtc-audio-processing-la64.patch @@ -0,0 +1,14 @@ +Index: webrtc-audio-processing/webrtc/typedefs.h +=================================================================== +--- webrtc-audio-processing.orig/webrtc/typedefs.h ++++ webrtc-audio-processing/webrtc/typedefs.h +@@ -41,6 +41,9 @@ + //#define WEBRTC_ARCH_ARMEL + #define WEBRTC_ARCH_32_BITS + #define WEBRTC_ARCH_LITTLE_ENDIAN ++#elif defined(__loongarch64) ++#define WEBRTC_ARCH_64_BITS ++#define WEBRTC_ARCH_LITTLE_ENDIAN + #elif defined(__MIPSEL__) + #define WEBRTC_ARCH_32_BITS + #define WEBRTC_ARCH_LITTLE_ENDIAN diff --git a/whipper/PKGBUILD b/whipper/PKGBUILD index 97e2549b2a..3cf6fcb119 100644 --- a/whipper/PKGBUILD +++ b/whipper/PKGBUILD @@ -60,7 +60,7 @@ build() { check() { cd ${pkgname}-${pkgver} local python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') - PYTHONPATH="build/lib.linux-${CARCH}-cpython-${python_version}/" python -m unittest discover + PYTHONPATH="build/lib.linux-`uname -m`-cpython-${python_version}/" python -m unittest discover } package() { diff --git a/wiki-tui/PKGBUILD b/wiki-tui/PKGBUILD index ef54f82e23..09b328163b 100644 --- a/wiki-tui/PKGBUILD +++ b/wiki-tui/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('9240c17ab9410bf4182349701d06df73f56b11fceb24415c5249a982026fb236d87 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/wldash/PKGBUILD b/wldash/PKGBUILD index 9dac7c9516..54857a8d8e 100644 --- a/wldash/PKGBUILD +++ b/wldash/PKGBUILD @@ -24,7 +24,9 @@ prepare() { build() { cd "${pkgname}-v${pkgver}" - cargo build --release --locked + export CARGO_REGISTRIES_MY_REGISTRY_INDEX="https://gitee.com/yetist/crates.io-index" + rm Cargo.lock + cargo build --release -v } package() { diff --git a/wolf-shaper/PKGBUILD b/wolf-shaper/PKGBUILD index 60377ea686..7431924589 100644 --- a/wolf-shaper/PKGBUILD +++ b/wolf-shaper/PKGBUILD @@ -120,6 +120,6 @@ package_wolf-shaper-vst3() { pkgdesc+=" - VST3 plugin" groups=(pro-audio vst3-plugins) - install -vDm 755 $pkgbase/bin/$pkgbase.vst3/Contents/$CARCH-linux/*.so -t "$pkgdir/usr/lib/vst3/$pkgbase.vst3/Contents/$CARCH-linux/" + install -vDm 755 $pkgbase/bin/$pkgbase.vst3/Contents/`uname -m`-linux/*.so -t "$pkgdir/usr/lib/vst3/$pkgbase.vst3/Contents/`uname -m`-linux/" } diff --git a/woodpecker/PKGBUILD b/woodpecker/PKGBUILD index 776397f181..bc9a8816b2 100644 --- a/woodpecker/PKGBUILD +++ b/woodpecker/PKGBUILD @@ -70,6 +70,10 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export GOPATH="${srcdir}" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy # build server/agent/cli go build -v \ diff --git a/wpewebkit/PKGBUILD b/wpewebkit/PKGBUILD index faf4944448..5bfd15a877 100644 --- a/wpewebkit/PKGBUILD +++ b/wpewebkit/PKGBUILD @@ -67,11 +67,14 @@ makedepends=( ) source=( $url/releases/wpewebkit-$pkgver.tar.xz{,.asc} + webkit2gtk-fix-cmake-build.patch ) 
sha256sums=('8836040a3687581970b47a232b713e7023c080d5613427f52db619c29fb253a4' - 'SKIP') + 'SKIP' + '2ce442337d9f9871170f7face452a965d724a418ecb752be4222cc867b9c0e5e') b2sums=('cfce325c574a738c5c7c8b14365d4d07496bc62b54d67d1d1e82b7f497f747ef886ea418ab199d6cbb8c6ac5df76db5b5a092d98abbb95874f11e621241dfeff' - 'SKIP') + 'SKIP' + '6fa62e8d83678ea8938b1aef0425422354ce5597bad455adca89e7509677edcc7c1a263b0c38057e20ae404a7a2cc3dffe9f411ff9fc29baa5a59ea585d998b6') validpgpkeys=( 'D7FCF61CF9A2DEAB31D81BD3F3D322D0EC4582C3' # Carlos Garcia Campos '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -79,6 +82,7 @@ validpgpkeys=( prepare() { cd wpewebkit-$pkgver + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -102,7 +106,13 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} + # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow and is too much to package for debuginfod diff --git a/wpewebkit/webkit2gtk-fix-cmake-build.patch b/wpewebkit/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/wpewebkit/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/x11vnc/PKGBUILD b/x11vnc/PKGBUILD index 42d4a6696a..d1d313efd3 100644 --- a/x11vnc/PKGBUILD +++ b/x11vnc/PKGBUILD @@ -26,8 +26,8 @@ source=("git+https://github.com/LibVNC/x11vnc.git#commit=${_commit}?signed" validpgpkeys=('25E71D2709955ECD4D041E03421BB3B45C6067F8') # Christian Beier sha256sums=('SKIP' 'd39a399d7db8e942e55639ed04a51b3c4f5d31d213d4639b1e26a44d92029403' - '2c71af4c586eabaa11744da65916f9223b928d1fba820f117243f6c8c585f16b' - 'e9c121a0b16013059ce903ed3e7560fabc5015e3b058a3acec85d7ae7102fcf0' + '9358217c23e586cb34d98564e5031dd58bf43e621d23435629d2105c901d0aa8' + '6047df38ca8a27760a6359a7f4029dd006f0423a4cd262cb0833da41defe1792' 'cfb19d44e09e960e2fdb958c9258bccf23c2677715314985f7e819f1dcedb6e4') prepare() { diff --git a/x264/PKGBUILD b/x264/PKGBUILD index aee89b150a..68e62cbdf6 100644 --- a/x264/PKGBUILD +++ b/x264/PKGBUILD @@ -35,10 +35,11 @@ _commit=31e19f92f00c7003fa115047ce50978bc98c3a0d source=(git+https://code.videolan.org/videolan/x264.git#commit=${_commit}) sha256sums=(SKIP) -pkgver() { - cd x264 - ./version.sh | grep X264_POINTVER | sed -r 's/^#define X264_POINTVER "([0-9]+\.[0-9]+)\.([0-9]+) (.*)"$/\1.r\2.\3/' -} +#pkgver() { +# cd x264 +# +# ./version.sh | grep X264_POINTVER | sed -r 's/^#define X264_POINTVER "([0-9]+\.[0-9]+)\.([0-9]+) (.*)"$/\1.r\2.\3/' +#} build() { cd x264 diff --git a/x86_64-linux-gnu-binutils/PKGBUILD b/x86_64-linux-gnu-binutils/PKGBUILD new file mode 100644 index 0000000000..7bc0085619 --- 
/dev/null +++ b/x86_64-linux-gnu-binutils/PKGBUILD @@ -0,0 +1,66 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-binutils +pkgver=2.41 +pkgrel=1 +pkgdesc='A set of programs to assemble and manipulate binary and object files for 32-bit and 64-bit x86' +arch=(loong64) +url='https://www.gnu.org/software/binutils/' +license=(GPL) +depends=(zlib libelf) +groups=(x86) +source=(https://ftp.gnu.org/gnu/binutils/binutils-$pkgver.tar.bz2{,.sig}) +sha1sums=('b180faf37b6e1c321d6ccbbf66194f17f7acf47c' + 'SKIP') +sha256sums=('a4c4bec052f7b8370024e60389e194377f3f48b56618418ea51067f67aaab30b' + 'SKIP') +validpgpkeys=('3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F') # Nick Clifton (Chief Binutils Maintainer) + +prepare() { + cd binutils-$pkgver + sed -i "/ac_cpp=/s/\$CPPFLAGS/\$CPPFLAGS -O2/" libiberty/configure +} + +build() { + cd binutils-$pkgver + +# unset CPPFLAGS + ./configure --target=$_target \ + --with-sysroot=/usr/$_target \ + --prefix=/usr \ + --enable-multilib \ + --with-gnu-as \ + --with-gnu-ld \ + --disable-nls \ + --enable-ld=default \ + --enable-gold \ + --enable-plugins \ + --enable-deterministic-archives \ + --with-system-zlib + make +} + +check() { + cd binutils-$pkgver + + # unset LDFLAGS as testsuite makes assumptions about which ones are active + # do not abort on errors - manually check log files + make -k LDFLAGS="" check || true +} + +package() { + cd binutils-$pkgver + + make DESTDIR="$pkgdir" install + + # Remove file conflicting with host binutils and manpages for MS Windows tools + rm "$pkgdir"/usr/share/man/man1/$_target-{dlltool,windres,windmc}* + rm "$pkgdir"/usr/lib/bfd-plugins/libdep.so + +# rm -r "$pkgdir"/usr/include +# rm -r "$pkgdir"/usr/lib/gprofng/ + + # Remove info documents that conflict with host version + rm -r "$pkgdir"/usr/share/info +} diff --git a/x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc b/x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc new file mode 100644 index 0000000000..a76485a9fc --- /dev/null +++ b/x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc @@ -0,0 +1,51 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFm/2cUBEADkvRqMWfAryJ52T4J/640Av5cam9ojdFih9MjcX7QWFxIzJfTF +Yq2z+nb4omdfZosdCJL2zGcn6C0AxpHNvxR9HMDkEyFHKrjDh4xWU+pH4z9azQEq +Jh331X7UzbZldqQo16VkuVavgsTJaHcXm+nGIBTcUbl2oiTtHhmuaYxx6JTMcFjC +7vyO5mLBw78wt52HBYweJ0NjHBvvH/JxbAAULSPRUC61K0exlO49VFbFETQNG1hZ +TKEji95fPbre7PpXQ0ewQShUgttEE/J3UA4jYaF9lOcZgUzbA27xTV//KomP0D30 +yr4e4EJEJYYNKa3hofTEHDXeeNgM25tprhBUMdbVRZpf2Keuk2uDVwc+EiOVri48 +rb1NU+60sOXvoGO6Ks81+mhAGmrBrlgLhAp8K1HPHI4MG4gHnrMqX2rEGUGRPFjC +3qqVVlPm8H05PnosNqDLQ1Pf7C0pVgsCx6hKQB7Y1qBui7aoj9zeFaQgpYef+CEE +RIKEcWwrjaOJwK3pi9HFdxS0NNWYZj8HPzz/AsgTTQdsbulPlVq2SsctmOnL42CZ +OCTppGYwl53CG/EqVY+UQBzFzJBaY8TJRFFYVEy5/HH4H11rMoZwqIkk71EOGU3X +6mWlANRikR3M4GhVITRzuaV69Fed+OeXcCmP94ASLfuhBR2uynmcHpBKpwARAQAB +tDtOaWNrIENsaWZ0b24gKENoaWVmIEJpbnV0aWxzIE1haW50YWluZXIpIDxuaWNr +Y0ByZWRoYXQuY29tPokCOAQTAQIAIgUCWb/ZxQIbAwYLCQgHAwIGFQgCCQoLBBYC +AwECHgECF4AACgkQE/zvid2ePE9cOxAA3cX1bdDaTFttTqukdPXLCtD2aNwJos4v +B4LYPSgugLkYaHIQH9d1NQPhS0TlUeovnFNESLaVsoihv0YmBUCyL4jE52FRoTjE +6fUhYkFNqIWN2HYwkVrSap2UUJFquRVoVbPkbSup8P+D8eydBbdxsY6f+5E8Rtz5 +ibVnPZTib7CyqnFokJITWjzGdIP0Gn+JWVa6jtHTImWx1MtqiuVRDapUhrIoUIjf +98HQn9/N5ylEFYQTw7tzaJNWeGUoGYS8+8n/0sNbuYQUU/zwMVY9wpJcrXaas6yZ +XGpF/tua59t9LFCct+07YAUSWyaBXqBW3PKQz7QP+oE8yje91XrhOQam04eJhPIB +LO88g6/UrdKaY7evBB8bJ76Zpn1yqsYOXwAxifD0gDcRTQcB2s5MYXYmizn2GoUm 
+1MnCJeAfQCi/YMobR+c8xEEkRU83Tnnw3pmAbRU6OcPihEFuK/+SOMKIuV1QWmjk +bAr4g9XeXvaN+TRJ9Hl/k1k/sj+uOfyGIaFzM/fpaLmFk8vHeej4i2/C6cL4mnah +wYBDHAfHO65ZUIBAssdA6AeJ+PGsYeYhqs6zkpaA2b0wT4f9s7BPSqi0Veky8bUY +YY7WpjzDcHnj1gEeIU55EhOQ42dnEfv7WrIAXanOP8SjhgqAUkb3R88azZCpEMTH +iCE4bFxzOmi5Ag0EWb/ZxQEQALaJE/3u23rTvPLkitaTJFqKkwPVylzkwmKdvd2q +eEFk1qys2J3tACTMyYVnYTSXy5EJH2zJyhUfLnhLp8jJZF4oU5QehOaJPcMmzI/C +ZS1AmH+jnm6pukdZAowTzJyt4IKSapr+7mxcxX1YQ2XewMnFYpLkAA2dHaChLSU/ +EHJXe3+O4DgEURTFMa3SRN/J4GNMBacKXnMSSYylI5DcIOZ/v0IGa5MAXHrP1Hwm +1rBmloIcgmzexczBf+IcWgCLThyFPffv+2pfLK1XaS82OzBC7fS01pB/eDOkjQuK +y16sKZX6Rt57vud40uE5a0lpyItC2P7u7QWL4yT5pMF+oS8bm3YWgEntV380RyZp +qgJGZTZLNq2T4ZgfiaueEV4JzOnG2/QRGjOUrNQaYzKy5V127CTnRg4BYF/uLEmi +zLcI3O3U1+mEz6h48wkAojO1B6AZ8Lm+JuxOW5ouGcrkTEuIG56GcDwMWS/Pw/vN +sDyNmOCjy9eEKWJgmMmLaq59HpfTd8IOeaYyuAQHAsYt/zzKy0giMgjhCQtuc99E +4nQE9KZ44DKsnqRabK9s3zYE3PIkCFIEZcUiJXSXWWOIdJ43j+YyFHU5hqXfECM6 +rzKGBeBUGTzyWcOX6YwRM4LzQDVJwYG8cVfth+v4/ImcXR43D4WVxxBEAjKag02b ++1yfABEBAAGJAh8EGAECAAkFAlm/2cUCGwwACgkQE/zvid2ePE/dqQ/6ApUwgsZz +tps0MOdRddjPwz44pWXS5MG45irMQXELGQyxkrafc8lwHeABYstoK8dpopTcJGE3 +dZGL3JNz1YWxQ5AV4uyqBn5N8RubcA8NzR6DQP+OGPIwzMketvVC/cbbKDZqf0uT +Dy3jP65OFhSkTEIynYv1Mb4JJl3Sq+haUbfWLAV5nboSuHmiZE6Bz2+TjdoVkNwH +Bfpqxu6MlWka+P98SUcmY8iVhPy9QC1XFOGdFDFf1kYgHW27mFwds35NQhNARgft +AVz9FZXruW6tFIIfisjr3rVjD9R8VgL7l5vMr9ylOFpepnI6+wd2X1566HW7F1Zw +1DIrY2NHL7kL5635bHrJY4n7o/n7Elk/Ca/MAqzdIZxz6orfXeImsqZ6ODn4Y47P +ToS3Tr3bMNN9N6tmOPQZkJGHDBExbhAi/Jp8fpWxMmpVCUl6c85cOBCR4s8tZsvG +YOjR3CvqKrX4bb8GElrhOvAJa6DdmZXc7AyoVMaTvhpq3gJYKmC64oqt7zwIHwaC +xTbP6C6oUp9ENRV7nHnXN3BlvIgCo4QEs6HkDzkmgYlCEOKBiDyVMSkPDZdsspa+ +K4GlU2Swi/BDJMjtDxyo+K0M81LXXxOeRfEIfPtZ3ddxBKPva1uSsuz+pbN9d1JY +8Ko5T/h16susi2ReUyNJEJaSnjO5z13TQ1U= +=93P0 +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gcc/PKGBUILD b/x86_64-linux-gnu-gcc/PKGBUILD new file mode 100644 index 0000000000..34db35ad14 --- /dev/null +++ b/x86_64-linux-gnu-gcc/PKGBUILD @@ -0,0 +1,96 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-gcc +pkgver=13.2.0 +pkgrel=1 +pkgdesc='The GNU Compiler Collection - cross compiler for x86_64 target' +arch=('loong64') +url='https://gcc.gnu.org/' +license=('GPL' 'LGPL' 'FDL') +groups=('x86') +depends=($_target-binutils $_target-glibc libmpc zlib libisl zstd) +makedepends=(gmp mpfr) +options=(!emptydirs !strip staticlibs !lto) +source=(https://ftp.gnu.org/gnu/gcc/gcc-$pkgver/gcc-$pkgver.tar.xz{,.sig}) + #https://gcc.gnu.org/pub/gcc/snapshots/$_snapshot/gcc-$_snapshot.tar.xz +sha256sums=('e275e76442a6067341a27f04c5c6b83d8613144004c0413528863dc6b5c743da' + 'SKIP') +validpgpkeys=(D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62 # Jakub Jelinek + 33C235A34C46AA3FFB293709A328C3A2C3C45C06 # Jakub Jelinek + 13975A70E63C361C73AE69EF6EEB81F8981C74C7) # Richard Guenther + +if [ -n "$_snapshot" ]; then + _basedir=gcc-$_snapshot +else + _basedir=gcc-$pkgver +fi + +prepare() { + cd $_basedir + + echo $pkgver > gcc/BASE-VER + + # Do not run fixincludes + sed -i 's@\./fixinc\.sh@-c true@' gcc/Makefile.in + + rm -rf "$srcdir"/gcc-build + mkdir "$srcdir"/gcc-build +} + +build() { + cd gcc-build + + # using -pipe causes spurious test-suite failures + # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48565 + CFLAGS=${CFLAGS/-pipe/} + CXXFLAGS=${CXXFLAGS/-pipe/} + + # Credits @allanmcrae + # https://github.com/allanmcrae/toolchain/blob/f18604d70c5933c31b51a320978711e4e6791cf1/gcc/PKGBUILD + # TODO: properly deal with the build issues resulting from this + CFLAGS=${CFLAGS/-Werror=format-security/} 
+ CXXFLAGS=${CXXFLAGS/-Werror=format-security/} + + "$srcdir"/$_basedir/configure \ + --prefix=/usr \ + --program-prefix=$_target- \ + --with-local-prefix=/usr/$_target \ + --with-sysroot=/usr/$_target \ + --with-build-sysroot=/usr/$_target \ + --with-native-system-header-dir=/include \ + --libdir=/usr/lib --libexecdir=/usr/lib \ + --target=$_target --host=$CHOST --build=$CHOST \ + --disable-nls --enable-default-pie \ + --enable-languages=c,c++,fortran \ + --enable-shared --enable-threads=posix \ + --with-system-zlib --with-isl --enable-__cxa_atexit \ + --disable-libunwind-exceptions --enable-clocale=gnu \ + --disable-libstdcxx-pch --disable-libssp \ + --enable-gnu-unique-object --enable-linker-build-id \ + --enable-lto --enable-plugin --enable-install-libiberty \ + --with-linker-hash-style=gnu --enable-gnu-indirect-function \ + --disable-multilib --disable-werror \ + --enable-checking=release + + make +} + +package() { + cd gcc-build + + make DESTDIR="$pkgdir" install-gcc install-target-{libgcc,libstdc++-v3,libgomp,libgfortran,libquadmath,libatomic} + + # strip target binaries + find "$pkgdir"/usr/lib/gcc/$_target/ "$pkgdir"/usr/$_target/lib \ + -type f -and \( -name \*.a -or -name \*.o \) \ + -exec $_target-objcopy -R .comment -R .note -R .debug_info -R .debug_aranges \ + -R .debug_pubnames -R .debug_pubtypes -R .debug_abbrev -R .debug_line \ + -R .debug_str -R .debug_ranges -R .debug_loc '{}' \; + + # strip host binaries + find "$pkgdir"/usr/bin/ "$pkgdir"/usr/lib/gcc/$_target/ -type f -and \( -executable \) -exec strip '{}' \; + + # Remove files that conflict with host gcc package + rm -r "$pkgdir/usr/share/"{man/man7,info,"gcc-$pkgver"} +} diff --git a/x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc b/x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc new file mode 100644 index 0000000000..d6ed3d0d87 --- /dev/null +++ b/x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc @@ -0,0 +1,53 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQGiBDs4dV0RBACZII57dgbfnCC7RTrJ1yc0F1ofEZJJ/x4tAtSHMDNj2zTnLR25 +5AHmxN85namwJdn7ixXSZv1FMPCeTs6jDk98YuA9r5uuCNPqCNZsuQtREpN7h+wO +IeRrhvg9/F11mty/5NthXNh8P2ELnkWXSHu6DvTQyGppAtxueOL0CjRrpwCggVYu +vxui5mqNq9+lILbMi2Zm3UkD/0T/0HupthZFXbuzY/h/nyqzoPOxnSAAAx6N7SiE +2w9OQ1w3K8WOFoPH9P0cnIQ+KnMSGQV4C2WY/d8YtShnKkXRYZVvlK+aiwmvf1kU +yNyUqaA/GhW5FWN26zFQc3G5Y9TDjgBqjd6SequZztK5M5cknJGJn+otpdQtA1Dx +2KEABACSYjdRNT3OvQJ7OSz4x4C58JKz/P69WsNZxqYVo66P7PGxM7V2GykFPbG7 +agyEMWP1alvUK551IamVtXN+mD7h3uwi5Er0cFBBfV8bSLjmhSchVpyQpiMe2iAr +IFeWox7IUp3zoT35/CP4xMu5l8pza61U5+hK3G7ud5ZQzVvh8bQtUmljaGFyZCBH +dWVudGhlciA8cmljaGFyZC5ndWVudGhlckBnbWFpbC5jb20+iGUEExECACUCGwMC +HgECF4ACGQEFAlZi3pMGCwkIBwMCBhUIAgkKCwQWAgMBAAoJEG7rgfiYHHTHIBIA +n20wZDYF0KrfbJNzK4/VwAEAzN+wAJ9Dpbhtq4sRoH3cbadBsD2mXXthOrQrUmlj +aGFyZCBHdWVudGhlciAoV29yaykgPHJndWVudGhlckBzdXNlLmRlPohiBBMRAgAi +AhsDAh4BAheABQJWYt6YBgsJCAcDAgYVCAIJCgsEFgIDAQAKCRBu64H4mBx0x2iy +AJ4tmLvgNsphsrpKKfDDyV0tzR5FuACeNymltMsgfFyvoueBvji/h+HyObm0K1Jp +Y2hhcmQgR3VlbnRoZXIgPHJpY2hhcmQuZ3VlbnRoZXJAZ214Lm5ldD6IYgQTEQIA +IgIbAwIeAQIXgAUCVmLemAYLCQgHAwIGFQgCCQoLBBYCAwEACgkQbuuB+JgcdMde +DQCfZRUFDCB8sLK6B6wqRmwCsb3EK6MAnjSG6ZtgrdEjSQSmfAcIV/9W367MtCxS +aWNoYXJkIEd1ZW50aGVyIChHQ0MpIDxyZ3VlbnRoQGdjYy5nbnUub3JnPohiBBMR +AgAiAhsDAh4BAheABQJWYt6YBgsJCAcDAgYVCAIJCgsEFgIDAQAKCRBu64H4mBx0 +x9TwAJ4/9S1pd6cS2MHldWQpUdIuOBiUHACaAjNPvdqSN1SLEjH5GGlFZjo1c3+5 +AQsEQybx0QEIAPjHD/kts6GQbtsV+6+aZgfCK6MVZe14MOXFG60FmnHPzXymorzu 
+7DxSQOkKiKU49mUklTIQ1ErGIr8nCzjmVHmm7CH53dy8/OklPgpecBLGSP9WiqQ8 +TJxNUiDWQA0r2HWVAsi86N+E3e9ubN4VSK4yd6JMR1Mp2KgyS2LK2PjRo+o7mulO +FaAAoxmi8gWIVR6sv5dkh3g/6/DfKg32U8CWjFp5IXKmkyMPSH5eOZL4eBfx4Ia1 +cFcWfDJZdsYQ+EB+auzHqyr/DS5on4aS72WAppWkwH1Mu/fYnOEY37yF4GwYPb/M +5Loz2wTMxdjfflzCMdfQ56CuMlfc84MeT/MABimJAWcEGBECAAkCGwIFAkl7HCkB +KcBdIAQZAQIABgUCQybx0QAKCRA6sAmW/CamQbh0CADqF4FDBMsQh8+vkhFvXOTQ +vtXkQMuQedryaCGHcS/e5/J0xb5uLybMnnFyh9tIy8cj9sc79yeTTuXSQLJgU02X +h6EL4osGpe5JWWvFbY4SyNkyR3UcpXgzkH80crsHF5ixwCxy7PTEjQ48yT09hig4 +eiDHQ1fS4ox0F5aUQ4q2mk4bNtU6WHEP/8l6BKwSUC9/lfFQmMnk7SeIQqTwgC9T +agPZhCQz/tNZBo+t3ETlRcfPZ2djCGRJm5mbMG/pwEy1L9frdopzBYk56yEpuA97 +HjmIDvt34YbAOlPYPSCsvnhzZdoVrRv/qBcGxNhdYA7zwiGDlrjRf7Rg1KT8izvo +CRBu64H4mBx0x1WDAJUTqjXmf02pQphfYo7qalOuVr1tAJ0UUgg+PXpgrP0lp4LQ +8SbkxvBRhbkBCwRDJvH8AQgAtyi1+vdUzhYos5lmUznkTURFBGWMvPSOnB62I2Mq +0ZAazhyRjb6EuSTuGcusJXPWzRlsUFPdmyQjIqhPJ9ZkUSLOieIBMU4VqgWc7GZX +K7P4Luh8TxQrz5YOtnpj5Hev/yj63ACDLIbzShizBSteZ+TZL+aH7/8XK/36o4rC +Ep5OH69RiPcCUFEHQkxF7vaPdnqyH82/JtUOxSW1zYcr+7XHpHa+UNtI62Q4MV9x +9Wi6vBBvJ6zZSarpVi7ViIf5PVMGuWb5nA1YShEtQVKnQnn6pBqRUF4iLcyrunIL +PGp13htUUahaDr4qWUP5VKmELT9IitOTR0BX6e3E2h6NnwAGKYhJBBgRAgAJBQJD +JvH8AhsMAAoJEG7rgfiYHHTHMuEAnRcN4qTQ1V92e+2RzJm2IYbVJjPBAJwNY6s3 +lbrcC7Zc2E/k5fxwwenSTrkBDQQ7OHVeEAQAl3WryGIZfi9uPLNZlIvRFBErvUKL +zc7n+/c1GaUVMxXcF/Iauegblh41OoV2Kcz1sFx52MLDSDTV1DwDn4fNzwP7DYOe +9h4EBpMePG1DS7LQ0LoD682rvey6Cvww+eFmBBXdiEqCXvPuW4d3WMnOsQqL5BiS +QH+GiwIrLFN7yj8AAwUD/j9FOzif1GLdoSG8fsEi//axq0sXI+NtRUOrvmrkTCG3 +o5rZOJNwz+KBQpP45LdzosO4V/kPVuJ5U4EprEPRqejfSTW+oK+Bgm0mfImgw7Jv +adkNeXfJdwYidutyF1jjroVdqprSjAAaoZgSi2sw03CFx1WkdL+GCccwN6IVl5OI +iEYEGBECAAYFAjs4dV4ACgkQbuuB+JgcdMeqzACfeHjT2PFYdy88PHNVGw5se9Pq +GPYAnArpX32fDdu/xhuqjqHrNkwyO/Yo +=c9j1 +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc b/x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc new file mode 100644 index 0000000000..c03bedc94b --- /dev/null +++ b/x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQGiBECGYZsRBAC9VE8N8vHAG87MTC2wbtnmbSD8Wc2xYCaLofZAH+vXyio3Dnwx +jQLlj7IgwRWNAVq13uL0wn0WAsGop5Cs7nA/JD4MEBBNSdnvq1bMYitch2PTtAU+ +h6HaI9JXBDUh4AKZz2rllKgbigMHlgIugxnKTAMJIhS63lCTHWEDlnycJwCgqSX9 +hDs9eBC5coearGDhc0BDvTsD/A05YkZkQBgsYD6cjWFwNLJIcaHORKlLLZ9gRJO5 +LVcKaCEgYSWAM7dadJeqIFi9RkXdv+cWozxTgrGlY4T7/PakIBB7wWj2Zl72mW5a +NHT2vAemB8IFV1saiFXZM+qDhCHbV4yKSmNOQHY1VnSCUrgINiM0qlTz08yjUazK +fm2BBACDF3ZfUQNeHC9zwfsgCzKnqOm7FSlwOyI0f+j83B5PH2+KuzuyEqYoxGp+ +2d1zTxvbOeBBaX8T1M4n5d9ixiFMhgbTzuyit3nn6cp5j2L0IAS9pw0kaWpPMhpQ +zydNgnaBxHs1Y+cP4iM/4FWFCvfjUdR7xULdEzkgGxevu8pNEbQgSmFrdWIgSmVs +aW5layA8amFrdWJAcmVkaGF0LmNvbT6IZAQTEQIAJAIbAwYLCQgHAwIDFQIDAxYC +AQIeAQIXgAUCTI3tMgUJHtOOlwAKCRCjKMOiw8RcBjySAJ9ApMXF3+gWIr0zpMxv +Wb53/oxsHgCaAl6V5JS9GJUnrPiHKdR+sMFPkd4= +=MB2O +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc b/x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc new file mode 100644 index 0000000000..d80a382548 --- /dev/null +++ b/x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc @@ -0,0 +1,122 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBF7Ps4YBEAC5i0PA1CA3te8UeAxWm8zH5KRyoXyD+IuVHar9fPR13J/IkUgO +0f4kebDaGQGjyPoBuLHWtshQwSjDP9059eMbfne6fhe3UxqRjfknWxr83S0pSrDI +xgdIsxMQT6dxm1YYpp+pK6PRs/tHMtXHtSJc4HwkW187nx7c7lfKXmwVoqUuEjvW 
+irKyJRVNw68WZjYLmmIsRIIZcUMOE2lItPkejerHZobOuTkuXslgkWH3zeKCK8JD +em9npzxIkLgrl8Ub0HxWdkAc6o+gj3Ih0QthvC8P7gxNuTJyf8SVaZFla+ky/t7Z +kLmIhSBLzNSosscOtz9sdI4seXsOGgWeGRORp/+zF5ISnD3kFg3OtIudW8p4J7oA +OICWkPIuEOXPCz5VIUmaY2Eswh76YgW7u60JMv/v0Agpjy23hovvG6HArMO8Letr +Y5CWC+G9wp/xTo3TeyQ9mrYcKMjvrZzCos+SFaGF0lcExWpk610XQf+8/1FlhJ4U +SiQCy78o1pW8dOpLWvWe7y9YtRm3DTgYDCDpcMzYVZPrp2oPg5h4nW5sfhPJ01yu +gwTLDo/AILMQSkr1IVbfMkP7Mxtev51nRjxL7JCMB4bHx+uyNs56LCqdLctrF6Aa +HrS7yaP3ym3BsHrH3TqAaTGW7rs/hrZ6MaWbU4bBxL49z4GyXWRJqglePwARAQAB +tCBKYWt1YiBKZWxpbmVrIDxqYWt1YkByZWRoYXQuY29tPokCOAQTAQIAIgUCXs+z +hgIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AACgkQbDW5kwm1+mJqRxAAtvq9 +NevjleUPVMJhz3X5pprhCNM/8LGSbhE284tDYv0inT9huKaMeaw/hQvSSoniac87 +zb0S5JDWYHEZ6PhOs7ZASuSwtBp3SdGtjQTMJMSuAHvEjSvBR+0EkCu+85oJxwDS +wMMNEy1xubsyqhETD6DF/wudDr5r8IpB0A4vzFmaFv5wdbdnywd/sQzU9Fnl7GyX +/1Wgd94dIiznr7fdxJtsdphw4WzoJf5EYTdozs+biVhnz+NJuniTjg8IKoEOl7oY +Nrmqwfijw7FqgcbuXb9UAsxaVLFHZl3GVhXAmbQoz4io3PVw5BR+p+zeWvndeONu +jndE8QN3PyFb/WHyWRnCv7goUb5uLZU4aDqf34PP8fral3HBaaaXB53NybvJVjEU +nMzvjpVQj4J0yFzy+NlFwJb3oT03EDNkEK+SQrKHv4xz/atgXbfFYZ8oynCkypmt +aw/udPuLpy2dBY6wTAhSvdzkiD9swgbgX2idNLXfoeU1AaiKd+mez3X51Arf49pn +nL5wDCBkT2BUwX41ntIGgrNMcMNFbfNt99kUNaZ15oWI9Ia3DyWMxvgmDg4Ev8m8 +pGHH3Hq3eueyPySdJh+I8x+ipyIpFW+7AYS49L4LI7A7jFs8Nas22Qi6RoTFV3ep +0qkEL4Lgrh8ooxhhSLOmsfyJDiXAHiz5sVUZ4wXRymrKaAEQAAEBAAAAAAAAAAAA +AAAA/9j/4AAQSkZJRgABAQEBLAEsAAD/4gKwSUNDX1BST0ZJTEUAAQEAAAKgbGNt +cwQwAABtbnRyUkdCIFhZWiAH5AAFABwADQASAABhY3NwQVBQTAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLWxjbXMAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA1kZXNjAAABIAAAAEBjcHJ0AAAB +YAAAADZ3dHB0AAABmAAAABRjaGFkAAABrAAAACxyWFlaAAAB2AAAABRiWFlaAAAB +7AAAABRnWFlaAAACAAAAABRyVFJDAAACFAAAACBnVFJDAAACFAAAACBiVFJDAAAC +FAAAACBjaHJtAAACNAAAACRkbW5kAAACWAAAACRkbWRkAAACfAAAACRtbHVjAAAA +AAAAAAEAAAAMZW5VUwAAACQAAAAcAEcASQBNAFAAIABiAHUAaQBsAHQALQBpAG4A +IABzAFIARwBCbWx1YwAAAAAAAAABAAAADGVuVVMAAAAaAAAAHABQAHUAYgBsAGkA +YwAgAEQAbwBtAGEAaQBuAABYWVogAAAAAAAA9tYAAQAAAADTLXNmMzIAAAAAAAEM +QgAABd7///MlAAAHkwAA/ZD///uh///9ogAAA9wAAMBuWFlaIAAAAAAAAG+gAAA4 +9QAAA5BYWVogAAAAAAAAJJ8AAA+EAAC2xFhZWiAAAAAAAABilwAAt4cAABjZcGFy +YQAAAAAAAwAAAAJmZgAA8qcAAA1ZAAAT0AAACltjaHJtAAAAAAADAAAAAKPXAABU +fAAATM0AAJmaAAAmZwAAD1xtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAEcA +SQBNAFBtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAHMAUgBHAEL/2wBDABsS +FBcUERsXFhceHBsgKEIrKCUlKFE6PTBCYFVlZF9VXVtqeJmBanGQc1tdhbWGkJ6j +q62rZ4C8ybqmx5moq6T/2wBDARweHigjKE4rK06kbl1upKSkpKSkpKSkpKSkpKSk +pKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKT/wgARCACVAGkDAREA +AhEBAxEB/8QAGQAAAgMBAAAAAAAAAAAAAAAAAAECAwUE/8QAFgEBAQEAAAAAAAAA +AAAAAAAAAAEC/9oADAMBAAIQAxAAAAHTAAEVFZIvGAAAABxmcUQAWVqHYAAAHMYg +RJUgIVapoAAAZEcy2SzWQitKrKrNs6wADGilb5qxWBCKbnnue6tYAAx4qmrlmrAg +VpRc9tmoACMSVKycty1pBEU2a9nUAEDIzuQrGspUkLJS02aR2XIAjJmyGsEasYEU +67O25AAzpaJqxYDJDKyKaVzfYABAyc6m0kkIktSdNzoWAAAjJlc1FWoyyuzYuZAA +ABnxytWSipIXPUmhQAAAGZLzklZWga1lwAAAVGbnTURUojZs2MAAiURwzQtgwIlN +mnc3gAHKRAJQYgCxFh0DA5BDJDGAiBAgSLzoKxDAAABCABlp/8QAJBAAAgEEAgEF +AQEAAAAAAAAAAAECAxESIAQQMRMhMDIzFEH/2gAIAQEAAQUC1lUhE/opirQl8VWs +oE6056RnKBRrqe9eeFPyW2oV8tuTK8xIsYoxQ4jXXHnnDTk/rEWrGjh/bTlfrHZj +OIvfStJzmul032zjxtS7l9WvZeB+L+xbq15UZPWYumWF3HzR+2nIjZrpjF0yJSja +Ok/eK2ZQhdbSWMixbvy0rLbkP3v231x43+Ct+lhF+6StT2nNQL5MsWLH+rxpdEqq +Q7yer8053WkvsYowRgYGJijFIZGppL7fEm0Rm2zEwRgjExMTExMEYI9NHpoUEmf/ +xAAcEQACAgIDAAAAAAAAAAAAAAAAEQFQMEACECD/2gAIAQMBAT8Bslrxurwu5zcs +02EVLGMYx0f/xAAXEQEBAQEAAAAAAAAAAAAAAAARAFBw/9oACAECAQE/AexkRGJ/ 
+/8QAIhAAAQMEAQUBAAAAAAAAAAAAAREgMAACITFREjJAYXGB/9oACAEBAAY/Am5u +rdYuiQZNbZg0h7n+4EvOXdPEOdiYt/Ibi1YfrCkSTFqwFxgU6ejkpJTdMH5omTml +L0u20xIZ8Ungf//EACMQAAIBAwUAAwEBAAAAAAAAAAABERAhMSBBUWFxMJGxoeH/ +2gAIAQEAAT8h0tIUmNP+R1CJ7E5+F7AFkcFwjJFH15CHD+tbGNOGshttLu6IIIpM +OUfSl86mysBJUI8HUdAgjo3nEac/ioQVGNCkA1vrSsehgIVGh1MU2zpkFRshLUdr +ckdsyW9xmZFc3aJciLUiBKCxFQSlDDVhFcveNNra7MKJcbJGFGZz+bT0FmAkZErG +I1RuDL2XfSiSfFLYle4xSiZQ59JlzrnvqZqJDdiGpMsi02WtEVvAgnIrUG7ExixZ +fBe/hyQww2RYgPWtFeOS7sggM2DUoluWItLU4buW9XDhEoEiCBok7CAsfrSwQnlH +Wex7PR7GSwEJYVaz2Jziq3UTJJJJGxsdMgFlkqNW5udjPQjyyPZHsj2R7PQ7mdjO +xkgTdP/aAAwDAQACAAMAAAAQAAkkgAAAECzAAAAh67cgAASff5kAAVogX8AAychk +wAEUiaXQAE4oCsGAEhH4sRAAfuVo2AAHjbiwAAhXVUAAAHjyPAAAFoFMAAAjSioA +AHZKSAAAWL7+KkAiyySWyAkAkkkAg//EAB0RAAMAAwEBAQEAAAAAAAAAAAABERAg +MCExUEH/2gAIAQMBAT8Q/BhOaEiIhMGuCFq+K6se0FhYeXs/gkOiTDVQhCeizW+C +ITEJhfdifkFh6MQ1eyZT6QuGy/zdMQoeDKN8EUTw2Ub4J4QTLygkQhCD3Sols+FF +6ai/hf/EABsRAQADAQEBAQAAAAAAAAAAAAEAETAgEFBA/9oACAECAQE/EPo3Lg4P +RjX6iMGo+EXy4Oxy4moduz5XgeGDs8nay9qJTgV8P//EACgQAQACAQMEAgICAwEA +AAAAAAEAESExUWEQIEFxgZGh4TCxwdHw8f/aAAgBAQABPxDsX9zgUmqfUA1vAoVu +GgE/uALETj+E7QdS6D3CdU7ELWcwG00loM8nh+IAc2vHp33HgJHzKZVhYgzEv5j9 +IlEESUjY7QBUE1VR+3cnS6qK8pHYIO0HggQKpBXRKVkMMfLGwd+e2hRV5KgwdAEo +qDpaksMovmb189og7D+UgwjN4dpXQNEyuHKQlmQA2di0LHFOYtgjUCNGLX1FdVQV +XB4oljN+JYwxUiA3yX/x+OzAaqPaA32CfKi4GKbGU8gfmWKwF3T6iU4Bsqpbd1j1 +VYZlgpBZfY5KgWxMzMUlxknlhGFbE08RYmC6IQKhodpKV430zIQqzGXr4jslohFc +twgixDZs4mB6Wxt2iZYvxxFgjaEwFQiaksxzZqRqjBvcf67ksTeK9oN+niGGtQdu +cTRhv5ihlmJUoTa0QxlAHfdpgUT22MolOqmDTEKqOiDK2Ttv/BUh4AxR/RKfDMNU +y7SKyfqYGqyvz3jGhdA1Y+qLvUMJcaQOfuUWCNrZBAoagHbTabI6ij8H3HSKr9TC +lhNMcKmCChezSoizCW9oAuYjon2Qfb6YvoiANPxn/NSvlfUpZt+ZoGcwDlBAMAP2 +gAUI6J1yfMLJwQk5ym8pA6KuJHxUrx4+pk9vJ66N02egcnRnLHLHLHLCnmF/0z/3 +Sf8AaQ0wm/T/2YkCOAQTAQIAIgUCXs+8lgIbAwYLCQgHAwIGFQgCCQoLBBYCAwEC +HgECF4AACgkQbDW5kwm1+mKtXQ/+MEPsOs4J0K0CBLgX25Qz0y1Hi4G+n7x1W5oZ +fft8yvxbmsMT7BjMGJh4jNzdQndAbY26H5U3s5OtlL1w2kMqt7b7/o9BPjL+ndIj +ujgkSrOh5xaUhPBbLaGmhiYSzjvLsXovRGl41D+wneFDJ3H5fonFd78gH5+2k5ak +7mgYgOt0GhTeEFNxBgv1OMMxZJ+aiIiApMFVtc4q2zLWxzRNyMeTUAKhKPTz5Dz5 +LCrnpx391e4gx9pTqY24mADVRRrDtP7YdgsMDP0nXRqTbyRsc/0wcUdl9/czVRvz +SADCZon3q7gb7ar3Jfr/5CFdN/2AQXw/IEqkF3pST900s+1WMZhOOQ/FDt/J6ETs +q6cL2487N7QASeGfF7cczVOKQU1tsBhpk2Wqxe0c8KnsBN8Klq62g1qs7sJ8V6vv +s64QLOgsekM1ep6w9n79Z5mxRG2KTYyg03S2jeEJklAGm1wFDgIDL7kuVYplRFTQ +LG7YaRI1IzRi6JCeYpF4tlvNfhGuvtzX8ADHk+yYbnksgXixLKBtY+9jqQEvtcSG +8xrrHcHj6kQs0IlRRKCnhpbegve7TkNBZUkOBl235c8/YNOISAW1ls5Lqxd2qk9j +n98L7r0dT2ylyYR1pUIl4g4gLL6wN8VM5O/RNypcPtMhmSjikoD3sviXN6RAQ0Vy +eL39geS5Ag0EXs+zhgEQALiuGiBqVzOlpSaCcwOREam/tYWCm/3o9AEoKhZ8VNOw +Hy9bQZ7Qnty04Z686NGq6kOc0rXJxO5dSeqiA4OfhmkpiheD3GgEKauVcCN1EDtn +0Yb0+/lJWBqHiXAa+haEUNsRkZdbLAaCBY6aG3S8Y9hSb1dqbed9QoDdC5DljqlT +JUzRbQL3fFBlsYejtWTNWUHP9OdeUTIzdPCf2iENMo+PYZG3LxycwQMza0TDzmuV +SLRbBYxLjYwzjVoiI7RYvV+aFs2HZSgtA0P2BmC+pjleeSfWeHiNAZxPH6Steb8T +MOL/AydLrMcGnjleREco1YUNL8ho2zUAv0h+S3bzGhb7gtkOxVzu6G1EHoflTCrw +dsAfeA8WCBZM+KLqLJcn85ehH7/N0ZBy/Y5Bu76JitYO/DJvqAR6jSiR2fJBQLiS +EC3SOvhWwJEYexNWI3eSpDYvxdjQ33tFz6QTUHneifs1aByl6MH0WxRDYspIs1c7 +X6afqIWrVUsXmjh/IT18hGOEdVDZ8ktK1U2adQP32OmZdQ5ij86Ydyypr9cwHK97 +6VdttRLNKs7KElFempkHtde7ue+XylrzeMYQj1Mmtw4baL8jxg4AHmk06U3VEaT7 +PQIJphZMwLMes3hk4e47b3C8aeCAAYA1sfma4rCU8hTbrAoSV5qhDCDbRZiAhF0B +ABEBAAGJAh8EGAECAAkFAl7Ps4YCGwwACgkQbDW5kwm1+mIrNw/+LNy8YHShVO+E +uQeia3E0cVu6/qCyM/UdjCfaSYH9NwQldHUbed70IY72MqYfk+sqToHsGaCAoSef +CSRLsELQ7sAMsfqvwNb8dibON6q9Ju5JESUwu4txmVlLlkXFlyxgk/fTeBTT89wT 
+7ZYXjSVa75WRDDwJo8BLJ1N726UxVDcUm6F4Gb+elaMi0jVGClKVG/ZHLgWRlfK4 +8dp1a2CPrqvRAeGvnqyQy4q6UtV1FcwYlxzl3G2ubB8YjlQg1FG7CoSfgHPZFZ+f +PtCvW3kUjeTOZlDQ8UNEsyvUtwL+ntKCpKwxRdAFLWbfVgl1HRtcK0eXQf+Nepn5 ++lRgNRfUlzFInYUkn0QEFJQQgAmdxuQVLutLzgchxR94MBr6c18jhu+95qoKIfNp +eSaOpxDZx9WqnMCsBmns+YKmdaFaj5MukzPXIQxw6o9Ez+AgPY9VRmIDvaK/0BJr +gW/deFGcF12tMHzaQ4+It7eidDezYn6CWqvRAX45wpW5ig7GUXm5Xp8MSq9HUHpv +xSESDao996QgfR6lFJ7YIy+cQcCS5ynHcJkpnLGdS0G8UzN4iYT+zAsnOybNrETF +xFPsTAddu9c+sOjRUHUF8r9kGQZ3eKxCp3rNfELUVTjyGv2yOpun+Z1xHAeLIq5M +Eq4PBxKJz/7TpPbxn6pwUaSrLdJBkXo= +=Vl0Y +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gdb/PKGBUILD b/x86_64-linux-gnu-gdb/PKGBUILD new file mode 100644 index 0000000000..5ba374c75a --- /dev/null +++ b/x86_64-linux-gnu-gdb/PKGBUILD @@ -0,0 +1,49 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-gdb +pkgver=13.1 +pkgrel=1 +pkgdesc='The GNU Debugger for the 32bit and 64bit x86 target' +arch=(loong64 x86_64) +url='https://www.gnu.org/software/gdb/' +license=(GPL3) +depends=(xz ncurses expat python guile gdb-common mpfr libelf source-highlight) +makedepends=(boost) +options=(!emptydirs) +source=(https://ftp.gnu.org/gnu/gdb/gdb-$pkgver.tar.xz{,.sig}) +validpgpkeys=('F40ADB902B24264AA42E50BF92EDB04BFF325CF3') # Joel Brobecker +sha256sums=('115ad5c18d69a6be2ab15882d365dda2a2211c14f480b3502c6eba576e2e95a0' + 'SKIP') +validpgpkeys=('F40ADB902B24264AA42E50BF92EDB04BFF325CF3') # Joel Brobecker + +build() { + cd gdb-$pkgver + + mkdir -p build && cd build + ../configure \ + --target=$_target \ + --prefix=/usr \ + --enable-languages=c,c++ \ + --disable-multilib \ + --enable-interwork \ + --with-system-readline \ + --disable-nls \ + --enable-source-highlight \ + --with-python=/usr/bin/python \ + --with-system-gdbinit=/etc/gdb/gdbinit + + make +} + +package() { + cd gdb-$pkgver/build + + make -C gdb DESTDIR=$pkgdir install + + # Following files conflict with 'gdb'/'gdb-common' packages + rm -r "$pkgdir"/usr/include/gdb/ + rm -r "$pkgdir"/usr/share/gdb/ + rm -r "$pkgdir"/usr/share/info/ + rm -r "$pkgdir"/usr/share/man/man5/ +} diff --git a/x86_64-linux-gnu-glibc/PKGBUILD b/x86_64-linux-gnu-glibc/PKGBUILD new file mode 100644 index 0000000000..f88ab5f72f --- /dev/null +++ b/x86_64-linux-gnu-glibc/PKGBUILD @@ -0,0 +1,126 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-glibc +pkgver=2.38 +pkgrel=2 +pkgdesc='GNU C Library x86_64 target' +arch=(any) +url='https://www.gnu.org/software/libc/' +license=(GPL LGPL) +depends=($_target-gcc $_target-linux-api-headers) +groups=(x86) +makedepends=(python) +options=(!buildflags !strip staticlibs) +source=(https://ftp.gnu.org/gnu/libc/glibc-$pkgver.tar.xz{,.sig} + sdt.h sdt-config.h + reenable_DT_HASH.patch + ) +sha256sums=('fb82998998b2b29965467bc1b69d152e9c307d2cf301c9eafb4555b770ef3fd2' + 'SKIP' + '774061aff612a377714a509918a9e0e0aafce708b87d2d7e06b1bd1f6542fe70' + 'cdc234959c6fdb43f000d3bb7d1080b0103f4080f5e67bcfe8ae1aaf477812f0' + 'cf9fe494f7ec69752a63d1b0a9ad689aa620888ae9b902b6383a6fbc7c1726a7') +validpgpkeys=(7273542B39962DF7B299931416792B4EA25340F8 # "Carlos O'Donell " + BC7C7372637EC10C57D7AA6579C43DFBF1CF2187) # Siddhesh Poyarekar + +prepare() { + mkdir -p glibc-build lib32-glibc-build + cd glibc-$pkgver + patch -Np1 -i "${srcdir}"/reenable_DT_HASH.patch +} + +build() { + # remove hardening options for building libraries + export CFLAGS="-U_FORTIFY_SOURCE -O2" + export CPPFLAGS="-U_FORTIFY_SOURCE -O2" + unset LD_LIBRARY_PATH + + export BUILD_CC=gcc + + local 
_configure_flags=( + --prefix=/usr + --target=$_target + --host=$_target + --build=$CHOST + --includedir=/include + --with-headers=/usr/$_target/include + --with-bugurl=https://bugs.archlinux.org/ + --enable-fortify-source + --enable-cet + --disable-nscd + --enable-kernel=4.4 + --enable-add-ons + --enable-bind-now + --disable-profile + --enable-stackguard-randomization + --enable-lock-elision + --enable-multi-arch + --disable-werror + ) + + ( + cd glibc-build + export CC=${_target}-gcc + export CXX=${_target}-g++ + export AR=${_target}-ar + export RANLIB=${_target}-ranlib + + echo 'slibdir=/lib' >> configparms + echo 'rtlddir=/lib' >> configparms + echo 'sbindir=/bin' >> configparms + echo 'rootsbindir=/bin' >> configparms + echo 'build-programs=no' >> configparms + + # Credits @allanmcrae + # https://github.com/allanmcrae/toolchain/blob/f18604d70c5933c31b51a320978711e4e6791cf1/glibc/PKGBUILD + # remove fortify for building libraries + # CFLAGS=${CFLAGS/-Wp,-D_FORTIFY_SOURCE=2/} + + "${srcdir}"/glibc-$pkgver/configure \ + --libdir=/lib \ + --libexecdir=/lib \ + "${_configure_flags[@]}" + + make -O + ) + +# ( +# cd lib32-glibc-build +# export CC="${_target}-gcc -m32 -mstackrealign" +# export CXX="${_target}-g++ -m32 -mstackrealign" +# +# echo "slibdir=/lib32" >> configparms +# echo "rtlddir=/lib32" >> configparms +# echo "sbindir=/bin" >> configparms +# echo "rootsbindir=/bin" >> configparms +# echo 'build-programs=no' >> configparms +# +# "${srcdir}"/glibc-$pkgver/configure \ +# --host=i686-pc-linux-gnu \ +# --libdir=/lib32 \ +# --libexecdir=/lib32 \ +# "${_configure_flags[@]}" +# +# make -O +# ) +} + +package() { + cd glibc-build + + make install_root="$pkgdir"/usr/$_target install +# make -C glibc-build DESTDIR="${pkgdir}" install + install -Dm644 "${srcdir}"/sdt.h "${pkgdir}"/usr/$_target/include/sys/sdt.h + install -Dm644 "${srcdir}"/sdt-config.h "${pkgdir}"/usr/$_target/include/sys/sdt-config.h +# cd lib32-glibc-build + +# make install_root="$pkgdir"/usr/$_target install +# make DESTDIR="${pkgdir}" install + +# # Dynamic linker +# install -d "${pkgdir}"/usr/lib +# ln -s ../lib32/ld-linux.so.2 "${pkgdir}"/usr/lib/ + + rm -r "$pkgdir"/usr/$_target/{etc,usr/share,var} +} diff --git a/x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc b/x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc new file mode 100644 index 0000000000..f1de42479a --- /dev/null +++ b/x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc @@ -0,0 +1,54 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFef5BoBEACvJ15QMMZh4stKHbz0rs78XsOdxuug37dumTx6ngrDCwZ61k7n +HQ+uxLuoQvLSc6YJGBEfiNFbs1hvhRFNR7xJbzRYmin7kJZZ/06fH2cgTkQhN0mR +BP8KsKKT+7SvvBL785ZfAhArWf5m5Tl0CktZ8yoG8g9dM4SgdvdSdzZUaWBVHc6T +jdAb9YEQ1/jpyfHsQp+PWLuQZI8nZUm+I3IBDLkbbuJVQklKzpT1b8yxVSsHCyIP +FRqDDUjPL5G4WnUVy529OzfrciBvHdxGsYYDV8FX7fv6V/S3eL6qmZbObivIbLD2 +NbeDqw6vNpr+aehEwgwNbMVuVfH1PVHJV8Qkgxg4PqPgQC7GbIhxxYroGbLJCQ41 +j25M+oqCO/XW/FUu/9x0vY5w0RsZFhlmSP5lBDcaiy3SUgp3MSTePGuxpPlLVMeP +xKvabSS7EErLKlrAEmDgnUYYdPqGCefA+5N9Rn2JPfP7SoQEp2pHhEyM6Xg9x7TJ ++JNuDowQCgwussmeDt2ZUeMl3s1f6/XePfTd3l8c8Yn5Fc8reRa28dFANU6oXiZf +7/h3iQXPg81BsLMJK3aA/nyajRrNxL8dHIx7BjKX0/gxpOozlUHZHl73KhAvrBRa +qLrr2tIPLkKrf3d7wdz4llg4NAGIU4ERdTTne1QAwS6x2tNa9GO9tXGPawARAQAB +tClDYXJsb3MgTydEb25lbGwgPGNhcmxvc0BzeXN0ZW1oYWx0ZWQub3JnPokCQQQT +AQIAKwIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4ACGQEFAls7bO0FCQdd78kA +CgkQFnkrTqJTQPijtg//d+nIhSrlAadHxlKZpsFyS3pWQgybSfZnPQVcP1BYfTIA 
+SpjqHi4idXxVw79AHOAagL769GMy7QUQo+jFrE41Brt7/9oBbD6Gy0HVtOWcdQtD +KEWFCxKGU7utP05cOLxBfbDPpPn9zSXcJHIKiSrx91gcxTL9xCDribjLFmn6Zcef +irkNNzlFwgEXurL1x8+e62gANIE/QuKUkFe/2w284sHZsdk6pdRdnH3m3WCLm/HR +beeJE/Qmkb9pFzLBr/dz/43owjfc+GIn4i3+FJZVwgM5qZW2NIBClt8rQ7nl4+cH +EMryH8SuIDrluBG0pUDMz9pyHzb1oaC+Jb+WmywBZcHxibj/qAsYGlD/vQXjAGvA +3tIpheKvU6oC0O0KqkaYeb4Hf/kmL19NKdEexuD8brcZKi69JZowTIbCPrwXhUcZ +wyqP8eXaHX/wPYGiwhvaLoVtjXZ8Qqb2ZRw1aeyWM3IWDuRAyPCCgrRg80LvNu0i +vBIKQTb2ZZAFH33oAYSz+rf6FW9PX8/AWJP3spo3W6a7rJ03SqqQOJKsTF6LxcEg +mvPun0uk/h/TWid42xkTgEx121VA/j5srkqZ5mhYvB39uUtDT4jakx2EziommvqD +iPx10rHRT/vh3MWdMkkCUITywOJVyG+iYtTM/IbppJJJPj0pISR17ydMEko7GwC0 +KkNhcmxvcyBPJ0RvbmVsbCAoV29yaykgPGNhcmxvc0ByZWRoYXQuY29tPokCPgQT +AQIAKAIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AFAls7bPMFCQdd78kACgkQ +FnkrTqJTQPjDng/+OWH1/ORl2HmBfvvWgNALtNKi6uaqbLqne7DKuWCMRKLytgHg +uXOdB3plVFIQy8TX42johGF7LbaSIICGDCZFh0hu8xFgy277kN26UmJPlI9gobBx +Ew8SI37lAD2vkS/gDmwQlyfx8kqODL+S/33lUqY3Ak31/ZcPXQOa/6axwb2Zfbg3 +2rfqA9yFQmKvTct1L09xz64TQIlPq1r07Vaf4u1GATW6iZ2QtvOGyKzkA2VHoXaM +1RIsLhw7LHM9W4Vto++e0ZWj8l83fGqwYXbE92T0aIlBwNxTOJUleARzL95g8Ouz +0P++wJ3LoH1zkffLeC0Ms3K9nhrHqCHu1ISJ/QZITYi3fqEOoNYiMfvGaL914kFm +sEpCC6O0FOR/A676eMfgk8M/jkSL40yZURDpOxPXSIdRHUFxJUKvPdMseB4u0fB6 +y+KbSntCMEeA9jKbpl6YZIrU85JFX9S94YV4PSV/qcjzWvJ3msKoK5+DFIDvhg7b +gu/4p1mTk14B20olGcHVpkdfxBNszb31utzo83JHHyK9CZj3Y9Had5ALuVG7ZGsE +MDMwIp8V3uf4DmlGDkuxkWmoQamsyP9dbBA1zkRbC+OL8KV9YNnyxvKZFRsZjamM +BMC/uTi7nvoqJ6XWFBExaBvHm+khz7O+aDk9+LsrGx6yjUgOdd+F/GWKm8i0LENh +cmxvcyBPJ0RvbmVsbCAoV29yaykgPGNvZG9uZWxsQHJlZGhhdC5jb20+iQI+BBMB +AgAoAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAUCWzts8gUJB13vyQAKCRAW +eStOolNA+JX6D/oCtzDbVyoF2mJf5wQZCnU1y2nHmg4Jio7enULyjIfEcpu0SXRm +EbkTauZjFbQNw6di36dW67gsMke3wwiciKy+avR9E7cXaEtPllYOMAUYHLBHaViI +f2Tv54Dc1N5gM7ej5CLxLMYajMOh3427l3MCVAO9ecvJBp8geO5AVN/6q0lPTznb +rW9ZUeegxH6mUd2aQTRywQ9TmEiRSENmCHtwfIDoAT0CGTpI2V0gnv947KCXSS4+ +yITabbZot4hsB3eGPtJuxjVOvjD3KbL8Aon5QG+TpnORFnRCqhpNsGlQCpvoe12L +B/+QtvWSOwlrriKrVcG7BHfwjb7CjadHN4pgVjNHohgkavSGGCJp8JVTWLTkmEny +AjII3uXi6l4t7rGDpEZlGa6PRVVbAkat4T/BVpRcR7GDb5h2xaZ8b7JNqiC8BgB5 +J8i6duJrgr6Q6OmYmdL//Hs9SQiikrDjPrt2VlO2krma/aZaymUwGJjkny1B7ETZ +bmvmT1qfVN+zXxbzsfuS4dyiYMhNgw8dEhXj/pwBmraxgqcosoZM85t+Fg7TtFif +w3SDKCJF/v7BndYEdfWEGK6iZkItZ1feavZKfFl0eJ52tUMRnUrjpfBz+xiM/cBG +HdDy0qnJfc1NZoulQLkloKEsITVLm3htRzBui8JWBtW7cI0TQ9TdMOtUvA== +=JckU +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc b/x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc new file mode 100644 index 0000000000..67d0c96568 --- /dev/null +++ b/x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc @@ -0,0 +1,68 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQENBFMAZNMBCACeatEKl6YY9iEVxzS64bPbvJsA1mLE2XFWmKXyYzm58dFqPMa0 +OQQTKCxjFCOrc+LD2KtmypttcahKnk5Lk1lNU/lV2hCuR7jJ37sL+/TFQuMMgsLP +ED6XU4/AYK3VUJvgGYLBnMVfpAGYLB5rnPvhFNx0r2KItO/CfiSEyD4g1Wu26SUA +XGOp5hbSyBRGhju+8YJlhHBBjn3vZfw7IpwAWDVjK0crqMhGDXoZwK+ADUFY5NER +AkT3Lb7d11F6+W4558WQZCYIWa3rZ62d986OE7+7xKmJUcLLWvlv5spgUmvotZ4D +MzaKba+waY5ygXdGIpm5seVIEUCTaBIe6QVLABEBAAG0KFNpZGRoZXNoIFBveWFy +ZWthciA8c2lkZGhlc2hAZ290cGx0Lm9yZz6JATgEEwECACIFAle0y0wCGwMGCwkI +BwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEHnEPfvxzyGHUggIAJtLMvzHjRQi/Dg6 +oSMUMPtH7oEmgnk482dC4MGNl/bCtUV7VVIAtEN5TGvHxk1EKBNqj49Q+mZjef82 +iluW1RbXRY6+72yZ380yUC41SY+2hVOurJ//h3nvE+YHfO7QjV97yhIegc1kdwAr +VtgNNApLxj5Nc2epT824uaSznVhwyAS2OIHFLmiMYuIW338uXVEug1XKBHwJ9Mpp 
+Vblk4zapc9fRGvRG72ivbTGXNgcza+Kkx1IdA0XB2dEQaAE1XR0FOshKtpeSDRER +wZ17+rRT8EjmkRsR7qm1uvPSNW7wMTtlj8ox/XuSyG0Coy1NRgqe5bi53ha1kBoK +lLaxvyW0KFNpZGRoZXNoIFBveWFyZWthciA8c2lkZGhlc2hAcmVkaGF0LmNvbT6J +AT4EMAECACgFAljc8cohHSBObyBsb25nZXIgZW1wbG95ZWQgd2l0aCBSZWQgSGF0 +AAoJEHnEPfvxzyGHhvoH/3KWe6JIWptc283au0UROXog3VdBFM3pE6SgMhOlFTM2 +r9fU24rvsTZgAMC7N7TxDil0JajMR6CYXoapDncuRs4u27D4uK/oUqHxL6CHuDKw +GwURM9OjqV1kJY1gPYn9IZL3XHejg/YwxodGKK4jRJrL0prR5HSiR6QyQVgJ886D +pOyHGqUwi5GGLZVAgwo9NBsr2GEmXMBmwGU44g+UuCSBiySvXwsBDDx2j34Q166t +eoz+CHsIf4J3UPv2nIR6L0EvboTw39m55aTlyJ3dPHh8OeKwTAZCFzzOv0WxINcC +fVWnL138fOkILt4u12Tv7D2K99PI/bYv1Xeal+zRtBO0KFNpZGRoZXNoIFBveWFy +ZWthciA8c3BveWFyZWtAcmVkaGF0LmNvbT6JAT4EMAECACgFAljc8awhHSBObyBs +b25nZXIgZW1wbG95ZWQgd2l0aCBSZWQgSGF0AAoJEHnEPfvxzyGHT5UH/0eeAKeR +jobfz+8n98UgYzPZnihlS1yd8wznaVThm0cgqUp1hu3NIHuDiirr/VCRwxqP+hmJ +ulwnQsJZwMllf2riFxbnnun4VBeocENxqE/m5EHLHjKkZklhYJSxbxWysXt7BYZb +7+2S3zvlP7TCl2Hb7JhdJgUiOdondBBWAygA+uxolabetIv6X3v8evr+H87PMeOw +lcaTxO2DXPAAsGDqxPJNSzVtiB5WEz6/2fRhsSGkisDSZTs5d/SL+lLS/FfRR2NT +SN20+2/eepzMJM5fyoV9vVkytI3XxhQsepeMya34DEP92ltJnhnG/tToUvHVttOd +IIVwZtDWIYwSg6a0KVNpZGRoZXNoIFBveWFyZWthciA8c2lkQHJlc2VydmVkLWJp +dC5jb20+iQE3BBMBCAAhBQJVwGR2AhsDBQsJCAcCBhUICQoLAgQWAgMBAh4BAheA +AAoJEHnEPfvxzyGHPs8H/3BzCbDhXI1txfjYCCUDJwDMtY2iqcHINZb7LPGJGDbx +vuAwEgJcbrpK6QlbAqBXAg4hwBOzM+CLZRPRQ0g4hBsNQv1m+1WIJdbUfS9ZL4O9 +XPWSHYdKY0U+83XPyaValGOPMvSb2glOy2RRRC+CECN9CaQNbfJo91ZfmMk3waNb +EpZ6Te04vZ9zdoRHz3D7qhr2U2Hazlvv6P9TGqKFRbEbMgMxBJM7L6WiiBhFIIyB +D4N0NaB0xnc1JB1fwpfrRfRT9CPWeqmeXvnt0bGJAlzpG7tc3d0evva2mMVTooyq +C8vXiCRsszcrG5NYOPYkgnt0ahqivVGXd/5F57tMtGq0PFNpZGRoZXNoIFBveWFy +ZWthciAoUGVyc29uYWwpIDxzaWRkaGVzaC5wb3lhcmVrYXJAZ21haWwuY29tPokB +OAQTAQIAIgIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AFAlTqsncACgkQecQ9 ++/HPIYd28Qf9GfOK+VgDxaTzyixbGHWVWpk1nie67BdPvaorTb1jOUVsI582jKO+ +WDIKmI2PTFk9RwsN47q4s/QyN8oCjgYCYmuj+cse8Zh5acEJx2ENJRmP0QLJU3eK +IQMxKJ7SG4UHyVBlqdchWryZq5KDGbAKf3WtMhgBzr825UnybImpZ7qPfJM8u78y +jHPQvBKkKaAzhVwC65Lt+ESQA9+EZqvYeEwlYpAq5gmKKaD/QialyI/8FEsshBqw +DdLzFMMDSjFxVukjiC5t3WJvFz52v+tzEXE0HZsV26p/LrPSA+cei/s75FBmw9qz +4AM0YDOrEaeEG+CSSPLGgQYFYMp1EX+LvrRFU2lkZGhlc2ggUG95YXJla2FyICho +dHRwczovL3NvdXJjZXdhcmUub3JnKSA8c2lkZGhlc2hAc291cmNld2FyZS5vcmc+ +iQE4BBMBAgAiBQJXtMr3AhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRB5 +xD378c8hh6doB/49aFm02tJCigXO7NrWRg7esjdQT66g8/LYYa2O3oHef6yACSmt +bdJWpbZ5aReRledYcsI1WVtLgUHOchrspeKVO6KWAA5cFMDjRhMP0ArhxyL4KDqz +U9AHpo3t9bYBGKNfH9Df3a0rVknkIB9NyENPAJpbJOdrND0TDWXhyOHLGsFC6WuE +MtQudfSKMOhKFqySN0xZQUKlb11n2MT479gqlw7o/UPDy1gDtssG+zG9DD72Xkq3 +lbzzthM/e7ldcB8dMRoVLl9e1XZLoOfL/RAV/z9UxHezRjjYDV69EPxzKsgj+Id3 +AzmY9XeSqUeDWX17z6ARV0mh4uGePEriEmPCuQENBFMAZNMBCAChC0iEpSfa897U +gTzZKcqsCD5+P/2QbhDSUHyFiFmDPa+9rAUR2YIopbDeyu4OhGu7y1FYL+fliJxO +D1hUGqlEmzLm047IZ2iACHklNK7JcEUartgfR5kvJmqwflGHUPjLD1RCJ6wfLq6B +X/CYe4ftjqrNjClDLGsqBckJFbcIdxyPwDE5Jiuorp9wIpDivifVi0MdkKn/ny42 +Of2SI26MG3fBitweIeJFD23lCOUzYOsXClcsVTzMvB2s32g8JVB34dOytFBVrWhb +sPb97ZWonjkyx9A9HAV8mEwKoLOwuunMrccPkK+v/rh2vDDERA0MExBMMInJN4dI +fryIQrFdABEBAAGJAR8EGAECAAkCGwwFAlTqs98ACgkQecQ9+/HPIYf1Mwf9ENd6 +C/2kvJJvbFzmvDNa6EOvTVKaqTBDhgVjuZ2ivMGdkCoeA9OQ3zWu8k+RYDyyPmuJ +HToFm1tn4hP8DGDjIr46Bb3jnZcz6bHsOp9quf3L6KbKa4ghiVqM05ML9Xb/YH0y +ge3QybfiAnWm6e2qIbzYucXmYDOBsQojta369CZ+zQEdy+baULFQ+Hg02vY4NKqv +xIhfri0B/Ng+m7MbUv163u2/7Eyit4xOrLYbouuMOxd1+TNasJPFwrKgjQNWdnPs +1pCxh+GXgf0a8WqbtB9P0wIQQbWw6OuuRmkW9zUisxuKyUo10hEHOK52v2O/7N3P +bgdMo3cl19PJpx81Tg== +=RgMn +-----END PGP PUBLIC KEY BLOCK----- diff --git 
a/x86_64-linux-gnu-glibc/reenable_DT_HASH.patch b/x86_64-linux-gnu-glibc/reenable_DT_HASH.patch new file mode 100644 index 0000000000..87a2329d82 --- /dev/null +++ b/x86_64-linux-gnu-glibc/reenable_DT_HASH.patch @@ -0,0 +1,28 @@ +From 31915e55f9c34f6137ab1c5ac002375a2d5d4589 Mon Sep 17 00:00:00 2001 +From: Frederik Schwan +Date: Fri, 4 Aug 2023 15:19:57 +0200 +Subject: [PATCH] force --hash-style=both to keep compatibility with old niche + software + +--- + Makeconfig | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/Makeconfig b/Makeconfig +index 77d7fd14df..2ae67c4beb 100644 +--- a/Makeconfig ++++ b/Makeconfig +@@ -378,6 +378,10 @@ relro-LDFLAGS = -Wl,-z,relro + LDFLAGS.so += $(relro-LDFLAGS) + LDFLAGS-rtld += $(relro-LDFLAGS) + ++hashstyle-LDFLAGS = -Wl,--hash-style=both ++LDFLAGS.so += $(hashstyle-LDFLAGS) ++LDFLAGS-rtld += $(hashstyle-LDFLAGS) ++ + # Linker options to enable and disable DT_RELR. + ifeq ($(have-dt-relr),yes) + dt-relr-ldflag = -Wl,-z,pack-relative-relocs +-- +2.41.0 + diff --git a/x86_64-linux-gnu-glibc/sdt-config.h b/x86_64-linux-gnu-glibc/sdt-config.h new file mode 100644 index 0000000000..733045a527 --- /dev/null +++ b/x86_64-linux-gnu-glibc/sdt-config.h @@ -0,0 +1,6 @@ +/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure. + + This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to + indicate whether the assembler supports "?" in .pushsection directives. */ + +#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1 diff --git a/x86_64-linux-gnu-glibc/sdt.h b/x86_64-linux-gnu-glibc/sdt.h new file mode 100644 index 0000000000..c0c5a492cb --- /dev/null +++ b/x86_64-linux-gnu-glibc/sdt.h @@ -0,0 +1,430 @@ +/* - Systemtap static probe definition macros. + + This file is dedicated to the public domain, pursuant to CC0 + (https://creativecommons.org/publicdomain/zero/1.0/) +*/ + +#ifndef _SYS_SDT_H +#define _SYS_SDT_H 1 + +/* + This file defines a family of macros + + STAP_PROBEn(op1, ..., opn) + + that emit a nop into the instruction stream, and some data into an auxiliary + note section. The data in the note section describes the operands, in terms + of size and location. Each location is encoded as assembler operand string. + Consumer tools such as gdb or systemtap insert breakpoints on top of + the nop, and decode the location operand-strings, like an assembler, + to find the values being passed. + + The operand strings are selected by the compiler for each operand. + They are constrained by gcc inline-assembler codes. The default is: + + #define STAP_SDT_ARG_CONSTRAINT nor + + This is a good default if the operands tend to be integral and + moderate in number (smaller than number of registers). In other + cases, the compiler may report "'asm' requires impossible reload" or + similar. 
In this case, consider simplifying the macro call (fewer + and simpler operands), reduce optimization, or override the default + constraints string via: + + #define STAP_SDT_ARG_CONSTRAINT g + #include + + See also: + https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + */ + + + +#ifdef __ASSEMBLER__ +# define _SDT_PROBE(provider, name, n, arglist) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_STRING_1, (_SDT_DEPAREN_##n arglist)) \ + _SDT_ASM_BASE +# define _SDT_ASM_1(x) x; +# define _SDT_ASM_2(a, b) a,b; +# define _SDT_ASM_3(a, b, c) a,b,c; +# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e; +# define _SDT_ASM_STRING_1(x) .asciz #x; +# define _SDT_DEPAREN_0() /* empty */ +# define _SDT_DEPAREN_1(a) a +# define _SDT_DEPAREN_2(a,b) a b +# define _SDT_DEPAREN_3(a,b,c) a b c +# define _SDT_DEPAREN_4(a,b,c,d) a b c d +# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e +# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f +# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g +# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h +# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i +# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j +# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k +# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l +#else +# define _SDT_PROBE(provider, name, n, arglist) \ + do { \ + __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \ + :: _SDT_ASM_OPERANDS_##n arglist); \ + __asm__ __volatile__ (_SDT_ASM_BASE); \ + } while (0) +# define _SDT_S(x) #x +# define _SDT_ASM_1(x) _SDT_S(x) "\n" +# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n" +# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "\n" +# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "," _SDT_S(d) "," \ + _SDT_S(e) "\n" +# define _SDT_ASM_ARGS(n) _SDT_ASM_STRING(_SDT_ASM_TEMPLATE_##n) +# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x) + +# define _SDT_ARGFMT(no) %n[_SDT_S##no]@_SDT_ARGTMPL(_SDT_A##no) + +# ifndef STAP_SDT_ARG_CONSTRAINT +# if defined __powerpc__ +# define STAP_SDT_ARG_CONSTRAINT nZr +# else +# define STAP_SDT_ARG_CONSTRAINT nor +# endif +# endif + +# define _SDT_STRINGIFY(x) #x +# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x) +# define _SDT_ARG(n, x) \ + [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? 1 : -1) * (int) _SDT_ARGSIZE (x)), \ + [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x)) +#endif +#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x) + +#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \ + || __builtin_classify_type (x) == 5) + +#ifdef __cplusplus +# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \ + && __sdt_type<__typeof (x)>::__sdt_signed) +# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \ + ? 
sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) + +# include + +template +struct __sdt_type +{ + static const bool __sdt_signed = false; +}; + +#define __SDT_ALWAYS_SIGNED(T) \ +template<> struct __sdt_type { static const bool __sdt_signed = true; }; +#define __SDT_COND_SIGNED(T,CT) \ +template<> struct __sdt_type { static const bool __sdt_signed = ((CT)(-1) < 1); }; +__SDT_ALWAYS_SIGNED(signed char) +__SDT_ALWAYS_SIGNED(short) +__SDT_ALWAYS_SIGNED(int) +__SDT_ALWAYS_SIGNED(long) +__SDT_ALWAYS_SIGNED(long long) +__SDT_ALWAYS_SIGNED(volatile signed char) +__SDT_ALWAYS_SIGNED(volatile short) +__SDT_ALWAYS_SIGNED(volatile int) +__SDT_ALWAYS_SIGNED(volatile long) +__SDT_ALWAYS_SIGNED(volatile long long) +__SDT_ALWAYS_SIGNED(const signed char) +__SDT_ALWAYS_SIGNED(const short) +__SDT_ALWAYS_SIGNED(const int) +__SDT_ALWAYS_SIGNED(const long) +__SDT_ALWAYS_SIGNED(const long long) +__SDT_ALWAYS_SIGNED(const volatile signed char) +__SDT_ALWAYS_SIGNED(const volatile short) +__SDT_ALWAYS_SIGNED(const volatile int) +__SDT_ALWAYS_SIGNED(const volatile long) +__SDT_ALWAYS_SIGNED(const volatile long long) +__SDT_COND_SIGNED(char, char) +__SDT_COND_SIGNED(wchar_t, wchar_t) +__SDT_COND_SIGNED(volatile char, char) +__SDT_COND_SIGNED(volatile wchar_t, wchar_t) +__SDT_COND_SIGNED(const char, char) +__SDT_COND_SIGNED(const wchar_t, wchar_t) +__SDT_COND_SIGNED(const volatile char, char) +__SDT_COND_SIGNED(const volatile wchar_t, wchar_t) +#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) +/* __SDT_COND_SIGNED(char16_t) */ +/* __SDT_COND_SIGNED(char32_t) */ +#endif + +template +struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {}; + +template +struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {}; + +#elif !defined(__ASSEMBLER__) +__extension__ extern unsigned long long __sdt_unsp; +# define _SDT_ARGINTTYPE(x) \ + __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \ + + 3) & -4) == 4, (x), 0U)) +# define _SDT_ARGSIGNED(x) \ + (!__extension__ \ + (__builtin_constant_p ((((unsigned long long) \ + (_SDT_ARGINTTYPE (x)) __sdt_unsp) \ + & ((unsigned long long)1 << (sizeof (unsigned long long) \ + * __CHAR_BIT__ - 1))) == 0) \ + || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0)) +# define _SDT_ARGSIZE(x) \ + (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) +#endif + +#if defined __powerpc__ || defined __powerpc64__ +# define _SDT_ARGTMPL(id) %I[id]%[id] +#elif defined __i386__ +# define _SDT_ARGTMPL(id) %w[id] /* gcc.gnu.org/PR80115 */ +#else +# define _SDT_ARGTMPL(id) %[id] +#endif + +#ifdef __LP64__ +# define _SDT_ASM_ADDR .8byte +#else +# define _SDT_ASM_ADDR .4byte +#endif + +/* The ia64 and s390 nop instructions take an argument. */ +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define _SDT_NOP nop 0 +#else +#define _SDT_NOP nop +#endif + +#define _SDT_NOTE_NAME "stapsdt" +#define _SDT_NOTE_TYPE 3 + +/* If the assembler supports the necessary feature, then we can play + nice with code in COMDAT sections, which comes up in C++ code. + Without that assembler support, some combinations of probe placements + in certain kinds of C++ code may produce link-time errors. */ +#include "sdt-config.h" +#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT +# define _SDT_ASM_AUTOGROUP "?" 
+#else +# define _SDT_ASM_AUTOGROUP "" +#endif + +#define _SDT_ASM_BODY(provider, name, pack_args, args) \ + _SDT_ASM_1(990: _SDT_NOP) \ + _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ + _SDT_ASM_1( .balign 4) \ + _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ + _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \ + _SDT_ASM_1(992: .balign 4) \ + _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \ + _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \ + _SDT_SEMAPHORE(provider,name) \ + _SDT_ASM_STRING(provider) \ + _SDT_ASM_STRING(name) \ + pack_args args \ + _SDT_ASM_1(994: .balign 4) \ + _SDT_ASM_1( .popsection) + +#define _SDT_ASM_BASE \ + _SDT_ASM_1(.ifndef _.stapsdt.base) \ + _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \ + .stapsdt.base,comdat) \ + _SDT_ASM_1( .weak _.stapsdt.base) \ + _SDT_ASM_1( .hidden _.stapsdt.base) \ + _SDT_ASM_1( _.stapsdt.base: .space 1) \ + _SDT_ASM_2( .size _.stapsdt.base, 1) \ + _SDT_ASM_1( .popsection) \ + _SDT_ASM_1(.endif) + +#if defined _SDT_HAS_SEMAPHORES +#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore) +#else +#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0) +#endif + +#define _SDT_ASM_TEMPLATE_0 /* no arguments */ +#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1) +#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(2) +#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ARGFMT(3) +#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ARGFMT(4) +#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ARGFMT(5) +#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ARGFMT(6) +#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ARGFMT(7) +#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ARGFMT(8) +#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ARGFMT(9) +#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ARGFMT(10) +#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ARGFMT(11) +#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ARGFMT(12) +#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0) +#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1) +#define _SDT_ASM_OPERANDS_2(arg1, arg2) \ + _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2) +#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \ + _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3) +#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \ + _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4) +#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \ + _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5) +#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6) +#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7) +#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \ + _SDT_ARG(8, arg8) +#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \ + _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \ + _SDT_ARG(9, arg9) +#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \ + _SDT_ARG(10, arg10) +#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \ + _SDT_ARG(11, arg11) +#define 
_SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \ + _SDT_ARG(12, arg12) + +/* These macros can be used in C, C++, or assembly code. + In assembly code the arguments should use normal assembly operand syntax. */ + +#define STAP_PROBE(provider, name) \ + _SDT_PROBE(provider, name, 0, ()) +#define STAP_PROBE1(provider, name, arg1) \ + _SDT_PROBE(provider, name, 1, (arg1)) +#define STAP_PROBE2(provider, name, arg1, arg2) \ + _SDT_PROBE(provider, name, 2, (arg1, arg2)) +#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \ + _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3)) +#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \ + _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4)) +#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \ + _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5)) +#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6)) +#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7)) +#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \ + _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8)) +#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\ + _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)) +#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_PROBE(provider, name, 10, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10)) +#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_PROBE(provider, name, 11, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11)) +#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_PROBE(provider, name, 12, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12)) + +/* This STAP_PROBEV macro can be used in variadic scenarios, where the + number of probe arguments is not known until compile time. Since + variadic macro support may vary with compiler options, you must + pre-#define SDT_USE_VARIADIC to enable this type of probe. + + The trick to count __VA_ARGS__ was inspired by this post by + Laurent Deniau : + http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5 + + Note that our _SDT_NARG is called with an extra 0 arg that's not + counted, so we don't have to worry about the behavior of macros + called without any arguments. */ + +#ifdef SDT_USE_VARIADIC +#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0) +#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N +#define _SDT_PROBE_N(provider, name, N, ...) \ + _SDT_PROBE(provider, name, N, (__VA_ARGS__)) +#define STAP_PROBEV(provider, name, ...) \ + _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__) +#endif + +/* These macros are for use in asm statements. You must compile + with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro. + + The STAP_PROBE_ASM macro generates a quoted string to be used in the + template portion of the asm statement, concatenated with strings that + contain the actual assembly code around the probe site. 
+ + For example: + + asm ("before\n" + STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi)) + "after"); + + emits the assembly code for "before\nafter", with a probe in between. + The probe arguments are the %eax register, and the value of the memory + word located 4 bytes past the address in the %esi register. Note that + because this is a simple asm, not a GNU C extended asm statement, these + % characters do not need to be doubled to generate literal %reg names. + + In a GNU C extended asm statement, the probe arguments can be specified + using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired + macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments, + and appears in the input operand list of the asm statement. For example: + + asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand + STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3)) + "otherinsn %[namedarg]" + : "r" (outvar) + : "g" (some_value), [namedarg] "i" (1234), + STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234)); + + This is just like writing: + + STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234)); + + but the probe site is right between "someinsn" and "otherinsn". + + The probe arguments in STAP_PROBE_ASM can be given as assembly + operands instead, even inside a GNU C extended asm statement. + Note that these can use operand templates like %0 or %[name], + and likewise they must write %%reg for a literal operand of %reg. */ + +#if __STDC_VERSION__ >= 199901L +# define STAP_PROBE_ASM(provider, name, ...) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_STRING, (__VA_ARGS__)) \ + _SDT_ASM_BASE +# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__) +#else +# define STAP_PROBE_ASM(provider, name, args) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_STRING, (args)) \ + _SDT_ASM_BASE +#endif +#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n + + +/* DTrace compatible macro names. 
*/ +#define DTRACE_PROBE(provider,probe) \ + STAP_PROBE(provider,probe) +#define DTRACE_PROBE1(provider,probe,parm1) \ + STAP_PROBE1(provider,probe,parm1) +#define DTRACE_PROBE2(provider,probe,parm1,parm2) \ + STAP_PROBE2(provider,probe,parm1,parm2) +#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \ + STAP_PROBE3(provider,probe,parm1,parm2,parm3) +#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \ + STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4) +#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \ + STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) +#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \ + STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) +#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \ + STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) +#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \ + STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) +#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \ + STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) +#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \ + STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) +#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \ + STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) +#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \ + STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) + + +#endif /* sys/sdt.h */ diff --git a/x86_64-linux-gnu-linux-api-headers/PKGBUILD b/x86_64-linux-gnu-linux-api-headers/PKGBUILD new file mode 100644 index 0000000000..af8b70d94e --- /dev/null +++ b/x86_64-linux-gnu-linux-api-headers/PKGBUILD @@ -0,0 +1,29 @@ +# Maintainer: Xiaotian Wu + +_target_arch=x86 +_target=x86_64-linux-gnu +pkgname=$_target-linux-api-headers +pkgver=6.0 +pkgrel=1 +pkgdesc="Kernel headers sanitized for use in userspace ($_target)" +arch=(any) +url='https://www.kernel.org' +license=(GPL2) +makedepends=(rsync) +source=(https://www.kernel.org/pub/linux/kernel/v6.x/linux-$pkgver.tar.{xz,sign}) +sha256sums=('5c2443a5538de52688efb55c27ab0539c1f5eb58c0cfd16a2b9fbb08fd81788e' + 'SKIP') +validpgpkeys=( + 'ABAF11C65A2970B130ABE3C479BE3E4300411886' # Linus Torvalds + '647F28654894E3BD457199BE38DBBDC86092693E' # Greg Kroah-Hartman +) + + +package() { + cd linux-$pkgver + + make INSTALL_HDR_PATH="$pkgdir/usr/$_target/" ARCH=$_target_arch V=0 headers_install + + # clean-up unnecessary files generated during install + find "$pkgdir" \( -name .install -or -name ..install.cmd \) -delete +} diff --git a/xdg-desktop-portal-wlr/PKGBUILD b/xdg-desktop-portal-wlr/PKGBUILD index 7834b50745..f5923ac7ad 100644 --- a/xdg-desktop-portal-wlr/PKGBUILD +++ b/xdg-desktop-portal-wlr/PKGBUILD @@ -11,7 +11,7 @@ arch=('loong64' 'x86_64') license=('MIT') provides=('xdg-desktop-portal-impl') depends=('xdg-desktop-portal' 'pipewire' 'pipewire-session-manager' 'libinih') -makedepends=('meson' 'wayland-protocols' 'wayland' 'scdoc') +makedepends=('meson' 'wayland-protocols' 'wayland' 'scdoc' 'mesa') optdepends=( 'slurp: to choose which output to screencast using 
slurp' 'wofi: to choose which output to screencast using wofi' diff --git a/xf86-video-loongson/PKGBUILD b/xf86-video-loongson/PKGBUILD new file mode 100644 index 0000000000..9b4078f5c2 --- /dev/null +++ b/xf86-video-loongson/PKGBUILD @@ -0,0 +1,46 @@ +# Maintainer: Jan de Groot +# Contributor: Alexander Baldeck + +pkgname=xf86-video-loongson +pkgver=0.2.0 +pkgrel=1 +epoch=1 +pkgdesc="X.org loongson video driver" +arch=('loong64' 'x86_64') +url="https://xorg.freedesktop.org/" +license=('custom') +depends=('systemd-libs' 'mesa') +makedepends=('xorg-server-devel' 'systemd' 'X-ABI-VIDEODRV_VERSION=25.2') +conflicts=('xorg-server<1.20.0' 'X-ABI-VIDEODRV_VERSION<25' 'X-ABI-VIDEODRV_VERSION>=26') +groups=('xorg-drivers') +source=(${url}/releases/individual/driver/${pkgname}-${pkgver}.tar.gz) +sha512sums=('bdc6601c45f2e228f374dee36085c9fdea52cd2c95dd67d21d49b3f61daf38eaf04775478215ac472447edd70def661093b8385de3741f97aa670c2fc4a3ad01') + +build() { + cd ${pkgname}-${pkgver} + + CFLAGS+=' -fcommon' # https://wiki.gentoo.org/wiki/Gcc_10_porting_notes/fno_common + + # Since pacman 5.0.2-2, hardened flags are now enabled in makepkg.conf + # With them, module fail to load with undefined symbol. + # See https://bugs.archlinux.org/task/55102 / https://bugs.archlinux.org/task/54845 + export CFLAGS=${CFLAGS/-fno-plt} + export CXXFLAGS=${CXXFLAGS/-fno-plt} + export LDFLAGS=${LDFLAGS/,-z,now} + + ./configure --prefix=/usr + make +} + +check() { + cd ${pkgname}-${pkgver} + make check +} + +package() { + cd ${pkgname}-${pkgver} + + make "DESTDIR=${pkgdir}" install + install -m755 -d "${pkgdir}/usr/share/licenses/${pkgname}" + install -m644 COPYING "${pkgdir}/usr/share/licenses/${pkgname}/" +} diff --git a/xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch b/xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch new file mode 100644 index 0000000000..8eaa54598a --- /dev/null +++ b/xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch @@ -0,0 +1,32 @@ +From 4eda654b6099981294b35bd93c1e4e92e71f376a Mon Sep 17 00:00:00 2001 +From: "Liu, Chang" +Date: Wed, 8 Nov 2023 13:02:10 +0800 +Subject: [PATCH] modesetting: match against Multimedia Video Controllers as + well + +Some GPU devices such as those found in the Loongson 7A2000 bridge +chips and 2K2000 SOCs identify as Multimedia Video Controllers +(PCI class 0x4 subclass 0x0). These have standard KMS drivers in +the kernel and the modesetting driver works flawlessly, so match +against these types of devices as well. 
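+
+For reference, a minimal sketch of how such a class/mask entry selects
+devices (illustrative only; the helper below is not part of the driver and
+assumes the packed class/subclass/prog-if device_class layout used by
+libpciaccess):
+
+    /* A match entry with device_class = 0x00040000 and
+     * device_class_mask = 0x00ff0000 keeps only the base-class byte, so
+     * any PCI base class 0x04 (Multimedia) device is accepted, while
+     * base class 0x03 (Display Controller) devices continue to hit the
+     * existing entry above it. */
+    static int is_multimedia_controller(unsigned int device_class)
+    {
+        return (device_class & 0x00ff0000) == 0x00040000;
+    }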
+--- + hw/xfree86/drivers/modesetting/driver.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/xfree86/drivers/modesetting/driver.c b/hw/xfree86/drivers/modesetting/driver.c +index 9a69452bd..69a2b683f 100644 +--- a/hw/xfree86/drivers/modesetting/driver.c ++++ b/hw/xfree86/drivers/modesetting/driver.c +@@ -96,6 +96,9 @@ static const struct pci_id_match ms_device_match[] = { + { + PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, + 0x00030000, 0x00ff0000, 0}, ++ { ++ PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, ++ 0x00040000, 0x00ff0000, 0}, + + {0, 0, 0}, + }; +-- +2.42.0 + diff --git a/xorg-server/10-modeset.conf b/xorg-server/10-modeset.conf new file mode 100644 index 0000000000..d9bf1c0c32 --- /dev/null +++ b/xorg-server/10-modeset.conf @@ -0,0 +1,6 @@ +Section "Device" + Identifier "Generic Kernel Modesetting Device" + Driver "modesetting" + Option "kmsdev" "/dev/dri/card0" + Option "ShadowFB" "true" +EndSection diff --git a/xorg-server/PKGBUILD b/xorg-server/PKGBUILD index 8cc685bd18..c8bc25ee3d 100644 --- a/xorg-server/PKGBUILD +++ b/xorg-server/PKGBUILD @@ -20,6 +20,8 @@ makedepends=('xorgproto' 'pixman' 'libx11' 'mesa' 'mesa-libgl' 'xtrans' source=(https://xorg.freedesktop.org/releases/individual/xserver/${pkgbase}-${pkgver}.tar.xz{,.sig} xvfb-run # with updates from FC master xvfb-run.1 + 10-modeset.conf + 0001-modesetting-match-against-Multimedia-Video-Controlle.patch ) validpgpkeys=('3C2C43D9447D5938EF4551EBE23B7E70B467F0BF' # Peter Hutterer (Who-T) '67DC86F2623FC5FD4BB5225D14706DBE1E4B4540' # Olivier Fourdan @@ -31,6 +33,11 @@ sha512sums=('ad5edacbe8c7e2ebe6b4a690af94c7ea5ebc781d00b0e58ae2d273c78ceee2fa00b prepare() { cd ${pkgbase}-$pkgver + + # FS#73274 + patch -Np1 -i ../xephyr_Dont_check_for_SeatId_anymore.patch + # fix modesetting driver for loongson and gsgpu + patch -Np1 -i ../0001-modesetting-match-against-Multimedia-Video-Controlle.patch } build() { @@ -50,6 +57,7 @@ build() { -D xephyr=true \ -D glamor=true \ -D udev=true \ + -D udev_kms=true \ -D dtrace=false \ -D systemd_logind=true \ -D suid_wrapper=true \ @@ -111,6 +119,7 @@ package_xorg-server() { # distro specific files must be installed in /usr/share/X11/xorg.conf.d install -m755 -d "${pkgdir}/etc/X11/xorg.conf.d" + install -m644 -Dt "${pkgdir}/usr/share/X11/xorg.conf.d" 10-modeset.conf # license install -m644 -Dt "${pkgdir}/usr/share/licenses/${pkgname}" "${pkgbase}-${pkgver}"/COPYING diff --git a/xsd/0120-g++10.patch b/xsd/0120-g++10.patch new file mode 100644 index 0000000000..441b27897c --- /dev/null +++ b/xsd/0120-g++10.patch @@ -0,0 +1,19 @@ +Description: Fix FTBFS with gcc-10 +Author: Boris Kolpackov +Origin: upstream, https://git.codesynthesis.com/cgit/libxsd-frontend/libxsd-frontend/commit/?id=5029f8665190879285787a9dcdaf5f997cadd2e2 +Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=957999 +Last-Update: 2020-09-10 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +Index: trunk/libxsd-frontend/xsd-frontend/semantic-graph/elements.cxx +=================================================================== +--- trunk.orig/libxsd-frontend/xsd-frontend/semantic-graph/elements.cxx ++++ trunk/libxsd-frontend/xsd-frontend/semantic-graph/elements.cxx +@@ -3,6 +3,7 @@ + // license : GNU GPL v2 + exceptions; see accompanying LICENSE file + + #include ++#include + + #include + diff --git a/xsd/xsd-c++17.patch b/xsd/xsd-c++17.patch new file mode 100644 index 0000000000..6db0b25a20 --- /dev/null +++ b/xsd/xsd-c++17.patch @@ -0,0 +1,48 @@ +Index: 
xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.cxx +=================================================================== +--- xsd-4.0.0+dep.orig/libcutl/cutl/shared-ptr/base.cxx ++++ xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.cxx +@@ -25,7 +25,7 @@ namespace cutl + // + // + void* +-operator new (size_t n, cutl::share s) throw (std::bad_alloc) ++operator new (size_t n, cutl::share s) noexcept(false) + { + if (s == shared) + { +Index: xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.hxx +=================================================================== +--- xsd-4.0.0+dep.orig/libcutl/cutl/shared-ptr/base.hxx ++++ xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.hxx +@@ -31,7 +31,7 @@ extern LIBCUTL_EXPORT cutl::share shared + extern LIBCUTL_EXPORT cutl::share exclusive; + + LIBCUTL_EXPORT void* +-operator new (std::size_t, cutl::share) throw (std::bad_alloc); ++operator new (std::size_t, cutl::share) noexcept(false); + + LIBCUTL_EXPORT void + operator delete (void*, cutl::share) throw (); +@@ -61,7 +61,7 @@ namespace cutl + _ref_count () const; + + void* +- operator new (std::size_t, share) throw (std::bad_alloc); ++ operator new (std::size_t, share) noexcept(false); + + void + operator delete (void*, share) throw (); +Index: xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.ixx +=================================================================== +--- xsd-4.0.0+dep.orig/libcutl/cutl/shared-ptr/base.ixx ++++ xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.ixx +@@ -59,7 +59,7 @@ namespace cutl + } + + inline void* shared_base:: +- operator new (std::size_t n, share) throw (std::bad_alloc) ++ operator new (std::size_t n, share) noexcept(false) + { + return ::operator new (n); + } diff --git a/xsv/PKGBUILD b/xsv/PKGBUILD index 1fac1b7130..534dcf8217 100644 --- a/xsv/PKGBUILD +++ b/xsv/PKGBUILD @@ -23,7 +23,7 @@ pkgver() { prepare() { cd xsv - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/yaegi/PKGBUILD b/yaegi/PKGBUILD index f1894a36f8..62bf9aee55 100644 --- a/yaegi/PKGBUILD +++ b/yaegi/PKGBUILD @@ -18,7 +18,7 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw -ldflags=-linkmode=external" + export GOFLAGS="-trimpath -mod=readonly -modcacherw -ldflags=-linkmode=external" go build -v ./cmd/yaegi } diff --git a/yazi/PKGBUILD b/yazi/PKGBUILD index 9ec862be5d..8938622e04 100644 --- a/yazi/PKGBUILD +++ b/yazi/PKGBUILD @@ -27,7 +27,7 @@ options=('!lto') prepare() { cd "$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zbus_xmlgen/PKGBUILD b/zbus_xmlgen/PKGBUILD index ae3ce4b3de..d7105fe894 100644 --- a/zbus_xmlgen/PKGBUILD +++ b/zbus_xmlgen/PKGBUILD @@ -28,7 +28,7 @@ prepare() { cd $_project_name-$pkgname-$pkgver/$pkgname export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zellij/PKGBUILD b/zellij/PKGBUILD index 98ff84fbed..8c31661200 100644 --- a/zellij/PKGBUILD +++ b/zellij/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zenith/PKGBUILD b/zenith/PKGBUILD index cfc8248459..5d3858342a 
100644 --- a/zenith/PKGBUILD +++ b/zenith/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('2cbcea2625cfa97c161b974ad412a47e330f7fd31bec0479e329ed3606cfc569') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zip/PKGBUILD b/zip/PKGBUILD index 201ddb348f..a727417948 100644 --- a/zip/PKGBUILD +++ b/zip/PKGBUILD @@ -25,6 +25,8 @@ options=('!makeflags') prepare() { cd "${srcdir}/${pkgname}${_pkgver}" + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} sed -e "/^CFLAGS_NOOPT =/s/\$/ $CPPFLAGS $CFLAGS/" -i unix/Makefile sed -e "s/^LFLAGS1=''/LFLAGS1=$LDFLAGS/" -i unix/configure patch -p1 -i ../zip-3.0-currdir.patch diff --git a/zola/PKGBUILD b/zola/PKGBUILD index 9d86e9a119..646d503e4e 100644 --- a/zola/PKGBUILD +++ b/zola/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('c0e1711a68bc005c2e0ecc76a468f3459739c9e54af34850cb725d04391e19b5') prepare() { cd zola-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zoxide/PKGBUILD b/zoxide/PKGBUILD index 9ef8dbe036..4cb5fb27ca 100644 --- a/zoxide/PKGBUILD +++ b/zoxide/PKGBUILD @@ -27,7 +27,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zram-generator/PKGBUILD b/zram-generator/PKGBUILD index 2563aeb42d..72158f3ad8 100644 --- a/zram-generator/PKGBUILD +++ b/zram-generator/PKGBUILD @@ -15,6 +15,12 @@ sha256sums=('506d47acbabffa7013bb40a1f61c6edfa758a7bd55820d06ef49c7bc83dba762') build() { cd zram-generator-$pkgver + find -name Cargo.lock -exec rm -f {} \; + mkdir .cargo + cat > .cargo/config.toml < Date: Mon, 19 Feb 2024 12:00:04 +0800 Subject: [PATCH 02/23] lua-bit32 --- lua-bit32/PKGBUILD | 2 +- ocaml/{ocaml-5.0.0-la64.patch => ocaml-5.1.0-la64.patch} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename ocaml/{ocaml-5.0.0-la64.patch => ocaml-5.1.0-la64.patch} (100%) diff --git a/lua-bit32/PKGBUILD b/lua-bit32/PKGBUILD index a747de9216..c0d2d5e395 100644 --- a/lua-bit32/PKGBUILD +++ b/lua-bit32/PKGBUILD @@ -21,7 +21,7 @@ makedepends=(lua options=(debug) _archive="$_project-$_compatrel" _rockspec="$_rockname-${pkgver%_*}-$_rockrel.rockspec" -_rock="$_rockname-${pkgver%_*}-$_rockrel.linux-$CARCH.rock" +_rock="$_rockname-${pkgver%_*}-$_rockrel.linux-`uname -m`.rock" source=("$url/archive/v$_compatrel/$_archive.tar.gz" "${url/github/raw.githubusercontent}/1e31f3ddc517b4e521c73f7d6eaecd5e1787daa6/rockspecs/$_rockspec") sha256sums=('d1ed32f091856f6fffab06232da79c48b437afd4cd89e5c1fc85d7905b011430' diff --git a/ocaml/ocaml-5.0.0-la64.patch b/ocaml/ocaml-5.1.0-la64.patch similarity index 100% rename from ocaml/ocaml-5.0.0-la64.patch rename to ocaml/ocaml-5.1.0-la64.patch From c2a993d62618646c749a22fcd25b20264d3c3be8 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 19 Feb 2024 14:49:41 +0800 Subject: [PATCH 03/23] boost --- boost/PKGBUILD | 7 ++--- boost/boost-1.79.0-la64.patch | 55 ----------------------------------- 2 files changed, 2 insertions(+), 60 deletions(-) delete mode 100644 boost/boost-1.79.0-la64.patch diff --git a/boost/PKGBUILD b/boost/PKGBUILD index 8cc5cad354..5aae060550 100644 --- a/boost/PKGBUILD +++ b/boost/PKGBUILD @@ -21,13 +21,11 @@ makedepends=('icu' 'python' 'python-numpy' 'bzip2' 'zlib' 'openmpi' 'zstd') 
source=(https://boostorg.jfrog.io/artifactory/main/release/$pkgver/source/$_srcname.tar.bz2 boost-1.81.0-phoenix-multiple-definitions.patch $pkgname-support-fn.contains-f-where-f-is-a-function.patch::https://github.com/boostorg/function/commit/7ca2310b15e3.patch - $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch - boost-1.79.0-la64.patch) + $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch) sha256sums=('6478edfe2f3305127cffe8caf73ea0176c53769f4bf1585be237eb30798c3b8e' '3ebf428ef6be090a7b56a233330375539ac429333b83708e28fe5db049cfecdb' '1b5998ee8fb389dd6df55a3684d29ffa37246bc007e8e6712bf2be6c7f745036' - 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee' - '0fb9188bf211deff0d48dfb7cef614bbdebcd7dccea6e8c015da5d691eda5d94') + 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee') prepare() { cd $_srcname @@ -42,7 +40,6 @@ prepare() { # https://github.com/boostorg/ublas/pull/97 patch -Np2 -i ../$pkgname-ublas-c++20-iterator.patch - patch -Np1 -i $srcdir/boost-1.79.0-la64.patch } build() { diff --git a/boost/boost-1.79.0-la64.patch b/boost/boost-1.79.0-la64.patch deleted file mode 100644 index 2b9602a900..0000000000 --- a/boost/boost-1.79.0-la64.patch +++ /dev/null @@ -1,55 +0,0 @@ -diff --git a/boostcpp.jam b/boostcpp.jam -index 082536e2a5..7565dae80d 100644 ---- a/boostcpp.jam -+++ b/boostcpp.jam -@@ -634,7 +634,7 @@ rule address-model ( ) - return @boostcpp.deduce-address-model ; - } - --local deducable-architectures = arm mips1 power riscv s390x sparc x86 combined ; -+local deducable-architectures = arm loongarch mips1 power riscv s390x sparc x86 combined ; - feature.feature deduced-architecture : $(deducable-architectures) : propagated optional composite hidden ; - for a in $(deducable-architectures) - { -@@ -645,9 +645,10 @@ rule deduce-architecture ( properties * ) - { - local result ; - local filtered = [ toolset-properties $(properties) ] ; -- local names = arm mips1 power riscv s390x sparc x86 combined ; -+ local names = arm loongarch mips1 power riscv s390x sparc x86 combined ; - local idx = [ configure.find-builds "default architecture" : $(filtered) - : /boost/architecture//arm -+ : /boost/architecture//loongarch - : /boost/architecture//mips1 - : /boost/architecture//power - : /boost/architecture//riscv -Submodule libs/config 08dced51e9..5c177b2269: -diff --git a/libs/config/checks/architecture/Jamfile.jam b/libs/config/checks/architecture/Jamfile.jam -index 2ba54f9a..e8838b41 100644 ---- a/libs/config/checks/architecture/Jamfile.jam -+++ b/libs/config/checks/architecture/Jamfile.jam -@@ -18,6 +18,7 @@ obj 64 : 64.cpp ; - - obj arm : arm.cpp ; - obj combined : combined.cpp ; -+obj loongarch : loongarch.cpp ; - obj mips : mips.cpp ; - alias mips1 : mips ; # Backwards compatibility - obj power : power.cpp ; -diff --git a/libs/config/checks/architecture/loongarch.cpp b/libs/config/checks/architecture/loongarch.cpp -new file mode 100644 -index 00000000..5be8cb09 ---- /dev/null -+++ b/libs/config/checks/architecture/loongarch.cpp -@@ -0,0 +1,11 @@ -+// loongarch.cpp -+// -+// Copyright (c) 2012 Steven Watanabe -+// -+// Distributed under the Boost Software License Version 1.0. 
(See -+// accompanying file LICENSE_1_0.txt or copy at -+// http://www.boost.org/LICENSE_1_0.txt) -+ -+#if !defined(__loongarch__) -+#error "Not LoongArch" -+#endif From b75f28c67a2a1e41b3c379e437588b21e5ee0fbc Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 19 Feb 2024 16:34:02 +0800 Subject: [PATCH 04/23] xorg-server --- xorg-server/PKGBUILD | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xorg-server/PKGBUILD b/xorg-server/PKGBUILD index c8bc25ee3d..bc969a1560 100644 --- a/xorg-server/PKGBUILD +++ b/xorg-server/PKGBUILD @@ -29,13 +29,12 @@ validpgpkeys=('3C2C43D9447D5938EF4551EBE23B7E70B467F0BF' # Peter Hutterer (Who- sha512sums=('ad5edacbe8c7e2ebe6b4a690af94c7ea5ebc781d00b0e58ae2d273c78ceee2fa00b86d10479ad69da1b3233490619bae5a33db64c967c24bbfc5d5d39ddce1cb' 'SKIP' '672375cb5028ba9cda286e317d17bd8c9a9039483e7f79c21f223fd08ba07655729e9f59a082f4b8f5d8de45a77a9e9affce1002fb8c6657e26ef1a490654e49' - 'de5e2cb3c6825e6cf1f07ca0d52423e17f34d70ec7935e9dd24be5fb9883bf1e03b50ff584931bd3b41095c510ab2aa44d2573fd5feaebdcb59363b65607ff22') + 'de5e2cb3c6825e6cf1f07ca0d52423e17f34d70ec7935e9dd24be5fb9883bf1e03b50ff584931bd3b41095c510ab2aa44d2573fd5feaebdcb59363b65607ff22' + '1aa711f4948cd1557d77bd47a64ea92be55cf737b8204214c6c3ae2ecd00dc6928fd7a789d1aa5faaff5d6162f895a41efd332e8f1710d0a7c9b33326b057ec5' + '0e55cc994dd8f1309c6ea40cb1f5b2d763d850a466c9d30bd8619fd68ce369b19f098d28ab684db1bd2758a6402654cf93188651e863f0e1be4eed8cf9b40e14') prepare() { cd ${pkgbase}-$pkgver - - # FS#73274 - patch -Np1 -i ../xephyr_Dont_check_for_SeatId_anymore.patch # fix modesetting driver for loongson and gsgpu patch -Np1 -i ../0001-modesetting-match-against-Multimedia-Video-Controlle.patch } From e89488ef83290abafc3803b2dffd32d6503686c3 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Tue, 20 Feb 2024 19:37:42 +0800 Subject: [PATCH 05/23] libjxl --- libjxl/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libjxl/PKGBUILD b/libjxl/PKGBUILD index c491dde5c1..81f977eb81 100644 --- a/libjxl/PKGBUILD +++ b/libjxl/PKGBUILD @@ -82,7 +82,7 @@ package_libjxl() { DESTDIR="$pkgdir" cmake --install build install -D -m644 libjxl/{LICENSE,PATENTS} -t "${pkgdir}/usr/share/licenses/${pkgname}" -# mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar + mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar } package_libjxl-doc() { From 11f7d223c21e7932fcdba090d32c4b4fdf27ebe0 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Tue, 20 Feb 2024 21:07:13 +0800 Subject: [PATCH 06/23] mediainfo --- mediainfo/PKGBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mediainfo/PKGBUILD b/mediainfo/PKGBUILD index e9f6488e15..6fa948cb16 100644 --- a/mediainfo/PKGBUILD +++ b/mediainfo/PKGBUILD @@ -14,8 +14,8 @@ license=('BSD') depends=("libmediainfo=$pkgver") makedepends=('wxwidgets-gtk3') source=("$pkgname-$pkgver.tar.gz::https://github.com/MediaArea/MediaInfo/archive/v$pkgver.tar.gz") -sha512sums=('e5adb989bd9686d64c952794155993d7e7d9b0d81500e99cd9b7af61258bb71639ac1aa913dcd36ba51dca47e81ab8244de2bff37b603bd461cf13c42f59487a') -b2sums=('9f0a1d1c0f9540be16963fb8f8d363f071efa25fcb2deb2494481d0eca58db79a29a6c5f8cc22eb01df30626ce44da7a0d921de24f6553e809e2588009445fa1') +sha512sums=('33747cacd0657b67e7bc63596cb58cb4a8c6c2cab50c70b3075c1ffd12a9d165121a1dbcb2114a1e822273807341faa436181c3a0c3dfb1d98e45f120052d720') +b2sums=('33b19a211b500caa0cb1af4ba17dfea70ffa1b169f7ff7a903614b53a08c54f6f8216f8a2e2bfffe02eef8733c741bc10af928111d336675634ce95418a5405d') build() { cd 
"MediaInfo-$pkgver" From ae552bb24894b8af24540f3781777644e0b90d18 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 22 Feb 2024 11:44:00 +0800 Subject: [PATCH 07/23] v2ray --- v2ray/PKGBUILD | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/v2ray/PKGBUILD b/v2ray/PKGBUILD index 87f2774636..6cbaf8d418 100644 --- a/v2ray/PKGBUILD +++ b/v2ray/PKGBUILD @@ -27,8 +27,7 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CPPFLAGS="${CPPFLAGS}" export GOPROXY=https://goproxy.cn - go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 - go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=github.com/quic-go/quic-go=github.com/quic-go/quic-go@v0.41.0 go mod tidy go build -o v2ray ./main } From 0dd9a83b4d25c32ad2f23bafa1d7caac51970998 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 22 Feb 2024 19:26:05 +0800 Subject: [PATCH 08/23] cargo-c --- cargo-c/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cargo-c/PKGBUILD b/cargo-c/PKGBUILD index ac384b4564..b27ca3b58f 100644 --- a/cargo-c/PKGBUILD +++ b/cargo-c/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('a52bb78cf6db00aa1caf06c679cfece27357c84367d8ac167d715e05e5f5a778' prepare() { ln -sf "../${pkgname}-${pkgver}.Cargo.lock" "${pkgname}-${pkgver}/Cargo.lock" - cargo fetch --locked --manifest-path="${pkgname}-${pkgver}/Cargo.toml" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" --manifest-path="${pkgname}-${pkgver}/Cargo.toml" } build() { From 5ca49dbd14ca2e3d56c4ae887cbef3244eb07960 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 7 Mar 2024 12:52:03 +0800 Subject: [PATCH 09/23] edk2 --- edk2/60-edk2-loongarch64.json | 5 ++-- edk2/PKGBUILD | 14 ++++------- edk2/relax_edk2_gcc14.diff | 44 ----------------------------------- 3 files changed, 8 insertions(+), 55 deletions(-) delete mode 100644 edk2/relax_edk2_gcc14.diff diff --git a/edk2/60-edk2-loongarch64.json b/edk2/60-edk2-loongarch64.json index 4806bee426..b998701d8c 100644 --- a/edk2/60-edk2-loongarch64.json +++ b/edk2/60-edk2-loongarch64.json @@ -5,6 +5,7 @@ ], "mapping": { "device": "flash", + "mode": "split", "executable": { "filename": "/usr/share/edk2/loongarch64/QEMU_CODE.fd", "format": "raw" @@ -18,12 +19,12 @@ { "architecture": "loongarch64", "machines": [ - "virt-*" + "virt" ] } ], "features": [ - "verbose-static" + "acpi" ], "tags": [ diff --git a/edk2/PKGBUILD b/edk2/PKGBUILD index e684ce9b15..ef009286e7 100644 --- a/edk2/PKGBUILD +++ b/edk2/PKGBUILD @@ -30,7 +30,7 @@ makedepends=( options=(!makeflags) source=( git+$url#tag=$_commit - edk2-platforms::git+https://github.com/tianocore/edk2-platforms.git + edk2-platforms::git+https://github.com/tianocore/edk2-platforms.git#tag=4e34823e9c954c67a7b5b81799549d493d1235fa $pkgbase-softfloat::git+https://github.com/ucb-bar/berkeley-softfloat-3.git https://www.openssl.org/source/openssl-$_openssl_ver.tar.gz{,.asc} https://github.com/google/brotli/archive/v$_brotli_ver/brotli-$_brotli_ver.tar.gz @@ -59,7 +59,6 @@ source=( 82-edk2-ovmf-ia32-on-x86_64-csm-4m.json $pkgbase-202202-brotli.patch edk2-use-env-toolchains.patch - relax_edk2_gcc14.diff ) sha512sums=('SKIP' 'SKIP' @@ -79,7 +78,7 @@ sha512sums=('SKIP' 'b17d3ff5c9230c394ca4ee8229842c801b0cab3d88b546f2094dd0b42f2bc535f5bda3f9faee4b5418482185887648f906daaf0b7307c4c19747f5f0ab504f9a' '126822ef6198e87fb38014a5ba21969c9a163b41df3cdef6825317971ecc8df4a63099113e687634b88648acc93f24917d729e1c44295d2df7012288740307d3' 
'bbf663d539a985504d5fbc95552a2a60ac860a6bce4a62ecc551292d838b41cba3b5203f580a76a05e9f862ef98e7a3e5da39505c1f39d8ef48c08778fac584a' - 'a6af6487c470e9af86022ee44ea53d2a0f513cacc413c9cc61eeec80ee2d1569daddf311ce8ccf4660d7899d7dd8119f1ef74953d91462c949c4d7ce8c129f16' + '66a0d97f0cc8b4c2184f235ace4fd372efa2cfa175178a05b40053c5e4ef887487e11813d3ff7193c4554c8841166c73dc4f930cb6ae04cd28125e51631d0a69' 'b5829aaf5ebae0073de26695eddbda61d117fbfb5e3c9f169fade31127ceb9bbc332af760bf6033d90a277d44c095fc30fe0d69defd81fb1aaf82cba0cf6fb90' '2e03935b57fabbbac4493ba6d54ac5b68abfd75775a56c95f5ba8c4627ba38260a3691a335e597c65096c50ce5038389efbb41ef5822a1ff49a8f312d8e37f75' '6e91029d451c9d43c1488ce0e252e6abc18fb1da48b6938d6ae3644fce58c97da6fff6addc60740b1b9ed5e6b86e9d7e94ee0dd55ea73833a82401b4c6f8c936' @@ -91,8 +90,7 @@ sha512sums=('SKIP' '692e5bdefb61ae7b8d6e2063f163e2b68136b2522d606806766186f10c5fae1f7583fd83cda52c235d0d8eb0651e5a711f505021a8d8d949d8dccfce7f0c82ac' 'c699ad500f24569643a4581f4bb5be0e4a90d160f0b3ae7728cf8e27b39665983b80439ca7b853b1bd9a174c8c123cbaf7ed3cd4a17d6460f4fec670c62a1183' 'd074c794796d17d77eed7c34201d93d7ef3f1322fe1ea4a2ddd7137fae884d49f94f465ee39cfd8346b026142668a41f5a8671e521409505dd6d002f71c0eebc' - '94d889b4bc1dacf6ab4543b5e6ac7f99a5ec71f0362577df9e49f6ed0af4275455d8fb2f1ce8c279b7e4ce8a24cafa2006446f9fe793ab60680dcd328abf0429' - '19c992e3c4d99a5335666e40b6619d3c701359db502722950217e7916ecb14901a5d14c400c8e871d91fe1477ab71849697bdad06014df3b13a59cbadfb0bf1d') + '94d889b4bc1dacf6ab4543b5e6ac7f99a5ec71f0362577df9e49f6ed0af4275455d8fb2f1ce8c279b7e4ce8a24cafa2006446f9fe793ab60680dcd328abf0429') b2sums=('SKIP' 'SKIP' 'SKIP' @@ -111,7 +109,7 @@ b2sums=('SKIP' '01dbc4cad102535504eace2d9da225a481b62785d37365f1dea2d1210990ca6177485aa0134a074c09d253b539f12ae810706a77a46779ddb7dd4f1b9b934011' 'f84ff505702e4b2a38b6fd23fbb732c25d3102a04bb6918b0cc3b3d7528a92626324199cea4ed91955aade98f308f1d1037255f26cc9ee21ace75fc6376e7df6' '04a7eb373d6ea1415d7cd6e8dea0d16b75cbb1fb88572a30b8ce9960dd0404adc7f25fce2ccfb103eb09405411dc4d4e0084236e4c814916d81e957dc6aedfd4' - 'e869e1c5751a1691598c3247dcfbcadc6652ecc27cc26dc66cd9e1f7336ea7c4b5e757892137a259b50441d86b83939f01943113787bedb4318a42657dcaac59' + 'e273fb4d50a98e8dcbc6e439579508d03f38e64fc69e5671e9a01a0e6591b0241cedfe07f9fe9eaedca07c96a118c1ba7bf5556e51888826874c7e0386446cc2' 'b4fcc2351b2d77b85cdce35180353aef06900af1554479853bf915d27a756d4bbed50a50e85b72e2e7f4868e6dec3b9c5b27f743d7c112e24e4e0c50cd103a33' '1783b83c6e39c99feb59043c3cff48b24bef55d43949cd9a3097dfbee73a6cf511c180d610a52de876ccea9833fec46d7a88ebce8114e54620b9988232fb9bcc' '1d76eda20067c1bb9928b0304244ab5770a9c4e1f401a74d51da31a47f3a5d6e1e64b5394768cdae6a5bc396b68b6a32eb1a407e1c6377461dd2d5f2f5a2538d' @@ -123,8 +121,7 @@ b2sums=('SKIP' '0ad956e3e662909abafd0b9a2b7ef12e35a8832183cb41e17dcafaa4f5db1e47ef20b3040268644daebb24f66c18b99de07f41e7d62089691c07de688a08f05a' 'a44b5ffc35d78925ac7362ec2cf75475d02e05ed0b9e8771c909d090187aaff7436e8d856d58b8a56827990006b813c63318b60a8a7780844c829a2b13a502cf' '644c071dc4fbbccaa64b0b1babcad60395ffce1a7a317a6f5380eff44cbb886be5f29156a8e967ab02b508a33954fcf5602606b43362cc3bb1936a8cfc3a3c07' - '8c2e7b07d669e97388567f6a2d449a3f33fe98823d7a0807882b4a72f5d22c8e9e3141f0009abf2b398adb95562f8e281e6d53041e0c2ac36f178a320d5be55a' - '02869de544482f0d4d8a796cb94fc76491c49f00ab361218fd2bb6b480a974c8ce3e8706ab70f3d977724491db47bccfc0b7947edae1e57f46212ebf127750d6') + 
'8c2e7b07d669e97388567f6a2d449a3f33fe98823d7a0807882b4a72f5d22c8e9e3141f0009abf2b398adb95562f8e281e6d53041e0c2ac36f178a320d5be55a') validpgpkeys=( 8657ABB260F056B1E5190839D9C4D26D0E604491 # Matt Caswell 7953AC1FBC3DC8B3B292393ED5E9E43F7DF9EE8C # Richard Levitte @@ -143,7 +140,6 @@ prepare() { # patch to be able to use brotli 1.0.9 patch -Np1 -d $pkgbase -i ../$pkgbase-202202-brotli.patch patch -Np1 -d $pkgbase -i ../edk2-use-env-toolchains.patch - patch -Np1 -d $pkgbase -i ../relax_edk2_gcc14.diff cd $pkgbase diff --git a/edk2/relax_edk2_gcc14.diff b/edk2/relax_edk2_gcc14.diff deleted file mode 100644 index 35901ff55b..0000000000 --- a/edk2/relax_edk2_gcc14.diff +++ /dev/null @@ -1,44 +0,0 @@ -diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c b/BaseTools/Source/C/GenFw/Elf64Convert.c -index d53ecb1767..8018d68db1 100644 ---- a/BaseTools/Source/C/GenFw/Elf64Convert.c -+++ b/BaseTools/Source/C/GenFw/Elf64Convert.c -@@ -1778,7 +1778,11 @@ WriteSections64 ( - case R_LARCH_TLS_LD64_HI20: - case R_LARCH_TLS_GD_PC_HI20: - case R_LARCH_TLS_GD64_HI20: -+ case R_LARCH_32_PCREL: - case R_LARCH_RELAX: -+ case R_LARCH_DELETE: -+ case R_LARCH_ALIGN: -+ case R_LARCH_PCREL20_S2: - // - // These types are not used or do not require fixup. - // -@@ -2185,7 +2189,11 @@ WriteRelocations64 ( - case R_LARCH_TLS_LD64_HI20: - case R_LARCH_TLS_GD_PC_HI20: - case R_LARCH_TLS_GD64_HI20: -+ case R_LARCH_32_PCREL: - case R_LARCH_RELAX: -+ case R_LARCH_DELETE: -+ case R_LARCH_ALIGN: -+ case R_LARCH_PCREL20_S2: - // - // These types are not used or do not require fixup in PE format files. - // -diff --git a/BaseTools/Source/C/GenFw/elf_common.h b/BaseTools/Source/C/GenFw/elf_common.h -index ccd32804b0..d3a5303953 100644 ---- a/BaseTools/Source/C/GenFw/elf_common.h -+++ b/BaseTools/Source/C/GenFw/elf_common.h -@@ -1144,5 +1144,10 @@ typedef struct { - #define R_LARCH_TLS_LD64_HI20 96 - #define R_LARCH_TLS_GD_PC_HI20 97 - #define R_LARCH_TLS_GD64_HI20 98 --#define R_LARCH_RELAX 99 -+#define R_LARCH_32_PCREL 99 -+#define R_LARCH_RELAX 100 -+#define R_LARCH_DELETE 101 -+#define R_LARCH_ALIGN 102 -+#define R_LARCH_PCREL20_S2 103 -+ - #endif /* !_SYS_ELF_COMMON_H_ */ From 71220844a45c02ad2aebc7ed172c1430a4ed1ffb Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 13 Mar 2024 21:26:51 +0800 Subject: [PATCH 10/23] linux-tools --- linux-tools/PKGBUILD | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/linux-tools/PKGBUILD b/linux-tools/PKGBUILD index 47421ceb24..b305c2aabd 100644 --- a/linux-tools/PKGBUILD +++ b/linux-tools/PKGBUILD @@ -4,7 +4,7 @@ pkgbase=linux-tools pkgname=( 'bootconfig' 'bpf' - 'cgroup_event_listener' +# 'cgroup_event_listener' 'cpupower' 'hyperv' 'linux-tools-meta' @@ -14,7 +14,7 @@ pkgname=( 'usbip' # 'x86_energy_perf_policy' ) -pkgver=6.7 +pkgver=6.8 pkgrel=1 license=('GPL2') arch=('loong64' 'x86_64') @@ -123,10 +123,10 @@ build() { make popd - echo ':: cgroup_event_listener' - pushd linux/tools/cgroup - make - popd +# echo ':: cgroup_event_listener' +# pushd linux/tools/cgroup +# make +# popd # echo ':: turbostat' # pushd linux/tools/power/x86/turbostat @@ -159,7 +159,7 @@ package_linux-tools-meta() { depends=( 'bootconfig' 'bpf' - 'cgroup_event_listener' +# 'cgroup_event_listener' 'cpupower' 'hyperv' 'perf' From 5e858b5ee0156df9fd9d86eaf1631833d331af2b Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 13 Mar 2024 22:42:48 +0800 Subject: [PATCH 11/23] firefox --- .../0001-Add-support-for-LoongArch64.patch | 36 ++-------- ...2-Enable-VA-API-support-for-AMD-GPUs.patch 
| 6 +- ...=> 0003-Enable-WebRTC-for-LoongArch.patch} | 70 +++++++++---------- ...rchitectural-limit-on-VA-API-support.patch | 40 ----------- ...0004-Fix-libyuv-build-with-LSX-LASX.patch} | 14 ++-- firefox/PKGBUILD | 32 ++++----- 6 files changed, 65 insertions(+), 133 deletions(-) rename firefox/{0004-Enable-WebRTC-for-LoongArch.patch => 0003-Enable-WebRTC-for-LoongArch.patch} (77%) delete mode 100644 firefox/0003-Remove-architectural-limit-on-VA-API-support.patch rename firefox/{0005-Fix-libyuv-build-with-LSX-LASX.patch => 0004-Fix-libyuv-build-with-LSX-LASX.patch} (98%) diff --git a/firefox/0001-Add-support-for-LoongArch64.patch b/firefox/0001-Add-support-for-LoongArch64.patch index b8a33207c9..01bd919907 100644 --- a/firefox/0001-Add-support-for-LoongArch64.patch +++ b/firefox/0001-Add-support-for-LoongArch64.patch @@ -1,7 +1,7 @@ -From 0c4dfaca7c7a38244034a6d872c0c7aeec0d4819 Mon Sep 17 00:00:00 2001 +From 6725bfacfd2142d0209c9d9af3c99f4c9d118aeb Mon Sep 17 00:00:00 2001 From: Jiangjin Wang Date: Sun, 22 Oct 2023 22:13:17 -0700 -Subject: [PATCH 1/5] Add support for LoongArch64 +Subject: [PATCH 1/4] Add support for LoongArch64 Adapted from LoongArchLinux. Rebased to Firefox 118.0.2. @@ -9,14 +9,12 @@ Co-Authored-By: loongson Co-Authored-By: WANG Xuerui --- third_party/libwebrtc/build/build_config.h | 4 ++++ - third_party/rust/nix/.cargo-checksum.json | 2 +- - third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + toolkit/components/telemetry/pingsender/pingsender.cpp | 1 + toolkit/moz.configure | 2 +- - 5 files changed, 8 insertions(+), 2 deletions(-) + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h -index c39ae9da50f99..28191de02654b 100644 +index c39ae9d..28191de 100644 --- a/third_party/libwebrtc/build/build_config.h +++ b/third_party/libwebrtc/build/build_config.h @@ -210,6 +210,10 @@ @@ -30,28 +28,8 @@ index c39ae9da50f99..28191de02654b 100644 #else #error Please add support for your architecture in build/build_config.h #endif -diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json -index f4c932b88926b..b7b9c9f3c9a89 100644 ---- a/third_party/rust/nix/.cargo-checksum.json -+++ b/third_party/rust/nix/.cargo-checksum.json -@@ -1 +1 @@ 
--{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} -\ No newline at end of file 
-+{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} -diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs -index 0c0a2090538f8..214d9e8c60281 100644 ---- a/third_party/rust/nix/src/sys/ioctl/linux.rs -+++ b/third_party/rust/nix/src/sys/ioctl/linux.rs -@@ -41,6 +41,7 @@ mod consts { - target_arch = "s390x", - target_arch = "x86_64", - target_arch = "aarch64", -+ target_arch = "loongarch64", - target_arch = "riscv32", - target_arch = "riscv64" - ))] diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp -index 30f2907c720e1..e6645227a2949 100644 +index 30f2907..e664522 100644 --- a/toolkit/components/telemetry/pingsender/pingsender.cpp +++ b/toolkit/components/telemetry/pingsender/pingsender.cpp @@ -10,6 +10,7 @@ @@ -63,10 +41,10 @@ index 30f2907c720e1..e6645227a2949 100644 #include diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index 8b462ecde463f..a4aa84cc7c45e 100644 +index f93a5d6..a8920f8 100644 --- a/toolkit/moz.configure +++ b/toolkit/moz.configure -@@ -2432,7 +2432,7 @@ with only_when(compile_environment | artifact_builds): +@@ -2426,7 +2426,7 @@ with only_when(compile_environment | artifact_builds): use_nasm = False elif target.cpu == "x86_64": flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] diff --git 
a/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch index 143927ffe1..2ad77f89a6 100644 --- a/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch +++ b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch @@ -1,14 +1,14 @@ -From 9f3a0a22ba3c6ad1f14e90cfc2774b87215d7135 Mon Sep 17 00:00:00 2001 +From 13c493de8626823c9b0a2eada572778b1d58b629 Mon Sep 17 00:00:00 2001 From: Jiangjin Wang Date: Tue, 14 Nov 2023 18:14:20 -0800 -Subject: [PATCH 2/5] Enable VA-API support for AMD GPUs +Subject: [PATCH 2/4] Enable VA-API support for AMD GPUs --- widget/gtk/GfxInfo.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/widget/gtk/GfxInfo.cpp b/widget/gtk/GfxInfo.cpp -index b34e85baa28e5..8c95ce0d4274b 100644 +index b34e85b..8c95ce0 100644 --- a/widget/gtk/GfxInfo.cpp +++ b/widget/gtk/GfxInfo.cpp @@ -1112,14 +1112,6 @@ const nsTArray& GfxInfo::GetGfxDriverInfo() { diff --git a/firefox/0004-Enable-WebRTC-for-LoongArch.patch b/firefox/0003-Enable-WebRTC-for-LoongArch.patch similarity index 77% rename from firefox/0004-Enable-WebRTC-for-LoongArch.patch rename to firefox/0003-Enable-WebRTC-for-LoongArch.patch index 3cd2fcf4f4..64f2394c06 100644 --- a/firefox/0004-Enable-WebRTC-for-LoongArch.patch +++ b/firefox/0003-Enable-WebRTC-for-LoongArch.patch @@ -1,7 +1,7 @@ -From 476458e2e0cafaa5fe5fbc6a99750dd920e7ba67 Mon Sep 17 00:00:00 2001 -From: Jiangjin Wang -Date: Tue, 21 Nov 2023 17:17:16 -0800 -Subject: [PATCH 4/5] Enable WebRTC for LoongArch +From bb10f5774025e90234c2d2ad95351d3b307fce2b Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 19 Feb 2024 15:52:44 +0800 +Subject: [PATCH 3/4] Enable WebRTC for LoongArch --- .../common_audio/common_audio_c_gn/moz.build | 8 ++++++ @@ -14,14 +14,14 @@ Subject: [PATCH 4/5] Enable WebRTC for LoongArch 7 files changed, 60 insertions(+) diff --git a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build -index 60ee6cfc164be..1e69b2881ca90 100644 +index 2a9bfac..24f2199 100644 --- a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +++ b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build -@@ -255,6 +255,14 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -258,6 +258,14 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" ] -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c", @@ -29,52 +29,52 @@ index 60ee6cfc164be..1e69b2881ca90 100644 + "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build -index d2d0287623b54..36ad6222b3dea 100644 +index 316c199..08f2b64 100644 --- a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +++ b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build -@@ -174,6 +174,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -177,6 +177,12 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" ] -+if CONFIG["CPU_ARCH"] == 
"loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build -index 9874037197896..147e12653cbe2 100644 +index a645dce..57ea718 100644 --- a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build -@@ -206,6 +206,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -209,6 +209,12 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" ] -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build -index b0a5d1522da86..0efac49ac5dc3 100644 +index 22a76e9..7489dda 100644 --- a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build -@@ -390,6 +390,34 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -393,6 +393,34 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" ] -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True @@ -102,51 +102,51 @@ index b0a5d1522da86..0efac49ac5dc3 100644 + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build -index 8edb2c2344870..e6cf9f56540f7 100644 +index de23257..be48aa6 100644 --- a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +++ b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build -@@ -148,6 +148,10 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -150,6 +150,10 @@ if CONFIG["TARGET_CPU"] == "riscv64": DEFINES["USE_X11"] = "1" -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + DEFINES["USE_X11"] = "1" + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/moz.build b/third_party/libwebrtc/moz.build -index f528cb1108180..88fd9792acdf1 100644 +index 7baea55..73cda0d 100644 --- a/third_party/libwebrtc/moz.build +++ b/third_party/libwebrtc/moz.build -@@ -692,3 +692,10 @@ if CONFIG["CPU_ARCH"] == "riscv64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_T +@@ -694,3 +694,10 @@ if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGE "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" ] + -+if CONFIG["CPU_ARCH"] == "loongarch64" and CONFIG["MOZ_X11"] == 
"1" and CONFIG["OS_TARGET"] == "Linux": ++if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "loongarch64": + + DIRS += [ + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", + "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" + ] diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index 67fc08237bba4..f7252539c7eaa 100644 +index a8920f8..0ff8c6f 100644 --- a/toolkit/moz.configure +++ b/toolkit/moz.configure -@@ -1328,6 +1328,7 @@ def webrtc_default(target): - "ppc", - "ppc64", - "riscv64", +@@ -1322,6 +1322,7 @@ def webrtc_default(target): + "aarch64", + "x86", + "ia64", + "loongarch64", - ) - - return os_match and cpu_match and target.endianness == "little" + "mips32", + "mips64", + "ppc", -- 2.43.0 diff --git a/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch b/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch deleted file mode 100644 index aa45fa3e87..0000000000 --- a/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b25c3742c98c87de9621eac8b672f9381e15c088 Mon Sep 17 00:00:00 2001 -From: Jiangjin Wang -Date: Tue, 14 Nov 2023 18:16:46 -0800 -Subject: [PATCH 3/5] Remove architectural limit on VA-API support - ---- - toolkit/moz.configure | 9 ++------- - 1 file changed, 2 insertions(+), 7 deletions(-) - -diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index a4aa84cc7c45e..67fc08237bba4 100644 ---- a/toolkit/moz.configure -+++ b/toolkit/moz.configure -@@ -537,11 +537,8 @@ set_define("MOZ_WAYLAND", depends_if(wayland_headers)(lambda _: True)) - - # Hardware-accelerated video decode with VAAPI and V4L2 on Linux - # ============================================================== --@depends(target, toolkit_gtk) --def vaapi(target, toolkit_gtk): -- # VAAPI is mostly used on x86(-64) but is sometimes used on ARM/ARM64 SOCs. -- if target.cpu in ("arm", "aarch64", "x86", "x86_64") and toolkit_gtk: -- return True -+set_config("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) -+set_define("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) - - - @depends(target, toolkit_gtk) -@@ -552,9 +549,7 @@ def v4l2(target, toolkit_gtk): - return True - - --set_config("MOZ_ENABLE_VAAPI", True, when=vaapi) - set_config("MOZ_ENABLE_V4L2", True, when=v4l2) --set_define("MOZ_ENABLE_VAAPI", True, when=vaapi) - set_define("MOZ_ENABLE_V4L2", True, when=v4l2) - - --- -2.43.0 - diff --git a/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch b/firefox/0004-Fix-libyuv-build-with-LSX-LASX.patch similarity index 98% rename from firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch rename to firefox/0004-Fix-libyuv-build-with-LSX-LASX.patch index f69d1ab983..b1a37bd933 100644 --- a/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch +++ b/firefox/0004-Fix-libyuv-build-with-LSX-LASX.patch @@ -1,7 +1,7 @@ -From 7a3c2cbce2b6cf951c94850596dac20b5c3a98dc Mon Sep 17 00:00:00 2001 +From 4afef7f509cae7267407c6b5b839aba955cd66d1 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Sun, 31 Dec 2023 13:16:33 +0800 -Subject: [PATCH 5/5] Fix libyuv build with LSX & LASX +Subject: [PATCH 4/4] Fix libyuv build with LSX & LASX This is not of upstream quality, and will not be upstreamed as-is. This is only meant as a quick-and-dirty build fix for LoongArch early @@ -15,7 +15,7 @@ adopters. 
5 files changed, 92 insertions(+), 28 deletions(-) diff --git a/media/libyuv/libyuv/BUILD.gn b/media/libyuv/libyuv/BUILD.gn -index a72ff06558000..7d70848be9f1a 100644 +index a72ff06..7d70848 100644 --- a/media/libyuv/libyuv/BUILD.gn +++ b/media/libyuv/libyuv/BUILD.gn @@ -69,6 +69,14 @@ group("libyuv") { @@ -77,7 +77,7 @@ index a72ff06558000..7d70848be9f1a 100644 config("libyuv_unittest_warnings_config") { if (!is_win) { diff --git a/media/libyuv/libyuv/libyuv.gni b/media/libyuv/libyuv/libyuv.gni -index 852f08ca9d61f..ecad693508811 100644 +index 852f08c..ecad693 100644 --- a/media/libyuv/libyuv/libyuv.gni +++ b/media/libyuv/libyuv/libyuv.gni @@ -20,4 +20,6 @@ declare_args() { @@ -88,7 +88,7 @@ index 852f08ca9d61f..ecad693508811 100644 + libyuv_use_lasx = current_cpu == "loong64" || current_cpu == "loongarch64" } diff --git a/media/libyuv/libyuv/libyuv.gypi b/media/libyuv/libyuv/libyuv.gypi -index 48936aa7b0239..9c19abf9c34c9 100644 +index 48936aa..9c19abf 100644 --- a/media/libyuv/libyuv/libyuv.gypi +++ b/media/libyuv/libyuv/libyuv.gypi @@ -18,6 +18,7 @@ @@ -125,7 +125,7 @@ index 48936aa7b0239..9c19abf9c34c9 100644 'source/scale_neon.cc', 'source/scale_neon64.cc', diff --git a/media/libyuv/libyuv/source/row_lasx.cc b/media/libyuv/libyuv/source/row_lasx.cc -index 29ac9254d9924..8c325483b116a 100644 +index 29ac925..8c32548 100644 --- a/media/libyuv/libyuv/source/row_lasx.cc +++ b/media/libyuv/libyuv/source/row_lasx.cc @@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, @@ -264,7 +264,7 @@ index 29ac9254d9924..8c325483b116a 100644 } // extern "C" } // namespace libyuv diff --git a/media/libyuv/libyuv/source/row_lsx.cc b/media/libyuv/libyuv/source/row_lsx.cc -index 9c1e16f22e02d..91221ff03ca29 100644 +index 9c1e16f..91221ff 100644 --- a/media/libyuv/libyuv/source/row_lsx.cc +++ b/media/libyuv/libyuv/source/row_lsx.cc @@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, diff --git a/firefox/PKGBUILD b/firefox/PKGBUILD index cb2ecff444..26f4516441 100644 --- a/firefox/PKGBUILD +++ b/firefox/PKGBUILD @@ -65,9 +65,8 @@ source=( identity-icons-brand.svg 0001-Add-support-for-LoongArch64.patch 0002-Enable-VA-API-support-for-AMD-GPUs.patch - 0003-Remove-architectural-limit-on-VA-API-support.patch - 0004-Enable-WebRTC-for-LoongArch.patch - 0005-Fix-libyuv-build-with-LSX-LASX.patch + 0003-Enable-WebRTC-for-LoongArch.patch + 0004-Fix-libyuv-build-with-LSX-LASX.patch ) validpgpkeys=( # Mozilla Software Releases @@ -78,20 +77,18 @@ sha256sums=('b84815a90e147965e4c0b50599c85b1022ab0fce42105e5ef45c630dcca5dec3' 'SKIP' '1f241fdc619f92a914c75aece7c7c717401d7467c9a306458e106b05f34e5044' 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9' - '94ccc1e5efe217a8491bea4f3d80c962ccda2ebb3203f67e4d995190d29b0544' - 'ab6eb723cb2b70831cf4e66d6e315e0842f77467812c67d5de2365fc5117c320' - '779cafabc2c738dc26a1f945695802f038af916d6b86ede9493b1cceca7e7428' - '322d0fb02661018d819f5db218b94f8f680b0e5bc6a3648db35de465431590cc' - 'f7b56a3bd993b0a0c05f305c40fabcc4af62c68d43097c0731db9525ab6156bd') + 'baf2381cce9d0a99eade7e6d8b2a9566cfaa292e6b11cf1a7527f048875b81d4' + '7d37addd28ab5d84a15d236aba916bd4ba9c81442eb92f430e023928aecf4c22' + '9b8141c6731216ea21e76c6efa1de3381bf8ec62b6fdd030d5c8e1e4829e62b5' + 'e782d4b2ea14a5b662dab21e449923a425fdace096375244ac4750e0466d9fd8') b2sums=('7252cd58fef9f5fcb504c8c9f885567109c05e6ec92157459cc384edc6935adb206e3be0b805aeaa37dbd72656c3243db1291b745dd0f705f37a61319a4dc820' 'SKIP' 
'd07557840097dd48a60c51cc5111950781e1c6ce255557693bd11306c7a9258b2a82548329762148f117b2295145f9e66e0483a18e2fe09c5afcffed2e4b8628' '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34' - 'cab0bf0922520866aa9fddf9142512a0ff30437ab779dc4e266b278aea363d4077db5edad11ac30190ee69cc321ec6a7ab3eea8003982faeac991389417af7e9' - '19dc9f0e2aa13be99f7226dbf1e80eee67bbce3ac3ed8256894158565d62324589bc075df402bc43f5d597cc2de60bed6d68b58e20e9caa1f34776f680fe45b5' - 'bb2658edb90dc022df36d89206789ef30222ff1b26376b61b2340d421738fe240063bb7113c9deb828f00e8e297bda8b87e2da9d26796aa7fd4d48bef1aa0719' - '65e9739926174b3eecb8e01cae9805861f6a7c2cadf9faef7cda92acbef3a569a31b2e8c0f4f8ab726416a60e601e7a2a4eb1fc6cb74dfd2cb4ab1572c1fdd97' - 'e2fc795f224f34d14fab2655235a7e31b5fdbe84937ded697f6b1ccd1751bfb3a05ae6b46846201e201487d02a987322f3fc36cdd3208d4904c844caf3f2d628') + '8c19b8be27f5b333321e73bca80b1d1f3d87140f931d311f3cadc1dc18a4d5cf98f02448a8e8640d40186c3545dba2f61b7d47d7ceed23d02c9bd147d60c9009' + '3dc11f75ab18dfb5a02e6de36adb04e5584b324a7de1d2890afc7eaf8bc754f447846009a24ed0d532289f524a71dadc6bf0130b8cb893c91d3d581885818346' + '070ae992fe6f2d5a1881feb2a974e501048cc7ac2c4be472994d97381791693be53ab8ba65546008cacfc8c3197325dc46a0db8e42f9b08caa9e1f7d99bbe688' + 'cecd147e5825dcbc4694a53f77942fc808893cee0afb9cb826803d3f79887eee8d4f98178788b2319434dd0554540a43e7302b6a1cc1f35fef3d650635cdd79f') # Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) # Note: These are for Arch Linux use ONLY. For your own distribution, please @@ -109,12 +106,10 @@ prepare() { mkdir mozbuild cd firefox-$pkgver -# patch -Np1 -i ../firefox-118-loong.patch patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch - patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch - patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch - patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch + patch -Np1 -i ../0003-Enable-WebRTC-for-LoongArch.patch + patch -Np1 -i ../0004-Fix-libyuv-build-with-LSX-LASX.patch echo -n "$_google_api_key" >google-api-key echo -n "$_mozilla_api_key" >mozilla-api-key @@ -130,7 +125,7 @@ ac_add_options --enable-optimize ac_add_options --enable-rust-simd #ac_add_options --enable-linker=lld ac_add_options --disable-install-strip -ac_add_options --disable-elf-hack +#ac_add_options --disable-elf-hack ac_add_options --disable-bootstrap ac_add_options --without-wasm-sandboxed-libraries @@ -162,7 +157,6 @@ END } build() { - set -x cd firefox-$pkgver export MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE=pip From dbbefe9b391152ae30392e97e47e025150295a69 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 14 Mar 2024 20:10:41 +0800 Subject: [PATCH 12/23] cocogitto --- cocogitto/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cocogitto/PKGBUILD b/cocogitto/PKGBUILD index c12465f811..1286652052 100644 --- a/cocogitto/PKGBUILD +++ b/cocogitto/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('2a0e332b7028ffcfeb113c734b4bf506c34362730e371b03a3e4a71142099330') prepare() { cd "$_archive" -# cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" mkdir {completions,man} } From 6e4e9c329c873ccde474af9c8c7d09c28a9ff581 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 14 Mar 2024 20:45:19 +0800 Subject: [PATCH 13/23] deepin-grand-search --- deepin-grand-search/62.patch | 126 
----------------------------------- deepin-grand-search/PKGBUILD | 4 +- 2 files changed, 1 insertion(+), 129 deletions(-) delete mode 100644 deepin-grand-search/62.patch diff --git a/deepin-grand-search/62.patch b/deepin-grand-search/62.patch deleted file mode 100644 index 7c83b5a7be..0000000000 --- a/deepin-grand-search/62.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 66dd5c6c79922ee5366d9bf09e8a9879f7231306 Mon Sep 17 00:00:00 2001 -From: xzl -Date: Thu, 7 Sep 2023 16:05:54 +0800 -Subject: [PATCH] fix: fix build error in v23 - -Log: ---- - src/grand-search-daemon/main.cpp | 1 + - .../exhibition/matchresult/listview/grandsearchlistdelegate.cpp | 1 + - .../gui/exhibition/matchresult/listview/grandsearchlistview.cpp | 1 + - src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp | 1 + - src/grand-search/gui/searchconfig/bestmatchwidget.cpp | 1 + - .../gui/searchconfig/blacklistview/blacklistview.cpp | 1 + - src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp | 1 + - src/libgrand-search-daemon/main.cpp | 1 + - src/preview-plugin/audio-preview/audioview.cpp | 1 + - 9 files changed, 9 insertions(+) - -diff --git a/src/grand-search-daemon/main.cpp b/src/grand-search-daemon/main.cpp -index a2aada02..87bab961 100644 ---- a/src/grand-search-daemon/main.cpp -+++ b/src/grand-search-daemon/main.cpp -@@ -10,6 +10,7 @@ - #include - - #include -+#include - - #include - #include -diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp -index 26cb863b..b71278c9 100755 ---- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp -+++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp -@@ -28,6 +28,7 @@ - #define TailMaxWidth 150 // 拖尾信息最大显示宽度 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - GrandSearchListDelegate::GrandSearchListDelegate(QAbstractItemView *parent) -diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp -index f0133d41..d665d6b5 100755 ---- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp -+++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp -@@ -20,6 +20,7 @@ - - using namespace GrandSearch; - DCORE_USE_NAMESPACE -+DGUI_USE_NAMESPACE - DWIDGET_USE_NAMESPACE - - #define ICON_SIZE 24 -diff --git a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp -index 2d6acdc8..16e3a19e 100644 ---- a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp -+++ b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp -@@ -25,6 +25,7 @@ - using namespace GrandSearch; - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - - NameLabel::NameLabel(const QString &text, QWidget *parent, Qt::WindowFlags f): - QLabel(text, parent, f) -diff --git a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp -index 85e9d7cb..37606ebb 100644 ---- a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp -+++ b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp -@@ -14,6 +14,7 @@ - #include - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - BestMatchWidget::BestMatchWidget(QWidget *parent) -diff --git 
a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp -index 1473e4c8..eb634305 100644 ---- a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp -+++ b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp -@@ -26,6 +26,7 @@ DCORE_USE_NAMESPACE - #define InitCount 7 // 初始显示数量 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - BlackListView::BlackListView(QWidget *parent) -diff --git a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp -index 1449f095..ba0efbfa 100644 ---- a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp -+++ b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp -@@ -12,6 +12,7 @@ - #define ICONLABELSIZE 36 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - SwitchWidget::SwitchWidget(const QString &title, QWidget *parent) -diff --git a/src/libgrand-search-daemon/main.cpp b/src/libgrand-search-daemon/main.cpp -index 1dba137b..53aab763 100644 ---- a/src/libgrand-search-daemon/main.cpp -+++ b/src/libgrand-search-daemon/main.cpp -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - - GRANDSEARCH_USE_NAMESPACE - DCORE_USE_NAMESPACE -diff --git a/src/preview-plugin/audio-preview/audioview.cpp b/src/preview-plugin/audio-preview/audioview.cpp -index d50fde46..2e7d8cd7 100644 ---- a/src/preview-plugin/audio-preview/audioview.cpp -+++ b/src/preview-plugin/audio-preview/audioview.cpp -@@ -19,6 +19,7 @@ - #define MARGIN_SIZE 15 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - GRANDSEARCH_USE_NAMESPACE - using namespace GrandSearch::audio_preview; - diff --git a/deepin-grand-search/PKGBUILD b/deepin-grand-search/PKGBUILD index 86f4e48c7d..469780e349 100644 --- a/deepin-grand-search/PKGBUILD +++ b/deepin-grand-search/PKGBUILD @@ -13,8 +13,7 @@ depends=(deepin-anything deepin-application-manager taglib ffmpeg icu deepin-pdf makedepends=(cmake deepin-dock ninja qt5-tools) groups=(deepin-extra) source=("$pkgname-$pkgver.tar.gz::https://github.com/linuxdeepin/dde-grand-search/archive/$pkgver.tar.gz" - taglib-2.patch - 62.patch) + taglib-2.patch) sha512sums=('7d2bd203b9c0dfef57a0667690252a9b3b3f3b5b2e30f44f6706de0d98885908f21f982fd19257812b92a0564e4e7888f8a6789bee2aa5ac2c573a2cadf0b838' '8364cd5aa0350a7d109be7ce10035c6c4e3fd6686205bc880017b1fc93a10cff6e78a8f66daeb25427c416a6dc075482136146c9d8278aee6de71653673d59a0') @@ -23,7 +22,6 @@ prepare() { # https://github.com/linuxdeepin/dde-grand-search/pull/65 sed -i 's/-fPIE -pie//g' src/*/CMakeLists.txt - patch -p1 -i $srcdir/62.patch patch -p1 -i ../taglib-2.patch } From 25721e6546581b764c82da617aaddd0ff305d418 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 14 Mar 2024 21:04:18 +0800 Subject: [PATCH 14/23] pixman --- pixman/83.patch | 6 +++--- pixman/PKGBUILD | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pixman/83.patch b/pixman/83.patch index 34c100bbe7..0cd9bb9b37 100644 --- a/pixman/83.patch +++ b/pixman/83.patch @@ -2288,7 +2288,7 @@ index 0000000..d6d0169 + */ + +#ifdef HAVE_CONFIG_H -+#include ++#include +#endif + +#include "pixman-private.h" @@ -7174,7 +7174,7 @@ index 0000000..a77211c + */ + +#ifdef HAVE_CONFIG_H -+#include ++#include +#endif + +#include "pixman-private.h" @@ -7281,7 +7281,7 @@ index 0000000..a4c261a + */ + +#ifdef HAVE_CONFIG_H -+#include ++#include +#endif + +#include "pixman-private.h" diff --git 
a/pixman/PKGBUILD b/pixman/PKGBUILD index 5358758bb8..43007d07ec 100644 --- a/pixman/PKGBUILD +++ b/pixman/PKGBUILD @@ -15,8 +15,9 @@ provides=('libpixman-1.so') source=(https://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.xz 83.patch) sha512sums=('1a1d21b86b3c6784c4c2606b7069723b4dbb747eac9fce95bca86516787840379ffd49abc42d11e7143e85c32c85496f33c2562c7a910ca6f963599affdc3224' - '0b7970cd955e31211fa55882974bbb321b0e7421da5c06a2cff196ea3e0efcefce50b4ba5f19a23417aaea2145529c762800dbb5887dfdcfa240efe400d44f7f') + 'f0ddc083f1cafaebdaf3a3b2d3b9c85b55f3354c7c115b4eff31eb46ddbf71cd6bedea207e10a4692ba73e9857e2e8b49e197de1a037589bb842ee666e829a83') #validpgpkeys=('') # Maarten Lankhorst +options=(!lto) prepare() { cd "$pkgname-$pkgver" From 177d138c25a8fb1d70ec5a49eac8804b2c330b64 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 1 Apr 2024 14:07:55 +0800 Subject: [PATCH 15/23] update --- 0ad/PKGBUILD | 4 +- arch-wiki-docs/PKGBUILD | 2 +- avahi/PKGBUILD | 2 +- blender/PKGBUILD | 3 +- clang/PKGBUILD | 4 +- dbeaver-plugin-apache-poi/PKGBUILD | 4 +- discord/PKGBUILD | 4 +- electron25/PKGBUILD | 9 +- firefox-developer-edition/PKGBUILD | 19 +++- gcc12/PKGBUILD | 5 +- glslang/PKGBUILD | 2 +- keybase/PKGBUILD | 7 +- libretro-mame/PKGBUILD | 2 +- libvirt/PKGBUILD | 10 +- libxslt/PKGBUILD | 2 +- linux-hardened/PKGBUILD | 4 +- llvm/PKGBUILD | 2 +- lua-yaml/PKGBUILD | 2 +- nsxiv/PKGBUILD | 2 +- ocaml/PKGBUILD | 8 +- ocaml/ocaml-5.1.0-la64.patch | 169 ++++++++++++++++++----------- 21 files changed, 154 insertions(+), 112 deletions(-) diff --git a/0ad/PKGBUILD b/0ad/PKGBUILD index 453c704d8a..b10879627c 100644 --- a/0ad/PKGBUILD +++ b/0ad/PKGBUILD @@ -19,7 +19,7 @@ source=("https://releases.wildfiregames.com/$pkgname-$_pkgver-unix-build.tar.xz" fix_python_3.11_ftbfs.patch https://github.com/0ad/0ad/commit/839edc3a.patch https://github.com/0ad/0ad/commit/093e1eb2.patch - https://github.com/0ad/0ad/commit/d2426312.patch) + https://github.com/0ad/0ad/commit/d2426312.patch 0ad-fix-build.patch 0ad-la64.patch) sha512sums=('aaf647d5d8454c244015d2a198beeaaebc571a5bc96446f3acff8dbd05f9843029c500bf4162651a5e1fcdb42bd5fb5b4f5c512c78372479fbd8565dd093f272' @@ -27,7 +27,7 @@ sha512sums=('aaf647d5d8454c244015d2a198beeaaebc571a5bc96446f3acff8dbd05f9843029c '748a75420541947e2a215b3a8789a0e137179e4981d0977e1c4b20cd7b86af2d96b9976e04d60ace8d5ee465d542cadc42ee9bceedaaa97d2b320f533e3e3892' '1dfc8a0c6ac29040f72d9bbf6b631a74cbdec444b9078a015345139228666354d9b5059f85b640ce3afc0f590bcbe8afd5e158509a0c95751e1cd69fece46876' 'a7fd1454385f56b7c8cb0fc6ac001761d4419df4aeec570ba846c7df4eb327d25b9ff1a7946cb334315109fa90ca2c1820583619f4e1ec5d53805afa08e10093' - '5f32d47f01d845e07b2f919c9b04ac5e50dc9977fa97f981eba4a53677a29d797d0d76bc385ac047dd7c7d24af7d95cd8256d433bd43ce1a6606763c0ea736cb') + '5f32d47f01d845e07b2f919c9b04ac5e50dc9977fa97f981eba4a53677a29d797d0d76bc385ac047dd7c7d24af7d95cd8256d433bd43ce1a6606763c0ea736cb' '956effc37bbad8ca44a5e82a8750ca82c1c2347152dd684ebc2921953d4fa81ef9291b5bb5de05559b2b4ef79c336b837216892f0bcf806e50aac8c4ea42edde' 'c01e52a4241736eda82f6002c3627d9c4b5b505109969fc608d95dd71db8681df8f3de6a372bca8fe977bee14f5180f4c27681e40d26b0a06ddc556122886d04') diff --git a/arch-wiki-docs/PKGBUILD b/arch-wiki-docs/PKGBUILD index 412708374a..2be93f6f7d 100644 --- a/arch-wiki-docs/PKGBUILD +++ b/arch-wiki-docs/PKGBUILD @@ -2,7 +2,7 @@ # Maintainer: Sergej Pupykin pkgname=arch-wiki-docs -pkgver=20240103 +pkgver=20240207 pkgrel=1 pkgdesc='Pages from Arch Wiki optimized for offline browsing' 
arch=('any') diff --git a/avahi/PKGBUILD b/avahi/PKGBUILD index b2cb0dbe02..4e67c866b8 100644 --- a/avahi/PKGBUILD +++ b/avahi/PKGBUILD @@ -3,7 +3,7 @@ # Contributor: Douglas Soares de Andrade pkgname=avahi -pkgver=0.8+r194+g3f79789 +pkgver=0.8+r194+g3f79789c pkgrel=1 epoch=1 pkgdesc="Service Discovery for Linux using mDNS/DNS-SD (compatible with Bonjour)" diff --git a/blender/PKGBUILD b/blender/PKGBUILD index 41c4cff075..2b55992fce 100644 --- a/blender/PKGBUILD +++ b/blender/PKGBUILD @@ -18,7 +18,7 @@ url="https://www.blender.org" depends=('libpng' 'libtiff' 'openexr' 'python' 'desktop-file-utils' 'python-requests' 'potrace' 'shared-mime-info' 'hicolor-icon-theme' 'xdg-utils' 'glew' 'openjpeg2' 'python-numpy' 'freetype2' 'openal' 'ffmpeg' 'fftw' 'boost-libs' 'opencollada' 'alembic' 'openxr' - 'openimageio' 'libsndfile' 'jack' 'opencolorio' + 'openimageio' 'libsndfile' 'jack' 'opencolorio' 'materialx' 'jemalloc' 'libspnav' 'ptex' 'opensubdiv' 'openvdb' 'sdl2' 'libharu' 'draco' 'level-zero-loader' 'libxkbcommon' 'libepoxy' 'openshadinglanguage' 'intel-oneapi-compiler-shared-runtime-libs' 'intel-oneapi-compiler-dpcpp-cpp-runtime-libs') @@ -116,7 +116,6 @@ build() { -DWITH_PYTHON_INSTALL=OFF \ -DOCLOC_INSTALL_DIR=/usr \ -DUSD_ROOT_DIR=/usr \ - -DWITH_MATERIALX=OFF \ -DWITH_CYCLES=OFF \ -DSYCL_OFFLINE_COMPILER_PARALLEL_JOBS=8 cmake --build build diff --git a/clang/PKGBUILD b/clang/PKGBUILD index 2604666164..f7242aef54 100644 --- a/clang/PKGBUILD +++ b/clang/PKGBUILD @@ -3,7 +3,7 @@ pkgname=clang pkgver=16.0.6 -pkgrel=2 +pkgrel=3 pkgdesc="C language family frontend for LLVM" arch=('loong64' 'x86_64') url="https://clang.llvm.org/" @@ -34,7 +34,7 @@ sha256sums=('1186b6e6eefeadd09912ed73b3729e85b59f043724bb2818a95a2ec024571840' 'SKIP' '15f5b9aeeba938530af977d5f9205612737a091a7f0f6c8075df8723b7713f70' 'SKIP' - 'c102e8a6a2adb0e8729865ffb8799b22bb8a9bdf0f421991880fa4393378370a' + '0d4dc477f5a28f9f16639dc094b6d9bc14228d5de771547394799d2d5f8cd1df' '45da5783f4e89e4507a351ed0ffbbe6ec240e21ff7070797a89c5ccf434ac612') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A' # Tom Stellard 'D574BD5D1D0E98895E3BF90044F2485E45D59042') # Tobias Hieta diff --git a/dbeaver-plugin-apache-poi/PKGBUILD b/dbeaver-plugin-apache-poi/PKGBUILD index ae386c042b..fa66ccc803 100644 --- a/dbeaver-plugin-apache-poi/PKGBUILD +++ b/dbeaver-plugin-apache-poi/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Muflone http://www.muflone.com/contacts/english/ pkgname=dbeaver-plugin-apache-poi -pkgver=5.2.4 +pkgver=5.2.5 pkgrel=1 pkgdesc='DBeaver library for Microsoft Office documents' arch=('any') @@ -11,7 +11,7 @@ makedepends=('unzip') depends=('dbeaver>=4.2.5') source=("https://dbeaver.io/update/ce/latest/plugins/org.jkiss.bundle.apache.poi_${pkgver}.jar" "${pkgname}.info") -sha256sums=('59061d440466b1869528adc1ee7c934421e9cae8008a8caa3801d34967ec3735' +sha256sums=('1f979af0c82db69228a116bce039a42f1a0984e4251bfd4e2f4c6ea63699f1de' '5a280fb2b234f76842c3e3a41d08cfecb0f159cf5ca322b3f0001755e7093150') noextract=("org.jkiss.bundle.apache.poi_${pkgver}.jar") diff --git a/discord/PKGBUILD b/discord/PKGBUILD index 3147b4cbdc..bd9e92c851 100644 --- a/discord/PKGBUILD +++ b/discord/PKGBUILD @@ -19,8 +19,8 @@ source=("https://dl.discordapp.net/apps/linux/$pkgver/$pkgname-$pkgver.tar.gz" "LICENSE-$pkgver.html::https://discordapp.com/terms" "OSS-LICENSES-$pkgver.html::https://discordapp.com/licenses") sha512sums=('ec11acfe3f96762cd7c230eb38e098818d9ebf32ff530f061ed3dbb5d08c3303bb4d20206510e3621f21ba89c08474367c13e7fd3c46f80b7229db37fb81db6c' - 
'a47ee83d4878f936a42faaa97d87f8a23261be1d5a5fc1c072be6b8a1fd505ccb7a3b39a8f7675677a241421f80b4cc57957d32eaf1fee96f91e0fe976cce167' - '4587af901dc20a843f05ba54411c7d9a39d6976e4c3689fb94c504d6ed8af9dd46aa54c2b87a9dcdee1dd4bcbf79d68be66556ce9d329959f849cde2a8522e4a') + '6fd75707e5ddf52454b0172309a820def89a0d400336bbe40c9176eaebcf6d65d2d083636c76a2595a14193508254520007bc8d31e14f4be929334a5f3bf91bb' + 'c1de9d8abf32a750c27c44232b1f1f8f8b569c1a16fd081587efc5a8bef8a124b278e333f9cdd62341f4e2fdc60fb7a0e7d7d13213315feee6058f4a225258ba') prepare() { cd $_pkgname diff --git a/electron25/PKGBUILD b/electron25/PKGBUILD index 4565753c8d..95cff093fb 100644 --- a/electron25/PKGBUILD +++ b/electron25/PKGBUILD @@ -63,7 +63,7 @@ source=("git+https://github.com/electron/electron.git#tag=v$pkgver" use-system-libraries-in-node.patch libxml2-2.12.patch icu-74.patch - electron-la64.patch +#electron-la64.patch ) # shellcheck disable=SC2034 sha256sums=('SKIP' @@ -80,10 +80,7 @@ sha256sums=('SKIP' 'ff588a8a4fd2f79eb8a4f11cf1aa151298ffb895be566c57cc355d47f161f53f' 'bfae9e773edfd0ddbc617777fdd4c0609cba2b048be7afe40f97768e4eb6117e' '547e092f6a20ebd15e486b31111145bc94b8709ec230da89c591963001378845') - '621ed210d75d0e846192c1571bb30db988721224a41572c27769c0288d361c11' - '1b782b0f6d4f645e4e0daa8a4852d63f0c972aa0473319216ff04613a0592a69' - 'ba4dd0a25a4fc3267ed19ccb39f28b28176ca3f97f53a4e9f5e9215280040ea0' - '671fd958b429414a66c209c8b91b6876a77bf4ed38244044ba14703de3f02a66') +# '671fd958b429414a66c209c8b91b6876a77bf4ed38244044ba14703de3f02a66') # Possible replacements are listed in build/linux/unbundle/replace_gn_files.py # Keys are the names in the above script; values are the dependencies in Arch @@ -152,7 +149,7 @@ EOF pushd src/electron patch -Np1 -i ../../std-vector-non-const.patch - patch -Np1 -i ../../electron-la64.patch +# patch -Np1 -i ../../electron-la64.patch popd echo "Running hooks..." 
diff --git a/firefox-developer-edition/PKGBUILD b/firefox-developer-edition/PKGBUILD index 8873766e38..585d38e252 100644 --- a/firefox-developer-edition/PKGBUILD +++ b/firefox-developer-edition/PKGBUILD @@ -26,7 +26,7 @@ makedepends=( imake inetutils jack - lld +# lld llvm mesa nasm @@ -75,7 +75,12 @@ sha256sums=('535c880e5f894a75c83bb9399120302fb213df6723dbbf233e28d2904246409a' 'SKIP' '6522f3eeefbd3550e3bb7dafb4c5c0cef7be0eec11ef3da79e0562b96edc04e3' 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9' - '294e6ec79b0a64b5cb2372dbe7a5f9191dd1f68d7aece244e208581a98db8fb3') + '294e6ec79b0a64b5cb2372dbe7a5f9191dd1f68d7aece244e208581a98db8fb3' + '94ccc1e5efe217a8491bea4f3d80c962ccda2ebb3203f67e4d995190d29b0544' + 'ab6eb723cb2b70831cf4e66d6e315e0842f77467812c67d5de2365fc5117c320' + '779cafabc2c738dc26a1f945695802f038af916d6b86ede9493b1cceca7e7428' + '322d0fb02661018d819f5db218b94f8f680b0e5bc6a3648db35de465431590cc' + 'f7b56a3bd993b0a0c05f305c40fabcc4af62c68d43097c0731db9525ab6156bd') b2sums=('4eeb4ea242b9187abafb8e580f2038747bc2962230fa598a4de0f25f999ab378d92fc61fcea39165f0800cc0d89a2bc0fbccca9d92f28281eca979b576821393' 'SKIP' 'd2d14042a03ffcc5ed9212fca9cc167e8bfb2ba3f0d61a89441e033484cb914424d0f2544e0f1bc58992fee9cae03a73679352ee0fac9777fa5633ddc8d76e7d' @@ -105,10 +110,14 @@ prepare() { # Change install dir from 'firefox' to 'firefox-developer-edition' patch -Np1 -i ../firefox-install-dir.patch - patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + echo "aaaaa" +# patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + echo "bbbb" patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch - patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch - patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch +# patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch + echo "cccc" +# patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch + echo "dddd" patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch echo -n "$_google_api_key" >google-api-key diff --git a/gcc12/PKGBUILD b/gcc12/PKGBUILD index ae16e7ecd7..bcd9d39be0 100644 --- a/gcc12/PKGBUILD +++ b/gcc12/PKGBUILD @@ -39,6 +39,7 @@ options=(!emptydirs !lto) _libdir=usr/lib/gcc/$CHOST/${pkgver%%+*} source=(git+https://sourceware.org/git/gcc.git#commit=${_commit} c89 c99 + gcc-12-loong64.patch ) validpgpkeys=(F3691687D867B81B51CE07D9BBE43771487328A9 # bpiotrowski@archlinux.org 86CFFCA918CF3AF47147588051E8B148A9999C34 # evangelos@foutrelis.com @@ -46,7 +47,8 @@ validpgpkeys=(F3691687D867B81B51CE07D9BBE43771487328A9 # bpiotrowski@archlinux. D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62) # Jakub Jelinek sha256sums=('SKIP' 'de48736f6e4153f03d0a5d38ceb6c6fdb7f054e8f47ddd6af0a3dbf14f27b931' - '2513c6d9984dd0a2058557bf00f06d8d5181734e41dcfe07be7ed86f2959622a') + '2513c6d9984dd0a2058557bf00f06d8d5181734e41dcfe07be7ed86f2959622a' + '4018da43b2cb96145557af4409a009c63c95d4986fd16bee021ea76142615b23') prepare() { [[ ! 
-d gcc ]] && ln -s gcc-${pkgver/+/-} gcc @@ -58,6 +60,7 @@ prepare() { # Arch Linux installs x86_64 libraries /lib sed -i '/m64=/s/lib64/lib/' gcc/config/i386/t-linux64 + patch -p1 -i $srcdir/gcc-12-loong64.patch mkdir -p "$srcdir/gcc-build" } diff --git a/glslang/PKGBUILD b/glslang/PKGBUILD index cbe377fb12..df136c8618 100644 --- a/glslang/PKGBUILD +++ b/glslang/PKGBUILD @@ -23,7 +23,7 @@ options=('staticlibs') source=(${pkgname}-${pkgver}.tar.gz::https://github.com/KhronosGroup/glslang/archive/${pkgver}.tar.gz ${pkgname}-3420.patch::https://github.com/KhronosGroup/glslang/pull/3420.patch) sha256sums=('1c4d0a5a38c8aaf89a2d7e6093be734320599f5a6775b2726beeb05b0c054e66' - '8930d3829bae4e0cd911bf63728d4d49d736d261af1e2cef912b769e6fa8373f') + '785a72531afe9dc17c88eb24a86c7137e15560f1dc4389aa811c5b0f4979029f') prepare() { cd ${pkgname}-${pkgver} diff --git a/keybase/PKGBUILD b/keybase/PKGBUILD index d8ab4a14f3..27240d999d 100644 --- a/keybase/PKGBUILD +++ b/keybase/PKGBUILD @@ -27,7 +27,7 @@ sha256sums=('22e5ae4d1f951ea9f3ffc3cb74de9b9f41b828b2c8a4e5cb6401de6fbccf497b' 'SKIP' '7459a6846ff24c2bf7e6ab1ce31880829cf2692f23ffb3bf77e455f4de7ca34e' '74fd7a777275bdf2128f121e27f722f692302a50d89c6c1d3ec82df1deaffee3' - '0059b988777ecf30a07bb982a164546ff83333cc9d869b0756ab3c034065b8a7') + '5a46d9433efb4244509d26fdf04340fb628de1d19a4dff6944510f9bba69d378') validpgpkeys=('222B85B0F90BE2D24CFEB93F47484E50656D16C7') # Keybase.io Code Signing (v1) prepare() { @@ -73,6 +73,11 @@ build() { export GOFLAGS="-buildmode=pie -trimpath -ldflags=-linkmode=external -mod=readonly -modcacherw" export GOPATH="$srcdir/.gopath" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=github.com/qrtz/nativemessaging=github.com/yetist/nativemessaging@v0.0.0-20240219130319-0aba78239ecd + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod tidy go build -a -tags production -o ./bin/keybase github.com/keybase/client/go/keybase go build -a -tags production -o ./bin/kbnm github.com/keybase/client/go/kbnm go build -a -tags production -o ./bin/kbfsfuse github.com/keybase/client/go/kbfs/kbfsfuse diff --git a/libretro-mame/PKGBUILD b/libretro-mame/PKGBUILD index b76a54b16c..061e9c5006 100644 --- a/libretro-mame/PKGBUILD +++ b/libretro-mame/PKGBUILD @@ -2,7 +2,7 @@ # Contributor: Oliver Jaksch pkgname=libretro-mame -pkgver=89040 +pkgver=90047 pkgrel=1 arch=(loong64 x86_64) pkgdesc='MAME Arcade core' diff --git a/libvirt/PKGBUILD b/libvirt/PKGBUILD index 27232c604f..06bdf76b66 100644 --- a/libvirt/PKGBUILD +++ b/libvirt/PKGBUILD @@ -86,6 +86,7 @@ source=( libvirt-loongarch.patch ) sha256sums=('8ba2e72ec8bdd2418554a1474c42c35704c30174b7611eaf9a16544b71bcf00a' + 'SKIP' 'f0562941282b157e2ebba9d203c33f4f9c0f3f93562129448f7de6e5df0575fc') validpgpkeys=('453B65310595562855471199CA68BE8010084C9C') # Jiří Denemark @@ -187,12 +188,3 @@ package_libvirt-storage-iscsi-direct() { install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_iscsi-direct.so" } - -package_libvirt-storage-rbd() { - pkgdesc="Libvirt RBD storage backend" - depends=("libvirt=$pkgver") - optdepends=() - backup=() - - install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_rbd.so" -} diff --git a/libxslt/PKGBUILD b/libxslt/PKGBUILD index ef0aee7c8f..7489ccaef8 100644 --- a/libxslt/PKGBUILD +++ 
b/libxslt/PKGBUILD @@ -53,7 +53,7 @@ build() { cd libxslt ./configure "${configure_options[@]}" sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool - make + make V=1 } check() { diff --git a/linux-hardened/PKGBUILD b/linux-hardened/PKGBUILD index 1c16ec51c9..2b0e16a3c2 100644 --- a/linux-hardened/PKGBUILD +++ b/linux-hardened/PKGBUILD @@ -44,10 +44,10 @@ validpgpkeys=( ) # https://www.kernel.org/pub/linux/kernel/v6.x/sha256sums.asc sha256sums=('d0e6ce60f0ccd162aabe130c00509590de790e33642a12ed4249aa08ac14f674' - '46a1e0e43247d09c5ae29cfa7a79e272767a49b90c5761c2e4a5656a4ced6cf2' + '79aa07a1108582118c5e4721b9b5440053791d7a98ceb9538d42a511e39097eb' 'c9b26d463e27257d6ad13e59d489db5bd9b103e506dc80d7917bf48471480c85') b2sums=('15b6b33c6fdac5329d56424afc09a722053f045ca1a1553d583d80296a20a3e545d6ac1fd7950e575816df3e2d1c1033aeba779aa374ee25406f05d284d56815' - '914edb986d34ddaa20738ec6d4f0d68b2500ee4662be3f58c1f62ecfa87f3ab88205acf91ec7d03d2f925880d538d0b1716183add857d2bff533e5a0d0596ba5' + 'ca79cea706454ee6aab3bfbc01d3067cef4fdfd49413c8bca52480596ec394d932a4e252b4bbcc3a605bd3a56b4b73493c47a7cdd3e984a5b42767fce0f1c025' 'd285dfd1304c9bc42f9fdaa18f8a393fc599be5d6144abe43959c8f63de7b8973821c50135ce81764428659ddd0eb634d7cce6da697cab9752311ad05dee2df6') export KBUILD_BUILD_HOST=archlinux diff --git a/llvm/PKGBUILD b/llvm/PKGBUILD index d4be161983..c56cbbdaad 100644 --- a/llvm/PKGBUILD +++ b/llvm/PKGBUILD @@ -3,7 +3,7 @@ pkgname=('llvm' 'llvm-libs') pkgver=16.0.6 -pkgrel=1 +pkgrel=2 arch=('loong64' 'x86_64') url="https://llvm.org/" license=('custom:Apache 2.0 with LLVM Exception') diff --git a/lua-yaml/PKGBUILD b/lua-yaml/PKGBUILD index fec793aaf0..9cc34452e1 100644 --- a/lua-yaml/PKGBUILD +++ b/lua-yaml/PKGBUILD @@ -20,7 +20,7 @@ checkdepends=("${_luadeps[@]/#/lua-}" lua-lut) options=(debug) _archive="$_rockname-REL-$pkgver" -_rock="${_archive/-REL}-$_rockrel.linux-$CARCH.rock" +_rock="${_archive/-REL}-$_rockrel.linux-`uname -m`.rock" _rockspec="${_archive/-REL}-$_rockrel.rockspec" source=("https://github.com/lubyk/$_rockname/archive/REL-$pkgver/$_archive.tar.gz") sha256sums=('b4391d182677ab644403bf1ac028c7421c2605db124f9792193013c582a273ec') diff --git a/nsxiv/PKGBUILD b/nsxiv/PKGBUILD index dbe10b71b0..514d6ddf97 100644 --- a/nsxiv/PKGBUILD +++ b/nsxiv/PKGBUILD @@ -15,7 +15,7 @@ depends=('imlib2' 'libx11' # core dependencies 'hicolor-icon-theme') # make icon source=("$pkgname-$pkgver.tar.gz"::"https://codeberg.org/nsxiv/nsxiv/archive/v$pkgver.tar.gz") -sha256sums=('49ef1eb775ef6c34f55dada7a3f446c9c5c6773c9e208509ffef27a656338a90') +sha256sums=('09d1d72b3cbcf17a04e26beb5e81acc9495aaba1f8f1be907bdcd8e4e3007db3') prepare() { cd "$pkgname" diff --git a/ocaml/PKGBUILD b/ocaml/PKGBUILD index a60a990534..91fbe0b6c0 100644 --- a/ocaml/PKGBUILD +++ b/ocaml/PKGBUILD @@ -11,15 +11,15 @@ url="https://caml.inria.fr/" makedepends=('ncurses>=5.6-7' autoconf) optdepends=('ncurses: advanced ncurses features' 'tk: advanced tk features') source=(https://caml.inria.fr/distrib/ocaml-${pkgver%.*}/${pkgname}-${pkgver}.tar.xz -ocaml-5.0.0-la64.patch) +ocaml-5.1.0-la64.patch) sha512sums=('23579b76592e225f2ddec58d78084dfd11befede18b61be71d3896fd72a90cc0fe4fb1f64a7dcbc83239ed69ec4254e13ab86fd810671851044c2a353da3adc5' - 'a95f2e02b318183d76b858b0a1d66ad5c23977d72f6d964b95a8851edf4170ed3971602e031842ef04615d2f6b36198f62aa4ff7e57c188af052d45f22192f65') + 'abb86947fa2c9f1180cb3255c969db67436ef46e04001f39384e37f8560ca257fd5878bb2cb350ee2b490a08fa234922bb14e2bd38962e38e12a71f0b97f5ffd') options=('!makeflags' '!emptydirs' 
'staticlibs') prepare() { cd "${srcdir}/${pkgname}-${pkgver}" - patch -p1 -i $srcdir/ocaml-5.0.0-la64.patch + patch -p1 -i $srcdir/ocaml-5.1.0-la64.patch autoconf } @@ -27,7 +27,7 @@ build() { cd "${srcdir}/${pkgname}-${pkgver}" CFLAGS+=' -ffat-lto-objects' CXXFLAGS+=' -ffat-lto-objects' - ./configure --prefix /usr --mandir /usr/share/man --enable-frame-pointers + ./configure --prefix /usr --mandir /usr/share/man #--enable-frame-pointers make --debug=v world.opt } diff --git a/ocaml/ocaml-5.1.0-la64.patch b/ocaml/ocaml-5.1.0-la64.patch index 6f7678ccf2..180bd1dfdb 100644 --- a/ocaml/ocaml-5.1.0-la64.patch +++ b/ocaml/ocaml-5.1.0-la64.patch @@ -1,23 +1,56 @@ +diff --git a/.gitignore b/.gitignore +index 9e7022db80..d7416f9a4b 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -71,6 +71,9 @@ META + /asmcomp/reload.ml + /asmcomp/scheduling.ml + /asmcomp/CSE.ml ++/asmcomp/loongarch64/CSE.ml ++/asmcomp/loongarch64/reload.ml ++/asmcomp/loongarch64/scheduling.ml + + /boot/ocamlrun + /boot/ocamlruns +@@ -331,3 +334,4 @@ META + /yacc/ocamlyacc + /yacc/version.h + /yacc/.gdb_history ++ diff --git a/Makefile b/Makefile -index bb2c245ea..db03683fb 100644 +index bc12f75dfb..9ec39ef6a7 100644 --- a/Makefile +++ b/Makefile -@@ -528,6 +528,14 @@ partialclean:: +@@ -31,7 +31,7 @@ include stdlib/StdlibModules + + CAMLC = $(BOOT_OCAMLC) $(BOOT_STDLIBFLAGS) -use-prims runtime/primitives + CAMLOPT=$(OCAMLRUN) ./ocamlopt$(EXE) $(STDLIBFLAGS) -I otherlibs/dynlink +-ARCHES=amd64 arm64 power s390x riscv ++ARCHES=amd64 arm64 loongarch64 power s390x riscv + VPATH = utils parsing typing bytecomp file_formats lambda middle_end \ + middle_end/closure middle_end/flambda middle_end/flambda/base_types \ + asmcomp driver toplevel tools +@@ -557,8 +557,18 @@ partialclean:: beforedepend:: lambda/runtimedef.ml +-# Choose the right machine-dependent files ++# If any of these loongarch files need to be modified, please copy the ++# corresponding file from asmcomp/riscv64 to asmcomp/loongarch64, delete the ++# corresponding rule below, update the clean target accordingly, and remove ++# the file from .gitignore. +asmcomp/loongarch64/CSE.ml: asmcomp/riscv/CSE.ml + cp $< $@ +asmcomp/loongarch64/reload.ml: asmcomp/riscv/reload.ml + cp $< $@ +asmcomp/loongarch64/scheduling.ml: asmcomp/riscv/scheduling.ml + cp $< $@ -+ -+ - # Choose the right machine-dependent files - asmcomp/arch.ml: asmcomp/$(ARCH)/arch.ml -@@ -1031,6 +1039,7 @@ clean:: ++# Choose the right machine-dependent files + asmcomp/arch.mli: asmcomp/$(ARCH)/arch.mli + cd asmcomp; $(LN) $(ARCH)/arch.mli . 
+ +@@ -1061,6 +1071,7 @@ clean:: rm -f runtime/domain_state*.inc rm -rf $(DEPDIR) rm -f stdlib/libcamlrun.a stdlib/libcamlrun.lib @@ -27,14 +60,16 @@ index bb2c245ea..db03683fb 100644 runtimeopt: stdlib/libasmrun.$(A) diff --git a/asmcomp/loongarch64/NOTES.md b/asmcomp/loongarch64/NOTES.md new file mode 100644 -index 000000000..f9b63dd62 +index 0000000000..aacca61de0 --- /dev/null +++ b/asmcomp/loongarch64/NOTES.md -@@ -0,0 +1,11 @@ +@@ -0,0 +1,13 @@ +# Supported platforms + +LoongArch in 64-bit mode + ++Debian architecture name: `loongarch64` ++ +# Reference documents + +* Instruction set specification: @@ -44,7 +79,7 @@ index 000000000..f9b63dd62 + - https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html diff --git a/asmcomp/loongarch64/arch.ml b/asmcomp/loongarch64/arch.ml new file mode 100644 -index 000000000..fee052684 +index 0000000000..44bb39d2ea --- /dev/null +++ b/asmcomp/loongarch64/arch.ml @@ -0,0 +1,96 @@ @@ -63,7 +98,7 @@ index 000000000..fee052684 +(* *) +(**************************************************************************) + -+(* Specific operations for the Loongarch processor *) ++(* Specific operations for the LoongArch processor *) + +open Format + @@ -146,7 +181,7 @@ index 000000000..fee052684 +let operation_can_raise _ = false diff --git a/asmcomp/loongarch64/arch.mli b/asmcomp/loongarch64/arch.mli new file mode 100644 -index 000000000..57174fabe +index 0000000000..57174fabea --- /dev/null +++ b/asmcomp/loongarch64/arch.mli @@ -0,0 +1,76 @@ @@ -228,10 +263,10 @@ index 000000000..57174fabe +val operation_can_raise : specific_operation -> bool diff --git a/asmcomp/loongarch64/emit.mlp b/asmcomp/loongarch64/emit.mlp new file mode 100644 -index 000000000..b80b4f172 +index 0000000000..92a2ab2695 --- /dev/null +++ b/asmcomp/loongarch64/emit.mlp -@@ -0,0 +1,772 @@ +@@ -0,0 +1,775 @@ +(**************************************************************************) +(* *) +(* OCaml *) @@ -285,9 +320,6 @@ index 000000000..b80b4f172 + +(* Output a symbol *) + -+let emit_symbol s = -+ emit_symbol '$' s -+ +let emit_jump op s = + if !Clflags.dlcode || !Clflags.pic_code + then `{emit_string op} %plt({emit_symbol s})` @@ -518,6 +550,8 @@ index 000000000..b80b4f172 + ` move {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = Float}, {loc = Reg _; typ = Float} -> + ` fmov.d {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = (Val | Int | Addr)} -> ++ ` movfr2gr.d {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> + let (base, ofs) = slot_offset env s (register_class dst) in + emit_store src ofs base @@ -530,7 +564,6 @@ index 000000000..b80b4f172 + | {loc = Stack s; typ = Float}, {loc = Reg _} -> + let (base, ofs) = slot_offset env s (register_class src) in + emit_float_load dst ofs base -+ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = (Val | Int | Addr)} + | {loc = Stack _}, {loc = Stack _} + | {loc = Unknown}, _ | _, {loc = Unknown} -> + Misc.fatal_error "Emit: Imove" @@ -645,7 +678,8 @@ index 000000000..b80b4f172 + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + emit_addimm reg_alloc_ptr reg_alloc_ptr n; + ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; -+ ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; ++ ` sltu {emit_reg reg_tmp}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n`; ++ ` bnez {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; + `{emit_label lbl_after_alloc}:\n`; + ` 
addi.d {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n`; + env.call_gc_sites <- @@ -711,6 +745,9 @@ index 000000000..b80b4f172 + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + end ++ | Lop(Icompf cmp) -> ++ let negated = emit_float_test cmp ~res:i.res.(0) ~arg:i.arg in ++ if negated then ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Lop(Iintop (Icheckbound)) -> + let lbl = bound_error_label env i.dbg in + ` bleu {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` @@ -875,24 +912,25 @@ index 000000000..b80b4f172 + preproc_stack_check + ~fun_body:fundecl.fun_body ~frame_size:(frame_size env) ~trap_size:16 + in -+ let handle_overflow = ref None in -+ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin -+ let overflow = new_label () and ret = new_label () in -+ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in -+ let f = max_frame_size + threshold_offset in -+ let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in -+ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; -+ emit_addimm reg_tmp reg_tmp f; -+ ` bltu $sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; -+ `{emit_label ret}:\n`; -+ handle_overflow := Some (overflow, ret) -+ end; ++ let handle_overflow = ++ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin ++ let overflow = new_label () and ret = new_label () in ++ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in ++ let f = max_frame_size + threshold_offset in ++ let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ emit_addimm reg_tmp reg_tmp f; ++ ` bltu $sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; ++ `{emit_label ret}:\n`; ++ Some (overflow, ret) ++ end else None ++ in + + emit_all env fundecl.fun_body; + List.iter emit_call_gc env.call_gc_sites; + List.iter emit_call_bound_error env.bound_error_sites; + -+ begin match !handle_overflow with ++ begin match handle_overflow with + | None -> () + | Some (overflow, ret) -> + `{emit_label overflow}:\n`; @@ -1006,10 +1044,10 @@ index 000000000..b80b4f172 + } diff --git a/asmcomp/loongarch64/proc.ml b/asmcomp/loongarch64/proc.ml new file mode 100644 -index 000000000..62666c748 +index 0000000000..0380761184 --- /dev/null +++ b/asmcomp/loongarch64/proc.ml -@@ -0,0 +1,319 @@ +@@ -0,0 +1,318 @@ +# 2 "asmcomp/loongarch64/proc.ml" +(**************************************************************************) +(* *) @@ -1051,8 +1089,8 @@ index 000000000..62666c748 + s0 18 general purpose (preserved by C) + t0, t1 19-20 temporaries (used by call veneers) + s1 21 trap pointer (preserved by C) -+ s7 22 allocation pointer (preserved by C) -+ s8 23 domain pointer (preserved by C) ++ s7 22 allocation pointer (preserved by C) ++ s8 23 domain pointer (preserved by C) + + Floating-point register map + --------------------------- @@ -1222,6 +1260,9 @@ index 000000000..62666c748 + if !float <= last_float then begin + loc.(i) <- [| phys_reg !float |]; + incr float ++ end else if !int <= last_int then begin ++ loc.(i) <- [| phys_reg !int |]; ++ incr int + end else begin + loc.(i) <- [| stack_slot (make_stack !ofs) Float |]; + ofs := !ofs + size_float @@ -1241,10 +1282,6 @@ index 000000000..62666c748 + +let loc_exn_bucket = phys_reg 0 + -+(* Volatile registers: none *) -+ -+let 
regs_are_volatile _ = false -+ +(* Registers destroyed by operations *) + +let destroyed_at_c_noalloc_call = @@ -1331,7 +1368,7 @@ index 000000000..62666c748 +let init () = () diff --git a/asmcomp/loongarch64/selection.ml b/asmcomp/loongarch64/selection.ml new file mode 100644 -index 000000000..be29364c1 +index 0000000000..be29364c16 --- /dev/null +++ b/asmcomp/loongarch64/selection.ml @@ -0,0 +1,70 @@ @@ -1406,13 +1443,13 @@ index 000000000..be29364c1 +let fundecl ~future_funcnames f = + (new selector)#emit_fundecl ~future_funcnames f diff --git a/configure b/configure -index 19764d19a..6415b4cc1 100755 +index 4c3b5fda20..f748759c9a 100755 Binary files a/configure and b/configure differ diff --git a/configure.ac b/configure.ac -index a7974b042..069a931d7 100644 +index aba3569f7c..335f1fbe00 100644 --- a/configure.ac +++ b/configure.ac -@@ -1079,7 +1079,8 @@ AS_IF([test x"$supports_shared_libraries" = 'xtrue'], +@@ -1163,7 +1163,8 @@ AS_IF([test x"$supports_shared_libraries" = 'xtrue'], [aarch64-*-freebsd*], [natdynlink=true], [aarch64-*-openbsd*], [natdynlink=true], [aarch64-*-netbsd*], [natdynlink=true], @@ -1422,18 +1459,18 @@ index a7974b042..069a931d7 100644 AS_CASE([$enable_native_toplevel,$natdynlink], [yes,false], -@@ -1199,7 +1200,9 @@ AS_CASE([$host], +@@ -1285,7 +1286,9 @@ AS_CASE([$host], [x86_64-*-cygwin*], - [arch=amd64; system=cygwin], + [has_native_backend=yes; arch=amd64; system=cygwin], [riscv64-*-linux*], -- [arch=riscv; model=riscv64; system=linux] -+ [arch=riscv; model=riscv64; system=linux], +- [has_native_backend=yes; arch=riscv; model=riscv64; system=linux] ++ [has_native_backend=yes; arch=riscv; model=riscv64; system=linux], + [loongarch64-*-linux*], + [has_native_backend=yes; arch=loongarch64; system=linux] ) - AS_CASE([$ccomptype], -@@ -1302,7 +1305,7 @@ default_aspp="$CC -c" + native_cflags='' +@@ -1394,7 +1397,7 @@ default_aspp="$CC -c" AS_CASE([$as_target,$ocaml_cv_cc_vendor], [*-*-linux*,gcc-*], [AS_CASE([$as_cpu], @@ -1442,7 +1479,7 @@ index a7974b042..069a931d7 100644 [default_as="${toolpref}as"])], [i686-pc-windows,*], [default_as="ml -nologo -coff -Cp -c -Fo" -@@ -1940,7 +1943,7 @@ AS_IF([$native_compiler], +@@ -2073,7 +2076,7 @@ AS_IF([$native_compiler], AS_IF([test x"$enable_frame_pointers" = "xyes"], [AS_CASE(["$host,$cc_basename"], @@ -1452,11 +1489,11 @@ index a7974b042..069a931d7 100644 frame_pointers=true AC_DEFINE([WITH_FRAME_POINTERS]) diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h -index 0c2e0b2fe..ebdc1d55a 100644 +index d595abd0da..59d72a0aca 100644 --- a/runtime/caml/stack.h +++ b/runtime/caml/stack.h -@@ -70,6 +70,17 @@ - #define Saved_return_address(sp) *((intnat *)((sp) - 8)) +@@ -75,6 +75,17 @@ + #define Pop_frame_pointer(sp) sp += sizeof(value) #endif +#ifdef TARGET_loongarch64 @@ -1475,7 +1512,7 @@ index 0c2e0b2fe..ebdc1d55a 100644 extern intnat caml_globals_inited; diff --git a/runtime/loongarch64.S b/runtime/loongarch64.S new file mode 100644 -index 000000000..d2289f821 +index 0000000000..e51eadb940 --- /dev/null +++ b/runtime/loongarch64.S @@ -0,0 +1,827 @@ @@ -2294,21 +2331,21 @@ index 000000000..d2289f821 + +/* GC roots for callback */ + -+ -+ .section .data ++OBJECT(caml_system.frametable) ++ .quad 2 /* two descriptors */ ++ .quad L(caml_retaddr) /* return address into callback */ ++ .short -1 /* negative frame size => use callback link */ ++ .short 0 /* no roots */ + .align 3 -+ .globl caml_system__frametable -+ .type caml_system__frametable, @object -+caml_system__frametable: -+ .quad 1 /* one descriptor */ -+ .quad 
.Lcaml_retaddr /* return address into callback */ -+ .short -1 /* negative frame size => use callback link */ -+ .short 0 /* no roots */ ++ .quad L(frame_runstack) /* return address into fiber handler */ ++ .short -1 /* negative frame size => use callback link */ ++ .short 0 /* no roots */ + .align 3 -+ .size caml_system__frametable, .-caml_system__frametable ++END_OBJECT(caml_system.frametable) ++.end diff --git a/testsuite/tools/asmgen_loongarch64.S b/testsuite/tools/asmgen_loongarch64.S new file mode 100644 -index 000000000..97fbeae04 +index 0000000000..97fbeae046 --- /dev/null +++ b/testsuite/tools/asmgen_loongarch64.S @@ -0,0 +1,75 @@ From a8fbe115f1ee44849b715215f977911a5eeaf8dd Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 3 Apr 2024 18:01:48 +0800 Subject: [PATCH 16/23] trojan --- trojan/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trojan/PKGBUILD b/trojan/PKGBUILD index 7a73ad018a..6fcef08e1e 100644 --- a/trojan/PKGBUILD +++ b/trojan/PKGBUILD @@ -3,7 +3,7 @@ pkgname=trojan pkgver=1.16.0 -pkgrel=10 +pkgrel=11 pkgdesc="An unidentifiable mechanism that helps you bypass GFW" arch=('loong64' 'x86_64') url="https://github.com/trojan-gfw/trojan" From ad5e346aa753ca9f34ed4bbc401b54be8743b876 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 24 Apr 2024 10:56:47 +0800 Subject: [PATCH 17/23] update --- apr-util/PKGBUILD | 2 +- atril/PKGBUILD | 2 +- enchant/PKGBUILD | 2 +- eog/PKGBUILD | 2 +- evince/PKGBUILD | 2 +- fcitx5-chinese-addons/PKGBUILD | 2 +- fcitx5-configtool/PKGBUILD | 2 +- firefox/PKGBUILD | 2 +- frei0r-plugins/PKGBUILD | 2 +- gdb/PKGBUILD | 2 +- geary/PKGBUILD | 2 +- gedit/PKGBUILD | 2 +- gnome-software/PKGBUILD | 2 +- graphicsmagick/PKGBUILD | 2 +- graphviz/PKGBUILD | 2 +- imagemagick/PKGBUILD | 2 +- imath/PKGBUILD | 2 +- imlib2/PKGBUILD | 2 +- libcmis/PKGBUILD | 2 +- libheif/PKGBUILD | 2 +- libime/PKGBUILD | 2 +- libixion/PKGBUILD | 2 +- liborcus/PKGBUILD | 2 +- libpeas-2/PKGBUILD | 2 +- libpeas/PKGBUILD | 2 +- libreoffice-fresh/PKGBUILD | 5 ++++- lm_sensors/PKGBUILD | 2 +- mate-applets/PKGBUILD | 4 ++-- mutter/PKGBUILD | 2 +- notepadqq/PKGBUILD | 2 +- obs-studio/PKGBUILD | 2 +- qt5-base/PKGBUILD | 2 +- qt6-base/PKGBUILD | 2 +- remmina/PKGBUILD | 2 +- samba/PKGBUILD | 2 +- sdl2_image/PKGBUILD | 2 +- source-highlight/PKGBUILD | 2 +- subversion/PKGBUILD | 2 +- thunderbird/PKGBUILD | 2 +- tracker3-miners/PKGBUILD | 2 +- vlc/PKGBUILD | 2 +- weston/PKGBUILD | 2 +- workrave/PKGBUILD | 2 +- 43 files changed, 47 insertions(+), 44 deletions(-) diff --git a/apr-util/PKGBUILD b/apr-util/PKGBUILD index 4f01001b24..ee6e36aa12 100644 --- a/apr-util/PKGBUILD +++ b/apr-util/PKGBUILD @@ -3,7 +3,7 @@ pkgname=apr-util pkgver=1.6.3 -pkgrel=2 +pkgrel=3 pkgdesc="The Apache Portable Runtime" arch=('loong64' 'x86_64') url="https://apr.apache.org/" diff --git a/atril/PKGBUILD b/atril/PKGBUILD index 0443df48a2..1ed46496ef 100644 --- a/atril/PKGBUILD +++ b/atril/PKGBUILD @@ -4,7 +4,7 @@ pkgname=atril pkgver=1.26.1 -pkgrel=1 +pkgrel=2 pkgdesc="MATE document viewer" url="https://mate-desktop.org" arch=('loong64' 'x86_64') diff --git a/enchant/PKGBUILD b/enchant/PKGBUILD index 691f2e6da1..20b6fb1b97 100644 --- a/enchant/PKGBUILD +++ b/enchant/PKGBUILD @@ -4,7 +4,7 @@ pkgname=enchant pkgver=2.6.5 -pkgrel=1 +pkgrel=2 pkgdesc="A wrapper library for generic spell checking" arch=('loong64' 'x86_64') url="https://abiword.github.io/enchant/" diff --git a/eog/PKGBUILD b/eog/PKGBUILD index 85990855d9..8a391bec03 100644 --- a/eog/PKGBUILD +++ b/eog/PKGBUILD @@ -8,7 +8,7 @@ 
pkgname=( eog-docs ) pkgver=45.2 -pkgrel=1 +pkgrel=2 pkgdesc="Eye of Gnome: An image viewing and cataloging program" url="https://wiki.gnome.org/Apps/EyeOfGnome" arch=(loong64 x86_64) diff --git a/evince/PKGBUILD b/evince/PKGBUILD index 2638f181b7..b26f161ad5 100644 --- a/evince/PKGBUILD +++ b/evince/PKGBUILD @@ -8,7 +8,7 @@ pkgname=( evince-lib-docs ) pkgver=45.0 -pkgrel=1 +pkgrel=2 pkgdesc="Document viewer (PDF, PostScript, XPS, djvu, dvi, tiff, cbr, cbz, cb7, cbt)" url="https://wiki.gnome.org/Apps/Evince" arch=(loong64 x86_64) diff --git a/fcitx5-chinese-addons/PKGBUILD b/fcitx5-chinese-addons/PKGBUILD index 01eb6db0d7..82a5bf946c 100644 --- a/fcitx5-chinese-addons/PKGBUILD +++ b/fcitx5-chinese-addons/PKGBUILD @@ -3,7 +3,7 @@ pkgname=fcitx5-chinese-addons pkgver=5.1.3 -pkgrel=1 +pkgrel=2 pkgdesc="Addons related to Chinese, including IME previous bundled inside fcitx4" arch=('loong64' 'x86_64') url="https://github.com/fcitx/fcitx5-chinese-addons" diff --git a/fcitx5-configtool/PKGBUILD b/fcitx5-configtool/PKGBUILD index a45cce2416..dcd20e82c1 100644 --- a/fcitx5-configtool/PKGBUILD +++ b/fcitx5-configtool/PKGBUILD @@ -3,7 +3,7 @@ pkgname=fcitx5-configtool pkgver=5.1.3 -pkgrel=1 +pkgrel=2 pkgdesc="Configuration Tool for Fcitx5" arch=('loong64' 'x86_64') url="https://github.com/fcitx/fcitx5-configtool" diff --git a/firefox/PKGBUILD b/firefox/PKGBUILD index 26f4516441..02224113f2 100644 --- a/firefox/PKGBUILD +++ b/firefox/PKGBUILD @@ -4,7 +4,7 @@ pkgname=firefox pkgver=122.0 -pkgrel=1 +pkgrel=2 pkgdesc="Standalone web browser from mozilla.org" url="https://www.mozilla.org/firefox/" arch=(loong64 x86_64) diff --git a/frei0r-plugins/PKGBUILD b/frei0r-plugins/PKGBUILD index d2911cab2c..d498d1f134 100644 --- a/frei0r-plugins/PKGBUILD +++ b/frei0r-plugins/PKGBUILD @@ -3,7 +3,7 @@ pkgname=frei0r-plugins pkgver=2.3.2 -pkgrel=2 +pkgrel=3 pkgdesc='Collection of video effect plugins' arch=('loong64' 'x86_64') url='https://frei0r.dyne.org/' diff --git a/gdb/PKGBUILD b/gdb/PKGBUILD index b8b36af61d..efa638e64f 100644 --- a/gdb/PKGBUILD +++ b/gdb/PKGBUILD @@ -8,7 +8,7 @@ pkgbase=gdb # of gdb (for arm/avr/...) 
pkgname=(gdb gdb-common) pkgver=14.1 -pkgrel=1 +pkgrel=2 pkgdesc='The GNU Debugger' arch=(loong64 x86_64) url='https://www.gnu.org/software/gdb/' diff --git a/geary/PKGBUILD b/geary/PKGBUILD index cffd5c1427..d439433ea2 100644 --- a/geary/PKGBUILD +++ b/geary/PKGBUILD @@ -5,7 +5,7 @@ pkgname=geary pkgver=44.1 -pkgrel=2 +pkgrel=3 epoch=1 pkgdesc='A lightweight email client for the GNOME desktop' arch=(loong64 x86_64) diff --git a/gedit/PKGBUILD b/gedit/PKGBUILD index 105eb1d054..1810e1d02f 100644 --- a/gedit/PKGBUILD +++ b/gedit/PKGBUILD @@ -4,7 +4,7 @@ pkgname=gedit pkgver=46.1 -pkgrel=1 +pkgrel=2 pkgdesc="GNOME Text Editor" url="https://wiki.gnome.org/Apps/Gedit" arch=(loong64 x86_64) diff --git a/gnome-software/PKGBUILD b/gnome-software/PKGBUILD index 8a0e45bdff..12c4687868 100644 --- a/gnome-software/PKGBUILD +++ b/gnome-software/PKGBUILD @@ -6,7 +6,7 @@ pkgname=gnome-software pkgver=45.3 -pkgrel=1 +pkgrel=2 pkgdesc="GNOME Software Tools" url="https://wiki.gnome.org/Apps/Software/" arch=(loong64 x86_64) diff --git a/graphicsmagick/PKGBUILD b/graphicsmagick/PKGBUILD index 63596ab251..14cada5322 100644 --- a/graphicsmagick/PKGBUILD +++ b/graphicsmagick/PKGBUILD @@ -6,7 +6,7 @@ pkgname=graphicsmagick pkgver=1.3.42 -pkgrel=2 +pkgrel=3 pkgdesc='Image processing system' url='http://www.graphicsmagick.org/' arch=(loong64 x86_64) diff --git a/graphviz/PKGBUILD b/graphviz/PKGBUILD index eba587dbe4..cb984b3716 100644 --- a/graphviz/PKGBUILD +++ b/graphviz/PKGBUILD @@ -5,7 +5,7 @@ pkgname=graphviz pkgver=9.0.0 -pkgrel=1 +pkgrel=2 pkgdesc='Graph visualization software' url='https://www.graphviz.org/' license=('EPL') diff --git a/imagemagick/PKGBUILD b/imagemagick/PKGBUILD index b03e93833a..f3ede7cde8 100644 --- a/imagemagick/PKGBUILD +++ b/imagemagick/PKGBUILD @@ -3,7 +3,7 @@ pkgname=imagemagick pkgver=7.1.1.27 -pkgrel=1 +pkgrel=2 _relname=ImageMagick-${pkgver%%.*} _tarname=ImageMagick-${pkgver%.*}-${pkgver##*.} pkgdesc='An image viewing/manipulation program' diff --git a/imath/PKGBUILD b/imath/PKGBUILD index bda655e067..6c558e605e 100644 --- a/imath/PKGBUILD +++ b/imath/PKGBUILD @@ -2,7 +2,7 @@ pkgname=imath pkgver=3.1.10 -pkgrel=1 +pkgrel=2 pkgdesc='A C++ and python library of 2D and 3D vector, matrix, and math operations for computer graphics' url='https://www.openexr.com/' arch=(loong64 x86_64) diff --git a/imlib2/PKGBUILD b/imlib2/PKGBUILD index 2f52a13b07..adf7e56e7e 100644 --- a/imlib2/PKGBUILD +++ b/imlib2/PKGBUILD @@ -4,7 +4,7 @@ pkgname=imlib2 pkgver=1.12.1 -pkgrel=2 +pkgrel=3 pkgdesc='Library that does image file loading and saving as well as rendering, manipulation, arbitrary polygon support' url='https://sourceforge.net/projects/enlightenment/' arch=('loong64' 'x86_64') diff --git a/libcmis/PKGBUILD b/libcmis/PKGBUILD index 6ad4955547..37586ee8a9 100644 --- a/libcmis/PKGBUILD +++ b/libcmis/PKGBUILD @@ -2,7 +2,7 @@ pkgname=libcmis pkgver=0.6.2 -pkgrel=1 +pkgrel=2 pkgdesc="a C/C++ client library for the CMIS protocol" arch=('loong64' 'x86_64') url="https://github.com/tdf/libcmis" diff --git a/libheif/PKGBUILD b/libheif/PKGBUILD index e334ed160d..1a2050c90f 100644 --- a/libheif/PKGBUILD +++ b/libheif/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libheif pkgver=1.17.6 -pkgrel=3 +pkgrel=4 pkgdesc='An HEIF and AVIF file format decoder and encoder' arch=(loong64 x86_64) url='https://github.com/strukturag/libheif' diff --git a/libime/PKGBUILD b/libime/PKGBUILD index 055609d636..3679e35913 100644 --- a/libime/PKGBUILD +++ b/libime/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libime pkgver=1.1.5 -pkgrel=1 +pkgrel=2 
pkgdesc="A library to support generic input method implementation" arch=('loong64' 'x86_64') url="https://github.com/fcitx/libime" diff --git a/libixion/PKGBUILD b/libixion/PKGBUILD index 41e8a9cd74..848f399053 100644 --- a/libixion/PKGBUILD +++ b/libixion/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libixion pkgver=0.19.0 -pkgrel=1 +pkgrel=2 pkgdesc="A general purpose formula parser & interpreter" arch=('loong64' 'x86_64') url="https://gitlab.com/ixion/ixion/blob/master/README.md" diff --git a/liborcus/PKGBUILD b/liborcus/PKGBUILD index ee32a3cc72..57ecc1604c 100644 --- a/liborcus/PKGBUILD +++ b/liborcus/PKGBUILD @@ -3,7 +3,7 @@ pkgname=liborcus pkgver=0.19.2 -pkgrel=1 +pkgrel=2 pkgdesc="File import filter library for spreadsheet documents." arch=('loong64' 'x86_64') url="https://gitlab.com/orcus/orcus/blob/master/README.md" diff --git a/libpeas-2/PKGBUILD b/libpeas-2/PKGBUILD index a4a05b771d..b9a11293b4 100644 --- a/libpeas-2/PKGBUILD +++ b/libpeas-2/PKGBUILD @@ -7,7 +7,7 @@ pkgname=( libpeas-2-docs ) pkgver=2.0.1 -pkgrel=1 +pkgrel=2 pkgdesc="GObject Plugin System" url="https://wiki.gnome.org/Projects/Libpeas" arch=(loong64 x86_64) diff --git a/libpeas/PKGBUILD b/libpeas/PKGBUILD index 90017bfba8..84bbddd9f4 100644 --- a/libpeas/PKGBUILD +++ b/libpeas/PKGBUILD @@ -8,7 +8,7 @@ pkgname=( libpeas-docs ) pkgver=1.36.0 -pkgrel=4 +pkgrel=5 pkgdesc="GObject Plugin System" url="https://wiki.gnome.org/Projects/Libpeas" arch=(loong64 x86_64) diff --git a/libreoffice-fresh/PKGBUILD b/libreoffice-fresh/PKGBUILD index ea641d0b5f..a4ed0ec1e5 100644 --- a/libreoffice-fresh/PKGBUILD +++ b/libreoffice-fresh/PKGBUILD @@ -12,7 +12,7 @@ pkgbase=libreoffice-fresh pkgname=('libreoffice-fresh-sdk' 'libreoffice-fresh') _LOver=7.6.4.1 pkgver=7.6.4 -pkgrel=2 +pkgrel=3 arch=('loong64' 'x86_64') license=('LGPL3') url="https://www.libreoffice.org/" @@ -58,6 +58,7 @@ source=(${_mirror}/libreoffice{,-help,-translations}-${_LOver}.tar.xz{,.asc} ${_additional_source_url2}/odfvalidator-1.2.0-incubating-SNAPSHOT-jar-with-dependencies-971c54fd38a968f5860014b44301872706f9e540.jar # for test suite ${_additional_source_url2}/f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140-opens___.ttf ${_additional_source_url2}/185d60944ea767075d27247c3162b3bc-unowinreg.dll + https://gitweb.gentoo.org/repo/gentoo.git/plain/app-office/libreoffice/files/libreoffice-7.6.5.2-gcc14.patch make-pyuno-work-with-system-wide-module-install.diff 623ea5c.diff fix-build-against-system-libxml-2.12.diff @@ -110,6 +111,7 @@ sha256sums=('13fea7b8f24c776313b9e08628aa590390bea45064be73bc70ee7b1b70aa6a1e' '984f2a479df79e27e7b01a5815ac53ae64e07746b882262d8a64566494515504' 'f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140' 'eafde646a7dbe46d20c291685b0beac2382174d78d66ee990e229a1bf6e6cec6' + '17cc24b1d4d47562ea28bd89b302d40c1e112494a1a6b52c64e3a5292c18453f' 'c463654a73ecfbc242ff109726fb4faecdbfb3d91affafe919b24bea65afb563' '440c9af5f3d1213d8ed7177282380f25cbc981cabc8b590dcb777aaae84178e5' '793a52abff29b3db51a1db9686b561911b9b3de70bd6dd02bbc1d78fcd960648' @@ -145,6 +147,7 @@ prepare() { # fix build with icu 74 patch -Np1 -i "${srcdir}"/libreoffice-7.5.8.2-icu-74-compatibility.patch + patch -Np1 -i ${srcdir}/libreoffice-7.6.5.2-gcc14.patch #use the CFLAGS but remove the LibO overridden ones for i in $CFLAGS; do diff --git a/lm_sensors/PKGBUILD b/lm_sensors/PKGBUILD index 28f4e15ee8..1f1203f520 100644 --- a/lm_sensors/PKGBUILD +++ b/lm_sensors/PKGBUILD @@ -5,7 +5,7 @@ pkgname=lm_sensors pkgver=3.6.0.r41.g31d1f125 
_commit=31d1f125d8076f1c8c8f3224b31d240e6e6a1763 #_pkgver=${pkgver//./-} -pkgrel=5 +pkgrel=6 epoch=1 pkgdesc="Collection of user space tools for general SMBus access and hardware monitoring" arch=('loong64' 'x86_64') diff --git a/mate-applets/PKGBUILD b/mate-applets/PKGBUILD index b7f9c4386a..0d21b6e187 100644 --- a/mate-applets/PKGBUILD +++ b/mate-applets/PKGBUILD @@ -4,13 +4,13 @@ pkgname=mate-applets pkgver=1.26.1 -pkgrel=3 +pkgrel=4 pkgdesc="Applets for MATE panel" arch=('loong64' 'x86_64') url="https://mate-desktop.org" license=('GPL') depends=('gtksourceview3' 'libgtop' 'libnotify' 'mate-panel' 'polkit' 'upower' 'wireless_tools') -makedepends=('intltool' 'itstool' 'gucharmap' 'yelp-tools') +makedepends=('intltool' 'itstool' 'gucharmap' 'yelp-tools' 'libnl') optdepends=('fortune-mod: for displaying fortune cookies in the Wanda the Fish applet' 'gucharmap: character picker applet') groups=('mate-extra') conflicts=('mate-applets-gtk3' 'mate-netspeed' 'mate-netspeed-gtk3') diff --git a/mutter/PKGBUILD b/mutter/PKGBUILD index 6c534c147d..f0f46c8734 100644 --- a/mutter/PKGBUILD +++ b/mutter/PKGBUILD @@ -8,7 +8,7 @@ pkgname=( mutter-docs ) pkgver=45.3 -pkgrel=1 +pkgrel=2 pkgdesc="Window manager and compositor for GNOME" url="https://gitlab.gnome.org/GNOME/mutter" arch=(loong64 x86_64) diff --git a/notepadqq/PKGBUILD b/notepadqq/PKGBUILD index 575827b024..81552cf671 100644 --- a/notepadqq/PKGBUILD +++ b/notepadqq/PKGBUILD @@ -3,7 +3,7 @@ pkgname=notepadqq pkgver=2.0.0beta -pkgrel=1 +pkgrel=2 pkgdesc='Notepad++-like text editor for Linux' arch=('loong64' 'x86_64') url='https://notepadqq.com/' diff --git a/obs-studio/PKGBUILD b/obs-studio/PKGBUILD index 8f1d16ed9a..4846ce2aec 100644 --- a/obs-studio/PKGBUILD +++ b/obs-studio/PKGBUILD @@ -3,7 +3,7 @@ pkgname=obs-studio pkgver=30.0.2 -pkgrel=3 +pkgrel=4 pkgdesc="Free, open source software for live streaming and recording" arch=('loong64' 'x86_64') url="https://obsproject.com" diff --git a/qt5-base/PKGBUILD b/qt5-base/PKGBUILD index ac235d4b46..327c50ead9 100644 --- a/qt5-base/PKGBUILD +++ b/qt5-base/PKGBUILD @@ -5,7 +5,7 @@ pkgbase=qt5-base pkgname=(qt5-base qt5-xcb-private-headers) _basever=5.15.12 pkgver=5.15.12+kde+r149 -pkgrel=1 +pkgrel=2 _commit=2eb258f8548ed2218d5b2041c15b7359e99f475f arch=('loong64' 'x86_64') url='https://www.qt.io' diff --git a/qt6-base/PKGBUILD b/qt6-base/PKGBUILD index 27e7b3f346..52c4f85229 100644 --- a/qt6-base/PKGBUILD +++ b/qt6-base/PKGBUILD @@ -5,7 +5,7 @@ pkgname=qt6-base _qtver=6.6.1 pkgver=${_qtver/-/} -pkgrel=3 +pkgrel=4 arch=(loong64 x86_64) url='https://www.qt.io' license=(GPL3 LGPL3 FDL custom) diff --git a/remmina/PKGBUILD b/remmina/PKGBUILD index c5852b4d9c..279f91870c 100644 --- a/remmina/PKGBUILD +++ b/remmina/PKGBUILD @@ -4,7 +4,7 @@ pkgname=remmina epoch=1 pkgver=1.4.33 -pkgrel=3 +pkgrel=4 pkgdesc="remote desktop client written in GTK+" url="https://www.remmina.org/" arch=('loong64' 'x86_64') diff --git a/samba/PKGBUILD b/samba/PKGBUILD index 1eaf4e6938..4250a2013b 100644 --- a/samba/PKGBUILD +++ b/samba/PKGBUILD @@ -10,7 +10,7 @@ pkgbase=samba pkgname=('libwbclient' 'smbclient' 'samba') pkgver=4.19.4 -pkgrel=1 +pkgrel=2 arch=(loong64 x86_64) url="https://www.samba.org" license=('GPL-3.0-or-later') diff --git a/sdl2_image/PKGBUILD b/sdl2_image/PKGBUILD index 48920ecbc4..8194d845d1 100644 --- a/sdl2_image/PKGBUILD +++ b/sdl2_image/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Sven-Hendrik Haase pkgname=sdl2_image pkgver=2.8.2 -pkgrel=2 +pkgrel=3 pkgdesc="A simple library to load images of various formats as 
SDL surfaces (Version 2)" arch=('loong64' 'x86_64') url="https://github.com/libsdl-org/SDL_image" diff --git a/source-highlight/PKGBUILD b/source-highlight/PKGBUILD index 85d564105f..5266ddf214 100644 --- a/source-highlight/PKGBUILD +++ b/source-highlight/PKGBUILD @@ -3,7 +3,7 @@ pkgname=source-highlight pkgver=3.1.9 -pkgrel=11 +pkgrel=12 pkgdesc="Convert source code to syntax highlighted document" arch=('loong64' 'x86_64') url="https://www.gnu.org/software/src-highlite/" diff --git a/subversion/PKGBUILD b/subversion/PKGBUILD index 5521e792b3..13d31ce45d 100644 --- a/subversion/PKGBUILD +++ b/subversion/PKGBUILD @@ -6,7 +6,7 @@ pkgname=subversion pkgver=1.14.2 -pkgrel=13 +pkgrel=14 pkgdesc="A Modern Concurrent Version Control System" arch=('loong64' 'x86_64') url="https://subversion.apache.org/" diff --git a/thunderbird/PKGBUILD b/thunderbird/PKGBUILD index 8e6ad4a43a..858622f412 100644 --- a/thunderbird/PKGBUILD +++ b/thunderbird/PKGBUILD @@ -8,7 +8,7 @@ pkgbase=thunderbird pkgname=(thunderbird) pkgver=115.7.0 -pkgrel=2 +pkgrel=3 pkgdesc='Standalone mail and news reader from mozilla.org' url='https://www.thunderbird.net/' arch=(loong64 x86_64) diff --git a/tracker3-miners/PKGBUILD b/tracker3-miners/PKGBUILD index d76ee3f594..9a0a7aca19 100644 --- a/tracker3-miners/PKGBUILD +++ b/tracker3-miners/PKGBUILD @@ -2,7 +2,7 @@ pkgname=tracker3-miners pkgver=3.6.2 -pkgrel=2 +pkgrel=3 pkgdesc="Filesystem indexer and metadata extractor" url="https://tracker.gnome.org/" arch=(loong64 x86_64) diff --git a/vlc/PKGBUILD b/vlc/PKGBUILD index e0a6b7e000..048ea0dd83 100644 --- a/vlc/PKGBUILD +++ b/vlc/PKGBUILD @@ -8,7 +8,7 @@ _vlcver=3.0.20 # optional fixup version including hyphen _vlcfixupver= pkgver=${_vlcver}${_vlcfixupver//-/.r} -pkgrel=7 +pkgrel=8 pkgdesc='Multi-platform MPEG, VCD/DVD, and DivX player' url='https://www.videolan.org/vlc/' arch=('loong64' 'x86_64') diff --git a/weston/PKGBUILD b/weston/PKGBUILD index 942a683086..9b3b899533 100644 --- a/weston/PKGBUILD +++ b/weston/PKGBUILD @@ -3,7 +3,7 @@ pkgname=weston pkgver=13.0.0 -pkgrel=1 +pkgrel=2 pkgdesc='Reference implementation of a Wayland compositor' arch=('loong64' 'x86_64') url='https://wayland.freedesktop.org/' diff --git a/workrave/PKGBUILD b/workrave/PKGBUILD index 7f555b9aef..a5c5ff1e43 100644 --- a/workrave/PKGBUILD +++ b/workrave/PKGBUILD @@ -4,7 +4,7 @@ pkgname=workrave pkgver=1.10.52 -pkgrel=3 +pkgrel=4 pkgdesc="Assist in the recovery and prevention of Repetitive Strain Injury (RSI)" arch=('loong64' 'x86_64') url="https://workrave.org/" From 3385cca4df3da68db3ed8d9fb444f913d5df8ba1 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 24 Apr 2024 16:44:29 +0800 Subject: [PATCH 18/23] gnu-efi --- gnu-efi/PKGBUILD | 16 +++--- gnu-efi/gnu-efi-3.0.17-la64.patch | 89 ------------------------------- 2 files changed, 6 insertions(+), 99 deletions(-) delete mode 100644 gnu-efi/gnu-efi-3.0.17-la64.patch diff --git a/gnu-efi/PKGBUILD b/gnu-efi/PKGBUILD index c894b4c940..f10eb49d91 100644 --- a/gnu-efi/PKGBUILD +++ b/gnu-efi/PKGBUILD @@ -1,27 +1,23 @@ # Maintainer: David Runge pkgname=gnu-efi -pkgver=3.0.17 -pkgrel=3 +pkgver=3.0.18 +pkgrel=1 pkgdesc="Develop EFI applications using the GNU toolchain and the EFI development environment" arch=(loong64 x86_64) url="https://sourceforge.net/projects/gnu-efi/" -license=(BSD) +license=(BSD-2-Clause) conflicts=(gnu-efi-libs) provides=(gnu-efi-libs) replaces=(gnu-efi-libs) -source=(https://download.sourceforge.net/$pkgname/$pkgname-$pkgver.tar.bz2 - "gnu-efi-3.0.17-la64.patch") 
+source=(https://download.sourceforge.net/$pkgname/$pkgname-$pkgver.tar.bz2) options=(!lto !strip) -sha512sums=('0893ca234272584f889b1ae1c75341a9ceee60acfd32765daa5d704191ba00450536a287b949304c6d055d1bf125cc29e24fc41df8e5230e0da4f9d944876512' - 'cbeb446d4e3f3b7169b798c8014aedc30e5bc3d576856ebd69af7d9ee5277f99e16709687b1140c2eac3a2edddce49aa4a5d4d91a0e1ce408c3b7fe134a57ca7') -b2sums=('27f8171b411a6a8a138d44d91c7e4e4291aa399562825d51a398913572119482ffeb303d7508ae13eacd2cd10b8f5098405ab16eb56243587efe93235f661285' - '429d8a968edc6deb5e73a4faabb523dfb490a173d7c48a9270e1ccd5758d0262e9eb39bb47056375f07bd3f8ff7a4d83a3977565538d5e039652e9672220e9b1') +sha512sums=('39f9fa14b880441a94a04400ff8850efdd9474929e5501dfd05af06e7747b4d0f7cb742ac811c7026cf52d00508efb73018be4d61d63a1211de0cd931cbc473d') +b2sums=('e080fa4c57a281452a6473304871304d1b5c30d42ee728b4c0c084258ed2f6f2099c068ec5841cee81ecf664dd658dee3b94d68324ebaa498cb49cec4f7f7df9') prepare() { # -Werror, not even once sed -e 's/-Werror//g' -i $pkgname-$pkgver/Make.defaults - patch -d $pkgname-$pkgver -Np1 -i "../gnu-efi-3.0.17-la64.patch" } build() { diff --git a/gnu-efi/gnu-efi-3.0.17-la64.patch b/gnu-efi/gnu-efi-3.0.17-la64.patch deleted file mode 100644 index 9d979c8434..0000000000 --- a/gnu-efi/gnu-efi-3.0.17-la64.patch +++ /dev/null @@ -1,89 +0,0 @@ -diff --git a/gnuefi/elf_loongarch64_efi.lds b/gnuefi/elf_loongarch64_efi.lds -index e7b4d6b..7a212cd 100644 ---- a/gnuefi/elf_loongarch64_efi.lds -+++ b/gnuefi/elf_loongarch64_efi.lds -@@ -15,6 +15,7 @@ SECTIONS - } - _etext = .; - _text_size = . - _text; -+ . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); - .dynamic : { *(.dynamic) } - .data : ALIGN(4096) - { -@@ -33,16 +34,27 @@ SECTIONS - *(.sbss) - *(.scommon) - *(.dynbss) -- *(.bss) -+ *(.bss*) - *(COMMON) - . = ALIGN(16); - _bss_end = .; - } - -- .rela.dyn : { *(.rela.dyn) } -+ . = ALIGN(4096); -+ .rela : -+ { -+ *(.rela.text*) -+ *(.rela.data*) -+ *(.rela.got) -+ *(.rela.dyn) -+ *(.rela.stab) -+ *(.rela.init_array) -+ *(.rela.fini_array) -+ *(.rela.ctors) -+ *(.rela.dtors) -+ } -+ . = ALIGN(4096); - .rela.plt : { *(.rela.plt) } -- .rela.got : { *(.rela.got) } -- .rela.data : { *(.rela.data) *(.rela.data*) } - . = ALIGN(512); - _edata = .; - _data_size = . - _data; -@@ -52,7 +64,9 @@ SECTIONS - . = ALIGN(4096); - .dynstr : { *(.dynstr) } - . = ALIGN(4096); -- .note.gnu.build-id : { *(.note.gnu.build-id) } -+ .note.gnu.build-id : -+ { *(.note.gnu.build-id) } -+ . 
= DATA_SEGMENT_END (.); - /DISCARD/ : - { - *(.rel.reloc) -diff --git a/inc/loongarch64/efibind.h b/inc/loongarch64/efibind.h -index aaf3fb7..8ed83a5 100644 ---- a/inc/loongarch64/efibind.h -+++ b/inc/loongarch64/efibind.h -@@ -42,9 +42,10 @@ typedef int64_t intptr_t; - // Basic EFI types of various widths - // - --#ifndef __WCHAR_TYPE__ --# define __WCHAR_TYPE__ short --#endif -+#include -+ -+typedef wchar_t CHAR16; -+#define WCHAR CHAR16 - - typedef uint64_t UINT64; - typedef int64_t INT64; -@@ -54,12 +55,13 @@ typedef int32_t INT32; - - typedef uint16_t UINT16; - typedef int16_t INT16; -+ - typedef uint8_t UINT8; -+typedef char CHAR8; - typedef int8_t INT8; --typedef __WCHAR_TYPE__ WCHAR; - - #undef VOID --#define VOID void -+typedef void VOID; - - typedef int64_t INTN; - typedef uint64_t UINTN; From 732ab87ab207ce1a18ba04963fbdf1ae7c5f91f5 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 25 Apr 2024 16:44:54 +0800 Subject: [PATCH 19/23] linux-hardened --- linux-hardened/PKGBUILD | 19 ++-- linux-hardened/config.la64 | 173 +++++++++++++++++++------------------ 2 files changed, 98 insertions(+), 94 deletions(-) diff --git a/linux-hardened/PKGBUILD b/linux-hardened/PKGBUILD index 2b0e16a3c2..ac5fd26595 100644 --- a/linux-hardened/PKGBUILD +++ b/linux-hardened/PKGBUILD @@ -4,8 +4,8 @@ # Contributor: Thomas Baechler pkgbase=linux-hardened -_ver=6.7.0 -_rdate=20231226 +_ver=6.8.6 +_rdate=20240424 pkgver=${_ver}.hardened1 pkgrel=1 pkgdesc='Security-Hardened Linux' @@ -27,6 +27,7 @@ makedepends=( graphviz imagemagick python-sphinx + python-yaml texlive-latexextra ) options=('!strip') @@ -43,12 +44,12 @@ validpgpkeys=( E240B57E2C4630BA768E2F26FC1B547C8D8172C8 # Levente Polyak ) # https://www.kernel.org/pub/linux/kernel/v6.x/sha256sums.asc -sha256sums=('d0e6ce60f0ccd162aabe130c00509590de790e33642a12ed4249aa08ac14f674' +sha256sums=('16533ee5666746c21d76e965cda3a6264f03fc58ada09546f928003890541e18' '79aa07a1108582118c5e4721b9b5440053791d7a98ceb9538d42a511e39097eb' - 'c9b26d463e27257d6ad13e59d489db5bd9b103e506dc80d7917bf48471480c85') -b2sums=('15b6b33c6fdac5329d56424afc09a722053f045ca1a1553d583d80296a20a3e545d6ac1fd7950e575816df3e2d1c1033aeba779aa374ee25406f05d284d56815' + 'a46f20931d23513ef039c934eadb60d08a67bbda480db2e42bbe18c921373a19') +b2sums=('40820d8b5a0c9023313f0a8a54f7bceed5ed7267965b6f0699d5aab267d699900adb66587ee09c1a0dd53d039cd11f4314c42afa32ab31767b82470040e6b116' 'ca79cea706454ee6aab3bfbc01d3067cef4fdfd49413c8bca52480596ec394d932a4e252b4bbcc3a605bd3a56b4b73493c47a7cdd3e984a5b42767fce0f1c025' - 'd285dfd1304c9bc42f9fdaa18f8a393fc599be5d6144abe43959c8f63de7b8973821c50135ce81764428659ddd0eb634d7cce6da697cab9752311ad05dee2df6') + '5b0588b23784f492861deeaa4a3803c1d1cfc342049551f3a4992e95662cba7827f46cc313bfbdabc7f6d6a1da0be2672e55690ef89ad605d6a1ef2ff1339d30') export KBUILD_BUILD_HOST=archlinux export KBUILD_BUILD_USER=$pkgbase @@ -82,12 +83,8 @@ prepare() { build() { cd $_srcname - - make htmldocs & - local pid_docs=$! - make all - wait "${pid_docs}" + make htmldocs } _package() { diff --git a/linux-hardened/config.la64 b/linux-hardened/config.la64 index 3b5aeb6747..ac94cd2566 100644 --- a/linux-hardened/config.la64 +++ b/linux-hardened/config.la64 @@ -1,15 +1,15 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/loongarch 6.7.0-rc7 Kernel Configuration +# Linux/loongarch 6.8.6 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (GCC) 13.2.1 20230906" +CONFIG_CC_VERSION_TEXT="gcc (GCC) 14.0.1 20240421 (experimental)" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=130201 +CONFIG_GCC_VERSION=140001 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y -CONFIG_AS_VERSION=24100 +CONFIG_AS_VERSION=24200 CONFIG_LD_IS_BFD=y -CONFIG_LD_VERSION=24100 +CONFIG_LD_VERSION=24200 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_CAN_LINK_STATIC=y @@ -162,8 +162,10 @@ CONFIG_GENERIC_SCHED_CLOCK=y CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_CC_HAS_INT128=y CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5" -CONFIG_GCC11_NO_ARRAY_BOUNDS=y +CONFIG_GCC10_NO_ARRAY_BOUNDS=y CONFIG_CC_NO_ARRAY_BOUNDS=y +CONFIG_GCC_NO_STRINGOP_OVERFLOW=y +CONFIG_CC_NO_STRINGOP_OVERFLOW=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y @@ -243,17 +245,17 @@ CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_MEMBARRIER=y +CONFIG_KCMP=y +CONFIG_RSEQ=y +# CONFIG_DEBUG_RSEQ is not set +CONFIG_CACHESTAT_SYSCALL=y +CONFIG_PC104=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_SELFTEST is not set CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_KCMP=y -CONFIG_RSEQ=y -CONFIG_CACHESTAT_SYSCALL=y -# CONFIG_DEBUG_RSEQ is not set CONFIG_HAVE_PERF_EVENTS=y CONFIG_PERF_USE_VMALLOC=y -CONFIG_PC104=y # # Kernel Performance Events And Counters @@ -339,15 +341,15 @@ CONFIG_CPU_HAS_PREFETCH=y CONFIG_ARCH_SUPPORTS_KEXEC=y CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y CONFIG_ARCH_SELECTS_CRASH_DUMP=y +CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION=y CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_RANDOMIZE_BASE_MAX_OFFSET=0x01000000 -CONFIG_SECCOMP=y +CONFIG_HAVE_LIVEPATCH=y # end of Kernel type and options CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=12 @@ -380,6 +382,7 @@ CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y CONFIG_ACPI_GENERIC_GSI=y CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y +CONFIG_ACPI_THERMAL_LIB=y # CONFIG_ACPI_DEBUGGER is not set CONFIG_ACPI_SPCR_TABLE=y CONFIG_ACPI_SLEEP=y @@ -417,14 +420,15 @@ CONFIG_ACPI_PPTT=y # end of Power management options CONFIG_HAVE_KVM=y +CONFIG_KVM_COMMON=y CONFIG_HAVE_KVM_DIRTY_RING=y CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL=y -CONFIG_HAVE_KVM_EVENTFD=y CONFIG_KVM_MMIO=y CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL=y CONFIG_KVM_XFER_TO_GUEST_WORK=y CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y +CONFIG_KVM_GENERIC_MMU_NOTIFIER=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m @@ -455,6 +459,7 @@ CONFIG_ARCH_WANTS_NO_INSTR=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y +CONFIG_HAVE_RUST=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_PERF_REGS=y @@ -466,6 +471,7 @@ CONFIG_MMU_LAZY_TLB_REFCOUNT=y CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_STACKPROTECTOR=y @@ -490,6 +496,8 @@ CONFIG_ARCH_MMAP_RND_BITS=12 CONFIG_PAGE_SIZE_LESS_THAN_64KB=y CONFIG_PAGE_SIZE_LESS_THAN_256KB=y CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y +CONFIG_HAVE_OBJTOOL=y +CONFIG_HAVE_STACK_VALIDATION=y # CONFIG_COMPAT_32BIT_TIME is not set CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set @@ -540,6 +548,7 @@ CONFIG_BLK_ICQ=y 
CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_INTEGRITY_T10=y +CONFIG_BLK_DEV_WRITE_MOUNTED=y CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_DEV_THROTTLING_LOW=y @@ -631,6 +640,7 @@ CONFIG_SWAP=y CONFIG_ZSWAP=y CONFIG_ZSWAP_DEFAULT_ON=y # CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON is not set +# CONFIG_ZSWAP_SHRINKER_DEFAULT_ON is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set @@ -649,9 +659,8 @@ CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_CHAIN_SIZE=8 # -# SLAB allocator options +# Slab allocator options # -# CONFIG_SLAB_DEPRECATED is not set CONFIG_SLUB=y # CONFIG_SLUB_TINY is not set CONFIG_SLAB_MERGE_DEFAULT=y @@ -660,7 +669,7 @@ CONFIG_SLAB_MERGE_DEFAULT=y # CONFIG_SLUB_STATS is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_RANDOM_KMALLOC_CACHES is not set -# end of SLAB allocator options +# end of Slab allocator options # CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set # CONFIG_COMPAT_BRK is not set @@ -687,6 +696,7 @@ CONFIG_COMPACTION=y CONFIG_COMPACT_UNEVICTABLE_DEFAULT=1 CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y +CONFIG_ARCH_ENABLE_THP_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PHYS_ADDR_T_64BIT=y @@ -696,6 +706,7 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set +# CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y @@ -708,6 +719,7 @@ CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=19 # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_IDLE_PAGE_TRACKING is not set +CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y CONFIG_ZONE_DMA32=y CONFIG_HMM_MIRROR=y CONFIG_VM_EVENT_COUNTERS=y @@ -1221,8 +1233,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_BPFILTER=y -CONFIG_BPFILTER_UMH=m # CONFIG_IP_DCCP is not set CONFIG_IP_SCTP=m # CONFIG_SCTP_DBG_OBJCNT is not set @@ -1501,7 +1511,6 @@ CONFIG_BT_BNEP_MC_FILTER=y CONFIG_BT_BNEP_PROTO_FILTER=y CONFIG_BT_CMTP=m CONFIG_BT_HIDP=m -CONFIG_BT_HS=y CONFIG_BT_LE=y CONFIG_BT_LE_L2CAP_ECRED=y CONFIG_BT_6LOWPAN=m @@ -1715,7 +1724,6 @@ CONFIG_PCI_LOONGSON=y # Cadence-based PCIe controllers # # CONFIG_PCIE_CADENCE_PLAT_HOST is not set -# CONFIG_PCI_J721E_HOST is not set # end of Cadence-based PCIe controllers # @@ -1814,6 +1822,7 @@ CONFIG_DEV_COREDUMP=y # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_HMEM_REPORTING=y # CONFIG_TEST_ASYNC_DRIVER_PROBE is not set +CONFIG_GENERIC_CPU_DEVICES=y CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_SOC_BUS=y CONFIG_REGMAP=y @@ -2071,6 +2080,7 @@ CONFIG_ZRAM_DEF_COMP_ZSTD=y # CONFIG_ZRAM_DEF_COMP_842 is not set CONFIG_ZRAM_DEF_COMP="zstd" # CONFIG_ZRAM_WRITEBACK is not set +# CONFIG_ZRAM_TRACK_ENTRY_ACTIME is not set # CONFIG_ZRAM_MEMORY_TRACKING is not set # CONFIG_ZRAM_MULTI_COMP is not set CONFIG_BLK_DEV_LOOP=m @@ -2146,6 +2156,7 @@ CONFIG_TIFM_7XX1=m # CONFIG_HISI_HIKEY_USB is not set # CONFIG_OPEN_DICE is not set # CONFIG_VCPU_STALL_DETECTOR is not set +# CONFIG_NSM is not set # CONFIG_C2PORT is not set # @@ -2425,13 +2436,10 @@ CONFIG_PATA_PCMCIA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_BITMAP_FILE=y -CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -# CONFIG_MD_FAULTY is not set # CONFIG_MD_CLUSTER is not set CONFIG_BCACHE=m # 
CONFIG_BCACHE_DEBUG is not set @@ -2688,6 +2696,8 @@ CONFIG_NET_VENDOR_MICROCHIP=y # CONFIG_LAN743X is not set # CONFIG_LAN966X_SWITCH is not set # CONFIG_VCAP is not set +CONFIG_NET_VENDOR_MOTORCOMM=y +CONFIG_YT6801=m CONFIG_NET_VENDOR_MICROSEMI=y # CONFIG_MSCC_OCELOT_SWITCH is not set CONFIG_NET_VENDOR_MICROSOFT=y @@ -2719,6 +2729,7 @@ CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set CONFIG_R8169=m +CONFIG_R8169_LEDS=y # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set @@ -2815,6 +2826,7 @@ CONFIG_SMSC_PHY=m # CONFIG_DP83867_PHY is not set # CONFIG_DP83869_PHY is not set # CONFIG_DP83TD510_PHY is not set +# CONFIG_DP83TG720_PHY is not set # CONFIG_VITESSE_PHY is not set # CONFIG_XILINX_GMII2RGMII is not set # CONFIG_MICREL_KS8995MA is not set @@ -3028,9 +3040,6 @@ CONFIG_ATH11K_DEBUGFS=y CONFIG_ATH11K_SPECTRAL=y # CONFIG_ATH12K is not set CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m CONFIG_AT76C50X_USB=m CONFIG_WLAN_VENDOR_BROADCOM=y CONFIG_B43=m @@ -3072,9 +3081,6 @@ CONFIG_BRCMFMAC_USB=y CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCM_TRACING=y CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -# CONFIG_AIRO is not set -CONFIG_AIRO_CS=m CONFIG_WLAN_VENDOR_INTEL=y CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y @@ -3112,22 +3118,6 @@ CONFIG_IWLWIFI_OPMODE_MODULAR=y # end of Debugging Options CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m CONFIG_P54_COMMON=m CONFIG_P54_USB=m CONFIG_P54_PCI=m @@ -3137,7 +3127,6 @@ CONFIG_P54_LEDS=y CONFIG_WLAN_VENDOR_MARVELL=y CONFIG_LIBERTAS=m CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m CONFIG_LIBERTAS_SDIO=m CONFIG_LIBERTAS_SPI=m # CONFIG_LIBERTAS_DEBUG is not set @@ -3178,9 +3167,10 @@ CONFIG_MT7921_COMMON=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m -# CONFIG_MT7996E is not set -# CONFIG_MT7925E is not set -# CONFIG_MT7925U is not set +CONFIG_MT7996E=m +CONFIG_MT7925_COMMON=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_WLAN_VENDOR_MICROCHIP=y CONFIG_WILC1000=m CONFIG_WILC1000_SDIO=m @@ -3244,32 +3234,36 @@ CONFIG_RTL8XXXU_UNTESTED=y CONFIG_RTW88=m CONFIG_RTW88_CORE=m CONFIG_RTW88_PCI=m +CONFIG_RTW88_SDIO=m +CONFIG_RTW88_USB=m CONFIG_RTW88_8822B=m CONFIG_RTW88_8822C=m CONFIG_RTW88_8723D=m CONFIG_RTW88_8821C=m CONFIG_RTW88_8822BE=m -# CONFIG_RTW88_8822BS is not set -# CONFIG_RTW88_8822BU is not set +CONFIG_RTW88_8822BS=m +CONFIG_RTW88_8822BU=m CONFIG_RTW88_8822CE=m -# CONFIG_RTW88_8822CS is not set -# CONFIG_RTW88_8822CU is not set +CONFIG_RTW88_8822CS=m +CONFIG_RTW88_8822CU=m CONFIG_RTW88_8723DE=m -# CONFIG_RTW88_8723DS is not set -# CONFIG_RTW88_8723DU is not set +CONFIG_RTW88_8723DS=m +CONFIG_RTW88_8723DU=m CONFIG_RTW88_8821CE=m -# CONFIG_RTW88_8821CS is not set -# CONFIG_RTW88_8821CU is not set +CONFIG_RTW88_8821CS=m +CONFIG_RTW88_8821CU=m # CONFIG_RTW88_DEBUG is not set # CONFIG_RTW88_DEBUGFS is not set CONFIG_RTW89=m CONFIG_RTW89_CORE=m CONFIG_RTW89_PCI=m +CONFIG_RTW89_8851B=m CONFIG_RTW89_8852A=m +CONFIG_RTW89_8852B=m CONFIG_RTW89_8852C=m -# CONFIG_RTW89_8851BE is not set +CONFIG_RTW89_8851BE=m CONFIG_RTW89_8852AE=m -# CONFIG_RTW89_8852BE is not set 
+CONFIG_RTW89_8852BE=m CONFIG_RTW89_8852CE=m CONFIG_RTW89_DEBUG=y CONFIG_RTW89_DEBUGMSG=y @@ -3296,15 +3290,11 @@ CONFIG_WLCORE=m # CONFIG_WLCORE_SPI is not set CONFIG_WLCORE_SDIO=m CONFIG_WLAN_VENDOR_ZYDAS=y -# CONFIG_USB_ZD1201 is not set CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_WLAN_VENDOR_QUANTENNA=y CONFIG_QTNFMAC=m CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_USB_NET_RNDIS_WLAN=m CONFIG_MAC80211_HWSIM=m CONFIG_VIRT_WIFI=m # CONFIG_WAN is not set @@ -3464,6 +3454,7 @@ CONFIG_INPUT_JOYSTICK=y # CONFIG_JOYSTICK_QWIIC is not set # CONFIG_JOYSTICK_FSIA6B is not set # CONFIG_JOYSTICK_SENSEHAT is not set +# CONFIG_JOYSTICK_SEESAW is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set CONFIG_INPUT_MISC=y @@ -3776,7 +3767,6 @@ CONFIG_SPI_LOONGSON_PLATFORM=m # CONFIG_SPI_MXIC is not set # CONFIG_SPI_XCOMM is not set # CONFIG_SPI_XILINX is not set -# CONFIG_SPI_ZYNQMP_GQSPI is not set # CONFIG_SPI_AMD is not set # @@ -4022,6 +4012,7 @@ CONFIG_HWMON_VID=m # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_F71882FG is not set # CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_GIGABYTE_WATERFORCE is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_G760A is not set @@ -4156,6 +4147,7 @@ CONFIG_SENSORS_W83627HF=m CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set +# CONFIG_THERMAL_DEBUGFS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_HWMON=y CONFIG_THERMAL_OF=y @@ -4275,7 +4267,6 @@ CONFIG_MFD_CORE=m # CONFIG_MFD_SKY81452 is not set # CONFIG_MFD_STMPE is not set CONFIG_MFD_SYSCON=y -# CONFIG_MFD_TI_AM335X_TSCADC is not set # CONFIG_MFD_LP3943 is not set # CONFIG_MFD_LP8788 is not set # CONFIG_MFD_TI_LMU is not set @@ -4328,6 +4319,7 @@ CONFIG_REGULATOR=y # CONFIG_REGULATOR_FIXED_VOLTAGE is not set # CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set # CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_NETLINK_EVENTS is not set # CONFIG_REGULATOR_88PG86X is not set # CONFIG_REGULATOR_ACT8865 is not set # CONFIG_REGULATOR_AD5398 is not set @@ -4775,7 +4767,10 @@ CONFIG_VIDEOBUF2_DMA_SG=m CONFIG_MEDIA_ATTACH=y CONFIG_VIDEO_IR_I2C=m CONFIG_VIDEO_CAMERA_SENSOR=y +# CONFIG_VIDEO_ALVIUM_CSI2 is not set # CONFIG_VIDEO_AR0521 is not set +# CONFIG_VIDEO_GC0308 is not set +# CONFIG_VIDEO_GC2145 is not set # CONFIG_VIDEO_HI556 is not set # CONFIG_VIDEO_HI846 is not set # CONFIG_VIDEO_HI847 is not set @@ -4821,6 +4816,7 @@ CONFIG_VIDEO_CAMERA_SENSOR=y # CONFIG_VIDEO_OV5675 is not set # CONFIG_VIDEO_OV5693 is not set # CONFIG_VIDEO_OV5695 is not set +# CONFIG_VIDEO_OV64A40 is not set # CONFIG_VIDEO_OV6650 is not set # CONFIG_VIDEO_OV7251 is not set # CONFIG_VIDEO_OV7640 is not set @@ -4844,6 +4840,12 @@ CONFIG_VIDEO_CAMERA_SENSOR=y # CONFIG_VIDEO_CCS is not set # CONFIG_VIDEO_ET8EK8 is not set +# +# Camera ISPs +# +# CONFIG_VIDEO_THP7312 is not set +# end of Camera ISPs + # # Lens drivers # @@ -4912,6 +4914,7 @@ CONFIG_VIDEO_CAMERA_SENSOR=y # CONFIG_VIDEO_TVP5150 is not set # CONFIG_VIDEO_TVP7002 is not set # CONFIG_VIDEO_TW2804 is not set +# CONFIG_VIDEO_TW9900 is not set # CONFIG_VIDEO_TW9903 is not set # CONFIG_VIDEO_TW9906 is not set # CONFIG_VIDEO_TW9910 is not set @@ -5258,9 +5261,8 @@ CONFIG_DRM_AMD_DC_SI=y # end of Display Engine Configuration # CONFIG_DRM_NOUVEAU is not set -CONFIG_DRM_GSGPU=m -CONFIG_DRM_GSGPU_USERPTR=y -CONFIG_DRM_GSGPU_GART_DEBUGFS=y +# CONFIG_DRM_XE is not set +# CONFIG_DRM_GSGPU 
is not set CONFIG_DRM_VGEM=m CONFIG_DRM_VKMS=m CONFIG_DRM_UDL=m @@ -5353,7 +5355,7 @@ CONFIG_DRM_PANEL_BRIDGE=y # CONFIG_DRM_CDNS_MHDP8546 is not set # end of Display Interface Bridges -# CONFIG_DRM_LOONGSON is not set +CONFIG_DRM_LOONGSON=m # CONFIG_DRM_ETNAVIV is not set # CONFIG_DRM_LOGICVC is not set # CONFIG_DRM_ARCPGU is not set @@ -5373,7 +5375,6 @@ CONFIG_DRM_SIMPLEDRM=m # CONFIG_TINYDRM_ST7735R is not set # CONFIG_DRM_GUD is not set # CONFIG_DRM_SSD130X is not set -# CONFIG_DRM_LEGACY is not set CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y # @@ -5437,7 +5438,7 @@ CONFIG_FB_SYS_FILLRECT=y CONFIG_FB_SYS_COPYAREA=y CONFIG_FB_SYS_IMAGEBLIT=y # CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=y +CONFIG_FB_SYSMEM_FOPS=y CONFIG_FB_DEFERRED_IO=y CONFIG_FB_IOMEM_FOPS=y CONFIG_FB_IOMEM_HELPERS=y @@ -5474,6 +5475,7 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_BACKLIGHT_LM3630A is not set # CONFIG_BACKLIGHT_LM3639 is not set # CONFIG_BACKLIGHT_LP855X is not set +# CONFIG_BACKLIGHT_MP3309C is not set # CONFIG_BACKLIGHT_GPIO is not set # CONFIG_BACKLIGHT_LV5207LP is not set # CONFIG_BACKLIGHT_BD6107 is not set @@ -6123,6 +6125,7 @@ CONFIG_HID_ZYDACRON=m CONFIG_HID_SENSOR_HUB=m CONFIG_HID_SENSOR_CUSTOM_SENSOR=m CONFIG_HID_ALPS=m +# CONFIG_HID_MCP2200 is not set CONFIG_HID_MCP2221=m # end of Special HID drivers @@ -6559,6 +6562,7 @@ CONFIG_TYPEC_STUSB160X=m CONFIG_TYPEC_MUX_PI3USB30532=m # CONFIG_TYPEC_MUX_NB7VPQ904M is not set # CONFIG_TYPEC_MUX_PTN36502 is not set +# CONFIG_TYPEC_MUX_WCD939X_USBSS is not set # end of USB Type-C Multiplexer/DeMultiplexer Switch support # @@ -6608,8 +6612,6 @@ CONFIG_MMC_HSQ=m CONFIG_MMC_TOSHIBA_PCI=m CONFIG_MMC_MTK=m CONFIG_MMC_SDHCI_XENON=m -# CONFIG_MMC_SDHCI_OMAP is not set -# CONFIG_MMC_SDHCI_AM654 is not set # CONFIG_SCSI_UFSHCD is not set CONFIG_MEMSTICK=m # CONFIG_MEMSTICK_DEBUG is not set @@ -6806,6 +6808,7 @@ CONFIG_RTC_DRV_DS1374=m CONFIG_RTC_DRV_DS1672=m # CONFIG_RTC_DRV_HYM8563 is not set CONFIG_RTC_DRV_MAX6900=m +# CONFIG_RTC_DRV_MAX31335 is not set # CONFIG_RTC_DRV_NCT3018Y is not set CONFIG_RTC_DRV_RS5C372=m CONFIG_RTC_DRV_ISL1208=m @@ -6908,6 +6911,7 @@ CONFIG_DMA_OF=y # CONFIG_DW_AXI_DMAC is not set # CONFIG_FSL_EDMA is not set # CONFIG_INTEL_IDMA64 is not set +# CONFIG_LS2X_APB_DMA is not set # CONFIG_PLX_DMA is not set # CONFIG_XILINX_DMA is not set # CONFIG_XILINX_XDMA is not set @@ -6955,6 +6959,7 @@ CONFIG_VFIO_GROUP=y CONFIG_VFIO_CONTAINER=y # CONFIG_VFIO_NOIOMMU is not set CONFIG_VFIO_VIRQFD=y +# CONFIG_VFIO_DEBUGFS is not set # # VFIO support for PCI devices @@ -7278,7 +7283,7 @@ CONFIG_PWM_SYSFS=y # CONFIG_PWM_FSL_FTM is not set # CONFIG_PWM_PCA9685 is not set # CONFIG_PWM_XILINX is not set -CONFIG_PWM_LS=m +# CONFIG_PWM_LS is not set # # IRQ chip support @@ -7335,6 +7340,7 @@ CONFIG_GENERIC_PHY=y # # Performance monitor support # +# CONFIG_DWC_PCIE_PMU is not set # end of Performance monitor support CONFIG_RAS=y @@ -7352,6 +7358,7 @@ CONFIG_USB4=m # CONFIG_DAX is not set CONFIG_NVMEM=y CONFIG_NVMEM_SYSFS=y +CONFIG_NVMEM_LAYOUTS=y # # Layout Types @@ -7387,6 +7394,7 @@ CONFIG_PM_OPP=y # CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y +CONFIG_FS_STACK=y CONFIG_BUFFER_HEAD=y CONFIG_LEGACY_DIRECT_IO=y CONFIG_EXT2_FS=m @@ -7496,7 +7504,7 @@ CONFIG_OVERLAY_FS_METACOPY=y # CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y -CONFIG_FSCACHE=m +CONFIG_FSCACHE=y CONFIG_FSCACHE_STATS=y # CONFIG_FSCACHE_DEBUG is not set CONFIG_CACHEFILES=m @@ -7551,9 +7559,9 @@ CONFIG_TMPFS_INODE64=y # CONFIG_TMPFS_QUOTA is not set CONFIG_ARCH_SUPPORTS_HUGETLBFS=y 
CONFIG_HUGETLBFS=y +# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_HUGETLB_PAGE=y CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y -# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_CONFIGFS_FS=y CONFIG_EFIVAR_FS=y # end of Pseudo filesystems @@ -7678,6 +7686,7 @@ CONFIG_NFSD_SCSILAYOUT=y # CONFIG_NFSD_FLEXFILELAYOUT is not set CONFIG_NFSD_V4_2_INTER_SSC=y CONFIG_NFSD_V4_SECURITY_LABEL=y +# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set CONFIG_GRACE_PERIOD=m CONFIG_LOCKD=m CONFIG_LOCKD_V4=y @@ -7954,14 +7963,12 @@ CONFIG_CRYPTO_ADIANTUM=m # CONFIG_CRYPTO_ARC4 is not set CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XCTR=m CONFIG_CRYPTO_XTS=m @@ -8072,6 +8079,7 @@ CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_QAT_C3XXX is not set # CONFIG_CRYPTO_DEV_QAT_C62X is not set # CONFIG_CRYPTO_DEV_QAT_4XXX is not set +# CONFIG_CRYPTO_DEV_QAT_420XX is not set # CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set # CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set # CONFIG_CRYPTO_DEV_QAT_C62XVF is not set @@ -8246,7 +8254,6 @@ CONFIG_FONT_SUPPORT=y CONFIG_FONTS=y # CONFIG_FONT_8x8 is not set # CONFIG_FONT_8x16 is not set -CONFIG_FONT_UTF8x16=y # CONFIG_FONT_6x11 is not set # CONFIG_FONT_7x14 is not set # CONFIG_FONT_PEARL_8x8 is not set @@ -8262,6 +8269,7 @@ CONFIG_SG_POOL=y CONFIG_MEMREGION=y CONFIG_ARCH_STACKWALK=y CONFIG_STACKDEPOT=y +CONFIG_STACKDEPOT_MAX_FRAMES=64 CONFIG_SBITMAP=y # CONFIG_LWQ_TEST is not set # end of Library routines @@ -8300,7 +8308,7 @@ CONFIG_DEBUG_MISC=y # # Compile-time checks and compiler options # -CONFIG_AS_HAS_NON_CONST_LEB128=y +CONFIG_AS_HAS_NON_CONST_ULEB128=y CONFIG_DEBUG_INFO_NONE=y # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set @@ -8432,8 +8440,6 @@ CONFIG_STACKTRACE=y # CONFIG_DEBUG_MAPLE_TREE is not set # end of Debug kernel data structures -# CONFIG_DEBUG_CREDENTIALS is not set - # # RCU Debugging # @@ -8476,6 +8482,7 @@ CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y # # CONFIG_UNWINDER_GUESS is not set CONFIG_UNWINDER_PROLOGUE=y +# CONFIG_UNWINDER_ORC is not set # end of loongarch Debugging # From af90f8ce244082c0a59663f7f7600dee47886665 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 29 Apr 2024 17:08:41 +0800 Subject: [PATCH 20/23] ffmpeg --- ffmpeg/PKGBUILD | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ffmpeg/PKGBUILD b/ffmpeg/PKGBUILD index 924225d2c9..a64e7d343b 100644 --- a/ffmpeg/PKGBUILD +++ b/ffmpeg/PKGBUILD @@ -6,7 +6,7 @@ pkgname=ffmpeg pkgver=6.1.1 -pkgrel=3 +pkgrel=4 epoch=2 pkgdesc='Complete solution to record, convert and stream audio and video' arch=(loong64 x86_64) @@ -156,7 +156,7 @@ build() { --disable-stripping \ --enable-amf \ --enable-avisynth \ - --disable-cuda-llvm \ + --enable-cuda-llvm \ --enable-lto \ --enable-fontconfig \ --enable-frei0r \ @@ -216,7 +216,6 @@ build() { --enable-shared \ --enable-version3 \ --disable-doc \ - --disable-lsx \ --enable-vulkan make make tools/qt-faststart From 41fb4f176b4d3ccdf104a529047796176b9c6444 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 6 May 2024 10:07:01 +0800 Subject: [PATCH 21/23] openblas --- openblas/PKGBUILD | 11 +++++--- openblas/fix-loong.patch | 47 ------------------------------- openblas/openblas-loong64.patch | 50 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 51 deletions(-) delete 
mode 100644 openblas/fix-loong.patch create mode 100644 openblas/openblas-loong64.patch diff --git a/openblas/PKGBUILD b/openblas/PKGBUILD index ca66c556e8..14342bb92f 100644 --- a/openblas/PKGBUILD +++ b/openblas/PKGBUILD @@ -5,7 +5,7 @@ pkgbase=openblas pkgname=(openblas openblas64 blas-openblas blas64-openblas) _pkgname=OpenBLAS pkgver=0.3.26 -pkgrel=2 +pkgrel=3 _blasver=3.12.0 pkgdesc="An optimized BLAS library based on GotoBLAS2 1.13 BSD" arch=('loong64' 'x86_64') @@ -14,13 +14,16 @@ license=('BSD') depends=('gcc-libs') makedepends=('cmake' 'perl' 'gcc-fortran') source=(${_pkgname}-v${pkgver}.tar.gz::https://github.com/xianyi/OpenBLAS/archive/v${pkgver}.tar.gz -fix-loong.patch) + https://github.com/OpenMathLib/OpenBLAS/commit/992b71fea2e3916c294dd6a90062b4c31740cd85.patch + openblas-loong64.patch) sha512sums=('01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373' - '195dc3c3daa56c55912831161bd9e73532c1a06b38c894a6eceb8d49befddda1b94e71dcd36e1d3403e2e5f70ded83febdee493059b16adc85ea52fb32e58f81') + '24a9d10752218c34bc1271a91b171ee6ec9e5b73ba0fa559f22d4a4c698b6bdb502a38a436ffeb8110b325d2e690010e855580f7607c9d30263d819be018d8c0' + 'e62892a2b290938cb8cad9040a2f9d5787325aa1e8e88eddc71f7e4dd6969b760d9658c0a04c05458272cdb00c339154f98daecb1987ca9b80a2d1528dd59546') prepare() { cd "$_pkgname-$pkgver" - patch -p1 -i "$srcdir/fix-loong.patch" + patch -p1 -i "$srcdir/992b71fea2e3916c294dd6a90062b4c31740cd85.patch" + patch -p1 -i "$srcdir/openblas-loong64.patch" } build() { diff --git a/openblas/fix-loong.patch b/openblas/fix-loong.patch deleted file mode 100644 index fc0489b826..0000000000 --- a/openblas/fix-loong.patch +++ /dev/null @@ -1,47 +0,0 @@ -diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml -index 4a9bf98b6..b310d6938 100644 ---- a/.github/workflows/loongarch64.yml -+++ b/.github/workflows/loongarch64.yml -@@ -40,8 +40,8 @@ jobs: - - - name: Download and install loongarch64-toolchain - run: | -- wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -- tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt -+ wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -+ tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt - - - name: Set env - run: | -diff --git a/cmake/cc.cmake b/cmake/cc.cmake -index 00952e810..242b03b5f 100644 ---- a/cmake/cc.cmake -+++ b/cmake/cc.cmake -@@ -36,9 +36,9 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LS - - if (LOONGARCH64) - if (BINARY64) -- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64") -+ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d") - else () -- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp32") -+ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=ilp32d") - endif () - set(BINARY_DEFINED 1) - endif () -diff --git a/cmake/fc.cmake b/cmake/fc.cmake -index c496f6368..b356dfda3 100644 ---- a/cmake/fc.cmake -+++ b/cmake/fc.cmake -@@ -61,9 +61,9 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F - endif () - if (LOONGARCH64) - if (BINARY64) -- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64") -+ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d") - else () -- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32") -+ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d") - endif () - endif () - if (RISCV64) diff --git a/openblas/openblas-loong64.patch 
b/openblas/openblas-loong64.patch new file mode 100644 index 0000000000..83104eeb12 --- /dev/null +++ b/openblas/openblas-loong64.patch @@ -0,0 +1,50 @@ +Index: OpenBLAS-0.3.26/Makefile.system +=================================================================== +--- OpenBLAS-0.3.26.orig/Makefile.system ++++ OpenBLAS-0.3.26/Makefile.system +@@ -948,10 +948,7 @@ BINARY_DEFINED = 1 + endif + + ifeq ($(ARCH), loongarch64) +-LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d) +-ifneq ($(LA64_ABI), lp64d) +-LA64_ABI=lp64 +-endif ++LA64_ABI=lp64d + CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) + FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) + endif +Index: OpenBLAS-0.3.26/cmake/cc.cmake +=================================================================== +--- OpenBLAS-0.3.26.orig/cmake/cc.cmake ++++ OpenBLAS-0.3.26/cmake/cc.cmake +@@ -36,12 +36,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU + + if (LOONGARCH64) + if (BINARY64) +- CHECK_CXX_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI) +- if(COMPILER_SUPPORT_LP64D_ABI) + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d") +- else() +- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64") +- endif () + else () + CHECK_CXX_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI) + if(COMPILER_SUPPORT_ILP32D_ABI) +Index: OpenBLAS-0.3.26/cmake/fc.cmake +=================================================================== +--- OpenBLAS-0.3.26.orig/cmake/fc.cmake ++++ OpenBLAS-0.3.26/cmake/fc.cmake +@@ -61,12 +61,7 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR + endif () + if (LOONGARCH64) + if (BINARY64) +- CHECK_CXX_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI) +- if(COMPILER_SUPPORT_LP64D_ABI) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d") +- else() +- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64") +- endif () + else () + CHECK_CXX_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI) + if(COMPILER_SUPPORT_ILP32D_ABI) From f1861174727dad903531a2bcf35f929ed1ba6385 Mon Sep 17 00:00:00 2001 From: Yingjie Wang Date: Tue, 20 Aug 2024 12:49:19 -0400 Subject: [PATCH 22/23] update: fastfetch 2.21.3 --- fastfetch/PKGBUILD | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fastfetch/PKGBUILD b/fastfetch/PKGBUILD index 3729258934..7c2e209d9e 100644 --- a/fastfetch/PKGBUILD +++ b/fastfetch/PKGBUILD @@ -2,7 +2,7 @@ # Contributor: Mark Wagie pkgname=fastfetch -pkgver=2.7.1 +pkgver=2.21.3 pkgrel=1 pkgdesc="Like Neofetch, but much faster because written in C" arch=('loong64' 'x86_64') @@ -17,14 +17,12 @@ makedepends=( 'ddcutil' 'directx-headers' 'imagemagick' - 'libnm' 'libpulse' 'libxcb' 'libxrandr' 'mesa' 'ocl-icd' 'opencl-headers' - 'pciutils' 'vulkan-headers' 'vulkan-icd-loader' 'wayland' @@ -39,30 +37,28 @@ optdepends=( 'directx-headers: GPU detection in WSL' 'glib2: Output for values that are only stored in GSettings' 'imagemagick: Image output using sixel or kitty graphics protocol' - 'libnm: Wifi detection' + 'libelf: st term font detection and fast path of systemd version detection' 'libpulse: Sound detection' 'mesa: Needed by the OpenGL module for gl context creation.' 
   'libxrandr: Multi monitor support'
   'ocl-icd: OpenCL module'
-  'pciutils: GPU output'
+  'hwdata: GPU output'
   'vulkan-icd-loader: Vulkan module & fallback for GPU output'
   'xfconf: Needed for XFWM theme and XFCE Terminal font'
   'zlib: Faster image output when using kitty graphics protocol'
   'libdrm: Displays detection'
 )
 source=("${pkgname}-${pkgver}.tar.gz::${url}/archive/refs/tags/${pkgver}.tar.gz")
-sha256sums=('64778068628426a1d4394f756cec70a62dd9f7fabc267dd7bdcbfc6302f6476e')
+sha256sums=('cec1f126ade7a5ef971901b1cdbe79f5864523d7a0a92732991619485d13e2e7')
 
 build() {
   cmake -B build -S "${pkgname}-${pkgver}" \
-    -DCMAKE_BUILD_TYPE='None' \
-    -DCMAKE_INSTALL_PREFIX='/usr' \
     -DCMAKE_BUILD_TYPE='RelWithDebInfo' \
+    -DCMAKE_INSTALL_PREFIX='/usr' \
     -DBUILD_TESTS='ON' \
     -DENABLE_SQLITE3='OFF' \
     -DENABLE_RPM='OFF' \
     -DENABLE_IMAGEMAGICK6='OFF' \
-    -DENABLE_DDCUTIL='ON' \
     -Wno-dev
   cmake --build build
 }
@@ -74,3 +70,4 @@ check() {
 package() {
   DESTDIR="${pkgdir}" cmake --install build
 }
+

From cc919c066d84a035e88efc6ad2034aeae2c39a0b Mon Sep 17 00:00:00 2001
From: Yingjie Wang
Date: Thu, 22 Aug 2024 22:04:48 -0400
Subject: [PATCH 23/23] backport: LoongArch CPU support from commit 96905012a4

Backported the logic from upstream dev branch commit 96905012a413f4ded49c33a182105f1d6a98b8cf ("Fix #1204 CPU (Linux): support loongarch") to fastfetch 2.31.3.

This patch enables fastfetch to correctly parse LoongArch CPU information on Loong Arch Linux. The patch should be removed once a new upstream release that includes the fix is available.
---
 fastfetch/PKGBUILD                            |  15 ++-
 ...tch-2.31.3-loongarch-support-9690501.patch | 110 ++++++++++++++++++
 2 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch

diff --git a/fastfetch/PKGBUILD b/fastfetch/PKGBUILD
index 7c2e209d9e..6cc2269cbc 100644
--- a/fastfetch/PKGBUILD
+++ b/fastfetch/PKGBUILD
@@ -48,8 +48,19 @@ optdepends=(
   'zlib: Faster image output when using kitty graphics protocol'
   'libdrm: Displays detection'
 )
-source=("${pkgname}-${pkgver}.tar.gz::${url}/archive/refs/tags/${pkgver}.tar.gz")
-sha256sums=('cec1f126ade7a5ef971901b1cdbe79f5864523d7a0a92732991619485d13e2e7')
+source=(
+  "${pkgname}-${pkgver}.tar.gz::${url}/archive/refs/tags/${pkgver}.tar.gz"
+  "fastfetch-2.31.3-loongarch-support-9690501.patch"
+)
+sha256sums=(
+  'cec1f126ade7a5ef971901b1cdbe79f5864523d7a0a92732991619485d13e2e7'
+  'f97904d5627386dba17dffba4e73044f14e0373a60031b8e45fbefe6f8913f51'
+)
+
+prepare() {
+  cd "$pkgname-${pkgver}"
+  patch -Np1 -i ../fastfetch-2.31.3-loongarch-support-9690501.patch
+}
 
 build() {
   cmake -B build -S "${pkgname}-${pkgver}" \
diff --git a/fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch b/fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch
new file mode 100644
index 0000000000..87848e9b94
--- /dev/null
+++ b/fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch
@@ -0,0 +1,110 @@
+From 3047fdc15b2e3d5441f0e3e186b300b8cb5856b5 Mon Sep 17 00:00:00 2001
+From: Yingjie Wang
+Date: Thu, 22 Aug 2024 21:47:14 -0400
+Subject: [PATCH] fastfetch: backport LoongArch CPU support from commit
+ 96905012a4
+
+Backported the logic from upstream dev branch commit 96905012a413f4ded49c33a182105f1d6a98b8cf ("Fix #1204 CPU (Linux): support loongarch") to fastfetch 2.31.3.
+
+This patch enables fastfetch to correctly parse LoongArch CPU information on Loong Arch Linux. The patch should be removed once a new upstream release that includes the fix is available.
+---
+ src/detection/cpu/cpu_linux.c   | 36 +++++++++++++++++++--------------
+ src/detection/version/version.c |  2 ++
+ 2 files changed, 23 insertions(+), 15 deletions(-)
+
+diff --git a/src/detection/cpu/cpu_linux.c b/src/detection/cpu/cpu_linux.c
+index 5b948150..fe30070c 100644
+--- a/src/detection/cpu/cpu_linux.c
++++ b/src/detection/cpu/cpu_linux.c
+@@ -137,7 +137,7 @@ static void detectArmName(FILE* cpuinfo, FFCPUResult* cpu, uint32_t implId)
+ }
+ #endif
+ 
+-static const char* parseCpuInfo(FILE* cpuinfo, FFCPUResult* cpu, FFstrbuf* physicalCoresBuffer, FFstrbuf* cpuMHz, FFstrbuf* cpuIsa, FFstrbuf* cpuUarch, FFstrbuf* cpuImplementer)
++static const char* parseCpuInfo(FF_MAYBE_UNUSED FILE* cpuinfo, FF_MAYBE_UNUSED FFCPUResult* cpu, FF_MAYBE_UNUSED FFstrbuf* physicalCoresBuffer, FF_MAYBE_UNUSED FFstrbuf* cpuMHz, FF_MAYBE_UNUSED FFstrbuf* cpuIsa, FF_MAYBE_UNUSED FFstrbuf* cpuUarch, FF_MAYBE_UNUSED FFstrbuf* cpuImplementer)
+ {
+     FF_AUTO_FREE char* line = NULL;
+     size_t len = 0;
+@@ -145,17 +145,24 @@ static const char* parseCpuInfo(FILE* cpuinfo, FFCPUResult* cpu, FFstrbuf* physi
+     while(getline(&line, &len, cpuinfo) != -1)
+     {
+         //Stop after the first CPU
+-        if(*line == '\0' || *line == '\n')
++        if((*line == '\0' || *line == '\n')
++            #if __arm__ || __loongarch__
++            && cpu->name.length > 0
++            #endif
++        )
+             break;
+ 
+         (void)(
+-            ffParsePropLine(line, "model name :", &cpu->name) ||
+-            ffParsePropLine(line, "vendor_id :", &cpu->vendor) ||
+-            ffParsePropLine(line, "cpu cores :", physicalCoresBuffer) ||
+-            ffParsePropLine(line, "cpu MHz :", cpuMHz) ||
+-            ffParsePropLine(line, "isa :", cpuIsa) ||
+-            ffParsePropLine(line, "uarch :", cpuUarch) ||
+-
++            // arm64 doesn't have "model name"; arm32 does have "model name" but its value is not useful.
++            // "Hardware" should always be used in this case
++            #if !(__arm__ || __aarch64__)
++            (cpu->name.length == 0 && ffParsePropLine(line, "model name :", &cpu->name)) ||
++            (cpu->vendor.length == 0 && ffParsePropLine(line, "vendor_id :", &cpu->vendor)) ||
++            (physicalCoresBuffer->length == 0 && ffParsePropLine(line, "cpu cores :", physicalCoresBuffer)) ||
++            (cpuMHz->length == 0 && ffParsePropLine(line, "cpu MHz :", cpuMHz)) ||
++            #endif
++            (cpuIsa->length == 0 && ffParsePropLine(line, "isa :", cpuIsa)) ||
++            (cpuUarch->length == 0 && ffParsePropLine(line, "uarch :", cpuUarch)) ||
+             #if __arm__ || __aarch64__
+             (cpu->vendor.length == 0 && ffParsePropLine(line, "CPU implementer :", cpuImplementer)) ||
+             #endif
+@@ -163,10 +170,10 @@ static const char* parseCpuInfo(FILE* cpuinfo, FFCPUResult* cpu, FFstrbuf* physi
+             (cpu->name.length == 0 && ffParsePropLine(line, "Hardware :", &cpu->name)) || //For Android devices
+             #endif
+             #if __powerpc__ || __powerpc
+-            (cpu->name.length == 0 && ffParsePropLine(line, "cpu :", &cpu->name)) || //For POWER
++            (cpu->name.length == 0 && ffParsePropLine(line, "cpu :", &cpu->name)) || //For POWER
+             #endif
+             #if __mips__
+-            (cpu->name.length == 0 && ffParsePropLine(line, "cpu model :", &cpu->name)) || //For MIPS
++            (cpu->name.length == 0 && ffParsePropLine(line, "cpu model :", &cpu->name)) || //For MIPS
+             #endif
+             false
+         );
+@@ -275,6 +282,9 @@ static double detectCPUTemp(void)
+ 
+ static void parseIsa(FFstrbuf* cpuIsa)
+ {
++    // Always use the last part of the ISA string. Ref: #590 #1204
++    ffStrbufSubstrAfterLastC(cpuIsa, ' ');
++
+     if(ffStrbufStartsWithS(cpuIsa, "rv"))
+     {
+         // RISC-V ISA string example: "rv64imafdch_zicsr_zifencei".
+@@ -290,10 +300,6 @@ static void parseIsa(FFstrbuf* cpuIsa)
+         }
+         // The final ISA output of the above example is "rv64gch".
+     }
+-    if(ffStrbufStartsWithS(cpuIsa, "mips"))
+-    {
+-        ffStrbufSubstrAfterLastC(cpuIsa, ' ');
+-    }
+ }
+ 
+ void detectAsahi(FFCPUResult* cpu)
+diff --git a/src/detection/version/version.c b/src/detection/version/version.c
+index 5c1fb0ae..bd62386d 100644
+--- a/src/detection/version/version.c
++++ b/src/detection/version/version.c
+@@ -16,6 +16,8 @@
+     #define FF_ARCHITECTURE "riscv"
+ #elif defined(__s390x__)
+     #define FF_ARCHITECTURE "s390x"
++#elif defined(__loongarch__)
++    #define FF_ARCHITECTURE "loongarch"
+ #else
+     #define FF_ARCHITECTURE "unknown"
+ #endif
+-- 
+2.43.0